root/net/ipv4/ip.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ip_options_build
  2. ip_options_echo
  3. ip_options_fragment
  4. ip_options_compile
  5. ip_ioctl
  6. ip_send
  7. ip_send_room
  8. ip_build_header
  9. ip_send_check
  10. ip_frag_create
  11. ip_find
  12. ip_free
  13. ip_expire
  14. ip_create
  15. ip_done
  16. ip_glue
  17. ip_defrag
  18. ip_fragment
  19. ip_encap
  20. ip_forward
  21. ip_rcv
  22. ip_loopback
  23. ip_queue_xmit
  24. ip_mc_procinfo
  25. ip_mc_find_devfor
  26. ip_setsockopt
  27. ip_getsockopt
  28. ip_build_xmit
  29. ip_rt_event
  30. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *              
  19  *
  20  * Fixes:
  21  *              Alan Cox        :       Commented a couple of minor bits of surplus code
  22  *              Alan Cox        :       Undefining IP_FORWARD doesn't include the code
  23  *                                      (just stops a compiler warning).
  24  *              Alan Cox        :       Frames with >=MAX_ROUTE record routes, strict routes or loose routes
  25  *                                      are junked rather than corrupting things.
  26  *              Alan Cox        :       Frames to bad broadcast subnets are dumped
  27  *                                      We used to process them non broadcast and
  28  *                                      boy could that cause havoc.
  29  *              Alan Cox        :       ip_forward sets the free flag on the
  30  *                                      new frame it queues. Still crap because
  31  *                                      it copies the frame but at least it
  32  *                                      doesn't eat memory too.
  33  *              Alan Cox        :       Generic queue code and memory fixes.
  34  *              Fred Van Kempen :       IP fragment support (borrowed from NET2E)
  35  *              Gerhard Koerting:       Forward fragmented frames correctly.
  36  *              Gerhard Koerting:       Fixes to my fix of the above 8-).
  37  *              Gerhard Koerting:       IP interface addressing fix.
  38  *              Linus Torvalds  :       More robustness checks
  39  *              Alan Cox        :       Even more checks: Still not as robust as it ought to be
  40  *              Alan Cox        :       Save IP header pointer for later
  41  *              Alan Cox        :       ip option setting
  42  *              Alan Cox        :       Use ip_tos/ip_ttl settings
  43  *              Alan Cox        :       Fragmentation bogosity removed
  44  *                                      (Thanks to Mark.Bush@prg.ox.ac.uk)
  45  *              Dmitry Gorodchanin :    Send of a raw packet crash fix.
  46  *              Alan Cox        :       Silly ip bug when an overlength
  47  *                                      fragment turns up. Now frees the
  48  *                                      queue.
  49  *              Linus Torvalds/ :       Memory leakage on fragmentation
  50  *              Alan Cox        :       handling.
  51  *              Gerhard Koerting:       Forwarding uses IP priority hints
  52  *              Teemu Rantanen  :       Fragment problems.
  53  *              Alan Cox        :       General cleanup, comments and reformat
  54  *              Alan Cox        :       SNMP statistics
  55  *              Alan Cox        :       BSD address rule semantics. Also see
  56  *                                      UDP as there is a nasty checksum issue
  57  *                                      if you do things the wrong way.
  58  *              Alan Cox        :       Always defrag, moved IP_FORWARD to the config.in file
  59  *              Alan Cox        :       IP options adjust sk->priority.
  60  *              Pedro Roque     :       Fix mtu/length error in ip_forward.
  61  *              Alan Cox        :       Avoid ip_chk_addr when possible.
  62  *      Richard Underwood       :       IP multicasting.
  63  *              Alan Cox        :       Cleaned up multicast handlers.
  64  *              Alan Cox        :       RAW sockets demultiplex in the BSD style.
  65  *              Gunther Mayer   :       Fix the SNMP reporting typo
  66  *              Alan Cox        :       Always in group 224.0.0.1
  67  *      Pauline Middelink       :       Fast ip_checksum update when forwarding
  68  *                                      Masquerading support.
  69  *              Alan Cox        :       Multicast loopback error for 224.0.0.1
  70  *              Alan Cox        :       IP_MULTICAST_LOOP option.
  71  *              Alan Cox        :       Use notifiers.
  72  *              Bjorn Ekwall    :       Removed ip_csum (from slhc.c too)
  73  *              Bjorn Ekwall    :       Moved ip_fast_csum to ip.h (inline!)
  74  *              Stefan Becker   :       Send out ICMP HOST REDIRECT
  75  *      Arnt Gulbrandsen        :       ip_build_xmit
  76  *              Alan Cox        :       Per socket routing cache
  77  *              Alan Cox        :       Fixed routing cache, added header cache.
  78  *              Alan Cox        :       Loopback didnt work right in original ip_build_xmit - fixed it.
  79  *              Alan Cox        :       Only send ICMP_REDIRECT if src/dest are the same net.
  80  *              Alan Cox        :       Incoming IP option handling.
  81  *              Alan Cox        :       Set saddr on raw output frames as per BSD.
  82  *              Alan Cox        :       Stopped broadcast source route explosions.
  83  *              Alan Cox        :       Can disable source routing
  84  *              Takeshi Sone    :       Masquerading didn't work.
  85  *      Dave Bonn,Alan Cox      :       Faster IP forwarding whenever possible.
  86  *              Alan Cox        :       Memory leaks, tramples, misc debugging.
  87  *              Alan Cox        :       Fixed multicast (by popular demand 8))
  88  *              Alan Cox        :       Fixed forwarding (by even more popular demand 8))
  89  *              Alan Cox        :       Fixed SNMP statistics [I think]
  90  *      Gerhard Koerting        :       IP fragmentation forwarding fix
  91  *              Alan Cox        :       Device lock against page fault.
  92  *              Alan Cox        :       IP_HDRINCL facility.
  93  *      Werner Almesberger      :       Zero fragment bug
  94  *              Alan Cox        :       RAW IP frame length bug
  95  *              Alan Cox        :       Outgoing firewall on build_xmit
  96  *              A.N.Kuznetsov   :       IP_OPTIONS support throughout the kernel
  97  *              Alan Cox        :       Multicast routing hooks
  98  *
  99  *  
 100  *
 101  * To Fix:
 102  *              IP option processing is mostly not needed. ip_forward needs to know about routing rules
 103  *              and time stamp but that's about all. Use the route mtu field here too
 104  *              IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 105  *              and could be made very efficient with the addition of some virtual memory hacks to permit
 106  *              the allocation of a buffer that can then be 'grown' by twiddling page tables.
 107  *              Output fragmentation wants updating along with the buffer management to use a single 
 108  *              interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 109  *              output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 110  *              fragmentation anyway.
 111  *
 112  *              FIXME: copy frag 0 iph to qp->iph
 113  *
 114  *              This program is free software; you can redistribute it and/or
 115  *              modify it under the terms of the GNU General Public License
 116  *              as published by the Free Software Foundation; either version
 117  *              2 of the License, or (at your option) any later version.
 118  */
 119 
 120 #include <asm/segment.h>
 121 #include <asm/system.h>
 122 #include <linux/types.h>
 123 #include <linux/kernel.h>
 124 #include <linux/sched.h>
 125 #include <linux/mm.h>
 126 #include <linux/string.h>
 127 #include <linux/errno.h>
 128 #include <linux/config.h>
 129 
 130 #include <linux/socket.h>
 131 #include <linux/sockios.h>
 132 #include <linux/in.h>
 133 #include <linux/inet.h>
 134 #include <linux/netdevice.h>
 135 #include <linux/etherdevice.h>
 136 #include <linux/proc_fs.h>
 137 #include <linux/stat.h>
 138 
 139 #include <net/snmp.h>
 140 #include <net/ip.h>
 141 #include <net/protocol.h>
 142 #include <net/route.h>
 143 #include <net/tcp.h>
 144 #include <net/udp.h>
 145 #include <linux/skbuff.h>
 146 #include <net/sock.h>
 147 #include <net/arp.h>
 148 #include <net/icmp.h>
 149 #include <net/raw.h>
 150 #include <net/checksum.h>
 151 #include <linux/igmp.h>
 152 #include <linux/ip_fw.h>
 153 #include <linux/mroute.h>
 154 
 155 #define CONFIG_IP_DEFRAG
 156 
 157 extern int last_retran;
 158 extern void sort_send(struct sock *sk);
 159 
 160 #define min(a,b)        ((a)<(b)?(a):(b))
 161 
 162 /*
 163  *      SNMP management statistics
 164  */
 165 
 166 #ifdef CONFIG_IP_FORWARD
 167 struct ip_mib ip_statistics={1,64,};    /* Forwarding=Yes, Default TTL=64 */
 168 #else
 169 struct ip_mib ip_statistics={2,64,};    /* Forwarding=No, Default TTL=64 */
 170 #endif
 171 
 172 /* 
 173  * Write options to IP header, record destination address to
 174  * source route option, address of outgoing interface
 175  * (we should already know it, so this function may only be
 176  * called after the routing decision) and timestamp,
 177  * if we originate this datagram.
 178  */
 179 
 180 static void ip_options_build(struct sk_buff * skb, struct options * opt,
     /* [previous][next][first][last][top][bottom][index][help] */
 181                             __u32 daddr, __u32 saddr,
 182                             int is_frag) {
 183         unsigned char * iph = (unsigned char*)skb->ip_hdr;
 184 
 185         memcpy(skb->proto_priv, opt, sizeof(struct options));
 186         memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
 187         opt = (struct options*)skb->proto_priv;
 188         opt->is_data = 0;
 189 
 190         if (opt->srr)
 191           memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
 192 
 193         if (!is_frag) {
 194                 if (opt->rr_needaddr)
 195                   memcpy(iph+opt->rr+iph[opt->rr+2]-5, &saddr, 4);
 196                 if (opt->ts_needaddr)
 197                   memcpy(iph+opt->ts+iph[opt->ts+2]-9, &saddr, 4);
 198                 if (opt->ts_needtime) {
 199                         struct timeval tv;
 200                         __u32 midtime;
 201                         do_gettimeofday(&tv);
 202                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
 203                         memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
 204                 }
 205                 return;
 206         }
 207         if (opt->rr) {
 208                 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
 209                 opt->rr = 0;
 210                 opt->rr_needaddr = 0;
 211         }
 212         if (opt->ts) {
 213                 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
 214                 opt->ts = 0;
 215                 opt->ts_needaddr = opt->ts_needtime = 0;
 216         }
 217 }
 218 
 219 int ip_options_echo(struct options * dopt, struct options * sopt,
     /* [previous][next][first][last][top][bottom][index][help] */
 220                      __u32 daddr, __u32 saddr,
 221                      struct sk_buff * skb) {
 222         unsigned char *sptr, *dptr;
 223         int soffset, doffset;
 224         int     optlen;
 225 
 226         memset(dopt, 0, sizeof(struct options));
 227 
 228         dopt->is_data = 1;
 229 
 230         if (!sopt)
 231           sopt = (struct options*)skb->proto_priv;
 232 
 233         if (sopt->optlen == 0) {
 234                 dopt->optlen = 0;
 235                 return 0;
 236         }
 237 
 238         sptr = (sopt->is_data ? sopt->__data - sizeof(struct iphdr) :
 239                 (unsigned char *)skb->ip_hdr);
 240         dptr = dopt->__data;
 241 
 242         if (sopt->rr) {
 243                 optlen  = sptr[sopt->rr+1];
 244                 soffset = sptr[sopt->rr+2];
 245                 dopt->rr = dopt->optlen + sizeof(struct iphdr);
 246                 memcpy(dptr, sptr+sopt->rr, optlen);
 247                 if (sopt->rr_needaddr && soffset <= optlen) {
 248                         if (soffset + 3 > optlen)
 249                           return -EINVAL;
 250                         dptr[2] = soffset + 4;
 251                         dopt->rr_needaddr = 1;
 252                 }
 253                 dptr     += optlen;
 254                 dopt->optlen += optlen;
 255         }
 256         if (sopt->ts) {
 257                 optlen = sptr[sopt->ts+1];
 258                 soffset = sptr[sopt->ts+2];
 259                 dopt->ts = dopt->optlen + sizeof(struct iphdr);
 260                 memcpy(dptr, sptr+sopt->ts, optlen);
 261                 if (soffset <= optlen) {
 262                         if (dopt->ts_needaddr) {
 263                                 if (soffset + 3 > optlen)
 264                                   return -EINVAL;
 265                                 dopt->ts_needaddr = 1;
 266                                 soffset += 4;
 267                         }
 268                         if (dopt->ts_needtime) {
 269                                 if (soffset + 3 > optlen)
 270                                   return -EINVAL;
 271                                 dopt->ts_needtime = 1;
 272                                 soffset += 4;
 273                         }
 274                         if (((struct timestamp*)(dptr+1))->flags == IPOPT_TS_PRESPEC) {
 275                                 __u32 addr;
 276                                 memcpy(&addr, sptr+soffset-9, 4);
 277                                 if (ip_chk_addr(addr) == 0) {
 278                                         dopt->ts_needtime = 0;
 279                                         dopt->ts_needaddr = 0;
 280                                         soffset -= 8;
 281                                 }
 282                         }
 283                         dptr[2] = soffset;
 284                 }
 285                 dptr += optlen;
 286                 dopt->optlen += optlen;
 287         }
 288         if (sopt->srr) {
 289                 unsigned char * start = sptr+sopt->srr;
 290                 __u32 faddr;
 291 
 292                 optlen  = start[1];
 293                 soffset = start[2];
 294                 doffset = 0;
 295                 if (soffset > optlen)
 296                   soffset = optlen + 1;
 297                 soffset -= 4;
 298                 if (soffset > 3) {
 299                         memcpy(&faddr, &start[soffset-1], 4);
 300                         for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
 301                           memcpy(&dptr[doffset-1], &start[soffset-1], 4);
 302                         /*
 303                          * RFC1812 requires to fix illegal source routes.
 304                          */
 305                         if (memcmp(&saddr, &start[soffset+3], 4) == 0)
 306                           doffset -= 4;
 307                 }
 308                 if (doffset > 3) {
 309                         memcpy(&start[doffset-1], &daddr, 4);
 310                         dopt->faddr = faddr;
 311                         dptr[0] = start[0];
 312                         dptr[1] = doffset+3;
 313                         dptr[2] = 4;
 314                         dptr += doffset+3;
 315                         dopt->srr = dopt->optlen + sizeof(struct iphdr);
 316                         dopt->optlen += doffset+3;
 317                         dopt->is_strictroute = sopt->is_strictroute;
 318                 }
 319         }
 320         while (dopt->optlen & 3) {
 321                 *dptr++ = IPOPT_END;
 322                 dopt->optlen++;
 323         }
 324         return 0;
 325 }
 326 
 327 static void ip_options_fragment(struct sk_buff * skb) {
     /* [previous][next][first][last][top][bottom][index][help] */
 328         unsigned char * optptr = (unsigned char*)skb->ip_hdr;
 329         struct options * opt = (struct options*)skb->proto_priv;
 330         int  l = opt->optlen;
 331         int  optlen;
 332 
 333         while (l > 0) {
 334                 switch (*optptr) {
 335                       case IPOPT_END:
 336                         return;
 337                       case IPOPT_NOOP:
 338                         l--;
 339                         optptr++;
 340                         continue;
 341                 }
 342                 optlen = optptr[1];
 343                 if (l<2 || optlen>l)
 344                   return;
 345                 if (!(*optptr & 0x80))
 346                   memset(optptr, IPOPT_NOOP, optlen);
 347                 l -= optlen;
 348                 optptr += optlen;
 349         }
 350         opt->ts = 0;
 351         opt->rr = 0;
 352         opt->rr_needaddr = 0;
 353         opt->ts_needaddr = 0;
 354         opt->ts_needtime = 0;
 355         return;
 356 }
 357 
 358 /*
 359  * Verify options and fill pointers in struct options.
 360  * Caller should clear *opt, and set opt->data.
 361  * If opt == NULL, then skb->data should point to IP header.
 362  */
 363 
 364 int ip_options_compile(struct options * opt, struct sk_buff * skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366         int l;
 367         unsigned char * iph;
 368         unsigned char * optptr;
 369         int optlen;
 370         unsigned char * pp_ptr = NULL;
 371 
 372         if (!opt) {
 373                 opt = (struct options*)skb->proto_priv;
 374                 memset(opt, 0, sizeof(struct options));
 375                 iph = (unsigned char*)skb->ip_hdr;
 376                 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
 377                 optptr = iph + sizeof(struct iphdr);
 378                 opt->is_data = 0;
 379         } else {
 380                 optptr = opt->is_data ? opt->__data : (unsigned char*)&skb->ip_hdr[1];
 381                 iph = optptr - sizeof(struct iphdr);
 382         }
 383 
 384         for (l = opt->optlen; l > 0; ) {
 385                 switch (*optptr) {
 386                       case IPOPT_END:
 387                         for (optptr++, l--; l>0; l--) {
 388                                 if (*optptr != IPOPT_END) {
 389                                         *optptr = IPOPT_END;
 390                                         opt->is_changed = 1;
 391                                 }
 392                         }
 393                         goto eol;
 394                       case IPOPT_NOOP:
 395                         l--;
 396                         optptr++;
 397                         continue;
 398                 }
 399                 optlen = optptr[1];
 400                 if (l<2 || optlen>l) {
 401                         pp_ptr = optptr;
 402                         break;
 403                 }
 404                 switch (*optptr) {
 405                       case IPOPT_SSRR:
 406                       case IPOPT_LSRR:
 407                         if (optlen < 3) {
 408                                 pp_ptr = optptr + 1;
 409                                 break;
 410                         }
 411                         if (optptr[2] < 4) {
 412                                 pp_ptr = optptr + 2;
 413                                 break;
 414                         }
 415                         /* NB: cf RFC-1812 5.2.4.1 */
 416                         if (opt->srr) {
 417                                 pp_ptr = optptr;
 418                                 break;
 419                         }
 420                         if (!skb) {
 421                                 if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
 422                                         pp_ptr = optptr + 1;
 423                                         break;
 424                                 }
 425                                 memcpy(&opt->faddr, &optptr[3], 4);
 426                                 if (optlen > 7)
 427                                   memmove(&optptr[3], &optptr[7], optlen-7);
 428                         }
 429                         opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
 430                         opt->srr = optptr - iph;
 431                         break;
 432                       case IPOPT_RR:
 433                         if (opt->rr) {
 434                                 pp_ptr = optptr;
 435                                 break;
 436                         }
 437                         if (optlen < 3) {
 438                                 pp_ptr = optptr + 1;
 439                                 break;
 440                         }
 441                         if (optptr[2] < 4) {
 442                                 pp_ptr = optptr + 2;
 443                                 break;
 444                         }
 445                         if (optptr[2] <= optlen) {
 446                                 if (optptr[2]+3 > optlen) {
 447                                         pp_ptr = optptr + 2;
 448                                         break;
 449                                 }
 450                                 if (skb) {
 451                                         memcpy(&optptr[optptr[2]-1], &skb->dev->pa_addr, 4);
 452                                         opt->is_changed = 1;
 453                                 }
 454                                 optptr[2] += 4;
 455                                 opt->rr_needaddr = 1;
 456                         }
 457                         opt->rr = optptr - iph;
 458                         break;
 459                       case IPOPT_TIMESTAMP:
 460                         if (opt->ts) {
 461                                 pp_ptr = optptr;
 462                                 break;
 463                         }
 464                         if (optlen < 4) {
 465                                 pp_ptr = optptr + 1;
 466                                 break;
 467                         }
 468                         if (optptr[2] < 5) {
 469                                 pp_ptr = optptr + 2;
 470                                 break;
 471                         }
 472                         if (optptr[2] <= optlen) {
 473                                 struct timestamp * ts = (struct timestamp*)(optptr+1);
 474                                 __u32 * timeptr = NULL;
 475                                 if (ts->ptr+3 > ts->len) {
 476                                         pp_ptr = optptr + 2;
 477                                         break;
 478                                 }
 479                                 switch (ts->flags) {
 480                                       case IPOPT_TS_TSONLY:
 481                                         opt->ts = optptr - iph;
 482                                         if (skb) {
 483                                                 timeptr = (__u32*)&optptr[ts->ptr-1];
 484                                                 opt->is_changed = 1;
 485                                         }
 486                                         ts->ptr += 4;
 487                                         break;
 488                                       case IPOPT_TS_TSANDADDR:
 489                                         if (ts->ptr+7 > ts->len) {
 490                                                 pp_ptr = optptr + 2;
 491                                                 break;
 492                                         }
 493                                         opt->ts = optptr - iph;
 494                                         if (skb) {
 495                                                 memcpy(&optptr[ts->ptr-1], &skb->dev->pa_addr, 4);
 496                                                 timeptr = (__u32*)&optptr[ts->ptr+3];
 497                                         }
 498                                         opt->ts_needaddr = 1;
 499                                         opt->ts_needtime = 1;
 500                                         ts->ptr += 8;
 501                                         break;
 502                                       case IPOPT_TS_PRESPEC:
 503                                         if (ts->ptr+7 > ts->len) {
 504                                                 pp_ptr = optptr + 2;
 505                                                 break;
 506                                         }
 507                                         opt->ts = optptr - iph;
 508                                         {
 509                                                 __u32 addr;
 510                                                 memcpy(&addr, &optptr[ts->ptr-1], 4);
 511                                                 if (ip_chk_addr(addr) == 0)
 512                                                   break;
 513                                                 if (skb)
 514                                                   timeptr = (__u32*)&optptr[ts->ptr+3];
 515                                         }
 516                                         opt->ts_needaddr = 1;
 517                                         opt->ts_needtime = 1;
 518                                         ts->ptr += 8;
 519                                         break;
 520                                       default:
 521                                         pp_ptr = optptr + 3;
 522                                         break;
 523                                 }
 524                                 if (timeptr) {
 525                                         struct timeval tv;
 526                                         __u32  midtime;
 527                                         do_gettimeofday(&tv);
 528                                         midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
 529                                         memcpy(timeptr, &midtime, sizeof(__u32));
 530                                         opt->is_changed = 1;
 531                                 }
 532                         } else {
 533                                 struct timestamp * ts = (struct timestamp*)(optptr+1);
 534                                 if (ts->overflow == 15) {
 535                                         pp_ptr = optptr + 3;
 536                                         break;
 537                                 }
 538                                 opt->ts = optptr - iph;
 539                                 if (skb) {
 540                                         ts->overflow++;
 541                                         opt->is_changed = 1;
 542                                 }
 543                         }
 544                         break;
 545                       case IPOPT_SEC:
 546                       case IPOPT_SID:
 547                       default:
 548                         if (!skb) {
 549                                 pp_ptr = optptr;
 550                                 break;
 551                         }
 552                         break;
 553                 }
 554                 l -= optlen;
 555                 optptr += optlen;
 556         }
 557 
 558 eol:
 559         if (!pp_ptr)
 560           return 0;
 561 
 562         if (skb) {
 563                 icmp_send(skb, ICMP_PARAMETERPROB, 0, pp_ptr-iph, skb->dev);
 564                 kfree_skb(skb, FREE_READ);
 565         }
 566         return -EINVAL;
 567 }
 568 
 569 /*
 570  *      Handle the issuing of an ioctl() request
 571  *      for the ip device. This is scheduled to
 572  *      disappear
 573  */
 574 
 575 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 576 {
 577         switch(cmd)
 578         {
 579                 default:
 580                         return(-EINVAL);
 581         }
 582 }
 583 
 584 
 585 /*
 586  *      Take an skb, and fill in the MAC header.
 587  */
 588 
 589 static int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 590 {
 591         int mac = 0;
 592 
 593         skb->dev = dev;
 594         skb->arp = 1;
 595         if (dev->hard_header)
 596         {
 597                 /*
 598                  *      Build a hardware header. Source address is our mac, destination unknown
 599                  *      (rebuild header will sort this out)
 600                  */
 601                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 602                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 603                 if (mac < 0)
 604                 {
 605                         mac = -mac;
 606                         skb->arp = 0;
 607                         skb->raddr = daddr;     /* next routing address */
 608                 }
 609         }
 610         return mac;
 611 }
 612 
 613 static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 614 {
 615         int mac = 0;
 616 
 617         skb->dev = dev;
 618         skb->arp = 1;
 619         if (dev->hard_header)
 620         {
 621                 skb_reserve(skb,MAX_HEADER);
 622                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 623                 if (mac < 0)
 624                 {
 625                         mac = -mac;
 626                         skb->arp = 0;
 627                         skb->raddr = daddr;     /* next routing address */
 628                 }
 629         }
 630         return mac;
 631 }
 632 
 633 int ip_id_count = 0;
 634 
 635 /*
 636  * This routine builds the appropriate hardware/IP headers for
 637  * the routine.  It assumes that if *dev != NULL then the
 638  * protocol knows what it's doing, otherwise it uses the
 639  * routing/ARP tables to select a device struct.
 640  */
 641 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 642                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 643 {
 644         struct rtable *rt;
 645         __u32 raddr;
 646         int tmp;
 647         __u32 src;
 648         struct iphdr *iph;
 649         __u32 final_daddr = daddr;
 650 
 651         if (opt && opt->srr)
 652           daddr = opt->faddr;
 653 
 654         /*
 655          *      See if we need to look up the device.
 656          */
 657 
 658 #ifdef CONFIG_IP_MULTICAST      
 659         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 660                 *dev=dev_get(skb->sk->ip_mc_name);
 661 #endif
 662         if (*dev == NULL)
 663         {
 664                 if(skb->localroute)
 665                         rt = ip_rt_local(daddr, NULL, &src);
 666                 else
 667                         rt = ip_rt_route(daddr, NULL, &src);
 668                 if (rt == NULL)
 669                 {
 670                         ip_statistics.IpOutNoRoutes++;
 671                         return(-ENETUNREACH);
 672                 }
 673 
 674                 *dev = rt->rt_dev;
 675                 /*
 676                  *      If the frame is from us and going off machine it MUST MUST MUST
 677                  *      have the output device ip address and never the loopback
 678                  */
 679                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 680                         saddr = src;/*rt->rt_dev->pa_addr;*/
 681                 raddr = rt->rt_gateway;
 682 
 683         }
 684         else
 685         {
 686                 /*
 687                  *      We still need the address of the first hop.
 688                  */
 689                 if(skb->localroute)
 690                         rt = ip_rt_local(daddr, NULL, &src);
 691                 else
 692                         rt = ip_rt_route(daddr, NULL, &src);
 693                 /*
 694                  *      If the frame is from us and going off machine it MUST MUST MUST
 695                  *      have the output device ip address and never the loopback
 696                  */
 697                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 698                         saddr = src;/*rt->rt_dev->pa_addr;*/
 699 
 700                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 701         }
 702 
 703         /*
 704          *      No source addr so make it our addr
 705          */
 706         if (saddr == 0)
 707                 saddr = src;
 708 
 709         /*
 710          *      No gateway so aim at the real destination
 711          */
 712         if (raddr == 0)
 713                 raddr = daddr;
 714 
 715         /*
 716          *      Now build the MAC header.
 717          */
 718 
 719         if(type==IPPROTO_TCP)
 720                 tmp = ip_send_room(skb, raddr, len, *dev, saddr);
 721         else
 722                 tmp = ip_send(skb, raddr, len, *dev, saddr);
 723 
 724         /*
 725          *      Book keeping
 726          */
 727 
 728         skb->dev = *dev;
 729         skb->saddr = saddr;
 730 
 731         /*
 732          *      Now build the IP header.
 733          */
 734 
 735         /*
 736          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 737          *      one is being supplied to us by the user
 738          */
 739 
 740         if(type == IPPROTO_RAW)
 741                 return (tmp);
 742 
 743         /*
 744          *      Build the IP addresses
 745          */
 746          
 747         if (opt)
 748           iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 749         else
 750           iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 751 
 752         iph->version  = 4;
 753         iph->ihl      = 5;
 754         iph->tos      = tos;
 755         iph->frag_off = 0;
 756         iph->ttl      = ttl;
 757         iph->daddr    = daddr;
 758         iph->saddr    = saddr;
 759         iph->protocol = type;
 760         skb->ip_hdr   = iph;
 761 
 762         if (!opt || !opt->optlen)
 763           return sizeof(struct iphdr) + tmp;
 764         if (opt->is_strictroute && rt && rt->rt_gateway) {
 765           ip_statistics.IpOutNoRoutes++;
 766           return -ENETUNREACH;
 767         }
 768         iph->ihl += opt->optlen>>2;
 769         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 770         return iph->ihl*4 + tmp;
 771 }
 772 
 773 
 774 /*
 775  *      Generate a checksum for an outgoing IP datagram.
 776  */
 777 
 778 void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 779 {
 780         iph->check = 0;
 781         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 782 }
 783 
 784 
 785 /************************ Fragment Handlers From NET2E **********************************/
 786 
 787 
 788 /*
 789  *      This fragment handler is a bit of a heap. On the other hand it works quite
 790  *      happily and handles things quite well.
 791  */
 792 
 793 static struct ipq *ipqueue = NULL;              /* Head of the list of datagrams being reassembled: ip_create() links new entries at the head, ip_free() unlinks them */
 794 
 795 /*
 796  *      Create a new fragment entry.
 797  */
 798 
 799 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 800 {
 801         struct ipfrag *fp;
 802 
 803         fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
 804         if (fp == NULL)
 805         {
 806                 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
 807                 return(NULL);
 808         }
 809         memset(fp, 0, sizeof(struct ipfrag));
 810 
 811         /* Fill in the structure. */
 812         fp->offset = offset;
 813         fp->end = end;
 814         fp->len = end - offset;
 815         fp->skb = skb;
 816         fp->ptr = ptr;
 817 
 818         return(fp);
 819 }
 820 
 821 
 822 /*
 823  *      Find the correct entry in the "incomplete datagrams" queue for
 824  *      this IP datagram, and return the queue entry address if found.
 825  */
 826 
 827 static struct ipq *ip_find(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 828 {
 829         struct ipq *qp;
 830         struct ipq *qplast;
 831 
 832         cli();
 833         qplast = NULL;
 834         for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
 835         {
 836                 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
 837                         iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
 838                 {
 839                         del_timer(&qp->timer);  /* So it doesn't vanish on us. The timer will be reset anyway */
 840                         sti();
 841                         return(qp);
 842                 }
 843         }
 844         sti();
 845         return(NULL);
 846 }
 847 
 848 
 849 /*
 850  *      Remove an entry from the "incomplete datagrams" queue, either
 851  *      because we completed, reassembled and processed it, or because
 852  *      it timed out.
 853  */
 854 
 855 static void ip_free(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 856 {
 857         struct ipfrag *fp;
 858         struct ipfrag *xp;
 859 
 860         /*
 861          * Stop the timer for this entry.
 862          */
 863 
 864         del_timer(&qp->timer);
 865 
 866         /* Remove this entry from the "incomplete datagrams" queue. */
 867         cli();
 868         if (qp->prev == NULL)
 869         {
 870                 ipqueue = qp->next;
 871                 if (ipqueue != NULL)
 872                         ipqueue->prev = NULL;
 873         }
 874         else
 875         {
 876                 qp->prev->next = qp->next;
 877                 if (qp->next != NULL)
 878                         qp->next->prev = qp->prev;
 879         }
 880 
 881         /* Release all fragment data. */
 882 
 883         fp = qp->fragments;
 884         while (fp != NULL)
 885         {
 886                 xp = fp->next;
 887                 IS_SKB(fp->skb);
 888                 kfree_skb(fp->skb,FREE_READ);
 889                 kfree_s(fp, sizeof(struct ipfrag));
 890                 fp = xp;
 891         }
 892 
 893         /* Release the IP header. */
 894         kfree_s(qp->iph, 64 + 8);
 895 
 896         /* Finally, release the queue descriptor itself. */
 897         kfree_s(qp, sizeof(struct ipq));
 898         sti();
 899 }
 900 
 901 
 902 /*
 903  *      Oops- a fragment queue timed out.  Kill it and send an ICMP reply.
 904  */
 905 
 906 static void ip_expire(unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 907 {
 908         struct ipq *qp;
 909 
 910         qp = (struct ipq *)arg;
 911 
 912         /*
 913          *      Send an ICMP "Fragment Reassembly Timeout" message.
 914          */
 915 
 916         ip_statistics.IpReasmTimeout++;
 917         ip_statistics.IpReasmFails++;   
 918         /* This if is always true... shrug */
 919         if(qp->fragments!=NULL)
 920                 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
 921                                 ICMP_EXC_FRAGTIME, 0, qp->dev);
 922 
 923         /*
 924          *      Nuke the fragment queue.
 925          */
 926         ip_free(qp);
 927 }
 928 
 929 
 930 /*
 931  *      Add an entry to the 'ipq' queue for a newly received IP datagram.
 932  *      We will (hopefully :-) receive all other fragments of this datagram
 933  *      in time, so we just create a queue for this datagram, in which we
 934  *      will insert the received fragments at their respective positions.
 935  */
 936 
 937 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 938 {
 939         struct ipq *qp;
 940         int ihlen;
 941 
 942         qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 943         if (qp == NULL)
 944         {
 945                 NETDEBUG(printk("IP: create: no memory left !\n"));
 946                 return(NULL);
 947                 skb->dev = qp->dev;
 948         }
 949         memset(qp, 0, sizeof(struct ipq));
 950 
 951         /*
 952          *      Allocate memory for the IP header (plus 8 octets for ICMP).
 953          */
 954 
 955         ihlen = iph->ihl * 4;
 956         qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC);
 957         if (qp->iph == NULL)
 958         {
 959                 NETDEBUG(printk("IP: create: no memory left !\n"));
 960                 kfree_s(qp, sizeof(struct ipq));
 961                 return(NULL);
 962         }
 963 
 964         memcpy(qp->iph, iph, ihlen + 8);
 965         qp->len = 0;
 966         qp->ihlen = ihlen;
 967         qp->fragments = NULL;
 968         qp->dev = dev;
 969 
 970         /* Start a timer for this entry. */
 971         qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds     */
 972         qp->timer.data = (unsigned long) qp;            /* pointer to queue     */
 973         qp->timer.function = ip_expire;                 /* expire function      */
 974         add_timer(&qp->timer);
 975 
 976         /* Add this entry to the queue. */
 977         qp->prev = NULL;
 978         cli();
 979         qp->next = ipqueue;
 980         if (qp->next != NULL)
 981                 qp->next->prev = qp;
 982         ipqueue = qp;
 983         sti();
 984         return(qp);
 985 }
 986 
 987 
 988 /*
 989  *      See if a fragment queue is complete.
 990  */
 991 
 992 static int ip_done(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
 993 {
 994         struct ipfrag *fp;
 995         int offset;
 996 
 997         /* Only possible if we received the final fragment. */
 998         if (qp->len == 0)
 999                 return(0);
1000 
1001         /* Check all fragment offsets to see if they connect. */
1002         fp = qp->fragments;
1003         offset = 0;
1004         while (fp != NULL)
1005         {
1006                 if (fp->offset > offset)
1007                         return(0);      /* fragment(s) missing */
1008                 offset = fp->end;
1009                 fp = fp->next;
1010         }
1011 
1012         /* All fragments are present. */
1013         return(1);
1014 }
1015 
1016 
1017 /*
1018  *      Build a new IP datagram from all its fragments.
1019  *
1020  *      FIXME: We copy here because we lack an effective way of handling lists
1021  *      of bits on input. Until the new skb data handling is in I'm not going
1022  *      to touch this with a bargepole. 
1023  */
1024 
1025 static struct sk_buff *ip_glue(struct ipq *qp)
     /* [previous][next][first][last][top][bottom][index][help] */
1026 {
1027         struct sk_buff *skb;
1028         struct iphdr *iph;
1029         struct ipfrag *fp;
1030         unsigned char *ptr;
1031         int count, len;
1032 
1033         /*
1034          *      Allocate a new buffer for the datagram.
1035          */
1036         len = qp->ihlen + qp->len;
1037 
1038         if ((skb = dev_alloc_skb(len)) == NULL)
1039         {
1040                 ip_statistics.IpReasmFails++;
1041                 NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
1042                 ip_free(qp);
1043                 return(NULL);
1044         }
1045 
1046         /* Fill in the basic details. */
1047         skb_put(skb,len);
1048         skb->h.raw = skb->data;
1049         skb->free = 1;
1050 
1051         /* Copy the original IP headers into the new buffer. */
1052         ptr = (unsigned char *) skb->h.raw;
1053         memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
1054         ptr += qp->ihlen;
1055 
1056         count = 0;
1057 
1058         /* Copy the data portions of all fragments into the new buffer. */
1059         fp = qp->fragments;
1060         while(fp != NULL)
1061         {
1062                 if(count+fp->len > skb->len)
1063                 {
1064                         NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
1065                         ip_free(qp);
1066                         kfree_skb(skb,FREE_WRITE);
1067                         ip_statistics.IpReasmFails++;
1068                         return NULL;
1069                 }
1070                 memcpy((ptr + fp->offset), fp->ptr, fp->len);
1071                 count += fp->len;
1072                 fp = fp->next;
1073         }
1074 
1075         /* We glued together all fragments, so remove the queue entry. */
1076         ip_free(qp);
1077 
1078         /* Done with all fragments. Fixup the new IP header. */
1079         iph = skb->h.iph;
1080         iph->frag_off = 0;
1081         iph->tot_len = htons((iph->ihl * 4) + count);
1082         skb->ip_hdr = iph;
1083 
1084         ip_statistics.IpReasmOKs++;
1085         return(skb);
1086 }
1087 
1088 
1089 /*
1090  *      Process an incoming IP datagram fragment.
1091  */
1092 
1093 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1094 {
1095         struct ipfrag *prev, *next, *tmp;
1096         struct ipfrag *tfp;
1097         struct ipq *qp;
1098         struct sk_buff *skb2;
1099         unsigned char *ptr;
1100         int flags, offset;
1101         int i, ihl, end;
1102 
1103         ip_statistics.IpReasmReqds++;
1104 
1105         /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
1106         qp = ip_find(iph);
1107 
1108         /* Is this a non-fragmented datagram? */
1109         offset = ntohs(iph->frag_off);
1110         flags = offset & ~IP_OFFSET;
1111         offset &= IP_OFFSET;
1112         if (((flags & IP_MF) == 0) && (offset == 0))
1113         {
1114                 if (qp != NULL)
1115                         ip_free(qp);    /* Huh? How could this exist?? */
1116                 return(skb);
1117         }
1118 
1119         offset <<= 3;           /* offset is in 8-byte chunks */
1120         ihl = iph->ihl * 4;
1121 
1122         /*
1123          * If the queue already existed, keep restarting its timer as long
1124          * as we still are receiving fragments.  Otherwise, create a fresh
1125          * queue entry.
1126          */
1127 
1128         if (qp != NULL)
1129         {
1130                 /* ANK. If the first fragment is received,
1131                  * we should remember the correct IP header (with options)
1132                  */
1133                 if (offset == 0)
1134                 {
1135                         qp->ihlen = ihl;
1136                         memcpy(qp->iph, iph, ihl+8);
1137                 }
1138                 del_timer(&qp->timer);
1139                 qp->timer.expires = jiffies + IP_FRAG_TIME;     /* about 30 seconds */
1140                 qp->timer.data = (unsigned long) qp;    /* pointer to queue */
1141                 qp->timer.function = ip_expire;         /* expire function */
1142                 add_timer(&qp->timer);
1143         }
1144         else
1145         {
1146                 /*
1147                  *      If we failed to create it, then discard the frame
1148                  */
1149                 if ((qp = ip_create(skb, iph, dev)) == NULL)
1150                 {
1151                         skb->sk = NULL;
1152                         kfree_skb(skb, FREE_READ);
1153                         ip_statistics.IpReasmFails++;
1154                         return NULL;
1155                 }
1156         }
1157 
1158         /*
1159          *      Determine the position of this fragment.
1160          */
1161 
1162         end = offset + ntohs(iph->tot_len) - ihl;
1163 
1164         /*
1165          *      Point into the IP datagram 'data' part.
1166          */
1167 
1168         ptr = skb->data + ihl;
1169 
1170         /*
1171          *      Is this the final fragment?
1172          */
1173 
1174         if ((flags & IP_MF) == 0)
1175                 qp->len = end;
1176 
1177         /*
1178          *      Find out which fragments are in front and at the back of us
1179          *      in the chain of fragments so far.  We must know where to put
1180          *      this fragment, right?
1181          */
1182 
1183         prev = NULL;
1184         for(next = qp->fragments; next != NULL; next = next->next)
1185         {
1186                 if (next->offset > offset)
1187                         break;  /* bingo! */
1188                 prev = next;
1189         }
1190 
1191         /*
1192          *      We found where to put this one.
1193          *      Check for overlap with preceding fragment, and, if needed,
1194          *      align things so that any overlaps are eliminated.
1195          */
1196         if (prev != NULL && offset < prev->end)
1197         {
1198                 i = prev->end - offset;
1199                 offset += i;    /* ptr into datagram */
1200                 ptr += i;       /* ptr into fragment data */
1201         }
1202 
1203         /*
1204          * Look for overlap with succeeding segments.
1205          * If we can merge fragments, do it.
1206          */
1207 
1208         for(tmp=next; tmp != NULL; tmp = tfp)
1209         {
1210                 tfp = tmp->next;
1211                 if (tmp->offset >= end)
1212                         break;          /* no overlaps at all */
1213 
1214                 i = end - next->offset;                 /* overlap is 'i' bytes */
1215                 tmp->len -= i;                          /* so reduce size of    */
1216                 tmp->offset += i;                       /* next fragment        */
1217                 tmp->ptr += i;
1218                 /*
1219                  *      If we get a frag size of <= 0, remove it and the packet
1220                  *      that it goes with.
1221                  */
1222                 if (tmp->len <= 0)
1223                 {
1224                         if (tmp->prev != NULL)
1225                                 tmp->prev->next = tmp->next;
1226                         else
1227                                 qp->fragments = tmp->next;
1228 
1229                         if (tfp->next != NULL)
1230                                 tmp->next->prev = tmp->prev;
1231                         
1232                         next=tfp;       /* We have killed the original next frame */
1233 
1234                         kfree_skb(tmp->skb,FREE_READ);
1235                         kfree_s(tmp, sizeof(struct ipfrag));
1236                 }
1237         }
1238 
1239         /*
1240          *      Insert this fragment in the chain of fragments.
1241          */
1242 
1243         tfp = NULL;
1244         tfp = ip_frag_create(offset, end, skb, ptr);
1245 
1246         /*
1247          *      No memory to save the fragment - so throw the lot
1248          */
1249 
1250         if (!tfp)
1251         {
1252                 skb->sk = NULL;
1253                 kfree_skb(skb, FREE_READ);
1254                 return NULL;
1255         }
1256         tfp->prev = prev;
1257         tfp->next = next;
1258         if (prev != NULL)
1259                 prev->next = tfp;
1260         else
1261                 qp->fragments = tfp;
1262 
1263         if (next != NULL)
1264                 next->prev = tfp;
1265 
1266         /*
1267          *      OK, so we inserted this new fragment into the chain.
1268          *      Check if we now have a full IP datagram which we can
1269          *      bump up to the IP layer...
1270          */
1271 
1272         if (ip_done(qp))
1273         {
1274                 skb2 = ip_glue(qp);             /* glue together the fragments */
1275                 return(skb2);
1276         }
1277         return(NULL);
1278 }
1279 
1280 
1281 /*
1282  *      This IP datagram is too large to be sent in one piece.  Break it up into
1283  *      smaller pieces (each of size equal to the MAC header plus IP header plus
1284  *      a block of the data of the original IP data part) that will yet fit in a
1285  *      single device frame, and queue such a frame for sending by calling the
1286  *      ip_queue_xmit().  Note that this is recursion, and bad things will happen
1287  *      if this function causes a loop...
1288  *
1289  *      Yes this is inefficient, feel free to submit a quicker one.
1290  *
1291  */
1292  
1293 static void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
     /* [previous][next][first][last][top][bottom][index][help] */
1294 {
1295         struct iphdr *iph;
1296         unsigned char *raw;
1297         unsigned char *ptr;
1298         struct sk_buff *skb2;
1299         int left, mtu, hlen, len;
1300         int offset;
1301         unsigned long flags;
1302 
1303         /*
1304          *      Point into the IP datagram header.
1305          */
1306 
1307         raw = skb->data;
1308 #if 0
1309         iph = (struct iphdr *) (raw + dev->hard_header_len);    
1310         skb->ip_hdr = iph;
1311 #else
1312         iph = skb->ip_hdr;
1313 #endif
1314 
1315         /*
1316          *      Setup starting values.
1317          */
1318 
1319         hlen = iph->ihl * 4;
1320         left = ntohs(iph->tot_len) - hlen;      /* Space per frame */
1321         hlen += dev->hard_header_len;           /* Total header size */
1322         mtu = (dev->mtu - hlen);                /* Size of data space */
1323         ptr = (raw + hlen);                     /* Where to start from */
1324 
1325         /*
1326          *      Check for any "DF" flag. [DF means do not fragment]
1327          */
1328 
1329         if (ntohs(iph->frag_off) & IP_DF)
1330         {
1331                 ip_statistics.IpFragFails++;
1332                 printk("ip_queue_xmit: frag needed\n");
1333                 return;
1334         }
1335 
1336         /*
1337          *      The protocol doesn't seem to say what to do in the case that the
1338          *      frame + options doesn't fit the mtu. As it used to fall down dead
1339          *      in this case we were fortunate it didn't happen
1340          */
1341 
1342         if(mtu<8)
1343         {
1344                 /* It's wrong but it's better than nothing */
1345                 icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
1346                 ip_statistics.IpFragFails++;
1347                 return;
1348         }
1349 
1350         /*
1351          *      Fragment the datagram.
1352          */
1353 
1354         /*
1355          *      The initial offset is 0 for a complete frame. When
1356          *      fragmenting fragments it's wherever this one starts.
1357          */
1358 
1359         if (is_frag & 2)
1360                 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
1361         else
1362                 offset = 0;
1363 
1364 
1365         /*
1366          *      Keep copying data until we run out.
1367          */
1368 
1369         while(left > 0)
1370         {
1371                 len = left;
1372                 /* IF: it doesn't fit, use 'mtu' - the data space left */
1373                 if (len > mtu)
1374                         len = mtu;
1375                 /* IF: we are not sending upto and including the packet end
1376                    then align the next start on an eight byte boundary */
1377                 if (len < left)
1378                 {
1379                         len/=8;
1380                         len*=8;
1381                 }
1382                 /*
1383                  *      Allocate buffer.
1384                  */
1385 
1386                 if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
1387                 {
1388                         NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
1389                         ip_statistics.IpFragFails++;
1390                         return;
1391                 }
1392 
1393                 /*
1394                  *      Set up data on packet
1395                  */
1396 
1397                 skb2->arp = skb->arp;
1398                 if(skb->free==0)
1399                         printk("IP fragmenter: BUG free!=1 in fragmenter\n");
1400                 skb2->free = 1;
1401                 skb_put(skb2,len + hlen);
1402                 skb2->h.raw=(char *) skb2->data;
1403                 /*
1404                  *      Charge the memory for the fragment to any owner
1405                  *      it might possess
1406                  */
1407 
1408                 save_flags(flags);
1409                 if (sk)
1410                 {
1411                         cli();
1412                         sk->wmem_alloc += skb2->truesize;
1413                         skb2->sk=sk;
1414                 }
1415                 restore_flags(flags);
1416                 skb2->raddr = skb->raddr;       /* For rebuild_header - must be here */
1417 
1418                 /*
1419                  *      Copy the packet header into the new buffer.
1420                  */
1421 
1422                 memcpy(skb2->h.raw, raw, hlen);
1423 
1424                 /*
1425                  *      Copy a block of the IP datagram.
1426                  */
1427                 memcpy(skb2->h.raw + hlen, ptr, len);
1428                 left -= len;
1429 
1430                 skb2->h.raw+=dev->hard_header_len;
1431 
1432                 /*
1433                  *      Fill in the new header fields.
1434                  */
1435                 iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
1436                 iph->frag_off = htons((offset >> 3));
1437                 skb2->ip_hdr = iph;
1438 
1439                 /* ANK: dirty, but effective trick. Upgrade options only if
1440                  * the segment to be fragmented was THE FIRST (otherwise,
1441                  * options are already fixed) and make it ONCE
1442                  * on the initial skb, so that all the following fragments
1443                  * will inherit fixed options.
1444                  */
1445                 if (offset == 0)
1446                   ip_options_fragment(skb);
1447 
1448                 /*
1449                  *      Added AC : If we are fragmenting a fragment thats not the
1450                  *                 last fragment then keep MF on each bit
1451                  */
1452                 if (left > 0 || (is_frag & 1))
1453                         iph->frag_off |= htons(IP_MF);
1454                 ptr += len;
1455                 offset += len;
1456 
1457                 /*
1458                  *      Put this fragment into the sending queue.
1459                  */
1460 
1461                 ip_statistics.IpFragCreates++;
1462 
1463                 ip_queue_xmit(sk, dev, skb2, 2);
1464         }
1465         ip_statistics.IpFragOKs++;
1466 }
1467 
1468 
1469 
1470 #ifdef CONFIG_IP_FORWARD
1471 #ifdef CONFIG_IP_MROUTE
1472 
1473 /*
1474  *      Encapsulate a packet by attaching a valid IPIP header to it.
1475  *      This avoids tunnel drivers and other mess and gives us the speed so
1476  *      important for multicast video.
1477  */
1478  
1479 static void ip_encap(struct sk_buff *skb, int len, struct device *out, __u32 daddr)
     /* [previous][next][first][last][top][bottom][index][help] */
1480 {
1481         /*
1482          *      There is space for the IPIP header and MAC left.
1483          *
1484          *      Firstly push down and install the IPIP header.
1485          */
1486         struct iphdr *iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1487         if(len>65515)
1488                 len=65515;
1489         iph->version    =       4;
1490         iph->tos        =       skb->ip_hdr->tos;
1491         iph->ttl        =       skb->ip_hdr->ttl;
1492         iph->frag_off   =       0;
1493         iph->daddr      =       daddr;
1494         iph->saddr      =       out->pa_addr;
1495         iph->protocol   =       IPPROTO_IPIP;
1496         iph->ihl        =       5;
1497         iph->tot_len    =       htons(skb->len);
1498         iph->id         =       htons(ip_id_count++);
1499         ip_send_check(iph);
1500 
1501         skb->dev = out;
1502         skb->arp = 1;
1503         skb->raddr=daddr;
1504         /*
1505          *      Now add the physical header (driver will push it down).
1506          */
1507         if (out->hard_header && out->hard_header(skb, out, ETH_P_IP, NULL, NULL, len)<0)
1508                         skb->arp=0;
1509         /*
1510          *      Read to queue for transmission.
1511          */
1512 }
1513 
1514 #endif
1515 
1516 /*
1517  *      Forward an IP datagram to its next destination.
1518  */
1519 
1520 int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag,
     /* [previous][next][first][last][top][bottom][index][help] */
1521                __u32 target_addr)
1522 {
1523         struct device *dev2;    /* Output device */
1524         struct iphdr *iph;      /* Our header */
1525         struct sk_buff *skb2;   /* Output packet */
1526         struct rtable *rt;      /* Route we use */
1527         unsigned char *ptr;     /* Data pointer */
1528         unsigned long raddr;    /* Router IP address */
1529         struct   options * opt  = (struct options*)skb->proto_priv;
1530         int encap = 0;          /* Encap length */
1531 #ifdef CONFIG_IP_FIREWALL
1532         int fw_res = 0;         /* Forwarding result */ 
1533 #ifdef CONFIG_IP_MASQUERADE     
1534         struct sk_buff *skb_in = skb;   /* So we can remember if the masquerader did some swaps */
1535 #endif
1536         
1537         /* 
1538          *      See if we are allowed to forward this.
1539          *      Note: demasqueraded fragments are always 'back'warded.
1540          */
1541 
1542         
1543         if(!(is_frag&4))
1544         {
1545                 fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0);
1546                 switch (fw_res) {
1547                 case FW_ACCEPT:
1548 #ifdef CONFIG_IP_MASQUERADE
1549                 case FW_MASQUERADE:
1550 #endif
1551                         break;
1552                 case FW_REJECT:
1553                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1554                         /* fall thru */
1555                 default:
1556                         return -1;
1557                 }
1558         }
1559 #endif
1560         /*
1561          *      According to the RFC, we must first decrease the TTL field. If
1562          *      that reaches zero, we must reply an ICMP control message telling
1563          *      that the packet's lifetime expired.
1564          *
1565          *      Exception:
1566          *      We may not generate an ICMP for an ICMP. icmp_send does the
1567          *      enforcement of this so we can forget it here. It is however
1568          *      sometimes VERY important.
1569          */
1570 
1571         iph = skb->h.iph;
1572         iph->ttl--;
1573 
1574         /*
1575          *      Re-compute the IP header checksum.
1576          *      This is inefficient. We know what has happened to the header
1577          *      and could thus adjust the checksum as Phil Karn does in KA9Q
1578          */
1579 
1580         iph->check = ntohs(iph->check) + 0x0100;
1581         if ((iph->check & 0xFF00) == 0)
1582                 iph->check++;           /* carry overflow */
1583         iph->check = htons(iph->check);
1584 
1585         if (iph->ttl <= 0)
1586         {
1587                 /* Tell the sender its packet died... */
1588                 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
1589                 return -1;
1590         }
1591 
1592 #ifdef CONFIG_IP_MROUTE
1593         if(!(is_frag&8))
1594         {
1595 #endif  
1596                 /*
1597                  * OK, the packet is still valid.  Fetch its destination address,
1598                  * and give it to the IP sender for further processing.
1599                  */
1600 
1601                 rt = ip_rt_route(target_addr, NULL, NULL);
1602                 if (rt == NULL)
1603                 {
1604                         /*
1605                          *      Tell the sender its packet cannot be delivered. Again
1606                          *      ICMP is screened later.
1607                          */
1608                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
1609                         return -1;
1610                 }
1611         
1612         
1613                 /*
1614                  * Gosh.  Not only is the packet valid; we even know how to
1615                  * forward it onto its final destination.  Can we say this
1616                  * is being plain lucky?
1617                  * If the router told us that there is no GW, use the dest.
1618                  * IP address itself- we seem to be connected directly...
1619                  */
1620 
1621                 raddr = rt->rt_gateway;
1622         
1623                 if (raddr != 0)
1624                 {
1625                         /*
1626                          *      Strict routing permits no gatewaying
1627                          */
1628         
1629                         if (opt->is_strictroute)
1630                         {
1631                                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
1632                                 return -1;
1633                         }
1634                 
1635                         /*
1636                          *      There is a gateway so find the correct route for it.
1637                          *      Gateways cannot in turn be gatewayed.
1638                          */
1639                 }
1640                 else
1641                         raddr = target_addr;
1642 
1643                 /*
1644                  *      Having picked a route we can now send the frame out.
1645                  */
1646 
1647                 dev2 = rt->rt_dev;
1648                 /*
1649                  *      In IP you never have to forward a frame on the interface that it 
1650                  *      arrived upon. We now generate an ICMP HOST REDIRECT giving the route
1651                  *      we calculated.
1652                  */
1653 #ifndef CONFIG_IP_NO_ICMP_REDIRECT
1654                 if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) &&
1655                     (rt->rt_flags&RTF_MODIFIED) && !opt->srr)
1656                         icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
1657 #endif
1658 #ifdef CONFIG_IP_MROUTE
1659         }
1660         else
1661         {
1662                 /*
1663                  *      Multicast route forward. Routing is already done
1664                  */
1665                 dev2=skb->dev;
1666                 raddr=skb->raddr;
1667                 if(is_frag&16)          /* VIFF_TUNNEL mode */
1668                         encap=20;
1669         }
1670 #endif  
1671         
1672 
1673         /*
1674          * We now may allocate a new buffer, and copy the datagram into it.
1675          * If the indicated interface is up and running, kick it.
1676          */
1677 
1678         if (dev2->flags & IFF_UP)
1679         {
1680 #ifdef CONFIG_IP_MASQUERADE
1681                 /*
1682                  * If this fragment needs masquerading, make it so...
1683                  * (Dont masquerade de-masqueraded fragments)
1684                  */
1685                 if (!(is_frag&4) && fw_res==2)
1686                         ip_fw_masquerade(&skb, dev2);
1687 #endif
1688                 IS_SKB(skb);
1689 
1690                 if (skb->len+encap > dev2->mtu && (ntohs(iph->frag_off) & IP_DF)) {
1691                   ip_statistics.IpFragFails++;
1692                   icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev2->mtu, dev);
1693                   return -1;
1694                 }
1695 
1696 #ifdef CONFIG_IP_MROUTE
1697                 if(skb_headroom(skb)-encap<dev2->hard_header_len)
1698                 {
1699                         skb2 = alloc_skb(dev2->hard_header_len + skb->len + encap + 15, GFP_ATOMIC);
1700 #else
1701                 if(skb_headroom(skb)<dev2->hard_header_len)
1702                 {
1703                         skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);
1704 #endif          
1705                         /*
1706                          *      This is rare and since IP is tolerant of network failures
1707                          *      quite harmless.
1708                          */
1709                 
1710                         if (skb2 == NULL)
1711                         {
1712                                 NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
1713                                 return -1;
1714                         }
1715                 
1716                         IS_SKB(skb2);
1717                         /*
1718                          *      Add the physical headers.
1719                          */
1720 #ifdef CONFIG_IP_MROUTE
1721                         if(is_frag&16)
1722                         {
1723                                 skb_reserve(skb,(encap+dev->hard_header_len+15)&~15);   /* 16 byte aligned IP headers are good */
1724                                 ip_encap(skb2,skb->len, dev2, raddr);
1725                         }
1726                         else
1727 #endif                  
1728                                 ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr);
1729 
1730                         /*
1731                          *      We have to copy the bytes over as the new header wouldn't fit
1732                          *      the old buffer. This should be very rare.
1733                          */              
1734                         
1735                         ptr = skb_put(skb2,skb->len);
1736                         skb2->free = 1;
1737                         skb2->h.raw = ptr;
1738 
1739                         /*
1740                          *      Copy the packet data into the new buffer.
1741                          */
1742                         memcpy(ptr, skb->h.raw, skb->len);
1743                         memcpy(skb2->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
1744                         iph = skb2->ip_hdr = skb2->h.iph;
1745                 }
1746                 else
1747                 {
1748                         /* 
1749                          *      Build a new MAC header. 
1750                          */
1751 
1752                         skb2 = skb;             
1753                         skb2->dev=dev2;
1754 #ifdef CONFIG_IP_MROUTE
1755                         if(is_frag&16)
1756                                 ip_encap(skb,skb->len, dev2, raddr);
1757                         else
1758                         {
1759 #endif
1760                                 skb->arp=1;
1761                                 skb->raddr=raddr;
1762                                 if(dev2->hard_header)
1763                                 {
1764                                         if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
1765                                                 skb->arp=0;
1766                                 }
1767 #ifdef CONFIG_IP_MROUTE
1768                         }                               
1769 #endif                  
1770                         ip_statistics.IpForwDatagrams++;
1771                 }
1772 
1773                 if (opt->optlen) {
1774                         unsigned char * optptr;
1775                         if (opt->rr_needaddr) {
1776                                 optptr = (unsigned char *)iph + opt->rr;
1777                                 memcpy(&optptr[optptr[2]-5], &dev2->pa_addr, 4);
1778                                 opt->is_changed = 1;
1779                         }
1780                         if (opt->srr_is_hit) {
1781                                 int srrptr, srrspace;
1782 
1783                                 optptr = (unsigned char *)iph + opt->srr;
1784 
1785                                 for ( srrptr=optptr[2], srrspace = optptr[1];
1786                                       srrptr <= srrspace;
1787                                      srrptr += 4
1788                                     ) {
1789                                         if (srrptr + 3 > srrspace)
1790                                           break;
1791                                         if (memcmp(&target_addr, &optptr[srrptr-1], 4) == 0)
1792                                           break;
1793                                 }
1794                                 if (srrptr + 3 <= srrspace) {
1795                                         opt->is_changed = 1;
1796                                         memcpy(&optptr[srrptr-1], &dev2->pa_addr, 4);
1797                                         iph->daddr = target_addr;
1798                                         optptr[2] = srrptr+4;
1799                                 } else
1800                                         printk("ip_forward(): Argh! Destination lost!\n");
1801                         }
1802                         if (opt->ts_needaddr) {
1803                                 optptr = (unsigned char *)iph + opt->ts;
1804                                 memcpy(&optptr[optptr[2]-9], &dev2->pa_addr, 4);
1805                                 opt->is_changed = 1;
1806                         }
1807                         if (opt->is_changed) {
1808                                 opt->is_changed = 0;
1809                                 ip_send_check(iph);
1810                         }
1811                 }
1812 /*
1813  * ANK:  this is point of "no return", we cannot send an ICMP,
1814  *       because we changed SRR option.
1815  */
1816 
1817                 /*
1818                  *      See if it needs fragmenting. Note in ip_rcv we tagged
1819                  *      the fragment type. This must be right so that
1820                  *      the fragmenter does the right thing.
1821                  */
1822 
1823                 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1824                 {
1825                         ip_fragment(NULL,skb2,dev2, is_frag);
1826                         kfree_skb(skb2,FREE_WRITE);
1827                 }
1828                 else
1829                 {
1830 #ifdef CONFIG_IP_ACCT           
1831                         /*
1832                          *      Count mapping we shortcut
1833                          */
1834                          
1835                         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1836 #endif                  
1837                         
1838                         /*
1839                          *      Map service types to priority. We lie about
1840                          *      throughput being low priority, but it's a good
1841                          *      choice to help improve general usage.
1842                          */
1843                         if(iph->tos & IPTOS_LOWDELAY)
1844                                 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1845                         else if(iph->tos & IPTOS_THROUGHPUT)
1846                                 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1847                         else
1848                                 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1849                 }
1850         }
1851         else
1852                 return -1;
1853         
1854         /*
1855          *      Tell the caller if their buffer is free.
1856          */      
1857          
1858         if(skb==skb2)
1859                 return 0;       
1860 
1861 #ifdef CONFIG_IP_MASQUERADE     
1862         /*
1863          *      The original is free. Free our copy and
1864          *      tell the caller not to free.
1865          */
1866         if(skb!=skb_in)
1867         {
1868                 kfree_skb(skb_in, FREE_WRITE);
1869                 return 0;
1870         }
1871 #endif  
1872         return 1;
1873 }
1874 
1875 
1876 #endif
1877 
1878 
1879 /*
1880  *      This function receives all incoming IP datagrams.
1881  *
1882  *      On entry skb->data points to the start of the IP header and
1883  *      the MAC header has been removed.
1884  */
1885 
1886 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
     /* [previous][next][first][last][top][bottom][index][help] */
1887 {
1888         struct iphdr *iph = skb->h.iph;
1889         struct sock *raw_sk=NULL;
1890         unsigned char hash;
1891         unsigned char flag = 0;
1892         struct inet_protocol *ipprot;
1893         int brd=IS_MYADDR;
1894         struct options * opt = NULL;
1895         int is_frag=0;
1896 #ifdef CONFIG_IP_FIREWALL
1897         int err;
1898 #endif  
1899 #ifdef CONFIG_IP_MROUTE
1900         int mroute_pkt=0;
1901 #endif  
1902 
1903 #ifdef CONFIG_NET_IPV6
1904         /* 
1905          *      Intercept IPv6 frames. We dump ST-II and invalid types just below..
1906          */
1907          
1908         if(iph->version == 6)
1909                 return ipv6_rcv(skb,dev,pt);
1910 #endif          
1911 
1912         ip_statistics.IpInReceives++;
1913 
1914         /*
1915          *      Tag the ip header of this packet so we can find it
1916          */
1917 
1918         skb->ip_hdr = iph;
1919 
1920         /*
1921          *      RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
1922          *      RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
1923          *
1924          *      Is the datagram acceptable?
1925          *
1926          *      1.      Length at least the size of an ip header
1927          *      2.      Version of 4
1928          *      3.      Checksums correctly. [Speed optimisation for later, skip loopback checksums]
1929          *      4.      Doesn't have a bogus length
1930          *      (5.     We ought to check for IP multicast addresses and undefined types.. does this matter ?)
1931          */
1932 
1933         if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
1934                 || skb->len < ntohs(iph->tot_len))
1935         {
1936                 ip_statistics.IpInHdrErrors++;
1937                 kfree_skb(skb, FREE_WRITE);
1938                 return(0);
1939         }
1940 
1941         /*
1942          *      Our transport medium may have padded the buffer out. Now we know it
1943          *      is IP we can trim to the true length of the frame.
1944          *      Note this now means skb->len holds ntohs(iph->tot_len).
1945          */
1946 
1947         skb_trim(skb,ntohs(iph->tot_len));
1948 
1949         if (iph->ihl > 5) {
1950                 skb->ip_summed = 0;
1951                 if (ip_options_compile(NULL, skb))
1952                         return(0);
1953                 opt = (struct options*)skb->proto_priv;
1954 #ifdef CONFIG_IP_NOSR
1955                 if (opt->srr) {
1956                         kfree_skb(skb, FREE_READ);
1957                         return -EINVAL;
1958                 }
1959 #endif                                  
1960         }
1961         
1962         /*
1963          *      See if the firewall wants to dispose of the packet. 
1964          */
1965 
1966 #ifdef  CONFIG_IP_FIREWALL
1967         
1968         if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<FW_ACCEPT)
1969         {
1970                 if(err==FW_REJECT)
1971                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
1972                 kfree_skb(skb, FREE_WRITE);
1973                 return 0;       
1974         }
1975 
1976 #endif
1977         
1978         /*
1979          *      Remember if the frame is fragmented.
1980          */
1981          
1982         if(iph->frag_off)
1983         {
1984                 if (iph->frag_off & htons(IP_MF))
1985                         is_frag|=1;
1986                 /*
1987                  *      Last fragment ?
1988                  */
1989         
1990                 if (iph->frag_off & htons(IP_OFFSET))
1991                         is_frag|=2;
1992         }
1993         
1994         /*
1995          *      Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
1996          *
1997          *      This is inefficient. While finding out if it is for us we could also compute
1998          *      the routing table entry. This is where the great unified cache theory comes
1999          *      in as and when someone implements it
2000          *
2001          *      For most hosts over 99% of packets match the first conditional
2002          *      and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
2003          *      function entry.
2004          */
2005 
2006         if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
2007         {
2008                 if (opt && opt->srr) {
2009                         int srrspace, srrptr;
2010                         __u32 nexthop;
2011                         unsigned char * optptr = ((unsigned char *)iph) + opt->srr;
2012 
2013                         if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST) {
2014                                 kfree_skb(skb, FREE_WRITE);
2015                                 return 0;
2016                         }
2017 
2018                         for ( srrptr=optptr[2], srrspace = optptr[1];
2019                               srrptr <= srrspace;
2020                               srrptr += 4
2021                              ) 
2022                         {
2023                                 int brd2;
2024                                 if (srrptr + 3 > srrspace) 
2025                                 {
2026                                         icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
2027                                                   skb->dev);
2028                                         kfree_skb(skb, FREE_WRITE);
2029                                         return 0;
2030                                 }
2031                                 memcpy(&nexthop, &optptr[srrptr-1], 4);
2032                                 if ((brd2 = ip_chk_addr(nexthop)) == 0)
2033                                         break;
2034                                 if (brd2 != IS_MYADDR) 
2035                                 {
2036 
2037                                         /*
2038                                          *      ANK: should we implement weak tunneling of multicasts?
2039                                          *      Are they obsolete? DVMRP specs (RFC-1075) is old enough...
2040                                          *      [They are obsolete]
2041                                          */
2042                                         kfree_skb(skb, FREE_WRITE);
2043                                         return -EINVAL;
2044                                 }
2045                         }
2046                         if (srrptr <= srrspace) 
2047                         {
2048                                 opt->srr_is_hit = 1;
2049                                 opt->is_changed = 1;
2050 #ifdef CONFIG_IP_FORWARD
2051                                 if (ip_forward(skb, dev, is_frag, nexthop))
2052                                         kfree_skb(skb, FREE_WRITE);
2053 #else
2054                                 ip_statistics.IpInAddrErrors++;
2055                                 kfree_skb(skb, FREE_WRITE);
2056 #endif
2057                                 return 0;
2058                         }
2059                 }
2060 
2061 #ifdef CONFIG_IP_MULTICAST      
2062                 if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
2063                 {
2064                         /*
2065                          *      Check it is for one of our groups
2066                          */
2067                         struct ip_mc_list *ip_mc=dev->ip_mc_list;
2068                         do
2069                         {
2070                                 if(ip_mc==NULL)
2071                                 {       
2072                                         kfree_skb(skb, FREE_WRITE);
2073                                         return 0;
2074                                 }
2075                                 if(ip_mc->multiaddr==iph->daddr)
2076                                         break;
2077                                 ip_mc=ip_mc->next;
2078                         }
2079                         while(1);
2080                 }
2081 #endif
2082 
2083 #ifdef CONFIG_IP_MASQUERADE
2084                 /*
2085                  * Do we need to de-masquerade this fragment?
2086                  */
2087                 if (ip_fw_demasquerade(skb)) 
2088                 {
2089                         struct iphdr *iph=skb->h.iph;
2090                         if (ip_forward(skb, dev, is_frag|4, iph->daddr))
2091                                 kfree_skb(skb, FREE_WRITE);
2092                         return(0);
2093                 }
2094 #endif
2095 
2096                 /*
2097                  *      Account for the packet
2098                  */
2099  
2100 #ifdef CONFIG_IP_ACCT
2101                 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
2102 #endif  
2103 
2104                 /*
2105                  *      Reassemble IP fragments.
2106                  */
2107 
2108                 if(is_frag)
2109                 {
2110                         /* Defragment. Obtain the complete packet if there is one */
2111                         skb=ip_defrag(iph,skb,dev);
2112                         if(skb==NULL)
2113                                 return 0;
2114                         skb->dev = dev;
2115                         iph=skb->h.iph;
2116                 }
2117 
2118                 /*
2119                  *      Point into the IP datagram, just past the header.
2120                  */
2121 
2122                 skb->ip_hdr = iph;
2123                 skb->h.raw += iph->ihl*4;
2124 
2125 #ifdef CONFIG_IP_MROUTE         
2126                 /*
2127                  *      Check the state on multicast routing (multicast and not 224.0.0.z)
2128                  */
2129                  
2130                 if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
2131                         mroute_pkt=1;
2132 
2133 #endif
2134                 /*
2135                  *      Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
2136                  *
2137                  *      RFC 1122: SHOULD pass TOS value up to the transport layer.
2138                  */
2139  
2140                 hash = iph->protocol & (SOCK_ARRAY_SIZE-1);
2141 
2142                 /* 
2143                  *      If there maybe a raw socket we must check - if not we don't care less 
2144                  */
2145                  
2146                 if((raw_sk=raw_prot.sock_array[hash])!=NULL)
2147                 {
2148                         struct sock *sknext=NULL;
2149                         struct sk_buff *skb1;
2150                         raw_sk=get_sock_raw(raw_sk, iph->protocol,  iph->saddr, iph->daddr);
2151                         if(raw_sk)      /* Any raw sockets */
2152                         {
2153                                 do
2154                                 {
2155                                         /* Find the next */
2156                                         sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr);
2157                                         if(sknext)
2158                                                 skb1=skb_clone(skb, GFP_ATOMIC);
2159                                         else
2160                                                 break;  /* One pending raw socket left */
2161                                         if(skb1)
2162                                                 raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
2163                                         raw_sk=sknext;
2164                                 }
2165                                 while(raw_sk!=NULL);
2166                                 
2167                                 /*
2168                                  *      Here either raw_sk is the last raw socket, or NULL if none 
2169                                  */
2170                                  
2171                                 /*
2172                                  *      We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy 
2173                                  */
2174                         }
2175                 }
2176         
2177                 /*
2178                  *      skb->h.raw now points at the protocol beyond the IP header.
2179                  */
2180         
2181                 hash = iph->protocol & (MAX_INET_PROTOS -1);
2182                 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
2183                 {
2184                         struct sk_buff *skb2;
2185         
2186                         if (ipprot->protocol != iph->protocol)
2187                                 continue;
2188                        /*
2189                         *       See if we need to make a copy of it.  This will
2190                         *       only be set if more than one protocol wants it.
2191                         *       and then not for the last one. If there is a pending
2192                         *       raw delivery wait for that
2193                         */
2194         
2195 #ifdef CONFIG_IP_MROUTE
2196                         if (ipprot->copy || raw_sk || mroute_pkt)
2197 #else   
2198                         if (ipprot->copy || raw_sk)
2199 #endif                  
2200                         {
2201                                 skb2 = skb_clone(skb, GFP_ATOMIC);
2202                                 if(skb2==NULL)
2203                                         continue;
2204                         }
2205                         else
2206                         {
2207                                 skb2 = skb;
2208                         }
2209                         flag = 1;
2210 
2211                        /*
2212                         *       Pass on the datagram to each protocol that wants it,
2213                         *       based on the datagram protocol.  We should really
2214                         *       check the protocol handler's return values here...
2215                         */
2216 
2217                         ipprot->handler(skb2, dev, opt, iph->daddr,
2218                                 (ntohs(iph->tot_len) - (iph->ihl * 4)),
2219                                 iph->saddr, 0, ipprot);
2220                 }
2221 
2222                 /*
2223                  *      All protocols checked.
2224                  *      If this packet was a broadcast, we may *not* reply to it, since that
2225                  *      causes (proven, grin) ARP storms and a leakage of memory (i.e. all
2226                  *      ICMP reply messages get queued up for transmission...)
2227                  */
2228 
2229 #ifdef CONFIG_IP_MROUTE          
2230                 /*
2231                  *      Forward the last copy to the multicast router. If
2232                  *      there is a pending raw deliery however make a copy
2233                  *      and forward that.
2234                  */
2235                  
2236                 if(mroute_pkt)
2237                 {
2238                         flag=1;
2239                         if(raw_sk==NULL)
2240                                 ipmr_forward(skb, is_frag);
2241                         else
2242                         {
2243                                 struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
2244                                 if(skb2)
2245                                 {
2246                                         skb2->free=1;
2247                                         ipmr_forward(skb2, is_frag);
2248                                 }
2249                         }
2250                 }
2251 #endif          
2252 
2253                 if(raw_sk!=NULL)        /* Shift to last raw user */
2254                         raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
2255                 else if (!flag)         /* Free and report errors */
2256                 {
2257                         if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
2258                                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);   
2259                         kfree_skb(skb, FREE_WRITE);
2260                 }
2261 
2262                 return(0);
2263         }
2264 
2265         /*
2266          *      Do any unicast IP forwarding required.
2267          */
2268         
2269         /*
2270          *      Don't forward multicast or broadcast frames.
2271          */
2272 
2273         if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
2274         {
2275                 kfree_skb(skb,FREE_WRITE);
2276                 return 0;
2277         }
2278 
2279         /*
2280          *      The packet is for another target. Forward the frame
2281          */
2282 
2283 #ifdef CONFIG_IP_FORWARD
2284         if (opt && opt->is_strictroute) {
2285               icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
2286               kfree_skb(skb, FREE_WRITE);
2287               return -1;
2288         }
2289         if (ip_forward(skb, dev, is_frag, iph->daddr))
2290                 kfree_skb(skb, FREE_WRITE);
2291 #else
2292 /*      printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
2293                         iph->saddr,iph->daddr);*/
2294         ip_statistics.IpInAddrErrors++;
2295         kfree_skb(skb, FREE_WRITE);
2296 #endif
2297         return(0);
2298 }
2299         
2300 
2301 /*
2302  *      Loop a packet back to the sender.
2303  */
2304  
2305 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
2306 {
2307         struct device *dev=&loopback_dev;
2308         int len=ntohs(skb->ip_hdr->tot_len);
2309         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
2310         
2311         if(newskb==NULL)
2312                 return;
2313                 
2314         newskb->link3=NULL;
2315         newskb->sk=NULL;
2316         newskb->dev=dev;
2317         newskb->saddr=skb->saddr;
2318         newskb->daddr=skb->daddr;
2319         newskb->raddr=skb->raddr;
2320         newskb->free=1;
2321         newskb->lock=0;
2322         newskb->users=0;
2323         newskb->pkt_type=skb->pkt_type;
2324         
2325         /*
2326          *      Put a MAC header on the packet
2327          */
2328         ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
2329         /*
2330          *      Add the rest of the data space. 
2331          */
2332         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
2333         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
2334 
2335         /*
2336          *      Copy the data
2337          */
2338         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
2339 
2340         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
2341                 
2342         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
2343         ip_queue_xmit(NULL, dev, newskb, 1);
2344 }
2345 
2346 
2347 /*
2348  * Queues a packet to be sent, and starts the transmitter
2349  * if necessary.  if free = 1 then we free the block after
2350  * transmit, otherwise we don't. If free==2 we not only
2351  * free the block but also don't assign a new ip seq number.
2352  * This routine also needs to put in the total length,
2353  * and compute the checksum
2354  */
2355 
2356 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
2357               struct sk_buff *skb, int free)
2358 {
2359         struct iphdr *iph;
2360 /*      unsigned char *ptr;*/
2361 
2362         /* Sanity check */
2363         if (dev == NULL)
2364         {
2365                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
2366                 return;
2367         }
2368 
2369         IS_SKB(skb);
2370 
2371         /*
2372          *      Do some book-keeping in the packet for later
2373          */
2374 
2375 
2376         skb->dev = dev;
2377         skb->when = jiffies;
2378 
2379         /*
2380          *      Find the IP header and set the length. This is bad
2381          *      but once we get the skb data handling code in the
2382          *      hardware will push its header sensibly and we will
2383          *      set skb->ip_hdr to avoid this mess and the fixed
2384          *      header length problem
2385          */
2386 
2387         iph = skb->ip_hdr;
2388         iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
2389 
2390 #ifdef CONFIG_IP_FIREWALL
2391         if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) < FW_ACCEPT)
2392                 /* just don't send this packet */
2393                 return;
2394 #endif  
2395 
2396         /*
2397          *      No reassigning numbers to fragments...
2398          */
2399 
2400         if(free!=2)
2401                 iph->id      = htons(ip_id_count++);
2402         else
2403                 free=1;
2404 
2405         /* All buffers without an owner socket get freed */
2406         if (sk == NULL)
2407                 free = 1;
2408 
2409         skb->free = free;
2410 
2411         /*
2412          *      Do we need to fragment. Again this is inefficient.
2413          *      We need to somehow lock the original buffer and use
2414          *      bits of it.
2415          */
2416 
2417         if(ntohs(iph->tot_len)> dev->mtu)
2418         {
2419                 ip_fragment(sk,skb,dev,0);
2420                 IS_SKB(skb);
2421                 kfree_skb(skb,FREE_WRITE);
2422                 return;
2423         }
2424 
2425         /*
2426          *      Add an IP checksum
2427          */
2428 
2429         ip_send_check(iph);
2430 
2431         /*
2432          *      Print the frame when debugging
2433          */
2434 
2435         /*
2436          *      More debugging. You cannot queue a packet already on a list
2437          *      Spot this and moan loudly.
2438          */
2439         if (skb->next != NULL)
2440         {
2441                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
2442                 skb_unlink(skb);
2443         }
2444 
2445         /*
2446          *      If a sender wishes the packet to remain unfreed
2447          *      we add it to his send queue. This arguably belongs
2448          *      in the TCP level since nobody else uses it. BUT
2449          *      remember IPng might change all the rules.
2450          */
2451 
2452         if (!free)
2453         {
2454                 unsigned long flags;
2455                 /* The socket now has more outstanding blocks */
2456 
2457                 sk->packets_out++;
2458 
2459                 /* Protect the list for a moment */
2460                 save_flags(flags);
2461                 cli();
2462 
2463                 if (skb->link3 != NULL)
2464                 {
2465                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
2466                         skb->link3 = NULL;
2467                 }
2468                 if (sk->send_head == NULL)
2469                 {
2470                         sk->send_tail = skb;
2471                         sk->send_head = skb;
2472                 }
2473                 else
2474                 {
2475                         sk->send_tail->link3 = skb;
2476                         sk->send_tail = skb;
2477                 }
2478                 /* skb->link3 is NULL */
2479 
2480                 /* Interrupt restore */
2481                 restore_flags(flags);
2482         }
2483         else
2484                 /* Remember who owns the buffer */
2485                 skb->sk = sk;
2486 
2487         /*
2488          *      If the indicated interface is up and running, send the packet.
2489          */
2490          
2491         ip_statistics.IpOutRequests++;
2492 #ifdef CONFIG_IP_ACCT
2493         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
2494 #endif  
2495         
2496 #ifdef CONFIG_IP_MULTICAST      
2497 
2498         /*
2499          *      Multicasts are looped back for other local users
2500          */
2501          
2502         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
2503         {
2504                 if(sk==NULL || sk->ip_mc_loop)
2505                 {
2506                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
2507                         {
2508                                 ip_loopback(dev,skb);
2509                         }
2510                         else
2511                         {
2512                                 struct ip_mc_list *imc=dev->ip_mc_list;
2513                                 while(imc!=NULL)
2514                                 {
2515                                         if(imc->multiaddr==iph->daddr)
2516                                         {
2517                                                 ip_loopback(dev,skb);
2518                                                 break;
2519                                         }
2520                                         imc=imc->next;
2521                                 }
2522                         }
2523                 }
2524                 /* Multicasts with ttl 0 must not go beyond the host */
2525                 
2526                 if(skb->ip_hdr->ttl==0)
2527                 {
2528                         kfree_skb(skb, FREE_READ);
2529                         return;
2530                 }
2531         }
2532 #endif
2533         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
2534                 ip_loopback(dev,skb);
2535                 
2536         if (dev->flags & IFF_UP)
2537         {
2538                 /*
2539                  *      If we have an owner use its priority setting,
2540                  *      otherwise use NORMAL
2541                  */
2542 
2543                 if (sk != NULL)
2544                 {
2545                         dev_queue_xmit(skb, dev, sk->priority);
2546                 }
2547                 else
2548                 {
2549                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
2550                 }
2551         }
2552         else
2553         {
2554                 if(sk)
2555                         sk->err = ENETDOWN;
2556                 ip_statistics.IpOutDiscards++;
2557                 if (free)
2558                         kfree_skb(skb, FREE_WRITE);
2559         }
2560 }
2561 
2562 
2563 
2564 #ifdef CONFIG_IP_MULTICAST
2565 
2566 /*
2567  *      Write an multicast group list table for the IGMP daemon to
2568  *      read.
2569  */
2570  
2571 int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
2572 {
2573         off_t pos=0, begin=0;
2574         struct ip_mc_list *im;
2575         unsigned long flags;
2576         int len=0;
2577         struct device *dev;
2578         
2579         len=sprintf(buffer,"Device    : Count\tGroup    Users Timer\n");  
2580         save_flags(flags);
2581         cli();
2582         
2583         for(dev = dev_base; dev; dev = dev->next)
2584         {
2585                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST))
2586                 {
2587                         len+=sprintf(buffer+len,"%-10s: %5d\n",
2588                                         dev->name, dev->mc_count);
2589                         for(im = dev->ip_mc_list; im; im = im->next)
2590                         {
2591                                 len+=sprintf(buffer+len,
2592                                         "\t\t\t%08lX %5d %d:%08lX\n",
2593                                         im->multiaddr, im->users,
2594                                         im->tm_running, im->timer.expires-jiffies);
2595                                 pos=begin+len;
2596                                 if(pos<offset)
2597                                 {
2598                                         len=0;
2599                                         begin=pos;
2600                                 }
2601                                 if(pos>offset+length)
2602                                         break;
2603                         }
2604                 }
2605         }
2606         restore_flags(flags);
2607         *start=buffer+(offset-begin);
2608         len-=(offset-begin);
2609         if(len>length)
2610                 len=length;     
2611         return len;
2612 }
2613 
2614 
2615 /*
2616  *      Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
2617  *      an IP socket.
2618  *
2619  *      We implement IP_TOS (type of service), IP_TTL (time to live).
2620  *
2621  *      Next release we will sort out IP_OPTIONS since for some people they are kind of important.
2622  */
2623 
2624 static struct device *ip_mc_find_devfor(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
2625 {
2626         struct device *dev;
2627         for(dev = dev_base; dev; dev = dev->next)
2628         {
2629                 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2630                         (dev->pa_addr==addr))
2631                         return dev;
2632         }
2633 
2634         return NULL;
2635 }
2636 
2637 #endif
2638 
2639 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2640 {
2641         int val,err;
2642         unsigned char ucval;
2643 #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
2644         struct ip_fw tmp_fw;
2645 #endif  
2646         if (optval == NULL)
2647         {
2648                 val=0;
2649                 ucval=0;
2650         }
2651         else
2652         {
2653                 err=verify_area(VERIFY_READ, optval, sizeof(int));
2654                 if(err)
2655                         return err;
2656                 val = get_user((int *) optval);
2657                 ucval=get_user((unsigned char *) optval);
2658         }
2659         
2660         if(level!=SOL_IP)
2661                 return -EOPNOTSUPP;
2662 #ifdef CONFIG_IP_MROUTE
2663         if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2664         {
2665                 return ip_mroute_setsockopt(sk,optname,optval,optlen);
2666         }
2667 #endif
2668         
2669         switch(optname)
2670         {
2671                 case IP_OPTIONS:
2672                   {
2673                           struct options * opt = NULL;
2674                           struct options * old_opt;
2675                           if (optlen > 40 || optlen < 0)
2676                             return -EINVAL;
2677                           err = verify_area(VERIFY_READ, optval, optlen);
2678                           if (err)
2679                             return err;
2680                           opt = kmalloc(sizeof(struct options)+((optlen+3)&~3), GFP_KERNEL);
2681                           if (!opt)
2682                             return -ENOMEM;
2683                           memset(opt, 0, sizeof(struct options));
2684                           if (optlen)
2685                             memcpy_fromfs(opt->__data, optval, optlen);
2686                           while (optlen & 3)
2687                             opt->__data[optlen++] = IPOPT_END;
2688                           opt->optlen = optlen;
2689                           opt->is_data = 1;
2690                           opt->is_setbyuser = 1;
2691                           if (optlen && ip_options_compile(opt, NULL)) {
2692                                   kfree_s(opt, sizeof(struct options) + optlen);
2693                                   return -EINVAL;
2694                           }
2695                           /*
2696                            * ANK: I'm afraid that receive handler may change
2697                            * options from under us.
2698                            */
2699                           cli();
2700                           old_opt = sk->opt;
2701                           sk->opt = opt;
2702                           sti();
2703                           if (old_opt)
2704                             kfree_s(old_opt, sizeof(struct optlen) + old_opt->optlen);
2705                           return 0;
2706                   }
2707                 case IP_TOS:
2708                         if(val<0||val>255)
2709                                 return -EINVAL;
2710                         sk->ip_tos=val;
2711                         if(val==IPTOS_LOWDELAY)
2712                                 sk->priority=SOPRI_INTERACTIVE;
2713                         if(val==IPTOS_THROUGHPUT)
2714                                 sk->priority=SOPRI_BACKGROUND;
2715                         return 0;
2716                 case IP_TTL:
2717                         if(val<1||val>255)
2718                                 return -EINVAL;
2719                         sk->ip_ttl=val;
2720                         return 0;
2721                 case IP_HDRINCL:
2722                         if(sk->type!=SOCK_RAW)
2723                                 return -ENOPROTOOPT;
2724                         sk->ip_hdrincl=val?1:0;
2725                         return 0;
2726 #ifdef CONFIG_IP_MULTICAST
2727                 case IP_MULTICAST_TTL: 
2728                 {
2729                         sk->ip_mc_ttl=(int)ucval;
2730                         return 0;
2731                 }
2732                 case IP_MULTICAST_LOOP: 
2733                 {
2734                         if(ucval!=0 && ucval!=1)
2735                                  return -EINVAL;
2736                         sk->ip_mc_loop=(int)ucval;
2737                         return 0;
2738                 }
2739                 case IP_MULTICAST_IF: 
2740                 {
2741                         struct in_addr addr;
2742                         struct device *dev=NULL;
2743                         
2744                         /*
2745                          *      Check the arguments are allowable
2746                          */
2747 
2748                         err=verify_area(VERIFY_READ, optval, sizeof(addr));
2749                         if(err)
2750                                 return err;
2751                                 
2752                         memcpy_fromfs(&addr,optval,sizeof(addr));
2753                         
2754                         
2755                         /*
2756                          *      What address has been requested
2757                          */
2758                         
2759                         if(addr.s_addr==INADDR_ANY)     /* Default */
2760                         {
2761                                 sk->ip_mc_name[0]=0;
2762                                 return 0;
2763                         }
2764                         
2765                         /*
2766                          *      Find the device
2767                          */
2768                          
2769                         dev=ip_mc_find_devfor(addr.s_addr);
2770                                                 
2771                         /*
2772                          *      Did we find one
2773                          */
2774                          
2775                         if(dev) 
2776                         {
2777                                 strcpy(sk->ip_mc_name,dev->name);
2778                                 return 0;
2779                         }
2780                         return -EADDRNOTAVAIL;
2781                 }
2782                 
2783                 case IP_ADD_MEMBERSHIP: 
2784                 {
2785                 
2786 /*
2787  *      FIXME: Add/Del membership should have a semaphore protecting them from re-entry
2788  */
2789                         struct ip_mreq mreq;
2790                         __u32 route_src;
2791                         struct rtable *rt;
2792                         struct device *dev=NULL;
2793                         
2794                         /*
2795                          *      Check the arguments.
2796                          */
2797 
2798                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2799                         if(err)
2800                                 return err;
2801 
2802                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2803 
2804                         /* 
2805                          *      Get device for use later
2806                          */
2807 
2808                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2809                         {
2810                                 /*
2811                                  *      Not set so scan.
2812                                  */
2813                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2814                                 {
2815                                         dev=rt->rt_dev;
2816                                         rt->rt_use--;
2817                                 }
2818                         }
2819                         else
2820                         {
2821                                 /*
2822                                  *      Find a suitable device.
2823                                  */
2824                                 
2825                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2826                         }
2827                         
2828                         /*
2829                          *      No device, no cookies.
2830                          */
2831                          
2832                         if(!dev)
2833                                 return -ENODEV;
2834                                 
2835                         /*
2836                          *      Join group.
2837                          */
2838                          
2839                         return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
2840                 }
2841                 
2842                 case IP_DROP_MEMBERSHIP: 
2843                 {
2844                         struct ip_mreq mreq;
2845                         struct rtable *rt;
2846                         __u32 route_src;
2847                         struct device *dev=NULL;
2848 
2849                         /*
2850                          *      Check the arguments
2851                          */
2852                          
2853                         err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2854                         if(err)
2855                                 return err;
2856 
2857                         memcpy_fromfs(&mreq,optval,sizeof(mreq));
2858 
2859                         /*
2860                          *      Get device for use later 
2861                          */
2862  
2863                         if(mreq.imr_interface.s_addr==INADDR_ANY) 
2864                         {
2865                                 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2866                                 {
2867                                         dev=rt->rt_dev;
2868                                         rt->rt_use--;
2869                                 }
2870                         }
2871                         else 
2872                         {
2873                         
2874                                 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2875                         }
2876                         
2877                         /*
2878                          *      Did we find a suitable device.
2879                          */
2880                          
2881                         if(!dev)
2882                                 return -ENODEV;
2883                                 
2884                         /*
2885                          *      Leave group
2886                          */
2887                          
2888                         return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
2889                 }
2890 #endif                  
2891 #ifdef CONFIG_IP_FIREWALL
2892                 case IP_FW_ADD_BLK:
2893                 case IP_FW_DEL_BLK:
2894                 case IP_FW_ADD_FWD:
2895                 case IP_FW_DEL_FWD:
2896                 case IP_FW_CHK_BLK:
2897                 case IP_FW_CHK_FWD:
2898                 case IP_FW_FLUSH_BLK:
2899                 case IP_FW_FLUSH_FWD:
2900                 case IP_FW_ZERO_BLK:
2901                 case IP_FW_ZERO_FWD:
2902                 case IP_FW_POLICY_BLK:
2903                 case IP_FW_POLICY_FWD:
2904                         if(!suser())
2905                                 return -EPERM;
2906                         if(optlen>sizeof(tmp_fw) || optlen<1)
2907                                 return -EINVAL;
2908                         err=verify_area(VERIFY_READ,optval,optlen);
2909                         if(err)
2910                                 return err;
2911                         memcpy_fromfs(&tmp_fw,optval,optlen);
2912                         err=ip_fw_ctl(optname, &tmp_fw,optlen);
2913                         return -err;    /* -0 is 0 after all */
2914                         
2915 #endif
2916 #ifdef CONFIG_IP_ACCT
2917                 case IP_ACCT_DEL:
2918                 case IP_ACCT_ADD:
2919                 case IP_ACCT_FLUSH:
2920                 case IP_ACCT_ZERO:
2921                         if(!suser())
2922                                 return -EPERM;
2923                         if(optlen>sizeof(tmp_fw) || optlen<1)
2924                                 return -EINVAL;
2925                         err=verify_area(VERIFY_READ,optval,optlen);
2926                         if(err)
2927                                 return err;
2928                         memcpy_fromfs(&tmp_fw, optval,optlen);
2929                         err=ip_acct_ctl(optname, &tmp_fw,optlen);
2930                         return -err;    /* -0 is 0 after all */
2931 #endif
2932                 /* IP_OPTIONS and friends go here eventually */
2933                 default:
2934                         return(-ENOPROTOOPT);
2935         }
2936 }
2937 
2938 /*
2939  *      Get the options. Note for future reference. The GET of IP options gets the
2940  *      _received_ ones. The set sets the _sent_ ones.
2941  */
2942 
2943 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
2944 {
2945         int val,err;
2946 #ifdef CONFIG_IP_MULTICAST
2947         int len;
2948 #endif
2949         
2950         if(level!=SOL_IP)
2951                 return -EOPNOTSUPP;
2952 
2953 #ifdef CONFIG_IP_MROUTE
2954         if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2955         {
2956                 return ip_mroute_getsockopt(sk,optname,optval,optlen);
2957         }
2958 #endif
2959 
2960         switch(optname)
2961         {
2962                 case IP_OPTIONS:
2963                         {
2964                                 unsigned char optbuf[sizeof(struct options)+40];
2965                                 struct options * opt = (struct options*)optbuf;
2966                                 err = verify_area(VERIFY_WRITE, optlen, sizeof(int));
2967                                 if (err)
2968                                   return err;
2969                                 cli();
2970                                 opt->optlen = 0;
2971                                 if (sk->opt)
2972                                   memcpy(optbuf, sk->opt, sizeof(struct options)+sk->opt->optlen);
2973                                 sti();
2974                                 if (opt->optlen == 0) {
2975                                         put_fs_long(0,(unsigned long *) optlen);
2976                                         return 0;
2977                                 }
2978                                 err = verify_area(VERIFY_WRITE, optval, opt->optlen);
2979                                 if (err)
2980                                   return err;
2981 /*
2982  * Now we should undo all the changes done by ip_options_compile().
2983  */
2984                                 if (opt->srr) {
2985                                         unsigned  char * optptr = opt->__data+opt->srr-sizeof(struct  iphdr);
2986                                         memmove(optptr+7, optptr+4, optptr[1]-7);
2987                                         memcpy(optptr+3, &opt->faddr, 4);
2988                                 }
2989                                 if (opt->rr_needaddr) {
2990                                         unsigned  char * optptr = opt->__data+opt->rr-sizeof(struct  iphdr);
2991                                         memset(&optptr[optptr[2]-1], 0, 4);
2992                                         optptr[2] -= 4;
2993                                 }
2994                                 if (opt->ts) {
2995                                         unsigned  char * optptr = opt->__data+opt->ts-sizeof(struct  iphdr);
2996                                         if (opt->ts_needtime) {
2997                                                 memset(&optptr[optptr[2]-1], 0, 4);
2998                                                 optptr[2] -= 4;
2999                                         }
3000                                         if (opt->ts_needaddr) {
3001                                                 memset(&optptr[optptr[2]-1], 0, 4);
3002                                                 optptr[2] -= 4;
3003                                         }
3004                                 }
3005                                 put_fs_long(opt->optlen, (unsigned long *) optlen);
3006                                 memcpy_tofs(optval, opt->__data, opt->optlen);
3007                         }
3008                         return 0;
3009                 case IP_TOS:
3010                         val=sk->ip_tos;
3011                         break;
3012                 case IP_TTL:
3013                         val=sk->ip_ttl;
3014                         break;
3015                 case IP_HDRINCL:
3016                         val=sk->ip_hdrincl;
3017                         break;
3018 #ifdef CONFIG_IP_MULTICAST                      
3019                 case IP_MULTICAST_TTL:
3020                         val=sk->ip_mc_ttl;
3021                         break;
3022                 case IP_MULTICAST_LOOP:
3023                         val=sk->ip_mc_loop;
3024                         break;
3025                 case IP_MULTICAST_IF:
3026                         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3027                         if(err)
3028                                 return err;
3029                         len=strlen(sk->ip_mc_name);
3030                         err=verify_area(VERIFY_WRITE, optval, len);
3031                         if(err)
3032                                 return err;
3033                         put_user(len,(int *) optlen);
3034                         memcpy_tofs((void *)optval,sk->ip_mc_name, len);
3035                         return 0;
3036 #endif
3037                 default:
3038                         return(-ENOPROTOOPT);
3039         }
3040         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3041         if(err)
3042                 return err;
3043         put_user(sizeof(int),(int *) optlen);
3044 
3045         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3046         if(err)
3047                 return err;
3048         put_user(val,(int *) optval);
3049 
3050         return(0);
3051 }
3052 
3053 /*
3054  *      Build and send a packet, with as little as one copy
3055  *
3056  *      Doesn't care much about ip options... option length can be
3057  *      different for fragment at 0 and other fragments.
3058  *
3059  *      Note that the fragment at the highest offset is sent first,
3060  *      so the getfrag routine can fill in the TCP/UDP checksum header
3061  *      field in the last fragment it sends... actually it also helps
3062  *      the reassemblers, they can put most packets in at the head of
3063  *      the fragment queue, and they know the total size in advance. This
3064  *      last feature will measurably improve the Linux fragment handler.
3065  *
3066  *      The callback has five args, an arbitrary pointer (copy of frag),
3067  *      the source IP address (may depend on the routing table), the 
3068  *      destination address (char *), the offset to copy from, and the
3069  *      length to be copied.
3070  * 
3071  */
3072 
3073 int ip_build_xmit(struct sock *sk,
     /* [previous][next][first][last][top][bottom][index][help] */
3074                    void getfrag (const void *,
3075                                  __u32,
3076                                  char *,
3077                                  unsigned int,  
3078                                  unsigned int),
3079                    const void *frag,
3080                    unsigned short int length,
3081                    __u32 daddr,
3082                    __u32 user_saddr,
3083                    struct options * opt,
3084                    int flags,
3085                    int type) 
3086 {
3087         struct rtable *rt;
3088         unsigned int fraglen, maxfraglen, fragheaderlen;
3089         int offset, mf;
3090         __u32 saddr;
3091         unsigned short id;
3092         struct iphdr *iph;
3093         int local=0;
3094         struct device *dev;
3095         int nfrags=0;
3096         __u32 true_daddr = daddr;
3097 
3098         if (opt && opt->srr && !sk->ip_hdrincl)
3099           daddr = opt->faddr;
3100         
3101         ip_statistics.IpOutRequests++;
3102 
3103 #ifdef CONFIG_IP_MULTICAST      
3104         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
3105         {
3106                 dev=dev_get(sk->ip_mc_name);
3107                 if(!dev)
3108                         return -ENODEV;
3109                 rt=NULL;
3110                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
3111                         saddr = sk->saddr;
3112                 else
3113                         saddr = dev->pa_addr;
3114         }
3115         else
3116         {
3117 #endif  
3118                 /*
3119                  *      Perform the IP routing decisions
3120                  */
3121          
3122                 if(sk->localroute || flags&MSG_DONTROUTE)
3123                         local=1;
3124         
3125                 rt = sk->ip_route_cache;
3126                 
3127                 /*
3128                  *      See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
3129                  *      by doing the invalidation actively in the route change and header change.
3130                  */
3131         
3132                 saddr=sk->ip_route_saddr;        
3133                 if(!rt || sk->ip_route_stamp != rt_stamp ||
3134                    daddr!=sk->ip_route_daddr || sk->ip_route_local!=local ||
3135                    (sk->saddr && sk->saddr != saddr))
3136                 {
3137                         if(local)
3138                                 rt = ip_rt_local(daddr, NULL, &saddr);
3139                         else
3140                                 rt = ip_rt_route(daddr, NULL, &saddr);
3141                         sk->ip_route_local=local;
3142                         sk->ip_route_daddr=daddr;
3143                         sk->ip_route_saddr=saddr;
3144                         sk->ip_route_stamp=rt_stamp;
3145                         sk->ip_route_cache=rt;
3146                         sk->ip_hcache_ver=NULL;
3147                         sk->ip_hcache_state= 0;
3148                 }
3149                 else if(rt)
3150                 {
3151                         /*
3152                          *      Attempt header caches only if the cached route is being reused. Header cache
3153                          *      is not ultra cheap to set up. This means we only set it up on the second packet,
3154                          *      so one shot communications are not slowed. We assume (seems reasonable) that 2 is
3155                          *      probably going to be a stream of data.
3156                          */
3157                         if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
3158                         {
3159                                 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
3160                                         rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
3161                                 else
3162                                         /* Can't cache. Remember this */
3163                                         sk->ip_hcache_state= -1;
3164                         }
3165                 }
3166                 
3167                 if (rt == NULL) 
3168                 {
3169                         ip_statistics.IpOutNoRoutes++;
3170                         return(-ENETUNREACH);
3171                 }
3172         
3173                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
3174                         saddr = sk->saddr;
3175                         
3176                 dev=rt->rt_dev;
3177 #ifdef CONFIG_IP_MULTICAST
3178         }
3179 #endif          
3180         if (user_saddr)
3181           saddr = user_saddr;
3182 
3183         /*
3184          *      Now compute the buffer space we require
3185          */ 
3186          
3187         /*
3188          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
3189          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
3190          */
3191 
3192         length += 20;
3193         if (!sk->ip_hdrincl && opt) {
3194                 length += opt->optlen;
3195                 if (opt->is_strictroute && rt && rt->rt_gateway) {
3196                         ip_statistics.IpOutNoRoutes++;
3197                         return -ENETUNREACH;
3198                 }
3199         }
3200         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
3201         {       
3202                 int error;
3203                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, 0,&error);
3204                 if(skb==NULL)
3205                 {
3206                         ip_statistics.IpOutDiscards++;
3207                         return error;
3208                 }
3209                 skb->dev=dev;
3210                 skb->free=1;
3211                 skb->when=jiffies;
3212                 skb->sk=sk;
3213                 skb->arp=0;
3214                 skb->saddr=saddr;
3215                 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
3216                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
3217                 if(sk->ip_hcache_state>0)
3218                 {
3219                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
3220                         skb->arp=1;
3221                 }
3222                 else if(dev->hard_header)
3223                 {
3224                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
3225                                 skb->arp=1;
3226                 }
3227                 else
3228                         skb->arp=1;
3229                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
3230                 dev_lock_list();
3231                 if(!sk->ip_hdrincl)
3232                 {
3233                         iph->version=4;
3234                         iph->ihl=5;
3235                         iph->tos=sk->ip_tos;
3236                         iph->tot_len = htons(length);
3237                         iph->id=htons(ip_id_count++);
3238                         iph->frag_off = 0;
3239                         iph->ttl=sk->ip_ttl;
3240                         iph->protocol=type;
3241                         iph->saddr=saddr;
3242                         iph->daddr=daddr;
3243                         if (opt) {
3244                                 iph->ihl += opt->optlen>>2;
3245                                 ip_options_build(skb, opt,
3246                                                  true_daddr, dev->pa_addr, 0);
3247                         }
3248                         iph->check=0;
3249                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
3250                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
3251                 }
3252                 else
3253                         getfrag(frag,saddr,(void *)iph,0,length-20);
3254                 dev_unlock_list();
3255 #ifdef CONFIG_IP_FIREWALL
3256                 if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
3257                 {
3258                         kfree_skb(skb, FREE_WRITE);
3259                         return -EPERM;
3260                 }
3261 #endif
3262 #ifdef CONFIG_IP_ACCT
3263                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
3264 #endif          
3265                 if(dev->flags&IFF_UP)
3266                         dev_queue_xmit(skb,dev,sk->priority);
3267                 else
3268                 {
3269                         ip_statistics.IpOutDiscards++;
3270                         kfree_skb(skb, FREE_WRITE);
3271                 }
3272                 return 0;
3273         }
3274         length-=20;
3275         if (sk && !sk->ip_hdrincl && opt) {
3276                 length -= opt->optlen;
3277                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
3278                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
3279         } else {
3280                 fragheaderlen = dev->hard_header_len;
3281                 if(!sk->ip_hdrincl)
3282                   fragheaderlen += 20;
3283                 
3284         /*
3285          *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
3286          *      out the size of the frames to send.
3287          */
3288          
3289                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
3290         }
3291         
3292         /*
3293          *      Start at the end of the frame by handling the remainder.
3294          */
3295          
3296         offset = length - (length % (maxfraglen - fragheaderlen));
3297         
3298         /*
3299          *      Amount of memory to allocate for final fragment.
3300          */
3301          
3302         fraglen = length - offset + fragheaderlen;
3303         
3304         if(length-offset==0)
3305         {
3306                 fraglen = maxfraglen;
3307                 offset -= maxfraglen-fragheaderlen;
3308         }
3309         
3310         
3311         /*
3312          *      The last fragment will not have MF (more fragments) set.
3313          */
3314          
3315         mf = 0;
3316 
3317         /*
3318          *      Can't fragment raw packets 
3319          */
3320          
3321         if (sk->ip_hdrincl && offset > 0)
3322                 return(-EMSGSIZE);
3323 
3324         /*
3325          *      Lock the device lists.
3326          */
3327 
3328         dev_lock_list();
3329         
3330         /*
3331          *      Get an identifier
3332          */
3333          
3334         id = htons(ip_id_count++);
3335 
3336         /*
3337          *      Being outputting the bytes.
3338          */
3339          
3340         do 
3341         {
3342                 struct sk_buff * skb;
3343                 int error;
3344                 char *data;
3345 
3346                 /*
3347                  *      Get the memory we require with some space left for alignment.
3348                  */
3349 
3350                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, 0, &error);
3351                 if (skb == NULL)
3352                 {
3353                         ip_statistics.IpOutDiscards++;
3354                         if(nfrags>1)
3355                                 ip_statistics.IpFragCreates++;                  
3356                         dev_unlock_list();
3357                         return(error);
3358                 }
3359                 
3360                 /*
3361                  *      Fill in the control structures
3362                  */
3363                  
3364                 skb->next = skb->prev = NULL;
3365                 skb->dev = dev;
3366                 skb->when = jiffies;
3367                 skb->free = 1; /* dubious, this one */
3368                 skb->sk = sk;
3369                 skb->arp = 0;
3370                 skb->saddr = saddr;
3371                 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
3372                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
3373                 data = skb_put(skb, fraglen-dev->hard_header_len);
3374 
3375                 /*
3376                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
3377                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
3378                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
3379                  *      pointer to speed header cache builds for identical targets.
3380                  */
3381                  
3382                 if(sk->ip_hcache_state>0)
3383                 {
3384                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
3385                         skb->arp=1;
3386                 }
3387                 else if (dev->hard_header)
3388                 {
3389                         if(dev->hard_header(skb, dev, ETH_P_IP, 
3390                                                 NULL, NULL, 0)>0)
3391                                 skb->arp=1;
3392                 }
3393                 
3394                 /*
3395                  *      Find where to start putting bytes.
3396                  */
3397                  
3398                 skb->ip_hdr = iph = (struct iphdr *)data;
3399 
3400                 /*
3401                  *      Only write IP header onto non-raw packets 
3402                  */
3403                  
3404                 if(!sk->ip_hdrincl) 
3405                 {
3406 
3407                         iph->version = 4;
3408                         iph->ihl = 5; /* ugh */
3409                         if (opt) {
3410                                 iph->ihl += opt->optlen>>2;
3411                                 ip_options_build(skb, opt,
3412                                                  true_daddr, dev->pa_addr, offset);
3413                         }
3414                         iph->tos = sk->ip_tos;
3415                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
3416                         iph->id = id;
3417                         iph->frag_off = htons(offset>>3);
3418                         iph->frag_off |= mf;
3419 #ifdef CONFIG_IP_MULTICAST
3420                         if (MULTICAST(daddr))
3421                                 iph->ttl = sk->ip_mc_ttl;
3422                         else
3423 #endif
3424                                 iph->ttl = sk->ip_ttl;
3425                         iph->protocol = type;
3426                         iph->check = 0;
3427                         iph->saddr = saddr;
3428                         iph->daddr = daddr;
3429                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
3430                         data += iph->ihl*4;
3431                         
3432                         /*
3433                          *      Any further fragments will have MF set.
3434                          */
3435                          
3436                         mf = htons(IP_MF);
3437                 }
3438                 
3439                 /*
3440                  *      User data callback
3441                  */
3442 
3443                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
3444                 
3445                 /*
3446                  *      Account for the fragment.
3447                  */
3448                  
3449 #ifdef CONFIG_IP_FIREWALL
3450                 if(!offset && ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
3451                 {
3452                         kfree_skb(skb, FREE_WRITE);
3453                         dev_unlock_list();
3454                         return -EPERM;
3455                 }
3456 #endif          
3457 #ifdef CONFIG_IP_ACCT
3458                 if(!offset)
3459                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
3460 #endif  
3461                 offset -= (maxfraglen-fragheaderlen);
3462                 fraglen = maxfraglen;
3463 
3464 #ifdef CONFIG_IP_MULTICAST
3465 
3466                 /*
3467                  *      Multicasts are looped back for other local users
3468                  */
3469          
3470                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
3471                 {
3472                         /*
3473                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
3474                          *      you are always magically a member of this group.
3475                          *
3476                          *      Always loop back all host messages when running as a multicast router.
3477                          */
3478                          
3479                         if(sk==NULL || sk->ip_mc_loop)
3480                         {
3481                                 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
3482                                         ip_loopback(rt?rt->rt_dev:dev,skb);
3483                                 else 
3484                                 {
3485                                         struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
3486                                         while(imc!=NULL) 
3487                                         {
3488                                                 if(imc->multiaddr==daddr) 
3489                                                 {
3490                                                         ip_loopback(rt?rt->rt_dev:dev,skb);
3491                                                         break;
3492                                                 }
3493                                                 imc=imc->next;
3494                                         }
3495                                 }
3496                         }
3497 
3498                         /*
3499                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
3500                          *      extra clone.
3501                          */
3502 
3503                         if(skb->ip_hdr->ttl==0)
3504                                 kfree_skb(skb, FREE_READ);
3505                 }
3506 #endif
3507 
3508                 nfrags++;
3509                 
3510                 /*
3511                  *      BSD loops broadcasts
3512                  */
3513                  
3514                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
3515                         ip_loopback(dev,skb);
3516 
3517                 /*
3518                  *      Now queue the bytes into the device.
3519                  */
3520                  
3521                 if (dev->flags & IFF_UP) 
3522                 {
3523                         dev_queue_xmit(skb, dev, sk->priority);
3524                 } 
3525                 else 
3526                 {
3527                         /*
3528                          *      Whoops... 
3529                          */
3530                          
3531                         ip_statistics.IpOutDiscards++;
3532                         if(nfrags>1)
3533                                 ip_statistics.IpFragCreates+=nfrags;
3534                         kfree_skb(skb, FREE_WRITE);
3535                         dev_unlock_list();
3536                         /*
3537                          *      BSD behaviour.
3538                          */
3539                         if(sk!=NULL)
3540                                 sk->err=ENETDOWN;
3541                         return(0); /* lose rest of fragments */
3542                 }
3543         } 
3544         while (offset >= 0);
3545         if(nfrags>1)
3546                 ip_statistics.IpFragCreates+=nfrags;
3547         dev_unlock_list();
3548         return(0);
3549 }
3550     
3551 
3552 /*
3553  *      IP protocol layer initialiser
      *
      *      Packet type descriptor that ip_init() passes to dev_add_pack().
      *      Its type member is left as 0 in this initializer and is set to
      *      htons(ETH_P_IP) by ip_init() just before registration.
      *
      *      NOTE(review): struct packet_type is declared elsewhere; the
      *      per-member notes below follow the existing comments and the
      *      usage in ip_init() - confirm against the declaration.
3554  */
3555 
3556 static struct packet_type ip_packet_type =
3557 {
3558         0,      /* MUTTER ntohs(ETH_P_IP),*/      /* presumably the type member, filled in at run time */
3559         NULL,   /* All devices */
3560         ip_rcv, /* the IP receive routine defined earlier in this file */
3561         NULL,   /* not used by this initializer */
3562         NULL,   /* not used by this initializer */
3563 };
3564 
3565 /*
3566  *      Device notifier callback. On NETDEV_DOWN the ptr argument is passed
      *      to ip_rt_flush(); every other event is ignored. The result is
      *      NOTIFY_DONE in all cases.
3567  */
3568 
3569 static int ip_rt_event(unsigned long event, void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
3570 {
3571         switch(event)
             {
                     case NETDEV_DOWN:
3572                         ip_rt_flush(ptr);
                             break;
                     default:
                             /* Nothing to do for any other event */
                             break;
             }
3573         return NOTIFY_DONE;
3574 }
3575 
3576 struct notifier_block ip_rt_notifier={       /* handed to register_netdevice_notifier() by ip_init() */
3577         ip_rt_event,    /* callback: passes ptr to ip_rt_flush() on NETDEV_DOWN */
3578         NULL,           /* NOTE(review): presumably the chain link - confirm against struct notifier_block */
3579         0               /* NOTE(review): presumably the priority - confirm against struct notifier_block */
3580 };
3581 
3582 /*
3583  *      IP registers its packet type with dev_add_pack(), hooks the device
      *      notifier so routes are flushed when a device is downed and, in
      *      multicast builds, registers the "igmp" proc entry. The subprotocol
      *      initialisers are kept below only as commented-out code and are not
      *      called from here.
3584  */
3585 
3586 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
3587 {
     #ifdef CONFIG_IP_MULTICAST
             /*
              *      Static storage on purpose. A compound literal written inside a
              *      function has automatic storage in ISO C, so its address would
              *      dangle after ip_init() returns if proc_net_register() keeps the
              *      pointer (presumably it does - its definition is not in this file).
              */
             static struct proc_dir_entry ip_igmp_proc_entry = {
                     PROC_NET_IGMP, 4, "igmp",
                     S_IFREG | S_IRUGO, 1, 0, 0,
                     0, &proc_net_inode_operations,
                     ip_mc_procinfo
             };
     #endif

             /* The type member is left as 0 in the static initializer; fill it in now */
3588         ip_packet_type.type=htons(ETH_P_IP);
3589         dev_add_pack(&ip_packet_type);
3590 
3591         /* So we flush routes when a device is downed */        
3592         register_netdevice_notifier(&ip_rt_notifier);
3593 
3594 /*      ip_raw_init();
3595         ip_packet_init();
3596         ip_tcp_init();
3597         ip_udp_init();*/
3598 
3599 #ifdef CONFIG_IP_MULTICAST
3600         proc_net_register(&ip_igmp_proc_entry);
3606 #endif
3607 }
3608 

/* [previous][next][first][last][top][bottom][index][help] */