root/net/ipv4/ip_output.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ip_loopback
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_queue_xmit
  7. ip_build_xmit
  8. ip_netlink_msg
  9. ip_rt_event
  10. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) output module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <Alan.Cox@linux.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *
  19  *      See ip_input.c for original log
  20  *
  21  *      Fixes:
  22  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
  23  *              Mike Kilburn    :       htons() missing in ip_build_xmit.
  24  */
  25 
  26 #include <asm/segment.h>
  27 #include <asm/system.h>
  28 #include <linux/types.h>
  29 #include <linux/kernel.h>
  30 #include <linux/sched.h>
  31 #include <linux/mm.h>
  32 #include <linux/string.h>
  33 #include <linux/errno.h>
  34 #include <linux/config.h>
  35 
  36 #include <linux/socket.h>
  37 #include <linux/sockios.h>
  38 #include <linux/in.h>
  39 #include <linux/inet.h>
  40 #include <linux/netdevice.h>
  41 #include <linux/etherdevice.h>
  42 #include <linux/proc_fs.h>
  43 #include <linux/stat.h>
  44 
  45 #include <net/snmp.h>
  46 #include <net/ip.h>
  47 #include <net/protocol.h>
  48 #include <net/route.h>
  49 #include <net/tcp.h>
  50 #include <net/udp.h>
  51 #include <linux/skbuff.h>
  52 #include <net/sock.h>
  53 #include <net/arp.h>
  54 #include <net/icmp.h>
  55 #include <net/raw.h>
  56 #include <net/checksum.h>
  57 #include <linux/igmp.h>
  58 #include <linux/ip_fw.h>
  59 #include <linux/firewall.h>
  60 #include <linux/mroute.h>
  61 #include <net/netlink.h>
  62 
  63 /*
  64  *      Loop a packet back to the sender.
      *
      *      A fresh buffer is allocated, the IP datagram found in skb
      *      (tot_len bytes starting at skb->ip_hdr) is copied into it and
      *      the copy is handed to ip_queue_xmit() on loopback_dev. The
      *      original skb is only read, never modified or freed here.
      *      If no buffer can be allocated the packet is silently not
      *      looped back.
      *
      *      old_dev:  not referenced by this function.
      *      skb:      packet to duplicate; skb->ip_hdr->tot_len must already
      *                hold the datagram length (network byte order).
  65  */
  66  
  67 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
  68 {
  69         struct device *dev=&loopback_dev;
  70         int len=ntohs(skb->ip_hdr->tot_len);
             /* Datagram plus MAC header, plus 15 bytes of slack because ip_send()
                reserves the MAC header length rounded up to a multiple of 16. */
  71         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
  72         
  73         if(newskb==NULL)
  74                 return;
  75                 
             /* The copy has no owning socket and is freed after transmit (free=1). */
  76         newskb->link3=NULL;
  77         newskb->sk=NULL;
  78         newskb->dev=dev;
  79         newskb->saddr=skb->saddr;
  80         newskb->daddr=skb->daddr;
  81         newskb->raddr=skb->raddr;
  82         newskb->free=1;
  83         newskb->lock=0;
  84         newskb->users=0;
  85         newskb->pkt_type=skb->pkt_type;
  86         
  87         /*
  88          *      Put a MAC header on the packet
  89          */
             /* No route is passed, so ip_send() never uses a cached header here;
                its return value (the MAC header length) is not needed. */
  90         ip_send(NULL,newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
  91         /*
  92          *      Add the rest of the data space. 
  93          */
  94         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
  95         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
  96 
  97         /*
  98          *      Copy the data
  99          */
 100         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
 101 
 102         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
 103                 
 104         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
             /* sk is NULL and free is 1: the copy is owned by nobody and is released
                once it has been sent. */
 105         ip_queue_xmit(NULL, dev, newskb, 1);
 106 }
 107 
 108 
 109 
 110 /*
 111  *      Take an skb, and fill in the MAC header.
      *
      *      Sets skb->dev, skb->protocol (ETH_P_IP) and skb->arp. When the
      *      device has a hard_header method, headroom for the MAC header
      *      (rounded up to a multiple of 16 bytes) is reserved and the header
      *      is taken either from the route's cached header (rt->rt_hh, used
      *      only when rt is given and rt->rt_dev is this device) or built by
      *      dev->hard_header().
      *
      *      Whenever the header could not be completed (cached header not up
      *      to date, or hard_header() returned a negative length) skb->arp is
      *      cleared and skb->raddr is set to daddr, the next routing address.
      *
      *      rt:     route to take a cached header from, may be NULL.
      *      skb:    buffer to prepare; must still be empty.
      *      daddr:  next hop address recorded in skb->raddr on a miss.
      *      len:    length passed through to dev->hard_header().
      *      dev:    output device.
      *      saddr:  not referenced by this function.
      *
      *      Returns the length of the MAC header placed in front of the
      *      data, always >= 0; 0 when the device has no hard_header method.
 112  */
 113 
 114 int ip_send(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 115 {
 116         int mac = 0;
 117 
 118         skb->dev = dev;
 119         skb->arp = 1;
 120         skb->protocol = htons(ETH_P_IP);
 121         if (dev->hard_header)
 122         {
 123                 /*
 124                  *      Build a hardware header. Source address is our mac, destination unknown
 125                  *      (rebuild header will sort this out)
 126                  */
 127                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 128                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 129                 {
 130                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 131                         if (rt->rt_hh->hh_uptodate)
 132                                 return dev->hard_header_len;
 133 #if RT_CACHE_DEBUG >= 2
 134                         printk("ip_send: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 135 #endif
 136                         skb->arp = 0;
 137                         skb->raddr = daddr;
                             /*
                              *      The header bytes are in place, so report their length as
                              *      a positive value exactly like the hard_header() path
                              *      below does; the miss itself is signalled through
                              *      skb->arp. ip_build_header() adds this value to the IP
                              *      header size (or returns it as is for IPPROTO_RAW), so a
                              *      negative length would corrupt the size it reports and
                              *      could be mistaken for one of its negative errno results.
                              */
 138                         return dev->hard_header_len;
 139                 }
 140                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 141                 if (mac < 0)
 142                 {
 143                         mac = -mac;
 144                         skb->arp = 0;
 145                         skb->raddr = daddr;     /* next routing address */
 146                 }
 147         }
 148         return mac;
 149 }
 150 
     /*
      *      Variant of ip_send() that ip_build_header() uses for IPPROTO_TCP.
      *      It does the same work but reserves MAX_HEADER bytes of headroom
      *      instead of the device's own MAC header length rounded up to 16.
      *
      *      rt:     route to take a cached header from, may be NULL.
      *      skb:    buffer to prepare; must still be empty.
      *      daddr:  next hop address recorded in skb->raddr when the MAC
      *              header could not be completed (skb->arp is cleared too).
      *      len:    length passed through to dev->hard_header().
      *      dev:    output device.
      *      saddr:  not referenced by this function.
      *
      *      Returns the length of the MAC header placed in front of the
      *      data, always >= 0; 0 when the device has no hard_header method.
      */
 151 static int ip_send_room(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 152 {
 153         int mac = 0;
 154 
 155         skb->dev = dev;
 156         skb->arp = 1;
 157         skb->protocol = htons(ETH_P_IP);
 158         if (dev->hard_header)
 159         {
 160                 skb_reserve(skb,MAX_HEADER);
                     /* Use the route's cached MAC header when the route goes out on this device */
 161                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 162                 {
 163                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 164                         if (rt->rt_hh->hh_uptodate)
 165                                 return dev->hard_header_len;
 166 #if RT_CACHE_DEBUG >= 2
 167                         printk("ip_send_room: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 168 #endif
 169                         skb->arp = 0;
 170                         skb->raddr = daddr;
                             /*
                              *      The header bytes are in place, so report their length as
                              *      a positive value exactly like the hard_header() path
                              *      below does; the miss itself is signalled through
                              *      skb->arp. ip_build_header() adds this value to the IP
                              *      header size, so a negative length would corrupt the size
                              *      it reports and could be mistaken for one of its negative
                              *      errno results.
                              */
 171                         return dev->hard_header_len;
 172                 }
 173                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 174                 if (mac < 0)
 175                 {
 176                         mac = -mac;
 177                         skb->arp = 0;
 178                         skb->raddr = daddr;     /* next routing address */
 179                 }
 180         }
 181         return mac;
 182 }
 183 
     /*
      *      Running counter for the IP identification field: ip_queue_xmit()
      *      and ip_build_xmit() store htons(ip_id_count++) in iph->id.
      */
 184 int ip_id_count = 0;
 185 
 186 /*
 187  * This routine builds the appropriate hardware/IP headers for
 188  * the packet held in skb.  It assumes that if *dev != NULL then the
 189  * protocol knows what it's doing, otherwise it uses the
 190  * routing/ARP tables to select a device struct.
      *
      * skb:    empty buffer the headers are written into.
      * saddr:  source address; replaced by the route's (or device's) address
      *         when it is zero, or loopback while daddr is not.
      * daddr:  destination address.
      * dev:    in/out; *dev is filled in from the route when it is NULL.
      * type:   IP protocol number. IPPROTO_TCP gets MAX_HEADER headroom via
      *         ip_send_room(); IPPROTO_RAW gets a MAC header only.
      * opt:    IP options to build into the header, may be NULL.
      * len:    length handed on to the MAC header builder.
      * tos:    value for the header's tos field.
      * ttl:    value for the header's ttl field.
      * rp:     optional cached route slot checked with ip_check_route();
      *         when NULL a fresh lookup is done with ip_rt_route().
      *
      * Returns the value obtained from ip_send()/ip_send_room() plus the size
      * of the IP header written (nothing is added for IPPROTO_RAW), or
      * -ENETUNREACH when there is no usable route.
      *
      * The tot_len, id and check fields are not written here;
      * ip_queue_xmit() fills them in.
 191  */
 192 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 193                 struct device **dev, int type, struct options *opt,
 194                 int len, int tos, int ttl, struct rtable ** rp)
 195 {
 196         struct rtable *rt;
 197         __u32 raddr;
 198         int tmp;
 199         struct iphdr *iph;
             /* The caller's destination is kept for ip_options_build() below */
 200         __u32 final_daddr = daddr;
 201 
 202 
             /* With a source route the packet is addressed to opt->faddr instead */
 203         if (opt && opt->srr)
 204                 daddr = opt->faddr;
 205 
 206         /*
 207          *      See if we need to look up the device.
 208          */
 209 
 210 #ifdef CONFIG_IP_MULTICAST      
 211         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 212                 *dev=dev_get(skb->sk->ip_mc_name);
 213 #endif
 214         if (rp)
 215         {
 216                 rt = ip_check_route(rp, daddr, skb->localroute);
 217                 /*
 218                  * If rp != NULL rt_put following below should not
 219                  * release route, so that...
 220                  */
 221                 if (rt)
 222                         ATOMIC_INCR(&rt->rt_refcnt);
 223         }
 224         else
 225                 rt = ip_rt_route(daddr, skb->localroute);
 226 
 227 
 228         if (*dev == NULL)
 229         {
 230                 if (rt == NULL)
 231                 {
 232                         ip_statistics.IpOutNoRoutes++;
 233                         return(-ENETUNREACH);
 234                 }
 235 
 236                 *dev = rt->rt_dev;
 237         }
 238 
             /* From here on rt may still be NULL if the caller supplied *dev
                and no route was found; every use below allows for that. */
 239         if ((LOOPBACK(saddr) && !LOOPBACK(daddr)) || !saddr)
 240                 saddr = rt ? rt->rt_src : (*dev)->pa_addr;
 241 
 242         raddr = rt ? rt->rt_gateway : daddr;
 243 
             /* A strict source route is refused when the route needs a gateway */
 244         if (opt && opt->is_strictroute && rt && (rt->rt_flags & RTF_GATEWAY))
 245         {
 246                 ip_rt_put(rt);
 247                 ip_statistics.IpOutNoRoutes++;
 248                 return -ENETUNREACH;
 249         }
 250 
 251         /*
 252          *      Now build the MAC header.
 253          */
 254 
 255         if (type==IPPROTO_TCP)
 256                 tmp = ip_send_room(rt, skb, raddr, len, *dev, saddr);
 257         else
 258                 tmp = ip_send(rt, skb, raddr, len, *dev, saddr);
 259 
             /* NOTE(review): rt can be NULL here, so this relies on ip_rt_put()
                accepting a NULL route - confirm against its definition. */
 260         ip_rt_put(rt);
 261 
 262         /*
 263          *      Book keeping
 264          */
 265 
 266         skb->dev = *dev;
 267         skb->saddr = saddr;
 268 
 269         /*
 270          *      Now build the IP header.
 271          */
 272 
 273         /*
 274          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 275          *      one is being supplied to us by the user
 276          */
 277 
 278         if(type == IPPROTO_RAW)
 279                 return (tmp);
 280 
 281         /*
 282          *      Build the IP addresses
 283          */
 284          
 285         if (opt)
 286                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 287         else
 288                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 289 
 290         iph->version  = 4;
 291         iph->ihl      = 5;
 292         iph->tos      = tos;
 293         iph->frag_off = 0;
 294         iph->ttl      = ttl;
 295         iph->daddr    = daddr;
 296         iph->saddr    = saddr;
 297         iph->protocol = type;
 298         skb->ip_hdr   = iph;
 299 
 300         if (!opt || !opt->optlen)
 301                 return sizeof(struct iphdr) + tmp;
             /* ihl counts 32-bit words, hence optlen is divided by four */
 302         iph->ihl += opt->optlen>>2;
 303         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 304         return iph->ihl*4 + tmp;
 305 }
 306 
 307 
 308 /*
 309  *      Generate a checksum for an outgoing IP datagram.
      *
      *      iph:  IP header to checksum; iph->ihl (header length in 32-bit
      *            words) must already be set, because it is the length
      *            passed to ip_fast_csum(). The result is stored in
      *            iph->check.
 310  */
 311 
 312 void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 313 {
             /* The check field is part of the summed header, so clear it first */
 314         iph->check = 0;
 315         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 316 }
 317 
 318 /*
 319  * Queues a packet to be sent, and starts the transmitter
 320  * if necessary.  if free = 1 then we free the block after
 321  * transmit, otherwise we don't. If free==2 we not only
 322  * free the block but also don't assign a new ip seq number.
 323  * This routine also needs to put in the total length,
 324  * and compute the checksum
      *
      * sk:    owning socket, or NULL; a packet without an owner is always
      *        sent with free = 1.
      * dev:   output device; the call is a no-op (with a debug message)
      *        when it is NULL.
      * skb:   packet to send; skb->ip_hdr must point at its IP header.
      * free:  0 = keep the buffer and link it onto sk's send_head/send_tail
      *        list, 1 = free after transmit, 2 = as 1 but keep the id
      *        already present in the header.
 325  */
 326 
 327 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
 328               struct sk_buff *skb, int free)
 329 {
 330         struct iphdr *iph;
 331 /*      unsigned char *ptr;*/
 332 
 333         /* Sanity check */
 334         if (dev == NULL)
 335         {
 336                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
 337                 return;
 338         }
 339 
 340         IS_SKB(skb);
 341 
 342         /*
 343          *      Do some book-keeping in the packet for later
 344          */
 345 
 346 
 347         skb->dev = dev;
 348         skb->when = jiffies;
 349 
 350         /*
 351          *      Find the IP header and set the length. This is bad
 352          *      but once we get the skb data handling code in the
 353          *      hardware will push its header sensibly and we will
 354          *      set skb->ip_hdr to avoid this mess and the fixed
 355          *      header length problem
 356          */
 357 
 358         iph = skb->ip_hdr;
             /* Total length = buffer length minus whatever precedes the IP header */
 359         iph->tot_len = htons(skb->len-(((unsigned char *)iph)-skb->data));
 360 
 361 #ifdef CONFIG_FIREWALL
             /* NOTE(review): this path returns without freeing skb or linking it
                onto any list in this function - confirm who owns the buffer
                afterwards, otherwise this looks like a leak. */
 362         if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 363                 /* just don't send this packet */
 364                 return;
 365 #endif  
 366 
 367         /*
 368          *      No reassigning numbers to fragments...
 369          */
 370 
 371         if(free!=2)
 372                 iph->id      = htons(ip_id_count++);
 373         else
 374                 free=1;
 375 
 376         /* All buffers without an owner socket get freed */
 377         if (sk == NULL)
 378                 free = 1;
 379 
 380         skb->free = free;
 381 
 382         /*
 383          *      Do we need to fragment. Again this is inefficient.
 384          *      We need to somehow lock the original buffer and use
 385          *      bits of it.
 386          */
 387 
 388         if(ntohs(iph->tot_len)> dev->mtu)
 389         {
                     /* The original buffer is released here once ip_fragment() is done with it */
 390                 ip_fragment(sk,skb,dev,0);
 391                 IS_SKB(skb);
 392                 kfree_skb(skb,FREE_WRITE);
 393                 return;
 394         }
 395 
 396         /*
 397          *      Add an IP checksum
 398          */
 399 
 400         ip_send_check(iph);
 401 
 402         /*
 403          *      Print the frame when debugging
 404          */
 405 
 406         /*
 407          *      More debugging. You cannot queue a packet already on a list
 408          *      Spot this and moan loudly.
 409          */
 410         if (skb->next != NULL)
 411         {
 412                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
 413                 skb_unlink(skb);
 414         }
 415 
 416         /*
 417          *      If a sender wishes the packet to remain unfreed
 418          *      we add it to his send queue. This arguably belongs
 419          *      in the TCP level since nobody else uses it. BUT
 420          *      remember IPng might change all the rules.
 421          */
 422 
 423         if (!free)
 424         {
 425                 unsigned long flags;
 426                 /* The socket now has more outstanding blocks */
 427 
                     /* sk cannot be NULL here: free was forced to 1 above when it is */
 428                 sk->packets_out++;
 429 
 430                 /* Protect the list for a moment */
 431                 save_flags(flags);
 432                 cli();
 433 
 434                 if (skb->link3 != NULL)
 435                 {
 436                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
 437                         skb->link3 = NULL;
 438                 }
                     /* Append to the socket's list, which is singly linked through link3 */
 439                 if (sk->send_head == NULL)
 440                 {
 441                         sk->send_tail = skb;
 442                         sk->send_head = skb;
 443                 }
 444                 else
 445                 {
 446                         sk->send_tail->link3 = skb;
 447                         sk->send_tail = skb;
 448                 }
 449                 /* skb->link3 is NULL */
 450 
 451                 /* Interrupt restore */
 452                 restore_flags(flags);
 453         }
 454         else
 455                 /* Remember who owns the buffer */
 456                 skb->sk = sk;
 457 
 458         /*
 459          *      If the indicated interface is up and running, send the packet.
 460          */
 461          
 462         ip_statistics.IpOutRequests++;
 463 #ifdef CONFIG_IP_ACCT
 464         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
 465 #endif  
 466         
 467 #ifdef CONFIG_IP_MULTICAST      
 468 
 469         /*
 470          *      Multicasts are looped back for other local users
 471          */
 472          
 473         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
 474         {
 475                 if(sk==NULL || sk->ip_mc_loop)
 476                 {
 477                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 478                         {
 479                                 ip_loopback(dev,skb);
 480                         }
 481                         else
 482                         {
                                     /* Loop back only if this device has joined the group */
 483                                 struct ip_mc_list *imc=dev->ip_mc_list;
 484                                 while(imc!=NULL)
 485                                 {
 486                                         if(imc->multiaddr==iph->daddr)
 487                                         {
 488                                                 ip_loopback(dev,skb);
 489                                                 break;
 490                                         }
 491                                         imc=imc->next;
 492                                 }
 493                         }
 494                 }
 495                 /* Multicasts with ttl 0 must not go beyond the host */
 496                 
                     /* NOTE(review): skb is freed here even when free == 0, in which
                        case it was linked onto sk's send list above - confirm that
                        combination cannot occur. */
 497                 if(skb->ip_hdr->ttl==0)
 498                 {
 499                         kfree_skb(skb, FREE_READ);
 500                         return;
 501                 }
 502         }
 503 #endif
             /* Broadcasts on a broadcast-capable, non-loopback device are also looped back */
 504         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
 505                 ip_loopback(dev,skb);
 506                 
 507         if (dev->flags & IFF_UP)
 508         {
 509                 /*
 510                  *      If we have an owner use its priority setting,
 511                  *      otherwise use NORMAL
 512                  */
 513 
 514                 if (sk != NULL)
 515                 {
 516                         dev_queue_xmit(skb, dev, sk->priority);
 517                 }
 518                 else
 519                 {
 520                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
 521                 }
 522         }
 523         else
 524         {
                     /* Device is down: report it on the socket and count the discard */
 525                 if(sk)
 526                         sk->err = ENETDOWN;
 527                 ip_statistics.IpOutDiscards++;
 528                 if (free)
 529                         kfree_skb(skb, FREE_WRITE);
 530         }
 531 }
 532 
 533 
 534 /*
 535  *      Build and send a packet, with as little as one copy
 536  *
 537  *      Doesn't care much about ip options... option length can be
 538  *      different for fragment at 0 and other fragments.
 539  *
 540  *      Note that the fragment at the highest offset is sent first,
 541  *      so the getfrag routine can fill in the TCP/UDP checksum header
 542  *      field in the last fragment it sends... actually it also helps
 543  *      the reassemblers, they can put most packets in at the head of
 544  *      the fragment queue, and they know the total size in advance. This
 545  *      last feature will measurably improve the Linux fragment handler.
 546  *
 547  *      The callback has five args, an arbitrary pointer (copy of frag),
 548  *      the source IP address (may depend on the routing table), the 
 549  *      destination buffer address (char *), the offset to copy from, and the
 550  *      length to be copied.
 551  * 
 552  */
 553 
 554 int ip_build_xmit(struct sock *sk,
     /* [previous][next][first][last][top][bottom][index][help] */
 555                    void getfrag (const void *,
 556                                  __u32,
 557                                  char *,
 558                                  unsigned int,  
 559                                  unsigned int),
 560                    const void *frag,
 561                    unsigned short int length,
 562                    __u32 daddr,
 563                    __u32 user_saddr,
 564                    struct options * opt,
 565                    int flags,
 566                    int type,
 567                    int noblock) 
 568 {
 569         struct rtable *rt;
 570         unsigned int fraglen, maxfraglen, fragheaderlen;
 571         int offset, mf;
 572         __u32 saddr;
 573         unsigned short id;
 574         struct iphdr *iph;
 575         __u32 raddr;
 576         struct device *dev = NULL;
 577         struct hh_cache * hh=NULL;
 578         int nfrags=0;
 579         __u32 true_daddr = daddr;
 580 
 581         if (opt && opt->srr && !sk->ip_hdrincl)
 582           daddr = opt->faddr;
 583         
 584         ip_statistics.IpOutRequests++;
 585 
 586 #ifdef CONFIG_IP_MULTICAST      
 587         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
 588         {
 589                 dev=dev_get(sk->ip_mc_name);
 590                 if(!dev)
 591                         return -ENODEV;
 592                 rt=NULL;
 593                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 594                         saddr = sk->saddr;
 595                 else
 596                         saddr = dev->pa_addr;
 597         }
 598         else
 599         {
 600 #endif  
 601                 rt = ip_check_route(&sk->ip_route_cache, daddr,
 602                                     sk->localroute || (flags&MSG_DONTROUTE) ||
 603                                     (opt && opt->is_strictroute));
 604                 if (rt == NULL) 
 605                 {
 606                         ip_statistics.IpOutNoRoutes++;
 607                         return(-ENETUNREACH);
 608                 }
 609                 saddr = rt->rt_src;
 610 
 611                 hh = rt->rt_hh;
 612         
 613                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 614                         saddr = sk->saddr;
 615                         
 616                 dev=rt->rt_dev;
 617 #ifdef CONFIG_IP_MULTICAST
 618         }
 619         if (rt && !dev)
 620                 dev = rt->rt_dev;
 621 #endif          
 622         if (user_saddr)
 623                 saddr = user_saddr;
 624 
 625         raddr = rt ? rt->rt_gateway : daddr;
 626         /*
 627          *      Now compute the buffer space we require
 628          */ 
 629          
 630         /*
 631          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
 632          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
 633          */
 634 
 635         length += sizeof(struct iphdr);
 636         if (!sk->ip_hdrincl && opt) 
 637                 length += opt->optlen;
 638 
 639         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
 640         {       
 641                 int error;
 642                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
 643                 if(skb==NULL)
 644                 {
 645                         ip_statistics.IpOutDiscards++;
 646                         return error;
 647                 }
 648                 skb->dev=dev;
 649                 skb->protocol = htons(ETH_P_IP);
 650                 skb->free=1;
 651                 skb->when=jiffies;
 652                 skb->sk=sk;
 653                 skb->arp=0;
 654                 skb->saddr=saddr;
 655                 skb->raddr = raddr;
 656                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 657                 if (hh)
 658                 {
 659                         skb->arp=1;
 660                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 661                         if (!hh->hh_uptodate)
 662                         {
 663                                 skb->arp = 0;
 664 #if RT_CACHE_DEBUG >= 2
 665                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 666 #endif                          
 667                         }
 668                 }
 669                 else if(dev->hard_header)
 670                 {
 671                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
 672                                 skb->arp=1;
 673                 }
 674                 else
 675                         skb->arp=1;
 676                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
 677                 dev_lock_list();
 678                 if(!sk->ip_hdrincl)
 679                 {
 680                         iph->version=4;
 681                         iph->ihl=5;
 682                         iph->tos=sk->ip_tos;
 683                         iph->tot_len = htons(length);
 684                         iph->id=htons(ip_id_count++);
 685                         iph->frag_off = 0;
 686                         iph->ttl=sk->ip_ttl;
 687                         iph->protocol=type;
 688                         iph->saddr=saddr;
 689                         iph->daddr=daddr;
 690                         if (opt) 
 691                         {
 692                                 iph->ihl += opt->optlen>>2;
 693                                 ip_options_build(skb, opt,
 694                                                  true_daddr, dev->pa_addr, 0);
 695                         }
 696                         iph->check=0;
 697                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 698                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
 699                 }
 700                 else
 701                         getfrag(frag,saddr,(void *)iph,0,length-20);
 702                 dev_unlock_list();
 703 #ifdef CONFIG_FIREWALL
 704                 if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
 705                 {
 706                         kfree_skb(skb, FREE_WRITE);
 707                         return -EPERM;
 708                 }
 709 #endif
 710 #ifdef CONFIG_IP_ACCT
 711                 ip_fw_chk(iph,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
 712 #endif          
 713                 if(dev->flags&IFF_UP)
 714                         dev_queue_xmit(skb,dev,sk->priority);
 715                 else
 716                 {
 717                         ip_statistics.IpOutDiscards++;
 718                         kfree_skb(skb, FREE_WRITE);
 719                 }
 720                 return 0;
 721         }
 722         length -= sizeof(struct iphdr);
 723         if (sk && !sk->ip_hdrincl && opt) 
 724         {
 725                 length -= opt->optlen;
 726                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
 727                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
 728         }
 729         else 
 730         {
 731                 fragheaderlen = dev->hard_header_len;
 732                 if(!sk->ip_hdrincl)
 733                         fragheaderlen += 20;
 734                 
 735                 /*
 736                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
 737                  *      out the size of the frames to send.
 738                  */
 739          
 740                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
 741         }
 742         
 743         /*
 744          *      Start at the end of the frame by handling the remainder.
 745          */
 746          
 747         offset = length - (length % (maxfraglen - fragheaderlen));
 748         
 749         /*
 750          *      Amount of memory to allocate for final fragment.
 751          */
 752          
 753         fraglen = length - offset + fragheaderlen;
 754         
 755         if(length-offset==0)
 756         {
 757                 fraglen = maxfraglen;
 758                 offset -= maxfraglen-fragheaderlen;
 759         }
 760         
 761         
 762         /*
 763          *      The last fragment will not have MF (more fragments) set.
 764          */
 765          
 766         mf = 0;
 767 
 768         /*
 769          *      Can't fragment raw packets 
 770          */
 771          
 772         if (sk->ip_hdrincl && offset > 0)
 773                 return(-EMSGSIZE);
 774 
 775         /*
 776          *      Lock the device lists.
 777          */
 778 
 779         dev_lock_list();
 780         
 781         /*
 782          *      Get an identifier
 783          */
 784          
 785         id = htons(ip_id_count++);
 786 
 787         /*
 788          *      Being outputting the bytes.
 789          */
 790          
 791         do 
 792         {
 793                 struct sk_buff * skb;
 794                 int error;
 795                 char *data;
 796 
 797                 /*
 798                  *      Get the memory we require with some space left for alignment.
 799                  */
 800 
 801                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
 802                 if (skb == NULL)
 803                 {
 804                         ip_statistics.IpOutDiscards++;
 805                         if(nfrags>1)
 806                                 ip_statistics.IpFragCreates++;                  
 807                         dev_unlock_list();
 808                         return(error);
 809                 }
 810                 
 811                 /*
 812                  *      Fill in the control structures
 813                  */
 814                  
 815                 skb->dev = dev;
 816                 skb->protocol = htons(ETH_P_IP);
 817                 skb->when = jiffies;
 818                 skb->free = 1; /* dubious, this one */
 819                 skb->sk = sk;
 820                 skb->arp = 0;
 821                 skb->saddr = saddr;
 822                 skb->raddr = raddr;
 823                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 824                 data = skb_put(skb, fraglen-dev->hard_header_len);
 825 
 826                 /*
 827                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
 828                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
 829                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
 830                  *      pointer to speed header cache builds for identical targets.
 831                  */
 832                  
 833                 if (hh)
 834                 {
 835                         skb->arp=1;
 836                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 837                         if (!hh->hh_uptodate)
 838                         {
 839                                 skb->arp = 0;
 840 #if RT_CACHE_DEBUG >= 2
 841                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 842 #endif                          
 843                         }
 844                 }
 845                 else if (dev->hard_header)
 846                 {
 847                         if(dev->hard_header(skb, dev, ETH_P_IP, 
 848                                                 NULL, NULL, 0)>0)
 849                                 skb->arp=1;
 850                 }
 851                 
 852                 /*
 853                  *      Find where to start putting bytes.
 854                  */
 855                  
 856                 skb->ip_hdr = iph = (struct iphdr *)data;
 857 
 858                 /*
 859                  *      Only write IP header onto non-raw packets 
 860                  */
 861                  
 862                 if(!sk->ip_hdrincl) 
 863                 {
 864 
 865                         iph->version = 4;
 866                         iph->ihl = 5; /* ugh */
 867                         if (opt) {
 868                                 iph->ihl += opt->optlen>>2;
 869                                 ip_options_build(skb, opt,
 870                                                  true_daddr, dev->pa_addr, offset);
 871                         }
 872                         iph->tos = sk->ip_tos;
 873                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 874                         iph->id = id;
 875                         iph->frag_off = htons(offset>>3);
 876                         iph->frag_off |= mf;
 877 #ifdef CONFIG_IP_MULTICAST
 878                         if (MULTICAST(daddr))
 879                                 iph->ttl = sk->ip_mc_ttl;
 880                         else
 881 #endif
 882                                 iph->ttl = sk->ip_ttl;
 883                         iph->protocol = type;
 884                         iph->check = 0;
 885                         iph->saddr = saddr;
 886                         iph->daddr = daddr;
 887                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 888                         data += iph->ihl*4;
 889                         
 890                         /*
 891                          *      Any further fragments will have MF set.
 892                          */
 893                          
 894                         mf = htons(IP_MF);
 895                 }
 896                 
 897                 /*
 898                  *      User data callback
 899                  */
 900 
 901                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
 902                 
 903                 /*
 904                  *      Account for the fragment.
 905                  */
 906                  
 907 #ifdef CONFIG_FIREWALL
 908                 if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 909                 {
 910                         kfree_skb(skb, FREE_WRITE);
 911                         dev_unlock_list();
 912                         return -EPERM;
 913                 }
 914 #endif          
 915 #ifdef CONFIG_IP_ACCT
 916                 if(!offset)
 917                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
 918 #endif  
 919                 offset -= (maxfraglen-fragheaderlen);
 920                 fraglen = maxfraglen;
 921 
 922 #ifdef CONFIG_IP_MULTICAST
 923 
 924                 /*
 925                  *      Multicasts are looped back for other local users
 926                  */
 927          
 928                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
 929                 {
 930                         /*
 931                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
 932                          *      you are always magically a member of this group.
 933                          *
 934                          *      Always loop back all host messages when running as a multicast router.
 935                          */
 936                          
 937                         if(sk==NULL || sk->ip_mc_loop)
 938                         {
 939                                 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 940                                         ip_loopback(dev,skb);
 941                                 else 
 942                                 {
 943                                         struct ip_mc_list *imc=dev->ip_mc_list;
 944                                         while(imc!=NULL) 
 945                                         {
 946                                                 if(imc->multiaddr==daddr) 
 947                                                 {
 948                                                         ip_loopback(dev,skb);
 949                                                         break;
 950                                                 }
 951                                                 imc=imc->next;
 952                                         }
 953                                 }
 954                         }
 955 
 956                         /*
 957                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
 958                          *      extra clone.
 959                          */
 960 
 961                         if(skb->ip_hdr->ttl==0)
 962                                 kfree_skb(skb, FREE_READ);
 963                 }
 964 #endif
 965 
 966                 nfrags++;
 967                 
 968                 /*
 969                  *      BSD loops broadcasts
 970                  */
 971                  
 972                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
 973                         ip_loopback(dev,skb);
 974 
 975                 /*
 976                  *      Now queue the bytes into the device.
 977                  */
 978                  
 979                 if (dev->flags & IFF_UP) 
 980                 {
 981                         dev_queue_xmit(skb, dev, sk->priority);
 982                 } 
 983                 else 
 984                 {
 985                         /*
 986                          *      Whoops... 
 987                          */
 988                          
 989                         ip_statistics.IpOutDiscards++;
 990                         if(nfrags>1)
 991                                 ip_statistics.IpFragCreates+=nfrags;
 992                         kfree_skb(skb, FREE_WRITE);
 993                         dev_unlock_list();
 994                         /*
 995                          *      BSD behaviour.
 996                          */
 997                         if(sk!=NULL)
 998                                 sk->err=ENETDOWN;
 999                         return(0); /* lose rest of fragments */
1000                 }
1001         } 
1002         while (offset >= 0);
1003         if(nfrags>1)
1004                 ip_statistics.IpFragCreates+=nfrags;
1005         dev_unlock_list();
1006         return(0);
1007 }
1008     
1009 
1010 /*
1011  *      IP protocol layer initialiser
1012  */
1013 
1014 static struct packet_type ip_packet_type =
1015 {
1016         0,      /* MUTTER ntohs(ETH_P_IP),*/
1017         NULL,   /* All devices */
1018         ip_rcv,
1019         NULL,
1020         NULL,
1021 };
1022 
1023 #ifdef CONFIG_RTNETLINK
1024 
1025 /*
1026  *      Netlink hooks for IP
1027  */
1028  
1029 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
1030 {
1031         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
1032         struct netlink_rtinfo *nrt;
1033         struct sockaddr_in *s;
1034         if(skb==NULL)
1035                 return;
1036         skb->free=1;
1037         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
1038         nrt->rtmsg_type=msg;
1039         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
1040         s->sin_family=AF_INET;
1041         s->sin_addr.s_addr=daddr;
1042         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
1043         s->sin_family=AF_INET;
1044         s->sin_addr.s_addr=gw;
1045         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
1046         s->sin_family=AF_INET;
1047         s->sin_addr.s_addr=mask;
1048         nrt->rtmsg_flags=flags;
1049         nrt->rtmsg_metric=metric;
1050         strcpy(nrt->rtmsg_device,name);
1051         netlink_post(NETLINK_ROUTE, skb);
1052 }       
1053 
1054 #endif
1055 
1056 /*
1057  *      Device notifier
1058  */
1059  
1060 static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
1061 {
1062         struct device *dev=ptr;
1063         if(event==NETDEV_DOWN)
1064         {
1065                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
1066                 ip_rt_flush(dev);
1067         }
1068 /*
1069  *      Join the intial group if multicast.
1070  */             
1071         if(event==NETDEV_UP)
1072         {
1073 #ifdef CONFIG_IP_MULTICAST      
1074                 ip_mc_allhost(dev);
1075 #endif          
1076                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
1077         }
1078         return NOTIFY_DONE;
1079 }
1080 
1081 struct notifier_block ip_rt_notifier={
1082         ip_rt_event,
1083         NULL,
1084         0
1085 };
1086 
1087 /*
1088  *      IP registers the packet type and then calls the subprotocol initialisers
1089  */
1090 
1091 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1092 {
1093         ip_packet_type.type=htons(ETH_P_IP);
1094         dev_add_pack(&ip_packet_type);
1095 
1096         /* So we flush routes when a device is downed */        
1097         register_netdevice_notifier(&ip_rt_notifier);
1098 
1099 /*      ip_raw_init();
1100         ip_packet_init();
1101         ip_tcp_init();
1102         ip_udp_init();*/
1103 
1104 #ifdef CONFIG_IP_MULTICAST
1105         proc_net_register(&(struct proc_dir_entry) {
1106                 PROC_NET_IGMP, 4, "igmp",
1107                 S_IFREG | S_IRUGO, 1, 0, 0,
1108                 0, &proc_net_inode_operations,
1109                 ip_mc_procinfo
1110         });
1111 #endif
1112 }
1113 

/* [previous][next][first][last][top][bottom][index][help] */