root/net/ipv4/ip_output.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ip_loopback
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_queue_xmit
  7. ip_build_xmit
  8. ip_netlink_msg
  9. ip_rt_event
  10. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) output module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <Alan.Cox@linux.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *
  19  *      See ip_input.c for original log
  20  *
  21  *      Fixes:
  22  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
  23  */
  24 
  25 #include <asm/segment.h>
  26 #include <asm/system.h>
  27 #include <linux/types.h>
  28 #include <linux/kernel.h>
  29 #include <linux/sched.h>
  30 #include <linux/mm.h>
  31 #include <linux/string.h>
  32 #include <linux/errno.h>
  33 #include <linux/config.h>
  34 
  35 #include <linux/socket.h>
  36 #include <linux/sockios.h>
  37 #include <linux/in.h>
  38 #include <linux/inet.h>
  39 #include <linux/netdevice.h>
  40 #include <linux/etherdevice.h>
  41 #include <linux/proc_fs.h>
  42 #include <linux/stat.h>
  43 
  44 #include <net/snmp.h>
  45 #include <net/ip.h>
  46 #include <net/protocol.h>
  47 #include <net/route.h>
  48 #include <net/tcp.h>
  49 #include <net/udp.h>
  50 #include <linux/skbuff.h>
  51 #include <net/sock.h>
  52 #include <net/arp.h>
  53 #include <net/icmp.h>
  54 #include <net/raw.h>
  55 #include <net/checksum.h>
  56 #include <linux/igmp.h>
  57 #include <linux/ip_fw.h>
  58 #include <linux/firewall.h>
  59 #include <linux/mroute.h>
  60 #include <net/netlink.h>
  61 
  62 /*
  63  *      Loop a packet back to the sender.
  64  */
  65  
  66 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
  67 {
  68         struct device *dev=&loopback_dev;
  69         int len=ntohs(skb->ip_hdr->tot_len);
  70         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
  71         
  72         if(newskb==NULL)
  73                 return;
  74                 
  75         newskb->link3=NULL;
  76         newskb->sk=NULL;
  77         newskb->dev=dev;
  78         newskb->saddr=skb->saddr;
  79         newskb->daddr=skb->daddr;
  80         newskb->raddr=skb->raddr;
  81         newskb->free=1;
  82         newskb->lock=0;
  83         newskb->users=0;
  84         newskb->pkt_type=skb->pkt_type;
  85         
  86         /*
  87          *      Put a MAC header on the packet
  88          */
  89         ip_send(NULL,newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
  90         /*
  91          *      Add the rest of the data space. 
  92          */
  93         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
  94         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
  95 
  96         /*
  97          *      Copy the data
  98          */
  99         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
 100 
 101         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
 102                 
 103         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
 104         ip_queue_xmit(NULL, dev, newskb, 1);
 105 }
 106 
 107 
 108 
 109 /*
 110  *      Take an skb, and fill in the MAC header.
 111  */
 112 
 113 int ip_send(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 114 {
 115         int mac = 0;
 116 
 117         skb->dev = dev;
 118         skb->arp = 1;
 119         skb->protocol = htons(ETH_P_IP);
 120         if (dev->hard_header)
 121         {
 122                 /*
 123                  *      Build a hardware header. Source address is our mac, destination unknown
 124                  *      (rebuild header will sort this out)
 125                  */
 126                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 127                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 128                 {
 129                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 130                         if (rt->rt_hh->hh_uptodate)
 131                                 return dev->hard_header_len;
 132 #if RT_CACHE_DEBUG >= 2
 133                         printk("ip_send: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 134 #endif
 135                         skb->arp = 0;
 136                         skb->raddr = daddr;
 137                         return -dev->hard_header_len;
 138                 }
 139                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 140                 if (mac < 0)
 141                 {
 142                         mac = -mac;
 143                         skb->arp = 0;
 144                         skb->raddr = daddr;     /* next routing address */
 145                 }
 146         }
 147         return mac;
 148 }
 149 
 150 static int ip_send_room(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152         int mac = 0;
 153 
 154         skb->dev = dev;
 155         skb->arp = 1;
 156         skb->protocol = ETH_P_IP;
 157         if (dev->hard_header)
 158         {
 159                 skb_reserve(skb,MAX_HEADER);
 160                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 161                 {
 162                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 163                         if (rt->rt_hh->hh_uptodate)
 164                                 return dev->hard_header_len;
 165 #if RT_CACHE_DEBUG >= 2
 166                         printk("ip_send_room: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 167 #endif
 168                         skb->arp = 0;
 169                         skb->raddr = daddr;
 170                         return -dev->hard_header_len;
 171                 }
 172                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 173                 if (mac < 0)
 174                 {
 175                         mac = -mac;
 176                         skb->arp = 0;
 177                         skb->raddr = daddr;     /* next routing address */
 178                 }
 179         }
 180         return mac;
 181 }
 182 
 183 int ip_id_count = 0;
 184 
 185 /*
 186  * This routine builds the appropriate hardware/IP headers for
 187  * the routine.  It assumes that if *dev != NULL then the
 188  * protocol knows what it's doing, otherwise it uses the
 189  * routing/ARP tables to select a device struct.
 190  */
 191 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 192                 struct device **dev, int type, struct options *opt,
 193                 int len, int tos, int ttl, struct rtable ** rp)
 194 {
 195         struct rtable *rt;
 196         __u32 raddr;
 197         int tmp;
 198         struct iphdr *iph;
 199         __u32 final_daddr = daddr;
 200 
 201 
 202         if (opt && opt->srr)
 203                 daddr = opt->faddr;
 204 
 205         /*
 206          *      See if we need to look up the device.
 207          */
 208 
 209 #ifdef CONFIG_IP_MULTICAST      
 210         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 211                 *dev=dev_get(skb->sk->ip_mc_name);
 212 #endif
 213         if (rp)
 214         {
 215                 rt = ip_check_route(rp, daddr, skb->localroute);
 216                 /*
 217                  * If rp != NULL rt_put following below should not
 218                  * release route, so that...
 219                  */
 220                 if (rt)
 221                         ATOMIC_INCR(&rt->rt_refcnt);
 222         }
 223         else
 224                 rt = ip_rt_route(daddr, skb->localroute);
 225 
 226 
 227         if (*dev == NULL)
 228         {
 229                 if (rt == NULL)
 230                 {
 231                         ip_statistics.IpOutNoRoutes++;
 232                         return(-ENETUNREACH);
 233                 }
 234 
 235                 *dev = rt->rt_dev;
 236         }
 237 
 238         if ((LOOPBACK(saddr) && !LOOPBACK(daddr)) || !saddr)
 239                 saddr = rt ? rt->rt_src : (*dev)->pa_addr;
 240 
 241         raddr = rt ? rt->rt_gateway : 0;
 242 
 243         if (opt && opt->is_strictroute && rt && (rt->rt_flags & RTF_GATEWAY))
 244         {
 245                 ip_rt_put(rt);
 246                 ip_statistics.IpOutNoRoutes++;
 247                 return -ENETUNREACH;
 248         }
 249 
 250         /*
 251          *      No gateway so aim at the real destination
 252          */
 253 
 254         if (raddr == 0)
 255                 raddr = daddr;
 256 
 257         /*
 258          *      Now build the MAC header.
 259          */
 260 
 261         if (type==IPPROTO_TCP)
 262                 tmp = ip_send_room(rt, skb, raddr, len, *dev, saddr);
 263         else
 264                 tmp = ip_send(rt, skb, raddr, len, *dev, saddr);
 265 
 266         ip_rt_put(rt);
 267 
 268         /*
 269          *      Book keeping
 270          */
 271 
 272         skb->dev = *dev;
 273         skb->saddr = saddr;
 274 
 275         /*
 276          *      Now build the IP header.
 277          */
 278 
 279         /*
 280          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 281          *      one is being supplied to us by the user
 282          */
 283 
 284         if(type == IPPROTO_RAW)
 285                 return (tmp);
 286 
 287         /*
 288          *      Build the IP addresses
 289          */
 290          
 291         if (opt)
 292                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 293         else
 294                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 295 
 296         iph->version  = 4;
 297         iph->ihl      = 5;
 298         iph->tos      = tos;
 299         iph->frag_off = 0;
 300         iph->ttl      = ttl;
 301         iph->daddr    = daddr;
 302         iph->saddr    = saddr;
 303         iph->protocol = type;
 304         skb->ip_hdr   = iph;
 305 
 306         if (!opt || !opt->optlen)
 307                 return sizeof(struct iphdr) + tmp;
 308         iph->ihl += opt->optlen>>2;
 309         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 310         return iph->ihl*4 + tmp;
 311 }
 312 
 313 
 314 /*
 315  *      Generate a checksum for an outgoing IP datagram.
 316  */
 317 
 318 void ip_send_check(struct iphdr *iph)
     /* [previous][next][first][last][top][bottom][index][help] */
 319 {
 320         iph->check = 0;
 321         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 322 }
 323 
 324 /*
 325  * Queues a packet to be sent, and starts the transmitter
 326  * if necessary.  if free = 1 then we free the block after
 327  * transmit, otherwise we don't. If free==2 we not only
 328  * free the block but also don't assign a new ip seq number.
 329  * This routine also needs to put in the total length,
 330  * and compute the checksum
 331  */
 332 
 333 void ip_queue_xmit(struct sock *sk, struct device *dev,
     /* [previous][next][first][last][top][bottom][index][help] */
 334               struct sk_buff *skb, int free)
 335 {
 336         struct iphdr *iph;
 337 /*      unsigned char *ptr;*/
 338 
 339         /* Sanity check */
 340         if (dev == NULL)
 341         {
 342                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
 343                 return;
 344         }
 345 
 346         IS_SKB(skb);
 347 
 348         /*
 349          *      Do some book-keeping in the packet for later
 350          */
 351 
 352 
 353         skb->dev = dev;
 354         skb->when = jiffies;
 355 
 356         /*
 357          *      Find the IP header and set the length. This is bad
 358          *      but once we get the skb data handling code in the
 359          *      hardware will push its header sensibly and we will
 360          *      set skb->ip_hdr to avoid this mess and the fixed
 361          *      header length problem
 362          */
 363 
 364         iph = skb->ip_hdr;
 365         iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
 366 
 367 #ifdef CONFIG_FIREWALL
 368         if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 369                 /* just don't send this packet */
 370                 return;
 371 #endif  
 372 
 373         /*
 374          *      No reassigning numbers to fragments...
 375          */
 376 
 377         if(free!=2)
 378                 iph->id      = htons(ip_id_count++);
 379         else
 380                 free=1;
 381 
 382         /* All buffers without an owner socket get freed */
 383         if (sk == NULL)
 384                 free = 1;
 385 
 386         skb->free = free;
 387 
 388         /*
 389          *      Do we need to fragment. Again this is inefficient.
 390          *      We need to somehow lock the original buffer and use
 391          *      bits of it.
 392          */
 393 
 394         if(ntohs(iph->tot_len)> dev->mtu)
 395         {
 396                 ip_fragment(sk,skb,dev,0);
 397                 IS_SKB(skb);
 398                 kfree_skb(skb,FREE_WRITE);
 399                 return;
 400         }
 401 
 402         /*
 403          *      Add an IP checksum
 404          */
 405 
 406         ip_send_check(iph);
 407 
 408         /*
 409          *      Print the frame when debugging
 410          */
 411 
 412         /*
 413          *      More debugging. You cannot queue a packet already on a list
 414          *      Spot this and moan loudly.
 415          */
 416         if (skb->next != NULL)
 417         {
 418                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
 419                 skb_unlink(skb);
 420         }
 421 
 422         /*
 423          *      If a sender wishes the packet to remain unfreed
 424          *      we add it to his send queue. This arguably belongs
 425          *      in the TCP level since nobody else uses it. BUT
 426          *      remember IPng might change all the rules.
 427          */
 428 
 429         if (!free)
 430         {
 431                 unsigned long flags;
 432                 /* The socket now has more outstanding blocks */
 433 
 434                 sk->packets_out++;
 435 
 436                 /* Protect the list for a moment */
 437                 save_flags(flags);
 438                 cli();
 439 
 440                 if (skb->link3 != NULL)
 441                 {
 442                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
 443                         skb->link3 = NULL;
 444                 }
 445                 if (sk->send_head == NULL)
 446                 {
 447                         sk->send_tail = skb;
 448                         sk->send_head = skb;
 449                 }
 450                 else
 451                 {
 452                         sk->send_tail->link3 = skb;
 453                         sk->send_tail = skb;
 454                 }
 455                 /* skb->link3 is NULL */
 456 
 457                 /* Interrupt restore */
 458                 restore_flags(flags);
 459         }
 460         else
 461                 /* Remember who owns the buffer */
 462                 skb->sk = sk;
 463 
 464         /*
 465          *      If the indicated interface is up and running, send the packet.
 466          */
 467          
 468         ip_statistics.IpOutRequests++;
 469 #ifdef CONFIG_IP_ACCT
 470         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
 471 #endif  
 472         
 473 #ifdef CONFIG_IP_MULTICAST      
 474 
 475         /*
 476          *      Multicasts are looped back for other local users
 477          */
 478          
 479         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
 480         {
 481                 if(sk==NULL || sk->ip_mc_loop)
 482                 {
 483                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 484                         {
 485                                 ip_loopback(dev,skb);
 486                         }
 487                         else
 488                         {
 489                                 struct ip_mc_list *imc=dev->ip_mc_list;
 490                                 while(imc!=NULL)
 491                                 {
 492                                         if(imc->multiaddr==iph->daddr)
 493                                         {
 494                                                 ip_loopback(dev,skb);
 495                                                 break;
 496                                         }
 497                                         imc=imc->next;
 498                                 }
 499                         }
 500                 }
 501                 /* Multicasts with ttl 0 must not go beyond the host */
 502                 
 503                 if(skb->ip_hdr->ttl==0)
 504                 {
 505                         kfree_skb(skb, FREE_READ);
 506                         return;
 507                 }
 508         }
 509 #endif
 510         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
 511                 ip_loopback(dev,skb);
 512                 
 513         if (dev->flags & IFF_UP)
 514         {
 515                 /*
 516                  *      If we have an owner use its priority setting,
 517                  *      otherwise use NORMAL
 518                  */
 519 
 520                 if (sk != NULL)
 521                 {
 522                         dev_queue_xmit(skb, dev, sk->priority);
 523                 }
 524                 else
 525                 {
 526                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
 527                 }
 528         }
 529         else
 530         {
 531                 if(sk)
 532                         sk->err = ENETDOWN;
 533                 ip_statistics.IpOutDiscards++;
 534                 if (free)
 535                         kfree_skb(skb, FREE_WRITE);
 536         }
 537 }
 538 
 539 
 540 /*
 541  *      Build and send a packet, with as little as one copy
 542  *
 543  *      Doesn't care much about ip options... option length can be
 544  *      different for fragment at 0 and other fragments.
 545  *
 546  *      Note that the fragment at the highest offset is sent first,
 547  *      so the getfrag routine can fill in the TCP/UDP checksum header
 548  *      field in the last fragment it sends... actually it also helps
 549  *      the reassemblers, they can put most packets in at the head of
 550  *      the fragment queue, and they know the total size in advance. This
 551  *      last feature will measurable improve the Linux fragment handler.
 552  *
 553  *      The callback has five args, an arbitrary pointer (copy of frag),
 554  *      the source IP address (may depend on the routing table), the 
 555  *      destination adddress (char *), the offset to copy from, and the
 556  *      length to be copied.
 557  * 
 558  */
 559 
 560 int ip_build_xmit(struct sock *sk,
     /* [previous][next][first][last][top][bottom][index][help] */
 561                    void getfrag (const void *,
 562                                  __u32,
 563                                  char *,
 564                                  unsigned int,  
 565                                  unsigned int),
 566                    const void *frag,
 567                    unsigned short int length,
 568                    __u32 daddr,
 569                    __u32 user_saddr,
 570                    struct options * opt,
 571                    int flags,
 572                    int type,
 573                    int noblock) 
 574 {
 575         struct rtable *rt;
 576         unsigned int fraglen, maxfraglen, fragheaderlen;
 577         int offset, mf;
 578         __u32 saddr;
 579         unsigned short id;
 580         struct iphdr *iph;
 581         __u32 raddr;
 582         struct device *dev = NULL;
 583         struct hh_cache * hh=NULL;
 584         int nfrags=0;
 585         __u32 true_daddr = daddr;
 586 
 587         if (opt && opt->srr && !sk->ip_hdrincl)
 588           daddr = opt->faddr;
 589         
 590         ip_statistics.IpOutRequests++;
 591 
 592 #ifdef CONFIG_IP_MULTICAST      
 593         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
 594         {
 595                 dev=dev_get(sk->ip_mc_name);
 596                 if(!dev)
 597                         return -ENODEV;
 598                 rt=NULL;
 599                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 600                         saddr = sk->saddr;
 601                 else
 602                         saddr = dev->pa_addr;
 603         }
 604         else
 605         {
 606 #endif  
 607                 rt = ip_check_route(&sk->ip_route_cache, daddr,
 608                                     sk->localroute || (flags&MSG_DONTROUTE) ||
 609                                     (opt && opt->is_strictroute));
 610                 if (rt == NULL) 
 611                 {
 612                         ip_statistics.IpOutNoRoutes++;
 613                         return(-ENETUNREACH);
 614                 }
 615                 saddr = rt->rt_src;
 616 
 617                 hh = rt->rt_hh;
 618         
 619                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 620                         saddr = sk->saddr;
 621                         
 622                 dev=rt->rt_dev;
 623 #ifdef CONFIG_IP_MULTICAST
 624         }
 625         if (rt && !dev)
 626                 dev = rt->rt_dev;
 627 #endif          
 628         if (user_saddr)
 629                 saddr = user_saddr;
 630 
 631         raddr = rt ? rt->rt_gateway : daddr;
 632         /*
 633          *      Now compute the buffer space we require
 634          */ 
 635          
 636         /*
 637          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
 638          *      choice RAW frames within 20 bytes of maximum size(rare) to the long path
 639          */
 640 
 641         length += sizeof(struct iphdr);
 642         if (!sk->ip_hdrincl && opt) 
 643                 length += opt->optlen;
 644 
 645         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
 646         {       
 647                 int error;
 648                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
 649                 if(skb==NULL)
 650                 {
 651                         ip_statistics.IpOutDiscards++;
 652                         return error;
 653                 }
 654                 skb->dev=dev;
 655                 skb->free=1;
 656                 skb->when=jiffies;
 657                 skb->sk=sk;
 658                 skb->arp=0;
 659                 skb->saddr=saddr;
 660                 skb->raddr = raddr;
 661                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 662                 if (hh)
 663                 {
 664                         skb->arp=1;
 665                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 666                         if (!hh->hh_uptodate)
 667                         {
 668                                 skb->arp = 0;
 669 #if RT_CACHE_DEBUG >= 2
 670                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 671 #endif                          
 672                         }
 673                 }
 674                 else if(dev->hard_header)
 675                 {
 676                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
 677                                 skb->arp=1;
 678                 }
 679                 else
 680                         skb->arp=1;
 681                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
 682                 dev_lock_list();
 683                 if(!sk->ip_hdrincl)
 684                 {
 685                         iph->version=4;
 686                         iph->ihl=5;
 687                         iph->tos=sk->ip_tos;
 688                         iph->tot_len = htons(length);
 689                         iph->id=htons(ip_id_count++);
 690                         iph->frag_off = 0;
 691                         iph->ttl=sk->ip_ttl;
 692                         iph->protocol=type;
 693                         iph->saddr=saddr;
 694                         iph->daddr=daddr;
 695                         if (opt) 
 696                         {
 697                                 iph->ihl += opt->optlen>>2;
 698                                 ip_options_build(skb, opt,
 699                                                  true_daddr, dev->pa_addr, 0);
 700                         }
 701                         iph->check=0;
 702                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 703                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
 704                 }
 705                 else
 706                         getfrag(frag,saddr,(void *)iph,0,length-20);
 707                 dev_unlock_list();
 708 #ifdef CONFIG_FIREWALL
 709                 if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
 710                 {
 711                         kfree_skb(skb, FREE_WRITE);
 712                         return -EPERM;
 713                 }
 714 #endif
 715 #ifdef CONFIG_IP_ACCT
 716                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
 717 #endif          
 718                 if(dev->flags&IFF_UP)
 719                         dev_queue_xmit(skb,dev,sk->priority);
 720                 else
 721                 {
 722                         ip_statistics.IpOutDiscards++;
 723                         kfree_skb(skb, FREE_WRITE);
 724                 }
 725                 return 0;
 726         }
 727         length -= sizeof(struct iphdr);
 728         if (sk && !sk->ip_hdrincl && opt) 
 729         {
 730                 length -= opt->optlen;
 731                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
 732                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
 733         }
 734         else 
 735         {
 736                 fragheaderlen = dev->hard_header_len;
 737                 if(!sk->ip_hdrincl)
 738                         fragheaderlen += 20;
 739                 
 740                 /*
 741                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
 742                  *      out the size of the frames to send.
 743                  */
 744          
 745                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
 746         }
 747         
 748         /*
 749          *      Start at the end of the frame by handling the remainder.
 750          */
 751          
 752         offset = length - (length % (maxfraglen - fragheaderlen));
 753         
 754         /*
 755          *      Amount of memory to allocate for final fragment.
 756          */
 757          
 758         fraglen = length - offset + fragheaderlen;
 759         
 760         if(length-offset==0)
 761         {
 762                 fraglen = maxfraglen;
 763                 offset -= maxfraglen-fragheaderlen;
 764         }
 765         
 766         
 767         /*
 768          *      The last fragment will not have MF (more fragments) set.
 769          */
 770          
 771         mf = 0;
 772 
 773         /*
 774          *      Can't fragment raw packets 
 775          */
 776          
 777         if (sk->ip_hdrincl && offset > 0)
 778                 return(-EMSGSIZE);
 779 
 780         /*
 781          *      Lock the device lists.
 782          */
 783 
 784         dev_lock_list();
 785         
 786         /*
 787          *      Get an identifier
 788          */
 789          
 790         id = htons(ip_id_count++);
 791 
 792         /*
 793          *      Being outputting the bytes.
 794          */
 795          
 796         do 
 797         {
 798                 struct sk_buff * skb;
 799                 int error;
 800                 char *data;
 801 
 802                 /*
 803                  *      Get the memory we require with some space left for alignment.
 804                  */
 805 
 806                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
 807                 if (skb == NULL)
 808                 {
 809                         ip_statistics.IpOutDiscards++;
 810                         if(nfrags>1)
 811                                 ip_statistics.IpFragCreates++;                  
 812                         dev_unlock_list();
 813                         return(error);
 814                 }
 815                 
 816                 /*
 817                  *      Fill in the control structures
 818                  */
 819                  
 820                 skb->next = skb->prev = NULL;
 821                 skb->dev = dev;
 822                 skb->when = jiffies;
 823                 skb->free = 1; /* dubious, this one */
 824                 skb->sk = sk;
 825                 skb->arp = 0;
 826                 skb->saddr = saddr;
 827                 skb->raddr = raddr;
 828                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 829                 data = skb_put(skb, fraglen-dev->hard_header_len);
 830 
 831                 /*
 832                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
 833                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
 834                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
 835                  *      pointer to speed header cache builds for identical targets.
 836                  */
 837                  
 838                 if (hh)
 839                 {
 840                         skb->arp=1;
 841                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 842                         if (!hh->hh_uptodate)
 843                         {
 844                                 skb->arp = 0;
 845 #if RT_CACHE_DEBUG >= 2
 846                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 847 #endif                          
 848                         }
 849                 }
 850                 else if (dev->hard_header)
 851                 {
 852                         if(dev->hard_header(skb, dev, ETH_P_IP, 
 853                                                 NULL, NULL, 0)>0)
 854                                 skb->arp=1;
 855                 }
 856                 
 857                 /*
 858                  *      Find where to start putting bytes.
 859                  */
 860                  
 861                 skb->ip_hdr = iph = (struct iphdr *)data;
 862 
 863                 /*
 864                  *      Only write IP header onto non-raw packets 
 865                  */
 866                  
 867                 if(!sk->ip_hdrincl) 
 868                 {
 869 
 870                         iph->version = 4;
 871                         iph->ihl = 5; /* ugh */
 872                         if (opt) {
 873                                 iph->ihl += opt->optlen>>2;
 874                                 ip_options_build(skb, opt,
 875                                                  true_daddr, dev->pa_addr, offset);
 876                         }
 877                         iph->tos = sk->ip_tos;
 878                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 879                         iph->id = id;
 880                         iph->frag_off = htons(offset>>3);
 881                         iph->frag_off |= mf;
 882 #ifdef CONFIG_IP_MULTICAST
 883                         if (MULTICAST(daddr))
 884                                 iph->ttl = sk->ip_mc_ttl;
 885                         else
 886 #endif
 887                                 iph->ttl = sk->ip_ttl;
 888                         iph->protocol = type;
 889                         iph->check = 0;
 890                         iph->saddr = saddr;
 891                         iph->daddr = daddr;
 892                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 893                         data += iph->ihl*4;
 894                         
 895                         /*
 896                          *      Any further fragments will have MF set.
 897                          */
 898                          
 899                         mf = htons(IP_MF);
 900                 }
 901                 
 902                 /*
 903                  *      User data callback
 904                  */
 905 
 906                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
 907                 
 908                 /*
 909                  *      Account for the fragment.
 910                  */
 911                  
 912 #ifdef CONFIG_FIREWALL
 913                 if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 914                 {
 915                         kfree_skb(skb, FREE_WRITE);
 916                         dev_unlock_list();
 917                         return -EPERM;
 918                 }
 919 #endif          
 920 #ifdef CONFIG_IP_ACCT
 921                 if(!offset)
 922                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
 923 #endif  
 924                 offset -= (maxfraglen-fragheaderlen);
 925                 fraglen = maxfraglen;
 926 
 927 #ifdef CONFIG_IP_MULTICAST
 928 
 929                 /*
 930                  *      Multicasts are looped back for other local users
 931                  */
 932          
 933                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
 934                 {
 935                         /*
 936                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
 937                          *      you are always magically a member of this group.
 938                          *
 939                          *      Always loop back all host messages when running as a multicast router.
 940                          */
 941                          
 942                         if(sk==NULL || sk->ip_mc_loop)
 943                         {
 944                                 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 945                                         ip_loopback(dev,skb);
 946                                 else 
 947                                 {
 948                                         struct ip_mc_list *imc=dev->ip_mc_list;
 949                                         while(imc!=NULL) 
 950                                         {
 951                                                 if(imc->multiaddr==daddr) 
 952                                                 {
 953                                                         ip_loopback(dev,skb);
 954                                                         break;
 955                                                 }
 956                                                 imc=imc->next;
 957                                         }
 958                                 }
 959                         }
 960 
 961                         /*
 962                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
 963                          *      extra clone.
 964                          */
 965 
 966                         if(skb->ip_hdr->ttl==0)
 967                                 kfree_skb(skb, FREE_READ);
 968                 }
 969 #endif
 970 
 971                 nfrags++;
 972                 
 973                 /*
 974                  *      BSD loops broadcasts
 975                  */
 976                  
 977                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
 978                         ip_loopback(dev,skb);
 979 
 980                 /*
 981                  *      Now queue the bytes into the device.
 982                  */
 983                  
 984                 if (dev->flags & IFF_UP) 
 985                 {
 986                         dev_queue_xmit(skb, dev, sk->priority);
 987                 } 
 988                 else 
 989                 {
 990                         /*
 991                          *      Whoops... 
 992                          */
 993                          
 994                         ip_statistics.IpOutDiscards++;
 995                         if(nfrags>1)
 996                                 ip_statistics.IpFragCreates+=nfrags;
 997                         kfree_skb(skb, FREE_WRITE);
 998                         dev_unlock_list();
 999                         /*
1000                          *      BSD behaviour.
1001                          */
1002                         if(sk!=NULL)
1003                                 sk->err=ENETDOWN;
1004                         return(0); /* lose rest of fragments */
1005                 }
1006         } 
1007         while (offset >= 0);
1008         if(nfrags>1)
1009                 ip_statistics.IpFragCreates+=nfrags;
1010         dev_unlock_list();
1011         return(0);
1012 }
1013     
1014 
1015 /*
1016  *      IP protocol layer initialiser
1017  */
1018 
/*
 *      Packet type handler hooking IP into the link layer demultiplexer.
 *      The protocol id field is left 0 here and filled in at run time by
 *      ip_init(), because htons() is not a constant expression in this
 *      kernel's headers (hence the MUTTER note below).
 *
 *      NOTE(review): field meanings below follow the usual packet_type
 *      layout (type, dev, func, data, next) — struct declaration is not
 *      visible in this file, confirm against netdevice.h.
 */
static struct packet_type ip_packet_type =
{
        0,      /* MUTTER ntohs(ETH_P_IP),*/ /* set to htons(ETH_P_IP) in ip_init() */
        NULL,   /* All devices */
        ip_rcv,         /* receive handler for incoming IP frames */
        NULL,           /* no private data for the handler */
        NULL,           /* end of handler chain */
};
1027 
1028 #ifdef CONFIG_RTNETLINK
1029 
1030 /*
1031  *      Netlink hooks for IP
1032  */
1033  
1034 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
1035 {
1036         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
1037         struct netlink_rtinfo *nrt;
1038         struct sockaddr_in *s;
1039         if(skb==NULL)
1040                 return;
1041         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
1042         nrt->rtmsg_type=msg;
1043         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
1044         s->sin_family=AF_INET;
1045         s->sin_addr.s_addr=daddr;
1046         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
1047         s->sin_family=AF_INET;
1048         s->sin_addr.s_addr=gw;
1049         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
1050         s->sin_family=AF_INET;
1051         s->sin_addr.s_addr=mask;
1052         nrt->rtmsg_flags=flags;
1053         nrt->rtmsg_metric=metric;
1054         strcpy(nrt->rtmsg_device,name);
1055         netlink_post(NETLINK_ROUTE, skb);
1056 }       
1057 
1058 #endif
1059 
1060 /*
1061  *      Device notifier
1062  */
1063  
1064 static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
1065 {
1066         struct device *dev=ptr;
1067         if(event==NETDEV_DOWN)
1068         {
1069                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
1070                 ip_rt_flush(dev);
1071         }
1072 /*
1073  *      Join the intial group if multicast.
1074  */             
1075         if(event==NETDEV_UP)
1076         {
1077 #ifdef CONFIG_IP_MULTICAST      
1078                 ip_mc_allhost(dev);
1079 #endif          
1080                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
1081         }
1082         return NOTIFY_DONE;
1083 }
1084 
/*
 *      Notifier registered with the device layer (in ip_init()) so IP
 *      hears interface up/down transitions via ip_rt_event.
 *      Positional fields: handler, next-in-chain, priority.
 */
struct notifier_block ip_rt_notifier={
        ip_rt_event,    /* callback for device events */
        NULL,           /* filled in by register_netdevice_notifier */
        0               /* default priority */
};
1090 
1091 /*
1092  *      IP registers the packet type and then calls the subprotocol initialisers
1093  */
1094 
1095 void ip_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1096 {
1097         ip_packet_type.type=htons(ETH_P_IP);
1098         dev_add_pack(&ip_packet_type);
1099 
1100         /* So we flush routes when a device is downed */        
1101         register_netdevice_notifier(&ip_rt_notifier);
1102 
1103 /*      ip_raw_init();
1104         ip_packet_init();
1105         ip_tcp_init();
1106         ip_udp_init();*/
1107 
1108 #ifdef CONFIG_IP_MULTICAST
1109         proc_net_register(&(struct proc_dir_entry) {
1110                 PROC_NET_IGMP, 4, "igmp",
1111                 S_IFREG | S_IRUGO, 1, 0, 0,
1112                 0, &proc_net_inode_operations,
1113                 ip_mc_procinfo
1114         });
1115 #endif
1116 }
1117 

/* [previous][next][first][last][top][bottom][index][help] */