root/net/ipv4/ip_output.c


DEFINITIONS

This source file includes the following definitions.
  1. ip_loopback
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_queue_xmit
  7. ip_build_xmit
  8. ip_netlink_msg
  9. ip_rt_event
  10. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) output module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <Alan.Cox@linux.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *
  19  *      See ip_input.c for original log
  20  *
  21  *      Fixes:
  22  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
  23  */
  24 
  25 #include <asm/segment.h>
  26 #include <asm/system.h>
  27 #include <linux/types.h>
  28 #include <linux/kernel.h>
  29 #include <linux/sched.h>
  30 #include <linux/mm.h>
  31 #include <linux/string.h>
  32 #include <linux/errno.h>
  33 #include <linux/config.h>
  34 
  35 #include <linux/socket.h>
  36 #include <linux/sockios.h>
  37 #include <linux/in.h>
  38 #include <linux/inet.h>
  39 #include <linux/netdevice.h>
  40 #include <linux/etherdevice.h>
  41 #include <linux/proc_fs.h>
  42 #include <linux/stat.h>
  43 
  44 #include <net/snmp.h>
  45 #include <net/ip.h>
  46 #include <net/protocol.h>
  47 #include <net/route.h>
  48 #include <net/tcp.h>
  49 #include <net/udp.h>
  50 #include <linux/skbuff.h>
  51 #include <net/sock.h>
  52 #include <net/arp.h>
  53 #include <net/icmp.h>
  54 #include <net/raw.h>
  55 #include <net/checksum.h>
  56 #include <linux/igmp.h>
  57 #include <linux/ip_fw.h>
  58 #include <linux/firewall.h>
  59 #include <linux/mroute.h>
  60 #include <net/netlink.h>
  61 
  62 /*
  63  *      Loop a packet back to the sender.
  64  */
  65  
  66 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
  67 {
  68         struct device *dev=&loopback_dev;
  69         int len=ntohs(skb->ip_hdr->tot_len);
  70         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
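              /* the extra 15 bytes leave room for the 16-byte header
                 alignment that ip_send() performs via skb_reserve() */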
  71         
  72         if(newskb==NULL)
  73                 return;
  74                 
  75         newskb->link3=NULL;
  76         newskb->sk=NULL;
  77         newskb->dev=dev;
  78         newskb->saddr=skb->saddr;
  79         newskb->daddr=skb->daddr;
  80         newskb->raddr=skb->raddr;
  81         newskb->free=1;
  82         newskb->lock=0;
  83         newskb->users=0;
  84         newskb->pkt_type=skb->pkt_type;
  85         
  86         /*
  87          *      Put a MAC header on the packet
  88          */
  89         ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
  90         /*
  91          *      Add the rest of the data space. 
  92          */
  93         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
  94         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
  95 
  96         /*
  97          *      Copy the data
  98          */
  99         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
 100 
 101         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
 102                 
 103         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
 104         ip_queue_xmit(NULL, dev, newskb, 1);
 105 }
 106 
 107 
 108 
 109 /*
 110  *      Take an skb, and fill in the MAC header.
 111  */
 112 
 113 int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 114 {
 115         int mac = 0;
 116 
 117         skb->dev = dev;
 118         skb->arp = 1;
 119         if (dev->hard_header)
 120         {
 121                 /*
 122                  *      Build a hardware header. Source address is our mac, destination unknown
 123                  *      (rebuild header will sort this out)
 124                  */
 125                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 126                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 127                 if (mac < 0)
 128                 {
 129                         mac = -mac;
 130                         skb->arp = 0;
 131                         skb->raddr = daddr;     /* next routing address */
 132                 }
 133         }
 134         return mac;
 135 }
 136 
 137 static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 138 {
 139         int mac = 0;
 140 
 141         skb->dev = dev;
 142         skb->arp = 1;
 143         if (dev->hard_header)
 144         {
 145                 skb_reserve(skb,MAX_HEADER);
 146                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 147                 if (mac < 0)
 148                 {
 149                         mac = -mac;
 150                         skb->arp = 0;
 151                         skb->raddr = daddr;     /* next routing address */
 152                 }
 153         }
 154         return mac;
 155 }
 156 
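      /*
       *      Shared counter used to fill in the IP identification field
       *      of outgoing datagrams.
       */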
 157 int ip_id_count = 0;
 158 
 159 /*
 160  * This routine builds the appropriate hardware/IP headers for
  161  * the packet.  It assumes that if *dev != NULL then the
 162  * protocol knows what it's doing, otherwise it uses the
 163  * routing/ARP tables to select a device struct.
 164  */
 165 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
 166                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 167 {
 168         struct rtable *rt;
 169         __u32 raddr;
 170         int tmp;
 171         __u32 src;
 172         struct iphdr *iph;
 173         __u32 final_daddr = daddr;
 174 
 175         if (opt && opt->srr)
 176                 daddr = opt->faddr;
 177 
 178         /*
 179          *      See if we need to look up the device.
 180          */
 181 
 182 #ifdef CONFIG_IP_MULTICAST      
 183         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 184                 *dev=dev_get(skb->sk->ip_mc_name);
 185 #endif
 186         if (*dev == NULL)
 187         {
 188                 if(skb->localroute)
 189                         rt = ip_rt_local(daddr, NULL, &src);
 190                 else
 191                         rt = ip_rt_route(daddr, NULL, &src);
 192                 if (rt == NULL)
 193                 {
 194                         ip_statistics.IpOutNoRoutes++;
 195                         return(-ENETUNREACH);
 196                 }
 197 
 198                 *dev = rt->rt_dev;
 199                 /*
 200                  *      If the frame is from us and going off machine it MUST MUST MUST
 201                  *      have the output device ip address and never the loopback
 202                  */
 203                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 204                         saddr = src;/*rt->rt_dev->pa_addr;*/
 205                 raddr = rt->rt_gateway;
 206 
 207         }
 208         else
 209         {
 210                 /*
 211                  *      We still need the address of the first hop.
 212                  */
 213                 if(skb->localroute)
 214                         rt = ip_rt_local(daddr, NULL, &src);
 215                 else
 216                         rt = ip_rt_route(daddr, NULL, &src);
 217                 /*
 218                  *      If the frame is from us and going off machine it MUST MUST MUST
 219                  *      have the output device ip address and never the loopback
 220                  */
 221                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 222                         saddr = src;/*rt->rt_dev->pa_addr;*/
 223 
 224                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 225         }
 226 
 227         /*
 228          *      No source addr so make it our addr
 229          */
 230         if (saddr == 0)
 231                 saddr = src;
 232 
 233         /*
 234          *      No gateway so aim at the real destination
 235          */
 236         if (raddr == 0)
 237                 raddr = daddr;
 238 
 239         /*
 240          *      Now build the MAC header.
 241          */
 242 
 243         if(type==IPPROTO_TCP)
 244                 tmp = ip_send_room(skb, raddr, len, *dev, saddr);
 245         else
 246                 tmp = ip_send(skb, raddr, len, *dev, saddr);
 247 
 248         /*
 249          *      Book keeping
 250          */
 251 
 252         skb->dev = *dev;
 253         skb->saddr = saddr;
 254 
 255         /*
 256          *      Now build the IP header.
 257          */
 258 
 259         /*
 260          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 261          *      one is being supplied to us by the user
 262          */
 263 
 264         if(type == IPPROTO_RAW)
 265                 return (tmp);
 266 
 267         /*
 268          *      Build the IP addresses
 269          */
 270          
 271         if (opt)
 272                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 273         else
 274                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 275 
 276         iph->version  = 4;
 277         iph->ihl      = 5;
 278         iph->tos      = tos;
 279         iph->frag_off = 0;
 280         iph->ttl      = ttl;
 281         iph->daddr    = daddr;
 282         iph->saddr    = saddr;
 283         iph->protocol = type;
 284         skb->ip_hdr   = iph;
 285 
 286         if (!opt || !opt->optlen)
 287                 return sizeof(struct iphdr) + tmp;
 288         if (opt->is_strictroute && rt && rt->rt_gateway) 
 289         {
 290                 ip_statistics.IpOutNoRoutes++;
 291                 return -ENETUNREACH;
 292         }
 293         iph->ihl += opt->optlen>>2;
 294         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 295         return iph->ihl*4 + tmp;
 296 }
 297 
 298 
 299 /*
 300  *      Generate a checksum for an outgoing IP datagram.
 301  */
 302 
 303 void ip_send_check(struct iphdr *iph)
 304 {
 305         iph->check = 0;
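              /* ip_fast_csum() takes the header length in 32-bit words,
                 which is exactly what ihl holds */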
 306         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 307 }
 308 
 309 /*
 310  * Queues a packet to be sent, and starts the transmitter
 311  * if necessary.  if free = 1 then we free the block after
  312  * transmit, otherwise we don't. If free==2 we free the
  313  * block but do not assign a new IP id to it.
 314  * This routine also needs to put in the total length,
 315  * and compute the checksum
 316  */
 317 
 318 void ip_queue_xmit(struct sock *sk, struct device *dev,
 319               struct sk_buff *skb, int free)
 320 {
 321         struct iphdr *iph;
 322 /*      unsigned char *ptr;*/
 323 
 324         /* Sanity check */
 325         if (dev == NULL)
 326         {
 327                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
 328                 return;
 329         }
 330 
 331         IS_SKB(skb);
 332 
 333         /*
 334          *      Do some book-keeping in the packet for later
 335          */
 336 
 337 
 338         skb->dev = dev;
 339         skb->when = jiffies;
 340 
 341         /*
 342          *      Find the IP header and set the length. This is bad
 343          *      but once we get the skb data handling code in the
 344          *      hardware will push its header sensibly and we will
 345          *      set skb->ip_hdr to avoid this mess and the fixed
 346          *      header length problem
 347          */
 348 
 349         iph = skb->ip_hdr;
  350         iph->tot_len = htons(skb->len-(((unsigned char *)iph)-skb->data));
 351 
 352 #ifdef CONFIG_FIREWALL
 353         if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 354                 /* just don't send this packet */
 355                 return;
 356 #endif  
 357 
 358         /*
 359          *      No reassigning numbers to fragments...
 360          */
 361 
 362         if(free!=2)
 363                 iph->id      = htons(ip_id_count++);
 364         else
 365                 free=1;
 366 
 367         /* All buffers without an owner socket get freed */
 368         if (sk == NULL)
 369                 free = 1;
 370 
 371         skb->free = free;
 372 
 373         /*
  374          *      Do we need to fragment? Again this is inefficient.
 375          *      We need to somehow lock the original buffer and use
 376          *      bits of it.
 377          */
 378 
 379         if(ntohs(iph->tot_len)> dev->mtu)
 380         {
 381                 ip_fragment(sk,skb,dev,0);
 382                 IS_SKB(skb);
 383                 kfree_skb(skb,FREE_WRITE);
 384                 return;
 385         }
 386 
 387         /*
 388          *      Add an IP checksum
 389          */
 390 
 391         ip_send_check(iph);
 392 
 393         /*
 394          *      Print the frame when debugging
 395          */
 396 
 397         /*
  398          *      More debugging. You cannot queue a packet already on a list.
 399          *      Spot this and moan loudly.
 400          */
 401         if (skb->next != NULL)
 402         {
 403                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
 404                 skb_unlink(skb);
 405         }
 406 
 407         /*
 408          *      If a sender wishes the packet to remain unfreed
 409          *      we add it to his send queue. This arguably belongs
 410          *      in the TCP level since nobody else uses it. BUT
 411          *      remember IPng might change all the rules.
 412          */
 413 
 414         if (!free)
 415         {
 416                 unsigned long flags;
 417                 /* The socket now has more outstanding blocks */
 418 
 419                 sk->packets_out++;
 420 
 421                 /* Protect the list for a moment */
 422                 save_flags(flags);
 423                 cli();
 424 
 425                 if (skb->link3 != NULL)
 426                 {
 427                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
 428                         skb->link3 = NULL;
 429                 }
 430                 if (sk->send_head == NULL)
 431                 {
 432                         sk->send_tail = skb;
 433                         sk->send_head = skb;
 434                 }
 435                 else
 436                 {
 437                         sk->send_tail->link3 = skb;
 438                         sk->send_tail = skb;
 439                 }
 440                 /* skb->link3 is NULL */
 441 
 442                 /* Interrupt restore */
 443                 restore_flags(flags);
 444         }
 445         else
 446                 /* Remember who owns the buffer */
 447                 skb->sk = sk;
 448 
 449         /*
 450          *      If the indicated interface is up and running, send the packet.
 451          */
 452          
 453         ip_statistics.IpOutRequests++;
 454 #ifdef CONFIG_IP_ACCT
 455         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
 456 #endif  
 457         
 458 #ifdef CONFIG_IP_MULTICAST      
 459 
 460         /*
 461          *      Multicasts are looped back for other local users
 462          */
 463          
 464         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
 465         {
 466                 if(sk==NULL || sk->ip_mc_loop)
 467                 {
 468                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 469                         {
 470                                 ip_loopback(dev,skb);
 471                         }
 472                         else
 473                         {
 474                                 struct ip_mc_list *imc=dev->ip_mc_list;
 475                                 while(imc!=NULL)
 476                                 {
 477                                         if(imc->multiaddr==iph->daddr)
 478                                         {
 479                                                 ip_loopback(dev,skb);
 480                                                 break;
 481                                         }
 482                                         imc=imc->next;
 483                                 }
 484                         }
 485                 }
 486                 /* Multicasts with ttl 0 must not go beyond the host */
 487                 
 488                 if(skb->ip_hdr->ttl==0)
 489                 {
 490                         kfree_skb(skb, FREE_READ);
 491                         return;
 492                 }
 493         }
 494 #endif
 495         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
 496                 ip_loopback(dev,skb);
 497                 
 498         if (dev->flags & IFF_UP)
 499         {
 500                 /*
 501                  *      If we have an owner use its priority setting,
 502                  *      otherwise use NORMAL
 503                  */
 504 
 505                 if (sk != NULL)
 506                 {
 507                         dev_queue_xmit(skb, dev, sk->priority);
 508                 }
 509                 else
 510                 {
 511                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
 512                 }
 513         }
 514         else
 515         {
 516                 if(sk)
 517                         sk->err = ENETDOWN;
 518                 ip_statistics.IpOutDiscards++;
 519                 if (free)
 520                         kfree_skb(skb, FREE_WRITE);
 521         }
 522 }
 523 
 524 
 525 /*
 526  *      Build and send a packet, with as little as one copy
 527  *
 528  *      Doesn't care much about ip options... option length can be
 529  *      different for fragment at 0 and other fragments.
 530  *
 531  *      Note that the fragment at the highest offset is sent first,
 532  *      so the getfrag routine can fill in the TCP/UDP checksum header
 533  *      field in the last fragment it sends... actually it also helps
 534  *      the reassemblers, they can put most packets in at the head of
 535  *      the fragment queue, and they know the total size in advance. This
  536  *      last feature will measurably improve the Linux fragment handler.
 537  *
 538  *      The callback has five args, an arbitrary pointer (copy of frag),
 539  *      the source IP address (may depend on the routing table), the 
  540  *      destination address (char *), the offset to copy from, and the
 541  *      length to be copied.
 542  * 
 543  */
 544 
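      /*
       *      Illustrative sketch only (not part of the original source): a
       *      minimal getfrag-style callback with the five-argument shape
       *      described above. The name and the assumption that the opaque
       *      pointer is simply the start of the user buffer are hypothetical;
       *      a real caller such as UDP would also fold the copied bytes into
       *      its checksum here. Kept under #if 0 so it is never compiled.
       */
      #if 0
      static void example_getfrag(const void *p, __u32 saddr, char *to,
                                  unsigned int offset, unsigned int fraglen)
      {
              /* copy the requested window of user data into the fragment */
              memcpy(to, ((const char *)p) + offset, fraglen);
      }
      #endif
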
 545 int ip_build_xmit(struct sock *sk,
 546                    void getfrag (const void *,
 547                                  __u32,
 548                                  char *,
 549                                  unsigned int,  
 550                                  unsigned int),
 551                    const void *frag,
 552                    unsigned short int length,
 553                    __u32 daddr,
 554                    __u32 user_saddr,
 555                    struct options * opt,
 556                    int flags,
 557                    int type,
 558                    int noblock) 
 559 {
 560         struct rtable *rt;
 561         unsigned int fraglen, maxfraglen, fragheaderlen;
 562         int offset, mf;
 563         __u32 saddr;
 564         unsigned short id;
 565         struct iphdr *iph;
 566         int local=0;
 567         struct device *dev;
 568         int nfrags=0;
 569         __u32 true_daddr = daddr;
 570 
 571         if (opt && opt->srr && !sk->ip_hdrincl)
 572           daddr = opt->faddr;
 573         
 574         ip_statistics.IpOutRequests++;
 575 
 576 #ifdef CONFIG_IP_MULTICAST      
 577         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
 578         {
 579                 dev=dev_get(sk->ip_mc_name);
 580                 if(!dev)
 581                         return -ENODEV;
 582                 rt=NULL;
 583                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 584                         saddr = sk->saddr;
 585                 else
 586                         saddr = dev->pa_addr;
 587         }
 588         else
 589         {
 590 #endif  
 591                 /*
 592                  *      Perform the IP routing decisions
 593                  */
 594          
 595                 if(sk->localroute || flags&MSG_DONTROUTE)
 596                         local=1;
 597         
 598                 rt = sk->ip_route_cache;
 599                 
 600                 /*
 601                  *      See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
 602                  *      by doing the invalidation actively in the route change and header change.
 603                  */
 604         
 605                 saddr=sk->ip_route_saddr;        
 606                 if(!rt || sk->ip_route_stamp != rt_stamp ||
 607                    daddr!=sk->ip_route_daddr || sk->ip_route_local!=local ||
 608                    (sk->saddr && sk->saddr != saddr))
 609                 {
 610                         if(local)
 611                                 rt = ip_rt_local(daddr, NULL, &saddr);
 612                         else
 613                                 rt = ip_rt_route(daddr, NULL, &saddr);
 614                         sk->ip_route_local=local;
 615                         sk->ip_route_daddr=daddr;
 616                         sk->ip_route_saddr=saddr;
 617                         sk->ip_route_stamp=rt_stamp;
 618                         sk->ip_route_cache=rt;
 619                         sk->ip_hcache_ver=NULL;
 620                         sk->ip_hcache_state= 0;
 621                 }
 622                 else if(rt)
 623                 {
 624                         /*
 625                          *      Attempt header caches only if the cached route is being reused. Header cache
 626                          *      is not ultra cheap to set up. This means we only set it up on the second packet,
  627                          *      so one shot communications are not slowed. We assume (seems reasonable) that a
  628                          *      second packet probably means a stream of data will follow.
 629                          */
 630                         if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
 631                         {
 632                                 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
 633                                         rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
 634                                 else
 635                                         /* Can't cache. Remember this */
 636                                         sk->ip_hcache_state= -1;
 637                         }
 638                 }
 639                 
 640                 if (rt == NULL) 
 641                 {
 642                         ip_statistics.IpOutNoRoutes++;
 643                         return(-ENETUNREACH);
 644                 }
 645         
 646                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 647                         saddr = sk->saddr;
 648                         
 649                 dev=rt->rt_dev;
 650 #ifdef CONFIG_IP_MULTICAST
 651         }
 652 #endif          
 653         if (user_saddr)
 654                 saddr = user_saddr;
 655 
 656         /*
 657          *      Now compute the buffer space we require
 658          */ 
 659          
 660         /*
 661          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
  662  *      choice RAW frames within 20 bytes of maximum size (rare) to the long path.
 663          */
 664 
 665         length += 20;
 666         if (!sk->ip_hdrincl && opt) 
 667         {
 668                 length += opt->optlen;
 669                 if (opt->is_strictroute && rt && rt->rt_gateway) 
 670                 {
 671                         ip_statistics.IpOutNoRoutes++;
 672                         return -ENETUNREACH;
 673                 }
 674         }
 675         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
 676         {       
 677                 int error;
 678                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
 679                 if(skb==NULL)
 680                 {
 681                         ip_statistics.IpOutDiscards++;
 682                         return error;
 683                 }
 684                 skb->dev=dev;
 685                 skb->free=1;
 686                 skb->when=jiffies;
 687                 skb->sk=sk;
 688                 skb->arp=0;
 689                 skb->saddr=saddr;
 690                 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
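                      /* raddr is the hop the MAC layer must resolve: the
                         gateway if the route has one, else the destination */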
 691                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 692                 if(sk->ip_hcache_state>0)
 693                 {
 694                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
 695                         skb->arp=1;
 696                 }
 697                 else if(dev->hard_header)
 698                 {
 699                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
 700                                 skb->arp=1;
 701                 }
 702                 else
 703                         skb->arp=1;
 704                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
 705                 dev_lock_list();
 706                 if(!sk->ip_hdrincl)
 707                 {
 708                         iph->version=4;
 709                         iph->ihl=5;
 710                         iph->tos=sk->ip_tos;
 711                         iph->tot_len = htons(length);
 712                         iph->id=htons(ip_id_count++);
 713                         iph->frag_off = 0;
 714                         iph->ttl=sk->ip_ttl;
 715                         iph->protocol=type;
 716                         iph->saddr=saddr;
 717                         iph->daddr=daddr;
 718                         if (opt) 
 719                         {
 720                                 iph->ihl += opt->optlen>>2;
 721                                 ip_options_build(skb, opt,
 722                                                  true_daddr, dev->pa_addr, 0);
 723                         }
 724                         iph->check=0;
 725                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 726                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
 727                 }
 728                 else
 729                         getfrag(frag,saddr,(void *)iph,0,length-20);
 730                 dev_unlock_list();
 731 #ifdef CONFIG_FIREWALL
 732                 if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
 733                 {
 734                         kfree_skb(skb, FREE_WRITE);
 735                         return -EPERM;
 736                 }
 737 #endif
 738 #ifdef CONFIG_IP_ACCT
 739                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
 740 #endif          
 741                 if(dev->flags&IFF_UP)
 742                         dev_queue_xmit(skb,dev,sk->priority);
 743                 else
 744                 {
 745                         ip_statistics.IpOutDiscards++;
 746                         kfree_skb(skb, FREE_WRITE);
 747                 }
 748                 return 0;
 749         }
 750         length-=20;
 751         if (sk && !sk->ip_hdrincl && opt) 
 752         {
 753                 length -= opt->optlen;
 754                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
 755                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
 756         }
 757         else 
 758         {
 759                 fragheaderlen = dev->hard_header_len;
 760                 if(!sk->ip_hdrincl)
 761                         fragheaderlen += 20;
 762                 
 763                 /*
 764                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
 765                  *      out the size of the frames to send.
 766                  */
 767          
 768                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
 769         }
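              /*
               *      The data part of each full-sized fragment is kept a multiple
               *      of 8 bytes (the "& ~7" above) because the IP fragment offset
               *      field counts 8-byte units.
               */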
 770         
 771         /*
 772          *      Start at the end of the frame by handling the remainder.
 773          */
 774          
 775         offset = length - (length % (maxfraglen - fragheaderlen));
 776         
 777         /*
 778          *      Amount of memory to allocate for final fragment.
 779          */
 780          
 781         fraglen = length - offset + fragheaderlen;
 782         
 783         if(length-offset==0)
 784         {
 785                 fraglen = maxfraglen;
 786                 offset -= maxfraglen-fragheaderlen;
 787         }
 788         
 789         
 790         /*
 791          *      The last fragment will not have MF (more fragments) set.
 792          */
 793          
 794         mf = 0;
 795 
 796         /*
 797          *      Can't fragment raw packets 
 798          */
 799          
 800         if (sk->ip_hdrincl && offset > 0)
 801                 return(-EMSGSIZE);
 802 
 803         /*
 804          *      Lock the device lists.
 805          */
 806 
 807         dev_lock_list();
 808         
 809         /*
 810          *      Get an identifier
 811          */
 812          
 813         id = htons(ip_id_count++);
 814 
 815         /*
  816          *      Begin outputting the bytes.
 817          */
 818          
 819         do 
 820         {
 821                 struct sk_buff * skb;
 822                 int error;
 823                 char *data;
 824 
 825                 /*
 826                  *      Get the memory we require with some space left for alignment.
 827                  */
 828 
 829                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
 830                 if (skb == NULL)
 831                 {
 832                         ip_statistics.IpOutDiscards++;
 833                         if(nfrags>1)
 834                                 ip_statistics.IpFragCreates++;                  
 835                         dev_unlock_list();
 836                         return(error);
 837                 }
 838                 
 839                 /*
 840                  *      Fill in the control structures
 841                  */
 842                  
 843                 skb->next = skb->prev = NULL;
 844                 skb->dev = dev;
 845                 skb->when = jiffies;
 846                 skb->free = 1; /* dubious, this one */
 847                 skb->sk = sk;
 848                 skb->arp = 0;
 849                 skb->saddr = saddr;
 850                 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
 851                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 852                 data = skb_put(skb, fraglen-dev->hard_header_len);
 853 
 854                 /*
 855                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
 856                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
 857                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
 858                  *      pointer to speed header cache builds for identical targets.
 859                  */
 860                  
 861                 if(sk->ip_hcache_state>0)
 862                 {
 863                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
 864                         skb->arp=1;
 865                 }
 866                 else if (dev->hard_header)
 867                 {
 868                         if(dev->hard_header(skb, dev, ETH_P_IP, 
 869                                                 NULL, NULL, 0)>0)
 870                                 skb->arp=1;
 871                 }
 872                 
 873                 /*
 874                  *      Find where to start putting bytes.
 875                  */
 876                  
 877                 skb->ip_hdr = iph = (struct iphdr *)data;
 878 
 879                 /*
 880                  *      Only write IP header onto non-raw packets 
 881                  */
 882                  
 883                 if(!sk->ip_hdrincl) 
 884                 {
 885 
 886                         iph->version = 4;
 887                         iph->ihl = 5; /* ugh */
 888                         if (opt) {
 889                                 iph->ihl += opt->optlen>>2;
 890                                 ip_options_build(skb, opt,
 891                                                  true_daddr, dev->pa_addr, offset);
 892                         }
 893                         iph->tos = sk->ip_tos;
 894                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 895                         iph->id = id;
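                              /* frag_off counts 8-byte units, hence the >>3 below */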
 896                         iph->frag_off = htons(offset>>3);
 897                         iph->frag_off |= mf;
 898 #ifdef CONFIG_IP_MULTICAST
 899                         if (MULTICAST(daddr))
 900                                 iph->ttl = sk->ip_mc_ttl;
 901                         else
 902 #endif
 903                                 iph->ttl = sk->ip_ttl;
 904                         iph->protocol = type;
 905                         iph->check = 0;
 906                         iph->saddr = saddr;
 907                         iph->daddr = daddr;
 908                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 909                         data += iph->ihl*4;
 910                         
 911                         /*
 912                          *      Any further fragments will have MF set.
 913                          */
 914                          
 915                         mf = htons(IP_MF);
 916                 }
 917                 
 918                 /*
 919                  *      User data callback
 920                  */
 921 
 922                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
 923                 
 924                 /*
 925                  *      Account for the fragment.
 926                  */
 927                  
 928 #ifdef CONFIG_FIREWALL
 929                 if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 930                 {
 931                         kfree_skb(skb, FREE_WRITE);
 932                         dev_unlock_list();
 933                         return -EPERM;
 934                 }
 935 #endif          
 936 #ifdef CONFIG_IP_ACCT
 937                 if(!offset)
 938                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
 939 #endif  
 940                 offset -= (maxfraglen-fragheaderlen);
 941                 fraglen = maxfraglen;
 942 
 943 #ifdef CONFIG_IP_MULTICAST
 944 
 945                 /*
 946                  *      Multicasts are looped back for other local users
 947                  */
 948          
 949                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
 950                 {
 951                         /*
 952                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
 953                          *      you are always magically a member of this group.
 954                          *
 955                          *      Always loop back all host messages when running as a multicast router.
 956                          */
 957                          
 958                         if(sk==NULL || sk->ip_mc_loop)
 959                         {
  960                                 if(daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 961                                         ip_loopback(rt?rt->rt_dev:dev,skb);
 962                                 else 
 963                                 {
 964                                         struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
 965                                         while(imc!=NULL) 
 966                                         {
 967                                                 if(imc->multiaddr==daddr) 
 968                                                 {
 969                                                         ip_loopback(rt?rt->rt_dev:dev,skb);
 970                                                         break;
 971                                                 }
 972                                                 imc=imc->next;
 973                                         }
 974                                 }
 975                         }
 976 
 977                         /*
 978                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
 979                          *      extra clone.
 980                          */
 981 
 982                         if(skb->ip_hdr->ttl==0)
 983                                 kfree_skb(skb, FREE_READ);
 984                 }
 985 #endif
 986 
 987                 nfrags++;
 988                 
 989                 /*
 990                  *      BSD loops broadcasts
 991                  */
 992                  
 993                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
 994                         ip_loopback(dev,skb);
 995 
 996                 /*
 997                  *      Now queue the bytes into the device.
 998                  */
 999                  
1000                 if (dev->flags & IFF_UP) 
1001                 {
1002                         dev_queue_xmit(skb, dev, sk->priority);
1003                 } 
1004                 else 
1005                 {
1006                         /*
1007                          *      Whoops... 
1008                          */
1009                          
1010                         ip_statistics.IpOutDiscards++;
1011                         if(nfrags>1)
1012                                 ip_statistics.IpFragCreates+=nfrags;
1013                         kfree_skb(skb, FREE_WRITE);
1014                         dev_unlock_list();
1015                         /*
1016                          *      BSD behaviour.
1017                          */
1018                         if(sk!=NULL)
1019                                 sk->err=ENETDOWN;
1020                         return(0); /* lose rest of fragments */
1021                 }
1022         } 
1023         while (offset >= 0);
1024         if(nfrags>1)
1025                 ip_statistics.IpFragCreates+=nfrags;
1026         dev_unlock_list();
1027         return(0);
1028 }
1029     
1030 
1031 /*
1032  *      IP protocol layer initialiser
1033  */
1034 
1035 static struct packet_type ip_packet_type =
1036 {
1037         0,      /* MUTTER ntohs(ETH_P_IP),*/
1038         NULL,   /* All devices */
1039         ip_rcv,
1040         NULL,
1041         NULL,
1042 };
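      /*
       *      The type field above is left as 0 and filled in by ip_init(),
       *      presumably because htons() is not usable in a static initialiser
       *      here (hence the "MUTTER" note).
       */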
1043 
1044 #ifdef CONFIG_RTNETLINK
1045 
1046 /*
1047  *      Netlink hooks for IP
1048  */
1049  
1050 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
1051 {
1052         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
1053         struct netlink_rtinfo *nrt;
1054         struct sockaddr_in *s;
1055         if(skb==NULL)
1056                 return;
1057         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
1058         nrt->rtmsg_type=msg;
1059         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
1060         s->sin_family=AF_INET;
1061         s->sin_addr.s_addr=daddr;
1062         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
1063         s->sin_family=AF_INET;
1064         s->sin_addr.s_addr=gw;
1065         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
1066         s->sin_family=AF_INET;
1067         s->sin_addr.s_addr=mask;
1068         nrt->rtmsg_flags=flags;
1069         nrt->rtmsg_metric=metric;
1070         strcpy(nrt->rtmsg_device,name);
1071         netlink_post(NETLINK_ROUTE, skb);
1072 }       
1073 
1074 #endif
1075 
1076 /*
1077  *      Device notifier
1078  */
1079  
1080 static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
1081 {
1082         struct device *dev=ptr;
1083         if(event==NETDEV_DOWN)
1084         {
1085                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
1086                 ip_rt_flush(dev);
1087         }
1088 /*
1089  *      Join the initial group if multicast.
1090  */             
1091         if(event==NETDEV_UP)
1092         {
1093 #ifdef CONFIG_IP_MULTICAST      
1094                 ip_mc_allhost(dev);
1095 #endif          
1096                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
1097         }
1098         return NOTIFY_DONE;
1099 }
1100 
1101 struct notifier_block ip_rt_notifier={
1102         ip_rt_event,
1103         NULL,
1104         0
1105 };
1106 
1107 /*
1108  *      IP registers the packet type and then calls the subprotocol initialisers
1109  */
1110 
1111 void ip_init(void)
1112 {
1113         ip_packet_type.type=htons(ETH_P_IP);
1114         dev_add_pack(&ip_packet_type);
1115 
1116         /* So we flush routes when a device is downed */        
1117         register_netdevice_notifier(&ip_rt_notifier);
1118 
1119 /*      ip_raw_init();
1120         ip_packet_init();
1121         ip_tcp_init();
1122         ip_udp_init();*/
1123 
1124 #ifdef CONFIG_IP_MULTICAST
1125         proc_net_register(&(struct proc_dir_entry) {
1126                 PROC_NET_IGMP, 4, "igmp",
1127                 S_IFREG | S_IRUGO, 1, 0, 0,
1128                 0, &proc_net_inode_operations,
1129                 ip_mc_procinfo
1130         });
1131 #endif
1132 }
1133 
