root/net/ipv4/ip_output.c


DEFINITIONS

This source file includes the following definitions.
  1. ip_loopback
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_queue_xmit
  7. ip_build_xmit
  8. ip_netlink_msg
  9. ip_rt_event
  10. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) output module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <Alan.Cox@linux.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *
  19  *      See ip_input.c for original log
  20  */
  21 
  22 #include <asm/segment.h>
  23 #include <asm/system.h>
  24 #include <linux/types.h>
  25 #include <linux/kernel.h>
  26 #include <linux/sched.h>
  27 #include <linux/mm.h>
  28 #include <linux/string.h>
  29 #include <linux/errno.h>
  30 #include <linux/config.h>
  31 
  32 #include <linux/socket.h>
  33 #include <linux/sockios.h>
  34 #include <linux/in.h>
  35 #include <linux/inet.h>
  36 #include <linux/netdevice.h>
  37 #include <linux/etherdevice.h>
  38 #include <linux/proc_fs.h>
  39 #include <linux/stat.h>
  40 
  41 #include <net/snmp.h>
  42 #include <net/ip.h>
  43 #include <net/protocol.h>
  44 #include <net/route.h>
  45 #include <net/tcp.h>
  46 #include <net/udp.h>
  47 #include <linux/skbuff.h>
  48 #include <net/sock.h>
  49 #include <net/arp.h>
  50 #include <net/icmp.h>
  51 #include <net/raw.h>
  52 #include <net/checksum.h>
  53 #include <linux/igmp.h>
  54 #include <linux/ip_fw.h>
  55 #include <linux/firewall.h>
  56 #include <linux/mroute.h>
  57 #include <net/netlink.h>
  58 
  59 /*
  60  *      Loop a packet back to the sender.
  61  */
  62  
  63 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
  64 {
  65         struct device *dev=&loopback_dev;
  66         int len=ntohs(skb->ip_hdr->tot_len);
  67         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
  68         
  69         if(newskb==NULL)
  70                 return;
  71                 
  72         newskb->link3=NULL;
  73         newskb->sk=NULL;
  74         newskb->dev=dev;
  75         newskb->saddr=skb->saddr;
  76         newskb->daddr=skb->daddr;
  77         newskb->raddr=skb->raddr;
  78         newskb->free=1;
  79         newskb->lock=0;
  80         newskb->users=0;
  81         newskb->pkt_type=skb->pkt_type;
  82         
  83         /*
  84          *      Put a MAC header on the packet
  85          */
  86         ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
  87         /*
  88          *      Add the rest of the data space. 
  89          */
  90         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
  91         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
  92 
  93         /*
  94          *      Copy the data
  95          */
  96         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
  97 
  98         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
  99                 
 100         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
 101         ip_queue_xmit(NULL, dev, newskb, 1);
 102 }
 103 
 104 
 105 
 106 /*
 107  *      Take an skb, and fill in the MAC header.
 108  */
 109 
 110 int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 111 {
 112         int mac = 0;
 113 
 114         skb->dev = dev;
 115         skb->arp = 1;
 116         if (dev->hard_header)
 117         {
 118                 /*
 119                  *      Build a hardware header. Source address is our mac, destination unknown
 120                  *      (rebuild header will sort this out)
 121                  */
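                /*
                 *      For Ethernet, for instance, eth_header() is expected to
                 *      return a negative length when it cannot yet fill in the
                 *      destination MAC; skb->arp is then cleared below so the
                 *      rebuild (ARP) step completes the header at transmit time.
                 */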
 122                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 123                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 124                 if (mac < 0)
 125                 {
 126                         mac = -mac;
 127                         skb->arp = 0;
 128                         skb->raddr = daddr;     /* next routing address */
 129                 }
 130         }
 131         return mac;
 132 }
 133 
 134 static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 135 {
 136         int mac = 0;
 137 
 138         skb->dev = dev;
 139         skb->arp = 1;
 140         if (dev->hard_header)
 141         {
 142                 skb_reserve(skb,MAX_HEADER);
 143                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 144                 if (mac < 0)
 145                 {
 146                         mac = -mac;
 147                         skb->arp = 0;
 148                         skb->raddr = daddr;     /* next routing address */
 149                 }
 150         }
 151         return mac;
 152 }
 153 
 154 int ip_id_count = 0;
 155 
 156 /*
 157  * This routine builds the appropriate hardware/IP headers for
  158  * the packet.  It assumes that if *dev != NULL then the
 159  * protocol knows what it's doing, otherwise it uses the
 160  * routing/ARP tables to select a device struct.
 161  */
 162 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
 163                 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
 164 {
 165         struct rtable *rt;
 166         __u32 raddr;
 167         int tmp;
 168         __u32 src;
 169         struct iphdr *iph;
 170         __u32 final_daddr = daddr;
 171 
 172         if (opt && opt->srr)
 173                 daddr = opt->faddr;
 174 
 175         /*
 176          *      See if we need to look up the device.
 177          */
 178 
 179 #ifdef CONFIG_IP_MULTICAST      
 180         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 181                 *dev=dev_get(skb->sk->ip_mc_name);
 182 #endif
 183         if (*dev == NULL)
 184         {
 185                 if(skb->localroute)
 186                         rt = ip_rt_local(daddr, NULL, &src);
 187                 else
 188                         rt = ip_rt_route(daddr, NULL, &src);
 189                 if (rt == NULL)
 190                 {
 191                         ip_statistics.IpOutNoRoutes++;
 192                         return(-ENETUNREACH);
 193                 }
 194 
 195                 *dev = rt->rt_dev;
 196                 /*
 197                  *      If the frame is from us and going off machine it MUST MUST MUST
 198                  *      have the output device ip address and never the loopback
 199                  */
 200                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 201                         saddr = src;/*rt->rt_dev->pa_addr;*/
 202                 raddr = rt->rt_gateway;
 203 
 204         }
 205         else
 206         {
 207                 /*
 208                  *      We still need the address of the first hop.
 209                  */
 210                 if(skb->localroute)
 211                         rt = ip_rt_local(daddr, NULL, &src);
 212                 else
 213                         rt = ip_rt_route(daddr, NULL, &src);
 214                 /*
 215                  *      If the frame is from us and going off machine it MUST MUST MUST
 216                  *      have the output device ip address and never the loopback
 217                  */
 218                 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
 219                         saddr = src;/*rt->rt_dev->pa_addr;*/
 220 
 221                 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
 222         }
 223 
 224         /*
 225          *      No source addr so make it our addr
 226          */
 227         if (saddr == 0)
 228                 saddr = src;
 229 
 230         /*
 231          *      No gateway so aim at the real destination
 232          */
 233         if (raddr == 0)
 234                 raddr = daddr;
 235 
 236         /*
 237          *      Now build the MAC header.
 238          */
 239 
 240         if(type==IPPROTO_TCP)
 241                 tmp = ip_send_room(skb, raddr, len, *dev, saddr);
 242         else
 243                 tmp = ip_send(skb, raddr, len, *dev, saddr);
 244 
 245         /*
 246          *      Book keeping
 247          */
 248 
 249         skb->dev = *dev;
 250         skb->saddr = saddr;
 251 
 252         /*
 253          *      Now build the IP header.
 254          */
 255 
 256         /*
 257          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 258          *      one is being supplied to us by the user
 259          */
 260 
 261         if(type == IPPROTO_RAW)
 262                 return (tmp);
 263 
 264         /*
 265          *      Build the IP addresses
 266          */
 267          
 268         if (opt)
 269                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 270         else
 271                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 272 
 273         iph->version  = 4;
 274         iph->ihl      = 5;
 275         iph->tos      = tos;
 276         iph->frag_off = 0;
 277         iph->ttl      = ttl;
 278         iph->daddr    = daddr;
 279         iph->saddr    = saddr;
 280         iph->protocol = type;
 281         skb->ip_hdr   = iph;
 282 
 283         if (!opt || !opt->optlen)
 284                 return sizeof(struct iphdr) + tmp;
 285         if (opt->is_strictroute && rt && rt->rt_gateway) 
 286         {
 287                 ip_statistics.IpOutNoRoutes++;
 288                 return -ENETUNREACH;
 289         }
 290         iph->ihl += opt->optlen>>2;
 291         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 292         return iph->ihl*4 + tmp;
 293 }
 294 
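/*
 *	A minimal sketch (hypothetical, not referenced anywhere) of how a
 *	datagram protocol drives the routine above: let the routing code pick
 *	the device, then append the payload and hand the buffer to
 *	ip_queue_xmit(). The skb is assumed to have been allocated with room
 *	for MAC + IP headers plus 'len' bytes; real callers also build a
 *	transport header and pass their IP options instead of NULL.
 */
static int example_dgram_output(struct sock *sk, struct sk_buff *skb,
				unsigned char *payload, int len)
{
	struct device *dev = NULL;	/* filled in by the routing code */
	int tmp;

	tmp = ip_build_header(skb, sk->saddr, sk->daddr, &dev, IPPROTO_UDP,
			      NULL, len, sk->ip_tos, sk->ip_ttl);
	if (tmp < 0)
		return tmp;

	memcpy(skb_put(skb, len), payload, len);
	ip_queue_xmit(sk, dev, skb, 1);		/* freed after transmit */
	return 0;
}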
 295 
 296 /*
 297  *      Generate a checksum for an outgoing IP datagram.
 298  */
 299 
 300 void ip_send_check(struct iphdr *iph)
 301 {
 302         iph->check = 0;
 303         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 304 }
 305 
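/*
 *	For reference, a portable (unoptimised) sketch of the RFC 791 header
 *	checksum that ip_fast_csum() computes, typically in assembler: the
 *	one's complement of the one's complement sum of the header taken as
 *	16-bit words. Illustrative only; nothing in the stack calls this.
 */
static __u16 ip_slow_csum(const void *hdr, unsigned int ihl)
{
	const __u16 *word = hdr;
	__u32 sum = 0;
	unsigned int nwords = ihl * 2;		/* ihl counts 32-bit words */

	while (nwords--)
		sum += ntohs(*word++);
	while (sum >> 16)			/* fold the carries back in */
		sum = (sum & 0xFFFF) + (sum >> 16);
	return htons((__u16)~sum);		/* back to network byte order */
}
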
 306 /*
  307  * Queues a packet to be sent, and starts the transmitter
  308  * if necessary.  If free == 1 then we free the block after
  309  * transmit, otherwise we don't. If free == 2 we not only
  310  * free the block but also don't assign a new IP id (the
  311  * identification field). This routine also fills in the total
  312  * length and computes the header checksum.
 313  */
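/*
 *	Usage sketch, based on the callers visible here: ip_loopback() above
 *	hands us an already built frame with sk == NULL and free == 1, so it
 *	is transmitted and then freed; a reliable protocol that may need to
 *	retransmit passes free == 0 so the buffer stays chained on
 *	sk->send_head; code resending an already numbered fragment passes
 *	free == 2 so that no fresh IP id is assigned.
 */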
 314 
 315 void ip_queue_xmit(struct sock *sk, struct device *dev,
 316               struct sk_buff *skb, int free)
 317 {
 318         struct iphdr *iph;
 319 /*      unsigned char *ptr;*/
 320 
 321         /* Sanity check */
 322         if (dev == NULL)
 323         {
 324                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
 325                 return;
 326         }
 327 
 328         IS_SKB(skb);
 329 
 330         /*
 331          *      Do some book-keeping in the packet for later
 332          */
 333 
 334 
 335         skb->dev = dev;
 336         skb->when = jiffies;
 337 
 338         /*
  339          *      Find the IP header and set the length. This is bad,
  340          *      but once we get the skb data handling code in, the
  341          *      hardware will push its header sensibly and we will
  342          *      set skb->ip_hdr to avoid this mess and the fixed
  343          *      header length problem.
 344          */
 345 
 346         iph = skb->ip_hdr;
  347         iph->tot_len = htons(skb->len-(((unsigned char *)iph)-skb->data));
 348 
 349 #ifdef CONFIG_FIREWALL
 350         if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 351                 /* just don't send this packet */
 352                 return;
 353 #endif  
 354 
 355         /*
 356          *      No reassigning numbers to fragments...
 357          */
 358 
 359         if(free!=2)
 360                 iph->id      = htons(ip_id_count++);
 361         else
 362                 free=1;
 363 
 364         /* All buffers without an owner socket get freed */
 365         if (sk == NULL)
 366                 free = 1;
 367 
 368         skb->free = free;
 369 
 370         /*
  371          *      Do we need to fragment? Again this is inefficient.
 372          *      We need to somehow lock the original buffer and use
 373          *      bits of it.
 374          */
 375 
 376         if(ntohs(iph->tot_len)> dev->mtu)
 377         {
 378                 ip_fragment(sk,skb,dev,0);
 379                 IS_SKB(skb);
 380                 kfree_skb(skb,FREE_WRITE);
 381                 return;
 382         }
 383 
 384         /*
 385          *      Add an IP checksum
 386          */
 387 
 388         ip_send_check(iph);
 389 
 390         /*
 391          *      Print the frame when debugging
 392          */
 393 
 394         /*
  395          *      More debugging. You cannot queue a packet already on a list.
 396          *      Spot this and moan loudly.
 397          */
 398         if (skb->next != NULL)
 399         {
 400                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
 401                 skb_unlink(skb);
 402         }
 403 
 404         /*
 405          *      If a sender wishes the packet to remain unfreed
 406          *      we add it to his send queue. This arguably belongs
 407          *      in the TCP level since nobody else uses it. BUT
 408          *      remember IPng might change all the rules.
 409          */
 410 
 411         if (!free)
 412         {
 413                 unsigned long flags;
 414                 /* The socket now has more outstanding blocks */
 415 
 416                 sk->packets_out++;
 417 
 418                 /* Protect the list for a moment */
 419                 save_flags(flags);
 420                 cli();
 421 
 422                 if (skb->link3 != NULL)
 423                 {
 424                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
 425                         skb->link3 = NULL;
 426                 }
 427                 if (sk->send_head == NULL)
 428                 {
 429                         sk->send_tail = skb;
 430                         sk->send_head = skb;
 431                 }
 432                 else
 433                 {
 434                         sk->send_tail->link3 = skb;
 435                         sk->send_tail = skb;
 436                 }
 437                 /* skb->link3 is NULL */
 438 
 439                 /* Interrupt restore */
 440                 restore_flags(flags);
 441         }
 442         else
 443                 /* Remember who owns the buffer */
 444                 skb->sk = sk;
 445 
 446         /*
 447          *      If the indicated interface is up and running, send the packet.
 448          */
 449          
 450         ip_statistics.IpOutRequests++;
 451 #ifdef CONFIG_IP_ACCT
 452         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
 453 #endif  
 454         
 455 #ifdef CONFIG_IP_MULTICAST      
 456 
 457         /*
 458          *      Multicasts are looped back for other local users
 459          */
 460          
 461         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
 462         {
 463                 if(sk==NULL || sk->ip_mc_loop)
 464                 {
 465                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 466                         {
 467                                 ip_loopback(dev,skb);
 468                         }
 469                         else
 470                         {
 471                                 struct ip_mc_list *imc=dev->ip_mc_list;
 472                                 while(imc!=NULL)
 473                                 {
 474                                         if(imc->multiaddr==iph->daddr)
 475                                         {
 476                                                 ip_loopback(dev,skb);
 477                                                 break;
 478                                         }
 479                                         imc=imc->next;
 480                                 }
 481                         }
 482                 }
 483                 /* Multicasts with ttl 0 must not go beyond the host */
 484                 
 485                 if(skb->ip_hdr->ttl==0)
 486                 {
 487                         kfree_skb(skb, FREE_READ);
 488                         return;
 489                 }
 490         }
 491 #endif
 492         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
 493                 ip_loopback(dev,skb);
 494                 
 495         if (dev->flags & IFF_UP)
 496         {
 497                 /*
 498                  *      If we have an owner use its priority setting,
 499                  *      otherwise use NORMAL
 500                  */
 501 
 502                 if (sk != NULL)
 503                 {
 504                         dev_queue_xmit(skb, dev, sk->priority);
 505                 }
 506                 else
 507                 {
 508                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
 509                 }
 510         }
 511         else
 512         {
 513                 if(sk)
 514                         sk->err = ENETDOWN;
 515                 ip_statistics.IpOutDiscards++;
 516                 if (free)
 517                         kfree_skb(skb, FREE_WRITE);
 518         }
 519 }
 520 
 521 
 522 /*
 523  *      Build and send a packet, with as little as one copy
 524  *
 525  *      Doesn't care much about ip options... option length can be
 526  *      different for fragment at 0 and other fragments.
 527  *
 528  *      Note that the fragment at the highest offset is sent first,
 529  *      so the getfrag routine can fill in the TCP/UDP checksum header
 530  *      field in the last fragment it sends... actually it also helps
 531  *      the reassemblers, they can put most packets in at the head of
 532  *      the fragment queue, and they know the total size in advance. This
  533  *      last feature will measurably improve the Linux fragment handler.
 534  *
 535  *      The callback has five args, an arbitrary pointer (copy of frag),
 536  *      the source IP address (may depend on the routing table), the 
  537  *      destination buffer address (char *), the offset to copy from, and the
 538  *      length to be copied.
 539  * 
 540  */
 541 
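/*
 *	A minimal sketch of such a callback, assuming the sender owns a flat
 *	kernel buffer (hypothetical; the real UDP and raw socket callers copy
 *	from user space and fold the copy into their checksum):
 */

struct flat_frag {
	const unsigned char *data;	/* hypothetical flat source buffer */
};

static void flat_getfrag(const void *p, __u32 saddr, char *to,
			 unsigned int offset, unsigned int fraglen)
{
	const struct flat_frag *ff = p;

	/* saddr is passed so checksumming callers can build a pseudo header */
	memcpy(to, ff->data + offset, fraglen);
}
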
 542 int ip_build_xmit(struct sock *sk,
 543                    void getfrag (const void *,
 544                                  __u32,
 545                                  char *,
 546                                  unsigned int,  
 547                                  unsigned int),
 548                    const void *frag,
 549                    unsigned short int length,
 550                    __u32 daddr,
 551                    __u32 user_saddr,
 552                    struct options * opt,
 553                    int flags,
 554                    int type) 
 555 {
 556         struct rtable *rt;
 557         unsigned int fraglen, maxfraglen, fragheaderlen;
 558         int offset, mf;
 559         __u32 saddr;
 560         unsigned short id;
 561         struct iphdr *iph;
 562         int local=0;
 563         struct device *dev;
 564         int nfrags=0;
 565         __u32 true_daddr = daddr;
 566 
 567         if (opt && opt->srr && !sk->ip_hdrincl)
 568           daddr = opt->faddr;
 569         
 570         ip_statistics.IpOutRequests++;
 571 
 572 #ifdef CONFIG_IP_MULTICAST      
 573         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
 574         {
 575                 dev=dev_get(sk->ip_mc_name);
 576                 if(!dev)
 577                         return -ENODEV;
 578                 rt=NULL;
 579                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 580                         saddr = sk->saddr;
 581                 else
 582                         saddr = dev->pa_addr;
 583         }
 584         else
 585         {
 586 #endif  
 587                 /*
 588                  *      Perform the IP routing decisions
 589                  */
 590          
 591                 if(sk->localroute || flags&MSG_DONTROUTE)
 592                         local=1;
 593         
 594                 rt = sk->ip_route_cache;
 595                 
 596                 /*
 597                  *      See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
 598                  *      by doing the invalidation actively in the route change and header change.
 599                  */
 600         
 601                 saddr=sk->ip_route_saddr;        
 602                 if(!rt || sk->ip_route_stamp != rt_stamp ||
 603                    daddr!=sk->ip_route_daddr || sk->ip_route_local!=local ||
 604                    (sk->saddr && sk->saddr != saddr))
 605                 {
 606                         if(local)
 607                                 rt = ip_rt_local(daddr, NULL, &saddr);
 608                         else
 609                                 rt = ip_rt_route(daddr, NULL, &saddr);
 610                         sk->ip_route_local=local;
 611                         sk->ip_route_daddr=daddr;
 612                         sk->ip_route_saddr=saddr;
 613                         sk->ip_route_stamp=rt_stamp;
 614                         sk->ip_route_cache=rt;
 615                         sk->ip_hcache_ver=NULL;
 616                         sk->ip_hcache_state= 0;
 617                 }
 618                 else if(rt)
 619                 {
 620                         /*
 621                          *      Attempt header caches only if the cached route is being reused. Header cache
 622                          *      is not ultra cheap to set up. This means we only set it up on the second packet,
  623  *      so one-shot communications are not slowed. We assume (seems reasonable) that a
  624  *      second packet probably means a stream of data is coming.
 625                          */
 626                         if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
 627                         {
 628                                 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
 629                                         rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
 630                                 else
 631                                         /* Can't cache. Remember this */
 632                                         sk->ip_hcache_state= -1;
 633                         }
 634                 }
 635                 
 636                 if (rt == NULL) 
 637                 {
 638                         ip_statistics.IpOutNoRoutes++;
 639                         return(-ENETUNREACH);
 640                 }
 641         
 642                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 643                         saddr = sk->saddr;
 644                         
 645                 dev=rt->rt_dev;
 646 #ifdef CONFIG_IP_MULTICAST
 647         }
 648 #endif          
 649         if (user_saddr)
 650                 saddr = user_saddr;
 651 
 652         /*
 653          *      Now compute the buffer space we require
 654          */ 
 655          
 656         /*
 657          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
  658  *      choice RAW frames within 20 bytes of maximum size (rare) to the long path
 659          */
 660 
 661         length += 20;
 662         if (!sk->ip_hdrincl && opt) 
 663         {
 664                 length += opt->optlen;
 665                 if (opt->is_strictroute && rt && rt->rt_gateway) 
 666                 {
 667                         ip_statistics.IpOutNoRoutes++;
 668                         return -ENETUNREACH;
 669                 }
 670         }
 671         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
 672         {       
 673                 int error;
 674                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, 0,&error);
 675                 if(skb==NULL)
 676                 {
 677                         ip_statistics.IpOutDiscards++;
 678                         return error;
 679                 }
 680                 skb->dev=dev;
 681                 skb->free=1;
 682                 skb->when=jiffies;
 683                 skb->sk=sk;
 684                 skb->arp=0;
 685                 skb->saddr=saddr;
 686                 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
 687                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 688                 if(sk->ip_hcache_state>0)
 689                 {
 690                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
 691                         skb->arp=1;
 692                 }
 693                 else if(dev->hard_header)
 694                 {
 695                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
 696                                 skb->arp=1;
 697                 }
 698                 else
 699                         skb->arp=1;
 700                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
 701                 dev_lock_list();
 702                 if(!sk->ip_hdrincl)
 703                 {
 704                         iph->version=4;
 705                         iph->ihl=5;
 706                         iph->tos=sk->ip_tos;
 707                         iph->tot_len = htons(length);
 708                         iph->id=htons(ip_id_count++);
 709                         iph->frag_off = 0;
 710                         iph->ttl=sk->ip_ttl;
 711                         iph->protocol=type;
 712                         iph->saddr=saddr;
 713                         iph->daddr=daddr;
 714                         if (opt) 
 715                         {
 716                                 iph->ihl += opt->optlen>>2;
 717                                 ip_options_build(skb, opt,
 718                                                  true_daddr, dev->pa_addr, 0);
 719                         }
 720                         iph->check=0;
 721                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 722                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
 723                 }
 724                 else
 725                         getfrag(frag,saddr,(void *)iph,0,length-20);
 726                 dev_unlock_list();
 727 #ifdef CONFIG_FIREWALL
 728                 if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
 729                 {
 730                         kfree_skb(skb, FREE_WRITE);
 731                         return -EPERM;
 732                 }
 733 #endif
 734 #ifdef CONFIG_IP_ACCT
 735                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
 736 #endif          
 737                 if(dev->flags&IFF_UP)
 738                         dev_queue_xmit(skb,dev,sk->priority);
 739                 else
 740                 {
 741                         ip_statistics.IpOutDiscards++;
 742                         kfree_skb(skb, FREE_WRITE);
 743                 }
 744                 return 0;
 745         }
 746         length-=20;
 747         if (sk && !sk->ip_hdrincl && opt) 
 748         {
 749                 length -= opt->optlen;
 750                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
 751                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
 752         }
 753         else 
 754         {
 755                 fragheaderlen = dev->hard_header_len;
 756                 if(!sk->ip_hdrincl)
 757                         fragheaderlen += 20;
 758                 
 759                 /*
 760                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
 761                  *      out the size of the frames to send.
 762                  */
 763          
 764                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
 765         }
 766         
 767         /*
 768          *      Start at the end of the frame by handling the remainder.
 769          */
 770          
 771         offset = length - (length % (maxfraglen - fragheaderlen));
 772         
 773         /*
 774          *      Amount of memory to allocate for final fragment.
 775          */
 776          
 777         fraglen = length - offset + fragheaderlen;
 778         
 779         if(length-offset==0)
 780         {
 781                 fraglen = maxfraglen;
 782                 offset -= maxfraglen-fragheaderlen;
 783         }
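        /*
         *      Worked example, assuming a plain Ethernet device (mtu 1500,
         *      hard_header_len 14) and no IP options: fragheaderlen = 34 and
         *      maxfraglen = ((1500-20) & ~7) + 34 = 1514, so each full
         *      fragment carries 1480 data bytes. For length = 4000 this gives
         *      offset = 4000 - (4000 % 1480) = 2960 and a final fraglen of
         *      1040 + 34 = 1074; the loop below then emits fragments covering
         *      offsets 2960, 1480 and 0, last piece first.
         */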
 784         
 785         
 786         /*
 787          *      The last fragment will not have MF (more fragments) set.
 788          */
 789          
 790         mf = 0;
 791 
 792         /*
 793          *      Can't fragment raw packets 
 794          */
 795          
 796         if (sk->ip_hdrincl && offset > 0)
 797                 return(-EMSGSIZE);
 798 
 799         /*
 800          *      Lock the device lists.
 801          */
 802 
 803         dev_lock_list();
 804         
 805         /*
 806          *      Get an identifier
 807          */
 808          
 809         id = htons(ip_id_count++);
 810 
 811         /*
  812          *      Begin outputting the bytes.
 813          */
 814          
 815         do 
 816         {
 817                 struct sk_buff * skb;
 818                 int error;
 819                 char *data;
 820 
 821                 /*
 822                  *      Get the memory we require with some space left for alignment.
 823                  */
 824 
 825                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, 0, &error);
 826                 if (skb == NULL)
 827                 {
 828                         ip_statistics.IpOutDiscards++;
 829                         if(nfrags>1)
 830                                 ip_statistics.IpFragCreates++;                  
 831                         dev_unlock_list();
 832                         return(error);
 833                 }
 834                 
 835                 /*
 836                  *      Fill in the control structures
 837                  */
 838                  
 839                 skb->next = skb->prev = NULL;
 840                 skb->dev = dev;
 841                 skb->when = jiffies;
 842                 skb->free = 1; /* dubious, this one */
 843                 skb->sk = sk;
 844                 skb->arp = 0;
 845                 skb->saddr = saddr;
 846                 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
 847                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 848                 data = skb_put(skb, fraglen-dev->hard_header_len);
 849 
 850                 /*
  851          *      Save us ARP and the like. In the optimal case we do no route lookup (route cache ok),
  852          *      no ARP lookup (ARP cache ok) and output. The cache checks are still too slow but
 853                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
 854                  *      pointer to speed header cache builds for identical targets.
 855                  */
 856                  
 857                 if(sk->ip_hcache_state>0)
 858                 {
 859                         memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
 860                         skb->arp=1;
 861                 }
 862                 else if (dev->hard_header)
 863                 {
 864                         if(dev->hard_header(skb, dev, ETH_P_IP, 
 865                                                 NULL, NULL, 0)>0)
 866                                 skb->arp=1;
 867                 }
 868                 
 869                 /*
 870                  *      Find where to start putting bytes.
 871                  */
 872                  
 873                 skb->ip_hdr = iph = (struct iphdr *)data;
 874 
 875                 /*
 876                  *      Only write IP header onto non-raw packets 
 877                  */
 878                  
 879                 if(!sk->ip_hdrincl) 
 880                 {
 881 
 882                         iph->version = 4;
 883                         iph->ihl = 5; /* ugh */
 884                         if (opt) {
 885                                 iph->ihl += opt->optlen>>2;
 886                                 ip_options_build(skb, opt,
 887                                                  true_daddr, dev->pa_addr, offset);
 888                         }
 889                         iph->tos = sk->ip_tos;
 890                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 891                         iph->id = id;
 892                         iph->frag_off = htons(offset>>3);
 893                         iph->frag_off |= mf;
 894 #ifdef CONFIG_IP_MULTICAST
 895                         if (MULTICAST(daddr))
 896                                 iph->ttl = sk->ip_mc_ttl;
 897                         else
 898 #endif
 899                                 iph->ttl = sk->ip_ttl;
 900                         iph->protocol = type;
 901                         iph->check = 0;
 902                         iph->saddr = saddr;
 903                         iph->daddr = daddr;
 904                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 905                         data += iph->ihl*4;
 906                         
 907                         /*
 908                          *      Any further fragments will have MF set.
 909                          */
 910                          
 911                         mf = htons(IP_MF);
 912                 }
 913                 
 914                 /*
 915                  *      User data callback
 916                  */
 917 
 918                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
 919                 
 920                 /*
 921                  *      Account for the fragment.
 922                  */
 923                  
 924 #ifdef CONFIG_FIREWALL
 925                 if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 926                 {
 927                         kfree_skb(skb, FREE_WRITE);
 928                         dev_unlock_list();
 929                         return -EPERM;
 930                 }
 931 #endif          
 932 #ifdef CONFIG_IP_ACCT
 933                 if(!offset)
 934                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
 935 #endif  
 936                 offset -= (maxfraglen-fragheaderlen);
 937                 fraglen = maxfraglen;
 938 
 939 #ifdef CONFIG_IP_MULTICAST
 940 
 941                 /*
 942                  *      Multicasts are looped back for other local users
 943                  */
 944          
 945                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
 946                 {
 947                         /*
 948                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
 949                          *      you are always magically a member of this group.
 950                          *
 951                          *      Always loop back all host messages when running as a multicast router.
 952                          */
 953                          
 954                         if(sk==NULL || sk->ip_mc_loop)
 955                         {
  956                                 if(daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 957                                         ip_loopback(rt?rt->rt_dev:dev,skb);
 958                                 else 
 959                                 {
 960                                         struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
 961                                         while(imc!=NULL) 
 962                                         {
 963                                                 if(imc->multiaddr==daddr) 
 964                                                 {
 965                                                         ip_loopback(rt?rt->rt_dev:dev,skb);
 966                                                         break;
 967                                                 }
 968                                                 imc=imc->next;
 969                                         }
 970                                 }
 971                         }
 972 
 973                         /*
 974                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
 975                          *      extra clone.
 976                          */
 977 
 978                         if(skb->ip_hdr->ttl==0)
 979                                 kfree_skb(skb, FREE_READ);
 980                 }
 981 #endif
 982 
 983                 nfrags++;
 984                 
 985                 /*
 986                  *      BSD loops broadcasts
 987                  */
 988                  
 989                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
 990                         ip_loopback(dev,skb);
 991 
 992                 /*
 993                  *      Now queue the bytes into the device.
 994                  */
 995                  
 996                 if (dev->flags & IFF_UP) 
 997                 {
 998                         dev_queue_xmit(skb, dev, sk->priority);
 999                 } 
1000                 else 
1001                 {
1002                         /*
1003                          *      Whoops... 
1004                          */
1005                          
1006                         ip_statistics.IpOutDiscards++;
1007                         if(nfrags>1)
1008                                 ip_statistics.IpFragCreates+=nfrags;
1009                         kfree_skb(skb, FREE_WRITE);
1010                         dev_unlock_list();
1011                         /*
1012                          *      BSD behaviour.
1013                          */
1014                         if(sk!=NULL)
1015                                 sk->err=ENETDOWN;
1016                         return(0); /* lose rest of fragments */
1017                 }
1018         } 
1019         while (offset >= 0);
1020         if(nfrags>1)
1021                 ip_statistics.IpFragCreates+=nfrags;
1022         dev_unlock_list();
1023         return(0);
1024 }
1025     
1026 
1027 /*
1028  *      IP protocol layer initialiser
1029  */
1030 
1031 static struct packet_type ip_packet_type =
1032 {
1033         0,      /* MUTTER ntohs(ETH_P_IP),*/
1034         NULL,   /* All devices */
1035         ip_rcv,
1036         NULL,
1037         NULL,
1038 };
1039 
1040 #ifdef CONFIG_RTNETLINK
1041 
1042 /*
1043  *      Netlink hooks for IP
1044  */
1045  
1046 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
1047 {
1048         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
1049         struct netlink_rtinfo *nrt;
1050         struct sockaddr_in *s;
1051         if(skb==NULL)
1052                 return;
1053         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
1054         nrt->rtmsg_type=msg;
1055         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
1056         s->sin_family=AF_INET;
1057         s->sin_addr.s_addr=daddr;
1058         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
1059         s->sin_family=AF_INET;
1060         s->sin_addr.s_addr=gw;
1061         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
1062         s->sin_family=AF_INET;
1063         s->sin_addr.s_addr=mask;
1064         nrt->rtmsg_flags=flags;
1065         nrt->rtmsg_metric=metric;
1066         strcpy(nrt->rtmsg_device,name);
1067         netlink_post(NETLINK_ROUTE, skb);
1068 }       
1069 
1070 #endif
1071 
1072 /*
1073  *      Device notifier
1074  */
1075  
1076 static int ip_rt_event(unsigned long event, void *ptr)
1077 {
1078         struct device *dev=ptr;
1079         if(event==NETDEV_DOWN)
1080         {
1081                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
1082                 ip_rt_flush(dev);
1083         }
1084 /*
 1085  *      Join the initial (all hosts) group if multicast.
1086  */             
1087         if(event==NETDEV_UP)
1088         {
1089 #ifdef CONFIG_IP_MULTICAST      
1090                 ip_mc_allhost(dev);
1091 #endif          
1092                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
1093         }
1094         return NOTIFY_DONE;
1095 }
1096 
1097 struct notifier_block ip_rt_notifier={
1098         ip_rt_event,
1099         NULL,
1100         0
1101 };
1102 
1103 /*
1104  *      IP registers the packet type and then calls the subprotocol initialisers
1105  */
1106 
1107 void ip_init(void)
1108 {
1109         ip_packet_type.type=htons(ETH_P_IP);
1110         dev_add_pack(&ip_packet_type);
1111 
1112         /* So we flush routes when a device is downed */        
1113         register_netdevice_notifier(&ip_rt_notifier);
1114 
1115 /*      ip_raw_init();
1116         ip_packet_init();
1117         ip_tcp_init();
1118         ip_udp_init();*/
1119 
1120 #ifdef CONFIG_IP_MULTICAST
1121         proc_net_register(&(struct proc_dir_entry) {
1122                 PROC_NET_IGMP, 4, "igmp",
1123                 S_IFREG | S_IRUGO, 1, 0, 0,
1124                 0, &proc_net_inode_operations,
1125                 ip_mc_procinfo
1126         });
1127 #endif
1128 }
1129 
