root/net/ipv4/ip_output.c


DEFINITIONS

This source file includes the following definitions.
  1. ip_loopback
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_queue_xmit
  7. ip_build_xmit
  8. ip_netlink_msg
  9. ip_rt_event
  10. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) output module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <Alan.Cox@linux.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *
  19  *      See ip_input.c for original log
  20  *
  21  *      Fixes:
  22  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
  23  */
  24 
  25 #include <asm/segment.h>
  26 #include <asm/system.h>
  27 #include <linux/types.h>
  28 #include <linux/kernel.h>
  29 #include <linux/sched.h>
  30 #include <linux/mm.h>
  31 #include <linux/string.h>
  32 #include <linux/errno.h>
  33 #include <linux/config.h>
  34 
  35 #include <linux/socket.h>
  36 #include <linux/sockios.h>
  37 #include <linux/in.h>
  38 #include <linux/inet.h>
  39 #include <linux/netdevice.h>
  40 #include <linux/etherdevice.h>
  41 #include <linux/proc_fs.h>
  42 #include <linux/stat.h>
  43 
  44 #include <net/snmp.h>
  45 #include <net/ip.h>
  46 #include <net/protocol.h>
  47 #include <net/route.h>
  48 #include <net/tcp.h>
  49 #include <net/udp.h>
  50 #include <linux/skbuff.h>
  51 #include <net/sock.h>
  52 #include <net/arp.h>
  53 #include <net/icmp.h>
  54 #include <net/raw.h>
  55 #include <net/checksum.h>
  56 #include <linux/igmp.h>
  57 #include <linux/ip_fw.h>
  58 #include <linux/firewall.h>
  59 #include <linux/mroute.h>
  60 #include <net/netlink.h>
  61 
  62 /*
  63  *      Loop a packet back to the sender.
  64  */
  65  
  66 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
  67 {
  68         struct device *dev=&loopback_dev;
  69         int len=ntohs(skb->ip_hdr->tot_len);
  70         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
  71         
  72         if(newskb==NULL)
  73                 return;
  74                 
  75         newskb->link3=NULL;
  76         newskb->sk=NULL;
  77         newskb->dev=dev;
  78         newskb->saddr=skb->saddr;
  79         newskb->daddr=skb->daddr;
  80         newskb->raddr=skb->raddr;
  81         newskb->free=1;
  82         newskb->lock=0;
  83         newskb->users=0;
  84         newskb->pkt_type=skb->pkt_type;
  85         
  86         /*
  87          *      Put a MAC header on the packet
  88          */
  89         ip_send(NULL,newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
  90         /*
  91          *      Add the rest of the data space. 
  92          */
  93         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
  94         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
  95 
  96         /*
  97          *      Copy the data
  98          */
  99         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
 100 
 101         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
 102                 
 103         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
 104         ip_queue_xmit(NULL, dev, newskb, 1);
 105 }
 106 
 107 
 108 
 109 /*
 110  *      Take an skb, and fill in the MAC header.
 111  */
 112 
 113 int ip_send(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 114 {
 115         int mac = 0;
 116 
 117         skb->dev = dev;
 118         skb->arp = 1;
 119         if (dev->hard_header)
 120         {
 121                 /*
 122                  *      Build a hardware header. Source address is our mac, destination unknown
 123                  *      (rebuild header will sort this out)
 124                  */
 125                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
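                      /*
                       *      Illustration (assuming an Ethernet device, where
                       *      hard_header_len is 14): (14+15)&~15 reserves 16
                       *      bytes of headroom, so after the 14 byte MAC
                       *      header is pushed the IP header starts at offset
                       *      16 and stays 16 byte aligned.
                       */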
 126                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 127                 {
 128                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 129                         if (rt->rt_hh->hh_uptodate)
 130                                 return dev->hard_header_len;
 131 #if RT_CACHE_DEBUG >= 2
 132                         printk("ip_send: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 133 #endif
 134                         skb->arp = 0;
 135                         skb->raddr = daddr;
 136                         return -dev->hard_header_len;
 137                 }
 138                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 139                 if (mac < 0)
 140                 {
 141                         mac = -mac;
 142                         skb->arp = 0;
 143                         skb->raddr = daddr;     /* next routing address */
 144                 }
 145         }
 146         return mac;
 147 }
 148 
 149 static int ip_send_room(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 150 {
 151         int mac = 0;
 152 
 153         skb->dev = dev;
 154         skb->arp = 1;
 155         if (dev->hard_header)
 156         {
 157                 skb_reserve(skb,MAX_HEADER);
 158                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 159                 {
 160                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 161                         if (rt->rt_hh->hh_uptodate)
 162                                 return dev->hard_header_len;
 163 #if RT_CACHE_DEBUG >= 2
 164                         printk("ip_send_room: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 165 #endif
 166                         skb->arp = 0;
 167                         skb->raddr = daddr;
 168                         return -dev->hard_header_len;
 169                 }
 170                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 171                 if (mac < 0)
 172                 {
 173                         mac = -mac;
 174                         skb->arp = 0;
 175                         skb->raddr = daddr;     /* next routing address */
 176                 }
 177         }
 178         return mac;
 179 }
 180 
 181 int ip_id_count = 0;
 182 
 183 /*
  184  * This routine builds the appropriate hardware/IP headers for
  185  * an outgoing packet.  It assumes that if *dev != NULL then the
 186  * protocol knows what it's doing, otherwise it uses the
 187  * routing/ARP tables to select a device struct.
 188  */
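       /*
        *      Illustrative only (a hypothetical caller; names such as sk->opt
        *      and len are assumptions, not taken from this file):
        *
        *              struct device *dev = NULL;
        *              int tmp = ip_build_header(skb, sk->saddr, sk->daddr,
        *                              &dev, IPPROTO_TCP, sk->opt, len,
        *                              sk->ip_tos, sk->ip_ttl,
        *                              &sk->ip_route_cache);
        *              if (tmp < 0)
        *                      return tmp;     /* e.g. -ENETUNREACH, no route */
        */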
 189 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
 190                 struct device **dev, int type, struct options *opt,
 191                 int len, int tos, int ttl, struct rtable ** rp)
 192 {
 193         struct rtable *rt;
 194         __u32 raddr;
 195         int tmp;
 196         struct iphdr *iph;
 197         __u32 final_daddr = daddr;
 198 
 199 
 200         if (opt && opt->srr)
 201                 daddr = opt->faddr;
 202 
 203         /*
 204          *      See if we need to look up the device.
 205          */
 206 
 207 #ifdef CONFIG_IP_MULTICAST      
 208         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 209                 *dev=dev_get(skb->sk->ip_mc_name);
 210 #endif
 211         if (rp)
 212         {
 213                 rt = ip_check_route(rp, daddr, skb->localroute);
 214                 /*
  215                  * If rp != NULL, the ip_rt_put() below must not release
  216                  * the route, so take an extra reference on it here.
 217                  */
 218                 if (rt)
 219                         ATOMIC_INCR(&rt->rt_refcnt);
 220         }
 221         else
 222                 rt = ip_rt_route(daddr, skb->localroute);
 223 
 224 
 225         if (*dev == NULL)
 226         {
 227                 if (rt == NULL)
 228                 {
 229                         ip_statistics.IpOutNoRoutes++;
 230                         return(-ENETUNREACH);
 231                 }
 232 
 233                 *dev = rt->rt_dev;
 234         }
 235 
 236         if ((LOOPBACK(saddr) && !LOOPBACK(daddr)) || !saddr)
 237                 saddr = rt ? rt->rt_src : (*dev)->pa_addr;
 238 
 239         raddr = rt ? rt->rt_gateway : 0;
 240 
 241         if (opt && opt->is_strictroute && rt && (rt->rt_flags & RTF_GATEWAY))
 242         {
 243                 ip_rt_put(rt);
 244                 ip_statistics.IpOutNoRoutes++;
 245                 return -ENETUNREACH;
 246         }
 247 
 248         /*
 249          *      No gateway so aim at the real destination
 250          */
 251 
 252         if (raddr == 0)
 253                 raddr = daddr;
 254 
 255         /*
 256          *      Now build the MAC header.
 257          */
 258 
 259         if (type==IPPROTO_TCP)
 260                 tmp = ip_send_room(rt, skb, raddr, len, *dev, saddr);
 261         else
 262                 tmp = ip_send(rt, skb, raddr, len, *dev, saddr);
 263 
 264         ip_rt_put(rt);
 265 
 266         /*
 267          *      Book keeping
 268          */
 269 
 270         skb->dev = *dev;
 271         skb->saddr = saddr;
 272 
 273         /*
 274          *      Now build the IP header.
 275          */
 276 
 277         /*
 278          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 279          *      one is being supplied to us by the user
 280          */
 281 
 282         if(type == IPPROTO_RAW)
 283                 return (tmp);
 284 
 285         /*
 286          *      Build the IP addresses
 287          */
 288          
 289         if (opt)
 290                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 291         else
 292                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 293 
 294         iph->version  = 4;
 295         iph->ihl      = 5;
 296         iph->tos      = tos;
 297         iph->frag_off = 0;
 298         iph->ttl      = ttl;
 299         iph->daddr    = daddr;
 300         iph->saddr    = saddr;
 301         iph->protocol = type;
 302         skb->ip_hdr   = iph;
 303 
 304         if (!opt || !opt->optlen)
 305                 return sizeof(struct iphdr) + tmp;
 306         iph->ihl += opt->optlen>>2;
 307         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 308         return iph->ihl*4 + tmp;
 309 }
 310 
 311 
 312 /*
 313  *      Generate a checksum for an outgoing IP datagram.
 314  */
 315 
 316 void ip_send_check(struct iphdr *iph)
 317 {
 318         iph->check = 0;
 319         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 320 }
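       /*
        *      Note that iph->ihl counts 32-bit words, which is the unit
        *      ip_fast_csum() expects.  A portable, unoptimised sketch of the
        *      same Internet checksum (assuming check has already been zeroed,
        *      as above):
        *
        *              __u32 sum = 0;
        *              __u16 *p = (__u16 *)iph;
        *              int i;
        *
        *              for (i = 0; i < iph->ihl * 2; i++)
        *                      sum += p[i];
        *              while (sum >> 16)
        *                      sum = (sum & 0xffff) + (sum >> 16);
        *              iph->check = ~sum;
        */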
 321 
 322 /*
  323  * Queues a packet to be sent, and starts the transmitter
  324  * if necessary.  If free == 1 we free the block after
  325  * transmit, otherwise we don't.  If free == 2 we not only
  326  * free the block but also skip assigning a new IP identification
  327  * (it is a fragment that already carries one).  This routine also
  328  * fills in the total length and computes the header checksum.
 329  */
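       /*
        *      For example (reading from the code below): a protocol that wants
        *      to retransmit passes free = 0 so the buffer is chained onto
        *      sk->send_head / sk->send_tail, a one-shot datagram passes
        *      free = 1, and already-numbered fragments pass free = 2.
        */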
 330 
 331 void ip_queue_xmit(struct sock *sk, struct device *dev,
 332               struct sk_buff *skb, int free)
 333 {
 334         struct iphdr *iph;
 335 /*      unsigned char *ptr;*/
 336 
 337         /* Sanity check */
 338         if (dev == NULL)
 339         {
 340                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
 341                 return;
 342         }
 343 
 344         IS_SKB(skb);
 345 
 346         /*
 347          *      Do some book-keeping in the packet for later
 348          */
 349 
 350 
 351         skb->dev = dev;
 352         skb->when = jiffies;
 353 
 354         /*
  355          *      Find the IP header and set the length. This is ugly,
  356          *      but once the new skb data handling code is in, the
  357          *      hardware layer will push its header sensibly and we
  358          *      will set skb->ip_hdr, avoiding this mess and the fixed
  359          *      header length problem.
 360          */
 361 
 362         iph = skb->ip_hdr;
 363         iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
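              /*
               *      tot_len covers the IP header and payload only, so the
               *      link level header bytes that precede iph in the buffer
               *      are subtracted out.  (ntohs() and htons() perform the
               *      same swap, so the conversion to network order is still
               *      correct here.)
               */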
 364 
 365 #ifdef CONFIG_FIREWALL
 366         if(call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 367                 /* just don't send this packet */
 368                 return;
 369 #endif  
 370 
 371         /*
 372          *      No reassigning numbers to fragments...
 373          */
 374 
 375         if(free!=2)
 376                 iph->id      = htons(ip_id_count++);
 377         else
 378                 free=1;
 379 
 380         /* All buffers without an owner socket get freed */
 381         if (sk == NULL)
 382                 free = 1;
 383 
 384         skb->free = free;
 385 
 386         /*
  387          *      Do we need to fragment? Again, this is inefficient.
 388          *      We need to somehow lock the original buffer and use
 389          *      bits of it.
 390          */
 391 
 392         if(ntohs(iph->tot_len)> dev->mtu)
 393         {
 394                 ip_fragment(sk,skb,dev,0);
 395                 IS_SKB(skb);
 396                 kfree_skb(skb,FREE_WRITE);
 397                 return;
 398         }
 399 
 400         /*
 401          *      Add an IP checksum
 402          */
 403 
 404         ip_send_check(iph);
 405 
 406         /*
 407          *      Print the frame when debugging
 408          */
 409 
 410         /*
  411          *      More debugging. You cannot queue a packet that is already
  412          *      on a list. Spot this and moan loudly.
 413          */
 414         if (skb->next != NULL)
 415         {
 416                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
 417                 skb_unlink(skb);
 418         }
 419 
 420         /*
 421          *      If a sender wishes the packet to remain unfreed
 422          *      we add it to his send queue. This arguably belongs
 423          *      in the TCP level since nobody else uses it. BUT
 424          *      remember IPng might change all the rules.
 425          */
 426 
 427         if (!free)
 428         {
 429                 unsigned long flags;
 430                 /* The socket now has more outstanding blocks */
 431 
 432                 sk->packets_out++;
 433 
 434                 /* Protect the list for a moment */
 435                 save_flags(flags);
 436                 cli();
 437 
 438                 if (skb->link3 != NULL)
 439                 {
 440                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
 441                         skb->link3 = NULL;
 442                 }
 443                 if (sk->send_head == NULL)
 444                 {
 445                         sk->send_tail = skb;
 446                         sk->send_head = skb;
 447                 }
 448                 else
 449                 {
 450                         sk->send_tail->link3 = skb;
 451                         sk->send_tail = skb;
 452                 }
 453                 /* skb->link3 is NULL */
 454 
 455                 /* Interrupt restore */
 456                 restore_flags(flags);
 457         }
 458         else
 459                 /* Remember who owns the buffer */
 460                 skb->sk = sk;
 461 
 462         /*
 463          *      If the indicated interface is up and running, send the packet.
 464          */
 465          
 466         ip_statistics.IpOutRequests++;
 467 #ifdef CONFIG_IP_ACCT
 468         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
 469 #endif  
 470         
 471 #ifdef CONFIG_IP_MULTICAST      
 472 
 473         /*
 474          *      Multicasts are looped back for other local users
 475          */
 476          
 477         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
 478         {
 479                 if(sk==NULL || sk->ip_mc_loop)
 480                 {
 481                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 482                         {
 483                                 ip_loopback(dev,skb);
 484                         }
 485                         else
 486                         {
 487                                 struct ip_mc_list *imc=dev->ip_mc_list;
 488                                 while(imc!=NULL)
 489                                 {
 490                                         if(imc->multiaddr==iph->daddr)
 491                                         {
 492                                                 ip_loopback(dev,skb);
 493                                                 break;
 494                                         }
 495                                         imc=imc->next;
 496                                 }
 497                         }
 498                 }
 499                 /* Multicasts with ttl 0 must not go beyond the host */
 500                 
 501                 if(skb->ip_hdr->ttl==0)
 502                 {
 503                         kfree_skb(skb, FREE_READ);
 504                         return;
 505                 }
 506         }
 507 #endif
 508         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
 509                 ip_loopback(dev,skb);
 510                 
 511         if (dev->flags & IFF_UP)
 512         {
 513                 /*
 514                  *      If we have an owner use its priority setting,
 515                  *      otherwise use NORMAL
 516                  */
 517 
 518                 if (sk != NULL)
 519                 {
 520                         dev_queue_xmit(skb, dev, sk->priority);
 521                 }
 522                 else
 523                 {
 524                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
 525                 }
 526         }
 527         else
 528         {
 529                 if(sk)
 530                         sk->err = ENETDOWN;
 531                 ip_statistics.IpOutDiscards++;
 532                 if (free)
 533                         kfree_skb(skb, FREE_WRITE);
 534         }
 535 }
 536 
 537 
 538 /*
 539  *      Build and send a packet, with as little as one copy
 540  *
 541  *      Doesn't care much about ip options... option length can be
 542  *      different for fragment at 0 and other fragments.
 543  *
 544  *      Note that the fragment at the highest offset is sent first,
 545  *      so the getfrag routine can fill in the TCP/UDP checksum header
 546  *      field in the last fragment it sends... actually it also helps
 547  *      the reassemblers, they can put most packets in at the head of
 548  *      the fragment queue, and they know the total size in advance. This
  549  *      last feature will measurably improve the Linux fragment handler.
 550  *
 551  *      The callback has five args, an arbitrary pointer (copy of frag),
 552  *      the source IP address (may depend on the routing table), the 
  553  *      destination buffer (char *), the offset to copy from, and the
 554  *      length to be copied.
 555  * 
 556  */
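       /*
        *      A minimal sketch of such a callback (hypothetical, illustration
        *      only; the UDP and raw IP code provide the real versions):
        *
        *              static void example_getfrag(const void *p, __u32 saddr,
        *                                          char *to, unsigned int offset,
        *                                          unsigned int fraglen)
        *              {
        *                      memcpy(to, ((const char *)p) + offset, fraglen);
        *              }
        *
        *      A checksumming protocol accumulates its sum while copying and
        *      writes it into the transport header on the offset 0 call, which
        *      is why the highest offset is generated first.
        */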
 557 
 558 int ip_build_xmit(struct sock *sk,
 559                    void getfrag (const void *,
 560                                  __u32,
 561                                  char *,
 562                                  unsigned int,  
 563                                  unsigned int),
 564                    const void *frag,
 565                    unsigned short int length,
 566                    __u32 daddr,
 567                    __u32 user_saddr,
 568                    struct options * opt,
 569                    int flags,
 570                    int type,
 571                    int noblock) 
 572 {
 573         struct rtable *rt;
 574         unsigned int fraglen, maxfraglen, fragheaderlen;
 575         int offset, mf;
 576         __u32 saddr;
 577         unsigned short id;
 578         struct iphdr *iph;
 579         __u32 raddr;
 580         struct device *dev = NULL;
 581         struct hh_cache * hh=NULL;
 582         int nfrags=0;
 583         __u32 true_daddr = daddr;
 584 
 585         if (opt && opt->srr && !sk->ip_hdrincl)
 586           daddr = opt->faddr;
 587         
 588         ip_statistics.IpOutRequests++;
 589 
 590 #ifdef CONFIG_IP_MULTICAST      
 591         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
 592         {
 593                 dev=dev_get(sk->ip_mc_name);
 594                 if(!dev)
 595                         return -ENODEV;
 596                 rt=NULL;
 597                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 598                         saddr = sk->saddr;
 599                 else
 600                         saddr = dev->pa_addr;
 601         }
 602         else
 603         {
 604 #endif  
 605                 rt = ip_check_route(&sk->ip_route_cache, daddr,
 606                                     sk->localroute || (flags&MSG_DONTROUTE) ||
 607                                     (opt && opt->is_strictroute));
 608                 if (rt == NULL) 
 609                 {
 610                         ip_statistics.IpOutNoRoutes++;
 611                         return(-ENETUNREACH);
 612                 }
 613                 saddr = rt->rt_src;
 614 
 615                 hh = rt->rt_hh;
 616         
 617                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 618                         saddr = sk->saddr;
 619                         
 620                 dev=rt->rt_dev;
 621 #ifdef CONFIG_IP_MULTICAST
 622         }
 623         if (rt && !dev)
 624                 dev = rt->rt_dev;
 625 #endif          
 626         if (user_saddr)
 627                 saddr = user_saddr;
 628 
 629         raddr = rt ? rt->rt_gateway : daddr;
 630         /*
 631          *      Now compute the buffer space we require
 632          */ 
 633          
 634         /*
 635          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
  636          *      choice RAW frames within 20 bytes of maximum size (rare) to the long path.
 637          */
 638 
 639         length += sizeof(struct iphdr);
 640         if (!sk->ip_hdrincl && opt) 
 641                 length += opt->optlen;
 642 
 643         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
 644         {       
 645                 int error;
 646                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
 647                 if(skb==NULL)
 648                 {
 649                         ip_statistics.IpOutDiscards++;
 650                         return error;
 651                 }
 652                 skb->dev=dev;
 653                 skb->free=1;
 654                 skb->when=jiffies;
 655                 skb->sk=sk;
 656                 skb->arp=0;
 657                 skb->saddr=saddr;
 658                 skb->raddr = raddr;
 659                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 660                 if (hh)
 661                 {
 662                         skb->arp=1;
 663                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 664                         if (!hh->hh_uptodate)
 665                         {
 666                                 skb->arp = 0;
 667 #if RT_CACHE_DEBUG >= 2
 668                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 669 #endif                          
 670                         }
 671                 }
 672                 else if(dev->hard_header)
 673                 {
 674                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
 675                                 skb->arp=1;
 676                 }
 677                 else
 678                         skb->arp=1;
 679                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
 680                 dev_lock_list();
 681                 if(!sk->ip_hdrincl)
 682                 {
 683                         iph->version=4;
 684                         iph->ihl=5;
 685                         iph->tos=sk->ip_tos;
 686                         iph->tot_len = htons(length);
 687                         iph->id=htons(ip_id_count++);
 688                         iph->frag_off = 0;
 689                         iph->ttl=sk->ip_ttl;
 690                         iph->protocol=type;
 691                         iph->saddr=saddr;
 692                         iph->daddr=daddr;
 693                         if (opt) 
 694                         {
 695                                 iph->ihl += opt->optlen>>2;
 696                                 ip_options_build(skb, opt,
 697                                                  true_daddr, dev->pa_addr, 0);
 698                         }
 699                         iph->check=0;
 700                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 701                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
 702                 }
 703                 else
 704                         getfrag(frag,saddr,(void *)iph,0,length-20);
 705                 dev_unlock_list();
 706 #ifdef CONFIG_FIREWALL
 707                 if(call_out_firewall(PF_INET, skb, iph)< FW_ACCEPT)
 708                 {
 709                         kfree_skb(skb, FREE_WRITE);
 710                         return -EPERM;
 711                 }
 712 #endif
 713 #ifdef CONFIG_IP_ACCT
 714                 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
 715 #endif          
 716                 if(dev->flags&IFF_UP)
 717                         dev_queue_xmit(skb,dev,sk->priority);
 718                 else
 719                 {
 720                         ip_statistics.IpOutDiscards++;
 721                         kfree_skb(skb, FREE_WRITE);
 722                 }
 723                 return 0;
 724         }
 725         length -= sizeof(struct iphdr);
 726         if (sk && !sk->ip_hdrincl && opt) 
 727         {
 728                 length -= opt->optlen;
 729                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
 730                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
 731         }
 732         else 
 733         {
 734                 fragheaderlen = dev->hard_header_len;
 735                 if(!sk->ip_hdrincl)
 736                         fragheaderlen += 20;
 737                 
 738                 /*
 739                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
 740                  *      out the size of the frames to send.
 741                  */
 742          
 743                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
 744         }
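              /*
               *      Illustration (assuming plain Ethernet: hard_header_len 14,
               *      mtu 1500, no IP options, not a raw socket):
               *
               *              fragheaderlen = 14 + 20 = 34
               *              maxfraglen    = ((1500-20) & ~7) + 34 = 1514
               *
               *      so each full fragment carries 1480 data bytes, a multiple
               *      of 8 as the fragment offset field requires.
               */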
 745         
 746         /*
 747          *      Start at the end of the frame by handling the remainder.
 748          */
 749          
 750         offset = length - (length % (maxfraglen - fragheaderlen));
 751         
 752         /*
 753          *      Amount of memory to allocate for final fragment.
 754          */
 755          
 756         fraglen = length - offset + fragheaderlen;
 757         
 758         if(length-offset==0)
 759         {
 760                 fraglen = maxfraglen;
 761                 offset -= maxfraglen-fragheaderlen;
 762         }
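              /*
               *      Worked example (illustrative, with the Ethernet figures
               *      above: 1480 data bytes per fragment, fragheaderlen 34):
               *      a 4000 byte payload gives offset = 4000 - (4000 % 1480)
               *      = 2960 and fraglen = 4000 - 2960 + 34 = 1074, so the loop
               *      below sends the 1040 byte tail first, then full fragments
               *      at offsets 1480 and 0.  If the payload is an exact
               *      multiple of 1480, the test above turns the empty tail
               *      into one more full-sized fragment.
               */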
 763         
 764         
 765         /*
 766          *      The last fragment will not have MF (more fragments) set.
 767          */
 768          
 769         mf = 0;
 770 
 771         /*
 772          *      Can't fragment raw packets 
 773          */
 774          
 775         if (sk->ip_hdrincl && offset > 0)
 776                 return(-EMSGSIZE);
 777 
 778         /*
 779          *      Lock the device lists.
 780          */
 781 
 782         dev_lock_list();
 783         
 784         /*
 785          *      Get an identifier
 786          */
 787          
 788         id = htons(ip_id_count++);
 789 
 790         /*
  791          *      Begin outputting the bytes.
 792          */
 793          
 794         do 
 795         {
 796                 struct sk_buff * skb;
 797                 int error;
 798                 char *data;
 799 
 800                 /*
 801                  *      Get the memory we require with some space left for alignment.
 802                  */
 803 
 804                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
 805                 if (skb == NULL)
 806                 {
 807                         ip_statistics.IpOutDiscards++;
 808                         if(nfrags>1)
 809                                 ip_statistics.IpFragCreates++;                  
 810                         dev_unlock_list();
 811                         return(error);
 812                 }
 813                 
 814                 /*
 815                  *      Fill in the control structures
 816                  */
 817                  
 818                 skb->next = skb->prev = NULL;
 819                 skb->dev = dev;
 820                 skb->when = jiffies;
 821                 skb->free = 1; /* dubious, this one */
 822                 skb->sk = sk;
 823                 skb->arp = 0;
 824                 skb->saddr = saddr;
 825                 skb->raddr = raddr;
 826                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 827                 data = skb_put(skb, fraglen-dev->hard_header_len);
 828 
 829                 /*
 830                  *      Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
 831                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
 832                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
 833                  *      pointer to speed header cache builds for identical targets.
 834                  */
 835                  
 836                 if (hh)
 837                 {
 838                         skb->arp=1;
 839                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 840                         if (!hh->hh_uptodate)
 841                         {
 842                                 skb->arp = 0;
 843 #if RT_CACHE_DEBUG >= 2
 844                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 845 #endif                          
 846                         }
 847                 }
 848                 else if (dev->hard_header)
 849                 {
 850                         if(dev->hard_header(skb, dev, ETH_P_IP, 
 851                                                 NULL, NULL, 0)>0)
 852                                 skb->arp=1;
 853                 }
 854                 
 855                 /*
 856                  *      Find where to start putting bytes.
 857                  */
 858                  
 859                 skb->ip_hdr = iph = (struct iphdr *)data;
 860 
 861                 /*
 862                  *      Only write IP header onto non-raw packets 
 863                  */
 864                  
 865                 if(!sk->ip_hdrincl) 
 866                 {
 867 
 868                         iph->version = 4;
 869                         iph->ihl = 5; /* ugh */
 870                         if (opt) {
 871                                 iph->ihl += opt->optlen>>2;
 872                                 ip_options_build(skb, opt,
 873                                                  true_daddr, dev->pa_addr, offset);
 874                         }
 875                         iph->tos = sk->ip_tos;
 876                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 877                         iph->id = id;
 878                         iph->frag_off = htons(offset>>3);
 879                         iph->frag_off |= mf;
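                              /*
                               *      The fragment offset field counts 8 byte
                               *      units, hence offset>>3.  IP_MF is clear
                               *      only on the highest-offset fragment,
                               *      which this loop builds first.
                               */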
 880 #ifdef CONFIG_IP_MULTICAST
 881                         if (MULTICAST(daddr))
 882                                 iph->ttl = sk->ip_mc_ttl;
 883                         else
 884 #endif
 885                                 iph->ttl = sk->ip_ttl;
 886                         iph->protocol = type;
 887                         iph->check = 0;
 888                         iph->saddr = saddr;
 889                         iph->daddr = daddr;
 890                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 891                         data += iph->ihl*4;
 892                         
 893                         /*
 894                          *      Any further fragments will have MF set.
 895                          */
 896                          
 897                         mf = htons(IP_MF);
 898                 }
 899                 
 900                 /*
 901                  *      User data callback
 902                  */
 903 
 904                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
 905                 
 906                 /*
 907                  *      Account for the fragment.
 908                  */
 909                  
 910 #ifdef CONFIG_FIREWALL
 911                 if(!offset && call_out_firewall(PF_INET, skb, iph) < FW_ACCEPT)
 912                 {
 913                         kfree_skb(skb, FREE_WRITE);
 914                         dev_unlock_list();
 915                         return -EPERM;
 916                 }
 917 #endif          
 918 #ifdef CONFIG_IP_ACCT
 919                 if(!offset)
 920                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
 921 #endif  
 922                 offset -= (maxfraglen-fragheaderlen);
 923                 fraglen = maxfraglen;
 924 
 925 #ifdef CONFIG_IP_MULTICAST
 926 
 927                 /*
 928                  *      Multicasts are looped back for other local users
 929                  */
 930          
 931                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
 932                 {
 933                         /*
 934                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
 935                          *      you are always magically a member of this group.
 936                          *
 937                          *      Always loop back all host messages when running as a multicast router.
 938                          */
 939                          
 940                         if(sk==NULL || sk->ip_mc_loop)
 941                         {
  942                                 if(daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 943                                         ip_loopback(dev,skb);
 944                                 else 
 945                                 {
 946                                         struct ip_mc_list *imc=dev->ip_mc_list;
 947                                         while(imc!=NULL) 
 948                                         {
 949                                                 if(imc->multiaddr==daddr) 
 950                                                 {
 951                                                         ip_loopback(dev,skb);
 952                                                         break;
 953                                                 }
 954                                                 imc=imc->next;
 955                                         }
 956                                 }
 957                         }
 958 
 959                         /*
 960                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
 961                          *      extra clone.
 962                          */
 963 
 964                         if(skb->ip_hdr->ttl==0)
 965                                 kfree_skb(skb, FREE_READ);
 966                 }
 967 #endif
 968 
 969                 nfrags++;
 970                 
 971                 /*
 972                  *      BSD loops broadcasts
 973                  */
 974                  
 975                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
 976                         ip_loopback(dev,skb);
 977 
 978                 /*
 979                  *      Now queue the bytes into the device.
 980                  */
 981                  
 982                 if (dev->flags & IFF_UP) 
 983                 {
 984                         dev_queue_xmit(skb, dev, sk->priority);
 985                 } 
 986                 else 
 987                 {
 988                         /*
 989                          *      Whoops... 
 990                          */
 991                          
 992                         ip_statistics.IpOutDiscards++;
 993                         if(nfrags>1)
 994                                 ip_statistics.IpFragCreates+=nfrags;
 995                         kfree_skb(skb, FREE_WRITE);
 996                         dev_unlock_list();
 997                         /*
 998                          *      BSD behaviour.
 999                          */
1000                         if(sk!=NULL)
1001                                 sk->err=ENETDOWN;
1002                         return(0); /* lose rest of fragments */
1003                 }
1004         } 
1005         while (offset >= 0);
1006         if(nfrags>1)
1007                 ip_statistics.IpFragCreates+=nfrags;
1008         dev_unlock_list();
1009         return(0);
1010 }
1011     
1012 
1013 /*
1014  *      IP protocol layer initialiser
1015  */
1016 
1017 static struct packet_type ip_packet_type =
1018 {
1019         0,      /* MUTTER ntohs(ETH_P_IP),*/
1020         NULL,   /* All devices */
1021         ip_rcv,
1022         NULL,
1023         NULL,
1024 };
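      /*
       *      The initialisers above are positional.  Assuming the usual layout
       *      of struct packet_type in this kernel (see linux/netdevice.h) they
       *      are: the protocol type (filled in by ip_init() below), a device
       *      filter (NULL means any device), the receive handler ip_rcv, a
       *      private data pointer and the next pointer.
       */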
1025 
1026 #ifdef CONFIG_RTNETLINK
1027 
1028 /*
1029  *      Netlink hooks for IP
1030  */
1031  
1032 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
1033 {
1034         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
1035         struct netlink_rtinfo *nrt;
1036         struct sockaddr_in *s;
1037         if(skb==NULL)
1038                 return;
1039         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
1040         nrt->rtmsg_type=msg;
1041         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
1042         s->sin_family=AF_INET;
1043         s->sin_addr.s_addr=daddr;
1044         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
1045         s->sin_family=AF_INET;
1046         s->sin_addr.s_addr=gw;
1047         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
1048         s->sin_family=AF_INET;
1049         s->sin_addr.s_addr=mask;
1050         nrt->rtmsg_flags=flags;
1051         nrt->rtmsg_metric=metric;
1052         strcpy(nrt->rtmsg_device,name);
1053         netlink_post(NETLINK_ROUTE, skb);
1054 }       
1055 
1056 #endif
1057 
1058 /*
1059  *      Device notifier
1060  */
1061  
1062 static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
1063 {
1064         struct device *dev=ptr;
1065         if(event==NETDEV_DOWN)
1066         {
1067                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
1068                 ip_rt_flush(dev);
1069         }
1070 /*
 1071  *      Join the initial group if multicast.
1072  */             
1073         if(event==NETDEV_UP)
1074         {
1075 #ifdef CONFIG_IP_MULTICAST      
1076                 ip_mc_allhost(dev);
1077 #endif          
1078                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
1079         }
1080         return NOTIFY_DONE;
1081 }
1082 
1083 struct notifier_block ip_rt_notifier={
1084         ip_rt_event,
1085         NULL,
1086         0
1087 };
1088 
1089 /*
1090  *      IP registers the packet type and then calls the subprotocol initialisers
1091  */
1092 
1093 void ip_init(void)
1094 {
1095         ip_packet_type.type=htons(ETH_P_IP);
1096         dev_add_pack(&ip_packet_type);
1097 
1098         /* So we flush routes when a device is downed */        
1099         register_netdevice_notifier(&ip_rt_notifier);
1100 
1101 /*      ip_raw_init();
1102         ip_packet_init();
1103         ip_tcp_init();
1104         ip_udp_init();*/
1105 
1106 #ifdef CONFIG_IP_MULTICAST
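              /*
               *      This registers /proc/net/igmp; ip_mc_procinfo generates
               *      the file contents when it is read.
               */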
1107         proc_net_register(&(struct proc_dir_entry) {
1108                 PROC_NET_IGMP, 4, "igmp",
1109                 S_IFREG | S_IRUGO, 1, 0, 0,
1110                 0, &proc_net_inode_operations,
1111                 ip_mc_procinfo
1112         });
1113 #endif
1114 }
1115 
