root/net/ipv4/ip_output.c


DEFINITIONS

This source file includes the following definitions.
  1. ip_loopback
  2. ip_send
  3. ip_send_room
  4. ip_build_header
  5. ip_send_check
  6. ip_queue_xmit
  7. ip_build_xmit
  8. ip_netlink_msg
  9. ip_rt_event
  10. ip_init

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              The Internet Protocol (IP) output module.
   7  *
   8  * Version:     @(#)ip.c        1.0.16b 9/1/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Donald Becker, <becker@super.org>
  13  *              Alan Cox, <Alan.Cox@linux.org>
  14  *              Richard Underwood
  15  *              Stefan Becker, <stefanb@yello.ping.de>
  16  *              Jorge Cwik, <jorge@laser.satlink.net>
  17  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  18  *
  19  *      See ip_input.c for original log
  20  *
  21  *      Fixes:
  22  *              Alan Cox        :       Missing nonblock feature in ip_build_xmit.
  23  *              Mike Kilburn    :       htons() missing in ip_build_xmit.
  24  *              Bradford Johnson:       Fix faulty handling of some frames when 
  25  *                                      no route is found.
  26  *              Alexander Demenshin:    Missing sk/skb free in ip_queue_xmit
  27  *                                      (in case a packet is not accepted by
  28  *                                      the output firewall rules)
  29  */
  30 
  31 #include <asm/segment.h>
  32 #include <asm/system.h>
  33 #include <linux/types.h>
  34 #include <linux/kernel.h>
  35 #include <linux/sched.h>
  36 #include <linux/mm.h>
  37 #include <linux/string.h>
  38 #include <linux/errno.h>
  39 #include <linux/config.h>
  40 
  41 #include <linux/socket.h>
  42 #include <linux/sockios.h>
  43 #include <linux/in.h>
  44 #include <linux/inet.h>
  45 #include <linux/netdevice.h>
  46 #include <linux/etherdevice.h>
  47 #include <linux/proc_fs.h>
  48 #include <linux/stat.h>
  49 
  50 #include <net/snmp.h>
  51 #include <net/ip.h>
  52 #include <net/protocol.h>
  53 #include <net/route.h>
  54 #include <net/tcp.h>
  55 #include <net/udp.h>
  56 #include <linux/skbuff.h>
  57 #include <net/sock.h>
  58 #include <net/arp.h>
  59 #include <net/icmp.h>
  60 #include <net/raw.h>
  61 #include <net/checksum.h>
  62 #include <linux/igmp.h>
  63 #include <linux/ip_fw.h>
  64 #include <linux/firewall.h>
  65 #include <linux/mroute.h>
  66 #include <net/netlink.h>
  67 
  68 /*
  69  *      Loop a packet back to the sender.
  70  */
  71  
  72 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
  73 {
  74         struct device *dev=&loopback_dev;
  75         int len=ntohs(skb->ip_hdr->tot_len);
  76         struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
  77         
  78         if(newskb==NULL)
  79                 return;
  80                 
  81         newskb->link3=NULL;
  82         newskb->sk=NULL;
  83         newskb->dev=dev;
  84         newskb->saddr=skb->saddr;
  85         newskb->daddr=skb->daddr;
  86         newskb->raddr=skb->raddr;
  87         newskb->free=1;
  88         newskb->lock=0;
  89         newskb->users=0;
  90         newskb->pkt_type=skb->pkt_type;
  91         
  92         /*
  93          *      Put a MAC header on the packet
  94          */
  95         ip_send(NULL,newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
  96         /*
  97          *      Add the rest of the data space. 
  98          */
  99         newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
 100         memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
 101 
 102         /*
 103          *      Copy the data
 104          */
 105         memcpy(newskb->ip_hdr,skb->ip_hdr,len);
 106 
 107         /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
 108                 
 109         /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
 110         ip_queue_xmit(NULL, dev, newskb, 1);
 111 }
 112 
 113 
 114 
 115 /*
 116  *      Take an skb, and fill in the MAC header.
 117  */
 118 
 119 int ip_send(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 120 {
 121         int mac = 0;
 122 
 123         skb->dev = dev;
 124         skb->arp = 1;
 125         skb->protocol = htons(ETH_P_IP);
 126         if (dev->hard_header)
 127         {
 128                 /*
 129                  *      Build a hardware header. Source address is our mac, destination unknown
 130                  *      (rebuild header will sort this out)
 131                  */
 132                 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
 133                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 134                 {
 135                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 136                         if (rt->rt_hh->hh_uptodate)
 137                                 return dev->hard_header_len;
 138 #if RT_CACHE_DEBUG >= 2
 139                         printk("ip_send: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 140 #endif
 141                         skb->arp = 0;
 142                         skb->raddr = daddr;
 143                         return dev->hard_header_len;
 144                 }
 145                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 146                 if (mac < 0)
 147                 {
 148                         mac = -mac;
 149                         skb->arp = 0;
 150                         skb->raddr = daddr;     /* next routing address */
 151                 }
 152         }
 153         return mac;
 154 }
 155 
 156 static int ip_send_room(struct rtable * rt, struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
 157 {
 158         int mac = 0;
 159 
 160         skb->dev = dev;
 161         skb->arp = 1;
 162         skb->protocol = htons(ETH_P_IP);
 163         if (dev->hard_header)
 164         {
 165                 skb_reserve(skb,MAX_HEADER);
 166                 if (rt && dev == rt->rt_dev && rt->rt_hh)
 167                 {
 168                         memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
 169                         if (rt->rt_hh->hh_uptodate)
 170                                 return dev->hard_header_len;
 171 #if RT_CACHE_DEBUG >= 2
 172                         printk("ip_send_room: hh miss %08x via %08x\n", daddr, rt->rt_gateway);
 173 #endif
 174                         skb->arp = 0;
 175                         skb->raddr = daddr;
 176                         return dev->hard_header_len;
 177                 }
 178                 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
 179                 if (mac < 0)
 180                 {
 181                         mac = -mac;
 182                         skb->arp = 0;
 183                         skb->raddr = daddr;     /* next routing address */
 184                 }
 185         }
 186         return mac;
 187 }
 188 
 189 int ip_id_count = 0;
 190 
 191 /*
 192  * This routine builds the appropriate hardware/IP headers for
 193  * an outgoing packet.  It assumes that if *dev != NULL then the
 194  * protocol knows what it's doing, otherwise it uses the
 195  * routing/ARP tables to select a device struct.
 196  */
 197 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
 198                 struct device **dev, int type, struct options *opt,
 199                 int len, int tos, int ttl, struct rtable ** rp)
 200 {
 201         struct rtable *rt;
 202         __u32 raddr;
 203         int tmp;
 204         struct iphdr *iph;
 205         __u32 final_daddr = daddr;
 206 
 207 
 208         if (opt && opt->srr)
 209                 daddr = opt->faddr;
 210 
 211         /*
 212          *      See if we need to look up the device.
 213          */
 214 
 215 #ifdef CONFIG_IP_MULTICAST      
 216         if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
 217                 *dev=dev_get(skb->sk->ip_mc_name);
 218 #endif
 219         if (rp)
 220         {
 221                 rt = ip_check_route(rp, daddr, skb->localroute);
 222                 /*
 223                  * If rp != NULL, the ip_rt_put() below should not
 224                  * release the route, so take an extra reference here.
 225                  */
 226                 if (rt)
 227                         ATOMIC_INCR(&rt->rt_refcnt);
 228         }
 229         else
 230                 rt = ip_rt_route(daddr, skb->localroute);
 231 
 232 
 233         if (*dev == NULL)
 234         {
 235                 if (rt == NULL)
 236                 {
 237                         ip_statistics.IpOutNoRoutes++;
 238                         return(-ENETUNREACH);
 239                 }
 240 
 241                 *dev = rt->rt_dev;
 242         }
 243 
 244         if ((LOOPBACK(saddr) && !LOOPBACK(daddr)) || !saddr)
 245                 saddr = rt ? rt->rt_src : (*dev)->pa_addr;
 246 
 247         raddr = rt ? rt->rt_gateway : daddr;
 248 
 249         if (opt && opt->is_strictroute && rt && (rt->rt_flags & RTF_GATEWAY))
 250         {
 251                 ip_rt_put(rt);
 252                 ip_statistics.IpOutNoRoutes++;
 253                 return -ENETUNREACH;
 254         }
 255 
 256         /*
 257          *      Now build the MAC header.
 258          */
 259 
 260         if (type==IPPROTO_TCP)
 261                 tmp = ip_send_room(rt, skb, raddr, len, *dev, saddr);
 262         else
 263                 tmp = ip_send(rt, skb, raddr, len, *dev, saddr);
 264 
 265         ip_rt_put(rt);
 266 
 267         /*
 268          *      Book keeping
 269          */
 270 
 271         skb->dev = *dev;
 272         skb->saddr = saddr;
 273         
 274         /*
 275          *      Now build the IP header.
 276          */
 277 
 278         /*
 279          *      If we are using IPPROTO_RAW, then we don't need an IP header, since
 280          *      one is being supplied to us by the user
 281          */
 282 
 283         if(type == IPPROTO_RAW)
 284                 return (tmp);
 285 
 286         /*
 287          *      Build the IP addresses
 288          */
 289          
 290         if (opt)
 291                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
 292         else
 293                 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
 294 
 295         iph->version  = 4;
 296         iph->ihl      = 5;
 297         iph->tos      = tos;
 298         iph->frag_off = 0;
 299         iph->ttl      = ttl;
 300         iph->daddr    = daddr;
 301         iph->saddr    = saddr;
 302         iph->protocol = type;
 303         skb->ip_hdr   = iph;
 304 
 305         if (!opt || !opt->optlen)
 306                 return sizeof(struct iphdr) + tmp;
 307         iph->ihl += opt->optlen>>2;
 308         ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
 309         return iph->ihl*4 + tmp;
 310 }
 311 
 312 
 313 /*
 314  *      Generate a checksum for an outgoing IP datagram.
 315  */
 316 
 317 void ip_send_check(struct iphdr *iph)
 318 {
 319         iph->check = 0;
 320         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 321 }
 322 
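/*
 * Editor's sketch (not part of the original source): ip_fast_csum() used
 * above is an architecture-optimised routine; a portable equivalent of the
 * same standard Internet checksum (RFC 1071) over an IP header could look
 * roughly like this.  The function name and the plain 16-bit loads are
 * assumptions made purely for illustration.
 */
static __u16 example_ip_header_csum(const void *hdr, unsigned int ihl)
{
        const __u16 *p = hdr;
        unsigned long sum = 0;
        int nwords = ihl * 2;                   /* ihl counts 32-bit words */

        while (nwords-- > 0)
                sum += *p++;                    /* sum the header as it sits in memory */
        while (sum >> 16)                       /* fold the end-around carries back in */
                sum = (sum & 0xFFFF) + (sum >> 16);
        return (__u16)~sum;                     /* one's complement; already in network order */
}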
 323 /*
 324  * Queues a packet to be sent, and starts the transmitter
 325  * if necessary.  If free == 1 then we free the block after
 326  * transmit, otherwise we don't.  If free == 2 we not only
 327  * free the block but also don't assign a new IP id (fragments
 328  * keep the id of the original datagram).  This routine also
 329  * fills in the total length and computes the header checksum.
 330  */
 331 
 332 void ip_queue_xmit(struct sock *sk, struct device *dev,
 333               struct sk_buff *skb, int free)
 334 {
 335         struct iphdr *iph;
 336 /*      unsigned char *ptr;*/
 337 
 338         /* Sanity check */
 339         if (dev == NULL)
 340         {
 341                 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
 342                 return;
 343         }
 344 
 345         IS_SKB(skb);
 346 
 347         /*
 348          *      Do some book-keeping in the packet for later
 349          */
 350 
 351 
 352         skb->dev = dev;
 353         skb->when = jiffies;
 354 
 355         /*
 356          *      Find the IP header and set the length. This is bad
 357          *      but once we get the skb data handling code in the
 358          *      hardware will push its header sensibly and we will
 359          *      set skb->ip_hdr to avoid this mess and the fixed
 360          *      header length problem
 361          */
 362 
 363         iph = skb->ip_hdr;
 364         iph->tot_len = htons(skb->len-(((unsigned char *)iph)-skb->data));
 365 
 366         /*
 367          *      No reassigning numbers to fragments...
 368          */
 369 
 370         if(free!=2)
 371                 iph->id      = htons(ip_id_count++);
 372         else
 373                 free=1;
 374 
 375         /* All buffers without an owner socket get freed */
 376         if (sk == NULL)
 377                 free = 1;
 378 
 379         skb->free = free;
 380 
 381 #ifdef CONFIG_FIREWALL
 382         if(call_out_firewall(PF_INET, skb->dev, iph) < FW_ACCEPT) {
 383                 /* just don't send this packet */
 384                 /* and free socket buffers ;) <aldem@barnet.kharkov.ua> */
 385                 if (free)
 386                   skb->sk = sk;         /* I am not sure *this* is really needed, */
 387                 kfree_skb(skb, FREE_WRITE);     /* but *this* must be here */
 388                 return;
 389         }
 390 #endif  
 391 
 392         /*
 393          *      Do we need to fragment?  Again this is inefficient.
 394          *      We need to somehow lock the original buffer and use
 395          *      bits of it.
 396          */
 397 
 398         if(ntohs(iph->tot_len)> dev->mtu)
 399         {
 400                 ip_fragment(sk,skb,dev,0);
 401                 IS_SKB(skb);
 402                 kfree_skb(skb,FREE_WRITE);
 403                 return;
 404         }
 405 
 406         /*
 407          *      Add an IP checksum
 408          */
 409 
 410         ip_send_check(iph);
 411 
 412         /*
 413          *      Print the frame when debugging
 414          */
 415 
 416         /*
 417          *      More debugging. You cannot queue a packet that is already
 418          *      on a list, so spot this and moan loudly.
 419          */
 420         if (skb->next != NULL)
 421         {
 422                 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
 423                 skb_unlink(skb);
 424         }
 425 
 426         /*
 427          *      If a sender wishes the packet to remain unfreed
 428          *      we add it to his send queue. This arguably belongs
 429          *      in the TCP level since nobody else uses it. BUT
 430          *      remember IPng might change all the rules.
 431          */
 432 
 433         if (!free)
 434         {
 435                 unsigned long flags;
 436                 /* The socket now has more outstanding blocks */
 437 
 438                 sk->packets_out++;
 439 
 440                 /* Protect the list for a moment */
 441                 save_flags(flags);
 442                 cli();
 443 
 444                 if (skb->link3 != NULL)
 445                 {
 446                         NETDEBUG(printk("ip.c: link3 != NULL\n"));
 447                         skb->link3 = NULL;
 448                 }
 449                 if (sk->send_head == NULL)
 450                 {
 451                         sk->send_tail = skb;
 452                         sk->send_head = skb;
 453                 }
 454                 else
 455                 {
 456                         sk->send_tail->link3 = skb;
 457                         sk->send_tail = skb;
 458                 }
 459                 /* skb->link3 is NULL */
 460 
 461                 /* Interrupt restore */
 462                 restore_flags(flags);
 463         }
 464         else
 465                 /* Remember who owns the buffer */
 466                 skb->sk = sk;
 467 
 468         /*
 469          *      If the indicated interface is up and running, send the packet.
 470          */
 471          
 472         ip_statistics.IpOutRequests++;
 473 #ifdef CONFIG_IP_ACCT
 474         ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
 475 #endif  
 476         
 477 #ifdef CONFIG_IP_MULTICAST      
 478 
 479         /*
 480          *      Multicasts are looped back for other local users
 481          */
 482          
 483         if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
 484         {
 485                 if(sk==NULL || sk->ip_mc_loop)
 486                 {
 487                         if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 488                         {
 489                                 ip_loopback(dev,skb);
 490                         }
 491                         else
 492                         {
 493                                 struct ip_mc_list *imc=dev->ip_mc_list;
 494                                 while(imc!=NULL)
 495                                 {
 496                                         if(imc->multiaddr==iph->daddr)
 497                                         {
 498                                                 ip_loopback(dev,skb);
 499                                                 break;
 500                                         }
 501                                         imc=imc->next;
 502                                 }
 503                         }
 504                 }
 505                 /* Multicasts with ttl 0 must not go beyond the host */
 506                 
 507                 if(skb->ip_hdr->ttl==0)
 508                 {
 509                         kfree_skb(skb, FREE_READ);
 510                         return;
 511                 }
 512         }
 513 #endif
 514         if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
 515                 ip_loopback(dev,skb);
 516                 
 517         if (dev->flags & IFF_UP)
 518         {
 519                 /*
 520                  *      If we have an owner use its priority setting,
 521                  *      otherwise use NORMAL
 522                  */
 523 
 524                 if (sk != NULL)
 525                 {
 526                         dev_queue_xmit(skb, dev, sk->priority);
 527                 }
 528                 else
 529                 {
 530                         dev_queue_xmit(skb, dev, SOPRI_NORMAL);
 531                 }
 532         }
 533         else
 534         {
 535                 if(sk)
 536                         sk->err = ENETDOWN;
 537                 ip_statistics.IpOutDiscards++;
 538                 if (free)
 539                         kfree_skb(skb, FREE_WRITE);
 540         }
 541 }
 542 
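/*
 * Editor's sketch (not part of the original source): a hypothetical caller
 * tying ip_build_header() and ip_queue_xmit() together for a one-shot
 * datagram.  The buffer sizing, the choice of IPPROTO_UDP, the absence of a
 * transport header and the error handling are all assumptions made for the
 * example; real users such as ICMP and IGMP differ in the details.
 */
static void example_ip_output(struct sock *sk, __u32 daddr,
                              const void *data, int len)
{
        struct device *dev = NULL;
        struct sk_buff *skb;
        int tmp;

        skb = alloc_skb(len + sizeof(struct iphdr) + MAX_HEADER + 15,
                        GFP_ATOMIC);
        if (skb == NULL)
                return;
        skb->sk = sk;
        skb->localroute = sk->localroute;

        /* Build the MAC and IP headers; the routing code picks *dev for us. */
        tmp = ip_build_header(skb, 0, daddr, &dev, IPPROTO_UDP, NULL,
                              len, sk->ip_tos, sk->ip_ttl, NULL);
        if (tmp < 0)
        {
                kfree_skb(skb, FREE_WRITE);
                return;
        }

        /* Copy the payload in behind the headers just built. */
        memcpy(skb_put(skb, len), data, len);

        /* free == 1: the stack owns the buffer and frees it after transmit. */
        ip_queue_xmit(sk, dev, skb, 1);
}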
 543 
 544 /*
 545  *      Build and send a packet, with as little as one copy
 546  *
 547  *      Doesn't care much about ip options... option length can be
 548  *      different for fragment at 0 and other fragments.
 549  *
 550  *      Note that the fragment at the highest offset is sent first,
 551  *      so the getfrag routine can fill in the TCP/UDP checksum header
 552  *      field in the last fragment it sends... actually it also helps
 553  *      the reassemblers, they can put most packets in at the head of
 554  *      the fragment queue, and they know the total size in advance. This
 555  *      last feature will measurably improve the Linux fragment handler.
 556  *
 557  *      The callback has five args, an arbitrary pointer (copy of frag),
 558  *      the source IP address (may depend on the routing table), the 
 559  *      destination buffer (char *), the offset to copy from, and the
 560  *      length to be copied.
 561  * 
 562  */
 563 
 564 int ip_build_xmit(struct sock *sk,
 565                    void getfrag (const void *,
 566                                  __u32,
 567                                  char *,
 568                                  unsigned int,  
 569                                  unsigned int),
 570                    const void *frag,
 571                    unsigned short int length,
 572                    __u32 daddr,
 573                    __u32 user_saddr,
 574                    struct options * opt,
 575                    int flags,
 576                    int type,
 577                    int noblock) 
 578 {
 579         struct rtable *rt;
 580         unsigned int fraglen, maxfraglen, fragheaderlen;
 581         int offset, mf;
 582         __u32 saddr;
 583         unsigned short id;
 584         struct iphdr *iph;
 585         __u32 raddr;
 586         struct device *dev = NULL;
 587         struct hh_cache * hh=NULL;
 588         int nfrags=0;
 589         __u32 true_daddr = daddr;
 590 
 591         if (opt && opt->srr && !sk->ip_hdrincl)
 592           daddr = opt->faddr;
 593         
 594         ip_statistics.IpOutRequests++;
 595 
 596 #ifdef CONFIG_IP_MULTICAST      
 597         if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
 598         {
 599                 dev=dev_get(sk->ip_mc_name);
 600                 if(!dev)
 601                         return -ENODEV;
 602                 rt=NULL;
 603                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 604                         saddr = sk->saddr;
 605                 else
 606                         saddr = dev->pa_addr;
 607         }
 608         else
 609         {
 610 #endif  
 611                 rt = ip_check_route(&sk->ip_route_cache, daddr,
 612                                     sk->localroute || (flags&MSG_DONTROUTE) ||
 613                                     (opt && opt->is_strictroute));
 614                 if (rt == NULL) 
 615                 {
 616                         ip_statistics.IpOutNoRoutes++;
 617                         return(-ENETUNREACH);
 618                 }
 619                 saddr = rt->rt_src;
 620 
 621                 hh = rt->rt_hh;
 622         
 623                 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
 624                         saddr = sk->saddr;
 625                         
 626                 dev=rt->rt_dev;
 627 #ifdef CONFIG_IP_MULTICAST
 628         }
 629         if (rt && !dev)
 630                 dev = rt->rt_dev;
 631 #endif          
 632         if (user_saddr)
 633                 saddr = user_saddr;
 634 
 635         raddr = rt ? rt->rt_gateway : daddr;
 636         /*
 637          *      Now compute the buffer space we require
 638          */ 
 639          
 640         /*
 641          *      Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
 642  *      choice RAW frames within 20 bytes of maximum size (rare) to the long path.
 643          */
 644 
 645         if (!sk->ip_hdrincl) {
 646                 length += sizeof(struct iphdr);
 647                 if(opt) length += opt->optlen;
 648         }
 649 
 650         if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
 651         {       
 652                 int error;
 653                 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, noblock, &error);
 654                 if(skb==NULL)
 655                 {
 656                         ip_statistics.IpOutDiscards++;
 657                         return error;
 658                 }
 659                 skb->dev=dev;
 660                 skb->protocol = htons(ETH_P_IP);
 661                 skb->free=1;
 662                 skb->when=jiffies;
 663                 skb->sk=sk;
 664                 skb->arp=0;
 665                 skb->saddr=saddr;
 666                 skb->raddr = raddr;
 667                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 668                 if (hh)
 669                 {
 670                         skb->arp=1;
 671                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 672                         if (!hh->hh_uptodate)
 673                         {
 674                                 skb->arp = 0;
 675 #if RT_CACHE_DEBUG >= 2
 676                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 677 #endif                          
 678                         }
 679                 }
 680                 else if(dev->hard_header)
 681                 {
 682                         if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
 683                                 skb->arp=1;
 684                 }
 685                 else
 686                         skb->arp=1;
 687                 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
 688                 dev_lock_list();
 689                 if(!sk->ip_hdrincl)
 690                 {
 691                         iph->version=4;
 692                         iph->ihl=5;
 693                         iph->tos=sk->ip_tos;
 694                         iph->tot_len = htons(length);
 695                         iph->id=htons(ip_id_count++);
 696                         iph->frag_off = 0;
 697                         iph->ttl=sk->ip_ttl;
 698                         iph->protocol=type;
 699                         iph->saddr=saddr;
 700                         iph->daddr=daddr;
 701                         if (opt) 
 702                         {
 703                                 iph->ihl += opt->optlen>>2;
 704                                 ip_options_build(skb, opt,
 705                                                  true_daddr, dev->pa_addr, 0);
 706                         }
 707                         iph->check=0;
 708                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 709                         getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
 710                 }
 711                 else
 712                         getfrag(frag,saddr,(void *)iph,0,length);
 713                 dev_unlock_list();
 714 #ifdef CONFIG_FIREWALL
 715                 if(call_out_firewall(PF_INET, skb->dev, iph)< FW_ACCEPT)
 716                 {
 717                         kfree_skb(skb, FREE_WRITE);
 718                         return -EPERM;
 719                 }
 720 #endif
 721 #ifdef CONFIG_IP_ACCT
 722                 ip_fw_chk(iph,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
 723 #endif          
 724                 if(dev->flags&IFF_UP)
 725                         dev_queue_xmit(skb,dev,sk->priority);
 726                 else
 727                 {
 728                         ip_statistics.IpOutDiscards++;
 729                         kfree_skb(skb, FREE_WRITE);
 730                 }
 731                 return 0;
 732         }
 733         length -= sizeof(struct iphdr);
 734         if (sk && !sk->ip_hdrincl && opt) 
 735         {
 736                 length -= opt->optlen;
 737                 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
 738                 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
 739         }
 740         else 
 741         {
 742                 fragheaderlen = dev->hard_header_len;
 743                 if(!sk->ip_hdrincl)
 744                         fragheaderlen += 20;
 745                 
 746                 /*
 747                  *      Fragheaderlen is the size of 'overhead' on each buffer. Now work
 748                  *      out the size of the frames to send.
 749                  */
 750          
 751                 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
 752         }
 753         
 754         /*
 755          *      Start at the end of the frame by handling the remainder.
 756          */
 757          
 758         offset = length - (length % (maxfraglen - fragheaderlen));
 759         
 760         /*
 761          *      Amount of memory to allocate for final fragment.
 762          */
 763          
 764         fraglen = length - offset + fragheaderlen;
 765         
 766         if(length-offset==0)
 767         {
 768                 fraglen = maxfraglen;
 769                 offset -= maxfraglen-fragheaderlen;
 770         }
 771         
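        /*
         * Editor's worked example (figures assumed for illustration): on an
         * Ethernet device with mtu 1500, hard_header_len 14 and no IP
         * options, fragheaderlen = 14+20 = 34 and maxfraglen =
         * ((1500-20)&~7)+34 = 1514, i.e. 1480 data bytes per fragment.
         * For length = 4000 the loop below sends offset 2960 first
         * (1040 bytes, MF clear), then 1480 and 0 (1480 bytes each, MF
         * set); frag_off carries offset>>3, so 370, 185 and 0 on the wire.
         */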
 772         
 773         /*
 774          *      The last fragment will not have MF (more fragments) set.
 775          */
 776          
 777         mf = 0;
 778 
 779         /*
 780          *      Can't fragment raw packets 
 781          */
 782          
 783         if (sk->ip_hdrincl && offset > 0)
 784                 return(-EMSGSIZE);
 785 
 786         /*
 787          *      Lock the device lists.
 788          */
 789 
 790         dev_lock_list();
 791         
 792         /*
 793          *      Get an identifier
 794          */
 795          
 796         id = htons(ip_id_count++);
 797 
 798         /*
 799          *      Begin outputting the bytes.
 800          */
 801          
 802         do 
 803         {
 804                 struct sk_buff * skb;
 805                 int error;
 806                 char *data;
 807 
 808                 /*
 809                  *      Get the memory we require with some space left for alignment.
 810                  */
 811 
 812                 skb = sock_alloc_send_skb(sk, fraglen+15, 0, noblock, &error);
 813                 if (skb == NULL)
 814                 {
 815                         ip_statistics.IpOutDiscards++;
 816                         if(nfrags>1)
 817                                 ip_statistics.IpFragCreates++;                  
 818                         dev_unlock_list();
 819                         return(error);
 820                 }
 821                 
 822                 /*
 823                  *      Fill in the control structures
 824                  */
 825                  
 826                 skb->dev = dev;
 827                 skb->protocol = htons(ETH_P_IP);
 828                 skb->when = jiffies;
 829                 skb->free = 1; /* dubious, this one */
 830                 skb->sk = sk;
 831                 skb->arp = 0;
 832                 skb->saddr = saddr;
 833                 skb->raddr = raddr;
 834                 skb_reserve(skb,(dev->hard_header_len+15)&~15);
 835                 data = skb_put(skb, fraglen-dev->hard_header_len);
 836 
 837                 /*
 838          *      Save ourselves the ARP work where possible. In the optimal case we do no route lookup (route cache ok),
 839                  *      no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
 840                  *      this can be fixed later. For gateway routes we ought to have a rt->.. header cache
 841                  *      pointer to speed header cache builds for identical targets.
 842                  */
 843                  
 844                 if (hh)
 845                 {
 846                         skb->arp=1;
 847                         memcpy(skb_push(skb,dev->hard_header_len),hh->hh_data,dev->hard_header_len);
 848                         if (!hh->hh_uptodate)
 849                         {
 850                                 skb->arp = 0;
 851 #if RT_CACHE_DEBUG >= 2
 852                                 printk("ip_build_xmit: hh miss %08x via %08x\n", rt->rt_dst, rt->rt_gateway);
 853 #endif                          
 854                         }
 855                 }
 856                 else if (dev->hard_header)
 857                 {
 858                         if(dev->hard_header(skb, dev, ETH_P_IP, 
 859                                                 NULL, NULL, 0)>0)
 860                                 skb->arp=1;
 861                 }
 862                 
 863                 /*
 864                  *      Find where to start putting bytes.
 865                  */
 866                  
 867                 skb->ip_hdr = iph = (struct iphdr *)data;
 868 
 869                 /*
 870                  *      Only write IP header onto non-raw packets 
 871                  */
 872                  
 873                 if(!sk->ip_hdrincl) 
 874                 {
 875 
 876                         iph->version = 4;
 877                         iph->ihl = 5; /* ugh */
 878                         if (opt) {
 879                                 iph->ihl += opt->optlen>>2;
 880                                 ip_options_build(skb, opt,
 881                                                  true_daddr, dev->pa_addr, offset);
 882                         }
 883                         iph->tos = sk->ip_tos;
 884                         iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
 885                         iph->id = id;
 886                         iph->frag_off = htons(offset>>3);
 887                         iph->frag_off |= mf;
 888 #ifdef CONFIG_IP_MULTICAST
 889                         if (MULTICAST(daddr))
 890                                 iph->ttl = sk->ip_mc_ttl;
 891                         else
 892 #endif
 893                                 iph->ttl = sk->ip_ttl;
 894                         iph->protocol = type;
 895                         iph->check = 0;
 896                         iph->saddr = saddr;
 897                         iph->daddr = daddr;
 898                         iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 899                         data += iph->ihl*4;
 900                         
 901                         /*
 902                          *      Any further fragments will have MF set.
 903                          */
 904                          
 905                         mf = htons(IP_MF);
 906                 }
 907                 
 908                 /*
 909                  *      User data callback
 910                  */
 911 
 912                 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
 913                 
 914                 /*
 915                  *      Account for the fragment.
 916                  */
 917                  
 918 #ifdef CONFIG_FIREWALL
 919                 if(!offset && call_out_firewall(PF_INET, skb->dev, iph) < FW_ACCEPT)
 920                 {
 921                         kfree_skb(skb, FREE_WRITE);
 922                         dev_unlock_list();
 923                         return -EPERM;
 924                 }
 925 #endif          
 926 #ifdef CONFIG_IP_ACCT
 927                 if(!offset)
 928                         ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
 929 #endif  
 930                 offset -= (maxfraglen-fragheaderlen);
 931                 fraglen = maxfraglen;
 932 
 933 #ifdef CONFIG_IP_MULTICAST
 934 
 935                 /*
 936                  *      Multicasts are looped back for other local users
 937                  */
 938          
 939                 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK)) 
 940                 {
 941                         /*
 942                          *      Loop back any frames. The check for IGMP_ALL_HOSTS is because
 943                          *      you are always magically a member of this group.
 944                          *
 945                          *      Always loop back all host messages when running as a multicast router.
 946                          */
 947                          
 948                         if(sk==NULL || sk->ip_mc_loop)
 949                         {
 950                                 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
 951                                         ip_loopback(dev,skb);
 952                                 else 
 953                                 {
 954                                         struct ip_mc_list *imc=dev->ip_mc_list;
 955                                         while(imc!=NULL) 
 956                                         {
 957                                                 if(imc->multiaddr==daddr) 
 958                                                 {
 959                                                         ip_loopback(dev,skb);
 960                                                         break;
 961                                                 }
 962                                                 imc=imc->next;
 963                                         }
 964                                 }
 965                         }
 966 
 967                         /*
 968                          *      Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
 969                          *      extra clone.
 970                          */
 971 
 972                         if(skb->ip_hdr->ttl==0)
 973                                 kfree_skb(skb, FREE_READ);
 974                 }
 975 #endif
 976 
 977                 nfrags++;
 978                 
 979                 /*
 980                  *      BSD loops broadcasts
 981                  */
 982                  
 983                 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
 984                         ip_loopback(dev,skb);
 985 
 986                 /*
 987                  *      Now queue the bytes into the device.
 988                  */
 989                  
 990                 if (dev->flags & IFF_UP) 
 991                 {
 992                         dev_queue_xmit(skb, dev, sk->priority);
 993                 } 
 994                 else 
 995                 {
 996                         /*
 997                          *      Whoops... 
 998                          */
 999                          
1000                         ip_statistics.IpOutDiscards++;
1001                         if(nfrags>1)
1002                                 ip_statistics.IpFragCreates+=nfrags;
1003                         kfree_skb(skb, FREE_WRITE);
1004                         dev_unlock_list();
1005                         /*
1006                          *      BSD behaviour.
1007                          */
1008                         if(sk!=NULL)
1009                                 sk->err=ENETDOWN;
1010                         return(0); /* lose rest of fragments */
1011                 }
1012         } 
1013         while (offset >= 0);
1014         if(nfrags>1)
1015                 ip_statistics.IpFragCreates+=nfrags;
1016         dev_unlock_list();
1017         return(0);
1018 }
1019     
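/*
 * Editor's sketch (not part of the original source): the shape of a getfrag
 * callback as described in the comment above ip_build_xmit().  The caller's
 * frag pointer, the chosen source address, a destination pointer inside the
 * fragment under construction, and the offset/length of user data to copy
 * are passed in.  The structure and names here are invented for the
 * example; the real UDP and raw-socket callbacks also fold the copied
 * bytes into a running transport checksum.
 */
struct example_frag
{
        const unsigned char *data;      /* start of the caller's payload */
};

static void example_getfrag(const void *p, __u32 saddr, char *to,
                            unsigned int offset, unsigned int fraglen)
{
        const struct example_frag *ef = p;

        memcpy(to, ef->data + offset, fraglen);
}

/*
 * A matching (hypothetical) call, sending len bytes from buf to daddr:
 *
 *      struct example_frag ef = { buf };
 *      err = ip_build_xmit(sk, example_getfrag, &ef, len, daddr,
 *                          0, NULL, flags, IPPROTO_UDP, noblock);
 */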
1020 
1021 /*
1022  *      IP protocol layer initialiser
1023  */
1024 
1025 static struct packet_type ip_packet_type =
1026 {
1027         0,      /* MUTTER ntohs(ETH_P_IP),*/
1028         NULL,   /* All devices */
1029         ip_rcv,
1030         NULL,
1031         NULL,
1032 };
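/*
 * Editor's note: the type field above is deliberately left 0 and is set to
 * htons(ETH_P_IP) at runtime by ip_init() below, apparently because htons()
 * could not be used in a static initialiser here (hence the MUTTER).
 */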
1033 
1034 #ifdef CONFIG_RTNETLINK
1035 
1036 /*
1037  *      Netlink hooks for IP
1038  */
1039  
1040 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
1041 {
1042         struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
1043         struct netlink_rtinfo *nrt;
1044         struct sockaddr_in *s;
1045         if(skb==NULL)
1046                 return;
1047         skb->free=1;
1048         nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
1049         nrt->rtmsg_type=msg;
1050         s=(struct sockaddr_in *)&nrt->rtmsg_dst;
1051         s->sin_family=AF_INET;
1052         s->sin_addr.s_addr=daddr;
1053         s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
1054         s->sin_family=AF_INET;
1055         s->sin_addr.s_addr=gw;
1056         s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
1057         s->sin_family=AF_INET;
1058         s->sin_addr.s_addr=mask;
1059         nrt->rtmsg_flags=flags;
1060         nrt->rtmsg_metric=metric;
1061         strcpy(nrt->rtmsg_device,name);
1062         netlink_post(NETLINK_ROUTE, skb);
1063 }       
1064 
1065 #endif
1066 
1067 /*
1068  *      Device notifier
1069  */
1070  
1071 static int ip_rt_event(struct notifier_block *this, unsigned long event, void *ptr)
1072 {
1073         struct device *dev=ptr;
1074         if(event==NETDEV_DOWN)
1075         {
1076                 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
1077                 ip_rt_flush(dev);
1078         }
1079 /*
1080  *      Join the initial group if multicast.
1081  */             
1082         if(event==NETDEV_UP)
1083         {
1084 #ifdef CONFIG_IP_MULTICAST      
1085                 ip_mc_allhost(dev);
1086 #endif          
1087                 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
1088         }
1089         return NOTIFY_DONE;
1090 }
1091 
1092 struct notifier_block ip_rt_notifier={
1093         ip_rt_event,
1094         NULL,
1095         0
1096 };
1097 
1098 /*
1099  *      IP registers the packet type and then calls the subprotocol initialisers
1100  */
1101 
1102 void ip_init(void)
1103 {
1104         ip_packet_type.type=htons(ETH_P_IP);
1105         dev_add_pack(&ip_packet_type);
1106 
1107         /* So we flush routes when a device is downed */        
1108         register_netdevice_notifier(&ip_rt_notifier);
1109 
1110 /*      ip_raw_init();
1111         ip_packet_init();
1112         ip_tcp_init();
1113         ip_udp_init();*/
1114 
1115 #ifdef CONFIG_IP_MULTICAST
1116         proc_net_register(&(struct proc_dir_entry) {
1117                 PROC_NET_IGMP, 4, "igmp",
1118                 S_IFREG | S_IRUGO, 1, 0, 0,
1119                 0, &proc_net_inode_operations,
1120                 ip_mc_procinfo
1121         });
1122 #endif
1123 }
1124 
