root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rt_del
  2. ip_rt_flush
  3. default_mask
  4. guess_mask
  5. get_gw_dev
  6. ip_rt_add
  7. bad_mask
  8. rt_new
  9. rt_kill
  10. rt_get_info
  11. ip_rt_route
  12. ip_rt_local
  13. ip_rt_ioctl

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *
  36  *              This program is free software; you can redistribute it and/or
  37  *              modify it under the terms of the GNU General Public License
  38  *              as published by the Free Software Foundation; either version
  39  *              2 of the License, or (at your option) any later version.
  40  */
  41 
  42 #include <asm/segment.h>
  43 #include <asm/system.h>
  44 #include <linux/types.h>
  45 #include <linux/kernel.h>
  46 #include <linux/sched.h>
  47 #include <linux/mm.h>
  48 #include <linux/string.h>
  49 #include <linux/socket.h>
  50 #include <linux/sockios.h>
  51 #include <linux/errno.h>
  52 #include <linux/in.h>
  53 #include <linux/inet.h>
  54 #include <linux/netdevice.h>
  55 #include <net/ip.h>
  56 #include <net/protocol.h>
  57 #include <net/route.h>
  58 #include <net/tcp.h>
  59 #include <linux/skbuff.h>
  60 #include <net/sock.h>
  61 #include <net/icmp.h>
  62 
  63 /*
  64  *      The routing table list
  65  */
  66 
  67 static struct rtable *rt_base = NULL;
  68 unsigned long rt_stamp = 1;             /* Routing table version stamp for caches ( 0 is 'unset' ) */
  69 
  70 /*
  71  *      Pointer to the loopback route
  72  */
  73  
  74 static struct rtable *rt_loopback = NULL;
  75 
  76 /*
  77  *      Remove a routing table entry.
  78  *      Should we return a status value here ?
  79  */
  80 
  81 static void rt_del(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
  82                 char *devname, __u32 gtw, short rt_flags, short metric)
  83 {
  84         struct rtable *r, **rp;
  85         unsigned long flags;
  86 
  87         rp = &rt_base;
  88         
  89         /*
  90          *      This must be done with interrupts off because we could take
  91          *      an ICMP_REDIRECT.
  92          */
  93          
  94         save_flags(flags);
  95         cli();
  96         while((r = *rp) != NULL) 
  97         {
  98                 /*
  99                  *      Make sure the destination and netmask match.
 100                  *      metric, gateway and device are also checked
 101                  *      if they were specified.
 102                  */
 103                 if (r->rt_dst != dst ||
 104                     (mask && r->rt_mask != mask) ||
 105                     (gtw && r->rt_gateway != gtw) ||
 106                     (metric >= 0 && r->rt_metric != metric) ||
 107                     (devname && strcmp((r->rt_dev)->name,devname) != 0) )
 108                 {
 109                         rp = &r->rt_next;
 110                         continue;
 111                 }
 112                 *rp = r->rt_next;
 113                 
 114                 /*
 115                  *      If we delete the loopback route update its pointer.
 116                  */
 117                  
 118                 if (rt_loopback == r)
 119                         rt_loopback = NULL;
 120                 kfree_s(r, sizeof(struct rtable));
 121         } 
 122         rt_stamp++;             /* New table revision */
 123         
 124         restore_flags(flags);
 125 }
 126 
 127 
 128 /*
 129  *      Remove all routing table entries for a device. This is called when
 130  *      a device is downed.
 131  */
 132  
 133 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 134 {
 135         struct rtable *r;
 136         struct rtable **rp;
 137         unsigned long flags;
 138 
 139         rp = &rt_base;
 140         save_flags(flags);
 141         cli();
 142         while ((r = *rp) != NULL) {
 143                 if (r->rt_dev != dev) {
 144                         rp = &r->rt_next;
 145                         continue;
 146                 }
 147                 *rp = r->rt_next;
 148                 if (rt_loopback == r)
 149                         rt_loopback = NULL;
 150                 kfree_s(r, sizeof(struct rtable));
 151         } 
 152         rt_stamp++;             /* New table revision */
 153         restore_flags(flags);
 154 }
 155 
 156 /*
 157  *      Used by 'rt_add()' when we can't get the netmask any other way..
 158  *
 159  *      If the lower byte or two are zero, we guess the mask based on the
 160  *      number of zero 8-bit net numbers, otherwise we use the "default"
 161  *      masks judging by the destination address and our device netmask.
 162  */
 163  
 164 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 165 {
 166         dst = ntohl(dst);
 167         if (IN_CLASSA(dst))
 168                 return htonl(IN_CLASSA_NET);
 169         if (IN_CLASSB(dst))
 170                 return htonl(IN_CLASSB_NET);
 171         return htonl(IN_CLASSC_NET);
 172 }
 173 
 174 
 175 /*
 176  *      If no mask is specified then generate a default entry.
 177  */
 178 
 179 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181         __u32 mask;
 182 
 183         if (!dst)
 184                 return 0;
 185         mask = default_mask(dst);
 186         if ((dst ^ dev->pa_addr) & mask)
 187                 return mask;
 188         return dev->pa_mask;
 189 }
 190 
 191 
 192 /*
 193  *      Find the route entry through which our gateway will be reached
 194  */
 195  
 196 static inline struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 197 {
 198         struct rtable * rt;
 199 
 200         for (rt = rt_base ; ; rt = rt->rt_next) 
 201         {
 202                 if (!rt)
 203                         return NULL;
 204                 if ((gw ^ rt->rt_dst) & rt->rt_mask)
 205                         continue;
 206                 /* 
 207                  *      Gateways behind gateways are a no-no 
 208                  */
 209                  
 210                 if (rt->rt_flags & RTF_GATEWAY)
 211                         return NULL;
 212                 return rt->rt_dev;
 213         }
 214 }
 215 
 216 /*
 217  *      Rewrote rt_add(), as the old one was weird - Linus
 218  *
 219  *      This routine is used to update the IP routing table, either
 220  *      from the kernel (ICMP_REDIRECT) or via an ioctl call issued
 221  *      by the superuser.
 222  */
 223  
 224 void ip_rt_add(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 225         __u32 gw, struct device *dev, unsigned short mtu,
 226         unsigned long window, unsigned short irtt, short metric)
 227 {
 228         struct rtable *r, *rt;
 229         struct rtable **rp;
 230         unsigned long cpuflags;
 231         int duplicate = 0;
 232 
 233         /*
 234          *      A host is a unique machine and has no network bits.
 235          */
 236          
 237         if (flags & RTF_HOST) 
 238         {
 239                 mask = 0xffffffff;
 240         } 
 241         
 242         /*
 243          *      Calculate the network mask
 244          */
 245          
 246         else if (!mask) 
 247         {
 248                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 249                 {
 250                         mask = dev->pa_mask;
 251                         flags &= ~RTF_GATEWAY;
 252                         if (flags & RTF_DYNAMIC) 
 253                         {
 254                                 /*printk("Dynamic route to my own net rejected\n");*/
 255                                 return;
 256                         }
 257                 } 
 258                 else
 259                         mask = guess_mask(dst, dev);
 260                 dst &= mask;
 261         }
 262         
 263         /*
 264          *      A gateway must be reachable and not a local address
 265          */
 266          
 267         if (gw == dev->pa_addr)
 268                 flags &= ~RTF_GATEWAY;
 269                 
 270         if (flags & RTF_GATEWAY) 
 271         {
 272                 /*
 273                  *      Don't try to add a gateway we can't reach.. 
 274                  */
 275                  
 276                 if (dev != get_gw_dev(gw))
 277                         return;
 278                         
 279                 flags |= RTF_GATEWAY;
 280         } 
 281         else
 282                 gw = 0;
 283                 
 284         /*
 285          *      Allocate an entry and fill it in.
 286          */
 287          
 288         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
 289         if (rt == NULL) 
 290         {
 291                 return;
 292         }
 293         memset(rt, 0, sizeof(struct rtable));
 294         rt->rt_flags = flags | RTF_UP;
 295         rt->rt_dst = dst;
 296         rt->rt_dev = dev;
 297         rt->rt_gateway = gw;
 298         rt->rt_mask = mask;
 299         rt->rt_mss = dev->mtu - HEADER_SIZE;
 300         rt->rt_metric = metric;
 301         rt->rt_window = 0;      /* Default is no clamping */
 302 
 303         /* Are the MSS/Window valid ? */
 304 
 305         if(rt->rt_flags & RTF_MSS)
 306                 rt->rt_mss = mtu;
 307                 
 308         if(rt->rt_flags & RTF_WINDOW)
 309                 rt->rt_window = window;
 310         if(rt->rt_flags & RTF_IRTT)
 311                 rt->rt_irtt = irtt;
 312 
 313         /*
 314          *      What we have to do is loop though this until we have
 315          *      found the first address which has a higher generality than
 316          *      the one in rt.  Then we can put rt in right before it.
 317          *      The interrupts must be off for this process.
 318          */
 319 
 320         save_flags(cpuflags);
 321         cli();
 322 
 323         /*
 324          *      Remove old route if we are getting a duplicate. 
 325          */
 326          
 327         rp = &rt_base;
 328         while ((r = *rp) != NULL) 
 329         {
 330                 if (r->rt_dst != dst || 
 331                     r->rt_mask != mask)
 332                 {
 333                         rp = &r->rt_next;
 334                         continue;
 335                 }
 336                 if (r->rt_metric != metric && r->rt_gateway != gw)
 337                 {
 338                         duplicate = 1;
 339                         rp = &r->rt_next;
 340                         continue;
 341                 }
 342                 *rp = r->rt_next;
 343                 if (rt_loopback == r)
 344                         rt_loopback = NULL;
 345                 kfree_s(r, sizeof(struct rtable));
 346         }
 347         
 348         /*
 349          *      Add the new route 
 350          */
 351          
 352         rp = &rt_base;
 353         while ((r = *rp) != NULL) {
 354                 /*
 355                  * When adding a duplicate route, add it before
 356                  * the route with a higher metric.
 357                  */
 358                 if (duplicate &&
 359                     r->rt_dst == dst &&
 360                     r->rt_mask == mask &&
 361                     r->rt_metric > metric)
 362                         break;
 363                 else
 364                 /*
 365                  * Otherwise, just add it before the
 366                  * route with a higher generality.
 367                  */
 368                         if ((r->rt_mask & mask) != mask)
 369                                 break;
 370                 rp = &r->rt_next;
 371         }
 372         rt->rt_next = r;
 373         *rp = rt;
 374         
 375         /*
 376          *      Update the loopback route
 377          */
 378          
 379         if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
 380                 rt_loopback = rt;
 381 
 382         rt_stamp++;             /* New table revision */
 383                 
 384         /*
 385          *      Restore the interrupts and return
 386          */
 387          
 388         restore_flags(cpuflags);
 389         return;
 390 }
 391 
 392 
 393 /*
 394  *      Check if a mask is acceptable.
 395  */
 396  
 397 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 398 {
 399         if (addr & (mask = ~mask))
 400                 return 1;
 401         mask = ntohl(mask);
 402         if (mask & (mask+1))
 403                 return 1;
 404         return 0;
 405 }
 406 
 407 /*
 408  *      Process a route add request from the user
 409  */
 410  
 411 static int rt_new(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 412 {
 413         int err;
 414         char * devname;
 415         struct device * dev = NULL;
 416         unsigned long flags;
 417         __u32 daddr, mask, gw;
 418         short metric;
 419 
 420         /*
 421          *      If a device is specified find it.
 422          */
 423          
 424         if ((devname = r->rt_dev) != NULL) 
 425         {
 426                 err = getname(devname, &devname);
 427                 if (err)
 428                         return err;
 429                 dev = dev_get(devname);
 430                 putname(devname);
 431                 if (!dev)
 432                         return -EINVAL;
 433         }
 434         
 435         /*
 436          *      If the device isn't INET, don't allow it
 437          */
 438 
 439         if (r->rt_dst.sa_family != AF_INET)
 440                 return -EAFNOSUPPORT;
 441 
 442         /*
 443          *      Make local copies of the important bits
 444          *      We decrement the metric by one for BSD compatibility.
 445          */
 446          
 447         flags = r->rt_flags;
 448         daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
 449         mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
 450         gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
 451         metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
 452 
 453         /*
 454          *      BSD emulation: Permits route add someroute gw one-of-my-addresses
 455          *      to indicate which iface. Not as clean as the nice Linux dev technique
 456          *      but people keep using it... 
 457          */
 458          
 459         if (!dev && (flags & RTF_GATEWAY)) 
 460         {
 461                 struct device *dev2;
 462                 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
 463                 {
 464                         if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
 465                         {
 466                                 flags &= ~RTF_GATEWAY;
 467                                 dev = dev2;
 468                                 break;
 469                         }
 470                 }
 471         }
 472 
 473         /*
 474          *      Ignore faulty masks
 475          */
 476          
 477         if (bad_mask(mask, daddr))
 478                 mask=0;
 479 
 480         /*
 481          *      Set the mask to nothing for host routes.
 482          */
 483          
 484         if (flags & RTF_HOST)
 485                 mask = 0xffffffff;
 486         else if (mask && r->rt_genmask.sa_family != AF_INET)
 487                 return -EAFNOSUPPORT;
 488 
 489         /*
 490          *      You can only gateway IP via IP..
 491          */
 492          
 493         if (flags & RTF_GATEWAY) 
 494         {
 495                 if (r->rt_gateway.sa_family != AF_INET)
 496                         return -EAFNOSUPPORT;
 497                 if (!dev)
 498                         dev = get_gw_dev(gw);
 499         } 
 500         else if (!dev)
 501                 dev = ip_dev_check(daddr);
 502 
 503         /*
 504          *      Unknown device.
 505          */
 506          
 507         if (dev == NULL)
 508                 return -ENETUNREACH;
 509 
 510         /*
 511          *      Add the route
 512          */
 513          
 514         ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
 515         return 0;
 516 }
 517 
 518 
 519 /*
 520  *      Remove a route, as requested by the user.
 521  */
 522 
 523 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 524 {
 525         struct sockaddr_in *trg;
 526         struct sockaddr_in *msk;
 527         struct sockaddr_in *gtw;
 528         char *devname;
 529         int err;
 530 
 531         trg = (struct sockaddr_in *) &r->rt_dst;
 532         msk = (struct sockaddr_in *) &r->rt_genmask;
 533         gtw = (struct sockaddr_in *) &r->rt_gateway;
 534         if ((devname = r->rt_dev) != NULL) 
 535         {
 536                 err = getname(devname, &devname);
 537                 if (err)
 538                         return err;
 539         }
 540         /*
 541          * metric can become negative here if it wasn't filled in
 542          * but that's a fortunate accident; we really use that in rt_del.
 543          */
 544         rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, devname,
 545                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
 546         if ( devname != NULL )
 547                 putname(devname);
 548         return 0;
 549 }
 550 
 551 
 552 /* 
 553  *      Called from the PROCfs module. This outputs /proc/net/route.
 554  */
 555  
 556 int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
 557 {
 558         struct rtable *r;
 559         int len=0;
 560         off_t pos=0;
 561         off_t begin=0;
 562         int size;
 563 
 564         len += sprintf(buffer,
 565                  "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
 566         pos=len;
 567   
 568         /*
 569          *      This isn't quite right -- r->rt_dst is a struct! 
 570          */
 571          
 572         for (r = rt_base; r != NULL; r = r->rt_next) 
 573         {
 574                 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
 575                         r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
 576                         r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
 577                         (unsigned long)r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
 578                 len+=size;
 579                 pos+=size;
 580                 if(pos<offset)
 581                 {
 582                         len=0;
 583                         begin=pos;
 584                 }
 585                 if(pos>offset+length)
 586                         break;
 587         }
 588         
 589         *start=buffer+(offset-begin);
 590         len-=(offset-begin);
 591         if(len>length)
 592                 len=length;
 593         return len;
 594 }
 595 
 596 /*
 597  *      This is hackish, but results in better code. Use "-S" to see why.
 598  */
 599  
 600 #define early_out ({ goto no_route; 1; })
 601 
 602 /*
 603  *      Route a packet. This needs to be fairly quick. Florian & Co. 
 604  *      suggested a unified ARP and IP routing cache. Done right its
 605  *      probably a brilliant idea. I'd actually suggest a unified
 606  *      ARP/IP routing/Socket pointer cache. Volunteers welcome
 607  */
 608  
 609 struct rtable * ip_rt_route(__u32 daddr, struct options *opt, __u32 *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 610 {
 611         struct rtable *rt;
 612 
 613         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 614         {
 615                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 616                         break;
 617                 /*
 618                  *      broadcast addresses can be special cases.. 
 619                  */
 620                 if (rt->rt_flags & RTF_GATEWAY)
 621                         continue;                
 622                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 623                     (rt->rt_dev->pa_brdaddr == daddr))
 624                         break;
 625         }
 626         
 627         if(rt->rt_flags&RTF_REJECT)
 628                 return NULL;
 629         
 630         if(src_addr!=NULL)
 631                 *src_addr= rt->rt_dev->pa_addr;
 632                 
 633         if (daddr == rt->rt_dev->pa_addr) {
 634                 if ((rt = rt_loopback) == NULL)
 635                         goto no_route;
 636         }
 637         rt->rt_use++;
 638         return rt;
 639 no_route:
 640         return NULL;
 641 }
 642 
 643 struct rtable * ip_rt_local(__u32 daddr, struct options *opt, __u32 *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 644 {
 645         struct rtable *rt;
 646 
 647         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 648         {
 649                 /*
 650                  *      No routed addressing.
 651                  */
 652                 if (rt->rt_flags&RTF_GATEWAY)
 653                         continue;
 654                         
 655                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 656                         break;
 657                 /*
 658                  *      broadcast addresses can be special cases.. 
 659                  */
 660                  
 661                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 662                      rt->rt_dev->pa_brdaddr == daddr)
 663                         break;
 664         }
 665         
 666         if(src_addr!=NULL)
 667                 *src_addr= rt->rt_dev->pa_addr;
 668                 
 669         if (daddr == rt->rt_dev->pa_addr) {
 670                 if ((rt = rt_loopback) == NULL)
 671                         goto no_route;
 672         }
 673         rt->rt_use++;
 674         return rt;
 675 no_route:
 676         return NULL;
 677 }
 678 
 679 /*
 680  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 681  */
 682  
 683 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 684 {
 685         int err;
 686         struct rtentry rt;
 687 
 688         switch(cmd) 
 689         {
 690                 case SIOCADDRT:         /* Add a route */
 691                 case SIOCDELRT:         /* Delete a route */
 692                         if (!suser())
 693                                 return -EPERM;
 694                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
 695                         if (err)
 696                                 return err;
 697                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
 698                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
 699         }
 700 
 701         return -EINVAL;
 702 }

/* [previous][next][first][last][top][bottom][index][help] */