root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_del
  2. ip_rt_flush
  3. default_mask
  4. guess_mask
  5. get_gw_dev
  6. ip_rt_add
  7. bad_mask
  8. rt_new
  9. rt_kill
  10. rt_get_info
  11. ip_rt_route
  12. ip_rt_local
  13. ip_rt_ioctl

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *
  35  *              This program is free software; you can redistribute it and/or
  36  *              modify it under the terms of the GNU General Public License
  37  *              as published by the Free Software Foundation; either version
  38  *              2 of the License, or (at your option) any later version.
  39  */
  40 
  41 #include <asm/segment.h>
  42 #include <asm/system.h>
  43 #include <linux/types.h>
  44 #include <linux/kernel.h>
  45 #include <linux/sched.h>
  46 #include <linux/mm.h>
  47 #include <linux/string.h>
  48 #include <linux/socket.h>
  49 #include <linux/sockios.h>
  50 #include <linux/errno.h>
  51 #include <linux/in.h>
  52 #include <linux/inet.h>
  53 #include <linux/netdevice.h>
  54 #include <net/ip.h>
  55 #include <net/protocol.h>
  56 #include <net/route.h>
  57 #include <net/tcp.h>
  58 #include <linux/skbuff.h>
  59 #include <net/sock.h>
  60 #include <net/icmp.h>
  61 
  62 /*
  63  *      Head of the routing table: a singly linked list (chained through rt_next) that ip_rt_add() keeps ordered from the most specific mask to the most general one
  64  */
  65 
  66 static struct rtable *rt_base = NULL;
  67 unsigned long rt_stamp = 1;             /* Routing table version stamp for caches ( 0 is 'unset' ); incremented by every add, delete and flush in this file */
  68 
  69 /*
  70  *      Pointer to the loopback route: set when a route on an IFF_LOOPBACK device is added while it is NULL, reset to NULL when that entry is freed
  71  */
  72  
  73 static struct rtable *rt_loopback = NULL;
  74 
  75 /*
  76  *      Remove a routing table entry.
  77  *      Should we return a status value here ?
  78  */
  79 
  80 static void rt_del(unsigned long dst, unsigned long mask,
     /* [previous][next][first][last][top][bottom][index][help] */
  81                 char *devname, unsigned long gtw, short rt_flags, short metric)
  82 {
  83         struct rtable *r, **rp;
  84         unsigned long flags;
  85 
  86         rp = &rt_base;
  87         
  88         /*
  89          *      This must be done with interrupts off because we could take
  90          *      an ICMP_REDIRECT.
  91          */
  92          
  93         save_flags(flags);
  94         cli();
  95         while((r = *rp) != NULL) 
  96         {
  97                 /*
  98                  *      Make sure the destination and netmask match.
  99                  *      metric, gateway and device are also checked
 100                  *      if they were specified.
 101                  */
 102                 if (r->rt_dst != dst ||
 103                     (mask && r->rt_mask != mask) ||
 104                     (gtw && r->rt_gateway != gtw) ||
 105                     (metric >= 0 && r->rt_metric != metric) ||
 106                     (devname && strcmp((r->rt_dev)->name,devname) != 0) )
 107                 {
 108                         rp = &r->rt_next;
 109                         continue;
 110                 }
 111                 *rp = r->rt_next;
 112                 
 113                 /*
 114                  *      If we delete the loopback route update its pointer.
 115                  */
 116                  
 117                 if (rt_loopback == r)
 118                         rt_loopback = NULL;
 119                 kfree_s(r, sizeof(struct rtable));
 120         } 
 121         rt_stamp++;             /* New table revision */
 122         
 123         restore_flags(flags);
 124 }
 125 
 126 
 127 /*
 128  *      Remove all routing table entries for a device. This is called when
 129  *      a device is downed.
 130  */
 131  
 132 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 133 {
 134         struct rtable *r;
 135         struct rtable **rp;
 136         unsigned long flags;
 137 
 138         rp = &rt_base;
 139         save_flags(flags);
 140         cli();
 141         while ((r = *rp) != NULL) {
 142                 if (r->rt_dev != dev) {
 143                         rp = &r->rt_next;
 144                         continue;
 145                 }
 146                 *rp = r->rt_next;
 147                 if (rt_loopback == r)
 148                         rt_loopback = NULL;
 149                 kfree_s(r, sizeof(struct rtable));
 150         } 
 151         rt_stamp++;             /* New table revision */
 152         restore_flags(flags);
 153 }
 154 
 /*
  *	Classful netmask for a destination address.
  *
  *	'dst' is in network byte order and so is the result.  Class A and
  *	class B addresses get their natural masks; everything else
  *	(including addresses above class C) gets the class C mask.
  */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_order = ntohl(dst);
	unsigned long net;

	if (IN_CLASSA(host_order))
		net = IN_CLASSA_NET;
	else if (IN_CLASSB(host_order))
		net = IN_CLASSB_NET;
	else
		net = IN_CLASSC_NET;

	return htonl(net);
}
 172 
 173 
 174 /*
 175  *      If no mask is specified then generate a default entry.
 176  */
 177 
 178 static unsigned long guess_mask(unsigned long dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180         unsigned long mask;
 181 
 182         if (!dst)
 183                 return 0;
 184         mask = default_mask(dst);
 185         if ((dst ^ dev->pa_addr) & mask)
 186                 return mask;
 187         return dev->pa_mask;
 188 }
 189 
 190 
 191 /*
 192  *      Find the route entry through which our gateway will be reached
 193  */
 194  
 195 static inline struct device * get_gw_dev(unsigned long gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 196 {
 197         struct rtable * rt;
 198 
 199         for (rt = rt_base ; ; rt = rt->rt_next) 
 200         {
 201                 if (!rt)
 202                         return NULL;
 203                 if ((gw ^ rt->rt_dst) & rt->rt_mask)
 204                         continue;
 205                 /* 
 206                  *      Gateways behind gateways are a no-no 
 207                  */
 208                  
 209                 if (rt->rt_flags & RTF_GATEWAY)
 210                         return NULL;
 211                 return rt->rt_dev;
 212         }
 213 }
 214 
 215 /*
 216  *      Rewrote rt_add(), as the old one was weird - Linus
 217  *
 218  *      This routine is used to update the IP routing table, either
 219  *      from the kernel (ICMP_REDIRECT) or via an ioctl call issued
 220  *      by the superuser.
 221  */
 222  
 223 void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 224         unsigned long gw, struct device *dev, unsigned short mtu,
 225         unsigned long window, unsigned short irtt, short metric)
 226 {
 227         struct rtable *r, *rt;
 228         struct rtable **rp;
 229         unsigned long cpuflags;
 230         int duplicate = 0;
 231 
 232         /*
 233          *      A host is a unique machine and has no network bits.
 234          */
 235          
 236         if (flags & RTF_HOST) 
 237         {
 238                 mask = 0xffffffff;
 239         } 
 240         
 241         /*
 242          *      Calculate the network mask
 243          */
 244          
 245         else if (!mask) 
 246         {
 247                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 248                 {
 249                         mask = dev->pa_mask;
 250                         flags &= ~RTF_GATEWAY;
 251                         if (flags & RTF_DYNAMIC) 
 252                         {
 253                                 /*printk("Dynamic route to my own net rejected\n");*/
 254                                 return;
 255                         }
 256                 } 
 257                 else
 258                         mask = guess_mask(dst, dev);
 259                 dst &= mask;
 260         }
 261         
 262         /*
 263          *      A gateway must be reachable and not a local address
 264          */
 265          
 266         if (gw == dev->pa_addr)
 267                 flags &= ~RTF_GATEWAY;
 268                 
 269         if (flags & RTF_GATEWAY) 
 270         {
 271                 /*
 272                  *      Don't try to add a gateway we can't reach.. 
 273                  */
 274                  
 275                 if (dev != get_gw_dev(gw))
 276                         return;
 277                         
 278                 flags |= RTF_GATEWAY;
 279         } 
 280         else
 281                 gw = 0;
 282                 
 283         /*
 284          *      Allocate an entry and fill it in.
 285          */
 286          
 287         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
 288         if (rt == NULL) 
 289         {
 290                 return;
 291         }
 292         memset(rt, 0, sizeof(struct rtable));
 293         rt->rt_flags = flags | RTF_UP;
 294         rt->rt_dst = dst;
 295         rt->rt_dev = dev;
 296         rt->rt_gateway = gw;
 297         rt->rt_mask = mask;
 298         rt->rt_mss = dev->mtu - HEADER_SIZE;
 299         rt->rt_metric = metric;
 300         rt->rt_window = 0;      /* Default is no clamping */
 301 
 302         /* Are the MSS/Window valid ? */
 303 
 304         if(rt->rt_flags & RTF_MSS)
 305                 rt->rt_mss = mtu;
 306                 
 307         if(rt->rt_flags & RTF_WINDOW)
 308                 rt->rt_window = window;
 309         if(rt->rt_flags & RTF_IRTT)
 310                 rt->rt_irtt = irtt;
 311 
 312         /*
 313          *      What we have to do is loop though this until we have
 314          *      found the first address which has a higher generality than
 315          *      the one in rt.  Then we can put rt in right before it.
 316          *      The interrupts must be off for this process.
 317          */
 318 
 319         save_flags(cpuflags);
 320         cli();
 321 
 322         /*
 323          *      Remove old route if we are getting a duplicate. 
 324          */
 325          
 326         rp = &rt_base;
 327         while ((r = *rp) != NULL) 
 328         {
 329                 if (r->rt_dst != dst || 
 330                     r->rt_mask != mask)
 331                 {
 332                         rp = &r->rt_next;
 333                         continue;
 334                 }
 335                 if (r->rt_metric != metric && r->rt_gateway != gw)
 336                 {
 337                         duplicate = 1;
 338                         rp = &r->rt_next;
 339                         continue;
 340                 }
 341                 *rp = r->rt_next;
 342                 if (rt_loopback == r)
 343                         rt_loopback = NULL;
 344                 kfree_s(r, sizeof(struct rtable));
 345         }
 346         
 347         /*
 348          *      Add the new route 
 349          */
 350          
 351         rp = &rt_base;
 352         while ((r = *rp) != NULL) {
 353                 /*
 354                  * When adding a duplicate route, add it before
 355                  * the route with a higher metric.
 356                  */
 357                 if (duplicate &&
 358                     r->rt_dst == dst &&
 359                     r->rt_mask == mask &&
 360                     r->rt_metric > metric)
 361                         break;
 362                 else
 363                 /*
 364                  * Otherwise, just add it before the
 365                  * route with a higher generality.
 366                  */
 367                         if ((r->rt_mask & mask) != mask)
 368                                 break;
 369                 rp = &r->rt_next;
 370         }
 371         rt->rt_next = r;
 372         *rp = rt;
 373         
 374         /*
 375          *      Update the loopback route
 376          */
 377          
 378         if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
 379                 rt_loopback = rt;
 380 
 381         rt_stamp++;             /* New table revision */
 382                 
 383         /*
 384          *      Restore the interrupts and return
 385          */
 386          
 387         restore_flags(cpuflags);
 388         return;
 389 }
 390 
 391 
 /*
  *	Decide whether a netmask is unacceptable for an address.
  *
  *	Both arguments are in network byte order.  Returns 1 when 'addr'
  *	has bits set outside the mask, or when the mask's zero bits do not
  *	form one contiguous run at the low end; returns 0 otherwise.
  */
static inline int bad_mask(unsigned long mask, unsigned long addr)
{
	unsigned long host_bits = ~mask;
	unsigned long h;

	if (addr & host_bits)
		return 1;

	/* A contiguous low run of ones plus one shares no bit with itself. */
	h = ntohl(host_bits);
	return (h & (h + 1)) != 0;
}
 405 
 406 /*
 407  *      Process a route add request from the user
 408  */
 409  
 410 static int rt_new(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 411 {
 412         int err;
 413         char * devname;
 414         struct device * dev = NULL;
 415         unsigned long flags, daddr, mask, gw;
 416         short metric;
 417 
 418         /*
 419          *      If a device is specified find it.
 420          */
 421          
 422         if ((devname = r->rt_dev) != NULL) 
 423         {
 424                 err = getname(devname, &devname);
 425                 if (err)
 426                         return err;
 427                 dev = dev_get(devname);
 428                 putname(devname);
 429                 if (!dev)
 430                         return -EINVAL;
 431         }
 432         
 433         /*
 434          *      If the device isn't INET, don't allow it
 435          */
 436 
 437         if (r->rt_dst.sa_family != AF_INET)
 438                 return -EAFNOSUPPORT;
 439 
 440         /*
 441          *      Make local copies of the important bits
 442          *      We decrement the metric by one for BSD compatibility.
 443          */
 444          
 445         flags = r->rt_flags;
 446         daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
 447         mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
 448         gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
 449         metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
 450 
 451         /*
 452          *      BSD emulation: Permits route add someroute gw one-of-my-addresses
 453          *      to indicate which iface. Not as clean as the nice Linux dev technique
 454          *      but people keep using it... 
 455          */
 456          
 457         if (!dev && (flags & RTF_GATEWAY)) 
 458         {
 459                 struct device *dev2;
 460                 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
 461                 {
 462                         if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
 463                         {
 464                                 flags &= ~RTF_GATEWAY;
 465                                 dev = dev2;
 466                                 break;
 467                         }
 468                 }
 469         }
 470 
 471         /*
 472          *      Ignore faulty masks
 473          */
 474          
 475         if (bad_mask(mask, daddr))
 476                 mask = 0;
 477 
 478         /*
 479          *      Set the mask to nothing for host routes.
 480          */
 481          
 482         if (flags & RTF_HOST)
 483                 mask = 0xffffffff;
 484         else if (mask && r->rt_genmask.sa_family != AF_INET)
 485                 return -EAFNOSUPPORT;
 486 
 487         /*
 488          *      You can only gateway IP via IP..
 489          */
 490          
 491         if (flags & RTF_GATEWAY) 
 492         {
 493                 if (r->rt_gateway.sa_family != AF_INET)
 494                         return -EAFNOSUPPORT;
 495                 if (!dev)
 496                         dev = get_gw_dev(gw);
 497         } 
 498         else if (!dev)
 499                 dev = ip_dev_check(daddr);
 500 
 501         /*
 502          *      Unknown device.
 503          */
 504          
 505         if (dev == NULL)
 506                 return -ENETUNREACH;
 507 
 508         /*
 509          *      Add the route
 510          */
 511          
 512         ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
 513         return 0;
 514 }
 515 
 516 
 517 /*
 518  *      Remove a route, as requested by the user.
 519  */
 520 
 521 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 522 {
 523         struct sockaddr_in *trg;
 524         struct sockaddr_in *msk;
 525         struct sockaddr_in *gtw;
 526         char *devname;
 527         int err;
 528 
 529         trg = (struct sockaddr_in *) &r->rt_dst;
 530         msk = (struct sockaddr_in *) &r->rt_genmask;
 531         gtw = (struct sockaddr_in *) &r->rt_gateway;
 532         if ((devname = r->rt_dev) != NULL) 
 533         {
 534                 err = getname(devname, &devname);
 535                 if (err)
 536                         return err;
 537         }
 538         /*
 539          * metric can become negative here if it wasn't filled in
 540          * but that's a fortunate accident; we really use that in rt_del.
 541          */
 542         rt_del(trg->sin_addr.s_addr, msk->sin_addr.s_addr, devname,
 543                 gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
 544         if ( devname != NULL )
 545                 putname(devname);
 546         return 0;
 547 }
 548 
 549 
 550 /* 
 551  *      Called from the PROCfs module. This outputs /proc/net/route.
 552  */
 553  
 554 int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
 555 {
 556         struct rtable *r;
 557         int len=0;
 558         off_t pos=0;
 559         off_t begin=0;
 560         int size;
 561 
 562         len += sprintf(buffer,
 563                  "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
 564         pos=len;
 565   
 566         /*
 567          *      This isn't quite right -- r->rt_dst is a struct! 
 568          */
 569          
 570         for (r = rt_base; r != NULL; r = r->rt_next) 
 571         {
 572                 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
 573                         r->rt_dev->name, r->rt_dst, r->rt_gateway,
 574                         r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
 575                         r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
 576                 len+=size;
 577                 pos+=size;
 578                 if(pos<offset)
 579                 {
 580                         len=0;
 581                         begin=pos;
 582                 }
 583                 if(pos>offset+length)
 584                         break;
 585         }
 586         
 587         *start=buffer+(offset-begin);
 588         len-=(offset-begin);
 589         if(len>length)
 590                 len=length;
 591         return len;
 592 }
 593 
 594 /*
 595  *      Statement expression meant for the right-hand side of a loop condition: when evaluated it jumps to the enclosing function's no_route label. This is hackish, but results in better code. Use "-S" to see why.
 596  */
 597  
 598 #define early_out ({ goto no_route; 1; })
 599 
 600 /*
 601  *      Route a packet. This needs to be fairly quick. Florian & Co. 
 602  *      suggested a unified ARP and IP routing cache. Done right its
 603  *      probably a brilliant idea. I'd actually suggest a unified
 604  *      ARP/IP routing/Socket pointer cache. Volunteers welcome
 605  */
 606  
 607 struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 608 {
 609         struct rtable *rt;
 610 
 611         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 612         {
 613                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 614                         break;
 615                 /*
 616                  *      broadcast addresses can be special cases.. 
 617                  */
 618                 if (rt->rt_flags & RTF_GATEWAY)
 619                         continue;                
 620                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 621                     (rt->rt_dev->pa_brdaddr == daddr))
 622                         break;
 623         }
 624         
 625         if(rt->rt_flags&RTF_REJECT)
 626                 return NULL;
 627         
 628         if(src_addr!=NULL)
 629                 *src_addr= rt->rt_dev->pa_addr;
 630                 
 631         if (daddr == rt->rt_dev->pa_addr) {
 632                 if ((rt = rt_loopback) == NULL)
 633                         goto no_route;
 634         }
 635         rt->rt_use++;
 636         return rt;
 637 no_route:
 638         return NULL;
 639 }
 640 
 641 struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 642 {
 643         struct rtable *rt;
 644 
 645         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 646         {
 647                 /*
 648                  *      No routed addressing.
 649                  */
 650                 if (rt->rt_flags&RTF_GATEWAY)
 651                         continue;
 652                         
 653                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 654                         break;
 655                 /*
 656                  *      broadcast addresses can be special cases.. 
 657                  */
 658                  
 659                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 660                      rt->rt_dev->pa_brdaddr == daddr)
 661                         break;
 662         }
 663         
 664         if(src_addr!=NULL)
 665                 *src_addr= rt->rt_dev->pa_addr;
 666                 
 667         if (daddr == rt->rt_dev->pa_addr) {
 668                 if ((rt = rt_loopback) == NULL)
 669                         goto no_route;
 670         }
 671         rt->rt_use++;
 672         return rt;
 673 no_route:
 674         return NULL;
 675 }
 676 
 677 /*
 678  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 679  */
 680  
 681 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 682 {
 683         int err;
 684         struct rtentry rt;
 685 
 686         switch(cmd) 
 687         {
 688                 case SIOCADDRT:         /* Add a route */
 689                 case SIOCDELRT:         /* Delete a route */
 690                         if (!suser())
 691                                 return -EPERM;
 692                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
 693                         if (err)
 694                                 return err;
 695                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
 696                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
 697         }
 698 
 699         return -EINVAL;
 700 }

/* [previous][next][first][last][top][bottom][index][help] */