root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_del
  2. ip_rt_flush
  3. default_mask
  4. guess_mask
  5. get_gw_dev
  6. ip_rt_add
  7. bad_mask
  8. rt_new
  9. rt_kill
  10. rt_get_info
  11. ip_rt_route
  12. ip_rt_local
  13. ip_rt_ioctl

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *
  34  *              This program is free software; you can redistribute it and/or
  35  *              modify it under the terms of the GNU General Public License
  36  *              as published by the Free Software Foundation; either version
  37  *              2 of the License, or (at your option) any later version.
  38  */
  39 
  40 #include <asm/segment.h>
  41 #include <asm/system.h>
  42 #include <linux/types.h>
  43 #include <linux/kernel.h>
  44 #include <linux/sched.h>
  45 #include <linux/mm.h>
  46 #include <linux/string.h>
  47 #include <linux/socket.h>
  48 #include <linux/sockios.h>
  49 #include <linux/errno.h>
  50 #include <linux/in.h>
  51 #include <linux/inet.h>
  52 #include <linux/netdevice.h>
  53 #include <net/ip.h>
  54 #include <net/protocol.h>
  55 #include <net/route.h>
  56 #include <net/tcp.h>
  57 #include <linux/skbuff.h>
  58 #include <net/sock.h>
  59 #include <net/icmp.h>
  60 
  61 /*
  62  *      The routing table list
  63  */
  64 
  65 static struct rtable *rt_base = NULL;
  66 unsigned long rt_stamp = 1;             /* Routing table version stamp for caches ( 0 is 'unset' ) */
  67 
  68 /*
  69  *      Pointer to the loopback route
  70  */
  71  
  72 static struct rtable *rt_loopback = NULL;
  73 
  74 /*
  75  *      Remove a routing table entry.
  76  */
  77 
  78 static void rt_del(unsigned long dst, char *devname)
     /* [previous][next][first][last][top][bottom][index][help] */
  79 {
  80         struct rtable *r, **rp;
  81         unsigned long flags;
  82 
  83         rp = &rt_base;
  84         
  85         /*
  86          *      This must be done with interrupts off because we could take
  87          *      an ICMP_REDIRECT.
  88          */
  89          
  90         save_flags(flags);
  91         cli();
  92         while((r = *rp) != NULL) 
  93         {
  94                 /* Make sure both the destination and the device match */
  95                 if ( r->rt_dst != dst ||
  96                 (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) )
  97                 {
  98                         rp = &r->rt_next;
  99                         continue;
 100                 }
 101                 *rp = r->rt_next;
 102                 
 103                 /*
 104                  *      If we delete the loopback route update its pointer.
 105                  */
 106                  
 107                 if (rt_loopback == r)
 108                         rt_loopback = NULL;
 109                 kfree_s(r, sizeof(struct rtable));
 110         } 
 111         rt_stamp++;             /* New table revision */
 112         
 113         restore_flags(flags);
 114 }
 115 
 116 
 117 /*
 118  *      Remove all routing table entries for a device. This is called when
 119  *      a device is downed.
 120  */
 121  
 122 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 123 {
 124         struct rtable *r;
 125         struct rtable **rp;
 126         unsigned long flags;
 127 
 128         rp = &rt_base;
 129         save_flags(flags);
 130         cli();
 131         while ((r = *rp) != NULL) {
 132                 if (r->rt_dev != dev) {
 133                         rp = &r->rt_next;
 134                         continue;
 135                 }
 136                 *rp = r->rt_next;
 137                 if (rt_loopback == r)
 138                         rt_loopback = NULL;
 139                 kfree_s(r, sizeof(struct rtable));
 140         } 
 141         rt_stamp++;             /* New table revision */
 142         restore_flags(flags);
 143 }
 144 
 145 /*
 146  *      Used by 'rt_add()' when we can't get the netmask any other way..
 147  *
 148  *      If the lower byte or two are zero, we guess the mask based on the
 149  *      number of zero 8-bit net numbers, otherwise we use the "default"
 150  *      masks judging by the destination address and our device netmask.
 151  */
 152  
 153 static inline unsigned long default_mask(unsigned long dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155         dst = ntohl(dst);
 156         if (IN_CLASSA(dst))
 157                 return htonl(IN_CLASSA_NET);
 158         if (IN_CLASSB(dst))
 159                 return htonl(IN_CLASSB_NET);
 160         return htonl(IN_CLASSC_NET);
 161 }
 162 
 163 
 164 /*
 165  *      If no mask is specified then generate a default entry.
 166  */
 167 
 168 static unsigned long guess_mask(unsigned long dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170         unsigned long mask;
 171 
 172         if (!dst)
 173                 return 0;
 174         mask = default_mask(dst);
 175         if ((dst ^ dev->pa_addr) & mask)
 176                 return mask;
 177         return dev->pa_mask;
 178 }
 179 
 180 
 181 /*
 182  *      Find the route entry through which our gateway will be reached
 183  */
 184  
 185 static inline struct device * get_gw_dev(unsigned long gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 186 {
 187         struct rtable * rt;
 188 
 189         for (rt = rt_base ; ; rt = rt->rt_next) 
 190         {
 191                 if (!rt)
 192                         return NULL;
 193                 if ((gw ^ rt->rt_dst) & rt->rt_mask)
 194                         continue;
 195                 /* 
 196                  *      Gateways behind gateways are a no-no 
 197                  */
 198                  
 199                 if (rt->rt_flags & RTF_GATEWAY)
 200                         return NULL;
 201                 return rt->rt_dev;
 202         }
 203 }
 204 
 205 /*
 206  *      Rewrote rt_add(), as the old one was weird - Linus
 207  *
 208  *      This routine is used to update the IP routing table, either
 209  *      from the kernel (ICMP_REDIRECT) or via an ioctl call issued
 210  *      by the superuser.
 211  */
 212  
 213 void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 214         unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window, unsigned short irtt, unsigned char metric)
 215 {
 216         struct rtable *r, *rt;
 217         struct rtable **rp;
 218         unsigned long cpuflags;
 219 
 220         /*
 221          *      A host is a unique machine and has no network bits.
 222          */
 223          
 224         if (flags & RTF_HOST) 
 225         {
 226                 mask = 0xffffffff;
 227         } 
 228         
 229         /*
 230          *      Calculate the network mask
 231          */
 232          
 233         else if (!mask) 
 234         {
 235                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 236                 {
 237                         mask = dev->pa_mask;
 238                         flags &= ~RTF_GATEWAY;
 239                         if (flags & RTF_DYNAMIC) 
 240                         {
 241                                 /*printk("Dynamic route to my own net rejected\n");*/
 242                                 return;
 243                         }
 244                 } 
 245                 else
 246                         mask = guess_mask(dst, dev);
 247                 dst &= mask;
 248         }
 249         
 250         /*
 251          *      A gateway must be reachable and not a local address
 252          */
 253          
 254         if (gw == dev->pa_addr)
 255                 flags &= ~RTF_GATEWAY;
 256                 
 257         if (flags & RTF_GATEWAY) 
 258         {
 259                 /*
 260                  *      Don't try to add a gateway we can't reach.. 
 261                  */
 262                  
 263                 if (dev != get_gw_dev(gw))
 264                         return;
 265                         
 266                 flags |= RTF_GATEWAY;
 267         } 
 268         else
 269                 gw = 0;
 270                 
 271         /*
 272          *      Allocate an entry and fill it in.
 273          */
 274          
 275         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
 276         if (rt == NULL) 
 277         {
 278                 return;
 279         }
 280         memset(rt, 0, sizeof(struct rtable));
 281         rt->rt_flags = flags | RTF_UP;
 282         rt->rt_dst = dst;
 283         rt->rt_dev = dev;
 284         rt->rt_gateway = gw;
 285         rt->rt_mask = mask;
 286         rt->rt_mss = dev->mtu - HEADER_SIZE;
 287         rt->rt_metric = metric;
 288         rt->rt_window = 0;      /* Default is no clamping */
 289 
 290         /* Are the MSS/Window valid ? */
 291 
 292         if(rt->rt_flags & RTF_MSS)
 293                 rt->rt_mss = mtu;
 294                 
 295         if(rt->rt_flags & RTF_WINDOW)
 296                 rt->rt_window = window;
 297         if(rt->rt_flags & RTF_IRTT)
 298                 rt->rt_irtt = irtt;
 299 
 300         /*
 301          *      What we have to do is loop though this until we have
 302          *      found the first address which has a higher generality than
 303          *      the one in rt.  Then we can put rt in right before it.
 304          *      The interrupts must be off for this process.
 305          */
 306 
 307         save_flags(cpuflags);
 308         cli();
 309 
 310         /*
 311          *      Remove old route if we are getting a duplicate. 
 312          */
 313          
 314         rp = &rt_base;
 315         while ((r = *rp) != NULL) 
 316         {
 317                 if (r->rt_dst != dst || 
 318                     r->rt_mask != mask)
 319                 {
 320                         rp = &r->rt_next;
 321                         continue;
 322                 }
 323                 *rp = r->rt_next;
 324                 if (rt_loopback == r)
 325                         rt_loopback = NULL;
 326                 kfree_s(r, sizeof(struct rtable));
 327         }
 328         
 329         /*
 330          *      Add the new route 
 331          */
 332          
 333         rp = &rt_base;
 334         while ((r = *rp) != NULL) {
 335                 if ((r->rt_mask & mask) != mask)
 336                         break;
 337                 rp = &r->rt_next;
 338         }
 339         rt->rt_next = r;
 340         *rp = rt;
 341         
 342         /*
 343          *      Update the loopback route
 344          */
 345          
 346         if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
 347                 rt_loopback = rt;
 348 
 349         rt_stamp++;             /* New table revision */
 350                 
 351         /*
 352          *      Restore the interrupts and return
 353          */
 354          
 355         restore_flags(cpuflags);
 356         return;
 357 }
 358 
 359 
 360 /*
 361  *      Check if a mask is acceptable.
 362  */
 363  
 364 static inline int bad_mask(unsigned long mask, unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366         if (addr & (mask = ~mask))
 367                 return 1;
 368         mask = ntohl(mask);
 369         if (mask & (mask+1))
 370                 return 1;
 371         return 0;
 372 }
 373 
 374 /*
 375  *      Process a route add request from the user
 376  */
 377  
 378 static int rt_new(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 379 {
 380         int err;
 381         char * devname;
 382         struct device * dev = NULL;
 383         unsigned long flags, daddr, mask, gw;
 384         unsigned char metric;
 385 
 386         /*
 387          *      If a device is specified find it.
 388          */
 389          
 390         if ((devname = r->rt_dev) != NULL) 
 391         {
 392                 err = getname(devname, &devname);
 393                 if (err)
 394                         return err;
 395                 dev = dev_get(devname);
 396                 putname(devname);
 397                 if (!dev)
 398                         return -EINVAL;
 399         }
 400         
 401         /*
 402          *      If the device isn't INET, don't allow it
 403          */
 404 
 405         if (r->rt_dst.sa_family != AF_INET)
 406                 return -EAFNOSUPPORT;
 407 
 408         /*
 409          *      Make local copies of the important bits
 410          *      We decrement the metric by one for BSD compatibility.
 411          */
 412          
 413         flags = r->rt_flags;
 414         daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
 415         mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
 416         gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
 417         metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
 418 
 419         /*
 420          *      BSD emulation: Permits route add someroute gw one-of-my-addresses
 421          *      to indicate which iface. Not as clean as the nice Linux dev technique
 422          *      but people keep using it... 
 423          */
 424          
 425         if (!dev && (flags & RTF_GATEWAY)) 
 426         {
 427                 struct device *dev2;
 428                 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
 429                 {
 430                         if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
 431                         {
 432                                 flags &= ~RTF_GATEWAY;
 433                                 dev = dev2;
 434                                 break;
 435                         }
 436                 }
 437         }
 438 
 439         /*
 440          *      Ignore faulty masks
 441          */
 442          
 443         if (bad_mask(mask, daddr))
 444                 mask = 0;
 445 
 446         /*
 447          *      Set the mask to nothing for host routes.
 448          */
 449          
 450         if (flags & RTF_HOST)
 451                 mask = 0xffffffff;
 452         else if (mask && r->rt_genmask.sa_family != AF_INET)
 453                 return -EAFNOSUPPORT;
 454 
 455         /*
 456          *      You can only gateway IP via IP..
 457          */
 458          
 459         if (flags & RTF_GATEWAY) 
 460         {
 461                 if (r->rt_gateway.sa_family != AF_INET)
 462                         return -EAFNOSUPPORT;
 463                 if (!dev)
 464                         dev = get_gw_dev(gw);
 465         } 
 466         else if (!dev)
 467                 dev = ip_dev_check(daddr);
 468 
 469         /*
 470          *      Unknown device.
 471          */
 472          
 473         if (dev == NULL)
 474                 return -ENETUNREACH;
 475 
 476         /*
 477          *      Add the route
 478          */
 479          
 480         ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
 481         return 0;
 482 }
 483 
 484 
 485 /*
 486  *      Remove a route, as requested by the user.
 487  */
 488 
 489 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 490 {
 491         struct sockaddr_in *trg;
 492         char *devname;
 493         int err;
 494 
 495         trg = (struct sockaddr_in *) &r->rt_dst;
 496         if ((devname = r->rt_dev) != NULL) 
 497         {
 498                 err = getname(devname, &devname);
 499                 if (err)
 500                         return err;
 501         }
 502         rt_del(trg->sin_addr.s_addr, devname);
 503         if ( devname != NULL )
 504                 putname(devname);
 505         return 0;
 506 }
 507 
 508 
 509 /* 
 510  *      Called from the PROCfs module. This outputs /proc/net/route.
 511  */
 512  
 513 int rt_get_info(char *buffer, char **start, off_t offset, int length)
     /* [previous][next][first][last][top][bottom][index][help] */
 514 {
 515         struct rtable *r;
 516         int len=0;
 517         off_t pos=0;
 518         off_t begin=0;
 519         int size;
 520 
 521         len += sprintf(buffer,
 522                  "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
 523         pos=len;
 524   
 525         /*
 526          *      This isn't quite right -- r->rt_dst is a struct! 
 527          */
 528          
 529         for (r = rt_base; r != NULL; r = r->rt_next) 
 530         {
 531                 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
 532                         r->rt_dev->name, r->rt_dst, r->rt_gateway,
 533                         r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
 534                         r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
 535                 len+=size;
 536                 pos+=size;
 537                 if(pos<offset)
 538                 {
 539                         len=0;
 540                         begin=pos;
 541                 }
 542                 if(pos>offset+length)
 543                         break;
 544         }
 545         
 546         *start=buffer+(offset-begin);
 547         len-=(offset-begin);
 548         if(len>length)
 549                 len=length;
 550         return len;
 551 }
 552 
 553 /*
 554  *      This is hackish, but results in better code. Use "-S" to see why.
 555  */
 556  
 557 #define early_out ({ goto no_route; 1; })
 558 
 559 /*
 560  *      Route a packet. This needs to be fairly quick. Florian & Co. 
 561  *      suggested a unified ARP and IP routing cache. Done right its
 562  *      probably a brilliant idea. I'd actually suggest a unified
 563  *      ARP/IP routing/Socket pointer cache. Volunteers welcome
 564  */
 565  
 566 struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 567 {
 568         struct rtable *rt;
 569 
 570         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 571         {
 572                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 573                         break;
 574                 /*
 575                  *      broadcast addresses can be special cases.. 
 576                  */
 577                 if (rt->rt_flags & RTF_GATEWAY)
 578                         continue;                
 579                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 580                     (rt->rt_dev->pa_brdaddr == daddr))
 581                         break;
 582         }
 583         
 584         if(rt->rt_flags&RTF_REJECT)
 585                 return NULL;
 586         
 587         if(src_addr!=NULL)
 588                 *src_addr= rt->rt_dev->pa_addr;
 589                 
 590         if (daddr == rt->rt_dev->pa_addr) {
 591                 if ((rt = rt_loopback) == NULL)
 592                         goto no_route;
 593         }
 594         rt->rt_use++;
 595         return rt;
 596 no_route:
 597         return NULL;
 598 }
 599 
 600 struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 601 {
 602         struct rtable *rt;
 603 
 604         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 605         {
 606                 /*
 607                  *      No routed addressing.
 608                  */
 609                 if (rt->rt_flags&RTF_GATEWAY)
 610                         continue;
 611                         
 612                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 613                         break;
 614                 /*
 615                  *      broadcast addresses can be special cases.. 
 616                  */
 617                  
 618                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 619                      rt->rt_dev->pa_brdaddr == daddr)
 620                         break;
 621         }
 622         
 623         if(src_addr!=NULL)
 624                 *src_addr= rt->rt_dev->pa_addr;
 625                 
 626         if (daddr == rt->rt_dev->pa_addr) {
 627                 if ((rt = rt_loopback) == NULL)
 628                         goto no_route;
 629         }
 630         rt->rt_use++;
 631         return rt;
 632 no_route:
 633         return NULL;
 634 }
 635 
 636 /*
 637  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 638  */
 639  
 640 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 641 {
 642         int err;
 643         struct rtentry rt;
 644 
 645         switch(cmd) 
 646         {
 647                 case SIOCADDRT:         /* Add a route */
 648                 case SIOCDELRT:         /* Delete a route */
 649                         if (!suser())
 650                                 return -EPERM;
 651                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
 652                         if (err)
 653                                 return err;
 654                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
 655                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
 656         }
 657 
 658         return -EINVAL;
 659 }

/* [previous][next][first][last][top][bottom][index][help] */