root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_del
  2. ip_rt_flush
  3. default_mask
  4. guess_mask
  5. get_gw_dev
  6. ip_rt_add
  7. bad_mask
  8. rt_new
  9. rt_kill
  10. rt_get_info
  11. ip_rt_route
  12. ip_rt_local
  13. ip_rt_ioctl

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *
  32  *              This program is free software; you can redistribute it and/or
  33  *              modify it under the terms of the GNU General Public License
  34  *              as published by the Free Software Foundation; either version
  35  *              2 of the License, or (at your option) any later version.
  36  */
  37 
  38 #include <asm/segment.h>
  39 #include <asm/system.h>
  40 #include <linux/types.h>
  41 #include <linux/kernel.h>
  42 #include <linux/sched.h>
  43 #include <linux/mm.h>
  44 #include <linux/string.h>
  45 #include <linux/socket.h>
  46 #include <linux/sockios.h>
  47 #include <linux/errno.h>
  48 #include <linux/in.h>
  49 #include <linux/inet.h>
  50 #include <linux/netdevice.h>
  51 #include <net/ip.h>
  52 #include <net/protocol.h>
  53 #include <net/route.h>
  54 #include <net/tcp.h>
  55 #include <linux/skbuff.h>
  56 #include <net/sock.h>
  57 #include <net/icmp.h>
  58 
  59 /*
  60  *      The routing table list
      *
      *      rt_base is the head of a singly linked list of struct rtable
      *      entries chained through rt_next. ip_rt_add() inserts each new
      *      entry in front of the first existing entry whose mask does not
      *      cover every bit of the new mask, and the functions in this file
      *      that modify the list (rt_del, ip_rt_flush, ip_rt_add) do so
      *      with interrupts disabled.
      *
      *      rt_stamp is incremented each time one of those functions runs
      *      to completion.
  61  */
  62 
  63 static struct rtable *rt_base = NULL;
  64 unsigned long rt_stamp = 1;             /* Routing table version stamp for caches ( 0 is 'unset' ) */
  65 
  66 /*
  67  *      Pointer to the loopback route
      *
      *      Set by ip_rt_add() to the first route added on a device with
      *      IFF_LOOPBACK while the pointer is still NULL, and reset to NULL
      *      whenever the entry it points at is freed. ip_rt_route() and
      *      ip_rt_local() return it when the destination equals the
      *      address of the device on the matched route.
  68  */
  69  
  70 static struct rtable *rt_loopback = NULL;
  71 
  72 /*
  73  *      Remove a routing table entry.
  74  */
  75 
  76 static void rt_del(unsigned long dst, char *devname)
     /* [previous][next][first][last][top][bottom][index][help] */
  77 {
  78         struct rtable *r, **rp;
  79         unsigned long flags;
  80 
  81         rp = &rt_base;
  82         
  83         /*
  84          *      This must be done with interrupts off because we could take
  85          *      an ICMP_REDIRECT.
  86          */
  87          
  88         save_flags(flags);
  89         cli();
  90         while((r = *rp) != NULL) 
  91         {
  92                 /* Make sure both the destination and the device match */
  93                 if ( r->rt_dst != dst ||
  94                 (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) )
  95                 {
  96                         rp = &r->rt_next;
  97                         continue;
  98                 }
  99                 *rp = r->rt_next;
 100                 
 101                 /*
 102                  *      If we delete the loopback route update its pointer.
 103                  */
 104                  
 105                 if (rt_loopback == r)
 106                         rt_loopback = NULL;
 107                 kfree_s(r, sizeof(struct rtable));
 108         } 
 109         rt_stamp++;             /* New table revision */
 110         
 111         restore_flags(flags);
 112 }
 113 
 114 
 115 /*
 116  *      Remove all routing table entries for a device. This is called when
 117  *      a device is downed.
 118  */
 119  
 120 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 121 {
 122         struct rtable *r;
 123         struct rtable **rp;
 124         unsigned long flags;
 125 
 126         rp = &rt_base;
 127         save_flags(flags);
 128         cli();
 129         while ((r = *rp) != NULL) {
 130                 if (r->rt_dev != dev) {
 131                         rp = &r->rt_next;
 132                         continue;
 133                 }
 134                 *rp = r->rt_next;
 135                 if (rt_loopback == r)
 136                         rt_loopback = NULL;
 137                 kfree_s(r, sizeof(struct rtable));
 138         } 
 139         rt_stamp++;             /* New table revision */
 140         restore_flags(flags);
 141 }
 142 
 /*
  *      Classful default netmask for 'dst'.
  *
  *      'dst' is converted with ntohl() before it is classified and the
  *      result is converted back with htonl(): class A addresses get
  *      IN_CLASSA_NET, class B addresses get IN_CLASSB_NET and every
  *      other address gets IN_CLASSC_NET.
  */

 static inline unsigned long default_mask(unsigned long dst)
 {
         unsigned long host_order = ntohl(dst);
         unsigned long net;

         if (IN_CLASSA(host_order))
                 net = IN_CLASSA_NET;
         else if (IN_CLASSB(host_order))
                 net = IN_CLASSB_NET;
         else
                 net = IN_CLASSC_NET;

         return htonl(net);
 }
 160 
 161 
 162 /*
 163  *      If no mask is specified then generate a default entry.
 164  */
 165 
 166 static unsigned long guess_mask(unsigned long dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 167 {
 168         unsigned long mask;
 169 
 170         if (!dst)
 171                 return 0;
 172         mask = default_mask(dst);
 173         if ((dst ^ dev->pa_addr) & mask)
 174                 return mask;
 175         return dev->pa_mask;
 176 }
 177 
 178 
 179 /*
 180  *      Find the route entry through which our gateway will be reached
 181  */
 182  
 183 static inline struct device * get_gw_dev(unsigned long gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 184 {
 185         struct rtable * rt;
 186 
 187         for (rt = rt_base ; ; rt = rt->rt_next) 
 188         {
 189                 if (!rt)
 190                         return NULL;
 191                 if ((gw ^ rt->rt_dst) & rt->rt_mask)
 192                         continue;
 193                 /* 
 194                  *      Gateways behind gateways are a no-no 
 195                  */
 196                  
 197                 if (rt->rt_flags & RTF_GATEWAY)
 198                         return NULL;
 199                 return rt->rt_dev;
 200         }
 201 }
 202 
 203 /*
 204  *      Rewrote rt_add(), as the old one was weird - Linus
 205  *
 206  *      This routine is used to update the IP routing table, either
 207  *      from the kernel (ICMP_REDIRECT) or via an ioctl call issued
 208  *      by the superuser.
 209  */
 210  
 211 void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 212         unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window, unsigned short irtt)
 213 {
 214         struct rtable *r, *rt;
 215         struct rtable **rp;
 216         unsigned long cpuflags;
 217 
 218         /*
 219          *      A host is a unique machine and has no network bits.
 220          */
 221          
 222         if (flags & RTF_HOST) 
 223         {
 224                 mask = 0xffffffff;
 225         } 
 226         
 227         /*
 228          *      Calculate the network mask
 229          */
 230          
 231         else if (!mask) 
 232         {
 233                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 234                 {
 235                         mask = dev->pa_mask;
 236                         flags &= ~RTF_GATEWAY;
 237                         if (flags & RTF_DYNAMIC) 
 238                         {
 239                                 /*printk("Dynamic route to my own net rejected\n");*/
 240                                 return;
 241                         }
 242                 } 
 243                 else
 244                         mask = guess_mask(dst, dev);
 245                 dst &= mask;
 246         }
 247         
 248         /*
 249          *      A gateway must be reachable and not a local address
 250          */
 251          
 252         if (gw == dev->pa_addr)
 253                 flags &= ~RTF_GATEWAY;
 254                 
 255         if (flags & RTF_GATEWAY) 
 256         {
 257                 /*
 258                  *      Don't try to add a gateway we can't reach.. 
 259                  */
 260                  
 261                 if (dev != get_gw_dev(gw))
 262                         return;
 263                         
 264                 flags |= RTF_GATEWAY;
 265         } 
 266         else
 267                 gw = 0;
 268                 
 269         /*
 270          *      Allocate an entry and fill it in.
 271          */
 272          
 273         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
 274         if (rt == NULL) 
 275         {
 276                 return;
 277         }
 278         memset(rt, 0, sizeof(struct rtable));
 279         rt->rt_flags = flags | RTF_UP;
 280         rt->rt_dst = dst;
 281         rt->rt_dev = dev;
 282         rt->rt_gateway = gw;
 283         rt->rt_mask = mask;
 284         rt->rt_mss = dev->mtu - HEADER_SIZE;
 285         rt->rt_window = 0;      /* Default is no clamping */
 286 
 287         /* Are the MSS/Window valid ? */
 288 
 289         if(rt->rt_flags & RTF_MSS)
 290                 rt->rt_mss = mtu;
 291                 
 292         if(rt->rt_flags & RTF_WINDOW)
 293                 rt->rt_window = window;
 294         if(rt->rt_flags & RTF_IRTT)
 295                 rt->rt_irtt = irtt;
 296 
 297         /*
 298          *      What we have to do is loop though this until we have
 299          *      found the first address which has a higher generality than
 300          *      the one in rt.  Then we can put rt in right before it.
 301          *      The interrupts must be off for this process.
 302          */
 303 
 304         save_flags(cpuflags);
 305         cli();
 306 
 307         /*
 308          *      Remove old route if we are getting a duplicate. 
 309          */
 310          
 311         rp = &rt_base;
 312         while ((r = *rp) != NULL) 
 313         {
 314                 if (r->rt_dst != dst || 
 315                     r->rt_mask != mask) 
 316                 {
 317                         rp = &r->rt_next;
 318                         continue;
 319                 }
 320                 *rp = r->rt_next;
 321                 if (rt_loopback == r)
 322                         rt_loopback = NULL;
 323                 kfree_s(r, sizeof(struct rtable));
 324         }
 325         
 326         /*
 327          *      Add the new route 
 328          */
 329          
 330         rp = &rt_base;
 331         while ((r = *rp) != NULL) {
 332                 if ((r->rt_mask & mask) != mask)
 333                         break;
 334                 rp = &r->rt_next;
 335         }
 336         rt->rt_next = r;
 337         *rp = rt;
 338         
 339         /*
 340          *      Update the loopback route
 341          */
 342          
 343         if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
 344                 rt_loopback = rt;
 345 
 346         rt_stamp++;             /* New table revision */
 347                 
 348         /*
 349          *      Restore the interrupts and return
 350          */
 351          
 352         restore_flags(cpuflags);
 353         return;
 354 }
 355 
 356 
 /*
  *      Check if a mask is acceptable for an address.
  *
  *      Returns 1 (bad) when 'addr' has any bit set outside 'mask', or
  *      when the inverted mask, converted with ntohl(), is not a single
  *      run of low-order one bits. Returns 0 otherwise.
  */

 static inline int bad_mask(unsigned long mask, unsigned long addr)
 {
         unsigned long host_bits = ~mask;

         /* The address must not reach into the host part */
         if (addr & host_bits)
                 return 1;

         /* x & (x+1) is zero only when x is of the form 0...01...1 */
         host_bits = ntohl(host_bits);
         return (host_bits & (host_bits + 1)) ? 1 : 0;
 }
 370 
 371 /*
 372  *      Process a route add request from the user
 373  */
 374  
 375 static int rt_new(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 376 {
 377         int err;
 378         char * devname;
 379         struct device * dev = NULL;
 380         unsigned long flags, daddr, mask, gw;
 381 
 382         /*
 383          *      If a device is specified find it.
 384          */
 385          
 386         if ((devname = r->rt_dev) != NULL) 
 387         {
 388                 err = getname(devname, &devname);
 389                 if (err)
 390                         return err;
 391                 dev = dev_get(devname);
 392                 putname(devname);
 393                 if (!dev)
 394                         return -EINVAL;
 395         }
 396         
 397         /*
 398          *      If the device isn't INET, don't allow it
 399          */
 400 
 401         if (r->rt_dst.sa_family != AF_INET)
 402                 return -EAFNOSUPPORT;
 403 
 404         /*
 405          *      Make local copies of the important bits
 406          */
 407          
 408         flags = r->rt_flags;
 409         daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
 410         mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
 411         gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
 412 
 413 
 414         /*
 415          *      BSD emulation: Permits route add someroute gw one-of-my-addresses
 416          *      to indicate which iface. Not as clean as the nice Linux dev technique
 417          *      but people keep using it... 
 418          */
 419          
 420         if (!dev && (flags & RTF_GATEWAY)) 
 421         {
 422                 struct device *dev2;
 423                 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
 424                 {
 425                         if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
 426                         {
 427                                 flags &= ~RTF_GATEWAY;
 428                                 dev = dev2;
 429                                 break;
 430                         }
 431                 }
 432         }
 433 
 434         /*
 435          *      Ignore faulty masks
 436          */
 437          
 438         if (bad_mask(mask, daddr))
 439                 mask = 0;
 440 
 441         /*
 442          *      Set the mask to nothing for host routes.
 443          */
 444          
 445         if (flags & RTF_HOST)
 446                 mask = 0xffffffff;
 447         else if (mask && r->rt_genmask.sa_family != AF_INET)
 448                 return -EAFNOSUPPORT;
 449 
 450         /*
 451          *      You can only gateway IP via IP..
 452          */
 453          
 454         if (flags & RTF_GATEWAY) 
 455         {
 456                 if (r->rt_gateway.sa_family != AF_INET)
 457                         return -EAFNOSUPPORT;
 458                 if (!dev)
 459                         dev = get_gw_dev(gw);
 460         } 
 461         else if (!dev)
 462                 dev = ip_dev_check(daddr);
 463 
 464         /*
 465          *      Unknown device.
 466          */
 467          
 468         if (dev == NULL)
 469                 return -ENETUNREACH;
 470 
 471         /*
 472          *      Add the route
 473          */
 474          
 475         ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt);
 476         return 0;
 477 }
 478 
 479 
 480 /*
 481  *      Remove a route, as requested by the user.
 482  */
 483 
 484 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
 485 {
 486         struct sockaddr_in *trg;
 487         char *devname;
 488         int err;
 489 
 490         trg = (struct sockaddr_in *) &r->rt_dst;
 491         if ((devname = r->rt_dev) != NULL) 
 492         {
 493                 err = getname(devname, &devname);
 494                 if (err)
 495                         return err;
 496         }
 497         rt_del(trg->sin_addr.s_addr, devname);
 498         if ( devname != NULL )
 499                 putname(devname);
 500         return 0;
 501 }
 502 
 503 
 504 /* 
 505  *      Called from the PROCfs module. This outputs /proc/net/route.
 506  */
 507  
 508 int rt_get_info(char *buffer, char **start, off_t offset, int length)
     /* [previous][next][first][last][top][bottom][index][help] */
 509 {
 510         struct rtable *r;
 511         int len=0;
 512         off_t pos=0;
 513         off_t begin=0;
 514         int size;
 515 
 516         len += sprintf(buffer,
 517                  "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
 518         pos=len;
 519   
 520         /*
 521          *      This isn't quite right -- r->rt_dst is a struct! 
 522          */
 523          
 524         for (r = rt_base; r != NULL; r = r->rt_next) 
 525         {
 526                 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
 527                         r->rt_dev->name, r->rt_dst, r->rt_gateway,
 528                         r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
 529                         r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
 530                 len+=size;
 531                 pos+=size;
 532                 if(pos<offset)
 533                 {
 534                         len=0;
 535                         begin=pos;
 536                 }
 537                 if(pos>offset+length)
 538                         break;
 539         }
 540         
 541         *start=buffer+(offset-begin);
 542         len-=(offset-begin);
 543         if(len>length)
 544                 len=length;
 545         return len;
 546 }
 547 
 548 /*
 549  *      This is hackish, but results in better code. Use "-S" to see why.
      *
      *      early_out is a GCC statement expression whose only effect is a
      *      jump to the label 'no_route', so any function using it must
      *      define that label. ip_rt_route() and ip_rt_local() use it as
      *      the right-hand side of "rt != NULL || early_out" in their loop
      *      conditions: reaching the end of the table leaves the function
      *      through 'no_route'. The trailing 1 only gives the expression a
      *      value so that it can appear as an operand of ||.
 550  */
 551  
 552 #define early_out ({ goto no_route; 1; })
 553 
 554 /*
 555  *      Route a packet. This needs to be fairly quick. Florian & Co. 
 556  *      suggested a unified ARP and IP routing cache. Done right its
 557  *      probably a brilliant idea. I'd actually suggest a unified
 558  *      ARP/IP routing/Socket pointer cache. Volunteers welcome
 559  */
 560  
 561 struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 562 {
 563         struct rtable *rt;
 564 
 565         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 566         {
 567                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 568                         break;
 569                 /*
 570                  *      broadcast addresses can be special cases.. 
 571                  */
 572                 if (rt->rt_flags & RTF_GATEWAY)
 573                         continue;                
 574                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 575                     (rt->rt_dev->pa_brdaddr == daddr))
 576                         break;
 577         }
 578         
 579         if(rt->rt_flags&RTF_REJECT)
 580                 return NULL;
 581         
 582         if(src_addr!=NULL)
 583                 *src_addr= rt->rt_dev->pa_addr;
 584                 
 585         if (daddr == rt->rt_dev->pa_addr) {
 586                 if ((rt = rt_loopback) == NULL)
 587                         goto no_route;
 588         }
 589         rt->rt_use++;
 590         return rt;
 591 no_route:
 592         return NULL;
 593 }
 594 
 595 struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 596 {
 597         struct rtable *rt;
 598 
 599         for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next) 
 600         {
 601                 /*
 602                  *      No routed addressing.
 603                  */
 604                 if (rt->rt_flags&RTF_GATEWAY)
 605                         continue;
 606                         
 607                 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
 608                         break;
 609                 /*
 610                  *      broadcast addresses can be special cases.. 
 611                  */
 612                  
 613                 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
 614                      rt->rt_dev->pa_brdaddr == daddr)
 615                         break;
 616         }
 617         
 618         if(src_addr!=NULL)
 619                 *src_addr= rt->rt_dev->pa_addr;
 620                 
 621         if (daddr == rt->rt_dev->pa_addr) {
 622                 if ((rt = rt_loopback) == NULL)
 623                         goto no_route;
 624         }
 625         rt->rt_use++;
 626         return rt;
 627 no_route:
 628         return NULL;
 629 }
 630 
 631 /*
 632  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 633  */
 634  
 635 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 636 {
 637         int err;
 638         struct rtentry rt;
 639 
 640         switch(cmd) 
 641         {
 642                 case SIOCADDRT:         /* Add a route */
 643                 case SIOCDELRT:         /* Delete a route */
 644                         if (!suser())
 645                                 return -EPERM;
 646                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
 647                         if (err)
 648                                 return err;
 649                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
 650                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
 651         }
 652 
 653         return -EINVAL;
 654 }

/* [previous][next][first][last][top][bottom][index][help] */