root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. default_mask
  10. guess_mask
  11. bad_mask
  12. fib_del_list
  13. fib_del_1
  14. fib_create_info
  15. fib_add_1
  16. rt_flush_list
  17. fib_flush_1
  18. rt_get_info
  19. rt_cache_get_info
  20. rt_free
  21. rt_kick_free_queue
  22. ip_rt_run_bh
  23. ip_rt_check_expire
  24. rt_redirect_1
  25. rt_cache_flush
  26. rt_garbage_collect_1
  27. rt_req_enqueue
  28. rt_req_dequeue
  29. rt_kick_backlog
  30. rt_del
  31. rt_add
  32. ip_rt_flush
  33. ip_rt_redirect
  34. rt_garbage_collect
  35. rt_cache_add
  36. ip_rt_slow_route
  37. ip_rt_put
  38. ip_rt_route
  39. ip_rt_new
  40. rt_kill
  41. ip_rt_ioctl
  42. ip_rt_advice

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *
  41  *              This program is free software; you can redistribute it and/or
  42  *              modify it under the terms of the GNU General Public License
  43  *              as published by the Free Software Foundation; either version
  44  *              2 of the License, or (at your option) any later version.
  45  */
  46 
  47 #include <linux/config.h>
  48 #include <asm/segment.h>
  49 #include <asm/system.h>
  50 #include <asm/bitops.h>
  51 #include <linux/types.h>
  52 #include <linux/kernel.h>
  53 #include <linux/sched.h>
  54 #include <linux/mm.h>
  55 #include <linux/string.h>
  56 #include <linux/socket.h>
  57 #include <linux/sockios.h>
  58 #include <linux/errno.h>
  59 #include <linux/in.h>
  60 #include <linux/inet.h>
  61 #include <linux/netdevice.h>
  62 #include <net/ip.h>
  63 #include <net/protocol.h>
  64 #include <net/route.h>
  65 #include <net/tcp.h>
  66 #include <linux/skbuff.h>
  67 #include <net/sock.h>
  68 #include <net/icmp.h>
  69 #include <net/netlink.h>
  70 
  71 /*
  72  * Forwarding Information Base definitions.
  73  */
  74 
struct fib_node
{
	struct fib_node		*fib_next;	/* next node in the zone's chain/bucket	*/
	__u32			fib_dst;	/* destination address (masked on add)	*/
	unsigned long		fib_use;	/* use count, shown in /proc/net/route	*/
	struct fib_info		*fib_info;	/* shared next-hop/device information	*/
	short			fib_metric;	/* metric; chains are kept sorted by it	*/
	unsigned char		fib_tos;	/* type of service (set to 0 on add)	*/
};
  84 
  85 /*
  86  * This structure contains data shared by many of routes.
  87  */     
  88 
struct fib_info
{
	struct fib_info		*fib_next;	/* doubly linked fib_info_list		*/
	struct fib_info		*fib_prev;
	__u32			fib_gateway;	/* next hop (0 for non-gateway routes)	*/
	struct device		*fib_dev;	/* output device			*/
	int			fib_refcnt;	/* number of fib_nodes sharing this	*/
	unsigned long		fib_window;	/* TCP window clamp hint		*/
	unsigned short		fib_flags;	/* RTF_* flags				*/
	unsigned short		fib_mtu;	/* MSS/MTU for this route		*/
	unsigned short		fib_irtt;	/* initial RTT hint for TCP		*/
};
 101 
/* One routing zone: all routes sharing the same netmask length. */
struct fib_zone
{
	struct fib_zone *fz_next;		/* next zone (shorter prefix)			*/
	struct fib_node **fz_hash_table;	/* hash buckets, created once the zone is large	*/
	struct fib_node *fz_list;		/* linear list, used while the zone is small	*/
	int		fz_nent;		/* number of entries in this zone		*/
	int		fz_logmask;		/* number of host (zero) bits in the mask	*/
	__u32		fz_mask;		/* netmask shared by all routes in this zone	*/
};
 111 
static struct fib_zone	*fib_zones[33];		/* one zone per mask length, indexed by logmask	*/
static struct fib_zone	*fib_zone_list;		/* zones ordered longest prefix first		*/
static struct fib_node	*fib_loopback = NULL;	/* cached pointer to the loopback route		*/
static struct fib_info	*fib_info_list;		/* list of all shared fib_info blocks		*/
 116 
 117 /*
 118  * Backlogging.
 119  */
 120 
/* Bit numbers, presumably for ip_rt_bh_mask — handlers are outside this chunk. */
#define RT_BH_REDIRECT          0
#define RT_BH_GARBAGE_COLLECT   1
#define RT_BH_FREE              2

/* A queued routing request, handled later by the route bottom half. */
struct rt_req
{
	struct rt_req * rtr_next;	/* next queued request		*/
	struct device *dev;		/* associated device		*/
	__u32 dst;			/* destination address		*/
	__u32 gw;			/* gateway address		*/
	unsigned char tos;		/* type of service		*/
};

int                     ip_rt_lock;	/* routing table lock (tested via ip_rt_fast_lock etc.)	*/
unsigned                ip_rt_bh_mask;	/* pending RT_BH_* work bits				*/
static struct rt_req    *rt_backlog;	/* queue of deferred rt_req's				*/
 137 
 138 /*
 139  * Route cache.
 140  */
 141 
struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];	/* route cache hash buckets		*/
static int              rt_cache_size;				/* presumably number of cached entries — maintained outside this chunk */
static struct rtable    *rt_free_queue;				/* cache entries awaiting final free	*/
struct wait_queue       *rt_wait;				/* sleepers waiting for ip_rt_lock	*/

/* Forward declarations for helpers defined later in this file. */
static void rt_kick_backlog(void);
static void rt_cache_add(unsigned hash, struct rtable * rth);
static void rt_cache_flush(void);
static void rt_garbage_collect_1(void);
 151 
 152 /* 
 153  * Evaluate mask length.
 154  */
 155 
 156 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 157 {
 158         if (!(mask = ntohl(mask)))
 159                 return 32;
 160         return ffz(~mask);
 161 }
 162 
 163 /* 
 164  * Create mask from length.
 165  */
 166 
 167 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 168 {
 169         if (logmask >= 32)
 170                 return 0;
 171         return htonl(~((1<<logmask)-1));
 172 }
 173 
 174 static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 175 {
 176         return ip_rt_hash_code(ntohl(dst)>>logmask);
 177 }
 178 
 179 /*
 180  * Free FIB node.
 181  */
 182 
 183 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 184 {
 185         struct fib_info * fi = f->fib_info;
 186         if (!--fi->fib_refcnt)
 187         {
 188 #if RT_CACHE_DEBUG >= 2
 189                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 190 #endif
 191                 if (fi->fib_next)
 192                         fi->fib_next->fib_prev = fi->fib_prev;
 193                 if (fi->fib_prev)
 194                         fi->fib_prev->fib_next = fi->fib_next;
 195                 if (fi == fib_info_list)
 196                         fib_info_list = fi->fib_next;
 197         }
 198         kfree_s(f, sizeof(struct fib_node));
 199 }
 200 
 201 /*
 202  * Find gateway route by address.
 203  */
 204 
 205 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 206 {
 207         struct fib_zone * fz;
 208         struct fib_node * f;
 209 
 210         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 211         {
 212                 if (fz->fz_hash_table)
 213                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 214                 else
 215                         f = fz->fz_list;
 216                 
 217                 for ( ; f; f = f->fib_next)
 218                 {
 219                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 220                                 continue;
 221                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 222                                 return NULL;
 223                         return f;
 224                 }
 225         }
 226         return NULL;
 227 }
 228 
/*
 * Find local route by address.
 * FIXME: I use "longest match" principle. If destination
 *        has some non-local route, I'll not search shorter matches.
 *        It's possible, I'm wrong, but I wanted to prevent following
 *        situation:
 *	route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 *	route add 193.233.7.0   netmask 255.255.255.0 eth1
 *        (Two ethernets connected by serial line, one is small and other is large)
 *        Host 193.233.7.129 is locally unreachable,
 *        but old (<=1.3.37) code will send packets destined for it to eth1.
 *
 */

static struct fib_node * fib_lookup_local(__u32 dst)
{
	struct fib_zone * fz;
	struct fib_node * f;

	/* Zones are walked from longest to shortest prefix. */
	for (fz = fib_zone_list; fz; fz = fz->fz_next) 
	{
		int longest_match_found = 0;

		if (fz->fz_hash_table)
			f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
		else
			f = fz->fz_list;
		
		for ( ; f; f = f->fib_next)
		{
			if ((dst ^ f->fib_dst) & fz->fz_mask)
				continue;
			/* First non-gateway match in the most specific zone wins. */
			if (!(f->fib_info->fib_flags & RTF_GATEWAY))
				return f;
			/* A gateway route matched at this prefix length:
			   remember that, so we do not fall back to a
			   shorter (less specific) local match below. */
			longest_match_found = 1;
		}
		if (longest_match_found)
			return NULL;
	}
	return NULL;
}
 270 
 271 /*
 272  * Main lookup routine.
 273  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 274  *      by user. It doesn't route non-CIDR broadcasts by default.
 275  *
 276  *      F.e.
 277  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 278  *      is valid, but if you really are not able (not allowed, do not want) to
 279  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 280  *              route add -host 193.233.7.255 eth0
 281  */
 282 
 283 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 284 {
 285         struct fib_zone * fz;
 286         struct fib_node * f;
 287 
 288         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 289         {
 290                 if (fz->fz_hash_table)
 291                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 292                 else
 293                         f = fz->fz_list;
 294                 
 295                 for ( ; f; f = f->fib_next)
 296                 {
 297                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 298                                 continue;
 299                         return f;
 300                 }
 301         }
 302         return NULL;
 303 }
 304 
 305 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307         struct fib_node * f;
 308         f = fib_lookup_gateway(gw);
 309         if (f)
 310                 return f->fib_info->fib_dev;
 311         return NULL;
 312 }
 313 
 314 /*
 315  *      Used by 'rt_add()' when we can't get the netmask any other way..
 316  *
 317  *      If the lower byte or two are zero, we guess the mask based on the
 318  *      number of zero 8-bit net numbers, otherwise we use the "default"
 319  *      masks judging by the destination address and our device netmask.
 320  */
 321  
 322 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 323 {
 324         dst = ntohl(dst);
 325         if (IN_CLASSA(dst))
 326                 return htonl(IN_CLASSA_NET);
 327         if (IN_CLASSB(dst))
 328                 return htonl(IN_CLASSB_NET);
 329         return htonl(IN_CLASSC_NET);
 330 }
 331 
 332 
 333 /*
 334  *      If no mask is specified then generate a default entry.
 335  */
 336 
 337 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 338 {
 339         __u32 mask;
 340 
 341         if (!dst)
 342                 return 0;
 343         mask = default_mask(dst);
 344         if ((dst ^ dev->pa_addr) & mask)
 345                 return mask;
 346         return dev->pa_mask;
 347 }
 348 
 349 
 350 /*
 351  *      Check if a mask is acceptable.
 352  */
 353  
 354 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 355 {
 356         if (addr & (mask = ~mask))
 357                 return 1;
 358         mask = ntohl(mask);
 359         if (mask & (mask+1))
 360                 return 1;
 361         return 0;
 362 }
 363 
 364 
/*
 *	Remove from the chain at *fp every fib_node matching dst and the
 *	optional gateway/device/metric filters (a zero/negative filter
 *	value means "any").  Returns the number of entries deleted; the
 *	caller adjusts the zone's fz_nent.
 */
static int fib_del_list(struct fib_node **fp, __u32 dst,
		struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
{
	struct fib_node *f;
	int found=0;

	while((f = *fp) != NULL) 
	{
		struct fib_info * fi = f->fib_info;

		/*
		 *	Make sure the destination and netmask match.
		 *	metric, gateway and device are also checked
		 *	if they were specified.
		 */
		if (f->fib_dst != dst ||
		    (gtw && fi->fib_gateway != gtw) ||
		    (metric >= 0 && f->fib_metric != metric) ||
		    (dev && fi->fib_dev != dev) )
		{
			fp = &f->fib_next;
			continue;
		}
		/* Unlink with interrupts off; *fp now names the successor,
		   so the loop naturally continues from there. */
		cli();
		*fp = f->fib_next;
		if (fib_loopback == f)
			fib_loopback = NULL;
		sti();
		ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
		fib_free_node(f);
		found++;
	}
	return found;
}
 399 
/*
 *	Delete route(s).  With mask == 0 every zone is searched; otherwise
 *	only the zone whose mask length matches.  Returns 0 on success,
 *	-ESRCH when nothing matched.
 */
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
		struct device * dev, __u32 gtw, short flags, short metric)
{
	struct fib_node **fp;
	struct fib_zone *fz;
	int found=0;

	if (!mask)
	{
		/* No netmask given: try every zone. */
		for (fz=fib_zone_list; fz; fz = fz->fz_next)
		{
			int tmp;
			if (fz->fz_hash_table)
				fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
			else
				fp = &fz->fz_list;

			tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
			fz->fz_nent -= tmp;
			found += tmp;
		}
	} 
	else
	{
		/* Only the zone for this exact mask length can match. */
		if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
		{
			if (fz->fz_hash_table)
				fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
			else
				fp = &fz->fz_list;
	
			found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
			fz->fz_nent -= found;
		}
	}

	/* Cached routes may have been derived from the deleted entries. */
	if (found)
	{
		rt_cache_flush();
		return 0;
	}
	return -ESRCH;
}
 443 
 444 
 445 static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
     /* [previous][next][first][last][top][bottom][index][help] */
 446                                          unsigned short flags, unsigned short mss,
 447                                          unsigned long window, unsigned short irtt)
 448 {
 449         struct fib_info * fi;
 450 
 451         if (!(flags & RTF_MSS))
 452         {
 453                 mss = dev->mtu;
 454 #ifdef CONFIG_NO_PATH_MTU_DISCOVERY
 455                 /*
 456                  *      If MTU was not specified, use default.
 457                  *      If you want to increase MTU for some net (local subnet)
 458                  *      use "route add .... mss xxx".
 459                  *
 460                  *      The MTU isnt currently always used and computed as it
 461                  *      should be as far as I can tell. [Still verifying this is right]
 462                  */
 463                 if ((flags & RTF_GATEWAY) && mss > 576)
 464                         mss = 576;
 465 #endif
 466         }
 467         if (!(flags & RTF_WINDOW))
 468                 window = 0;
 469         if (!(flags & RTF_IRTT))
 470                 irtt = 0;
 471 
 472         for (fi=fib_info_list; fi; fi = fi->fib_next)
 473         {
 474                 if (fi->fib_gateway != gw ||
 475                     fi->fib_dev != dev  ||
 476                     fi->fib_flags != flags ||
 477                     fi->fib_mtu != mss ||
 478                     fi->fib_window != window ||
 479                     fi->fib_irtt != irtt)
 480                         continue;
 481                 fi->fib_refcnt++;
 482 #if RT_CACHE_DEBUG >= 2
 483                 printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
 484 #endif
 485                 return fi;
 486         }
 487         fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
 488         if (!fi)
 489                 return NULL;
 490         memset(fi, 0, sizeof(struct fib_info));
 491         fi->fib_flags = flags;
 492         fi->fib_dev = dev;
 493         fi->fib_gateway = gw;
 494         fi->fib_mtu = mss;
 495         fi->fib_window = window;
 496         fi->fib_refcnt++;
 497         fi->fib_next = fib_info_list;
 498         fi->fib_prev = NULL;
 499         if (fib_info_list)
 500                 fib_info_list->fib_prev = fi;
 501         fib_info_list = fi;
 502 #if RT_CACHE_DEBUG >= 2
 503         printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
 504 #endif
 505         return fi;
 506 }
 507 
 508 
/*
 *	Insert a route into the FIB.  flags/dst/mask/gw describe the
 *	route, dev is the output device, mss/window/irtt are TCP hints
 *	stored in the shared fib_info, and metric orders routes with the
 *	same destination.  Fails silently on allocation errors or an
 *	unreachable gateway.
 */
static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
	__u32 gw, struct device *dev, unsigned short mss,
	unsigned long window, unsigned short irtt, short metric)
{
	struct fib_node *f, *f1;
	struct fib_node **fp;
	struct fib_node **dup_fp = NULL;
	struct fib_zone * fz;
	struct fib_info * fi;
	int logmask;

	if (flags & RTF_HOST) 
		mask = 0xffffffff;
	/*
	 * If mask is not specified, try to guess it.
	 */
	else if (!mask)
	{
		if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
		{
			/* Destination is on the interface's own subnet. */
			mask = dev->pa_mask;
			flags &= ~RTF_GATEWAY;
			if (flags & RTF_DYNAMIC) 
			{
				printk("Dynamic route to my own net rejected\n");
				return;
			}
		} 
		else
			mask = guess_mask(dst, dev);
		dst &= mask;
	}
	
	/*
	 *	A gateway must be reachable and not a local address
	 */
	 
	if (gw == dev->pa_addr)
		flags &= ~RTF_GATEWAY;
		
	if (flags & RTF_GATEWAY) 
	{
		/*
		 *	Don't try to add a gateway we can't reach.. 
		 */
		 
		if (dev != get_gw_dev(gw))
			return;
			
		flags |= RTF_GATEWAY;
	} 
	else
		gw = 0;
		
	/*
	 *	Allocate an entry and fill it in.
	 */
	 
	f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
	if (f == NULL)
		return;

	memset(f, 0, sizeof(struct fib_node));
	f->fib_dst = dst;
	f->fib_metric = metric;
	f->fib_tos    = 0;

	if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
	{
		kfree_s(f, sizeof(struct fib_node));
		return;
	}
	f->fib_info = fi;

	logmask = rt_logmask(mask);
	fz = fib_zones[logmask];


	/*
	 * First route with this mask length: create the zone and link it
	 * into fib_zone_list in ascending-logmask order (longest prefix
	 * first), so lookups see most specific zones first.
	 */
	if (!fz)
	{
		int i;
		fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
		if (!fz)
		{
			fib_free_node(f);
			return;
		}
		memset(fz, 0, sizeof(struct fib_zone));
		fz->fz_logmask = logmask;
		fz->fz_mask = mask;
		for (i=logmask-1; i>=0; i--)
			if (fib_zones[i])
				break;
		cli();
		if (i<0)
		{
			fz->fz_next = fib_zone_list;
			fib_zone_list = fz;
		}
		else
		{
			fz->fz_next = fib_zones[i]->fz_next;
			fib_zones[i]->fz_next = fz;
		}
		fib_zones[logmask] = fz;
		sti();
	}

	/*
	 * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
	 */

	if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
	{
		struct fib_node ** ht;
#if RT_CACHE_DEBUG
		printk("fib_add_1: hashing for zone %d started\n", logmask);
#endif
		/* NOTE(review): sizeof(struct rtable*) looks like it should
		   read sizeof(struct fib_node*) (cf. the memset below);
		   harmless only because all object pointers are the same
		   size here — confirm before changing. */
		ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);

		if (ht)
		{
			memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
			cli();
			/* Rehash the linear list into the new table. */
			f1 = fz->fz_list;
			while (f1)
			{
				struct fib_node * next;
				unsigned hash = fz_hash_code(f1->fib_dst, logmask);
				next = f1->fib_next;
				f1->fib_next = ht[hash];
				ht[hash] = f1;
				f1 = next;
			}
			fz->fz_list = NULL;
			fz->fz_hash_table = ht; 
			sti();
		}
	}

	if (fz->fz_hash_table)
		fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
	else
		fp = &fz->fz_list;

	/*
	 * Scan list to find the first route with the same destination
	 */
	while ((f1 = *fp) != NULL)
	{
		if (f1->fib_dst == dst)
			break;
		fp = &f1->fib_next;
	}

	/*
	 * Find route with the same destination and less (or equal) metric.
	 */
	while ((f1 = *fp) != NULL && f1->fib_dst == dst)
	{
		if (f1->fib_metric >= metric)
			break;
		/*
		 *	Record route with the same destination and gateway,
		 *	but less metric. We'll delete it 
		 *	after instantiation of new route.
		 */
		if (f1->fib_info->fib_gateway == gw)
			dup_fp = fp;
		fp = &f1->fib_next;
	}

	/*
	 * Is it already present?
	 */

	if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
	{
		/* Exact duplicate: drop the new node (and its fi ref). */
		fib_free_node(f);
		return;
	}
	
	/*
	 * Insert new entry to the list.
	 */

	cli();
	f->fib_next = f1;
	*fp = f;
	if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
		fib_loopback = f;
	sti();
	fz->fz_nent++;
	ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);

	/*
	 *	Delete route with the same destination and gateway.
	 *	Note that we should have at most one such route.
	 */
	if (dup_fp)
		fp = dup_fp;
	else
		fp = &f->fib_next;

	while ((f1 = *fp) != NULL && f1->fib_dst == dst)
	{
		if (f1->fib_info->fib_gateway == gw)
		{
			cli();
			*fp = f1->fib_next;
			if (fib_loopback == f1)
				fib_loopback = NULL;
			sti();
			ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
			fib_free_node(f1);
			fz->fz_nent--;
			break;
		}
		fp = &f1->fib_next;
	}
	rt_cache_flush();
	return;
}
 732 
 733 static int rt_flush_list(struct fib_node ** fp, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 734 {
 735         int found = 0;
 736         struct fib_node *f;
 737 
 738         while ((f = *fp) != NULL) {
 739                 if (f->fib_info->fib_dev != dev) {
 740                         fp = &f->fib_next;
 741                         continue;
 742                 }
 743                 cli();
 744                 *fp = f->fib_next;
 745                 if (fib_loopback == f)
 746                         fib_loopback = NULL;
 747                 sti();
 748                 fib_free_node(f);
 749                 found++;
 750         }
 751         return found;
 752 }
 753 
 754 static __inline__ void fib_flush_1(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 755 {
 756         struct fib_zone *fz;
 757         int found = 0;
 758 
 759         for (fz = fib_zone_list; fz; fz = fz->fz_next)
 760         {
 761                 if (fz->fz_hash_table)
 762                 {
 763                         int i;
 764                         int tmp = 0;
 765                         for (i=0; i<RTZ_HASH_DIVISOR; i++)
 766                                 tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
 767                         fz->fz_nent -= tmp;
 768                         found += tmp;
 769                 }
 770                 else
 771                 {
 772                         int tmp;
 773                         tmp = rt_flush_list(&fz->fz_list, dev);
 774                         fz->fz_nent -= tmp;
 775                         found += tmp;
 776                 }
 777         }
 778                 
 779         if (found)
 780                 rt_cache_flush();
 781 }
 782 
 783 
 784 /* 
 785  *      Called from the PROCfs module. This outputs /proc/net/route.
 786  *
 787  *      We preserve the old format but pad the buffers out. This means that
 788  *      we can spin over the other entries as we read them. Remember the
  789  *      gated BGP4 code could need to read 60,000+ routes on occasion (that's
 790  *      about 7Mb of data). To do that ok we will need to also cache the
 791  *      last route we got to (reads will generally be following on from
 792  *      one another without gaps).
 793  */
 794  
/*
 *      Produce /proc/net/route.  Every record, including the header, is
 *      padded to exactly 128 bytes, so a reader's file offset maps
 *      directly onto a route index and whole zones can be skipped
 *      without formatting them.
 *
 *      buffer/start/offset/length follow the classic proc get_info
 *      contract: we return at most 'length' bytes beginning at file
 *      position 'offset', and point *start at the first valid byte.
 */
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
        struct fib_zone *fz;
        struct fib_node *f;
        int len=0;
        off_t pos=0;
        char temp[129];
        int i;

        /* The header occupies the first 128-byte record. */
        pos = 128;

        if (offset<128)
        {
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
                len = 128;
        }

        /* Take the routing lock; user context, so sleeping is fine. */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (fz=fib_zone_list; fz; fz = fz->fz_next)
        {
                int maxslot;
                struct fib_node ** fp;

                if (fz->fz_nent == 0)
                        continue;

                /*
                 *      Fixed-size records let us skip a whole zone that
                 *      lies entirely before the requested offset.
                 */
                if (pos + 128*fz->fz_nent <= offset)
                {
                        pos += 128*fz->fz_nent;
                        len = 0;
                        continue;
                }

                /* A zone either has a hash table or a single list. */
                if (fz->fz_hash_table)
                {
                        maxslot = RTZ_HASH_DIVISOR;
                        fp      = fz->fz_hash_table;
                }
                else
                {
                        maxslot = 1;
                        fp      = &fz->fz_list;
                }

                for (i=0; i < maxslot; i++, fp++)
                {

                        for (f = *fp; f; f = f->fib_next)
                        {
                                struct fib_info * fi;
                                /*
                                 *      Spin through entries until we are ready
                                 */
                                pos += 128;

                                if (pos <= offset)
                                {
                                        len=0;
                                        continue;
                                }

                                /* RefCnt is reported as 0 for FIB entries. */
                                fi = f->fib_info;
                                sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
                                        fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
                                        fi->fib_flags, 0, f->fib_use, f->fib_metric,
                                        (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
                                sprintf(buffer+len,"%-127s\n",temp);

                                len += 128;
                                if (pos >= offset+length)
                                        goto done;
                        }
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);

        /* Trim the answer to the exact [offset, offset+length) window. */
        *start = buffer+len-(pos-offset);
        len = pos - offset;
        if (len>length)
                len = length;
        return len;
}
 883 
 884 int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
 885 {
 886         int len=0;
 887         off_t pos=0;
 888         char temp[129];
 889         struct rtable *r;
 890         int i;
 891 
 892         pos = 128;
 893 
 894         if (offset<128)
 895         {
 896                 sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\n");
 897                 len = 128;
 898         }
 899         
 900         
 901         while  (ip_rt_lock)
 902                 sleep_on(&rt_wait);
 903         ip_rt_fast_lock();
 904 
 905         for (i = 0; i<RT_HASH_DIVISOR; i++)
 906         {
 907                 for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
 908                 {
 909                         /*
 910                          *      Spin through entries until we are ready
 911                          */
 912                         pos += 128;
 913 
 914                         if (pos <= offset)
 915                         {
 916                                 len = 0;
 917                                 continue;
 918                         }
 919                                         
 920                         sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%ld\t%lu\t%d\t%08lX\t%d\t%lu\t%u\t%ld\t%1d",
 921                                 r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
 922                                 r->rt_flags, r->rt_refcnt, r->rt_use, 0,
 923                                 (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
 924                         sprintf(buffer+len,"%-127s\n",temp);
 925                         len += 128;
 926                         if (pos >= offset+length)
 927                                 goto done;
 928                 }
 929         }
 930 
 931 done:
 932         ip_rt_unlock();
 933         wake_up(&rt_wait);
 934         
 935         *start = buffer+len-(pos-offset);
 936         len = pos-offset;
 937         if (len>length)
 938                 len = length;
 939         return len;
 940 }
 941 
 942 
/*
 *      Release a cache entry.  If nobody holds a reference the entry
 *      (and, if we held the last reference, its hardware-header cache)
 *      is freed immediately; otherwise it is parked on rt_free_queue
 *      for the bottom half to reap once the references drain.
 */
static void rt_free(struct rtable * rt)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        if (!rt->rt_refcnt)
        {
                struct hh_cache * hh = rt->rt_hh;
                rt->rt_hh = NULL;
                if (hh && !--hh->hh_refcnt)
                {
                        /* Interrupts back on before kfree; flags are
                         * restored a second time below, which is
                         * harmless (same saved value). */
                        restore_flags(flags);
                        kfree_s(hh, sizeof(struct hh_cache));
                }
                restore_flags(flags);
                /* NOTE(review): sizeof(struct rt_table) — confirm this
                 * names the same type/size as struct rtable. */
                kfree_s(rt, sizeof(struct rt_table));
                return;
        }
        /* Still referenced: queue it, mark it down, let the bh free it. */
        rt->rt_next = rt_free_queue;
        rt->rt_flags &= ~RTF_UP;
        rt_free_queue = rt;
        ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
        printk("rt_free: %08x\n", rt->rt_dst);
#endif
        restore_flags(flags);
}
 971 
 972 /*
 973  * RT "bottom half" handlers. Called with masked interrupts.
 974  */
 975 
/*
 *      Reap rt_free_queue: free every queued entry whose reference
 *      count has dropped to zero.  Entered with interrupts masked;
 *      interrupts are briefly re-enabled around each kfree_s and
 *      re-masked before the list walk continues.
 */
static __inline__ void rt_kick_free_queue(void)
{
        struct rtable *rt, **rtp;

        rtp = &rt_free_queue;

        while ((rt = *rtp) != NULL)
        {
                if  (!rt->rt_refcnt)
                {
                        struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
                        __u32 daddr = rt->rt_dst;
#endif
                        /* Unlink while still masked, then free with
                         * interrupts enabled. */
                        *rtp = rt->rt_next;
                        rt->rt_hh = NULL;
                        if (hh && !--hh->hh_refcnt)
                        {
                                sti();
                                kfree_s(hh, sizeof(struct hh_cache));
                        }
                        sti();
                        /* NOTE(review): sizeof(struct rt_table) — confirm
                         * it matches struct rtable. */
                        kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
                        printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
                        cli();
                        continue;
                }
                /* Still referenced: leave it queued and move on. */
                rtp = &rt->rt_next;
        }
}
1008 
1009 void ip_rt_run_bh() {
     /* [previous][next][first][last][top][bottom][index][help] */
1010         unsigned long flags;
1011         save_flags(flags);
1012         cli();
1013         if (ip_rt_bh_mask && !ip_rt_lock)
1014         {
1015                 if (ip_rt_bh_mask & RT_BH_REDIRECT)
1016                         rt_kick_backlog();
1017 
1018                 if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
1019                 {
1020                         ip_rt_fast_lock();
1021                         ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
1022                         sti();
1023                         rt_garbage_collect_1();
1024                         cli();
1025                         ip_rt_fast_unlock();
1026                 }
1027 
1028                 if (ip_rt_bh_mask & RT_BH_FREE)
1029                         rt_kick_free_queue();
1030         }
1031         restore_flags(flags);
1032 }
1033 
1034 
/*
 *      Periodic route-cache maintenance: drop unreferenced entries
 *      that have aged past RT_CACHE_TIMEOUT and bubble busier entries
 *      towards the front of each hash chain (cheap incremental LRU).
 *      All work is skipped unless we are the sole lock holder.
 */
void ip_rt_check_expire()
{
        ip_rt_fast_lock();
        if (ip_rt_lock == 1)
        {
                int i;
                struct rtable *rth, **rthp;
                unsigned long flags;
                unsigned long now = jiffies;

                save_flags(flags);
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        rthp = &ip_rt_hash_table[i];

                        while ((rth = *rthp) != NULL)
                        {
                                struct rtable * rth_next = rth->rt_next;

                                /*
                                 * Cleanup aged off entries.
                                 */

                                cli();
                                if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                                {
                                        *rthp = rth_next;
                                        sti();
                                        rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
                                        rt_free(rth);
                                        continue;
                                }
                                sti();

                                if (!rth_next)
                                        break;

                                /*
                                 * LRU ordering: swap rth with its
                                 * successor when the successor is
                                 * clearly more recently / more heavily
                                 * used.  (Note the misspelled macro
                                 * name RT_CACHE_BUBBLE_THRESHOULD is
                                 * historical.)
                                 */

                                if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD < rth_next->rt_lastuse ||
                                    (rth->rt_lastuse < rth_next->rt_lastuse &&
                                     rth->rt_use < rth_next->rt_use))
                                {
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
                                        cli();
                                        *rthp = rth_next;
                                        rth->rt_next = rth_next->rt_next;
                                        rth_next->rt_next = rth;
                                        sti();
                                        rthp = &rth_next->rt_next;
                                        continue;
                                }
                                rthp = &rth->rt_next;
                        }
                }
                restore_flags(flags);
                rt_kick_free_queue();
        }
        ip_rt_unlock();
}
1102 
1103 static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1104 {
1105         struct rtable *rt;
1106         unsigned long hash = ip_rt_hash_code(dst);
1107 
1108         if (gw == dev->pa_addr)
1109                 return;
1110         if (dev != get_gw_dev(gw))
1111                 return;
1112         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
1113         if (rt == NULL) 
1114                 return;
1115         memset(rt, 0, sizeof(struct rtable));
1116         rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
1117         rt->rt_dst = dst;
1118         rt->rt_dev = dev;
1119         rt->rt_gateway = gw;
1120         rt->rt_src = dev->pa_addr;
1121         rt->rt_mtu = dev->mtu;
1122 #ifdef CONFIG_NO_PATH_MTU_DISCOVERY
1123         if (dev->mtu > 576)
1124                 rt->rt_mtu = 576;
1125 #endif
1126         rt->rt_lastuse  = jiffies;
1127         rt->rt_refcnt  = 1;
1128         rt_cache_add(hash, rt);
1129         ip_rt_put(rt);
1130         return;
1131 }
1132 
/*
 *      Empty the whole route cache.  Each bucket is detached atomically
 *      (interrupts masked only for the unlink), then its chain is freed
 *      with interrupts enabled via rt_free().
 */
static void rt_cache_flush(void)
{
        int i;
        struct rtable * rth, * next;

        for (i=0; i<RT_HASH_DIVISOR; i++)
        {
                int nr=0;

                cli();
                if (!(rth = ip_rt_hash_table[i]))
                {
                        sti();
                        continue;
                }

                /* Detach the chain; nobody can see these entries now. */
                ip_rt_hash_table[i] = NULL;
                sti();

                for (; rth; rth=next)
                {
                        next = rth->rt_next;
                        rt_cache_size--;
                        nr++;
                        rth->rt_next = NULL;
                        rt_free(rth);
                }
#if RT_CACHE_DEBUG >= 2
                if (nr > 0)
                        printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
        }
#if RT_CACHE_DEBUG >= 1
        /* Accounting sanity check: after a full flush the counter must
         * be zero; repair it if not. */
        if (rt_cache_size)
        {
                printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
                rt_cache_size = 0;
        }
#endif
}
1173 
/*
 *      Shrink the route cache below RT_CACHE_SIZE_MAX.  Starts with a
 *      generous expiry horizon and keeps halving it (making eviction
 *      ever more aggressive) until enough entries are gone.  Referenced
 *      entries effectively get a longer grace period via the
 *      expire*(refcnt+1) scaling.
 */
static void rt_garbage_collect_1(void)
{
        int i;
        unsigned expire = RT_CACHE_TIMEOUT>>1;
        struct rtable * rth, **rthp;
        unsigned long now = jiffies;

        for (;;)
        {
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        if (!ip_rt_hash_table[i])
                                continue;
                        /* Evict at most one entry per bucket per pass. */
                        for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
                        {
                                if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
                                        continue;
                                rt_cache_size--;
                                cli();
                                *rthp=rth->rt_next;
                                rth->rt_next = NULL;
                                sti();
                                rt_free(rth);
                                break;
                        }
                }
                if (rt_cache_size < RT_CACHE_SIZE_MAX)
                        return;
                expire >>= 1;
        }
}
1205 
1206 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1207 {
1208         unsigned long flags;
1209         struct rt_req * tail;
1210 
1211         save_flags(flags);
1212         cli();
1213         tail = *q;
1214         if (!tail)
1215                 rtr->rtr_next = rtr;
1216         else
1217         {
1218                 rtr->rtr_next = tail->rtr_next;
1219                 tail->rtr_next = rtr;
1220         }
1221         *q = rtr;
1222         restore_flags(flags);
1223         return;
1224 }
1225 
1226 /*
1227  * Caller should mask interrupts.
1228  */
1229 
1230 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1231 {
1232         struct rt_req * rtr;
1233 
1234         if (*q)
1235         {
1236                 rtr = (*q)->rtr_next;
1237                 (*q)->rtr_next = rtr->rtr_next;
1238                 if (rtr->rtr_next == rtr)
1239                         *q = NULL;
1240                 rtr->rtr_next = NULL;
1241                 return rtr;
1242         }
1243         return NULL;
1244 }
1245 
1246 /*
1247    Called with masked interrupts
1248  */
1249 
/*
 *      Drain the queued redirect requests (bottom-half work).  Entered
 *      with interrupts masked; they are re-enabled around each
 *      rt_redirect_1() call.  Does nothing if the routing lock is held.
 */
static void rt_kick_backlog()
{
        if (!ip_rt_lock)
        {
                struct rt_req * rtr;

                ip_rt_fast_lock();

                while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
                {
                        sti();
                        rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
                        kfree_s(rtr, sizeof(struct rt_req));
                        cli();
                }

                /* Queue is empty: clear the pending-work bit. */
                ip_rt_bh_mask &= ~RT_BH_REDIRECT;

                ip_rt_fast_unlock();
        }
}
1271 
1272 /*
1273  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1274  */
1275 
/*
 *      Delete a FIB route (user context only — we may sleep waiting
 *      for the routing lock).  Parameters are passed straight through
 *      to fib_del_1(); its return value is propagated.
 */
static int rt_del(__u32 dst, __u32 mask,
                struct device * dev, __u32 gtw, short rt_flags, short metric)
{
        int retval;

        /* Sleep until the lock is free, then take it. */
        while (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();
        retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
        ip_rt_unlock();
        wake_up(&rt_wait);
        return retval;
}
1289 
/*
 *      Add a FIB route (user context only — we may sleep waiting for
 *      the routing lock).  Parameters are passed straight through to
 *      fib_add_1().
 */
static void rt_add(short flags, __u32 dst, __u32 mask,
        __u32 gw, struct device *dev, unsigned short mss,
        unsigned long window, unsigned short irtt, short metric)
{
        /* Sleep until the lock is free, then take it. */
        while (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();
        fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
        ip_rt_unlock();
        wake_up(&rt_wait);
}
1301 
/*
 *      Remove all routes through a device (e.g. when it goes down).
 *      User context only — we may sleep waiting for the routing lock.
 */
void ip_rt_flush(struct device *dev)
{
        while (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();
        fib_flush_1(dev);
        ip_rt_unlock();
        wake_up(&rt_wait);
}
1311 
1312 /*
1313    Called by ICMP module.
1314  */
1315 
/*
 *      Handle an ICMP redirect from 'src': route to 'dst' should now
 *      go via 'gw' on 'dev'.  After sanity checks the update is either
 *      applied immediately (if we can get exclusive hold of the lock)
 *      or queued for the bottom half.
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
        struct rt_req * rtr;
        struct rtable * rt;

        rt = ip_rt_route(dst, 0);
        if (!rt)
                return;

        /* Accept only if the redirect comes from the current gateway,
         * on the expected device, names a gateway on our own subnet,
         * and that gateway is not one of our own addresses. */
        if (rt->rt_gateway != src ||
            rt->rt_dev != dev ||
            ((gw^dev->pa_addr)&dev->pa_mask) ||
            ip_chk_addr(gw))
        {
                ip_rt_put(rt);
                return;
        }
        ip_rt_put(rt);

        ip_rt_fast_lock();
        if (ip_rt_lock == 1)
        {
                /* Sole lock holder: apply the redirect right now. */
                rt_redirect_1(dst, gw, dev);
                ip_rt_unlock();
                return;
        }

        /* Lock contended: queue the request for the bottom half.
         * On allocation failure the redirect is silently dropped. */
        rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
        if (rtr)
        {
                rtr->dst = dst;
                rtr->gw = gw;
                rtr->dev = dev;
                rt_req_enqueue(&rt_backlog, rtr);
                ip_rt_bh_mask |= RT_BH_REDIRECT;
        }
        ip_rt_unlock();
}
1354 
1355 
1356 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1357 {
1358         if (ip_rt_lock == 1)
1359         {
1360                 rt_garbage_collect_1();
1361                 return;
1362         }
1363         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1364 }
1365 
/*
 *      Insert a new entry at the head of its hash chain, then sweep
 *      the rest of the chain removing duplicates (same destination)
 *      and aged-off unreferenced entries.  Also binds a hardware
 *      header cache when the device supports it, sharing the gateway
 *      route's hh entry for indirect routes.  Caller holds the lock.
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
        unsigned long   flags;
        struct rtable   **rthp;
        __u32           daddr = rth->rt_dst;
        unsigned long   now = jiffies;

#if RT_CACHE_DEBUG >= 2
        if (ip_rt_lock != 1)
        {
                printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
                return;
        }
#endif

        save_flags(flags);

        if (rth->rt_dev->header_cache_bind)
        {
                struct rtable * rtg = rth;

                if (rth->rt_gateway != daddr)
                {
                        /* Indirect route: temporarily drop the fast
                         * lock to resolve the gateway's own entry. */
                        ip_rt_fast_unlock();
                        rtg = ip_rt_route(rth->rt_gateway, 0);
                        ip_rt_fast_lock();
                }

                if (rtg)
                {
                        if (rtg == rth)
                                rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
                        else
                        {
                                /* Share the gateway route's hh entry. */
                                if (rtg->rt_hh)
                                        ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
                                rth->rt_hh = rtg->rt_hh;
                                ip_rt_put(rtg);
                        }
                }
        }

        if (rt_cache_size >= RT_CACHE_SIZE_MAX)
                rt_garbage_collect();

        /* Link at the head of the chain (interrupts masked). */
        cli();
        rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
        if (rth->rt_next)
        {
                struct rtable * trth;
                printk("rt_cache @%02x: %08x", hash, daddr);
                for (trth=rth->rt_next; trth; trth=trth->rt_next)
                        printk(" . %08x", trth->rt_dst);
                printk("\n");
        }
#endif
        ip_rt_hash_table[hash] = rth;
        rthp = &rth->rt_next;
        sti();
        rt_cache_size++;

        /*
         * Cleanup duplicate (and aged off) entries.
         */

        while ((rth = *rthp) != NULL)
        {

                cli();
                if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                    || rth->rt_dst == daddr)
                {
                        *rthp = rth->rt_next;
                        rt_cache_size--;
                        sti();
#if RT_CACHE_DEBUG >= 2
                        printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
                        rt_free(rth);
                        continue;
                }
                sti();
                rthp = &rth->rt_next;
        }
        restore_flags(flags);
}
1453 
1454 /*
1455    RT should be already locked.
1456    
1457    We could improve this by keeping a chain of say 32 struct rtable's
1458    last freed for fast recycling.
1459    
1460  */
1461 
/*
 *      Cache-miss path for ip_rt_route(): look the destination up in
 *      the FIB, build a fresh rtable entry and (when we are the sole
 *      lock holder) insert it into the cache.  The routing lock must
 *      already be held; it is always released before returning.
 *      Returns the new entry with one reference held, or NULL.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
        unsigned hash = ip_rt_hash_code(daddr)^local;
        struct rtable * rth;
        struct fib_node * f;
        struct fib_info * fi;
        __u32 saddr;

#if RT_CACHE_DEBUG >= 2
        printk("rt_cache miss @%08x\n", daddr);
#endif

        rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
        if (!rth)
        {
                ip_rt_unlock();
                return NULL;
        }

        if (local)
                f = fib_lookup_local(daddr);
        else
                f = fib_lookup (daddr);

        if (f)
        {
                fi = f->fib_info;
                f->fib_use++;
        }

        /* No route, or an explicit reject route: fail the lookup. */
        if (!f || (fi->fib_flags & RTF_REJECT))
        {
#if RT_CACHE_DEBUG >= 2
                printk("rt_route failed @%08x\n", daddr);
#endif
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
                return NULL;
        }

        saddr = fi->fib_dev->pa_addr;

        /* Destination is one of our own addresses: swap the use count
         * over to the loopback route (source address stays the same). */
        if (daddr == fi->fib_dev->pa_addr)
        {
                f->fib_use--;
                if ((f = fib_loopback) != NULL)
                {
                        f->fib_use++;
                        fi = f->fib_info;
                }
        }

        if (!f)
        {
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
                return NULL;
        }

        rth->rt_dst     = daddr;
        rth->rt_src     = saddr;
        rth->rt_lastuse = jiffies;
        rth->rt_refcnt  = 1;
        rth->rt_use     = 1;
        rth->rt_next    = NULL;
        rth->rt_hh      = NULL;
        rth->rt_gateway = fi->fib_gateway;
        rth->rt_dev     = fi->fib_dev;
        rth->rt_mtu     = fi->fib_mtu;
        rth->rt_window  = fi->fib_window;
        rth->rt_irtt    = fi->fib_irtt;
        rth->rt_tos     = f->fib_tos;
        rth->rt_flags   = fi->fib_flags | RTF_HOST;
        if (local)
                rth->rt_flags   |= RTF_LOCAL;

        /* Direct routes "gateway" to the destination itself. */
        if (!(rth->rt_flags & RTF_GATEWAY))
                rth->rt_gateway = rth->rt_dst;

        /* Only cache the entry when no one else holds the lock; the
         * returned reference keeps it valid either way. */
        if (ip_rt_lock == 1)
                rt_cache_add(hash, rth);
        else
        {
                rt_free(rth);
#if RT_CACHE_DEBUG >= 1
                printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
        }

        ip_rt_unlock();
        return rth;
}
1554 
1555 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1556 {
1557         if (rt)
1558                 ATOMIC_DECR(&rt->rt_refcnt);
1559 }
1560 
1561 struct rtable * ip_rt_route(__u32 daddr, int local)
     /* [previous][next][first][last][top][bottom][index][help] */
1562 {
1563         struct rtable * rth;
1564 
1565         ip_rt_fast_lock();
1566 
1567         for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
1568         {
1569                 if (rth->rt_dst == daddr)
1570                 {
1571                         rth->rt_lastuse = jiffies;
1572                         ATOMIC_INCR(&rth->rt_use);
1573                         ATOMIC_INCR(&rth->rt_refcnt);
1574                         ip_rt_unlock();
1575                         return rth;
1576                 }
1577         }
1578         return ip_rt_slow_route (daddr, local);
1579 }
1580 
1581 
1582 /*
1583  *      Process a route add request from the user, or from a kernel
1584  *      task.
1585  */
1586  
1587 int ip_rt_new(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1588 {
1589         int err;
1590         char * devname;
1591         struct device * dev = NULL;
1592         unsigned long flags;
1593         __u32 daddr, mask, gw;
1594         short metric;
1595 
1596         /*
1597          *      If a device is specified find it.
1598          */
1599          
1600         if ((devname = r->rt_dev) != NULL) 
1601         {
1602                 err = getname(devname, &devname);
1603                 if (err)
1604                         return err;
1605                 dev = dev_get(devname);
1606                 putname(devname);
1607                 if (!dev)
1608                         return -ENODEV;
1609         }
1610         
1611         /*
1612          *      If the device isn't INET, don't allow it
1613          */
1614 
1615         if (r->rt_dst.sa_family != AF_INET)
1616                 return -EAFNOSUPPORT;
1617 
1618         /*
1619          *      Make local copies of the important bits
1620          *      We decrement the metric by one for BSD compatibility.
1621          */
1622          
1623         flags = r->rt_flags;
1624         daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
1625         mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
1626         gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
1627         metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
1628 
1629         /*
1630          *      BSD emulation: Permits route add someroute gw one-of-my-addresses
1631          *      to indicate which iface. Not as clean as the nice Linux dev technique
1632          *      but people keep using it...  (and gated likes it ;))
1633          */
1634          
1635         if (!dev && (flags & RTF_GATEWAY)) 
1636         {
1637                 struct device *dev2;
1638                 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
1639                 {
1640                         if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
1641                         {
1642                                 flags &= ~RTF_GATEWAY;
1643                                 dev = dev2;
1644                                 break;
1645                         }
1646                 }
1647         }
1648 
1649         /*
1650          *      Ignore faulty masks
1651          */
1652          
1653         if (bad_mask(mask, daddr))
1654                 mask=0;
1655 
1656         /*
1657          *      Set the mask to nothing for host routes.
1658          */
1659          
1660         if (flags & RTF_HOST)
1661                 mask = 0xffffffff;
1662         else if (mask && r->rt_genmask.sa_family != AF_INET)
1663                 return -EAFNOSUPPORT;
1664 
1665         /*
1666          *      You can only gateway IP via IP..
1667          */
1668          
1669         if (flags & RTF_GATEWAY) 
1670         {
1671                 if (r->rt_gateway.sa_family != AF_INET)
1672                         return -EAFNOSUPPORT;
1673                 if (!dev)
1674                         dev = get_gw_dev(gw);
1675         } 
1676         else if (!dev)
1677                 dev = ip_dev_check(daddr);
1678 
1679         /*
1680          *      Unknown device.
1681          */
1682          
1683         if (dev == NULL)
1684                 return -ENETUNREACH;
1685 
1686         /*
1687          *      Add the route
1688          */
1689 
1690         rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
1691         return 0;
1692 }
1693 
1694 
1695 /*
1696  *      Remove a route, as requested by the user.
1697  */
1698 
1699 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1700 {
1701         struct sockaddr_in *trg;
1702         struct sockaddr_in *msk;
1703         struct sockaddr_in *gtw;
1704         char *devname;
1705         int err;
1706         struct device * dev = NULL;
1707 
1708         trg = (struct sockaddr_in *) &r->rt_dst;
1709         msk = (struct sockaddr_in *) &r->rt_genmask;
1710         gtw = (struct sockaddr_in *) &r->rt_gateway;
1711         if ((devname = r->rt_dev) != NULL) 
1712         {
1713                 err = getname(devname, &devname);
1714                 if (err)
1715                         return err;
1716                 dev = dev_get(devname);
1717                 putname(devname);
1718                 if (!dev)
1719                         return -ENODEV;
1720         }
1721         /*
1722          * metric can become negative here if it wasn't filled in
1723          * but that's a fortunate accident; we really use that in rt_del.
1724          */
1725         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1726                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1727         return err;
1728 }
1729 
1730 /*
1731  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1732  */
1733  
1734 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1735 {
1736         int err;
1737         struct rtentry rt;
1738 
1739         switch(cmd) 
1740         {
1741                 case SIOCADDRT:         /* Add a route */
1742                 case SIOCDELRT:         /* Delete a route */
1743                         if (!suser())
1744                                 return -EPERM;
1745                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1746                         if (err)
1747                                 return err;
1748                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1749                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : ip_rt_new(&rt);
1750         }
1751 
1752         return -EINVAL;
1753 }
1754 
/*
 *      Route advice hook — intentionally a no-op here.  Thanks!
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
}
1760 

/* [previous][next][first][last][top][bottom][index][help] */