root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. default_mask
  10. guess_mask
  11. bad_mask
  12. fib_del_list
  13. fib_del_1
  14. fib_create_info
  15. fib_add_1
  16. rt_flush_list
  17. fib_flush_1
  18. rt_get_info
  19. rt_cache_get_info
  20. rt_free
  21. rt_kick_free_queue
  22. ip_rt_run_bh
  23. ip_rt_check_expire
  24. rt_redirect_1
  25. rt_cache_flush
  26. rt_garbage_collect_1
  27. rt_req_enqueue
  28. rt_req_dequeue
  29. rt_kick_backlog
  30. rt_del
  31. rt_add
  32. ip_rt_flush
  33. ip_rt_redirect
  34. rt_garbage_collect
  35. rt_cache_add
  36. ip_rt_slow_route
  37. ip_rt_put
  38. ip_rt_route
  39. ip_rt_new
  40. rt_kill
  41. ip_rt_ioctl
  42. ip_rt_advice

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *              
  41  *              Olaf Erb        :       irtt wasn't being copied right.
  42  *
  43  *              This program is free software; you can redistribute it and/or
  44  *              modify it under the terms of the GNU General Public License
  45  *              as published by the Free Software Foundation; either version
  46  *              2 of the License, or (at your option) any later version.
  47  */
  48 
  49 #include <linux/config.h>
  50 #include <asm/segment.h>
  51 #include <asm/system.h>
  52 #include <asm/bitops.h>
  53 #include <linux/types.h>
  54 #include <linux/kernel.h>
  55 #include <linux/sched.h>
  56 #include <linux/mm.h>
  57 #include <linux/string.h>
  58 #include <linux/socket.h>
  59 #include <linux/sockios.h>
  60 #include <linux/errno.h>
  61 #include <linux/in.h>
  62 #include <linux/inet.h>
  63 #include <linux/netdevice.h>
  64 #include <net/ip.h>
  65 #include <net/protocol.h>
  66 #include <net/route.h>
  67 #include <net/tcp.h>
  68 #include <linux/skbuff.h>
  69 #include <net/sock.h>
  70 #include <net/icmp.h>
  71 #include <net/netlink.h>
  72 
/*
 * Forwarding Information Base definitions.
 */

/*
 * One routing entry: a destination address (significant bits selected by
 * the owning fib_zone's mask) plus a pointer to the fib_info shared by
 * all routes with the same next hop.
 */
struct fib_node
{
        struct fib_node         *fib_next;      /* next entry in the zone list / hash chain */
        __u32                   fib_dst;        /* destination address (network byte order) */
        unsigned long           fib_use;        /* use counter, reported via /proc/net/route */
        struct fib_info         *fib_info;      /* shared next-hop information (refcounted) */
        short                   fib_metric;     /* route metric; chains are kept sorted by it */
        unsigned char           fib_tos;        /* type of service; set to 0 by fib_add_1() */
};
  86 
/*
 * This structure contains data shared by many routes.
 */     

/*
 * Reference-counted next-hop data shared between fib_nodes that use the
 * same gateway, device and path parameters (see fib_create_info()).
 */
struct fib_info
{
        struct fib_info         *fib_next;      /* doubly-linked fib_info_list */
        struct fib_info         *fib_prev;
        __u32                   fib_gateway;    /* gateway address; 0 for direct routes */
        struct device           *fib_dev;      /* output device */
        int                     fib_refcnt;     /* number of fib_nodes pointing here */
        unsigned long           fib_window;     /* TCP window clamp (valid with RTF_WINDOW) */
        unsigned short          fib_flags;      /* RTF_* flags */
        unsigned short          fib_mtu;        /* MSS for the route (RTF_MSS, else dev->mtu) */
        unsigned short          fib_irtt;       /* initial RTT (valid with RTF_IRTT) */
};
 103 
/*
 * All routes with the same mask length live in one zone.  A zone starts
 * as a plain list; once it grows past RTZ_HASHING_LIMIT entries,
 * fib_add_1() builds a hash table for it.
 */
struct fib_zone
{
        struct fib_zone *fz_next;       /* next zone; list is ordered longest mask first */
        struct fib_node **fz_hash_table;/* hash buckets, NULL until built */
        struct fib_node *fz_list;       /* linear entry list used before hashing */
        int             fz_nent;        /* number of entries in this zone */
        int             fz_logmask;     /* number of host (zero) bits in fz_mask */
        __u32           fz_mask;        /* netmask shared by every entry in the zone */
};
 113 
 114 static struct fib_zone  *fib_zones[33];
 115 static struct fib_zone  *fib_zone_list;
 116 static struct fib_node  *fib_loopback = NULL;
 117 static struct fib_info  *fib_info_list;
 118 
 119 /*
 120  * Backlogging.
 121  */
 122 
 123 #define RT_BH_REDIRECT          0
 124 #define RT_BH_GARBAGE_COLLECT   1
 125 #define RT_BH_FREE              2
 126 
/*
 * Deferred-work request queued on rt_backlog and processed by the
 * bottom-half handlers (see RT_BH_* above).
 */
struct rt_req
{
        struct rt_req * rtr_next;       /* next request in the rt_backlog queue */
        struct device *dev;             /* device the triggering packet arrived on */
        __u32 dst;                      /* destination address (network byte order) */
        __u32 gw;                       /* new gateway — presumably for ICMP redirects; see RT_BH_REDIRECT */
        unsigned char tos;              /* type of service */
};
 135 
 136 int                     ip_rt_lock;
 137 unsigned                ip_rt_bh_mask;
 138 static struct rt_req    *rt_backlog;
 139 
 140 /*
 141  * Route cache.
 142  */
 143 
 144 struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];
 145 static int              rt_cache_size;
 146 static struct rtable    *rt_free_queue;
 147 struct wait_queue       *rt_wait;
 148 
 149 static void rt_kick_backlog(void);
 150 static void rt_cache_add(unsigned hash, struct rtable * rth);
 151 static void rt_cache_flush(void);
 152 static void rt_garbage_collect_1(void);
 153 
 154 /* 
 155  * Evaluate mask length.
 156  */
 157 
 158 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 159 {
 160         if (!(mask = ntohl(mask)))
 161                 return 32;
 162         return ffz(~mask);
 163 }
 164 
 165 /* 
 166  * Create mask from length.
 167  */
 168 
 169 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171         if (logmask >= 32)
 172                 return 0;
 173         return htonl(~((1<<logmask)-1));
 174 }
 175 
 176 static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178         return ip_rt_hash_code(ntohl(dst)>>logmask);
 179 }
 180 
 181 /*
 182  * Free FIB node.
 183  */
 184 
 185 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 186 {
 187         struct fib_info * fi = f->fib_info;
 188         if (!--fi->fib_refcnt)
 189         {
 190 #if RT_CACHE_DEBUG >= 2
 191                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 192 #endif
 193                 if (fi->fib_next)
 194                         fi->fib_next->fib_prev = fi->fib_prev;
 195                 if (fi->fib_prev)
 196                         fi->fib_prev->fib_next = fi->fib_next;
 197                 if (fi == fib_info_list)
 198                         fib_info_list = fi->fib_next;
 199         }
 200         kfree_s(f, sizeof(struct fib_node));
 201 }
 202 
 203 /*
 204  * Find gateway route by address.
 205  */
 206 
 207 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209         struct fib_zone * fz;
 210         struct fib_node * f;
 211 
 212         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 213         {
 214                 if (fz->fz_hash_table)
 215                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 216                 else
 217                         f = fz->fz_list;
 218                 
 219                 for ( ; f; f = f->fib_next)
 220                 {
 221                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 222                                 continue;
 223                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 224                                 return NULL;
 225                         return f;
 226                 }
 227         }
 228         return NULL;
 229 }
 230 
 231 /*
 232  * Find local route by address.
 233  * FIXME: I use "longest match" principle. If destination
 234  *        has some non-local route, I'll not search shorter matches.
 235  *        It's possible, I'm wrong, but I wanted to prevent following
 236  *        situation:
 237  *      route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 238  *      route add 193.233.7.0   netmask 255.255.255.0 eth1
 239  *        (Two ethernets connected by serial line, one is small and other is large)
 240  *        Host 193.233.7.129 is locally unreachable,
 241  *        but old (<=1.3.37) code will send packets destined for it to eth1.
 242  *
 243  */
 244 
 245 static struct fib_node * fib_lookup_local(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 246 {
 247         struct fib_zone * fz;
 248         struct fib_node * f;
 249 
 250         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 251         {
 252                 int longest_match_found = 0;
 253 
 254                 if (fz->fz_hash_table)
 255                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 256                 else
 257                         f = fz->fz_list;
 258                 
 259                 for ( ; f; f = f->fib_next)
 260                 {
 261                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 262                                 continue;
 263                         if (!(f->fib_info->fib_flags & RTF_GATEWAY))
 264                                 return f;
 265                         longest_match_found = 1;
 266                 }
 267                 if (longest_match_found)
 268                         return NULL;
 269         }
 270         return NULL;
 271 }
 272 
 273 /*
 274  * Main lookup routine.
 275  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 276  *      by user. It doesn't route non-CIDR broadcasts by default.
 277  *
 278  *      F.e.
 279  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 280  *      is valid, but if you really are not able (not allowed, do not want) to
 281  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 282  *              route add -host 193.233.7.255 eth0
 283  */
 284 
 285 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 286 {
 287         struct fib_zone * fz;
 288         struct fib_node * f;
 289 
 290         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 291         {
 292                 if (fz->fz_hash_table)
 293                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 294                 else
 295                         f = fz->fz_list;
 296                 
 297                 for ( ; f; f = f->fib_next)
 298                 {
 299                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 300                                 continue;
 301                         return f;
 302                 }
 303         }
 304         return NULL;
 305 }
 306 
 307 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 308 {
 309         struct fib_node * f;
 310         f = fib_lookup_gateway(gw);
 311         if (f)
 312                 return f->fib_info->fib_dev;
 313         return NULL;
 314 }
 315 
 316 /*
 317  *      Used by 'rt_add()' when we can't get the netmask any other way..
 318  *
 319  *      If the lower byte or two are zero, we guess the mask based on the
 320  *      number of zero 8-bit net numbers, otherwise we use the "default"
 321  *      masks judging by the destination address and our device netmask.
 322  */
 323  
 324 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 325 {
 326         dst = ntohl(dst);
 327         if (IN_CLASSA(dst))
 328                 return htonl(IN_CLASSA_NET);
 329         if (IN_CLASSB(dst))
 330                 return htonl(IN_CLASSB_NET);
 331         return htonl(IN_CLASSC_NET);
 332 }
 333 
 334 
 335 /*
 336  *      If no mask is specified then generate a default entry.
 337  */
 338 
 339 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 340 {
 341         __u32 mask;
 342 
 343         if (!dst)
 344                 return 0;
 345         mask = default_mask(dst);
 346         if ((dst ^ dev->pa_addr) & mask)
 347                 return mask;
 348         return dev->pa_mask;
 349 }
 350 
 351 
 352 /*
 353  *      Check if a mask is acceptable.
 354  */
 355  
 356 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 357 {
 358         if (addr & (mask = ~mask))
 359                 return 1;
 360         mask = ntohl(mask);
 361         if (mask & (mask+1))
 362                 return 1;
 363         return 0;
 364 }
 365 
 366 
/*
 * Delete from one chain every route matching 'dst' and the optional
 * gateway/device/metric filters (0 / NULL / negative mean "any").
 * 'mask' and 'flags' are only passed through to the netlink message.
 * Returns the number of entries removed.
 */
static int fib_del_list(struct fib_node **fp, __u32 dst,
                struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
{
        struct fib_node *f;
        int found=0;

        while((f = *fp) != NULL) 
        {
                struct fib_info * fi = f->fib_info;

                /*
                 *      Make sure the destination and netmask match.
                 *      metric, gateway and device are also checked
                 *      if they were specified.
                 */
                if (f->fib_dst != dst ||
                    (gtw && fi->fib_gateway != gtw) ||
                    (metric >= 0 && f->fib_metric != metric) ||
                    (dev && fi->fib_dev != dev) )
                {
                        fp = &f->fib_next;
                        continue;
                }
                /* Unlink under cli/sti so interrupt-context lookups never
                 * see a half-updated chain or a stale fib_loopback. */
                cli();
                *fp = f->fib_next;
                if (fib_loopback == f)
                        fib_loopback = NULL;
                sti();
                ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
                fib_free_node(f);
                found++;
        }
        return found;
}
 401 
/*
 * Delete matching routes.  With a zero mask every zone is searched;
 * otherwise only the zone whose mask length matches 'mask'.  Flushes the
 * route cache and returns 0 when something was removed, else -ESRCH.
 */
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
                struct device * dev, __u32 gtw, short flags, short metric)
{
        struct fib_node **fp;
        struct fib_zone *fz;
        int found=0;

        if (!mask)
        {
                for (fz=fib_zone_list; fz; fz = fz->fz_next)
                {
                        int tmp;
                        /* Pick the one chain in this zone that can hold dst. */
                        if (fz->fz_hash_table)
                                fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                        else
                                fp = &fz->fz_list;

                        tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
                        fz->fz_nent -= tmp;
                        found += tmp;
                }
        } 
        else
        {
                if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
                {
                        if (fz->fz_hash_table)
                                fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                        else
                                fp = &fz->fz_list;
        
                        found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
                        fz->fz_nent -= found;
                }
        }

        /* Cached routes may reference the deleted entries: flush. */
        if (found)
        {
                rt_cache_flush();
                return 0;
        }
        return -ESRCH;
}
 445 
 446 
/*
 * Find or create the fib_info block describing next hop
 * (gw, dev, flags, mss, window, irtt).  An existing identical block is
 * shared by bumping fib_refcnt; otherwise a new one is allocated and
 * linked at the head of fib_info_list.  Returns NULL on allocation
 * failure.
 */
static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
                                         unsigned short flags, unsigned short mss,
                                         unsigned long window, unsigned short irtt)
{
        struct fib_info * fi;

        /* Parameters not flagged by the caller are normalised before the
         * duplicate search so equivalent routes compare equal. */
        if (!(flags & RTF_MSS))
        {
                mss = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
                /*
                 *      If MTU was not specified, use default.
                 *      If you want to increase MTU for some net (local subnet)
                 *      use "route add .... mss xxx".
                 *
                 *      The MTU isn't currently always used and computed as it
                 *      should be as far as I can tell. [Still verifying this is right]
                 */
                if ((flags & RTF_GATEWAY) && mss > 576)
                        mss = 576;
#endif
        }
        if (!(flags & RTF_WINDOW))
                window = 0;
        if (!(flags & RTF_IRTT))
                irtt = 0;

        /* Share an existing info block when every field matches. */
        for (fi=fib_info_list; fi; fi = fi->fib_next)
        {
                if (fi->fib_gateway != gw ||
                    fi->fib_dev != dev  ||
                    fi->fib_flags != flags ||
                    fi->fib_mtu != mss ||
                    fi->fib_window != window ||
                    fi->fib_irtt != irtt)
                        continue;
                fi->fib_refcnt++;
#if RT_CACHE_DEBUG >= 2
                printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
#endif
                return fi;
        }
        fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
        if (!fi)
                return NULL;
        memset(fi, 0, sizeof(struct fib_info));
        fi->fib_flags = flags;
        fi->fib_dev = dev;
        fi->fib_gateway = gw;
        fi->fib_mtu = mss;
        fi->fib_window = window;
        fi->fib_refcnt++;       /* first reference, held by the caller's fib_node */
        fi->fib_next = fib_info_list;
        fi->fib_prev = NULL;
        fi->fib_irtt = irtt;
        if (fib_info_list)
                fib_info_list->fib_prev = fi;
        fib_info_list = fi;
#if RT_CACHE_DEBUG >= 2
        printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
#endif
        return fi;
}
 510 
 511 
 512 static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 513         __u32 gw, struct device *dev, unsigned short mss,
 514         unsigned long window, unsigned short irtt, short metric)
 515 {
 516         struct fib_node *f, *f1;
 517         struct fib_node **fp;
 518         struct fib_node **dup_fp = NULL;
 519         struct fib_zone * fz;
 520         struct fib_info * fi;
 521         int logmask;
 522 
 523         if (flags & RTF_HOST) 
 524                 mask = 0xffffffff;
 525         /*
 526          * If mask is not specified, try to guess it.
 527          */
 528         else if (!mask)
 529         {
 530                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 531                 {
 532                         mask = dev->pa_mask;
 533                         flags &= ~RTF_GATEWAY;
 534                         if (flags & RTF_DYNAMIC) 
 535                         {
 536                                 printk("Dynamic route to my own net rejected\n");
 537                                 return;
 538                         }
 539                 } 
 540                 else
 541                         mask = guess_mask(dst, dev);
 542                 dst &= mask;
 543         }
 544         
 545         /*
 546          *      A gateway must be reachable and not a local address
 547          */
 548          
 549         if (gw == dev->pa_addr)
 550                 flags &= ~RTF_GATEWAY;
 551                 
 552         if (flags & RTF_GATEWAY) 
 553         {
 554                 /*
 555                  *      Don't try to add a gateway we can't reach.. 
 556                  */
 557                  
 558                 if (dev != get_gw_dev(gw))
 559                         return;
 560                         
 561                 flags |= RTF_GATEWAY;
 562         } 
 563         else
 564                 gw = 0;
 565                 
 566         /*
 567          *      Allocate an entry and fill it in.
 568          */
 569          
 570         f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
 571         if (f == NULL)
 572                 return;
 573 
 574         memset(f, 0, sizeof(struct fib_node));
 575         f->fib_dst = dst;
 576         f->fib_metric = metric;
 577         f->fib_tos    = 0;
 578 
 579         if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
 580         {
 581                 kfree_s(f, sizeof(struct fib_node));
 582                 return;
 583         }
 584         f->fib_info = fi;
 585 
 586         logmask = rt_logmask(mask);
 587         fz = fib_zones[logmask];
 588 
 589 
 590         if (!fz)
 591         {
 592                 int i;
 593                 fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
 594                 if (!fz)
 595                 {
 596                         fib_free_node(f);
 597                         return;
 598                 }
 599                 memset(fz, 0, sizeof(struct fib_zone));
 600                 fz->fz_logmask = logmask;
 601                 fz->fz_mask = mask;
 602                 for (i=logmask-1; i>=0; i--)
 603                         if (fib_zones[i])
 604                                 break;
 605                 cli();
 606                 if (i<0)
 607                 {
 608                         fz->fz_next = fib_zone_list;
 609                         fib_zone_list = fz;
 610                 }
 611                 else
 612                 {
 613                         fz->fz_next = fib_zones[i]->fz_next;
 614                         fib_zones[i]->fz_next = fz;
 615                 }
 616                 fib_zones[logmask] = fz;
 617                 sti();
 618         }
 619 
 620         /*
 621          * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
 622          */
 623 
 624         if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
 625         {
 626                 struct fib_node ** ht;
 627 #if RT_CACHE_DEBUG
 628                 printk("fib_add_1: hashing for zone %d started\n", logmask);
 629 #endif
 630                 ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
 631 
 632                 if (ht)
 633                 {
 634                         memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
 635                         cli();
 636                         f1 = fz->fz_list;
 637                         while (f1)
 638                         {
 639                                 struct fib_node * next;
 640                                 unsigned hash = fz_hash_code(f1->fib_dst, logmask);
 641                                 next = f1->fib_next;
 642                                 f1->fib_next = ht[hash];
 643                                 ht[hash] = f1;
 644                                 f1 = next;
 645                         }
 646                         fz->fz_list = NULL;
 647                         fz->fz_hash_table = ht; 
 648                         sti();
 649                 }
 650         }
 651 
 652         if (fz->fz_hash_table)
 653                 fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
 654         else
 655                 fp = &fz->fz_list;
 656 
 657         /*
 658          * Scan list to find the first route with the same destination
 659          */
 660         while ((f1 = *fp) != NULL)
 661         {
 662                 if (f1->fib_dst == dst)
 663                         break;
 664                 fp = &f1->fib_next;
 665         }
 666 
 667         /*
 668          * Find route with the same destination and less (or equal) metric.
 669          */
 670         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 671         {
 672                 if (f1->fib_metric >= metric)
 673                         break;
 674                 /*
 675                  *      Record route with the same destination and gateway,
 676                  *      but less metric. We'll delete it 
 677                  *      after instantiation of new route.
 678                  */
 679                 if (f1->fib_info->fib_gateway == gw)
 680                         dup_fp = fp;
 681                 fp = &f1->fib_next;
 682         }
 683 
 684         /*
 685          * Is it already present?
 686          */
 687 
 688         if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
 689         {
 690                 fib_free_node(f);
 691                 return;
 692         }
 693         
 694         /*
 695          * Insert new entry to the list.
 696          */
 697 
 698         cli();
 699         f->fib_next = f1;
 700         *fp = f;
 701         if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
 702                 fib_loopback = f;
 703         sti();
 704         fz->fz_nent++;
 705         ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
 706 
 707         /*
 708          *      Delete route with the same destination and gateway.
 709          *      Note that we should have at most one such route.
 710          */
 711         if (dup_fp)
 712                 fp = dup_fp;
 713         else
 714                 fp = &f->fib_next;
 715 
 716         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 717         {
 718                 if (f1->fib_info->fib_gateway == gw)
 719                 {
 720                         cli();
 721                         *fp = f1->fib_next;
 722                         if (fib_loopback == f1)
 723                                 fib_loopback = NULL;
 724                         sti();
 725                         ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
 726                         fib_free_node(f1);
 727                         fz->fz_nent--;
 728                         break;
 729                 }
 730                 fp = &f1->fib_next;
 731         }
 732         rt_cache_flush();
 733         return;
 734 }
 735 
/*
 * Remove from one chain every route that uses 'dev'.  Returns the number
 * of entries removed.  Used by fib_flush_1() when a device goes away.
 */
static int rt_flush_list(struct fib_node ** fp, struct device *dev)
{
        int found = 0;
        struct fib_node *f;

        while ((f = *fp) != NULL) {
                if (f->fib_info->fib_dev != dev) {
                        fp = &f->fib_next;
                        continue;
                }
                /* Unlink under cli/sti so interrupt-context lookups never
                 * see a half-updated chain or a stale fib_loopback. */
                cli();
                *fp = f->fib_next;
                if (fib_loopback == f)
                        fib_loopback = NULL;
                sti();
                fib_free_node(f);
                found++;
        }
        return found;
}
 756 
 757 static __inline__ void fib_flush_1(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 758 {
 759         struct fib_zone *fz;
 760         int found = 0;
 761 
 762         for (fz = fib_zone_list; fz; fz = fz->fz_next)
 763         {
 764                 if (fz->fz_hash_table)
 765                 {
 766                         int i;
 767                         int tmp = 0;
 768                         for (i=0; i<RTZ_HASH_DIVISOR; i++)
 769                                 tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
 770                         fz->fz_nent -= tmp;
 771                         found += tmp;
 772                 }
 773                 else
 774                 {
 775                         int tmp;
 776                         tmp = rt_flush_list(&fz->fz_list, dev);
 777                         fz->fz_nent -= tmp;
 778                         found += tmp;
 779                 }
 780         }
 781                 
 782         if (found)
 783                 rt_cache_flush();
 784 }
 785 
 786 
 787 /* 
 788  *      Called from the PROCfs module. This outputs /proc/net/route.
 789  *
 790  *      We preserve the old format but pad the buffers out. This means that
 791  *      we can spin over the other entries as we read them. Remember the
 792  *      gated BGP4 code could need to read 60,000+ routes on occasion (thats
 793  *      about 7Mb of data). To do that ok we will need to also cache the
 794  *      last route we got to (reads will generally be following on from
 795  *      one another without gaps).
 796  */
 797  
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
        struct fib_zone *fz;
        struct fib_node *f;
        int len=0;
        off_t pos=0;
        char temp[129];
        int i;
        
        /* Record 0 (bytes 0..127) is the column header */
        pos = 128;

        if (offset<128)
        {
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
                len = 128;
        }
        
        /* Take the routing lock; user context, so we may sleep for it */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (fz=fib_zone_list; fz; fz = fz->fz_next)
        {
                int maxslot;
                struct fib_node ** fp;

                if (fz->fz_nent == 0)
                        continue;

                /*
                 *      Every route is exactly 128 bytes, so a whole zone
                 *      lying before the requested offset can be skipped
                 *      in one arithmetic step.
                 */
                if (pos + 128*fz->fz_nent <= offset)
                {
                        pos += 128*fz->fz_nent;
                        len = 0;
                        continue;
                }

                /* Hashed zones have RTZ_HASH_DIVISOR chains, flat zones one */
                if (fz->fz_hash_table)
                {
                        maxslot = RTZ_HASH_DIVISOR;
                        fp      = fz->fz_hash_table;
                }
                else
                {
                        maxslot = 1;
                        fp      = &fz->fz_list;
                }
                        
                for (i=0; i < maxslot; i++, fp++)
                {
                        
                        for (f = *fp; f; f = f->fib_next) 
                        {
                                struct fib_info * fi;
                                /*
                                 *      Spin through entries until we are ready
                                 */
                                pos += 128;

                                if (pos <= offset)
                                {
                                        len=0;
                                        continue;
                                }
                                        
                                fi = f->fib_info;
                                sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
                                        fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
                                        fi->fib_flags, 0, f->fib_use, f->fib_metric,
                                        (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
                                /* Pad every record out to exactly 128 bytes */
                                sprintf(buffer+len,"%-127s\n",temp);

                                len += 128;
                                if (pos >= offset+length)
                                        goto done;
                        }
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);
        
        /* Standard /proc read arithmetic: point *start at the byte
           corresponding to 'offset' and clip the length to 'length'. */
        *start = buffer+len-(pos-offset);
        len = pos - offset;
        if (len>length)
                len = length;
        return len;
}
 886 
 887 int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
     /* [previous][next][first][last][top][bottom][index][help] */
 888 {
 889         int len=0;
 890         off_t pos=0;
 891         char temp[129];
 892         struct rtable *r;
 893         int i;
 894 
 895         pos = 128;
 896 
 897         if (offset<128)
 898         {
 899                 sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\n");
 900                 len = 128;
 901         }
 902         
 903         
 904         while  (ip_rt_lock)
 905                 sleep_on(&rt_wait);
 906         ip_rt_fast_lock();
 907 
 908         for (i = 0; i<RT_HASH_DIVISOR; i++)
 909         {
 910                 for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
 911                 {
 912                         /*
 913                          *      Spin through entries until we are ready
 914                          */
 915                         pos += 128;
 916 
 917                         if (pos <= offset)
 918                         {
 919                                 len = 0;
 920                                 continue;
 921                         }
 922                                         
 923                         sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%ld\t%lu\t%d\t%08lX\t%d\t%lu\t%u\t%ld\t%1d",
 924                                 r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
 925                                 r->rt_flags, r->rt_refcnt, r->rt_use, 0,
 926                                 (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
 927                         sprintf(buffer+len,"%-127s\n",temp);
 928                         len += 128;
 929                         if (pos >= offset+length)
 930                                 goto done;
 931                 }
 932         }
 933 
 934 done:
 935         ip_rt_unlock();
 936         wake_up(&rt_wait);
 937         
 938         *start = buffer+len-(pos-offset);
 939         len = pos-offset;
 940         if (len>length)
 941                 len = length;
 942         return len;
 943 }
 944 
 945 
/*
 *      Release a cache entry.  If nobody holds a reference the entry
 *      (and its hardware-header cache, once that refcount reaches
 *      zero) is freed at once; otherwise it is chained on
 *      rt_free_queue with RTF_UP cleared and RT_BH_FREE raised so
 *      the bottom half (rt_kick_free_queue) reaps it later.
 *      Interrupts are masked around all list/refcount manipulation.
 */
static void rt_free(struct rtable * rt)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        if (!rt->rt_refcnt)
        {
                struct hh_cache * hh = rt->rt_hh;
                rt->rt_hh = NULL;
                if (hh && !--hh->hh_refcnt)
                {
                        /* Re-enable interrupts before the actual free */
                        restore_flags(flags);
                        kfree_s(hh, sizeof(struct hh_cache));
                }
                restore_flags(flags);
                /* NOTE(review): size arg is sizeof(struct rt_table) while
                   rt is a struct rtable * — presumably equivalent here;
                   confirm against the header's definitions. */
                kfree_s(rt, sizeof(struct rt_table));
                return;
        }
        /* Still referenced: defer the free to the bottom half */
        rt->rt_next = rt_free_queue;
        rt->rt_flags &= ~RTF_UP;
        rt_free_queue = rt;
        ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
        printk("rt_free: %08x\n", rt->rt_dst);
#endif
        restore_flags(flags);
}
 974 
 975 /*
 * RT "bottom half" handlers. Called with masked interrupts.
 977  */
 978 
/*
 *      Walk rt_free_queue and free every entry whose refcount has
 *      dropped to zero.  Entered with interrupts masked; each entry is
 *      unlinked under cli(), then interrupts are enabled around the
 *      kfree_s() calls and masked again before the walk resumes.
 */
static __inline__ void rt_kick_free_queue(void)
{
        struct rtable *rt, **rtp;

        rtp = &rt_free_queue;

        while ((rt = *rtp) != NULL)
        {
                if  (!rt->rt_refcnt)
                {
                        struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
                        /* rt is invalid after kfree_s; save dst for the printk */
                        __u32 daddr = rt->rt_dst;
#endif
                        /* Unlink before re-enabling interrupts */
                        *rtp = rt->rt_next;
                        rt->rt_hh = NULL;
                        if (hh && !--hh->hh_refcnt)
                        {
                                sti();
                                kfree_s(hh, sizeof(struct hh_cache));
                        }
                        sti();
                        kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
                        printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
                        cli();
                        continue;
                }
                rtp = &rt->rt_next;
        }
}
1011 
/*
 *      Route-cache bottom half.  Runs whatever work is flagged in
 *      ip_rt_bh_mask (queued redirects, garbage collection, deferred
 *      frees) but only when the routing lock is free.  Interrupts are
 *      masked throughout, except while rt_garbage_collect_1() runs
 *      under the fast lock.
 */
void ip_rt_run_bh() {
        unsigned long flags;
        save_flags(flags);
        cli();
        if (ip_rt_bh_mask && !ip_rt_lock)
        {
                if (ip_rt_bh_mask & RT_BH_REDIRECT)
                        rt_kick_backlog();

                if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
                {
                        /* Clear the flag before the (interruptible) collection */
                        ip_rt_fast_lock();
                        ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
                        sti();
                        rt_garbage_collect_1();
                        cli();
                        ip_rt_fast_unlock();
                }

                if (ip_rt_bh_mask & RT_BH_FREE)
                        rt_kick_free_queue();
        }
        restore_flags(flags);
}
1036 
1037 
/*
 *      Periodic cache maintenance.  Only does work if, after taking
 *      the fast lock, we are the sole holder (ip_rt_lock == 1):
 *      evicts unreferenced entries idle for RT_CACHE_TIMEOUT,
 *      bubbles busier/fresher entries towards the head of each hash
 *      chain, then reaps the deferred-free queue.
 */
void ip_rt_check_expire()
{
        ip_rt_fast_lock();
        if (ip_rt_lock == 1)
        {
                int i;
                struct rtable *rth, **rthp;
                unsigned long flags;
                unsigned long now = jiffies;

                save_flags(flags);
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        rthp = &ip_rt_hash_table[i];

                        while ((rth = *rthp) != NULL)
                        {
                                struct rtable * rth_next = rth->rt_next;

                                /*
                                 * Cleanup aged off entries.
                                 */

                                cli();
                                if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                                {
                                        /* Unlink under cli, free with interrupts on */
                                        *rthp = rth_next;
                                        sti();
                                        rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
                                        rt_free(rth);
                                        continue;
                                }
                                sti();

                                if (!rth_next)
                                        break;

                                /*
                                 * LRU ordering: swap rth behind rth_next when the
                                 * next entry is markedly fresher, or fresher AND
                                 * more used, so hot entries drift towards the head.
                                 */

                                if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD < rth_next->rt_lastuse ||
                                    (rth->rt_lastuse < rth_next->rt_lastuse &&
                                     rth->rt_use < rth_next->rt_use))
                                {
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
                                        cli();
                                        *rthp = rth_next;
                                        rth->rt_next = rth_next->rt_next;
                                        rth_next->rt_next = rth;
                                        sti();
                                        rthp = &rth_next->rt_next;
                                        continue;
                                }
                                rthp = &rth->rt_next;
                        }
                }
                restore_flags(flags);
                rt_kick_free_queue();
        }
        ip_rt_unlock();
}
1105 
/*
 *      Install a dynamic host route to 'dst' via gateway 'gw', as
 *      learnt from an ICMP redirect.  The redirect is dropped when
 *      the new gateway is our own address on that device, or is not
 *      reachable through the same device.  Callers must hold the
 *      fast lock (rt_cache_add() requirement).
 */
static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
{
        struct rtable *rt;
        unsigned long hash = ip_rt_hash_code(dst);

        if (gw == dev->pa_addr)
                return;
        if (dev != get_gw_dev(gw))
                return;
        rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
        if (rt == NULL) 
                return;
        memset(rt, 0, sizeof(struct rtable));
        rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
        rt->rt_dst = dst;
        rt->rt_dev = dev;
        rt->rt_gateway = gw;
        rt->rt_src = dev->pa_addr;
        rt->rt_mtu = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
        /* Without PMTU discovery, cap at the classical 576-byte minimum */
        if (dev->mtu > 576)
                rt->rt_mtu = 576;
#endif
        rt->rt_lastuse  = jiffies;
        /* Hold one reference across the insert, then drop it */
        rt->rt_refcnt  = 1;
        rt_cache_add(hash, rt);
        ip_rt_put(rt);
        return;
}
1135 
/*
 *      Throw away every cached route.  Each hash chain is detached
 *      atomically (under cli) and then freed with interrupts enabled,
 *      so the free loop never races with interrupt-context lookups.
 */
static void rt_cache_flush(void)
{
        int i;
        struct rtable * rth, * next;

        for (i=0; i<RT_HASH_DIVISOR; i++)
        {
                int nr=0;       /* entries freed on this chain (debug only) */

                cli();
                if (!(rth = ip_rt_hash_table[i]))
                {
                        sti();
                        continue;
                }

                /* Detach the whole chain before re-enabling interrupts */
                ip_rt_hash_table[i] = NULL;
                sti();

                for (; rth; rth=next)
                {
                        next = rth->rt_next;
                        rt_cache_size--;
                        nr++;
                        rth->rt_next = NULL;
                        rt_free(rth);
                }
#if RT_CACHE_DEBUG >= 2
                if (nr > 0)
                        printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
        }
#if RT_CACHE_DEBUG >= 1
        /* Accounting sanity check: cache must be empty now */
        if (rt_cache_size)
        {
                printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
                rt_cache_size = 0;
        }
#endif
}
1176 
/*
 *      Synchronous cache shrinker.  Sweeps the hash table freeing at
 *      most one expired entry per chain per pass, then halves the
 *      expiry threshold and repeats until the cache fits below
 *      RT_CACHE_SIZE_MAX.  Referenced entries age more slowly
 *      (threshold is scaled by rt_refcnt+1).
 */
static void rt_garbage_collect_1(void)
{
        int i;
        unsigned expire = RT_CACHE_TIMEOUT>>1;
        struct rtable * rth, **rthp;
        unsigned long now = jiffies;

        for (;;)
        {
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        if (!ip_rt_hash_table[i])
                                continue;
                        for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
                        {
                                if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
                                        continue;
                                rt_cache_size--;
                                /* Unlink under cli, free afterwards */
                                cli();
                                *rthp=rth->rt_next;
                                rth->rt_next = NULL;
                                sti();
                                rt_free(rth);
                                break;
                        }
                }
                if (rt_cache_size < RT_CACHE_SIZE_MAX)
                        return;
                /* Get progressively more aggressive each round */
                expire >>= 1;
        }
}
1208 
1209 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1210 {
1211         unsigned long flags;
1212         struct rt_req * tail;
1213 
1214         save_flags(flags);
1215         cli();
1216         tail = *q;
1217         if (!tail)
1218                 rtr->rtr_next = rtr;
1219         else
1220         {
1221                 rtr->rtr_next = tail->rtr_next;
1222                 tail->rtr_next = rtr;
1223         }
1224         *q = rtr;
1225         restore_flags(flags);
1226         return;
1227 }
1228 
1229 /*
1230  * Caller should mask interrupts.
1231  */
1232 
1233 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1234 {
1235         struct rt_req * rtr;
1236 
1237         if (*q)
1238         {
1239                 rtr = (*q)->rtr_next;
1240                 (*q)->rtr_next = rtr->rtr_next;
1241                 if (rtr->rtr_next == rtr)
1242                         *q = NULL;
1243                 rtr->rtr_next = NULL;
1244                 return rtr;
1245         }
1246         return NULL;
1247 }
1248 
1249 /*
1250    Called with masked interrupts
1251  */
1252 
/*
 *      Drain the queue of ICMP redirect requests deferred by
 *      ip_rt_redirect().  Entered with interrupts masked; each
 *      request is applied with interrupts enabled.  Only runs when
 *      nobody else holds the routing lock.
 */
static void rt_kick_backlog()
{
        if (!ip_rt_lock)
        {
                struct rt_req * rtr;

                ip_rt_fast_lock();

                while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
                {
                        sti();
                        rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
                        kfree_s(rtr, sizeof(struct rt_req));
                        cli();
                }

                ip_rt_bh_mask &= ~RT_BH_REDIRECT;

                ip_rt_fast_unlock();
        }
}
1274 
1275 /*
1276  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1277  */
1278 
1279 static int rt_del(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1280                 struct device * dev, __u32 gtw, short rt_flags, short metric)
1281 {
1282         int retval;
1283 
1284         while (ip_rt_lock)
1285                 sleep_on(&rt_wait);
1286         ip_rt_fast_lock();
1287         retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
1288         ip_rt_unlock();
1289         wake_up(&rt_wait);
1290         return retval;
1291 }
1292 
1293 static void rt_add(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1294         __u32 gw, struct device *dev, unsigned short mss,
1295         unsigned long window, unsigned short irtt, short metric)
1296 {
1297         while (ip_rt_lock)
1298                 sleep_on(&rt_wait);
1299         ip_rt_fast_lock();
1300         fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
1301         ip_rt_unlock();
1302         wake_up(&rt_wait);
1303 }
1304 
1305 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1306 {
1307         while (ip_rt_lock)
1308                 sleep_on(&rt_wait);
1309         ip_rt_fast_lock();
1310         fib_flush_1(dev);
1311         ip_rt_unlock();
1312         wake_up(&rt_wait);
1313 }
1314 
1315 /*
1316    Called by ICMP module.
1317  */
1318 
/*
 *      Entry point for the ICMP module.  Accept a redirect for 'dst'
 *      to gateway 'gw' only if it comes from the router we currently
 *      use (src) on the same device, the new gateway sits on that
 *      device's directly attached network, and it is not one of our
 *      own addresses.  Applied immediately when we are the sole lock
 *      holder, otherwise queued for the bottom half.
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
        struct rt_req * rtr;
        struct rtable * rt;

        rt = ip_rt_route(dst, 0);
        if (!rt)
                return;

        if (rt->rt_gateway != src ||
            rt->rt_dev != dev ||
            ((gw^dev->pa_addr)&dev->pa_mask) ||
            ip_chk_addr(gw))
        {
                ip_rt_put(rt);
                return;
        }
        ip_rt_put(rt);

        ip_rt_fast_lock();
        if (ip_rt_lock == 1)
        {
                /* Nobody else holds the lock: apply the redirect now */
                rt_redirect_1(dst, gw, dev);
                ip_rt_unlock();
                return;
        }

        /* Lock contended: queue the request for rt_kick_backlog() */
        rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
        if (rtr)
        {
                rtr->dst = dst;
                rtr->gw = gw;
                rtr->dev = dev;
                rt_req_enqueue(&rt_backlog, rtr);
                ip_rt_bh_mask |= RT_BH_REDIRECT;
        }
        ip_rt_unlock();
}
1357 
1358 
1359 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1360 {
1361         if (ip_rt_lock == 1)
1362         {
1363                 rt_garbage_collect_1();
1364                 return;
1365         }
1366         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1367 }
1368 
/*
 *      Insert rth into cache bucket 'hash'.  The caller must hold the
 *      fast lock with ip_rt_lock == 1.  Also binds a hardware-header
 *      cache entry (built directly for direct routes, shared with the
 *      gateway's cache entry for indirect ones), triggers garbage
 *      collection when the cache is full, and prunes the rest of the
 *      chain of aged-off entries and stale duplicates for the same
 *      destination.
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
        unsigned long   flags;
        struct rtable   **rthp;
        __u32           daddr = rth->rt_dst;
        unsigned long   now = jiffies;

#if RT_CACHE_DEBUG >= 2
        if (ip_rt_lock != 1)
        {
                printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
                return;
        }
#endif

        save_flags(flags);

        if (rth->rt_dev->header_cache_bind)
        {
                struct rtable * rtg = rth;

                /* Indirect route: resolve the gateway to share its hh entry.
                   The lock is dropped around the recursive lookup. */
                if (rth->rt_gateway != daddr)
                {
                        ip_rt_fast_unlock();
                        rtg = ip_rt_route(rth->rt_gateway, 0);
                        ip_rt_fast_lock();
                }

                if (rtg)
                {
                        if (rtg == rth)
                                rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
                        else
                        {
                                if (rtg->rt_hh)
                                        ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
                                rth->rt_hh = rtg->rt_hh;
                                ip_rt_put(rtg);
                        }
                }
        }

        if (rt_cache_size >= RT_CACHE_SIZE_MAX)
                rt_garbage_collect();

        /* Link at the head of the chain under cli */
        cli();
        rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
        if (rth->rt_next)
        {
                struct rtable * trth;
                printk("rt_cache @%02x: %08x", hash, daddr);
                for (trth=rth->rt_next; trth; trth=trth->rt_next)
                        printk(" . %08x", trth->rt_dst);
                printk("\n");
        }
#endif
        ip_rt_hash_table[hash] = rth;
        rthp = &rth->rt_next;
        sti();
        rt_cache_size++;

        /*
         * Cleanup duplicate (and aged off) entries.
         */

        while ((rth = *rthp) != NULL)
        {

                cli();
                if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                    || rth->rt_dst == daddr)
                {
                        *rthp = rth->rt_next;
                        rt_cache_size--;
                        sti();
#if RT_CACHE_DEBUG >= 2
                        printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
                        rt_free(rth);
                        continue;
                }
                sti();
                rthp = &rth->rt_next;
        }
        restore_flags(flags);
}
1456 
1457 /*
1458    RT should be already locked.
1459    
1460    We could improve this by keeping a chain of say 32 struct rtable's
1461    last freed for fast recycling.
1462    
1463  */
1464 
/*
 *      Cache-miss path for ip_rt_route().  RT must already be locked
 *      by the caller; the lock is released on every exit path.  Looks
 *      the destination up in the FIB (local table when 'local' is
 *      set), builds a new host cache entry and, if we are the sole
 *      lock holder, inserts it into the cache.  Returns the entry
 *      with one reference held, or NULL on failure.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
        unsigned hash = ip_rt_hash_code(daddr)^local;
        struct rtable * rth;
        struct fib_node * f;
        struct fib_info * fi;
        __u32 saddr;

#if RT_CACHE_DEBUG >= 2
        printk("rt_cache miss @%08x\n", daddr);
#endif

        rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
        if (!rth)
        {
                ip_rt_unlock();
                return NULL;
        }

        if (local)
                f = fib_lookup_local(daddr);
        else
                f = fib_lookup (daddr);

        if (f)
        {
                fi = f->fib_info;
                f->fib_use++;
        }

        if (!f || (fi->fib_flags & RTF_REJECT))
        {
#if RT_CACHE_DEBUG >= 2
                printk("rt_route failed @%08x\n", daddr);
#endif
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
                return NULL;
        }

        saddr = fi->fib_dev->pa_addr;

        /* Destination is one of our own addresses: reroute via loopback,
           but keep the original interface address as the source. */
        if (daddr == fi->fib_dev->pa_addr)
        {
                f->fib_use--;
                if ((f = fib_loopback) != NULL)
                {
                        f->fib_use++;
                        fi = f->fib_info;
                }
        }

        if (!f)
        {
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
                return NULL;
        }

        rth->rt_dst     = daddr;
        rth->rt_src     = saddr;
        rth->rt_lastuse = jiffies;
        rth->rt_refcnt  = 1;    /* the reference we hand to the caller */
        rth->rt_use     = 1;
        rth->rt_next    = NULL;
        rth->rt_hh      = NULL;
        rth->rt_gateway = fi->fib_gateway;
        rth->rt_dev     = fi->fib_dev;
        rth->rt_mtu     = fi->fib_mtu;
        rth->rt_window  = fi->fib_window;
        rth->rt_irtt    = fi->fib_irtt;
        rth->rt_tos     = f->fib_tos;
        rth->rt_flags   = fi->fib_flags | RTF_HOST;
        if (local)
                rth->rt_flags   |= RTF_LOCAL;

        /* Direct route: the "gateway" is the destination itself */
        if (!(rth->rt_flags & RTF_GATEWAY))
                rth->rt_gateway = rth->rt_dst;

        if (ip_rt_lock == 1)
                rt_cache_add(hash, rth);
        else
        {
                /* Lock contended: the entry cannot be inserted safely,
                   so queue it for freeing and still return it (the
                   caller's reference keeps it alive meanwhile). */
                rt_free(rth);
#if RT_CACHE_DEBUG >= 1
                printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
        }

        ip_rt_unlock();
        return rth;
}
1557 
1558 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1559 {
1560         if (rt)
1561                 ATOMIC_DECR(&rt->rt_refcnt);
1562 }
1563 
1564 struct rtable * ip_rt_route(__u32 daddr, int local)
     /* [previous][next][first][last][top][bottom][index][help] */
1565 {
1566         struct rtable * rth;
1567 
1568         ip_rt_fast_lock();
1569 
1570         for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
1571         {
1572                 if (rth->rt_dst == daddr)
1573                 {
1574                         rth->rt_lastuse = jiffies;
1575                         ATOMIC_INCR(&rth->rt_use);
1576                         ATOMIC_INCR(&rth->rt_refcnt);
1577                         ip_rt_unlock();
1578                         return rth;
1579                 }
1580         }
1581         return ip_rt_slow_route (daddr, local);
1582 }
1583 
1584 
1585 /*
1586  *      Process a route add request from the user, or from a kernel
1587  *      task.
1588  */
1589  
/*
 *      Process a route add request.  'r' is an rtentry already copied
 *      into kernel space (see ip_rt_ioctl).  Returns 0 on success or
 *      a negative errno.
 */
int ip_rt_new(struct rtentry *r)
{
        int err;
        char * devname;
        struct device * dev = NULL;
        unsigned long flags;
        __u32 daddr, mask, gw;
        short metric;

        /*
         *      If a device is specified find it.
         */
         
        if ((devname = r->rt_dev) != NULL) 
        {
                /* rt_dev is a user-space string; copy it in first */
                err = getname(devname, &devname);
                if (err)
                        return err;
                dev = dev_get(devname);
                putname(devname);
                if (!dev)
                        return -ENODEV;
        }
        
        /*
         *      If the device isn't INET, don't allow it
         */

        if (r->rt_dst.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         *      Make local copies of the important bits
         *      We decrement the metric by one for BSD compatibility.
         */
         
        flags = r->rt_flags;
        daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
        mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
        gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
        metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;

        /*
         *      BSD emulation: Permits route add someroute gw one-of-my-addresses
         *      to indicate which iface. Not as clean as the nice Linux dev technique
         *      but people keep using it...  (and gated likes it ;))
         */
         
        if (!dev && (flags & RTF_GATEWAY)) 
        {
                struct device *dev2;
                for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
                {
                        if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
                        {
                                /* The "gateway" is really a local address */
                                flags &= ~RTF_GATEWAY;
                                dev = dev2;
                                break;
                        }
                }
        }

        /*
         *      Ignore faulty masks
         */
         
        if (bad_mask(mask, daddr))
                mask=0;

        /*
         *      Set the mask to nothing for host routes.
         */
         
        if (flags & RTF_HOST)
                mask = 0xffffffff;
        else if (mask && r->rt_genmask.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         *      You can only gateway IP via IP..
         */
         
        if (flags & RTF_GATEWAY) 
        {
                if (r->rt_gateway.sa_family != AF_INET)
                        return -EAFNOSUPPORT;
                /* No explicit device: use whichever reaches the gateway */
                if (!dev)
                        dev = get_gw_dev(gw);
        } 
        else if (!dev)
                dev = ip_dev_check(daddr);

        /*
         *      Unknown device.
         */
         
        if (dev == NULL)
                return -ENETUNREACH;

        /*
         *      Add the route
         */

        rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
        return 0;
}
1696 
1697 
1698 /*
1699  *      Remove a route, as requested by the user.
1700  */
1701 
1702 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1703 {
1704         struct sockaddr_in *trg;
1705         struct sockaddr_in *msk;
1706         struct sockaddr_in *gtw;
1707         char *devname;
1708         int err;
1709         struct device * dev = NULL;
1710 
1711         trg = (struct sockaddr_in *) &r->rt_dst;
1712         msk = (struct sockaddr_in *) &r->rt_genmask;
1713         gtw = (struct sockaddr_in *) &r->rt_gateway;
1714         if ((devname = r->rt_dev) != NULL) 
1715         {
1716                 err = getname(devname, &devname);
1717                 if (err)
1718                         return err;
1719                 dev = dev_get(devname);
1720                 putname(devname);
1721                 if (!dev)
1722                         return -ENODEV;
1723         }
1724         /*
1725          * metric can become negative here if it wasn't filled in
1726          * but that's a fortunate accident; we really use that in rt_del.
1727          */
1728         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1729                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1730         return err;
1731 }
1732 
1733 /*
1734  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1735  */
1736  
1737 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1738 {
1739         int err;
1740         struct rtentry rt;
1741 
1742         switch(cmd) 
1743         {
1744                 case SIOCADDRT:         /* Add a route */
1745                 case SIOCDELRT:         /* Delete a route */
1746                         if (!suser())
1747                                 return -EPERM;
1748                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1749                         if (err)
1750                                 return err;
1751                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1752                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : ip_rt_new(&rt);
1753         }
1754 
1755         return -EINVAL;
1756 }
1757 
/*
 *      Advisory hook for transport protocols to report on a route.
 *      Currently a no-op.
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
        /* Thanks! */
}
1763 

/* [previous][next][first][last][top][bottom][index][help] */