root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. bad_mask
  10. fib_del_list
  11. fib_del_1
  12. fib_create_info
  13. fib_add_1
  14. rt_flush_list
  15. fib_flush_1
  16. rt_get_info
  17. rt_cache_get_info
  18. rt_free
  19. rt_kick_free_queue
  20. ip_rt_run_bh
  21. ip_rt_check_expire
  22. rt_redirect_1
  23. rt_cache_flush
  24. rt_garbage_collect_1
  25. rt_req_enqueue
  26. rt_req_dequeue
  27. rt_kick_backlog
  28. rt_del
  29. rt_add
  30. ip_rt_flush
  31. ip_rt_redirect
  32. rt_garbage_collect
  33. rt_cache_add
  34. ip_rt_slow_route
  35. ip_rt_put
  36. ip_rt_route
  37. ip_rt_new
  38. ip_rt_kill
  39. ip_rt_ioctl
  40. ip_rt_advice
  41. ip_rt_update

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *              
  41  *              Olaf Erb        :       irtt wasn't being copied right.
  42  *              Bjorn Ekwall    :       Kerneld route support.
  43  *              Alan Cox        :       Multicast fixed (I hope)
  44  *              Pavel Krauz     :       Limited broadcast fixed
  45  *
  46  *              This program is free software; you can redistribute it and/or
  47  *              modify it under the terms of the GNU General Public License
  48  *              as published by the Free Software Foundation; either version
  49  *              2 of the License, or (at your option) any later version.
  50  */
  51 
  52 #include <linux/config.h>
  53 #include <asm/segment.h>
  54 #include <asm/system.h>
  55 #include <asm/bitops.h>
  56 #include <linux/types.h>
  57 #include <linux/kernel.h>
  58 #include <linux/sched.h>
  59 #include <linux/mm.h>
  60 #include <linux/string.h>
  61 #include <linux/socket.h>
  62 #include <linux/sockios.h>
  63 #include <linux/errno.h>
  64 #include <linux/in.h>
  65 #include <linux/inet.h>
  66 #include <linux/netdevice.h>
  67 #include <linux/if_arp.h>
  68 #include <net/ip.h>
  69 #include <net/protocol.h>
  70 #include <net/route.h>
  71 #include <net/tcp.h>
  72 #include <linux/skbuff.h>
  73 #include <net/sock.h>
  74 #include <net/icmp.h>
  75 #include <net/netlink.h>
  76 #ifdef CONFIG_KERNELD
  77 #include <linux/kerneld.h>
  78 #endif
  79 
  80 /*
  81  * Forwarding Information Base definitions.
  82  */
  83 
/*
 * A single routing table entry: one destination (compared under the
 * owning zone's netmask) plus the shared fib_info describing how to
 * reach it.  Nodes with equal destination are kept sorted by metric.
 */
struct fib_node
{
        struct fib_node         *fib_next;      /* next entry in this hash chain / flat list */
        __u32                   fib_dst;        /* destination address (network byte order)  */
        unsigned long           fib_use;        /* use counter, reported via /proc/net/route */
        struct fib_info         *fib_info;      /* shared gateway/device/path information    */
        short                   fib_metric;     /* route metric; lists are sorted on it      */
        unsigned char           fib_tos;        /* type of service (always 0 in fib_add_1)   */
};
  93 
  94 /*
  95  * This structure contains data shared by many of routes.
  96  */     
  97 
struct fib_info
{
        struct fib_info         *fib_next;      /* doubly linked global fib_info_list  */
        struct fib_info         *fib_prev;
        __u32                   fib_gateway;    /* gateway address (network byte order) */
        struct device           *fib_dev;       /* output device                        */
        int                     fib_refcnt;     /* number of fib_nodes sharing this     */
        unsigned long           fib_window;     /* TCP window value (0 if unspecified)  */
        unsigned short          fib_flags;      /* RTF_* route flags                    */
        unsigned short          fib_mtu;        /* MSS for this path (device MTU if not
                                                   set explicitly via RTF_MSS)          */
        unsigned short          fib_irtt;       /* initial RTT (0 if unspecified)       */
};
 110 
/*
 * One routing zone: all routes that share the same netmask length.
 * Entries live in the flat fz_list until the zone outgrows
 * RTZ_HASHING_LIMIT, after which fib_add_1() moves them into a
 * hash table and fz_list stays NULL.
 */
struct fib_zone
{
        struct fib_zone *fz_next;       /* next zone (shorter prefix)          */
        struct fib_node **fz_hash_table;/* hash buckets, NULL until hashed     */
        struct fib_node *fz_list;       /* flat entry list while unhashed      */
        int             fz_nent;        /* number of entries in this zone      */
        int             fz_logmask;     /* mask length as count of low 0 bits  */
        __u32           fz_mask;        /* the netmask (network byte order)    */
};
 120 
 121 static struct fib_zone  *fib_zones[33];
 122 static struct fib_zone  *fib_zone_list;
 123 static struct fib_node  *fib_loopback = NULL;
 124 static struct fib_info  *fib_info_list;
 125 
 126 /*
 127  * Backlogging.
 128  */
 129 
 130 #define RT_BH_REDIRECT          0
 131 #define RT_BH_GARBAGE_COLLECT   1
 132 #define RT_BH_FREE              2
 133 
/*
 * A deferred routing event queued on rt_backlog for bottom-half
 * processing (see the RT_BH_* bits above).
 */
struct rt_req
{
        struct rt_req * rtr_next;       /* next queued request                  */
        struct device *dev;             /* device the event relates to          */
        __u32 dst;                      /* destination address                  */
        __u32 gw;                       /* gateway (e.g. from an ICMP redirect) */
        unsigned char tos;              /* type of service                      */
};
 142 
 143 int                     ip_rt_lock;
 144 unsigned                ip_rt_bh_mask;
 145 static struct rt_req    *rt_backlog;
 146 
 147 /*
 148  * Route cache.
 149  */
 150 
 151 struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];
 152 static int              rt_cache_size;
 153 static struct rtable    *rt_free_queue;
 154 struct wait_queue       *rt_wait;
 155 
 156 static void rt_kick_backlog(void);
 157 static void rt_cache_add(unsigned hash, struct rtable * rth);
 158 static void rt_cache_flush(void);
 159 static void rt_garbage_collect_1(void);
 160 
 161 /* 
 162  * Evaluate mask length.
 163  */
 164 
 165 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 166 {
 167         if (!(mask = ntohl(mask)))
 168                 return 32;
 169         return ffz(~mask);
 170 }
 171 
 172 /* 
 173  * Create mask from length.
 174  */
 175 
 176 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178         if (logmask >= 32)
 179                 return 0;
 180         return htonl(~((1<<logmask)-1));
 181 }
 182 
 183 static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 184 {
 185         return ip_rt_hash_code(ntohl(dst)>>logmask);
 186 }
 187 
 188 /*
 189  * Free FIB node.
 190  */
 191 
 192 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 193 {
 194         struct fib_info * fi = f->fib_info;
 195         if (!--fi->fib_refcnt)
 196         {
 197 #if RT_CACHE_DEBUG >= 2
 198                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 199 #endif
 200                 if (fi->fib_next)
 201                         fi->fib_next->fib_prev = fi->fib_prev;
 202                 if (fi->fib_prev)
 203                         fi->fib_prev->fib_next = fi->fib_next;
 204                 if (fi == fib_info_list)
 205                         fib_info_list = fi->fib_next;
 206         }
 207         kfree_s(f, sizeof(struct fib_node));
 208 }
 209 
 210 /*
 211  * Find gateway route by address.
 212  */
 213 
 214 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 215 {
 216         struct fib_zone * fz;
 217         struct fib_node * f;
 218 
 219         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 220         {
 221                 if (fz->fz_hash_table)
 222                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 223                 else
 224                         f = fz->fz_list;
 225                 
 226                 for ( ; f; f = f->fib_next)
 227                 {
 228                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 229                                 continue;
 230                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 231                                 return NULL;
 232                         return f;
 233                 }
 234         }
 235         return NULL;
 236 }
 237 
 238 /*
 239  * Find local route by address.
 240  * FIXME: I use "longest match" principle. If destination
 241  *        has some non-local route, I'll not search shorter matches.
 242  *        It's possible, I'm wrong, but I wanted to prevent following
 243  *        situation:
 244  *      route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 245  *      route add 193.233.7.0   netmask 255.255.255.0 eth1
 246  *        (Two ethernets connected by serial line, one is small and other is large)
 247  *        Host 193.233.7.129 is locally unreachable,
 248  *        but old (<=1.3.37) code will send packets destined for it to eth1.
 249  *
 250  */
 251 
/*
 * Find a directly-reachable (non-gateway) route for dst, using
 * longest-prefix matching.  Once any route -- even a gateway one --
 * matches within a zone, shorter-prefix zones are not consulted;
 * see the FIXME comment above for the rationale.
 */
static struct fib_node * fib_lookup_local(__u32 dst)
{
        struct fib_zone * fz;
        struct fib_node * f;

        for (fz = fib_zone_list; fz; fz = fz->fz_next) 
        {
                int longest_match_found = 0;

                if (fz->fz_hash_table)
                        f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                else
                        f = fz->fz_list;
                
                for ( ; f; f = f->fib_next)
                {
                        /* Does dst fall inside this route's network? */
                        if ((dst ^ f->fib_dst) & fz->fz_mask)
                                continue;
                        if (!(f->fib_info->fib_flags & RTF_GATEWAY))
                                return f;
                        longest_match_found = 1;
                }
                /* Only a gateway route matched at this prefix length:
                 * stop rather than fall back to a shorter prefix. */
                if (longest_match_found)
                        return NULL;
        }
        return NULL;
}
 279 
 280 /*
 281  * Main lookup routine.
 282  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 283  *      by user. It doesn't route non-CIDR broadcasts by default.
 284  *
 285  *      F.e.
 286  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 287  *      is valid, but if you really are not able (not allowed, do not want) to
 288  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 289  *              route add -host 193.233.7.255 eth0
 290  */
 291 
 292 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 293 {
 294         struct fib_zone * fz;
 295         struct fib_node * f;
 296 
 297         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 298         {
 299                 if (fz->fz_hash_table)
 300                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 301                 else
 302                         f = fz->fz_list;
 303                 
 304                 for ( ; f; f = f->fib_next)
 305                 {
 306                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 307                                 continue;
 308                         return f;
 309                 }
 310         }
 311         return NULL;
 312 }
 313 
 314 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 315 {
 316         struct fib_node * f;
 317         f = fib_lookup_gateway(gw);
 318         if (f)
 319                 return f->fib_info->fib_dev;
 320         return NULL;
 321 }
 322 
 323 /*
 324  *      Check if a mask is acceptable.
 325  */
 326  
 327 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 328 {
 329         if (addr & (mask = ~mask))
 330                 return 1;
 331         mask = ntohl(mask);
 332         if (mask & (mask+1))
 333                 return 1;
 334         return 0;
 335 }
 336 
 337 
/*
 * Remove from the list rooted at *fp every node matching dst and,
 * where given (non-zero gateway, non-NULL device, metric >= 0), the
 * other selectors.  Emits a netlink DELROUTE message per deletion.
 * Returns the number of nodes removed; the caller adjusts fz_nent
 * and flushes the route cache.
 */
static int fib_del_list(struct fib_node **fp, __u32 dst,
                struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
{
        struct fib_node *f;
        int found=0;

        while((f = *fp) != NULL) 
        {
                struct fib_info * fi = f->fib_info;

                /*
                 *      Make sure the destination and netmask match.
                 *      metric, gateway and device are also checked
                 *      if they were specified.
                 */
                if (f->fib_dst != dst ||
                    (gtw && fi->fib_gateway != gtw) ||
                    (metric >= 0 && f->fib_metric != metric) ||
                    (dev && fi->fib_dev != dev) )
                {
                        fp = &f->fib_next;
                        continue;
                }
                /* Unlink with interrupts disabled so no reader sees a
                 * half-updated list or a stale fib_loopback pointer. */
                cli();
                *fp = f->fib_next;
                if (fib_loopback == f)
                        fib_loopback = NULL;
                sti();
                ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
                fib_free_node(f);
                found++;
        }
        return found;
}
 372 
/*
 * Delete matching route(s).
 *
 * A zero mask means "unspecified": every zone is searched for the
 * destination.  A non-zero mask restricts the search to the single
 * zone of that mask length.  Flushes the route cache when anything
 * was removed.  Returns 0 on success, -ESRCH if nothing matched.
 */
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
                struct device * dev, __u32 gtw, short flags, short metric)
{
        struct fib_node **fp;
        struct fib_zone *fz;
        int found=0;

        if (!mask)
        {
                for (fz=fib_zone_list; fz; fz = fz->fz_next)
                {
                        int tmp;
                        /* Pick the hash bucket (or flat list) that can
                         * contain dst in this zone. */
                        if (fz->fz_hash_table)
                                fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                        else
                                fp = &fz->fz_list;

                        tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
                        fz->fz_nent -= tmp;
                        found += tmp;
                }
        } 
        else
        {
                if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
                {
                        if (fz->fz_hash_table)
                                fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
                        else
                                fp = &fz->fz_list;
        
                        found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
                        fz->fz_nent -= found;
                }
        }

        if (found)
        {
                rt_cache_flush();
                return 0;
        }
        return -ESRCH;
}
 416 
 417 
/*
 * Find or create a fib_info with the given gateway, device and path
 * parameters.  mss/window/irtt are honoured only when the matching
 * RTF_MSS/RTF_WINDOW/RTF_IRTT flag is set; otherwise the device MTU
 * (optionally clamped to 576 via a gateway) and zeroes are used.
 * The returned info already carries the caller's reference; returns
 * NULL on allocation failure.
 */
static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
                                         unsigned short flags, unsigned short mss,
                                         unsigned long window, unsigned short irtt)
{
        struct fib_info * fi;

        if (!(flags & RTF_MSS))
        {
                mss = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
                /*
                 *      If MTU was not specified, use default.
                 *      If you want to increase MTU for some net (local subnet)
                 *      use "route add .... mss xxx".
                 *
                 *      The MTU isn't currently always used and computed as it
                 *      should be as far as I can tell. [Still verifying this is right]
                 */
                if ((flags & RTF_GATEWAY) && mss > 576)
                        mss = 576;
#endif
        }
        if (!(flags & RTF_WINDOW))
                window = 0;
        if (!(flags & RTF_IRTT))
                irtt = 0;

        /* Share an existing fib_info when every field matches. */
        for (fi=fib_info_list; fi; fi = fi->fib_next)
        {
                if (fi->fib_gateway != gw ||
                    fi->fib_dev != dev  ||
                    fi->fib_flags != flags ||
                    fi->fib_mtu != mss ||
                    fi->fib_window != window ||
                    fi->fib_irtt != irtt)
                        continue;
                fi->fib_refcnt++;
#if RT_CACHE_DEBUG >= 2
                printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
#endif
                return fi;
        }
        /* No match: allocate a fresh one and put it at the list head. */
        fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
        if (!fi)
                return NULL;
        memset(fi, 0, sizeof(struct fib_info));
        fi->fib_flags = flags;
        fi->fib_dev = dev;
        fi->fib_gateway = gw;
        fi->fib_mtu = mss;
        fi->fib_window = window;
        fi->fib_refcnt++;
        fi->fib_next = fib_info_list;
        fi->fib_prev = NULL;
        fi->fib_irtt = irtt;
        if (fib_info_list)
                fib_info_list->fib_prev = fi;
        fib_info_list = fi;
#if RT_CACHE_DEBUG >= 2
        printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
#endif
        return fi;
}
 481 
 482 
 483 static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 484         __u32 gw, struct device *dev, unsigned short mss,
 485         unsigned long window, unsigned short irtt, short metric)
 486 {
 487         struct fib_node *f, *f1;
 488         struct fib_node **fp;
 489         struct fib_node **dup_fp = NULL;
 490         struct fib_zone * fz;
 491         struct fib_info * fi;
 492         int logmask;
 493 
 494         /*
 495          *      Allocate an entry and fill it in.
 496          */
 497          
 498         f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
 499         if (f == NULL)
 500                 return;
 501 
 502         memset(f, 0, sizeof(struct fib_node));
 503         f->fib_dst = dst;
 504         f->fib_metric = metric;
 505         f->fib_tos    = 0;
 506 
 507         if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
 508         {
 509                 kfree_s(f, sizeof(struct fib_node));
 510                 return;
 511         }
 512         f->fib_info = fi;
 513 
 514         logmask = rt_logmask(mask);
 515         fz = fib_zones[logmask];
 516 
 517 
 518         if (!fz)
 519         {
 520                 int i;
 521                 fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
 522                 if (!fz)
 523                 {
 524                         fib_free_node(f);
 525                         return;
 526                 }
 527                 memset(fz, 0, sizeof(struct fib_zone));
 528                 fz->fz_logmask = logmask;
 529                 fz->fz_mask = mask;
 530                 for (i=logmask-1; i>=0; i--)
 531                         if (fib_zones[i])
 532                                 break;
 533                 cli();
 534                 if (i<0)
 535                 {
 536                         fz->fz_next = fib_zone_list;
 537                         fib_zone_list = fz;
 538                 }
 539                 else
 540                 {
 541                         fz->fz_next = fib_zones[i]->fz_next;
 542                         fib_zones[i]->fz_next = fz;
 543                 }
 544                 fib_zones[logmask] = fz;
 545                 sti();
 546         }
 547 
 548         /*
 549          * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
 550          */
 551 
 552         if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
 553         {
 554                 struct fib_node ** ht;
 555 #if RT_CACHE_DEBUG >= 2
 556                 printk("fib_add_1: hashing for zone %d started\n", logmask);
 557 #endif
 558                 ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
 559 
 560                 if (ht)
 561                 {
 562                         memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
 563                         cli();
 564                         f1 = fz->fz_list;
 565                         while (f1)
 566                         {
 567                                 struct fib_node * next;
 568                                 unsigned hash = fz_hash_code(f1->fib_dst, logmask);
 569                                 next = f1->fib_next;
 570                                 f1->fib_next = ht[hash];
 571                                 ht[hash] = f1;
 572                                 f1 = next;
 573                         }
 574                         fz->fz_list = NULL;
 575                         fz->fz_hash_table = ht; 
 576                         sti();
 577                 }
 578         }
 579 
 580         if (fz->fz_hash_table)
 581                 fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
 582         else
 583                 fp = &fz->fz_list;
 584 
 585         /*
 586          * Scan list to find the first route with the same destination
 587          */
 588         while ((f1 = *fp) != NULL)
 589         {
 590                 if (f1->fib_dst == dst)
 591                         break;
 592                 fp = &f1->fib_next;
 593         }
 594 
 595         /*
 596          * Find route with the same destination and less (or equal) metric.
 597          */
 598         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 599         {
 600                 if (f1->fib_metric >= metric)
 601                         break;
 602                 /*
 603                  *      Record route with the same destination and gateway,
 604                  *      but less metric. We'll delete it 
 605                  *      after instantiation of new route.
 606                  */
 607                 if (f1->fib_info->fib_gateway == gw)
 608                         dup_fp = fp;
 609                 fp = &f1->fib_next;
 610         }
 611 
 612         /*
 613          * Is it already present?
 614          */
 615 
 616         if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
 617         {
 618                 fib_free_node(f);
 619                 return;
 620         }
 621         
 622         /*
 623          * Insert new entry to the list.
 624          */
 625 
 626         cli();
 627         f->fib_next = f1;
 628         *fp = f;
 629         if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
 630                 fib_loopback = f;
 631         sti();
 632         fz->fz_nent++;
 633         ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
 634 
 635         /*
 636          *      Delete route with the same destination and gateway.
 637          *      Note that we should have at most one such route.
 638          */
 639         if (dup_fp)
 640                 fp = dup_fp;
 641         else
 642                 fp = &f->fib_next;
 643 
 644         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 645         {
 646                 if (f1->fib_info->fib_gateway == gw)
 647                 {
 648                         cli();
 649                         *fp = f1->fib_next;
 650                         if (fib_loopback == f1)
 651                                 fib_loopback = NULL;
 652                         sti();
 653                         ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
 654                         fib_free_node(f1);
 655                         fz->fz_nent--;
 656                         break;
 657                 }
 658                 fp = &f1->fib_next;
 659         }
 660         rt_cache_flush();
 661         return;
 662 }
 663 
/*
 * Remove from one list every route that goes through the given
 * device.  Returns the number of entries removed; the caller fixes
 * up the zone's fz_nent.
 */
static int rt_flush_list(struct fib_node ** fp, struct device *dev)
{
        int found = 0;
        struct fib_node *f;

        while ((f = *fp) != NULL) {
/*
 *      "Magic" device route is allowed to point to loopback,
 *      discard it too.
 */
                if (f->fib_info->fib_dev != dev &&
                    (dev != &loopback_dev || f->fib_dst != dev->pa_addr)) {
                        fp = &f->fib_next;
                        continue;
                }
                /* Unlink with interrupts disabled, as elsewhere, so no
                 * reader sees a half-updated list. */
                cli();
                *fp = f->fib_next;
                if (fib_loopback == f)
                        fib_loopback = NULL;
                sti();
                fib_free_node(f);
                found++;
        }
        return found;
}
 689 
 690 static __inline__ void fib_flush_1(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 691 {
 692         struct fib_zone *fz;
 693         int found = 0;
 694 
 695         for (fz = fib_zone_list; fz; fz = fz->fz_next)
 696         {
 697                 if (fz->fz_hash_table)
 698                 {
 699                         int i;
 700                         int tmp = 0;
 701                         for (i=0; i<RTZ_HASH_DIVISOR; i++)
 702                                 tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
 703                         fz->fz_nent -= tmp;
 704                         found += tmp;
 705                 }
 706                 else
 707                 {
 708                         int tmp;
 709                         tmp = rt_flush_list(&fz->fz_list, dev);
 710                         fz->fz_nent -= tmp;
 711                         found += tmp;
 712                 }
 713         }
 714                 
 715         if (found)
 716                 rt_cache_flush();
 717 }
 718 
 719 
 720 /* 
 721  *      Called from the PROCfs module. This outputs /proc/net/route.
 722  *
 723  *      We preserve the old format but pad the buffers out. This means that
 724  *      we can spin over the other entries as we read them. Remember the
 725  *      gated BGP4 code could need to read 60,000+ routes on occasion (that's
 726  *      about 7Mb of data). To do that ok we will need to also cache the
 727  *      last route we got to (reads will generally be following on from
 728  *      one another without gaps).
 729  */
 730  
/* 
 *      Called from the PROCfs module. This outputs /proc/net/route.
 *
 *      Every line, header included, is padded to exactly 128 bytes so
 *      that a file offset converts directly into an entry count; whole
 *      zones before the requested offset are skipped without
 *      formatting.  Returns the number of bytes placed in buffer and
 *      sets *start for the procfs read machinery.
 */
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
        struct fib_zone *fz;
        struct fib_node *f;
        int len=0;
        off_t pos=0;
        char temp[129];
        int i;
        
        /* pos tracks the virtual file position; the header occupies
         * the first 128 bytes. */
        pos = 128;

        if (offset<128)
        {
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
                len = 128;
        }
        
        /* Wait for any writer, then take the fast routing lock. */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (fz=fib_zone_list; fz; fz = fz->fz_next)
        {
                int maxslot;
                struct fib_node ** fp;

                if (fz->fz_nent == 0)
                        continue;

                /* Skip whole zones that end before the requested offset. */
                if (pos + 128*fz->fz_nent <= offset)
                {
                        pos += 128*fz->fz_nent;
                        len = 0;
                        continue;
                }

                /* Walk either all hash buckets or the single flat list. */
                if (fz->fz_hash_table)
                {
                        maxslot = RTZ_HASH_DIVISOR;
                        fp      = fz->fz_hash_table;
                }
                else
                {
                        maxslot = 1;
                        fp      = &fz->fz_list;
                }
                        
                for (i=0; i < maxslot; i++, fp++)
                {
                        
                        for (f = *fp; f; f = f->fib_next) 
                        {
                                struct fib_info * fi;
                                /*
                                 *      Spin through entries until we are ready
                                 */
                                pos += 128;

                                if (pos <= offset)
                                {
                                        len=0;
                                        continue;
                                }
                                        
                                fi = f->fib_info;
                                sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
                                        fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
                                        fi->fib_flags, 0, f->fib_use, f->fib_metric,
                                        (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
                                sprintf(buffer+len,"%-127s\n",temp);

                                len += 128;
                                if (pos >= offset+length)
                                        goto done;
                        }
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);
        
        /* Point *start at the first byte the caller asked for and
         * clamp the reported length to the request. */
        *start = buffer+len-(pos-offset);
        len = pos - offset;
        if (len>length)
                len = length;
        return len;
}
 819 
/*
 *	/proc read handler for the routing cache.  Every record written
 *	(header included) is padded to exactly 128 bytes, so the
 *	offset/length arithmetic below can treat the output as an array
 *	of fixed-size records across successive reads.
 */
int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	int len=0;
	off_t pos=0;
	char temp[129];
	struct rtable *r;
	int i;

	pos = 128;

	if (offset<128)
	{
		/* Caller's window still covers the header record. */
		sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP");
		len = 128;
	}
	
	
	/* Acquire the routing lock; sleep while another user holds it. */
	while  (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();

	for (i = 0; i<RT_HASH_DIVISOR; i++)
	{
		for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
		{
			/*
			 *	Spin through entries until we are ready
			 */
			pos += 128;

			if (pos <= offset)
			{
				/* Record lies before the window: nothing emitted yet. */
				len = 0;
				continue;
			}
					
			sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
				r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
				r->rt_flags, r->rt_refcnt, r->rt_use, 0,
				(unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
			sprintf(buffer+len,"%-127s\n",temp);
			len += 128;
			if (pos >= offset+length)
				goto done;
		}
	}

done:
	ip_rt_unlock();
	wake_up(&rt_wait);
	
	/* Report where the caller's requested offset starts within buffer. */
	*start = buffer+len-(pos-offset);
	len = pos-offset;
	if (len>length)
		len = length;
	return len;
}
 877 
 878 
/*
 *	Release a routing-cache entry.  If nobody holds a reference it is
 *	freed immediately (together with its hh_cache entry when this was
 *	the last user); otherwise it is parked on rt_free_queue with
 *	RTF_UP cleared and the RT bottom half is asked to reap it later.
 *	Interrupts are masked around the refcount test and list splice.
 */
static void rt_free(struct rtable * rt)
{
	unsigned long flags;

	save_flags(flags);
	cli();
	if (!rt->rt_refcnt)
	{
		struct hh_cache * hh = rt->rt_hh;
		rt->rt_hh = NULL;
		restore_flags(flags);
		if (hh && atomic_dec_and_test(&hh->hh_refcnt))
			kfree_s(hh, sizeof(struct hh_cache));
		kfree_s(rt, sizeof(struct rt_table));
		return;
	}
	/* Still referenced: queue for deferred freeing and make sure
	   nothing new starts using the entry (RTF_UP cleared). */
	rt->rt_next = rt_free_queue;
	rt->rt_flags &= ~RTF_UP;
	rt_free_queue = rt;
	ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
	printk("rt_free: %08x\n", rt->rt_dst);
#endif
	restore_flags(flags);
}
 904 
 905 /*
 906  * RT "bottom half" handlers. Called with masked interrupts.
 907  */
 908 
/*
 *	Reap entries parked on rt_free_queue by rt_free().  Entries whose
 *	refcount has reached zero are unlinked and freed; still-referenced
 *	ones remain queued for a later pass.  Called with interrupts
 *	masked; they are briefly re-enabled (sti/cli) around each kfree.
 */
static __inline__ void rt_kick_free_queue(void)
{
	struct rtable *rt, **rtp;

	rtp = &rt_free_queue;

	while ((rt = *rtp) != NULL)
	{
		if  (!rt->rt_refcnt)
		{
			struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
			__u32 daddr = rt->rt_dst;
#endif
			*rtp = rt->rt_next;
			rt->rt_hh = NULL;
			sti();
			if (hh && atomic_dec_and_test(&hh->hh_refcnt))
				kfree_s(hh, sizeof(struct hh_cache));
			kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
			printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
			cli();
			continue;
		}
		/* Entry still in use: skip it, keep it queued. */
		rtp = &rt->rt_next;
	}
}
 938 
/*
 *	Run deferred routing work (redirect backlog, garbage collection,
 *	free-queue reaping) recorded in ip_rt_bh_mask.  Work is only done
 *	when the routing lock is free; otherwise it stays pending for the
 *	next invocation.
 */
void ip_rt_run_bh()
{
	unsigned long flags;
	save_flags(flags);
	cli();
	if (ip_rt_bh_mask && !ip_rt_lock)
	{
		if (ip_rt_bh_mask & RT_BH_REDIRECT)
			rt_kick_backlog();

		if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
		{
			/* Hold the fast lock and re-enable interrupts while
			   the (potentially long) collection runs. */
			ip_rt_fast_lock();
			ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
			sti();
			rt_garbage_collect_1();
			cli();
			ip_rt_fast_unlock();
		}

		if (ip_rt_bh_mask & RT_BH_FREE)
			rt_kick_free_queue();
	}
	restore_flags(flags);
}
 964 
 965 
/*
 *	Periodic cache maintenance.  Two jobs per hash chain:
 *	  1. drop unreferenced entries idle longer than RT_CACHE_TIMEOUT;
 *	  2. bubble more recently/heavily used entries toward the chain
 *	     head so lookups find hot routes sooner.
 *	Only proceeds when this caller is the sole lock holder
 *	(ip_rt_lock == 1 right after ip_rt_fast_lock()).
 */
void ip_rt_check_expire()
{
	ip_rt_fast_lock();
	if (ip_rt_lock == 1)
	{
		int i;
		struct rtable *rth, **rthp;
		unsigned long flags;
		unsigned long now = jiffies;

		save_flags(flags);
		for (i=0; i<RT_HASH_DIVISOR; i++)
		{
			rthp = &ip_rt_hash_table[i];

			while ((rth = *rthp) != NULL)
			{
				struct rtable * rth_next = rth->rt_next;

				/*
				 * Cleanup aged off entries.
				 */

				cli();
				if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
				{
					*rthp = rth_next;
					sti();
					rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
					printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
					rt_free(rth);
					continue;
				}
				sti();

				if (!rth_next)
					break;

				/*
				 * LRU ordering.
				 */

				if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOLD < rth_next->rt_lastuse ||
				    (rth->rt_lastuse < rth_next->rt_lastuse &&
				     rth->rt_use < rth_next->rt_use))
				{
#if RT_CACHE_DEBUG >= 2
					printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
					/* Swap rth and rth_next in the chain, with
					   interrupts masked around the relink. */
					cli();
					*rthp = rth_next;
					rth->rt_next = rth_next->rt_next;
					rth_next->rt_next = rth;
					sti();
					rthp = &rth_next->rt_next;
					continue;
				}
				rthp = &rth->rt_next;
			}
		}
		restore_flags(flags);
		rt_kick_free_queue();
	}
	ip_rt_unlock();
}
1033 
1034 static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1035 {
1036         struct rtable *rt;
1037         unsigned long hash = ip_rt_hash_code(dst);
1038 
1039         if (gw == dev->pa_addr)
1040                 return;
1041         if (dev != get_gw_dev(gw))
1042                 return;
1043         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
1044         if (rt == NULL) 
1045                 return;
1046         memset(rt, 0, sizeof(struct rtable));
1047         rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
1048         rt->rt_dst = dst;
1049         rt->rt_dev = dev;
1050         rt->rt_gateway = gw;
1051         rt->rt_src = dev->pa_addr;
1052         rt->rt_mtu = dev->mtu;
1053 #ifdef CONFIG_NO_PATH_MTU_DISCOVERY
1054         if (dev->mtu > 576)
1055                 rt->rt_mtu = 576;
1056 #endif
1057         rt->rt_lastuse  = jiffies;
1058         rt->rt_refcnt  = 1;
1059         rt_cache_add(hash, rt);
1060         ip_rt_put(rt);
1061         return;
1062 }
1063 
/*
 *	Empty the whole route cache.  Each hash chain is detached under
 *	cli() so interrupt-context lookups never see a half-unlinked
 *	chain, then walked and freed with interrupts enabled.
 */
static void rt_cache_flush(void)
{
	int i;
	struct rtable * rth, * next;

	for (i=0; i<RT_HASH_DIVISOR; i++)
	{
		int nr=0;

		cli();
		if (!(rth = ip_rt_hash_table[i]))
		{
			sti();
			continue;
		}

		/* Detach the chain atomically, free it at leisure. */
		ip_rt_hash_table[i] = NULL;
		sti();

		for (; rth; rth=next)
		{
			next = rth->rt_next;
			rt_cache_size--;
			nr++;
			rth->rt_next = NULL;
			rt_free(rth);
		}
#if RT_CACHE_DEBUG >= 2
		if (nr > 0)
			printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
	}
#if RT_CACHE_DEBUG >= 1
	/* Accounting should be exactly zero now; resync if not. */
	if (rt_cache_size)
	{
		printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
		rt_cache_size = 0;
	}
#endif
}
1104 
/*
 *	Shrink the cache below RT_CACHE_SIZE_MAX.  Each sweep frees at
 *	most one sufficiently idle entry per hash bucket (note the
 *	"break"); if the cache is still too big, the idle threshold is
 *	halved and the table swept again, so termination is guaranteed.
 *	Entries with references get a proportionally longer grace period
 *	(expire * (refcnt + 1)).
 */
static void rt_garbage_collect_1(void)
{
	int i;
	unsigned expire = RT_CACHE_TIMEOUT>>1;
	struct rtable * rth, **rthp;
	unsigned long now = jiffies;

	for (;;)
	{
		for (i=0; i<RT_HASH_DIVISOR; i++)
		{
			if (!ip_rt_hash_table[i])
				continue;
			for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
			{
				if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
					continue;
				rt_cache_size--;
				/* Unlink under cli() so concurrent lookups
				   never follow a freed pointer. */
				cli();
				*rthp=rth->rt_next;
				rth->rt_next = NULL;
				sti();
				rt_free(rth);
				break;
			}
		}
		if (rt_cache_size < RT_CACHE_SIZE_MAX)
			return;
		expire >>= 1;
	}
}
1136 
1137 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1138 {
1139         unsigned long flags;
1140         struct rt_req * tail;
1141 
1142         save_flags(flags);
1143         cli();
1144         tail = *q;
1145         if (!tail)
1146                 rtr->rtr_next = rtr;
1147         else
1148         {
1149                 rtr->rtr_next = tail->rtr_next;
1150                 tail->rtr_next = rtr;
1151         }
1152         *q = rtr;
1153         restore_flags(flags);
1154         return;
1155 }
1156 
1157 /*
1158  * Caller should mask interrupts.
1159  */
1160 
1161 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1162 {
1163         struct rt_req * rtr;
1164 
1165         if (*q)
1166         {
1167                 rtr = (*q)->rtr_next;
1168                 (*q)->rtr_next = rtr->rtr_next;
1169                 if (rtr->rtr_next == rtr)
1170                         *q = NULL;
1171                 rtr->rtr_next = NULL;
1172                 return rtr;
1173         }
1174         return NULL;
1175 }
1176 
1177 /*
1178    Called with masked interrupts
1179  */
1180 
/*
 *	Drain the queued ICMP redirect requests (rt_backlog) deferred by
 *	ip_rt_redirect() while the lock was contended.  Called with
 *	masked interrupts; only runs when the routing lock is free, and
 *	re-enables interrupts around each rt_redirect_1() call.
 */
static void rt_kick_backlog()
{
	if (!ip_rt_lock)
	{
		struct rt_req * rtr;

		ip_rt_fast_lock();

		while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
		{
			sti();
			rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
			kfree_s(rtr, sizeof(struct rt_req));
			cli();
		}

		ip_rt_bh_mask &= ~RT_BH_REDIRECT;

		ip_rt_fast_unlock();
	}
}
1202 
1203 /*
1204  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1205  */
1206 
/*
 *	Delete a FIB route on behalf of a user process: sleep until the
 *	routing lock is free, perform the deletion, then wake any other
 *	waiters.  Returns the fib_del_1() result (0 or -errno).
 */
static int rt_del(__u32 dst, __u32 mask,
		struct device * dev, __u32 gtw, short rt_flags, short metric)
{
	int retval;

	while (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();
	retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
	ip_rt_unlock();
	wake_up(&rt_wait);
	return retval;
}
1220 
/*
 *	Add a FIB route on behalf of a user process: sleep until the
 *	routing lock is free, insert via fib_add_1(), then wake any
 *	other waiters.
 */
static void rt_add(short flags, __u32 dst, __u32 mask,
	__u32 gw, struct device *dev, unsigned short mss,
	unsigned long window, unsigned short irtt, short metric)
{
	while (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();
	fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
	ip_rt_unlock();
	wake_up(&rt_wait);
}
1232 
/*
 *	Flush all FIB routes through a device (e.g. when it goes down).
 *	User context only: may sleep waiting for the routing lock.
 */
void ip_rt_flush(struct device *dev)
{
	while (ip_rt_lock)
		sleep_on(&rt_wait);
	ip_rt_fast_lock();
	fib_flush_1(dev);
	ip_rt_unlock();
	wake_up(&rt_wait);
}
1242 
1243 /*
1244    Called by ICMP module.
1245  */
1246 
/*
 *	Handle an ICMP redirect from src: "reach dst via gw" on dev.
 *	The redirect is accepted only if it comes from our current
 *	gateway for dst, arrived on the right device, names a gateway on
 *	our own subnet, and the gateway is not one of our own addresses.
 *	Applied immediately when we are the sole lock holder, otherwise
 *	queued on rt_backlog for the RT bottom half.
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
	struct rt_req * rtr;
	struct rtable * rt;

	rt = ip_rt_route(dst, 0);
	if (!rt)
		return;

	if (rt->rt_gateway != src ||
	    rt->rt_dev != dev ||
	    ((gw^dev->pa_addr)&dev->pa_mask) ||
	    ip_chk_addr(gw))
	{
		ip_rt_put(rt);
		return;
	}
	ip_rt_put(rt);

	ip_rt_fast_lock();
	if (ip_rt_lock == 1)
	{
		/* Uncontended: install the redirect right away. */
		rt_redirect_1(dst, gw, dev);
		ip_rt_unlock();
		return;
	}

	/* Contended: defer to the bottom half via the backlog queue. */
	rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
	if (rtr)
	{
		rtr->dst = dst;
		rtr->gw = gw;
		rtr->dev = dev;
		rt_req_enqueue(&rt_backlog, rtr);
		ip_rt_bh_mask |= RT_BH_REDIRECT;
	}
	ip_rt_unlock();
}
1285 
1286 
1287 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1288 {
1289         if (ip_rt_lock == 1)
1290         {
1291                 rt_garbage_collect_1();
1292                 return;
1293         }
1294         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1295 }
1296 
/*
 *	Insert rth at the head of cache bucket "hash", binding a
 *	hardware-header cache entry when the device supports it, and
 *	then sweep the rest of the chain for duplicates of the same
 *	destination and for aged-out entries.  Must be called as the
 *	sole lock holder (ip_rt_lock == 1).
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
	unsigned long	flags;
	struct rtable	**rthp;
	__u32		daddr = rth->rt_dst;
	unsigned long	now = jiffies;

#if RT_CACHE_DEBUG >= 2
	if (ip_rt_lock != 1)
	{
		printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
		return;
	}
#endif

	save_flags(flags);

	if (rth->rt_dev->header_cache_bind)
	{
		struct rtable * rtg = rth;

		if (rth->rt_gateway != daddr)
		{
			/* Gatewayed route: share the hh entry of the route
			   to the gateway.  The lock must be dropped around
			   the recursive ip_rt_route() call. */
			ip_rt_fast_unlock();
			rtg = ip_rt_route(rth->rt_gateway, 0);
			ip_rt_fast_lock();
		}

		if (rtg)
		{
			if (rtg == rth)
				rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
			else
			{
				if (rtg->rt_hh)
					atomic_inc(&rtg->rt_hh->hh_refcnt);
				rth->rt_hh = rtg->rt_hh;
				ip_rt_put(rtg);
			}
		}
	}

	if (rt_cache_size >= RT_CACHE_SIZE_MAX)
		rt_garbage_collect();

	/* Head insertion under cli() so lookups see a consistent chain. */
	cli();
	rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
	if (rth->rt_next)
	{
		struct rtable * trth;
		printk("rt_cache @%02x: %08x", hash, daddr);
		for (trth=rth->rt_next; trth; trth=trth->rt_next)
			printk(" . %08x", trth->rt_dst);
		printk("\n");
	}
#endif
	ip_rt_hash_table[hash] = rth;
	rthp = &rth->rt_next;
	sti();
	rt_cache_size++;

	/*
	 * Cleanup duplicate (and aged off) entries.
	 */

	while ((rth = *rthp) != NULL)
	{

		cli();
		if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
		    || rth->rt_dst == daddr)
		{
			*rthp = rth->rt_next;
			rt_cache_size--;
			sti();
#if RT_CACHE_DEBUG >= 2
			printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
			rt_free(rth);
			continue;
		}
		sti();
		rthp = &rth->rt_next;
	}
	restore_flags(flags);
}
1384 
1385 /*
1386    RT should be already locked.
1387    
1388    We could improve this by keeping a chain of say 32 struct rtable's
1389    last freed for fast recycling.
1390    
1391  */
1392 
/*
 *	Cache-miss path: resolve daddr through the FIB, build a new
 *	rtable entry, insert it into the cache (when still the sole lock
 *	holder) and return it with one reference taken for the caller.
 *	RT should already be locked; the lock is released on every exit.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
	/* "local" is folded into the hash so local and non-local
	   lookups for the same address occupy different buckets. */
	unsigned hash = ip_rt_hash_code(daddr)^local;
	struct rtable * rth;
	struct fib_node * f;
	struct fib_info * fi;
	__u32 saddr;

#if RT_CACHE_DEBUG >= 2
	printk("rt_cache miss @%08x\n", daddr);
#endif

	rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
	if (!rth)
	{
		ip_rt_unlock();
		return NULL;
	}

	if (local)
		f = fib_lookup_local(daddr);
	else
		f = fib_lookup (daddr);

	if (f)
	{
		fi = f->fib_info;
		f->fib_use++;
	}

	if (!f || (fi->fib_flags & RTF_REJECT))
	{
#ifdef CONFIG_KERNELD	
		char wanted_route[20];
#endif		
#if RT_CACHE_DEBUG >= 2
		printk("rt_route failed @%08x\n", daddr);
#endif
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
#ifdef CONFIG_KERNELD		
		/* No route found: ask kerneld to try a dynamic one. */
		daddr=ntohl(daddr);
		sprintf(wanted_route, "%d.%d.%d.%d",
			(int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
			(int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
		kerneld_route(wanted_route);	/* Dynamic route request */
#endif		
		return NULL;
	}

	saddr = fi->fib_dev->pa_addr;

	if (daddr == fi->fib_dev->pa_addr)
	{
		/* Destination is one of our own addresses: reroute the
		   traffic through the loopback entry instead. */
		f->fib_use--;
		if ((f = fib_loopback) != NULL)
		{
			f->fib_use++;
			fi = f->fib_info;
		}
	}
	
	if (!f)
	{
		ip_rt_unlock();
		kfree_s(rth, sizeof(struct rtable));
		return NULL;
	}

	rth->rt_dst	= daddr;
	rth->rt_src	= saddr;
	rth->rt_lastuse	= jiffies;
	rth->rt_refcnt	= 1;
	rth->rt_use	= 1;
	rth->rt_next	= NULL;
	rth->rt_hh	= NULL;
	rth->rt_gateway	= fi->fib_gateway;
	rth->rt_dev	= fi->fib_dev;
	rth->rt_mtu	= fi->fib_mtu;
	rth->rt_window	= fi->fib_window;
	rth->rt_irtt	= fi->fib_irtt;
	rth->rt_tos	= f->fib_tos;
	rth->rt_flags   = fi->fib_flags | RTF_HOST;
	if (local)
		rth->rt_flags   |= RTF_LOCAL;

	if (!(rth->rt_flags & RTF_GATEWAY))
		rth->rt_gateway = rth->rt_dst;
	/*
	 *	Multicast or limited broadcast is never gatewayed.
	 */
	if (MULTICAST(daddr) || daddr == 0xFFFFFFFF)
		rth->rt_gateway = rth->rt_dst;

	if (ip_rt_lock == 1)
		rt_cache_add(hash, rth);
	else
	{
		/* Lock became contended while we built the entry: it
		   cannot be cached safely, so it dies after this use. */
		rt_free(rth);
#if RT_CACHE_DEBUG >= 1
		printk(KERN_DEBUG "rt_cache: route to %08x was born dead\n", daddr);
#endif
	}

	ip_rt_unlock();
	return rth;
}
1500 
1501 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1502 {
1503         if (rt)
1504                 atomic_dec(&rt->rt_refcnt);
1505 }
1506 
/*
 *	Fast-path route lookup: scan the cache bucket for daddr and
 *	return a referenced entry on a hit.  On a miss, fall through to
 *	ip_rt_slow_route() while still holding the fast lock - the slow
 *	path releases it on every exit, just as the hit path does here.
 */
struct rtable * ip_rt_route(__u32 daddr, int local)
{
	struct rtable * rth;

	ip_rt_fast_lock();

	for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
	{
		if (rth->rt_dst == daddr)
		{
			rth->rt_lastuse = jiffies;
			atomic_inc(&rth->rt_use);
			atomic_inc(&rth->rt_refcnt);
			ip_rt_unlock();
			return rth;
		}
	}
	return ip_rt_slow_route (daddr, local);
}
1526 
1527 /*
1528  *      Process a route add request from the user, or from a kernel
1529  *      task.
1530  */
1531  
/*
 *	Process a route add request from the user, or from a kernel
 *	task.  Validates the rtentry, resolves the output device (by
 *	name, by BSD-style gateway address, or by destination network),
 *	normalizes the netmask and finally inserts the route via
 *	rt_add().  Returns 0 or a negative errno.
 */
int ip_rt_new(struct rtentry *r)
{
	int err;
	char * devname;
	struct device * dev = NULL;
	unsigned long flags;
	__u32 daddr, mask, gw;
	short metric;

	/*
	 *	If a device is specified find it.
	 */
	 
	if ((devname = r->rt_dev) != NULL) 
	{
		/* devname is a userspace pointer; copy it in. */
		err = getname(devname, &devname);
		if (err)
			return err;
		dev = dev_get(devname);
		putname(devname);
		if (!dev)
			return -ENODEV;
	}
	
	/*
	 *	If the device isn't INET, don't allow it
	 */

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 *	Make local copies of the important bits
	 *	We decrement the metric by one for BSD compatibility.
	 */
	 
	flags = r->rt_flags;
	daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
	mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
	gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
	metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;

	/*
	 *	BSD emulation: Permits route add someroute gw one-of-my-addresses
	 *	to indicate which iface. Not as clean as the nice Linux dev technique
	 *	but people keep using it...  (and gated likes it ;))
	 */
	 
	if (!dev && (flags & RTF_GATEWAY)) 
	{
		struct device *dev2;
		for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
		{
			if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
			{
				/* "Gateway" is our own address: this is
				   really a direct route out of dev2. */
				flags &= ~RTF_GATEWAY;
				dev = dev2;
				break;
			}
		}
	}

	if (flags & RTF_HOST) 
		mask = 0xffffffff;
	else if (mask && r->rt_genmask.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	if (flags & RTF_GATEWAY) 
	{
		if (r->rt_gateway.sa_family != AF_INET)
			return -EAFNOSUPPORT;

		/*
		 *	Don't try to add a gateway we can't reach.. 
		 *	Tunnel devices are exempt from this rule.
		 */

		if (!dev)
			dev = get_gw_dev(gw);
		else if (dev != get_gw_dev(gw) && dev->type != ARPHRD_TUNNEL)
			return -EINVAL;
		if (!dev)
			return -ENETUNREACH;
	} 
	else
	{
		gw = 0;
		if (!dev)
			dev = ip_dev_bynet(daddr, mask);
		if (!dev)
			return -ENETUNREACH;
		if (!mask)
		{
			/* No mask given: inherit the device's mask when
			   the destination lies on its network. */
			if (((daddr ^ dev->pa_addr) & dev->pa_mask) == 0)
				mask = dev->pa_mask;
		}
	}

#ifndef CONFIG_IP_CLASSLESS
	if (!mask)
		mask = ip_get_mask(daddr);
#endif
	
	if (bad_mask(mask, daddr))
		return -EINVAL;

	/*
	 *	Add the route
	 */

	rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
	return 0;
}
1645 
1646 
1647 /*
1648  *      Remove a route, as requested by the user.
1649  */
1650 
1651 int ip_rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1652 {
1653         struct sockaddr_in *trg;
1654         struct sockaddr_in *msk;
1655         struct sockaddr_in *gtw;
1656         char *devname;
1657         int err;
1658         struct device * dev = NULL;
1659 
1660         trg = (struct sockaddr_in *) &r->rt_dst;
1661         msk = (struct sockaddr_in *) &r->rt_genmask;
1662         gtw = (struct sockaddr_in *) &r->rt_gateway;
1663         if ((devname = r->rt_dev) != NULL) 
1664         {
1665                 err = getname(devname, &devname);
1666                 if (err)
1667                         return err;
1668                 dev = dev_get(devname);
1669                 putname(devname);
1670                 if (!dev)
1671                         return -ENODEV;
1672         }
1673         /*
1674          * metric can become negative here if it wasn't filled in
1675          * but that's a fortunate accident; we really use that in rt_del.
1676          */
1677         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1678                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1679         return err;
1680 }
1681 
1682 /*
1683  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1684  */
1685  
1686 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1687 {
1688         int err;
1689         struct rtentry rt;
1690 
1691         switch(cmd) 
1692         {
1693                 case SIOCADDRT:         /* Add a route */
1694                 case SIOCDELRT:         /* Delete a route */
1695                         if (!suser())
1696                                 return -EPERM;
1697                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1698                         if (err)
1699                                 return err;
1700                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1701                         return (cmd == SIOCDELRT) ? ip_rt_kill(&rt) : ip_rt_new(&rt);
1702         }
1703 
1704         return -EINVAL;
1705 }
1706 
/*
 *	Route advice hook - deliberately a no-op in this implementation:
 *	the advice is acknowledged ("Thanks!") and ignored.
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
}
1712 
1713 void ip_rt_update(int event, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1714 {
1715         if (event == NETDEV_UP)
1716                 rt_add(RTF_HOST|RTF_UP, dev->pa_addr, ~0, 0, dev, 0, 0, 0, 0);
1717         else if (event == NETDEV_DOWN)
1718                 rt_del(dev->pa_addr, ~0, dev, 0, RTF_HOST|RTF_UP, 0);
1719 }

/* [previous][next][first][last][top][bottom][index][help] */