root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. default_mask
  10. guess_mask
  11. bad_mask
  12. fib_del_list
  13. fib_del_1
  14. fib_create_info
  15. fib_add_1
  16. rt_flush_list
  17. fib_flush_1
  18. rt_get_info
  19. rt_cache_get_info
  20. rt_free
  21. rt_kick_free_queue
  22. ip_rt_run_bh
  23. ip_rt_check_expire
  24. rt_redirect_1
  25. rt_cache_flush
  26. rt_garbage_collect_1
  27. rt_req_enqueue
  28. rt_req_dequeue
  29. rt_kick_backlog
  30. rt_del
  31. rt_add
  32. ip_rt_flush
  33. ip_rt_redirect
  34. rt_garbage_collect
  35. rt_cache_add
  36. ip_rt_slow_route
  37. ip_rt_put
  38. ip_rt_route
  39. ip_rt_new
  40. rt_kill
  41. ip_rt_ioctl
  42. ip_rt_advice

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *              
  41  *              Olaf Erb        :       irtt wasn't being copied right.
  42  *              Bjorn Ekwall    :       Kerneld route support.
  43  *
  44  *              This program is free software; you can redistribute it and/or
  45  *              modify it under the terms of the GNU General Public License
  46  *              as published by the Free Software Foundation; either version
  47  *              2 of the License, or (at your option) any later version.
  48  */
  49 
  50 #include <linux/config.h>
  51 #include <asm/segment.h>
  52 #include <asm/system.h>
  53 #include <asm/bitops.h>
  54 #include <linux/types.h>
  55 #include <linux/kernel.h>
  56 #include <linux/sched.h>
  57 #include <linux/mm.h>
  58 #include <linux/string.h>
  59 #include <linux/socket.h>
  60 #include <linux/sockios.h>
  61 #include <linux/errno.h>
  62 #include <linux/in.h>
  63 #include <linux/inet.h>
  64 #include <linux/netdevice.h>
  65 #include <net/ip.h>
  66 #include <net/protocol.h>
  67 #include <net/route.h>
  68 #include <net/tcp.h>
  69 #include <linux/skbuff.h>
  70 #include <net/sock.h>
  71 #include <net/icmp.h>
  72 #include <net/netlink.h>
  73 #ifdef CONFIG_KERNELD
  74 #include <linux/kerneld.h>
  75 #endif
  76 
  77 /*
  78  * Forwarding Information Base definitions.
  79  */
  80 
  81 struct fib_node
  82 {
  83         struct fib_node         *fib_next;
  84         __u32                   fib_dst;
  85         unsigned long           fib_use;
  86         struct fib_info         *fib_info;
  87         short                   fib_metric;
  88         unsigned char           fib_tos;
  89 };
  90 
  91 /*
  92  * This structure contains data shared by many of routes.
  93  */     
  94 
  95 struct fib_info
  96 {
  97         struct fib_info         *fib_next;
  98         struct fib_info         *fib_prev;
  99         __u32                   fib_gateway;
 100         struct device           *fib_dev;
 101         int                     fib_refcnt;
 102         unsigned long           fib_window;
 103         unsigned short          fib_flags;
 104         unsigned short          fib_mtu;
 105         unsigned short          fib_irtt;
 106 };
 107 
 108 struct fib_zone
 109 {
 110         struct fib_zone *fz_next;
 111         struct fib_node **fz_hash_table;
 112         struct fib_node *fz_list;
 113         int             fz_nent;
 114         int             fz_logmask;
 115         __u32           fz_mask;
 116 };
 117 
 118 static struct fib_zone  *fib_zones[33];
 119 static struct fib_zone  *fib_zone_list;
 120 static struct fib_node  *fib_loopback = NULL;
 121 static struct fib_info  *fib_info_list;
 122 
 123 /*
 124  * Backlogging.
 125  */
 126 
 127 #define RT_BH_REDIRECT          0
 128 #define RT_BH_GARBAGE_COLLECT   1
 129 #define RT_BH_FREE              2
 130 
 131 struct rt_req
 132 {
 133         struct rt_req * rtr_next;
 134         struct device *dev;
 135         __u32 dst;
 136         __u32 gw;
 137         unsigned char tos;
 138 };
 139 
 140 int                     ip_rt_lock;
 141 unsigned                ip_rt_bh_mask;
 142 static struct rt_req    *rt_backlog;
 143 
 144 /*
 145  * Route cache.
 146  */
 147 
 148 struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];
 149 static int              rt_cache_size;
 150 static struct rtable    *rt_free_queue;
 151 struct wait_queue       *rt_wait;
 152 
 153 static void rt_kick_backlog(void);
 154 static void rt_cache_add(unsigned hash, struct rtable * rth);
 155 static void rt_cache_flush(void);
 156 static void rt_garbage_collect_1(void);
 157 
 158 /* 
 159  * Evaluate mask length.
 160  */
 161 
 162 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 163 {
 164         if (!(mask = ntohl(mask)))
 165                 return 32;
 166         return ffz(~mask);
 167 }
 168 
 169 /* 
 170  * Create mask from length.
 171  */
 172 
 173 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 174 {
 175         if (logmask >= 32)
 176                 return 0;
 177         return htonl(~((1<<logmask)-1));
 178 }
 179 
 180 static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182         return ip_rt_hash_code(ntohl(dst)>>logmask);
 183 }
 184 
 185 /*
 186  * Free FIB node.
 187  */
 188 
 189 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 190 {
 191         struct fib_info * fi = f->fib_info;
 192         if (!--fi->fib_refcnt)
 193         {
 194 #if RT_CACHE_DEBUG >= 2
 195                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 196 #endif
 197                 if (fi->fib_next)
 198                         fi->fib_next->fib_prev = fi->fib_prev;
 199                 if (fi->fib_prev)
 200                         fi->fib_prev->fib_next = fi->fib_next;
 201                 if (fi == fib_info_list)
 202                         fib_info_list = fi->fib_next;
 203         }
 204         kfree_s(f, sizeof(struct fib_node));
 205 }
 206 
 207 /*
 208  * Find gateway route by address.
 209  */
 210 
 211 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 212 {
 213         struct fib_zone * fz;
 214         struct fib_node * f;
 215 
 216         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 217         {
 218                 if (fz->fz_hash_table)
 219                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 220                 else
 221                         f = fz->fz_list;
 222                 
 223                 for ( ; f; f = f->fib_next)
 224                 {
 225                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 226                                 continue;
 227                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 228                                 return NULL;
 229                         return f;
 230                 }
 231         }
 232         return NULL;
 233 }
 234 
 235 /*
 236  * Find local route by address.
 237  * FIXME: I use "longest match" principle. If destination
 238  *        has some non-local route, I'll not search shorter matches.
 239  *        It's possible, I'm wrong, but I wanted to prevent following
 240  *        situation:
 241  *      route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 242  *      route add 193.233.7.0   netmask 255.255.255.0 eth1
 243  *        (Two ethernets connected by serial line, one is small and other is large)
 244  *        Host 193.233.7.129 is locally unreachable,
 245  *        but old (<=1.3.37) code will send packets destined for it to eth1.
 246  *
 247  */
 248 
 249 static struct fib_node * fib_lookup_local(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 250 {
 251         struct fib_zone * fz;
 252         struct fib_node * f;
 253 
 254         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 255         {
 256                 int longest_match_found = 0;
 257 
 258                 if (fz->fz_hash_table)
 259                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 260                 else
 261                         f = fz->fz_list;
 262                 
 263                 for ( ; f; f = f->fib_next)
 264                 {
 265                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 266                                 continue;
 267                         if (!(f->fib_info->fib_flags & RTF_GATEWAY))
 268                                 return f;
 269                         longest_match_found = 1;
 270                 }
 271                 if (longest_match_found)
 272                         return NULL;
 273         }
 274         return NULL;
 275 }
 276 
 277 /*
 278  * Main lookup routine.
 279  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 280  *      by user. It doesn't route non-CIDR broadcasts by default.
 281  *
 282  *      F.e.
 283  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 284  *      is valid, but if you really are not able (not allowed, do not want) to
 285  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 286  *              route add -host 193.233.7.255 eth0
 287  */
 288 
 289 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 290 {
 291         struct fib_zone * fz;
 292         struct fib_node * f;
 293 
 294         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 295         {
 296                 if (fz->fz_hash_table)
 297                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 298                 else
 299                         f = fz->fz_list;
 300                 
 301                 for ( ; f; f = f->fib_next)
 302                 {
 303                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 304                                 continue;
 305                         return f;
 306                 }
 307         }
 308         return NULL;
 309 }
 310 
 311 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 312 {
 313         struct fib_node * f;
 314         f = fib_lookup_gateway(gw);
 315         if (f)
 316                 return f->fib_info->fib_dev;
 317         return NULL;
 318 }
 319 
 320 /*
 321  *      Used by 'rt_add()' when we can't get the netmask any other way..
 322  *
 323  *      If the lower byte or two are zero, we guess the mask based on the
 324  *      number of zero 8-bit net numbers, otherwise we use the "default"
 325  *      masks judging by the destination address and our device netmask.
 326  */
 327  
 328 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 329 {
 330         dst = ntohl(dst);
 331         if (IN_CLASSA(dst))
 332                 return htonl(IN_CLASSA_NET);
 333         if (IN_CLASSB(dst))
 334                 return htonl(IN_CLASSB_NET);
 335         return htonl(IN_CLASSC_NET);
 336 }
 337 
 338 
 339 /*
 340  *      If no mask is specified then generate a default entry.
 341  */
 342 
 343 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 344 {
 345         __u32 mask;
 346 
 347         if (!dst)
 348                 return 0;
 349         mask = default_mask(dst);
 350         if ((dst ^ dev->pa_addr) & mask)
 351                 return mask;
 352         return dev->pa_mask;
 353 }
 354 
 355 
 356 /*
 357  *      Check if a mask is acceptable.
 358  */
 359  
 360 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 361 {
 362         if (addr & (mask = ~mask))
 363                 return 1;
 364         mask = ntohl(mask);
 365         if (mask & (mask+1))
 366                 return 1;
 367         return 0;
 368 }
 369 
 370 
/*
 *	Delete from the list rooted at *fp every route whose destination
 *	equals dst.  Gateway, metric and device further narrow the match,
 *	but only when the caller specified them (gtw != 0, metric >= 0,
 *	dev != NULL).  flags and mask are passed through for the netlink
 *	deletion report only.  Returns the number of routes removed.
 */
static int fib_del_list(struct fib_node **fp, __u32 dst,
		struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
{
	struct fib_node *f;
	int found=0;

	while((f = *fp) != NULL) 
	{
		struct fib_info * fi = f->fib_info;

		/*
		 *	Make sure the destination and netmask match.
		 *	metric, gateway and device are also checked
		 *	if they were specified.
		 */
		if (f->fib_dst != dst ||
		    (gtw && fi->fib_gateway != gtw) ||
		    (metric >= 0 && f->fib_metric != metric) ||
		    (dev && fi->fib_dev != dev) )
		{
			fp = &f->fib_next;
			continue;
		}
		/* Unlink with interrupts disabled so no reader ever sees a
		 * half-removed entry; clear the loopback cache if it points here. */
		cli();
		*fp = f->fib_next;
		if (fib_loopback == f)
			fib_loopback = NULL;
		sti();
		/* Report the deletion to interested routing daemons. */
		ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
		fib_free_node(f);
		found++;
	}
	return found;
}
 405 
/*
 *	Delete routes matching dst (and optional gateway, device, metric).
 *	A zero mask means "scan every zone"; a non-zero mask restricts the
 *	search to the single zone of that mask length.  Flushes the route
 *	cache when anything was removed.  Returns 0 on success, -ESRCH
 *	when no route matched.
 */
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
		struct device * dev, __u32 gtw, short flags, short metric)
{
	struct fib_node **fp;
	struct fib_zone *fz;
	int found=0;

	if (!mask)
	{
		/* No mask given: try every zone. */
		for (fz=fib_zone_list; fz; fz = fz->fz_next)
		{
			int tmp;
			if (fz->fz_hash_table)
				fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
			else
				fp = &fz->fz_list;

			tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
			fz->fz_nent -= tmp;
			found += tmp;
		}
	} 
	else
	{
		/* Mask given: only the matching zone can contain the route. */
		if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
		{
			if (fz->fz_hash_table)
				fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
			else
				fp = &fz->fz_list;
	
			found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
			fz->fz_nent -= found;
		}
	}

	if (found)
	{
		rt_cache_flush();
		return 0;
	}
	return -ESRCH;
}
 449 
 450 
/*
 *	Find or create a fib_info describing (gateway, device, flags,
 *	mss, window, irtt).  Identical descriptions are shared between
 *	routes via refcounting: an existing match gets its refcount
 *	bumped; otherwise a new entry is allocated (GFP_KERNEL) and
 *	linked at the head of fib_info_list.  Returns NULL on allocation
 *	failure.
 */
static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
					 unsigned short flags, unsigned short mss,
					 unsigned long window, unsigned short irtt)
{
	struct fib_info * fi;

	/* Unspecified metrics are normalised before the comparison loop
	 * so that equivalent requests share one fib_info. */
	if (!(flags & RTF_MSS))
	{
		mss = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
		/*
		 *	If MTU was not specified, use default.
		 *	If you want to increase MTU for some net (local subnet)
		 *	use "route add .... mss xxx".
		 *
		 *	The MTU isn't currently always used and computed as it
		 *	should be as far as I can tell. [Still verifying this is right]
		 */
		if ((flags & RTF_GATEWAY) && mss > 576)
			mss = 576;
#endif
	}
	if (!(flags & RTF_WINDOW))
		window = 0;
	if (!(flags & RTF_IRTT))
		irtt = 0;

	/* Look for an existing fib_info with the exact same description. */
	for (fi=fib_info_list; fi; fi = fi->fib_next)
	{
		if (fi->fib_gateway != gw ||
		    fi->fib_dev != dev  ||
		    fi->fib_flags != flags ||
		    fi->fib_mtu != mss ||
		    fi->fib_window != window ||
		    fi->fib_irtt != irtt)
			continue;
		fi->fib_refcnt++;
#if RT_CACHE_DEBUG >= 2
		printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
#endif
		return fi;
	}
	/* No match: allocate and link a fresh entry at the list head. */
	fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
	if (!fi)
		return NULL;
	memset(fi, 0, sizeof(struct fib_info));
	fi->fib_flags = flags;
	fi->fib_dev = dev;
	fi->fib_gateway = gw;
	fi->fib_mtu = mss;
	fi->fib_window = window;
	fi->fib_refcnt++;
	fi->fib_next = fib_info_list;
	fi->fib_prev = NULL;
	fi->fib_irtt = irtt;
	if (fib_info_list)
		fib_info_list->fib_prev = fi;
	fib_info_list = fi;
#if RT_CACHE_DEBUG >= 2
	printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
#endif
	return fi;
}
 514 
 515 
 516 static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
 517         __u32 gw, struct device *dev, unsigned short mss,
 518         unsigned long window, unsigned short irtt, short metric)
 519 {
 520         struct fib_node *f, *f1;
 521         struct fib_node **fp;
 522         struct fib_node **dup_fp = NULL;
 523         struct fib_zone * fz;
 524         struct fib_info * fi;
 525         int logmask;
 526 
 527         if (flags & RTF_HOST) 
 528                 mask = 0xffffffff;
 529         /*
 530          * If mask is not specified, try to guess it.
 531          */
 532         else if (!mask)
 533         {
 534                 if (!((dst ^ dev->pa_addr) & dev->pa_mask)) 
 535                 {
 536                         mask = dev->pa_mask;
 537                         flags &= ~RTF_GATEWAY;
 538                         if (flags & RTF_DYNAMIC) 
 539                         {
 540                                 printk("Dynamic route to my own net rejected\n");
 541                                 return;
 542                         }
 543                 } 
 544                 else
 545                         mask = guess_mask(dst, dev);
 546                 dst &= mask;
 547         }
 548         
 549         /*
 550          *      A gateway must be reachable and not a local address
 551          */
 552          
 553         if (gw == dev->pa_addr)
 554                 flags &= ~RTF_GATEWAY;
 555                 
 556         if (flags & RTF_GATEWAY) 
 557         {
 558                 /*
 559                  *      Don't try to add a gateway we can't reach.. 
 560                  */
 561                  
 562                 if (dev != get_gw_dev(gw))
 563                         return;
 564                         
 565                 flags |= RTF_GATEWAY;
 566         } 
 567         else
 568                 gw = 0;
 569                 
 570         /*
 571          *      Allocate an entry and fill it in.
 572          */
 573          
 574         f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
 575         if (f == NULL)
 576                 return;
 577 
 578         memset(f, 0, sizeof(struct fib_node));
 579         f->fib_dst = dst;
 580         f->fib_metric = metric;
 581         f->fib_tos    = 0;
 582 
 583         if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
 584         {
 585                 kfree_s(f, sizeof(struct fib_node));
 586                 return;
 587         }
 588         f->fib_info = fi;
 589 
 590         logmask = rt_logmask(mask);
 591         fz = fib_zones[logmask];
 592 
 593 
 594         if (!fz)
 595         {
 596                 int i;
 597                 fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
 598                 if (!fz)
 599                 {
 600                         fib_free_node(f);
 601                         return;
 602                 }
 603                 memset(fz, 0, sizeof(struct fib_zone));
 604                 fz->fz_logmask = logmask;
 605                 fz->fz_mask = mask;
 606                 for (i=logmask-1; i>=0; i--)
 607                         if (fib_zones[i])
 608                                 break;
 609                 cli();
 610                 if (i<0)
 611                 {
 612                         fz->fz_next = fib_zone_list;
 613                         fib_zone_list = fz;
 614                 }
 615                 else
 616                 {
 617                         fz->fz_next = fib_zones[i]->fz_next;
 618                         fib_zones[i]->fz_next = fz;
 619                 }
 620                 fib_zones[logmask] = fz;
 621                 sti();
 622         }
 623 
 624         /*
 625          * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
 626          */
 627 
 628         if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
 629         {
 630                 struct fib_node ** ht;
 631 #if RT_CACHE_DEBUG
 632                 printk("fib_add_1: hashing for zone %d started\n", logmask);
 633 #endif
 634                 ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);
 635 
 636                 if (ht)
 637                 {
 638                         memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
 639                         cli();
 640                         f1 = fz->fz_list;
 641                         while (f1)
 642                         {
 643                                 struct fib_node * next;
 644                                 unsigned hash = fz_hash_code(f1->fib_dst, logmask);
 645                                 next = f1->fib_next;
 646                                 f1->fib_next = ht[hash];
 647                                 ht[hash] = f1;
 648                                 f1 = next;
 649                         }
 650                         fz->fz_list = NULL;
 651                         fz->fz_hash_table = ht; 
 652                         sti();
 653                 }
 654         }
 655 
 656         if (fz->fz_hash_table)
 657                 fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
 658         else
 659                 fp = &fz->fz_list;
 660 
 661         /*
 662          * Scan list to find the first route with the same destination
 663          */
 664         while ((f1 = *fp) != NULL)
 665         {
 666                 if (f1->fib_dst == dst)
 667                         break;
 668                 fp = &f1->fib_next;
 669         }
 670 
 671         /*
 672          * Find route with the same destination and less (or equal) metric.
 673          */
 674         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 675         {
 676                 if (f1->fib_metric >= metric)
 677                         break;
 678                 /*
 679                  *      Record route with the same destination and gateway,
 680                  *      but less metric. We'll delete it 
 681                  *      after instantiation of new route.
 682                  */
 683                 if (f1->fib_info->fib_gateway == gw)
 684                         dup_fp = fp;
 685                 fp = &f1->fib_next;
 686         }
 687 
 688         /*
 689          * Is it already present?
 690          */
 691 
 692         if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
 693         {
 694                 fib_free_node(f);
 695                 return;
 696         }
 697         
 698         /*
 699          * Insert new entry to the list.
 700          */
 701 
 702         cli();
 703         f->fib_next = f1;
 704         *fp = f;
 705         if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
 706                 fib_loopback = f;
 707         sti();
 708         fz->fz_nent++;
 709         ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);
 710 
 711         /*
 712          *      Delete route with the same destination and gateway.
 713          *      Note that we should have at most one such route.
 714          */
 715         if (dup_fp)
 716                 fp = dup_fp;
 717         else
 718                 fp = &f->fib_next;
 719 
 720         while ((f1 = *fp) != NULL && f1->fib_dst == dst)
 721         {
 722                 if (f1->fib_info->fib_gateway == gw)
 723                 {
 724                         cli();
 725                         *fp = f1->fib_next;
 726                         if (fib_loopback == f1)
 727                                 fib_loopback = NULL;
 728                         sti();
 729                         ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
 730                         fib_free_node(f1);
 731                         fz->fz_nent--;
 732                         break;
 733                 }
 734                 fp = &f1->fib_next;
 735         }
 736         rt_cache_flush();
 737         return;
 738 }
 739 
 740 static int rt_flush_list(struct fib_node ** fp, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 741 {
 742         int found = 0;
 743         struct fib_node *f;
 744 
 745         while ((f = *fp) != NULL) {
 746                 if (f->fib_info->fib_dev != dev) {
 747                         fp = &f->fib_next;
 748                         continue;
 749                 }
 750                 cli();
 751                 *fp = f->fib_next;
 752                 if (fib_loopback == f)
 753                         fib_loopback = NULL;
 754                 sti();
 755                 fib_free_node(f);
 756                 found++;
 757         }
 758         return found;
 759 }
 760 
 761 static __inline__ void fib_flush_1(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 762 {
 763         struct fib_zone *fz;
 764         int found = 0;
 765 
 766         for (fz = fib_zone_list; fz; fz = fz->fz_next)
 767         {
 768                 if (fz->fz_hash_table)
 769                 {
 770                         int i;
 771                         int tmp = 0;
 772                         for (i=0; i<RTZ_HASH_DIVISOR; i++)
 773                                 tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
 774                         fz->fz_nent -= tmp;
 775                         found += tmp;
 776                 }
 777                 else
 778                 {
 779                         int tmp;
 780                         tmp = rt_flush_list(&fz->fz_list, dev);
 781                         fz->fz_nent -= tmp;
 782                         found += tmp;
 783                 }
 784         }
 785                 
 786         if (found)
 787                 rt_cache_flush();
 788 }
 789 
 790 
 791 /* 
 792  *      Called from the PROCfs module. This outputs /proc/net/route.
 793  *
 794  *      We preserve the old format but pad the buffers out. This means that
 795  *      we can spin over the other entries as we read them. Remember the
 796  *      gated BGP4 code could need to read 60,000+ routes on occasion (thats
 797  *      about 7Mb of data). To do that ok we will need to also cache the
 798  *      last route we got to (reads will generally be following on from
 799  *      one another without gaps).
 800  */
 801  
/*
 *      /proc/net/route read handler.  Every FIB entry is emitted as one
 *      128-byte padded record, so a reader can seek in multiples of 128.
 *      `offset'/`length' plus *start implement the usual procfs
 *      windowing protocol.
 */
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
        struct fib_zone *fz;
        struct fib_node *f;
        int len=0;
        off_t pos=0;
        char temp[129];
        int i;
        
        /* The header line occupies the first 128 bytes of the file. */
        pos = 128;

        if (offset<128)
        {
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
                len = 128;
        }
        
        /* Wait until no writer holds the tables, then take the fast lock. */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (fz=fib_zone_list; fz; fz = fz->fz_next)
        {
                int maxslot;
                struct fib_node ** fp;

                if (fz->fz_nent == 0)
                        continue;

                /* Skip whole zones lying entirely before `offset' -
                   each entry is exactly 128 bytes so we can count. */
                if (pos + 128*fz->fz_nent <= offset)
                {
                        pos += 128*fz->fz_nent;
                        len = 0;
                        continue;
                }

                /* A zone is either hashed or a single plain list. */
                if (fz->fz_hash_table)
                {
                        maxslot = RTZ_HASH_DIVISOR;
                        fp      = fz->fz_hash_table;
                }
                else
                {
                        maxslot = 1;
                        fp      = &fz->fz_list;
                }
                        
                for (i=0; i < maxslot; i++, fp++)
                {
                        
                        for (f = *fp; f; f = f->fib_next) 
                        {
                                struct fib_info * fi;
                                /*
                                 *      Spin through entries until we are ready
                                 */
                                pos += 128;

                                if (pos <= offset)
                                {
                                        len=0;
                                        continue;
                                }
                                        
                                /* RefCnt column is always reported as 0 here. */
                                fi = f->fib_info;
                                sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
                                        fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
                                        fi->fib_flags, 0, f->fib_use, f->fib_metric,
                                        (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
                                sprintf(buffer+len,"%-127s\n",temp);

                                len += 128;
                                if (pos >= offset+length)
                                        goto done;
                        }
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);
        
        /* Procfs windowing: point *start at the data for `offset'
           inside `buffer' and return how much of it is valid. */
        *start = buffer+len-(pos-offset);
        len = pos - offset;
        if (len>length)
                len = length;
        return len;
}
 890 
/*
 *      /proc/net/rt_cache read handler.  Same 128-byte fixed-record
 *      windowing scheme as rt_get_info(), but walks the route cache
 *      hash table instead of the FIB.
 */
int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
        int len=0;
        off_t pos=0;
        char temp[129];
        struct rtable *r;
        int i;

        /* Header line occupies the first 128 bytes. */
        pos = 128;

        if (offset<128)
        {
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\n");
                len = 128;
        }
        
        
        /* Wait until no writer holds the tables, then take the fast lock. */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (i = 0; i<RT_HASH_DIVISOR; i++)
        {
                for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
                {
                        /*
                         *      Spin through entries until we are ready
                         */
                        pos += 128;

                        if (pos <= offset)
                        {
                                len = 0;
                                continue;
                        }
                                        
                        /* Metric is always 0 for cache entries; the HH/ARP
                           columns show -1/0 when no hh_cache is bound. */
                        sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
                                r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
                                r->rt_flags, r->rt_refcnt, r->rt_use, 0,
                                (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
                        sprintf(buffer+len,"%-127s\n",temp);
                        len += 128;
                        if (pos >= offset+length)
                                goto done;
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);
        
        /* Procfs windowing, exactly as in rt_get_info(). */
        *start = buffer+len-(pos-offset);
        len = pos-offset;
        if (len>length)
                len = length;
        return len;
}
 948 
 949 
/*
 *      Release a route cache entry.  If it is unreferenced, the entry
 *      (and its hh_cache once the last hh reference drops) is freed
 *      immediately; otherwise it is parked on rt_free_queue with
 *      RTF_UP cleared, and RT_BH_FREE tells the bottom half to reap it
 *      later, once its refcnt reaches zero.
 */
static void rt_free(struct rtable * rt)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        if (!rt->rt_refcnt)
        {
                struct hh_cache * hh = rt->rt_hh;
                rt->rt_hh = NULL;
                if (hh && !--hh->hh_refcnt)
                {
                        /* NOTE(review): flags are restored twice on this
                           path (again just below).  Harmless - both calls
                           restore the same saved value - but worth a look. */
                        restore_flags(flags);
                        kfree_s(hh, sizeof(struct hh_cache));
                }
                restore_flags(flags);
                kfree_s(rt, sizeof(struct rt_table));
                return;
        }
        /* Still referenced: queue for deferred freeing by the bottom half. */
        rt->rt_next = rt_free_queue;
        rt->rt_flags &= ~RTF_UP;
        rt_free_queue = rt;
        ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
        printk("rt_free: %08x\n", rt->rt_dst);
#endif
        restore_flags(flags);
}
 978 
/*
 * RT "bottom half" handlers. Called with masked interrupts.
 */
 982 
/*
 *      Drain rt_free_queue: unlink and free every queued entry whose
 *      reference count has dropped to zero.  Entered with interrupts
 *      masked; interrupts are briefly re-enabled (sti) around each
 *      kfree_s and re-masked before the list walk resumes.
 */
static __inline__ void rt_kick_free_queue(void)
{
        struct rtable *rt, **rtp;

        rtp = &rt_free_queue;

        while ((rt = *rtp) != NULL)
        {
                if  (!rt->rt_refcnt)
                {
                        struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
                        __u32 daddr = rt->rt_dst;
#endif
                        /* Unlink while interrupts are still masked. */
                        *rtp = rt->rt_next;
                        rt->rt_hh = NULL;
                        if (hh && !--hh->hh_refcnt)
                        {
                                sti();
                                kfree_s(hh, sizeof(struct hh_cache));
                        }
                        sti();
                        kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
                        printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
                        cli();
                        continue;
                }
                rtp = &rt->rt_next;
        }
}
1015 
/*
 *      Routing "bottom half": run whatever deferred work is flagged in
 *      ip_rt_bh_mask (redirect backlog, garbage collection, free-queue
 *      reaping), but only when nobody currently holds the routing lock.
 */
void ip_rt_run_bh() {
        unsigned long flags;
        save_flags(flags);
        cli();
        if (ip_rt_bh_mask && !ip_rt_lock)
        {
                if (ip_rt_bh_mask & RT_BH_REDIRECT)
                        rt_kick_backlog();

                if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
                {
                        /* GC can take a while: run it with interrupts
                           enabled, holding the fast lock instead. */
                        ip_rt_fast_lock();
                        ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
                        sti();
                        rt_garbage_collect_1();
                        cli();
                        ip_rt_fast_unlock();
                }

                if (ip_rt_bh_mask & RT_BH_FREE)
                        rt_kick_free_queue();
        }
        restore_flags(flags);
}
1040 
1041 
/*
 *      Periodic route cache maintenance: drop unreferenced entries not
 *      used for RT_CACHE_TIMEOUT jiffies, and bubble fresher/busier
 *      entries towards the front of their hash chain (a cheap LRU
 *      approximation).  All work is skipped unless we are the sole
 *      lock holder (ip_rt_lock == 1).
 */
void ip_rt_check_expire()
{
        ip_rt_fast_lock();
        if (ip_rt_lock == 1)
        {
                int i;
                struct rtable *rth, **rthp;
                unsigned long flags;
                unsigned long now = jiffies;

                save_flags(flags);
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        rthp = &ip_rt_hash_table[i];

                        while ((rth = *rthp) != NULL)
                        {
                                struct rtable * rth_next = rth->rt_next;

                                /*
                                 * Cleanup aged off entries.
                                 */

                                cli();
                                if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                                {
                                        *rthp = rth_next;
                                        sti();
                                        rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
                                        rt_free(rth);
                                        continue;
                                }
                                sti();

                                if (!rth_next)
                                        break;

                                /*
                                 * LRU ordering: swap rth with its successor
                                 * when the successor is clearly fresher or
                                 * (fresher and) busier.
                                 */

                                if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD < rth_next->rt_lastuse ||
                                    (rth->rt_lastuse < rth_next->rt_lastuse &&
                                     rth->rt_use < rth_next->rt_use))
                                {
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
                                        cli();
                                        *rthp = rth_next;
                                        rth->rt_next = rth_next->rt_next;
                                        rth_next->rt_next = rth;
                                        sti();
                                        rthp = &rth_next->rt_next;
                                        continue;
                                }
                                rthp = &rth->rt_next;
                        }
                }
                restore_flags(flags);
                /* NOTE(review): rt_kick_free_queue() is documented to run
                   with interrupts masked, but flags were just restored
                   here - confirm this call site is safe. */
                rt_kick_free_queue();
        }
        ip_rt_unlock();
}
1109 
/*
 *      Apply a validated ICMP redirect: create a dynamic host route to
 *      dst via gateway gw on dev and push it into the route cache (the
 *      FIB is not modified).  Redirects naming one of our own
 *      addresses as gateway, or a gateway that does not route through
 *      dev, are silently ignored.  Caller holds the routing fast lock.
 */
static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
{
        struct rtable *rt;
        unsigned long hash = ip_rt_hash_code(dst);

        if (gw == dev->pa_addr)
                return;
        if (dev != get_gw_dev(gw))
                return;
        rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
        if (rt == NULL) 
                return;
        memset(rt, 0, sizeof(struct rtable));
        rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
        rt->rt_dst = dst;
        rt->rt_dev = dev;
        rt->rt_gateway = gw;
        rt->rt_src = dev->pa_addr;
        rt->rt_mtu = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
        /* Without PMTU discovery, clamp to the classic 576-byte default. */
        if (dev->mtu > 576)
                rt->rt_mtu = 576;
#endif
        rt->rt_lastuse  = jiffies;
        rt->rt_refcnt  = 1;
        rt_cache_add(hash, rt);
        /* Drop our temporary reference; the cache now owns the entry. */
        ip_rt_put(rt);
        return;
}
1139 
/*
 *      Empty the entire route cache.  Each hash chain is detached with
 *      interrupts masked and then freed with interrupts enabled, so
 *      the masked window stays short.
 */
static void rt_cache_flush(void)
{
        int i;
        struct rtable * rth, * next;

        for (i=0; i<RT_HASH_DIVISOR; i++)
        {
                int nr=0;

                cli();
                if (!(rth = ip_rt_hash_table[i]))
                {
                        sti();
                        continue;
                }

                /* Detach the whole chain atomically, then free at leisure. */
                ip_rt_hash_table[i] = NULL;
                sti();

                for (; rth; rth=next)
                {
                        next = rth->rt_next;
                        rt_cache_size--;
                        nr++;
                        rth->rt_next = NULL;
                        rt_free(rth);
                }
#if RT_CACHE_DEBUG >= 2
                if (nr > 0)
                        printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
        }
#if RT_CACHE_DEBUG >= 1
        /* The counter should now be zero; repair it if bookkeeping broke. */
        if (rt_cache_size)
        {
                printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
                rt_cache_size = 0;
        }
#endif
}
1180 
/*
 *      Synchronous route cache garbage collector.  Evicts at most one
 *      entry per hash chain per pass; the effective expiry time is
 *      halved after every full pass until the cache shrinks below
 *      RT_CACHE_SIZE_MAX.  An entry's timeout is scaled by
 *      (refcnt + 1), so referenced entries are progressively harder to
 *      evict (rt_free() merely queues them until the refcnt drops).
 */
static void rt_garbage_collect_1(void)
{
        int i;
        unsigned expire = RT_CACHE_TIMEOUT>>1;
        struct rtable * rth, **rthp;
        unsigned long now = jiffies;

        for (;;)
        {
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        if (!ip_rt_hash_table[i])
                                continue;
                        for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
                        {
                                /* Keep entries still within their scaled timeout. */
                                if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
                                        continue;
                                rt_cache_size--;
                                cli();
                                *rthp=rth->rt_next;
                                rth->rt_next = NULL;
                                sti();
                                rt_free(rth);
                                break;
                        }
                }
                if (rt_cache_size < RT_CACHE_SIZE_MAX)
                        return;
                /* Be more aggressive on the next pass. */
                expire >>= 1;
        }
}
1212 
1213 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1214 {
1215         unsigned long flags;
1216         struct rt_req * tail;
1217 
1218         save_flags(flags);
1219         cli();
1220         tail = *q;
1221         if (!tail)
1222                 rtr->rtr_next = rtr;
1223         else
1224         {
1225                 rtr->rtr_next = tail->rtr_next;
1226                 tail->rtr_next = rtr;
1227         }
1228         *q = rtr;
1229         restore_flags(flags);
1230         return;
1231 }
1232 
1233 /*
1234  * Caller should mask interrupts.
1235  */
1236 
1237 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1238 {
1239         struct rt_req * rtr;
1240 
1241         if (*q)
1242         {
1243                 rtr = (*q)->rtr_next;
1244                 (*q)->rtr_next = rtr->rtr_next;
1245                 if (rtr->rtr_next == rtr)
1246                         *q = NULL;
1247                 rtr->rtr_next = NULL;
1248                 return rtr;
1249         }
1250         return NULL;
1251 }
1252 
1253 /*
1254    Called with masked interrupts
1255  */
1256 
/*
 *      Process the queued ICMP redirect requests (rt_backlog).  Runs
 *      from the bottom half with interrupts masked; skipped entirely
 *      while someone holds the routing lock.  Interrupts are
 *      re-enabled around each rt_redirect_1() call.
 */
static void rt_kick_backlog()
{
        if (!ip_rt_lock)
        {
                struct rt_req * rtr;

                ip_rt_fast_lock();

                while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
                {
                        sti();
                        rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
                        kfree_s(rtr, sizeof(struct rt_req));
                        cli();
                }

                ip_rt_bh_mask &= ~RT_BH_REDIRECT;

                ip_rt_fast_unlock();
        }
}
1278 
1279 /*
1280  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1281  */
1282 
1283 static int rt_del(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1284                 struct device * dev, __u32 gtw, short rt_flags, short metric)
1285 {
1286         int retval;
1287 
1288         while (ip_rt_lock)
1289                 sleep_on(&rt_wait);
1290         ip_rt_fast_lock();
1291         retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
1292         ip_rt_unlock();
1293         wake_up(&rt_wait);
1294         return retval;
1295 }
1296 
1297 static void rt_add(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1298         __u32 gw, struct device *dev, unsigned short mss,
1299         unsigned long window, unsigned short irtt, short metric)
1300 {
1301         while (ip_rt_lock)
1302                 sleep_on(&rt_wait);
1303         ip_rt_fast_lock();
1304         fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
1305         ip_rt_unlock();
1306         wake_up(&rt_wait);
1307 }
1308 
1309 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1310 {
1311         while (ip_rt_lock)
1312                 sleep_on(&rt_wait);
1313         ip_rt_fast_lock();
1314         fib_flush_1(dev);
1315         ip_rt_unlock();
1316         wake_up(&rt_wait);
1317 }
1318 
1319 /*
1320    Called by ICMP module.
1321  */
1322 
/*
 *      ICMP redirect entry point: router src, on dev, reports that dst
 *      is better reached via gw.  After validation the redirect is
 *      either applied immediately (when we can be the sole lock
 *      holder) or queued on rt_backlog for the RT_BH_REDIRECT bottom
 *      half.
 */
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
{
        struct rt_req * rtr;
        struct rtable * rt;

        /* Look up what we currently believe about dst. */
        rt = ip_rt_route(dst, 0);
        if (!rt)
                return;

        /* Accept only if the redirect comes from the router we are
         * actually using, on the same device, names a gateway on that
         * device's subnet, and the gateway is not one of our own
         * addresses.
         */
        if (rt->rt_gateway != src ||
            rt->rt_dev != dev ||
            ((gw^dev->pa_addr)&dev->pa_mask) ||
            ip_chk_addr(gw))
        {
                ip_rt_put(rt);
                return;
        }
        ip_rt_put(rt);

        ip_rt_fast_lock();
        if (ip_rt_lock == 1)
        {
                /* Sole lock holder: apply synchronously. */
                rt_redirect_1(dst, gw, dev);
                ip_rt_unlock();
                return;
        }

        /* Busy: queue for the bottom half.  On allocation failure the
           redirect is simply dropped. */
        rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
        if (rtr)
        {
                rtr->dst = dst;
                rtr->gw = gw;
                rtr->dev = dev;
                rt_req_enqueue(&rt_backlog, rtr);
                ip_rt_bh_mask |= RT_BH_REDIRECT;
        }
        ip_rt_unlock();
}
1361 
1362 
1363 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1364 {
1365         if (ip_rt_lock == 1)
1366         {
1367                 rt_garbage_collect_1();
1368                 return;
1369         }
1370         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1371 }
1372 
/*
 *      Insert rth at the head of route cache hash chain `hash'.  Also
 *      binds a hardware-header cache (hh_cache) when the device
 *      supports it - an indirect route shares the gateway route's hh -
 *      triggers garbage collection when the cache is full, and prunes
 *      the rest of the chain of aged-off entries and older entries for
 *      the same destination.  Caller holds the routing fast lock.
 */
static void rt_cache_add(unsigned hash, struct rtable * rth)
{
        unsigned long   flags;
        struct rtable   **rthp;
        __u32           daddr = rth->rt_dst;
        unsigned long   now = jiffies;

#if RT_CACHE_DEBUG >= 2
        if (ip_rt_lock != 1)
        {
                printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
                return;
        }
#endif

        save_flags(flags);

        if (rth->rt_dev->header_cache_bind)
        {
                struct rtable * rtg = rth;

                /* Indirect route: the MAC header belongs to the gateway,
                   so resolve the gateway's own cache entry first. */
                if (rth->rt_gateway != daddr)
                {
                        ip_rt_fast_unlock();
                        rtg = ip_rt_route(rth->rt_gateway, 0);
                        ip_rt_fast_lock();
                }

                if (rtg)
                {
                        if (rtg == rth)
                                rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
                        else
                        {
                                /* Share the gateway's hh_cache with this entry. */
                                if (rtg->rt_hh)
                                        ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
                                rth->rt_hh = rtg->rt_hh;
                                ip_rt_put(rtg);
                        }
                }
        }

        if (rt_cache_size >= RT_CACHE_SIZE_MAX)
                rt_garbage_collect();

        /* Link at the head of the chain with interrupts masked. */
        cli();
        rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
        if (rth->rt_next)
        {
                struct rtable * trth;
                printk("rt_cache @%02x: %08x", hash, daddr);
                for (trth=rth->rt_next; trth; trth=trth->rt_next)
                        printk(" . %08x", trth->rt_dst);
                printk("\n");
        }
#endif
        ip_rt_hash_table[hash] = rth;
        rthp = &rth->rt_next;
        sti();
        rt_cache_size++;

        /*
         * Cleanup duplicate (and aged off) entries.
         */

        while ((rth = *rthp) != NULL)
        {

                cli();
                if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                    || rth->rt_dst == daddr)
                {
                        *rthp = rth->rt_next;
                        rt_cache_size--;
                        sti();
#if RT_CACHE_DEBUG >= 2
                        printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
                        rt_free(rth);
                        continue;
                }
                sti();
                rthp = &rth->rt_next;
        }
        restore_flags(flags);
}
1460 
1461 /*
1462    RT should be already locked.
1463    
1464    We could improve this by keeping a chain of say 32 struct rtable's
1465    last freed for fast recycling.
1466    
1467  */
1468 
/*
 *      Cache-miss path of ip_rt_route(): build a fresh rtable entry
 *      from the FIB.  Entered with the routing fast lock held (taken
 *      by ip_rt_route); always releases the lock before returning.
 *      Returns the new entry with refcnt 1, or NULL on failure.
 */
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
{
        unsigned hash = ip_rt_hash_code(daddr)^local;
        struct rtable * rth;
        struct fib_node * f;
        struct fib_info * fi;
        __u32 saddr;

#if RT_CACHE_DEBUG >= 2
        printk("rt_cache miss @%08x\n", daddr);
#endif

        rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
        if (!rth)
        {
                ip_rt_unlock();
                return NULL;
        }

        /* `local' selects the lookup that also matches our own addresses. */
        if (local)
                f = fib_lookup_local(daddr);
        else
                f = fib_lookup (daddr);

        if (f)
        {
                fi = f->fib_info;
                f->fib_use++;
        }

        if (!f || (fi->fib_flags & RTF_REJECT))
        {
                /* NOTE(review): on the RTF_REJECT path f->fib_use was
                   incremented above and is not decremented here -
                   confirm whether fib_use is a pure statistic. */
#ifdef CONFIG_KERNELD   
                char wanted_route[20];
#endif          
#if RT_CACHE_DEBUG >= 2
                printk("rt_route failed @%08x\n", daddr);
#endif
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
#ifdef CONFIG_KERNELD           
                /* No route: ask user space (kerneld) to provide one. */
                daddr=ntohl(daddr);
                sprintf(wanted_route, "%d.%d.%d.%d",
                        (int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
                        (int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
                kerneld_route(wanted_route);    /* Dynamic route request */
#endif          
                return NULL;
        }

        saddr = fi->fib_dev->pa_addr;

        /* Packets to one of our own addresses are switched to loopback. */
        if (daddr == fi->fib_dev->pa_addr)
        {
                f->fib_use--;
                if ((f = fib_loopback) != NULL)
                {
                        f->fib_use++;
                        fi = f->fib_info;
                }
        }
        
        if (!f)
        {
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
                return NULL;
        }

        rth->rt_dst     = daddr;
        rth->rt_src     = saddr;
        rth->rt_lastuse = jiffies;
        rth->rt_refcnt  = 1;
        rth->rt_use     = 1;
        rth->rt_next    = NULL;
        rth->rt_hh      = NULL;
        rth->rt_gateway = fi->fib_gateway;
        rth->rt_dev     = fi->fib_dev;
        rth->rt_mtu     = fi->fib_mtu;
        rth->rt_window  = fi->fib_window;
        rth->rt_irtt    = fi->fib_irtt;
        rth->rt_tos     = f->fib_tos;
        rth->rt_flags   = fi->fib_flags | RTF_HOST;
        if (local)
                rth->rt_flags   |= RTF_LOCAL;

        /* Direct routes "gateway" via the destination itself. */
        if (!(rth->rt_flags & RTF_GATEWAY))
                rth->rt_gateway = rth->rt_dst;

        /* Only insert into the cache if we are the sole lock holder;
           otherwise rt_free() parks the still-referenced entry on the
           free queue and it dies once the caller drops it. */
        if (ip_rt_lock == 1)
                rt_cache_add(hash, rth);
        else
        {
                rt_free(rth);
#if RT_CACHE_DEBUG >= 1
                printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
        }

        ip_rt_unlock();
        return rth;
}
1571 
1572 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1573 {
1574         if (rt)
1575                 ATOMIC_DECR(&rt->rt_refcnt);
1576 }
1577 
/*
 *      Resolve daddr through the route cache.  `local' selects the
 *      local-delivery flavour and perturbs the hash so the two kinds
 *      of entry do not collide.  On a hit the entry's use and
 *      reference counts are bumped and it is returned (caller must
 *      ip_rt_put() it).  On a miss we fall through to
 *      ip_rt_slow_route() still holding the fast lock; that function
 *      releases the lock itself.
 */
struct rtable * ip_rt_route(__u32 daddr, int local)
{
        struct rtable * rth;

        ip_rt_fast_lock();

        for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
        {
                if (rth->rt_dst == daddr)
                {
                        rth->rt_lastuse = jiffies;
                        ATOMIC_INCR(&rth->rt_use);
                        ATOMIC_INCR(&rth->rt_refcnt);
                        ip_rt_unlock();
                        return rth;
                }
        }
        return ip_rt_slow_route (daddr, local);
}
1597 
1598 /*
1599  *      Process a route add request from the user, or from a kernel
1600  *      task.
1601  */
1602  
/*
 *      Validate a user-supplied rtentry and install the route via
 *      rt_add().  Returns 0 on success or a negative errno.
 */
int ip_rt_new(struct rtentry *r)
{
        int err;
        char * devname;
        struct device * dev = NULL;
        unsigned long flags;
        __u32 daddr, mask, gw;
        short metric;

        /*
         *      If a device is specified find it.
         */
         
        if ((devname = r->rt_dev) != NULL) 
        {
                /* devname is a user-space pointer: copy it in first. */
                err = getname(devname, &devname);
                if (err)
                        return err;
                dev = dev_get(devname);
                putname(devname);
                if (!dev)
                        return -ENODEV;
        }
        
        /*
         *      If the device isn't INET, don't allow it
         */

        if (r->rt_dst.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         *      Make local copies of the important bits
         *      We decrement the metric by one for BSD compatibility.
         */
         
        flags = r->rt_flags;
        daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
        mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
        gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
        metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;

        /*
         *      BSD emulation: Permits route add someroute gw one-of-my-addresses
         *      to indicate which iface. Not as clean as the nice Linux dev technique
         *      but people keep using it...  (and gated likes it ;))
         */
         
        if (!dev && (flags & RTF_GATEWAY)) 
        {
                struct device *dev2;
                for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
                {
                        if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
                        {
                                /* The "gateway" is our own address: treat it
                                   as a device route on that interface. */
                                flags &= ~RTF_GATEWAY;
                                dev = dev2;
                                break;
                        }
                }
        }

        /*
         *      Ignore faulty masks
         */
         
        if (bad_mask(mask, daddr))
                mask=0;

        /*
         *      Set the mask to nothing for host routes.
         */
         
        if (flags & RTF_HOST)
                mask = 0xffffffff;
        else if (mask && r->rt_genmask.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         *      You can only gateway IP via IP..
         */
         
        if (flags & RTF_GATEWAY) 
        {
                if (r->rt_gateway.sa_family != AF_INET)
                        return -EAFNOSUPPORT;
                /* No explicit device: infer it from the gateway address. */
                if (!dev)
                        dev = get_gw_dev(gw);
        } 
        else if (!dev)
                dev = ip_dev_check(daddr);

        /*
         *      Unknown device.
         */
         
        if (dev == NULL)
                return -ENETUNREACH;

        /*
         *      Add the route
         */

        rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
        return 0;
}
1709 
1710 
1711 /*
1712  *      Remove a route, as requested by the user.
1713  */
1714 
1715 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1716 {
1717         struct sockaddr_in *trg;
1718         struct sockaddr_in *msk;
1719         struct sockaddr_in *gtw;
1720         char *devname;
1721         int err;
1722         struct device * dev = NULL;
1723 
1724         trg = (struct sockaddr_in *) &r->rt_dst;
1725         msk = (struct sockaddr_in *) &r->rt_genmask;
1726         gtw = (struct sockaddr_in *) &r->rt_gateway;
1727         if ((devname = r->rt_dev) != NULL) 
1728         {
1729                 err = getname(devname, &devname);
1730                 if (err)
1731                         return err;
1732                 dev = dev_get(devname);
1733                 putname(devname);
1734                 if (!dev)
1735                         return -ENODEV;
1736         }
1737         /*
1738          * metric can become negative here if it wasn't filled in
1739          * but that's a fortunate accident; we really use that in rt_del.
1740          */
1741         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1742                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1743         return err;
1744 }
1745 
1746 /*
1747  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1748  */
1749  
1750 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1751 {
1752         int err;
1753         struct rtentry rt;
1754 
1755         switch(cmd) 
1756         {
1757                 case SIOCADDRT:         /* Add a route */
1758                 case SIOCDELRT:         /* Delete a route */
1759                         if (!suser())
1760                                 return -EPERM;
1761                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1762                         if (err)
1763                                 return err;
1764                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1765                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : ip_rt_new(&rt);
1766         }
1767 
1768         return -EINVAL;
1769 }
1770 
/*
 *      Routing advice from upper layers is accepted and deliberately
 *      ignored in this implementation.
 */
void ip_rt_advice(struct rtable **rp, int advice)
{
        /* Thanks! */
}
1776 

/* [previous][next][first][last][top][bottom][index][help] */