root/net/ipv4/route.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rt_logmask
  2. rt_mask
  3. fz_hash_code
  4. fib_free_node
  5. fib_lookup_gateway
  6. fib_lookup_local
  7. fib_lookup
  8. get_gw_dev
  9. default_mask
  10. guess_mask
  11. bad_mask
  12. fib_del_list
  13. fib_del_1
  14. fib_create_info
  15. fib_add_1
  16. rt_flush_list
  17. fib_flush_1
  18. rt_get_info
  19. rt_cache_get_info
  20. rt_free
  21. rt_kick_free_queue
  22. ip_rt_run_bh
  23. ip_rt_check_expire
  24. rt_redirect_1
  25. rt_cache_flush
  26. rt_garbage_collect_1
  27. rt_req_enqueue
  28. rt_req_dequeue
  29. rt_kick_backlog
  30. rt_del
  31. rt_add
  32. ip_rt_flush
  33. ip_rt_redirect
  34. rt_garbage_collect
  35. rt_cache_add
  36. ip_rt_slow_route
  37. ip_rt_put
  38. ip_rt_route
  39. ip_rt_new
  40. rt_kill
  41. ip_rt_ioctl
  42. ip_rt_advice

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              ROUTE - implementation of the IP router.
   7  *
   8  * Version:     @(#)route.c     1.0.14  05/31/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  13  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
  14  *
  15  * Fixes:
  16  *              Alan Cox        :       Verify area fixes.
  17  *              Alan Cox        :       cli() protects routing changes
  18  *              Rui Oliveira    :       ICMP routing table updates
  19  *              (rco@di.uminho.pt)      Routing table insertion and update
  20  *              Linus Torvalds  :       Rewrote bits to be sensible
  21  *              Alan Cox        :       Added BSD route gw semantics
  22  *              Alan Cox        :       Super /proc >4K 
  23  *              Alan Cox        :       MTU in route table
  24  *              Alan Cox        :       MSS actually. Also added the window
  25  *                                      clamper.
  26  *              Sam Lantinga    :       Fixed route matching in rt_del()
  27  *              Alan Cox        :       Routing cache support.
  28  *              Alan Cox        :       Removed compatibility cruft.
  29  *              Alan Cox        :       RTF_REJECT support.
  30  *              Alan Cox        :       TCP irtt support.
  31  *              Jonathan Naylor :       Added Metric support.
  32  *      Miquel van Smoorenburg  :       BSD API fixes.
  33  *      Miquel van Smoorenburg  :       Metrics.
  34  *              Alan Cox        :       Use __u32 properly
  35  *              Alan Cox        :       Aligned routing errors more closely with BSD
  36  *                                      our system is still very different.
  37  *              Alan Cox        :       Faster /proc handling
  38  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
  39  *                                      routing caches and better behaviour.
  40  *              
  41  *              Olaf Erb        :       irtt wasn't being copied right.
  42  *              Bjorn Ekwall    :       Kerneld route support.
  43  *
  44  *              This program is free software; you can redistribute it and/or
  45  *              modify it under the terms of the GNU General Public License
  46  *              as published by the Free Software Foundation; either version
  47  *              2 of the License, or (at your option) any later version.
  48  */
  49 
  50 #include <linux/config.h>
  51 #include <asm/segment.h>
  52 #include <asm/system.h>
  53 #include <asm/bitops.h>
  54 #include <linux/types.h>
  55 #include <linux/kernel.h>
  56 #include <linux/sched.h>
  57 #include <linux/mm.h>
  58 #include <linux/string.h>
  59 #include <linux/socket.h>
  60 #include <linux/sockios.h>
  61 #include <linux/errno.h>
  62 #include <linux/in.h>
  63 #include <linux/inet.h>
  64 #include <linux/netdevice.h>
  65 #include <linux/if_arp.h>
  66 #include <net/ip.h>
  67 #include <net/protocol.h>
  68 #include <net/route.h>
  69 #include <net/tcp.h>
  70 #include <linux/skbuff.h>
  71 #include <net/sock.h>
  72 #include <net/icmp.h>
  73 #include <net/netlink.h>
  74 #ifdef CONFIG_KERNELD
  75 #include <linux/kerneld.h>
  76 #endif
  77 
  78 /*
  79  * Forwarding Information Base definitions.
  80  */
  81 
  82 struct fib_node
  83 {
  84         struct fib_node         *fib_next;
  85         __u32                   fib_dst;
  86         unsigned long           fib_use;
  87         struct fib_info         *fib_info;
  88         short                   fib_metric;
  89         unsigned char           fib_tos;
  90 };
  91 
  92 /*
  93  * This structure contains data shared by many of routes.
  94  */     
  95 
  96 struct fib_info
  97 {
  98         struct fib_info         *fib_next;
  99         struct fib_info         *fib_prev;
 100         __u32                   fib_gateway;
 101         struct device           *fib_dev;
 102         int                     fib_refcnt;
 103         unsigned long           fib_window;
 104         unsigned short          fib_flags;
 105         unsigned short          fib_mtu;
 106         unsigned short          fib_irtt;
 107 };
 108 
 109 struct fib_zone
 110 {
 111         struct fib_zone *fz_next;
 112         struct fib_node **fz_hash_table;
 113         struct fib_node *fz_list;
 114         int             fz_nent;
 115         int             fz_logmask;
 116         __u32           fz_mask;
 117 };
 118 
 119 static struct fib_zone  *fib_zones[33];
 120 static struct fib_zone  *fib_zone_list;
 121 static struct fib_node  *fib_loopback = NULL;
 122 static struct fib_info  *fib_info_list;
 123 
 124 /*
 125  * Backlogging.
 126  */
 127 
 128 #define RT_BH_REDIRECT          0
 129 #define RT_BH_GARBAGE_COLLECT   1
 130 #define RT_BH_FREE              2
 131 
 132 struct rt_req
 133 {
 134         struct rt_req * rtr_next;
 135         struct device *dev;
 136         __u32 dst;
 137         __u32 gw;
 138         unsigned char tos;
 139 };
 140 
 141 int                     ip_rt_lock;
 142 unsigned                ip_rt_bh_mask;
 143 static struct rt_req    *rt_backlog;
 144 
 145 /*
 146  * Route cache.
 147  */
 148 
 149 struct rtable           *ip_rt_hash_table[RT_HASH_DIVISOR];
 150 static int              rt_cache_size;
 151 static struct rtable    *rt_free_queue;
 152 struct wait_queue       *rt_wait;
 153 
 154 static void rt_kick_backlog(void);
 155 static void rt_cache_add(unsigned hash, struct rtable * rth);
 156 static void rt_cache_flush(void);
 157 static void rt_garbage_collect_1(void);
 158 
 159 /* 
 160  * Evaluate mask length.
 161  */
 162 
 163 static __inline__ int rt_logmask(__u32 mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 164 {
 165         if (!(mask = ntohl(mask)))
 166                 return 32;
 167         return ffz(~mask);
 168 }
 169 
 170 /* 
 171  * Create mask from length.
 172  */
 173 
 174 static __inline__ __u32 rt_mask(int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 175 {
 176         if (logmask >= 32)
 177                 return 0;
 178         return htonl(~((1<<logmask)-1));
 179 }
 180 
 181 static __inline__ unsigned fz_hash_code(__u32 dst, int logmask)
     /* [previous][next][first][last][top][bottom][index][help] */
 182 {
 183         return ip_rt_hash_code(ntohl(dst)>>logmask);
 184 }
 185 
 186 /*
 187  * Free FIB node.
 188  */
 189 
 190 static void fib_free_node(struct fib_node * f)
     /* [previous][next][first][last][top][bottom][index][help] */
 191 {
 192         struct fib_info * fi = f->fib_info;
 193         if (!--fi->fib_refcnt)
 194         {
 195 #if RT_CACHE_DEBUG >= 2
 196                 printk("fib_free_node: fi %08x/%s is free\n", fi->fib_gateway, fi->fib_dev->name);
 197 #endif
 198                 if (fi->fib_next)
 199                         fi->fib_next->fib_prev = fi->fib_prev;
 200                 if (fi->fib_prev)
 201                         fi->fib_prev->fib_next = fi->fib_next;
 202                 if (fi == fib_info_list)
 203                         fib_info_list = fi->fib_next;
 204         }
 205         kfree_s(f, sizeof(struct fib_node));
 206 }
 207 
 208 /*
 209  * Find gateway route by address.
 210  */
 211 
 212 static struct fib_node * fib_lookup_gateway(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 213 {
 214         struct fib_zone * fz;
 215         struct fib_node * f;
 216 
 217         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 218         {
 219                 if (fz->fz_hash_table)
 220                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 221                 else
 222                         f = fz->fz_list;
 223                 
 224                 for ( ; f; f = f->fib_next)
 225                 {
 226                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 227                                 continue;
 228                         if (f->fib_info->fib_flags & RTF_GATEWAY)
 229                                 return NULL;
 230                         return f;
 231                 }
 232         }
 233         return NULL;
 234 }
 235 
 236 /*
 237  * Find local route by address.
 238  * FIXME: I use "longest match" principle. If destination
 239  *        has some non-local route, I'll not search shorter matches.
 240  *        It's possible, I'm wrong, but I wanted to prevent following
 241  *        situation:
 242  *      route add 193.233.7.128 netmask 255.255.255.192 gw xxxxxx
 243  *      route add 193.233.7.0   netmask 255.255.255.0 eth1
 244  *        (Two ethernets connected by serial line, one is small and other is large)
 245  *        Host 193.233.7.129 is locally unreachable,
 246  *        but old (<=1.3.37) code will send packets destined for it to eth1.
 247  *
 248  */
 249 
 250 static struct fib_node * fib_lookup_local(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 251 {
 252         struct fib_zone * fz;
 253         struct fib_node * f;
 254 
 255         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 256         {
 257                 int longest_match_found = 0;
 258 
 259                 if (fz->fz_hash_table)
 260                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 261                 else
 262                         f = fz->fz_list;
 263                 
 264                 for ( ; f; f = f->fib_next)
 265                 {
 266                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 267                                 continue;
 268                         if (!(f->fib_info->fib_flags & RTF_GATEWAY))
 269                                 return f;
 270                         longest_match_found = 1;
 271                 }
 272                 if (longest_match_found)
 273                         return NULL;
 274         }
 275         return NULL;
 276 }
 277 
 278 /*
 279  * Main lookup routine.
 280  *      IMPORTANT NOTE: this algorithm has small difference from <=1.3.37 visible
 281  *      by user. It doesn't route non-CIDR broadcasts by default.
 282  *
 283  *      F.e.
 284  *              ifconfig eth0 193.233.7.65 netmask 255.255.255.192 broadcast 193.233.7.255
 285  *      is valid, but if you really are not able (not allowed, do not want) to
 286  *      use CIDR compliant broadcast 193.233.7.127, you should add host route:
 287  *              route add -host 193.233.7.255 eth0
 288  */
 289 
 290 static struct fib_node * fib_lookup(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292         struct fib_zone * fz;
 293         struct fib_node * f;
 294 
 295         for (fz = fib_zone_list; fz; fz = fz->fz_next) 
 296         {
 297                 if (fz->fz_hash_table)
 298                         f = fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
 299                 else
 300                         f = fz->fz_list;
 301                 
 302                 for ( ; f; f = f->fib_next)
 303                 {
 304                         if ((dst ^ f->fib_dst) & fz->fz_mask)
 305                                 continue;
 306                         return f;
 307                 }
 308         }
 309         return NULL;
 310 }
 311 
 312 static __inline__ struct device * get_gw_dev(__u32 gw)
     /* [previous][next][first][last][top][bottom][index][help] */
 313 {
 314         struct fib_node * f;
 315         f = fib_lookup_gateway(gw);
 316         if (f)
 317                 return f->fib_info->fib_dev;
 318         return NULL;
 319 }
 320 
 321 /*
 322  *      Used by 'rt_add()' when we can't get the netmask any other way..
 323  *
 324  *      If the lower byte or two are zero, we guess the mask based on the
 325  *      number of zero 8-bit net numbers, otherwise we use the "default"
 326  *      masks judging by the destination address and our device netmask.
 327  */
 328  
 329 static __u32 unsigned long default_mask(__u32 dst)
     /* [previous][next][first][last][top][bottom][index][help] */
 330 {
 331         dst = ntohl(dst);
 332         if (IN_CLASSA(dst))
 333                 return htonl(IN_CLASSA_NET);
 334         if (IN_CLASSB(dst))
 335                 return htonl(IN_CLASSB_NET);
 336         return htonl(IN_CLASSC_NET);
 337 }
 338 
 339 
 340 /*
 341  *      If no mask is specified then generate a default entry.
 342  */
 343 
 344 static __u32 guess_mask(__u32 dst, struct device * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 345 {
 346         __u32 mask;
 347 
 348         if (!dst)
 349                 return 0;
 350         mask = default_mask(dst);
 351         if ((dst ^ dev->pa_addr) & mask)
 352                 return mask;
 353         return dev->pa_mask;
 354 }
 355 
 356 
 357 /*
 358  *      Check if a mask is acceptable.
 359  */
 360  
 361 static inline int bad_mask(__u32 mask, __u32 addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 362 {
 363         if (addr & (mask = ~mask))
 364                 return 1;
 365         mask = ntohl(mask);
 366         if (mask & (mask+1))
 367                 return 1;
 368         return 0;
 369 }
 370 
 371 
/*
 *	Remove from the chain at *fp every route to 'dst' that also
 *	matches the optional qualifiers: a NULL/zero gateway or device
 *	is a wildcard, and a negative metric matches any metric.
 *	'mask' is only echoed in the netlink notification; the caller
 *	has already selected the zone to scan.
 *	Returns the number of nodes deleted.
 */
static int fib_del_list(struct fib_node **fp, __u32 dst,
		struct device * dev, __u32 gtw, short flags, short metric, __u32 mask)
{
	struct fib_node *f;
	int found=0;

	while((f = *fp) != NULL)
	{
		struct fib_info * fi = f->fib_info;

		/*
		 *	Make sure the destination and netmask match.
		 *	metric, gateway and device are also checked
		 *	if they were specified.
		 */
		if (f->fib_dst != dst ||
		    (gtw && fi->fib_gateway != gtw) ||
		    (metric >= 0 && f->fib_metric != metric) ||
		    (dev && fi->fib_dev != dev) )
		{
			fp = &f->fib_next;
			continue;
		}
		/* Unlink with interrupts disabled, then notify and free. */
		cli();
		*fp = f->fib_next;
		if (fib_loopback == f)
			fib_loopback = NULL;
		sti();
		ip_netlink_msg(RTMSG_DELROUTE, dst, gtw, mask, flags, metric, fi->fib_dev->name);
		fib_free_node(f);
		found++;
	}
	return found;
}
 406 
/*
 *	Delete matching routes from the FIB.  With a zero netmask every
 *	zone is scanned (any prefix length may hold matches); with an
 *	explicit mask only the zone of that exact prefix length is
 *	searched.  The per-zone entry count is kept in step and the
 *	route cache is flushed when anything was removed.
 *	Returns 0 on success, -ESRCH when no route matched.
 */
static __inline__ int fib_del_1(__u32 dst, __u32 mask,
		struct device * dev, __u32 gtw, short flags, short metric)
{
	struct fib_node **fp;
	struct fib_zone *fz;
	int found=0;

	if (!mask)
	{
		/* Wildcard mask: try every zone. */
		for (fz=fib_zone_list; fz; fz = fz->fz_next)
		{
			int tmp;
			if (fz->fz_hash_table)
				fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
			else
				fp = &fz->fz_list;

			tmp = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
			fz->fz_nent -= tmp;
			found += tmp;
		}
	}
	else
	{
		/* Exact mask: only the zone of this prefix length can match. */
		if ((fz = fib_zones[rt_logmask(mask)]) != NULL)
		{
			if (fz->fz_hash_table)
				fp = &fz->fz_hash_table[fz_hash_code(dst, fz->fz_logmask)];
			else
				fp = &fz->fz_list;

			found = fib_del_list(fp, dst, dev, gtw, flags, metric, mask);
			fz->fz_nent -= found;
		}
	}

	if (found)
	{
		rt_cache_flush();
		return 0;
	}
	return -ESRCH;
}
 450 
 451 
/*
 *	Obtain a fib_info block for the gateway/device/flags/mss/window/
 *	irtt tuple.  Identical existing blocks are shared by bumping
 *	fib_refcnt; otherwise a fresh block is allocated, initialised
 *	and linked at the head of fib_info_list.
 *	Returns NULL only when kmalloc fails.
 */
static struct fib_info * fib_create_info(__u32 gw, struct device * dev,
					 unsigned short flags, unsigned short mss,
					 unsigned long window, unsigned short irtt)
{
	struct fib_info * fi;

	/* Unspecified metrics are normalised before the duplicate scan
	   so equivalent requests compare equal. */
	if (!(flags & RTF_MSS))
	{
		mss = dev->mtu;
#ifdef CONFIG_NO_PATH_MTU_DISCOVERY
		/*
		 *	If MTU was not specified, use default.
		 *	If you want to increase MTU for some net (local subnet)
		 *	use "route add .... mss xxx".
		 *
		 *	The MTU isn't currently always used and computed as it
		 *	should be as far as I can tell. [Still verifying this is right]
		 */
		if ((flags & RTF_GATEWAY) && mss > 576)
			mss = 576;
#endif
	}
	if (!(flags & RTF_WINDOW))
		window = 0;
	if (!(flags & RTF_IRTT))
		irtt = 0;

	/* Share an existing block if every field agrees. */
	for (fi=fib_info_list; fi; fi = fi->fib_next)
	{
		if (fi->fib_gateway != gw ||
		    fi->fib_dev != dev  ||
		    fi->fib_flags != flags ||
		    fi->fib_mtu != mss ||
		    fi->fib_window != window ||
		    fi->fib_irtt != irtt)
			continue;
		fi->fib_refcnt++;
#if RT_CACHE_DEBUG >= 2
		printk("fib_create_info: fi %08x/%s is duplicate\n", fi->fib_gateway, fi->fib_dev->name);
#endif
		return fi;
	}
	fi = (struct fib_info*)kmalloc(sizeof(struct fib_info), GFP_KERNEL);
	if (!fi)
		return NULL;
	memset(fi, 0, sizeof(struct fib_info));
	fi->fib_flags = flags;
	fi->fib_dev = dev;
	fi->fib_gateway = gw;
	fi->fib_mtu = mss;
	fi->fib_window = window;
	fi->fib_refcnt++;
	/* Link at the head of the doubly linked fib_info_list. */
	fi->fib_next = fib_info_list;
	fi->fib_prev = NULL;
	fi->fib_irtt = irtt;
	if (fib_info_list)
		fib_info_list->fib_prev = fi;
	fib_info_list = fi;
#if RT_CACHE_DEBUG >= 2
	printk("fib_create_info: fi %08x/%s is created\n", fi->fib_gateway, fi->fib_dev->name);
#endif
	return fi;
}
 515 
 516 
/*
 *	Insert a route into the FIB.  The steps are:
 *	  - derive the netmask when none was supplied (host route,
 *	    device network, or classful guess);
 *	  - validate the gateway: it must be directly reachable on this
 *	    device (tunnels exempt) and not a local address;
 *	  - allocate the node and a (possibly shared) fib_info;
 *	  - create the zone for this prefix length on demand, keeping
 *	    fib_zone_list ordered most-specific first;
 *	  - convert the zone to a hash table once it exceeds
 *	    RTZ_HASHING_LIMIT entries;
 *	  - insert the node ordered by metric among routes to the same
 *	    destination, then delete any older route with the same
 *	    destination and gateway;
 *	  - flush the route cache.
 *	Failures (bad gateway, out of memory, duplicate) return quietly.
 */
static __inline__ void fib_add_1(short flags, __u32 dst, __u32 mask,
	__u32 gw, struct device *dev, unsigned short mss,
	unsigned long window, unsigned short irtt, short metric)
{
	struct fib_node *f, *f1;
	struct fib_node **fp;
	struct fib_node **dup_fp = NULL;
	struct fib_zone * fz;
	struct fib_info * fi;
	int logmask;

	if (flags & RTF_HOST)
		mask = 0xffffffff;
	/*
	 * If mask is not specified, try to guess it.
	 */
	else if (!mask)
	{
		if (!((dst ^ dev->pa_addr) & dev->pa_mask))
		{
			mask = dev->pa_mask;
			flags &= ~RTF_GATEWAY;
			if (flags & RTF_DYNAMIC)
			{
				printk("Dynamic route to my own net rejected\n");
				return;
			}
		}
		else
			mask = guess_mask(dst, dev);
		dst &= mask;
	}
	
	/*
	 *	A gateway must be reachable and not a local address
	 */
	 
	if (gw == dev->pa_addr)
		flags &= ~RTF_GATEWAY;
		
	if (flags & RTF_GATEWAY)
	{
		/*
		 *	Don't try to add a gateway we can't reach..
		 *	Tunnel devices are exempt from this rule.
		 */
		 
		if ((dev != get_gw_dev(gw)) && dev->type!=ARPHRD_TUNNEL)
			return;
			
		flags |= RTF_GATEWAY;
	}
	else
		gw = 0;
		
	/*
	 *	Allocate an entry and fill it in.
	 */
	 
	f = (struct fib_node *) kmalloc(sizeof(struct fib_node), GFP_KERNEL);
	if (f == NULL)
		return;

	memset(f, 0, sizeof(struct fib_node));
	f->fib_dst = dst;
	f->fib_metric = metric;
	f->fib_tos    = 0;

	if  ((fi = fib_create_info(gw, dev, flags, mss, window, irtt)) == NULL)
	{
		kfree_s(f, sizeof(struct fib_node));
		return;
	}
	f->fib_info = fi;

	logmask = rt_logmask(mask);
	fz = fib_zones[logmask];

	/* First route of this prefix length: create the zone and splice
	   it into fib_zone_list just after the next more specific zone. */
	if (!fz)
	{
		int i;
		fz = kmalloc(sizeof(struct fib_zone), GFP_KERNEL);
		if (!fz)
		{
			fib_free_node(f);
			return;
		}
		memset(fz, 0, sizeof(struct fib_zone));
		fz->fz_logmask = logmask;
		fz->fz_mask = mask;
		for (i=logmask-1; i>=0; i--)
			if (fib_zones[i])
				break;
		cli();
		if (i<0)
		{
			fz->fz_next = fib_zone_list;
			fib_zone_list = fz;
		}
		else
		{
			fz->fz_next = fib_zones[i]->fz_next;
			fib_zones[i]->fz_next = fz;
		}
		fib_zones[logmask] = fz;
		sti();
	}

	/*
	 * If zone overgrows RTZ_HASHING_LIMIT, create hash table.
	 */

	if (fz->fz_nent >= RTZ_HASHING_LIMIT && !fz->fz_hash_table && logmask<32)
	{
		struct fib_node ** ht;
#if RT_CACHE_DEBUG
		printk("fib_add_1: hashing for zone %d started\n", logmask);
#endif
		/* NOTE(review): sizeof(struct rtable*) here presumably
		   should be sizeof(struct fib_node*) as in the memset
		   below; both are pointer types of equal size, so the
		   allocation size is still right — confirm and tidy. */
		ht = kmalloc(RTZ_HASH_DIVISOR*sizeof(struct rtable*), GFP_KERNEL);

		if (ht)
		{
			memset(ht, 0, RTZ_HASH_DIVISOR*sizeof(struct fib_node*));
			cli();
			/* Rehash the flat list into the new table. */
			f1 = fz->fz_list;
			while (f1)
			{
				struct fib_node * next;
				unsigned hash = fz_hash_code(f1->fib_dst, logmask);
				next = f1->fib_next;
				f1->fib_next = ht[hash];
				ht[hash] = f1;
				f1 = next;
			}
			fz->fz_list = NULL;
			fz->fz_hash_table = ht;
			sti();
		}
	}

	if (fz->fz_hash_table)
		fp = &fz->fz_hash_table[fz_hash_code(dst, logmask)];
	else
		fp = &fz->fz_list;

	/*
	 * Scan list to find the first route with the same destination
	 */
	while ((f1 = *fp) != NULL)
	{
		if (f1->fib_dst == dst)
			break;
		fp = &f1->fib_next;
	}

	/*
	 * Find route with the same destination and less (or equal) metric.
	 */
	while ((f1 = *fp) != NULL && f1->fib_dst == dst)
	{
		if (f1->fib_metric >= metric)
			break;
		/*
		 *	Record route with the same destination and gateway,
		 *	but less metric. We'll delete it
		 *	after instantiation of new route.
		 */
		if (f1->fib_info->fib_gateway == gw)
			dup_fp = fp;
		fp = &f1->fib_next;
	}

	/*
	 * Is it already present?
	 */

	if (f1 && f1->fib_metric == metric && f1->fib_info == fi)
	{
		fib_free_node(f);
		return;
	}
	
	/*
	 * Insert new entry to the list.
	 */

	cli();
	f->fib_next = f1;
	*fp = f;
	if (!fib_loopback && (fi->fib_dev->flags & IFF_LOOPBACK))
		fib_loopback = f;
	sti();
	fz->fz_nent++;
	ip_netlink_msg(RTMSG_NEWROUTE, dst, gw, mask, flags, metric, fi->fib_dev->name);

	/*
	 *	Delete route with the same destination and gateway.
	 *	Note that we should have at most one such route.
	 */
	if (dup_fp)
		fp = dup_fp;
	else
		fp = &f->fib_next;

	while ((f1 = *fp) != NULL && f1->fib_dst == dst)
	{
		if (f1->fib_info->fib_gateway == gw)
		{
			cli();
			*fp = f1->fib_next;
			if (fib_loopback == f1)
				fib_loopback = NULL;
			sti();
			ip_netlink_msg(RTMSG_DELROUTE, dst, gw, mask, flags, metric, f1->fib_info->fib_dev->name);
			fib_free_node(f1);
			fz->fz_nent--;
			break;
		}
		fp = &f1->fib_next;
	}
	rt_cache_flush();
	return;
}
 741 
 742 static int rt_flush_list(struct fib_node ** fp, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 743 {
 744         int found = 0;
 745         struct fib_node *f;
 746 
 747         while ((f = *fp) != NULL) {
 748                 if (f->fib_info->fib_dev != dev) {
 749                         fp = &f->fib_next;
 750                         continue;
 751                 }
 752                 cli();
 753                 *fp = f->fib_next;
 754                 if (fib_loopback == f)
 755                         fib_loopback = NULL;
 756                 sti();
 757                 fib_free_node(f);
 758                 found++;
 759         }
 760         return found;
 761 }
 762 
/*
 *	Remove all routes through 'dev' from every zone, whether the
 *	zone stores a hash table or a flat list, keeping each zone's
 *	entry count in step.  The route cache is flushed if anything
 *	was removed.  Called when a device goes away.
 */
static __inline__ void fib_flush_1(struct device *dev)
{
	struct fib_zone *fz;
	int found = 0;

	for (fz = fib_zone_list; fz; fz = fz->fz_next)
	{
		if (fz->fz_hash_table)
		{
			int i;
			int tmp = 0;
			/* Hashed zone: sweep every bucket. */
			for (i=0; i<RTZ_HASH_DIVISOR; i++)
				tmp += rt_flush_list(&fz->fz_hash_table[i], dev);
			fz->fz_nent -= tmp;
			found += tmp;
		}
		else
		{
			int tmp;
			tmp = rt_flush_list(&fz->fz_list, dev);
			fz->fz_nent -= tmp;
			found += tmp;
		}
	}
		
	if (found)
		rt_cache_flush();
}
 791 
 792 
 793 /* 
 794  *      Called from the PROCfs module. This outputs /proc/net/route.
 795  *
 796  *      We preserve the old format but pad the buffers out. This means that
 797  *      we can spin over the other entries as we read them. Remember the
  798  *      gated BGP4 code could need to read 60,000+ routes on occasion (that's
 799  *      about 7Mb of data). To do that ok we will need to also cache the
 800  *      last route we got to (reads will generally be following on from
 801  *      one another without gaps).
 802  */
 803  
int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
/* [previous][next][first][last][top][bottom][index][help] */
{
        struct fib_zone *fz;
        struct fib_node *f;
        int len=0;
        off_t pos=0;
        char temp[129];         /* one fixed 128-byte record plus NUL */
        int i;
        
        pos = 128;

        if (offset<128)
        {
                /* The header line occupies the first 128-byte record. */
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
                len = 128;
        }
        
        /* Take the routing lock; a user process may sleep here. */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (fz=fib_zone_list; fz; fz = fz->fz_next)
        {
                int maxslot;
                struct fib_node ** fp;

                if (fz->fz_nent == 0)
                        continue;

                /*
                 * Every entry is exactly 128 bytes, so whole zones lying
                 * before the requested offset can be skipped without
                 * formatting their entries at all.
                 */
                if (pos + 128*fz->fz_nent <= offset)
                {
                        pos += 128*fz->fz_nent;
                        len = 0;
                        continue;
                }

                /* Hashed zones have RTZ_HASH_DIVISOR chains; others one list. */
                if (fz->fz_hash_table)
                {
                        maxslot = RTZ_HASH_DIVISOR;
                        fp      = fz->fz_hash_table;
                }
                else
                {
                        maxslot = 1;
                        fp      = &fz->fz_list;
                }
                        
                for (i=0; i < maxslot; i++, fp++)
                {
                        
                        for (f = *fp; f; f = f->fib_next) 
                        {
                                struct fib_info * fi;
                                /*
                                 *      Spin through entries until we are ready
                                 */
                                pos += 128;

                                if (pos <= offset)
                                {
                                        len=0;
                                        continue;
                                }
                                        
                                fi = f->fib_info;
                                sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u",
                                        fi->fib_dev->name, (unsigned long)f->fib_dst, (unsigned long)fi->fib_gateway,
                                        fi->fib_flags, 0, f->fib_use, f->fib_metric,
                                        (unsigned long)fz->fz_mask, (int)fi->fib_mtu, fi->fib_window, (int)fi->fib_irtt);
                                sprintf(buffer+len,"%-127s\n",temp);

                                len += 128;
                                if (pos >= offset+length)
                                        goto done;
                        }
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);
        
        /* Point *start at the first byte the caller actually asked for. */
        *start = buffer+len-(pos-offset);
        len = pos - offset;
        if (len>length)
                len = length;
        return len;
}
 892 
int rt_cache_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
/* [previous][next][first][last][top][bottom][index][help] */
{
        int len=0;
        off_t pos=0;
        char temp[129];         /* one fixed 128-byte record plus NUL */
        struct rtable *r;
        int i;

        pos = 128;

        if (offset<128)
        {
                /* The header line occupies the first 128-byte record. */
                sprintf(buffer,"%-127s\n","Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tSource\t\tMTU\tWindow\tIRTT\tHH\tARP\n");
                len = 128;
        }
        
        
        /* Take the routing lock; a user process may sleep here. */
        while  (ip_rt_lock)
                sleep_on(&rt_wait);
        ip_rt_fast_lock();

        for (i = 0; i<RT_HASH_DIVISOR; i++)
        {
                for (r = ip_rt_hash_table[i]; r; r = r->rt_next) 
                {
                        /*
                         *      Spin through entries until we are ready
                         */
                        pos += 128;

                        if (pos <= offset)
                        {
                                len = 0;
                                continue;
                        }
                                        
                        sprintf(temp, "%s\t%08lX\t%08lX\t%02X\t%d\t%u\t%d\t%08lX\t%d\t%lu\t%u\t%d\t%1d",
                                r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
                                r->rt_flags, r->rt_refcnt, r->rt_use, 0,
                                (unsigned long)r->rt_src, (int)r->rt_mtu, r->rt_window, (int)r->rt_irtt, r->rt_hh ? r->rt_hh->hh_refcnt : -1, r->rt_hh ? r->rt_hh->hh_uptodate : 0);
                        sprintf(buffer+len,"%-127s\n",temp);
                        len += 128;
                        if (pos >= offset+length)
                                goto done;
                }
        }

done:
        ip_rt_unlock();
        wake_up(&rt_wait);
        
        /* Point *start at the first byte the caller actually asked for. */
        *start = buffer+len-(pos-offset);
        len = pos-offset;
        if (len>length)
                len = length;
        return len;
}
 950 
 951 
static void rt_free(struct rtable * rt)
/* [previous][next][first][last][top][bottom][index][help] */
{
        unsigned long flags;

        save_flags(flags);
        cli();
        if (!rt->rt_refcnt)
        {
                /*
                 * Unreferenced: release the hardware header cache entry
                 * (if we held the last reference) and the route itself.
                 */
                struct hh_cache * hh = rt->rt_hh;
                rt->rt_hh = NULL;
                if (hh && !--hh->hh_refcnt)
                {
                        restore_flags(flags);
                        kfree_s(hh, sizeof(struct hh_cache));
                }
                restore_flags(flags);   /* NOTE(review): redundant if branch above ran - harmless, confirm */
                kfree_s(rt, sizeof(struct rt_table));   /* NOTE(review): 'rt_table' vs 'rtable' - size hint mismatch suspect, confirm */
                return;
        }
        /*
         * Still referenced: park it on the free queue with RTF_UP cleared
         * and let the bottom half reap it once the refcount drops.
         */
        rt->rt_next = rt_free_queue;
        rt->rt_flags &= ~RTF_UP;
        rt_free_queue = rt;
        ip_rt_bh_mask |= RT_BH_FREE;
#if RT_CACHE_DEBUG >= 2
        printk("rt_free: %08x\n", rt->rt_dst);
#endif
        restore_flags(flags);
}
 980 
/*
 * RT "bottom half" handlers. Called with masked interrupts.
 */
 984 
static __inline__ void rt_kick_free_queue(void)
/* [previous][next][first][last][top][bottom][index][help] */
{
        struct rtable *rt, **rtp;

        /*
         * Reap entries parked by rt_free(). The caller masks interrupts;
         * we briefly re-enable them (sti) around kfree_s and mask again
         * (cli) before touching the queue.
         */
        rtp = &rt_free_queue;

        while ((rt = *rtp) != NULL)
        {
                if  (!rt->rt_refcnt)
                {
                        struct hh_cache * hh = rt->rt_hh;
#if RT_CACHE_DEBUG >= 2
                        __u32 daddr = rt->rt_dst;
#endif
                        /* Unlink while still masked; then it is ours alone. */
                        *rtp = rt->rt_next;
                        rt->rt_hh = NULL;
                        if (hh && !--hh->hh_refcnt)
                        {
                                sti();
                                kfree_s(hh, sizeof(struct hh_cache));
                        }
                        sti();
                        kfree_s(rt, sizeof(struct rt_table));
#if RT_CACHE_DEBUG >= 2
                        printk("rt_kick_free_queue: %08x is free\n", daddr);
#endif
                        cli();
                        continue;
                }
                /* Still referenced - leave it queued and move on. */
                rtp = &rt->rt_next;
        }
}
1017 
void ip_rt_run_bh() {
/* [previous][next][first][last][top][bottom][index][help] */
        unsigned long flags;
        save_flags(flags);
        cli();
        /* Deferred work only runs when nobody holds the routing lock. */
        if (ip_rt_bh_mask && !ip_rt_lock)
        {
                if (ip_rt_bh_mask & RT_BH_REDIRECT)
                        rt_kick_backlog();

                if (ip_rt_bh_mask & RT_BH_GARBAGE_COLLECT)
                {
                        /* Collect with interrupts enabled, under the fast lock. */
                        ip_rt_fast_lock();
                        ip_rt_bh_mask &= ~RT_BH_GARBAGE_COLLECT;
                        sti();
                        rt_garbage_collect_1();
                        cli();
                        ip_rt_fast_unlock();
                }

                if (ip_rt_bh_mask & RT_BH_FREE)
                        rt_kick_free_queue();
        }
        restore_flags(flags);
}
1042 
1043 
void ip_rt_check_expire()
/* [previous][next][first][last][top][bottom][index][help] */
{
        ip_rt_fast_lock();
        /* Only scan when we are the sole holder of the routing lock. */
        if (ip_rt_lock == 1)
        {
                int i;
                struct rtable *rth, **rthp;
                unsigned long flags;
                unsigned long now = jiffies;

                save_flags(flags);
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        rthp = &ip_rt_hash_table[i];

                        while ((rth = *rthp) != NULL)
                        {
                                struct rtable * rth_next = rth->rt_next;

                                /*
                                 * Cleanup aged off entries.
                                 */

                                cli();
                                if (!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                                {
                                        *rthp = rth_next;
                                        sti();
                                        rt_cache_size--;
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire clean %02x@%08x\n", i, rth->rt_dst);
#endif
                                        rt_free(rth);
                                        continue;
                                }
                                sti();

                                if (!rth_next)
                                        break;

                                /*
                                 * LRU ordering: bubble a markedly less recently
                                 * or less frequently used entry past its
                                 * successor so hot entries migrate to the head.
                                 */

                                if (rth->rt_lastuse + RT_CACHE_BUBBLE_THRESHOULD < rth_next->rt_lastuse ||
                                    (rth->rt_lastuse < rth_next->rt_lastuse &&
                                     rth->rt_use < rth_next->rt_use))
                                {
#if RT_CACHE_DEBUG >= 2
                                        printk("rt_check_expire bubbled %02x@%08x<->%08x\n", i, rth->rt_dst, rth_next->rt_dst);
#endif
                                        cli();
                                        *rthp = rth_next;
                                        rth->rt_next = rth_next->rt_next;
                                        rth_next->rt_next = rth;
                                        sti();
                                        rthp = &rth_next->rt_next;
                                        continue;
                                }
                                rthp = &rth->rt_next;
                        }
                }
                restore_flags(flags);
                /* Reap anything queued for deferred freeing. */
                rt_kick_free_queue();
        }
        ip_rt_unlock();
}
1111 
1112 static void rt_redirect_1(__u32 dst, __u32 gw, struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1113 {
1114         struct rtable *rt;
1115         unsigned long hash = ip_rt_hash_code(dst);
1116 
1117         if (gw == dev->pa_addr)
1118                 return;
1119         if (dev != get_gw_dev(gw))
1120                 return;
1121         rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
1122         if (rt == NULL) 
1123                 return;
1124         memset(rt, 0, sizeof(struct rtable));
1125         rt->rt_flags = RTF_DYNAMIC | RTF_MODIFIED | RTF_HOST | RTF_GATEWAY | RTF_UP;
1126         rt->rt_dst = dst;
1127         rt->rt_dev = dev;
1128         rt->rt_gateway = gw;
1129         rt->rt_src = dev->pa_addr;
1130         rt->rt_mtu = dev->mtu;
1131 #ifdef CONFIG_NO_PATH_MTU_DISCOVERY
1132         if (dev->mtu > 576)
1133                 rt->rt_mtu = 576;
1134 #endif
1135         rt->rt_lastuse  = jiffies;
1136         rt->rt_refcnt  = 1;
1137         rt_cache_add(hash, rt);
1138         ip_rt_put(rt);
1139         return;
1140 }
1141 
static void rt_cache_flush(void)
/* [previous][next][first][last][top][bottom][index][help] */
{
        int i;
        struct rtable * rth, * next;

        /*
         * Detach each hash chain atomically (under cli), then free its
         * entries with interrupts enabled; rt_free defers busy entries.
         */
        for (i=0; i<RT_HASH_DIVISOR; i++)
        {
                int nr=0;

                cli();
                if (!(rth = ip_rt_hash_table[i]))
                {
                        sti();
                        continue;
                }

                ip_rt_hash_table[i] = NULL;
                sti();

                for (; rth; rth=next)
                {
                        next = rth->rt_next;
                        rt_cache_size--;
                        nr++;
                        rth->rt_next = NULL;
                        rt_free(rth);
                }
#if RT_CACHE_DEBUG >= 2
                if (nr > 0)
                        printk("rt_cache_flush: %d@%02x\n", nr, i);
#endif
        }
#if RT_CACHE_DEBUG >= 1
        /* Accounting must balance; repair and report if it does not. */
        if (rt_cache_size)
        {
                printk("rt_cache_flush: bug rt_cache_size=%d\n", rt_cache_size);
                rt_cache_size = 0;
        }
#endif
}
1182 
static void rt_garbage_collect_1(void)
/* [previous][next][first][last][top][bottom][index][help] */
{
        int i;
        unsigned expire = RT_CACHE_TIMEOUT>>1;
        struct rtable * rth, **rthp;
        unsigned long now = jiffies;

        /*
         * Evict idle cache entries, halving the permitted idle time on
         * each pass until the cache shrinks below RT_CACHE_SIZE_MAX.
         * At most one entry is removed per hash chain per pass.
         */
        for (;;)
        {
                for (i=0; i<RT_HASH_DIVISOR; i++)
                {
                        if (!ip_rt_hash_table[i])
                                continue;
                        for (rthp=&ip_rt_hash_table[i]; (rth=*rthp); rthp=&rth->rt_next)
                        {
                                /* Referenced entries earn a proportionally longer grace period. */
                                if (rth->rt_lastuse + expire*(rth->rt_refcnt+1) > now)
                                        continue;
                                rt_cache_size--;
                                cli();
                                *rthp=rth->rt_next;
                                rth->rt_next = NULL;
                                sti();
                                rt_free(rth);
                                break;
                        }
                }
                if (rt_cache_size < RT_CACHE_SIZE_MAX)
                        return;
                expire >>= 1;
        }
}
1214 
1215 static __inline__ void rt_req_enqueue(struct rt_req **q, struct rt_req *rtr)
     /* [previous][next][first][last][top][bottom][index][help] */
1216 {
1217         unsigned long flags;
1218         struct rt_req * tail;
1219 
1220         save_flags(flags);
1221         cli();
1222         tail = *q;
1223         if (!tail)
1224                 rtr->rtr_next = rtr;
1225         else
1226         {
1227                 rtr->rtr_next = tail->rtr_next;
1228                 tail->rtr_next = rtr;
1229         }
1230         *q = rtr;
1231         restore_flags(flags);
1232         return;
1233 }
1234 
1235 /*
1236  * Caller should mask interrupts.
1237  */
1238 
1239 static __inline__ struct rt_req * rt_req_dequeue(struct rt_req **q)
     /* [previous][next][first][last][top][bottom][index][help] */
1240 {
1241         struct rt_req * rtr;
1242 
1243         if (*q)
1244         {
1245                 rtr = (*q)->rtr_next;
1246                 (*q)->rtr_next = rtr->rtr_next;
1247                 if (rtr->rtr_next == rtr)
1248                         *q = NULL;
1249                 rtr->rtr_next = NULL;
1250                 return rtr;
1251         }
1252         return NULL;
1253 }
1254 
1255 /*
1256    Called with masked interrupts
1257  */
1258 
1259 static void rt_kick_backlog()
     /* [previous][next][first][last][top][bottom][index][help] */
1260 {
1261         if (!ip_rt_lock)
1262         {
1263                 struct rt_req * rtr;
1264 
1265                 ip_rt_fast_lock();
1266 
1267                 while ((rtr = rt_req_dequeue(&rt_backlog)) != NULL)
1268                 {
1269                         sti();
1270                         rt_redirect_1(rtr->dst, rtr->gw, rtr->dev);
1271                         kfree_s(rtr, sizeof(struct rt_req));
1272                         cli();
1273                 }
1274 
1275                 ip_rt_bh_mask &= ~RT_BH_REDIRECT;
1276 
1277                 ip_rt_fast_unlock();
1278         }
1279 }
1280 
1281 /*
1282  * rt_{del|add|flush} called only from USER process. Waiting is OK.
1283  */
1284 
1285 static int rt_del(__u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1286                 struct device * dev, __u32 gtw, short rt_flags, short metric)
1287 {
1288         int retval;
1289 
1290         while (ip_rt_lock)
1291                 sleep_on(&rt_wait);
1292         ip_rt_fast_lock();
1293         retval = fib_del_1(dst, mask, dev, gtw, rt_flags, metric);
1294         ip_rt_unlock();
1295         wake_up(&rt_wait);
1296         return retval;
1297 }
1298 
1299 static void rt_add(short flags, __u32 dst, __u32 mask,
     /* [previous][next][first][last][top][bottom][index][help] */
1300         __u32 gw, struct device *dev, unsigned short mss,
1301         unsigned long window, unsigned short irtt, short metric)
1302 {
1303         while (ip_rt_lock)
1304                 sleep_on(&rt_wait);
1305         ip_rt_fast_lock();
1306         fib_add_1(flags, dst, mask, gw, dev, mss, window, irtt, metric);
1307         ip_rt_unlock();
1308         wake_up(&rt_wait);
1309 }
1310 
1311 void ip_rt_flush(struct device *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
1312 {
1313         while (ip_rt_lock)
1314                 sleep_on(&rt_wait);
1315         ip_rt_fast_lock();
1316         fib_flush_1(dev);
1317         ip_rt_unlock();
1318         wake_up(&rt_wait);
1319 }
1320 
1321 /*
1322    Called by ICMP module.
1323  */
1324 
void ip_rt_redirect(__u32 src, __u32 dst, __u32 gw, struct device *dev)
/* [previous][next][first][last][top][bottom][index][help] */
{
        struct rt_req * rtr;
        struct rtable * rt;

        /*
         * Validate the redirect: it must come from our current gateway
         * for dst, arrive on the same device, name a gateway on the
         * directly connected network, and not name one of our own
         * addresses (cf. RFC 1122 3.2.2.2).
         */
        rt = ip_rt_route(dst, 0);
        if (!rt)
                return;

        if (rt->rt_gateway != src ||
            rt->rt_dev != dev ||
            ((gw^dev->pa_addr)&dev->pa_mask) ||
            ip_chk_addr(gw))
        {
                ip_rt_put(rt);
                return;
        }
        ip_rt_put(rt);

        ip_rt_fast_lock();
        /* Sole lock holder: apply the redirect immediately. */
        if (ip_rt_lock == 1)
        {
                rt_redirect_1(dst, gw, dev);
                ip_rt_unlock();
                return;
        }

        /* Contended: queue the request for the bottom half to run. */
        rtr = kmalloc(sizeof(struct rt_req), GFP_ATOMIC);
        if (rtr)
        {
                rtr->dst = dst;
                rtr->gw = gw;
                rtr->dev = dev;
                rt_req_enqueue(&rt_backlog, rtr);
                ip_rt_bh_mask |= RT_BH_REDIRECT;
        }
        ip_rt_unlock();
}
1363 
1364 
1365 static __inline__ void rt_garbage_collect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1366 {
1367         if (ip_rt_lock == 1)
1368         {
1369                 rt_garbage_collect_1();
1370                 return;
1371         }
1372         ip_rt_bh_mask |= RT_BH_GARBAGE_COLLECT;
1373 }
1374 
static void rt_cache_add(unsigned hash, struct rtable * rth)
/* [previous][next][first][last][top][bottom][index][help] */
{
        unsigned long   flags;
        struct rtable   **rthp;
        __u32           daddr = rth->rt_dst;
        unsigned long   now = jiffies;

#if RT_CACHE_DEBUG >= 2
        if (ip_rt_lock != 1)
        {
                printk("rt_cache_add: ip_rt_lock==%d\n", ip_rt_lock);
                return;
        }
#endif

        save_flags(flags);

        /*
         * Attach a hardware header cache entry. For a gatewayed route
         * the header really belongs to the gateway's own cache entry,
         * so share its hh rather than binding a new one.
         */
        if (rth->rt_dev->header_cache_bind)
        {
                struct rtable * rtg = rth;

                if (rth->rt_gateway != daddr)
                {
                        /* Drop the fast lock across the recursive lookup. */
                        ip_rt_fast_unlock();
                        rtg = ip_rt_route(rth->rt_gateway, 0);
                        ip_rt_fast_lock();
                }

                if (rtg)
                {
                        if (rtg == rth)
                                rtg->rt_dev->header_cache_bind(&rtg->rt_hh, rtg->rt_dev, ETH_P_IP, rtg->rt_dst);
                        else
                        {
                                if (rtg->rt_hh)
                                        ATOMIC_INCR(&rtg->rt_hh->hh_refcnt);
                                rth->rt_hh = rtg->rt_hh;
                                ip_rt_put(rtg);
                        }
                }
        }

        if (rt_cache_size >= RT_CACHE_SIZE_MAX)
                rt_garbage_collect();

        /* Insert at the head of the hash chain. */
        cli();
        rth->rt_next = ip_rt_hash_table[hash];
#if RT_CACHE_DEBUG >= 2
        if (rth->rt_next)
        {
                struct rtable * trth;
                printk("rt_cache @%02x: %08x", hash, daddr);
                for (trth=rth->rt_next; trth; trth=trth->rt_next)
                        printk(" . %08x", trth->rt_dst);
                printk("\n");
        }
#endif
        ip_rt_hash_table[hash] = rth;
        rthp = &rth->rt_next;
        sti();
        rt_cache_size++;

        /*
         * Cleanup duplicate (and aged off) entries.
         */

        while ((rth = *rthp) != NULL)
        {

                cli();
                if ((!rth->rt_refcnt && rth->rt_lastuse + RT_CACHE_TIMEOUT < now)
                    || rth->rt_dst == daddr)
                {
                        *rthp = rth->rt_next;
                        rt_cache_size--;
                        sti();
#if RT_CACHE_DEBUG >= 2
                        printk("rt_cache clean %02x@%08x\n", hash, rth->rt_dst);
#endif
                        rt_free(rth);
                        continue;
                }
                sti();
                rthp = &rth->rt_next;
        }
        restore_flags(flags);
}
1462 
1463 /*
1464    RT should be already locked.
1465    
1466    We could improve this by keeping a chain of say 32 struct rtable's
1467    last freed for fast recycling.
1468    
1469  */
1470 
struct rtable * ip_rt_slow_route (__u32 daddr, int local)
/* [previous][next][first][last][top][bottom][index][help] */
{
        unsigned hash = ip_rt_hash_code(daddr)^local;
        struct rtable * rth;
        struct fib_node * f;
        struct fib_info * fi;
        __u32 saddr;

#if RT_CACHE_DEBUG >= 2
        printk("rt_cache miss @%08x\n", daddr);
#endif

        /*
         * Entered with the routing lock held (see ip_rt_route); every
         * exit path below releases it via ip_rt_unlock.
         */
        rth = kmalloc(sizeof(struct rtable), GFP_ATOMIC);
        if (!rth)
        {
                ip_rt_unlock();
                return NULL;
        }

        if (local)
                f = fib_lookup_local(daddr);
        else
                f = fib_lookup (daddr);

        if (f)
        {
                fi = f->fib_info;
                f->fib_use++;
        }

        /* fi is only valid when f != NULL - the || short-circuits. */
        if (!f || (fi->fib_flags & RTF_REJECT))
        {
#ifdef CONFIG_KERNELD   
                char wanted_route[20];
#endif          
#if RT_CACHE_DEBUG >= 2
                printk("rt_route failed @%08x\n", daddr);
#endif
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
#ifdef CONFIG_KERNELD           
                /* Ask user space (kerneld) to try to establish a route. */
                daddr=ntohl(daddr);
                sprintf(wanted_route, "%d.%d.%d.%d",
                        (int)(daddr >> 24) & 0xff, (int)(daddr >> 16) & 0xff,
                        (int)(daddr >> 8) & 0xff, (int)daddr & 0xff);
                kerneld_route(wanted_route);    /* Dynamic route request */
#endif          
                return NULL;
        }

        saddr = fi->fib_dev->pa_addr;

        /* Packets for one of our own addresses go via loopback. */
        if (daddr == fi->fib_dev->pa_addr)
        {
                f->fib_use--;
                if ((f = fib_loopback) != NULL)
                {
                        f->fib_use++;
                        fi = f->fib_info;
                }
        }
        
        if (!f)
        {
                ip_rt_unlock();
                kfree_s(rth, sizeof(struct rtable));
                return NULL;
        }

        rth->rt_dst     = daddr;
        rth->rt_src     = saddr;
        rth->rt_lastuse = jiffies;
        rth->rt_refcnt  = 1;
        rth->rt_use     = 1;
        rth->rt_next    = NULL;
        rth->rt_hh      = NULL;
        rth->rt_gateway = fi->fib_gateway;
        rth->rt_dev     = fi->fib_dev;
        rth->rt_mtu     = fi->fib_mtu;
        rth->rt_window  = fi->fib_window;
        rth->rt_irtt    = fi->fib_irtt;
        rth->rt_tos     = f->fib_tos;
        rth->rt_flags   = fi->fib_flags | RTF_HOST;
        if (local)
                rth->rt_flags   |= RTF_LOCAL;

        /* Direct routes "gateway" via the destination itself. */
        if (!(rth->rt_flags & RTF_GATEWAY))
                rth->rt_gateway = rth->rt_dst;

        /*
         * Only cache the entry if we are the sole lock holder; otherwise
         * rt_free just queues it - actual freeing is deferred until the
         * caller drops the reference we return.
         */
        if (ip_rt_lock == 1)
                rt_cache_add(hash, rth);
        else
        {
                rt_free(rth);
#if RT_CACHE_DEBUG >= 1
                printk("rt_cache: route to %08x was born dead\n", daddr);
#endif
        }

        ip_rt_unlock();
        return rth;
}
1573 
1574 void ip_rt_put(struct rtable * rt)
     /* [previous][next][first][last][top][bottom][index][help] */
1575 {
1576         if (rt)
1577                 ATOMIC_DECR(&rt->rt_refcnt);
1578 }
1579 
struct rtable * ip_rt_route(__u32 daddr, int local)
/* [previous][next][first][last][top][bottom][index][help] */
{
        struct rtable * rth;

        ip_rt_fast_lock();

        /* Fast path: cache hit. Returns with a reference held for the caller. */
        for (rth=ip_rt_hash_table[ip_rt_hash_code(daddr)^local]; rth; rth=rth->rt_next)
        {
                if (rth->rt_dst == daddr)
                {
                        rth->rt_lastuse = jiffies;
                        ATOMIC_INCR(&rth->rt_use);
                        ATOMIC_INCR(&rth->rt_refcnt);
                        ip_rt_unlock();
                        return rth;
                }
        }
        /* Miss: ip_rt_slow_route inherits the lock and releases it. */
        return ip_rt_slow_route (daddr, local);
}
1599 
1600 /*
1601  *      Process a route add request from the user, or from a kernel
1602  *      task.
1603  */
1604  
int ip_rt_new(struct rtentry *r)
/* [previous][next][first][last][top][bottom][index][help] */
{
        int err;
        char * devname;
        struct device * dev = NULL;
        unsigned long flags;
        __u32 daddr, mask, gw;
        short metric;

        /*
         *      If a device is specified find it.
         */
         
        if ((devname = r->rt_dev) != NULL) 
        {
                err = getname(devname, &devname);
                if (err)
                        return err;
                dev = dev_get(devname);
                putname(devname);
                if (!dev)
                        return -ENODEV;
        }
        
        /*
         *      If the device isn't INET, don't allow it
         */

        if (r->rt_dst.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         *      Make local copies of the important bits
         *      We decrement the metric by one for BSD compatibility.
         */
         
        flags = r->rt_flags;
        daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
        mask  = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
        gw    = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
        metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;

        /*
         *      BSD emulation: Permits route add someroute gw one-of-my-addresses
         *      to indicate which iface. Not as clean as the nice Linux dev technique
         *      but people keep using it...  (and gated likes it ;))
         */
         
        if (!dev && (flags & RTF_GATEWAY)) 
        {
                struct device *dev2;
                for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next) 
                {
                        if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw) 
                        {
                                /*
                                 * The "gateway" is one of our own addresses:
                                 * it is really a direct route out of dev2.
                                 */
                                flags &= ~RTF_GATEWAY;
                                dev = dev2;
                                break;
                        }
                }
        }

        /*
         *      Ignore faulty masks
         */
         
        if (bad_mask(mask, daddr))
                mask=0;

        /*
         *      Set the mask to nothing for host routes.
         */
         
        if (flags & RTF_HOST)
                mask = 0xffffffff;
        else if (mask && r->rt_genmask.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         *      You can only gateway IP via IP..
         */
         
        if (flags & RTF_GATEWAY) 
        {
                if (r->rt_gateway.sa_family != AF_INET)
                        return -EAFNOSUPPORT;
                if (!dev)
                        dev = get_gw_dev(gw);
        } 
        else if (!dev)
                dev = ip_dev_check(daddr);

        /*
         *      Unknown device.
         */
         
        if (dev == NULL)
                return -ENETUNREACH;

        /*
         *      Add the route
         */

        rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
        return 0;
}
1711 
1712 
1713 /*
1714  *      Remove a route, as requested by the user.
1715  */
1716 
1717 static int rt_kill(struct rtentry *r)
     /* [previous][next][first][last][top][bottom][index][help] */
1718 {
1719         struct sockaddr_in *trg;
1720         struct sockaddr_in *msk;
1721         struct sockaddr_in *gtw;
1722         char *devname;
1723         int err;
1724         struct device * dev = NULL;
1725 
1726         trg = (struct sockaddr_in *) &r->rt_dst;
1727         msk = (struct sockaddr_in *) &r->rt_genmask;
1728         gtw = (struct sockaddr_in *) &r->rt_gateway;
1729         if ((devname = r->rt_dev) != NULL) 
1730         {
1731                 err = getname(devname, &devname);
1732                 if (err)
1733                         return err;
1734                 dev = dev_get(devname);
1735                 putname(devname);
1736                 if (!dev)
1737                         return -ENODEV;
1738         }
1739         /*
1740          * metric can become negative here if it wasn't filled in
1741          * but that's a fortunate accident; we really use that in rt_del.
1742          */
1743         err=rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, dev,
1744                 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
1745         return err;
1746 }
1747 
1748 /*
1749  *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
1750  */
1751  
1752 int ip_rt_ioctl(unsigned int cmd, void *arg)
     /* [previous][next][first][last][top][bottom][index][help] */
1753 {
1754         int err;
1755         struct rtentry rt;
1756 
1757         switch(cmd) 
1758         {
1759                 case SIOCADDRT:         /* Add a route */
1760                 case SIOCDELRT:         /* Delete a route */
1761                         if (!suser())
1762                                 return -EPERM;
1763                         err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
1764                         if (err)
1765                                 return err;
1766                         memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
1767                         return (cmd == SIOCDELRT) ? rt_kill(&rt) : ip_rt_new(&rt);
1768         }
1769 
1770         return -EINVAL;
1771 }
1772 
void ip_rt_advice(struct rtable **rp, int advice)
/* [previous][next][first][last][top][bottom][index][help] */
{
        /* Route advice is not acted upon in this implementation. Thanks! */
}
1778 

/* [previous][next][first][last][top][bottom][index][help] */