1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
8 * Version: @(#)route.c 1.0.14 05/31/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
14 *
15 * Fixes:
16 * Alan Cox : Verify area fixes.
17 * Alan Cox : cli() protects routing changes
18 * Rui Oliveira : ICMP routing table updates
19 * (rco@di.uminho.pt) Routing table insertion and update
20 * Linus Torvalds : Rewrote bits to be sensible
21 * Alan Cox : Added BSD route gw semantics
22 * Alan Cox : Super /proc >4K
23 * Alan Cox : MTU in route table
24 * Alan Cox : MSS actually. Also added the window
25 * clamper.
26 * Sam Lantinga : Fixed route matching in rt_del()
27 * Alan Cox : Routing cache support.
28 * Alan Cox : Removed compatibility cruft.
29 * Alan Cox : RTF_REJECT support.
30 * Alan Cox : TCP irtt support.
31 * Jonathan Naylor : Added Metric support.
32 * Miquel van Smoorenburg : BSD API fixes.
33 * Miquel van Smoorenburg : Metrics.
34 * Alan Cox : Use __u32 properly
35 *
36 * This program is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU General Public License
38 * as published by the Free Software Foundation; either version
39 * 2 of the License, or (at your option) any later version.
40 */
41
42 #include <asm/segment.h>
43 #include <asm/system.h>
44 #include <linux/types.h>
45 #include <linux/kernel.h>
46 #include <linux/sched.h>
47 #include <linux/mm.h>
48 #include <linux/string.h>
49 #include <linux/socket.h>
50 #include <linux/sockios.h>
51 #include <linux/errno.h>
52 #include <linux/in.h>
53 #include <linux/inet.h>
54 #include <linux/netdevice.h>
55 #include <net/ip.h>
56 #include <net/protocol.h>
57 #include <net/route.h>
58 #include <net/tcp.h>
59 #include <linux/skbuff.h>
60 #include <net/sock.h>
61 #include <net/icmp.h>
62
63 /*
64 * The routing table list
 *
 * rt_base is the head of a singly linked list of struct rtable entries
 * chained through rt_next. The functions in this file that modify the
 * list do so with interrupts disabled.
65 */
66
67 static struct rtable *rt_base = NULL;
68 unsigned long rt_stamp = 1; /* Routing table version stamp for caches ( 0 is 'unset' ); incremented by every table update in this file */
69
70 /*
71 * Pointer to the loopback route
 *
 * Refers to a route whose device has IFF_LOOPBACK set, or is NULL when
 * no such route is currently recorded. It is reset to NULL whenever the
 * entry it refers to is freed.
72 */
73
74 static struct rtable *rt_loopback = NULL;
75
76 /*
77 * Remove a routing table entry.
78 * Should we return a status value here ?
79 */
80
81 static void rt_del(__u32 dst, __u32 mask,
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
82 char *devname, __u32 gtw, short rt_flags, short metric)
83 {
84 struct rtable *r, **rp;
85 unsigned long flags;
86
87 rp = &rt_base;
88
89 /*
90 * This must be done with interrupts off because we could take
91 * an ICMP_REDIRECT.
92 */
93
94 save_flags(flags);
95 cli();
96 while((r = *rp) != NULL)
97 {
98 /*
99 * Make sure the destination and netmask match.
100 * metric, gateway and device are also checked
101 * if they were specified.
102 */
103 if (r->rt_dst != dst ||
104 (mask && r->rt_mask != mask) ||
105 (gtw && r->rt_gateway != gtw) ||
106 (metric >= 0 && r->rt_metric != metric) ||
107 (devname && strcmp((r->rt_dev)->name,devname) != 0) )
108 {
109 rp = &r->rt_next;
110 continue;
111 }
112 *rp = r->rt_next;
113
114 /*
115 * If we delete the loopback route update its pointer.
116 */
117
118 if (rt_loopback == r)
119 rt_loopback = NULL;
120 kfree_s(r, sizeof(struct rtable));
121 }
122 rt_stamp++; /* New table revision */
123
124 restore_flags(flags);
125 }
126
127
128 /*
129 * Remove all routing table entries for a device. This is called when
130 * a device is downed.
131 */
132
133 void ip_rt_flush(struct device *dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
134 {
135 struct rtable *r;
136 struct rtable **rp;
137 unsigned long flags;
138
139 rp = &rt_base;
140 save_flags(flags);
141 cli();
142 while ((r = *rp) != NULL) {
143 if (r->rt_dev != dev) {
144 rp = &r->rt_next;
145 continue;
146 }
147 *rp = r->rt_next;
148 if (rt_loopback == r)
149 rt_loopback = NULL;
150 kfree_s(r, sizeof(struct rtable));
151 }
152 rt_stamp++; /* New table revision */
153 restore_flags(flags);
154 }
155
/*
 *	Return the classful ("natural") netmask for a destination address.
 *	Called from guess_mask() when no better mask is available.
 *
 *	dst is taken in network byte order and the mask is returned in
 *	network byte order. Class A and class B addresses get their
 *	respective masks; every other address, including those outside
 *	class C, gets the class C mask.
 */

static __u32 default_mask(__u32 dst)
{
	/* The IN_CLASSx tests operate on host byte order values. */
	dst = ntohl(dst);
	if (IN_CLASSA(dst))
		return htonl(IN_CLASSA_NET);
	if (IN_CLASSB(dst))
		return htonl(IN_CLASSB_NET);
	return htonl(IN_CLASSC_NET);
}
173
174
175 /*
176 * If no mask is specified then generate a default entry.
177 */
178
179 static __u32 guess_mask(__u32 dst, struct device * dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
180 {
181 __u32 mask;
182
183 if (!dst)
184 return 0;
185 mask = default_mask(dst);
186 if ((dst ^ dev->pa_addr) & mask)
187 return mask;
188 return dev->pa_mask;
189 }
190
191
192 /*
193 * Find the route entry through which our gateway will be reached
194 */
195
196 static inline struct device * get_gw_dev(__u32 gw)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
197 {
198 struct rtable * rt;
199
200 for (rt = rt_base ; ; rt = rt->rt_next)
201 {
202 if (!rt)
203 return NULL;
204 if ((gw ^ rt->rt_dst) & rt->rt_mask)
205 continue;
206 /*
207 * Gateways behind gateways are a no-no
208 */
209
210 if (rt->rt_flags & RTF_GATEWAY)
211 return NULL;
212 return rt->rt_dev;
213 }
214 }
215
216 /*
217 * Rewrote rt_add(), as the old one was weird - Linus
218 *
219 * This routine is used to update the IP routing table, either
220 * from the kernel (ICMP_REDIRECT) or via an ioctl call issued
221 * by the superuser.
222 */
223
224 void ip_rt_add(short flags, __u32 dst, __u32 mask,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
225 __u32 gw, struct device *dev, unsigned short mtu,
226 unsigned long window, unsigned short irtt, short metric)
227 {
228 struct rtable *r, *rt;
229 struct rtable **rp;
230 unsigned long cpuflags;
231 int duplicate = 0;
232
233 /*
234 * A host is a unique machine and has no network bits.
235 */
236
237 if (flags & RTF_HOST)
238 {
239 mask = 0xffffffff;
240 }
241
242 /*
243 * Calculate the network mask
244 */
245
246 else if (!mask)
247 {
248 if (!((dst ^ dev->pa_addr) & dev->pa_mask))
249 {
250 mask = dev->pa_mask;
251 flags &= ~RTF_GATEWAY;
252 if (flags & RTF_DYNAMIC)
253 {
254 /*printk("Dynamic route to my own net rejected\n");*/
255 return;
256 }
257 }
258 else
259 mask = guess_mask(dst, dev);
260 dst &= mask;
261 }
262
263 /*
264 * A gateway must be reachable and not a local address
265 */
266
267 if (gw == dev->pa_addr)
268 flags &= ~RTF_GATEWAY;
269
270 if (flags & RTF_GATEWAY)
271 {
272 /*
273 * Don't try to add a gateway we can't reach..
274 */
275
276 if (dev != get_gw_dev(gw))
277 return;
278
279 flags |= RTF_GATEWAY;
280 }
281 else
282 gw = 0;
283
284 /*
285 * Allocate an entry and fill it in.
286 */
287
288 rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
289 if (rt == NULL)
290 {
291 return;
292 }
293 memset(rt, 0, sizeof(struct rtable));
294 rt->rt_flags = flags | RTF_UP;
295 rt->rt_dst = dst;
296 rt->rt_dev = dev;
297 rt->rt_gateway = gw;
298 rt->rt_mask = mask;
299 rt->rt_mss = dev->mtu - HEADER_SIZE;
300 rt->rt_metric = metric;
301 rt->rt_window = 0; /* Default is no clamping */
302
303 /* Are the MSS/Window valid ? */
304
305 if(rt->rt_flags & RTF_MSS)
306 rt->rt_mss = mtu;
307
308 if(rt->rt_flags & RTF_WINDOW)
309 rt->rt_window = window;
310 if(rt->rt_flags & RTF_IRTT)
311 rt->rt_irtt = irtt;
312
313 /*
314 * What we have to do is loop though this until we have
315 * found the first address which has a higher generality than
316 * the one in rt. Then we can put rt in right before it.
317 * The interrupts must be off for this process.
318 */
319
320 save_flags(cpuflags);
321 cli();
322
323 /*
324 * Remove old route if we are getting a duplicate.
325 */
326
327 rp = &rt_base;
328 while ((r = *rp) != NULL)
329 {
330 if (r->rt_dst != dst ||
331 r->rt_mask != mask)
332 {
333 rp = &r->rt_next;
334 continue;
335 }
336 if (r->rt_metric != metric && r->rt_gateway != gw)
337 {
338 duplicate = 1;
339 rp = &r->rt_next;
340 continue;
341 }
342 *rp = r->rt_next;
343 if (rt_loopback == r)
344 rt_loopback = NULL;
345 kfree_s(r, sizeof(struct rtable));
346 }
347
348 /*
349 * Add the new route
350 */
351
352 rp = &rt_base;
353 while ((r = *rp) != NULL) {
354 /*
355 * When adding a duplicate route, add it before
356 * the route with a higher metric.
357 */
358 if (duplicate &&
359 r->rt_dst == dst &&
360 r->rt_mask == mask &&
361 r->rt_metric > metric)
362 break;
363 else
364 /*
365 * Otherwise, just add it before the
366 * route with a higher generality.
367 */
368 if ((r->rt_mask & mask) != mask)
369 break;
370 rp = &r->rt_next;
371 }
372 rt->rt_next = r;
373 *rp = rt;
374
375 /*
376 * Update the loopback route
377 */
378
379 if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
380 rt_loopback = rt;
381
382 rt_stamp++; /* New table revision */
383
384 /*
385 * Restore the interrupts and return
386 */
387
388 restore_flags(cpuflags);
389 return;
390 }
391
392
/*
 *	Check if a mask is acceptable for an address.
 *
 *	Both arguments are in network byte order. Returns 1 when the mask
 *	is unusable, 0 when it is fine. A mask is rejected when the address
 *	has bits set outside it, or when its one bits do not form a single
 *	run starting at the most significant bit.
 */

static inline int bad_mask(__u32 mask, __u32 addr)
{
	__u32 host_bits = ~mask;

	/* Address bits outside the mask are not allowed. */
	if (addr & host_bits)
		return 1;

	/*
	 *	In host order the inverted mask must look like 2^k - 1;
	 *	adding one then leaves no bit in common with it.
	 */
	host_bits = ntohl(host_bits);
	return (host_bits & (host_bits + 1)) != 0;
}
406
407 /*
408 * Process a route add request from the user
409 */
410
411 static int rt_new(struct rtentry *r)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
412 {
413 int err;
414 char * devname;
415 struct device * dev = NULL;
416 unsigned long flags;
417 __u32 daddr, mask, gw;
418 short metric;
419
420 /*
421 * If a device is specified find it.
422 */
423
424 if ((devname = r->rt_dev) != NULL)
425 {
426 err = getname(devname, &devname);
427 if (err)
428 return err;
429 dev = dev_get(devname);
430 putname(devname);
431 if (!dev)
432 return -EINVAL;
433 }
434
435 /*
436 * If the device isn't INET, don't allow it
437 */
438
439 if (r->rt_dst.sa_family != AF_INET)
440 return -EAFNOSUPPORT;
441
442 /*
443 * Make local copies of the important bits
444 * We decrement the metric by one for BSD compatibility.
445 */
446
447 flags = r->rt_flags;
448 daddr = (__u32) ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
449 mask = (__u32) ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
450 gw = (__u32) ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
451 metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
452
453 /*
454 * BSD emulation: Permits route add someroute gw one-of-my-addresses
455 * to indicate which iface. Not as clean as the nice Linux dev technique
456 * but people keep using it...
457 */
458
459 if (!dev && (flags & RTF_GATEWAY))
460 {
461 struct device *dev2;
462 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next)
463 {
464 if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw)
465 {
466 flags &= ~RTF_GATEWAY;
467 dev = dev2;
468 break;
469 }
470 }
471 }
472
473 /*
474 * Ignore faulty masks
475 */
476
477 if (bad_mask(mask, daddr))
478 mask=0;
479
480 /*
481 * Set the mask to nothing for host routes.
482 */
483
484 if (flags & RTF_HOST)
485 mask = 0xffffffff;
486 else if (mask && r->rt_genmask.sa_family != AF_INET)
487 return -EAFNOSUPPORT;
488
489 /*
490 * You can only gateway IP via IP..
491 */
492
493 if (flags & RTF_GATEWAY)
494 {
495 if (r->rt_gateway.sa_family != AF_INET)
496 return -EAFNOSUPPORT;
497 if (!dev)
498 dev = get_gw_dev(gw);
499 }
500 else if (!dev)
501 dev = ip_dev_check(daddr);
502
503 /*
504 * Unknown device.
505 */
506
507 if (dev == NULL)
508 return -ENETUNREACH;
509
510 /*
511 * Add the route
512 */
513
514 ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
515 return 0;
516 }
517
518
519 /*
520 * Remove a route, as requested by the user.
521 */
522
523 static int rt_kill(struct rtentry *r)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
524 {
525 struct sockaddr_in *trg;
526 struct sockaddr_in *msk;
527 struct sockaddr_in *gtw;
528 char *devname;
529 int err;
530
531 trg = (struct sockaddr_in *) &r->rt_dst;
532 msk = (struct sockaddr_in *) &r->rt_genmask;
533 gtw = (struct sockaddr_in *) &r->rt_gateway;
534 if ((devname = r->rt_dev) != NULL)
535 {
536 err = getname(devname, &devname);
537 if (err)
538 return err;
539 }
540 /*
541 * metric can become negative here if it wasn't filled in
542 * but that's a fortunate accident; we really use that in rt_del.
543 */
544 rt_del((__u32)trg->sin_addr.s_addr, (__u32)msk->sin_addr.s_addr, devname,
545 (__u32)gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
546 if ( devname != NULL )
547 putname(devname);
548 return 0;
549 }
550
551
552 /*
553 * Called from the PROCfs module. This outputs /proc/net/route.
554 */
555
556 int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
557 {
558 struct rtable *r;
559 int len=0;
560 off_t pos=0;
561 off_t begin=0;
562 int size;
563
564 len += sprintf(buffer,
565 "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
566 pos=len;
567
568 /*
569 * This isn't quite right -- r->rt_dst is a struct!
570 */
571
572 for (r = rt_base; r != NULL; r = r->rt_next)
573 {
574 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
575 r->rt_dev->name, (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
576 r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
577 (unsigned long)r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
578 len+=size;
579 pos+=size;
580 if(pos<offset)
581 {
582 len=0;
583 begin=pos;
584 }
585 if(pos>offset+length)
586 break;
587 }
588
589 *start=buffer+(offset-begin);
590 len-=(offset-begin);
591 if(len>length)
592 len=length;
593 return len;
594 }
595
596 /*
597 * This is hackish, but results in better code. Use "-S" to see why.
 *
 * early_out is a GCC statement expression whose only effect is to jump
 * to a label named no_route; the trailing 1 merely gives the expression
 * a value so that it can appear inside a condition. Any function that
 * uses it has to provide a no_route label.
598 */
599
600 #define early_out ({ goto no_route; 1; })
601
602 /*
603 * Route a packet. This needs to be fairly quick. Florian & Co.
604 * suggested a unified ARP and IP routing cache. Done right its
605 * probably a brilliant idea. I'd actually suggest a unified
606 * ARP/IP routing/Socket pointer cache. Volunteers welcome
607 */
608
609 struct rtable * ip_rt_route(__u32 daddr, struct options *opt, __u32 *src_addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
610 {
611 struct rtable *rt;
612
613 for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
614 {
615 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
616 break;
617 /*
618 * broadcast addresses can be special cases..
619 */
620 if (rt->rt_flags & RTF_GATEWAY)
621 continue;
622 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
623 (rt->rt_dev->pa_brdaddr == daddr))
624 break;
625 }
626
627 if(rt->rt_flags&RTF_REJECT)
628 return NULL;
629
630 if(src_addr!=NULL)
631 *src_addr= rt->rt_dev->pa_addr;
632
633 if (daddr == rt->rt_dev->pa_addr) {
634 if ((rt = rt_loopback) == NULL)
635 goto no_route;
636 }
637 rt->rt_use++;
638 return rt;
639 no_route:
640 return NULL;
641 }
642
643 struct rtable * ip_rt_local(__u32 daddr, struct options *opt, __u32 *src_addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
644 {
645 struct rtable *rt;
646
647 for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
648 {
649 /*
650 * No routed addressing.
651 */
652 if (rt->rt_flags&RTF_GATEWAY)
653 continue;
654
655 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
656 break;
657 /*
658 * broadcast addresses can be special cases..
659 */
660
661 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
662 rt->rt_dev->pa_brdaddr == daddr)
663 break;
664 }
665
666 if(src_addr!=NULL)
667 *src_addr= rt->rt_dev->pa_addr;
668
669 if (daddr == rt->rt_dev->pa_addr) {
670 if ((rt = rt_loopback) == NULL)
671 goto no_route;
672 }
673 rt->rt_use++;
674 return rt;
675 no_route:
676 return NULL;
677 }
678
679 /*
680 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
681 */
682
683 int ip_rt_ioctl(unsigned int cmd, void *arg)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
684 {
685 int err;
686 struct rtentry rt;
687
688 switch(cmd)
689 {
690 case SIOCADDRT: /* Add a route */
691 case SIOCDELRT: /* Delete a route */
692 if (!suser())
693 return -EPERM;
694 err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
695 if (err)
696 return err;
697 memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
698 return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
699 }
700
701 return -EINVAL;
702 }