1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
8 * Version: @(#)route.c 1.0.14 05/31/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
14 *
15 * Fixes:
16 * Alan Cox : Verify area fixes.
17 * Alan Cox : cli() protects routing changes
18 * Rui Oliveira : ICMP routing table updates
19 * (rco@di.uminho.pt) Routing table insertion and update
20 * Linus Torvalds : Rewrote bits to be sensible
21 * Alan Cox : Added BSD route gw semantics
22 * Alan Cox : Super /proc >4K
23 * Alan Cox : MTU in route table
24 * Alan Cox : MSS actually. Also added the window
25 * clamper.
26 * Sam Lantinga : Fixed route matching in rt_del()
27 * Alan Cox : Routing cache support.
28 * Alan Cox : Removed compatibility cruft.
29 * Alan Cox : RTF_REJECT support.
30 * Alan Cox : TCP irtt support.
31 * Jonathan Naylor : Added Metric support.
32 * Miquel van Smoorenburg : BSD API fixes.
33 * Miquel van Smoorenburg : Metrics.
34 *
35 * This program is free software; you can redistribute it and/or
36 * modify it under the terms of the GNU General Public License
37 * as published by the Free Software Foundation; either version
38 * 2 of the License, or (at your option) any later version.
39 */
40
41 #include <asm/segment.h>
42 #include <asm/system.h>
43 #include <linux/types.h>
44 #include <linux/kernel.h>
45 #include <linux/sched.h>
46 #include <linux/mm.h>
47 #include <linux/string.h>
48 #include <linux/socket.h>
49 #include <linux/sockios.h>
50 #include <linux/errno.h>
51 #include <linux/in.h>
52 #include <linux/inet.h>
53 #include <linux/netdevice.h>
54 #include <net/ip.h>
55 #include <net/protocol.h>
56 #include <net/route.h>
57 #include <net/tcp.h>
58 #include <linux/skbuff.h>
59 #include <net/sock.h>
60 #include <net/icmp.h>
61
/*
 *	Routing table state.
 *
 *	rt_base is the head of the singly linked list of routes (chained
 *	through rt_next). rt_loopback caches a pointer to a route whose
 *	device is a loopback interface, or is NULL when no such route is
 *	currently known.
 */

static struct rtable *rt_base = NULL;
static struct rtable *rt_loopback = NULL;

/*
 *	Routing table version stamp for caches (0 is 'unset').
 *	It is incremented every time the table is modified.
 */

unsigned long rt_stamp = 1;
74
75 /*
76 * Remove a routing table entry.
77 * Should we return a status value here ?
78 */
79
80 static void rt_del(unsigned long dst, unsigned long mask,
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
81 char *devname, unsigned long gtw, short rt_flags, short metric)
82 {
83 struct rtable *r, **rp;
84 unsigned long flags;
85
86 rp = &rt_base;
87
88 /*
89 * This must be done with interrupts off because we could take
90 * an ICMP_REDIRECT.
91 */
92
93 save_flags(flags);
94 cli();
95 while((r = *rp) != NULL)
96 {
97 /*
98 * Make sure the destination and netmask match.
99 * metric, gateway and device are also checked
100 * if they were specified.
101 */
102 if (r->rt_dst != dst ||
103 (mask && r->rt_mask != mask) ||
104 (gtw && r->rt_gateway != gtw) ||
105 (metric >= 0 && r->rt_metric != metric) ||
106 (devname && strcmp((r->rt_dev)->name,devname) != 0) )
107 {
108 rp = &r->rt_next;
109 continue;
110 }
111 *rp = r->rt_next;
112
113 /*
114 * If we delete the loopback route update its pointer.
115 */
116
117 if (rt_loopback == r)
118 rt_loopback = NULL;
119 kfree_s(r, sizeof(struct rtable));
120 }
121 rt_stamp++; /* New table revision */
122
123 restore_flags(flags);
124 }
125
126
127 /*
128 * Remove all routing table entries for a device. This is called when
129 * a device is downed.
130 */
131
132 void ip_rt_flush(struct device *dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
133 {
134 struct rtable *r;
135 struct rtable **rp;
136 unsigned long flags;
137
138 rp = &rt_base;
139 save_flags(flags);
140 cli();
141 while ((r = *rp) != NULL) {
142 if (r->rt_dev != dev) {
143 rp = &r->rt_next;
144 continue;
145 }
146 *rp = r->rt_next;
147 if (rt_loopback == r)
148 rt_loopback = NULL;
149 kfree_s(r, sizeof(struct rtable));
150 }
151 rt_stamp++; /* New table revision */
152 restore_flags(flags);
153 }
154
/*
 *	Used when we can't get the netmask any other way.
 *
 *	Returns the classful netmask for 'dst': the class A mask for a
 *	class A address, the class B mask for a class B address, and the
 *	class C mask for everything else. Both the argument and the
 *	result are in network byte order.
 */

static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_order = ntohl(dst);
	unsigned long net;

	if (IN_CLASSA(host_order))
		net = IN_CLASSA_NET;
	else if (IN_CLASSB(host_order))
		net = IN_CLASSB_NET;
	else
		net = IN_CLASSC_NET;

	return htonl(net);
}
172
173
174 /*
175 * If no mask is specified then generate a default entry.
176 */
177
178 static unsigned long guess_mask(unsigned long dst, struct device * dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
179 {
180 unsigned long mask;
181
182 if (!dst)
183 return 0;
184 mask = default_mask(dst);
185 if ((dst ^ dev->pa_addr) & mask)
186 return mask;
187 return dev->pa_mask;
188 }
189
190
191 /*
192 * Find the route entry through which our gateway will be reached
193 */
194
195 static inline struct device * get_gw_dev(unsigned long gw)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
196 {
197 struct rtable * rt;
198
199 for (rt = rt_base ; ; rt = rt->rt_next)
200 {
201 if (!rt)
202 return NULL;
203 if ((gw ^ rt->rt_dst) & rt->rt_mask)
204 continue;
205 /*
206 * Gateways behind gateways are a no-no
207 */
208
209 if (rt->rt_flags & RTF_GATEWAY)
210 return NULL;
211 return rt->rt_dev;
212 }
213 }
214
215 /*
216 * Rewrote rt_add(), as the old one was weird - Linus
217 *
218 * This routine is used to update the IP routing table, either
219 * from the kernel (ICMP_REDIRECT) or via an ioctl call issued
220 * by the superuser.
221 */
222
223 void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
224 unsigned long gw, struct device *dev, unsigned short mtu,
225 unsigned long window, unsigned short irtt, short metric)
226 {
227 struct rtable *r, *rt;
228 struct rtable **rp;
229 unsigned long cpuflags;
230 int duplicate = 0;
231
232 /*
233 * A host is a unique machine and has no network bits.
234 */
235
236 if (flags & RTF_HOST)
237 {
238 mask = 0xffffffff;
239 }
240
241 /*
242 * Calculate the network mask
243 */
244
245 else if (!mask)
246 {
247 if (!((dst ^ dev->pa_addr) & dev->pa_mask))
248 {
249 mask = dev->pa_mask;
250 flags &= ~RTF_GATEWAY;
251 if (flags & RTF_DYNAMIC)
252 {
253 /*printk("Dynamic route to my own net rejected\n");*/
254 return;
255 }
256 }
257 else
258 mask = guess_mask(dst, dev);
259 dst &= mask;
260 }
261
262 /*
263 * A gateway must be reachable and not a local address
264 */
265
266 if (gw == dev->pa_addr)
267 flags &= ~RTF_GATEWAY;
268
269 if (flags & RTF_GATEWAY)
270 {
271 /*
272 * Don't try to add a gateway we can't reach..
273 */
274
275 if (dev != get_gw_dev(gw))
276 return;
277
278 flags |= RTF_GATEWAY;
279 }
280 else
281 gw = 0;
282
283 /*
284 * Allocate an entry and fill it in.
285 */
286
287 rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
288 if (rt == NULL)
289 {
290 return;
291 }
292 memset(rt, 0, sizeof(struct rtable));
293 rt->rt_flags = flags | RTF_UP;
294 rt->rt_dst = dst;
295 rt->rt_dev = dev;
296 rt->rt_gateway = gw;
297 rt->rt_mask = mask;
298 rt->rt_mss = dev->mtu - HEADER_SIZE;
299 rt->rt_metric = metric;
300 rt->rt_window = 0; /* Default is no clamping */
301
302 /* Are the MSS/Window valid ? */
303
304 if(rt->rt_flags & RTF_MSS)
305 rt->rt_mss = mtu;
306
307 if(rt->rt_flags & RTF_WINDOW)
308 rt->rt_window = window;
309 if(rt->rt_flags & RTF_IRTT)
310 rt->rt_irtt = irtt;
311
312 /*
313 * What we have to do is loop though this until we have
314 * found the first address which has a higher generality than
315 * the one in rt. Then we can put rt in right before it.
316 * The interrupts must be off for this process.
317 */
318
319 save_flags(cpuflags);
320 cli();
321
322 /*
323 * Remove old route if we are getting a duplicate.
324 */
325
326 rp = &rt_base;
327 while ((r = *rp) != NULL)
328 {
329 if (r->rt_dst != dst ||
330 r->rt_mask != mask)
331 {
332 rp = &r->rt_next;
333 continue;
334 }
335 if (r->rt_metric != metric && r->rt_gateway != gw)
336 {
337 duplicate = 1;
338 rp = &r->rt_next;
339 continue;
340 }
341 *rp = r->rt_next;
342 if (rt_loopback == r)
343 rt_loopback = NULL;
344 kfree_s(r, sizeof(struct rtable));
345 }
346
347 /*
348 * Add the new route
349 */
350
351 rp = &rt_base;
352 while ((r = *rp) != NULL) {
353 /*
354 * When adding a duplicate route, add it before
355 * the route with a higher metric.
356 */
357 if (duplicate &&
358 r->rt_dst == dst &&
359 r->rt_mask == mask &&
360 r->rt_metric > metric)
361 break;
362 else
363 /*
364 * Otherwise, just add it before the
365 * route with a higher generality.
366 */
367 if ((r->rt_mask & mask) != mask)
368 break;
369 rp = &r->rt_next;
370 }
371 rt->rt_next = r;
372 *rp = rt;
373
374 /*
375 * Update the loopback route
376 */
377
378 if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
379 rt_loopback = rt;
380
381 rt_stamp++; /* New table revision */
382
383 /*
384 * Restore the interrupts and return
385 */
386
387 restore_flags(cpuflags);
388 return;
389 }
390
391
/*
 *	Check if a mask is acceptable for an address.
 *
 *	Returns 1 (bad) when 'addr' has bits set outside 'mask', or when
 *	the mask is not a contiguous run of ones starting at the most
 *	significant bit; returns 0 otherwise. Both arguments are in
 *	network byte order.
 */

static inline int bad_mask(unsigned long mask, unsigned long addr)
{
	unsigned long host_bits = ~mask;

	/* The address may not use any bit the mask leaves out. */
	if (addr & host_bits)
		return 1;

	/*
	 *	In host order the inverted mask must look like 0..01..1;
	 *	adding one to such a value clears every bit it had set.
	 */
	host_bits = ntohl(host_bits);
	return (host_bits & (host_bits + 1)) != 0;
}
405
406 /*
407 * Process a route add request from the user
408 */
409
410 static int rt_new(struct rtentry *r)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
411 {
412 int err;
413 char * devname;
414 struct device * dev = NULL;
415 unsigned long flags, daddr, mask, gw;
416 short metric;
417
418 /*
419 * If a device is specified find it.
420 */
421
422 if ((devname = r->rt_dev) != NULL)
423 {
424 err = getname(devname, &devname);
425 if (err)
426 return err;
427 dev = dev_get(devname);
428 putname(devname);
429 if (!dev)
430 return -EINVAL;
431 }
432
433 /*
434 * If the device isn't INET, don't allow it
435 */
436
437 if (r->rt_dst.sa_family != AF_INET)
438 return -EAFNOSUPPORT;
439
440 /*
441 * Make local copies of the important bits
442 * We decrement the metric by one for BSD compatibility.
443 */
444
445 flags = r->rt_flags;
446 daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
447 mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
448 gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
449 metric = r->rt_metric > 0 ? r->rt_metric - 1 : 0;
450
451 /*
452 * BSD emulation: Permits route add someroute gw one-of-my-addresses
453 * to indicate which iface. Not as clean as the nice Linux dev technique
454 * but people keep using it...
455 */
456
457 if (!dev && (flags & RTF_GATEWAY))
458 {
459 struct device *dev2;
460 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next)
461 {
462 if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw)
463 {
464 flags &= ~RTF_GATEWAY;
465 dev = dev2;
466 break;
467 }
468 }
469 }
470
471 /*
472 * Ignore faulty masks
473 */
474
475 if (bad_mask(mask, daddr))
476 mask = 0;
477
478 /*
479 * Set the mask to nothing for host routes.
480 */
481
482 if (flags & RTF_HOST)
483 mask = 0xffffffff;
484 else if (mask && r->rt_genmask.sa_family != AF_INET)
485 return -EAFNOSUPPORT;
486
487 /*
488 * You can only gateway IP via IP..
489 */
490
491 if (flags & RTF_GATEWAY)
492 {
493 if (r->rt_gateway.sa_family != AF_INET)
494 return -EAFNOSUPPORT;
495 if (!dev)
496 dev = get_gw_dev(gw);
497 }
498 else if (!dev)
499 dev = ip_dev_check(daddr);
500
501 /*
502 * Unknown device.
503 */
504
505 if (dev == NULL)
506 return -ENETUNREACH;
507
508 /*
509 * Add the route
510 */
511
512 ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, metric);
513 return 0;
514 }
515
516
517 /*
518 * Remove a route, as requested by the user.
519 */
520
521 static int rt_kill(struct rtentry *r)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
522 {
523 struct sockaddr_in *trg;
524 struct sockaddr_in *msk;
525 struct sockaddr_in *gtw;
526 char *devname;
527 int err;
528
529 trg = (struct sockaddr_in *) &r->rt_dst;
530 msk = (struct sockaddr_in *) &r->rt_genmask;
531 gtw = (struct sockaddr_in *) &r->rt_gateway;
532 if ((devname = r->rt_dev) != NULL)
533 {
534 err = getname(devname, &devname);
535 if (err)
536 return err;
537 }
538 /*
539 * metric can become negative here if it wasn't filled in
540 * but that's a fortunate accident; we really use that in rt_del.
541 */
542 rt_del(trg->sin_addr.s_addr, msk->sin_addr.s_addr, devname,
543 gtw->sin_addr.s_addr, r->rt_flags, r->rt_metric - 1);
544 if ( devname != NULL )
545 putname(devname);
546 return 0;
547 }
548
549
550 /*
551 * Called from the PROCfs module. This outputs /proc/net/route.
552 */
553
554 int rt_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
555 {
556 struct rtable *r;
557 int len=0;
558 off_t pos=0;
559 off_t begin=0;
560 int size;
561
562 len += sprintf(buffer,
563 "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
564 pos=len;
565
566 /*
567 * This isn't quite right -- r->rt_dst is a struct!
568 */
569
570 for (r = rt_base; r != NULL; r = r->rt_next)
571 {
572 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
573 r->rt_dev->name, r->rt_dst, r->rt_gateway,
574 r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
575 r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
576 len+=size;
577 pos+=size;
578 if(pos<offset)
579 {
580 len=0;
581 begin=pos;
582 }
583 if(pos>offset+length)
584 break;
585 }
586
587 *start=buffer+(offset-begin);
588 len-=(offset-begin);
589 if(len>length)
590 len=length;
591 return len;
592 }
593
594 /*
595 * This is hackish, but results in better code. Use "-S" to see why.
596 */
597
598 #define early_out ({ goto no_route; 1; })
599
600 /*
601 * Route a packet. This needs to be fairly quick. Florian & Co.
602 * suggested a unified ARP and IP routing cache. Done right its
603 * probably a brilliant idea. I'd actually suggest a unified
604 * ARP/IP routing/Socket pointer cache. Volunteers welcome
605 */
606
607 struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
608 {
609 struct rtable *rt;
610
611 for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
612 {
613 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
614 break;
615 /*
616 * broadcast addresses can be special cases..
617 */
618 if (rt->rt_flags & RTF_GATEWAY)
619 continue;
620 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
621 (rt->rt_dev->pa_brdaddr == daddr))
622 break;
623 }
624
625 if(rt->rt_flags&RTF_REJECT)
626 return NULL;
627
628 if(src_addr!=NULL)
629 *src_addr= rt->rt_dev->pa_addr;
630
631 if (daddr == rt->rt_dev->pa_addr) {
632 if ((rt = rt_loopback) == NULL)
633 goto no_route;
634 }
635 rt->rt_use++;
636 return rt;
637 no_route:
638 return NULL;
639 }
640
641 struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
642 {
643 struct rtable *rt;
644
645 for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
646 {
647 /*
648 * No routed addressing.
649 */
650 if (rt->rt_flags&RTF_GATEWAY)
651 continue;
652
653 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
654 break;
655 /*
656 * broadcast addresses can be special cases..
657 */
658
659 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
660 rt->rt_dev->pa_brdaddr == daddr)
661 break;
662 }
663
664 if(src_addr!=NULL)
665 *src_addr= rt->rt_dev->pa_addr;
666
667 if (daddr == rt->rt_dev->pa_addr) {
668 if ((rt = rt_loopback) == NULL)
669 goto no_route;
670 }
671 rt->rt_use++;
672 return rt;
673 no_route:
674 return NULL;
675 }
676
677 /*
678 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
679 */
680
681 int ip_rt_ioctl(unsigned int cmd, void *arg)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
682 {
683 int err;
684 struct rtentry rt;
685
686 switch(cmd)
687 {
688 case SIOCADDRT: /* Add a route */
689 case SIOCDELRT: /* Delete a route */
690 if (!suser())
691 return -EPERM;
692 err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
693 if (err)
694 return err;
695 memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
696 return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
697 }
698
699 return -EINVAL;
700 }