1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
8 * Version: @(#)route.c 1.0.14 05/31/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
14 *
15 * Fixes:
16 * Alan Cox : Verify area fixes.
17 * Alan Cox : cli() protects routing changes
18 * Rui Oliveira : ICMP routing table updates
19 * (rco@di.uminho.pt) Routing table insertion and update
20 * Linus Torvalds : Rewrote bits to be sensible
21 * Alan Cox : Added BSD route gw semantics
22 * Alan Cox : Super /proc >4K
23 * Alan Cox : MTU in route table
24 * Alan Cox : MSS actually. Also added the window
25 * clamper.
26 * Sam Lantinga : Fixed route matching in rt_del()
27 * Alan Cox : Routing cache support.
28 * Alan Cox : Removed compatibility cruft.
29 * Alan Cox : RTF_REJECT support.
30 * Alan Cox : TCP irtt support.
31 * Jonathan Naylor : Added Metric support.
32 *
33 * This program is free software; you can redistribute it and/or
34 * modify it under the terms of the GNU General Public License
35 * as published by the Free Software Foundation; either version
36 * 2 of the License, or (at your option) any later version.
37 */
38
39 #include <asm/segment.h>
40 #include <asm/system.h>
41 #include <linux/types.h>
42 #include <linux/kernel.h>
43 #include <linux/sched.h>
44 #include <linux/mm.h>
45 #include <linux/string.h>
46 #include <linux/socket.h>
47 #include <linux/sockios.h>
48 #include <linux/errno.h>
49 #include <linux/in.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <net/route.h>
55 #include <net/tcp.h>
56 #include <linux/skbuff.h>
57 #include <net/sock.h>
58 #include <net/icmp.h>
59
/*
 *	The routing table list.
 *
 *	rt_base is the head of a singly linked list chained through
 *	rt_next. Every lookup in this file walks it from the head and
 *	stops at the first matching entry, so the order in which
 *	ip_rt_add() inserts entries decides which route wins.
 */

static struct rtable *rt_base = NULL;
unsigned long rt_stamp = 1;		/* Routing table version stamp for caches ( 0 is 'unset' ) */

/*
 *	Pointer to the loopback route.
 *
 *	Set by ip_rt_add() when a route on an IFF_LOOPBACK device is
 *	added while no loopback route is recorded, and reset to NULL
 *	whenever the entry it points at is freed.
 */

static struct rtable *rt_loopback = NULL;
72
73 /*
74 * Remove a routing table entry.
75 */
76
77 static void rt_del(unsigned long dst, char *devname)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
78 {
79 struct rtable *r, **rp;
80 unsigned long flags;
81
82 rp = &rt_base;
83
84 /*
85 * This must be done with interrupts off because we could take
86 * an ICMP_REDIRECT.
87 */
88
89 save_flags(flags);
90 cli();
91 while((r = *rp) != NULL)
92 {
93 /* Make sure both the destination and the device match */
94 if ( r->rt_dst != dst ||
95 (devname != NULL && strcmp((r->rt_dev)->name,devname) != 0) )
96 {
97 rp = &r->rt_next;
98 continue;
99 }
100 *rp = r->rt_next;
101
102 /*
103 * If we delete the loopback route update its pointer.
104 */
105
106 if (rt_loopback == r)
107 rt_loopback = NULL;
108 kfree_s(r, sizeof(struct rtable));
109 }
110 rt_stamp++; /* New table revision */
111
112 restore_flags(flags);
113 }
114
115
116 /*
117 * Remove all routing table entries for a device. This is called when
118 * a device is downed.
119 */
120
121 void ip_rt_flush(struct device *dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
122 {
123 struct rtable *r;
124 struct rtable **rp;
125 unsigned long flags;
126
127 rp = &rt_base;
128 save_flags(flags);
129 cli();
130 while ((r = *rp) != NULL) {
131 if (r->rt_dev != dev) {
132 rp = &r->rt_next;
133 continue;
134 }
135 *rp = r->rt_next;
136 if (rt_loopback == r)
137 rt_loopback = NULL;
138 kfree_s(r, sizeof(struct rtable));
139 }
140 rt_stamp++; /* New table revision */
141 restore_flags(flags);
142 }
143
/*
 *	Classful netmask for an address, used when no better mask is
 *	available.
 *
 *	dst	- address in network byte order.
 *
 *	Returns the class A mask if IN_CLASSA() holds for the address,
 *	the class B mask if IN_CLASSB() holds, and the class C mask for
 *	everything else. The result is in network byte order.
 */

static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_order = ntohl(dst);
	unsigned long net;

	if (IN_CLASSA(host_order))
		net = IN_CLASSA_NET;
	else if (IN_CLASSB(host_order))
		net = IN_CLASSB_NET;
	else
		net = IN_CLASSC_NET;

	return htonl(net);
}
161
162
163 /*
164 * If no mask is specified then generate a default entry.
165 */
166
167 static unsigned long guess_mask(unsigned long dst, struct device * dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
168 {
169 unsigned long mask;
170
171 if (!dst)
172 return 0;
173 mask = default_mask(dst);
174 if ((dst ^ dev->pa_addr) & mask)
175 return mask;
176 return dev->pa_mask;
177 }
178
179
180 /*
181 * Find the route entry through which our gateway will be reached
182 */
183
184 static inline struct device * get_gw_dev(unsigned long gw)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
185 {
186 struct rtable * rt;
187
188 for (rt = rt_base ; ; rt = rt->rt_next)
189 {
190 if (!rt)
191 return NULL;
192 if ((gw ^ rt->rt_dst) & rt->rt_mask)
193 continue;
194 /*
195 * Gateways behind gateways are a no-no
196 */
197
198 if (rt->rt_flags & RTF_GATEWAY)
199 return NULL;
200 return rt->rt_dev;
201 }
202 }
203
204 /*
205 * Rewrote rt_add(), as the old one was weird - Linus
206 *
207 * This routine is used to update the IP routing table, either
208 * from the kernel (ICMP_REDIRECT) or via an ioctl call issued
209 * by the superuser.
210 */
211
212 void ip_rt_add(short flags, unsigned long dst, unsigned long mask,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
213 unsigned long gw, struct device *dev, unsigned short mtu, unsigned long window, unsigned short irtt, unsigned char metric)
214 {
215 struct rtable *r, *rt;
216 struct rtable **rp;
217 unsigned long cpuflags;
218
219 /*
220 * A host is a unique machine and has no network bits.
221 */
222
223 if (flags & RTF_HOST)
224 {
225 mask = 0xffffffff;
226 }
227
228 /*
229 * Calculate the network mask
230 */
231
232 else if (!mask)
233 {
234 if (!((dst ^ dev->pa_addr) & dev->pa_mask))
235 {
236 mask = dev->pa_mask;
237 flags &= ~RTF_GATEWAY;
238 if (flags & RTF_DYNAMIC)
239 {
240 /*printk("Dynamic route to my own net rejected\n");*/
241 return;
242 }
243 }
244 else
245 mask = guess_mask(dst, dev);
246 dst &= mask;
247 }
248
249 /*
250 * A gateway must be reachable and not a local address
251 */
252
253 if (gw == dev->pa_addr)
254 flags &= ~RTF_GATEWAY;
255
256 if (flags & RTF_GATEWAY)
257 {
258 /*
259 * Don't try to add a gateway we can't reach..
260 */
261
262 if (dev != get_gw_dev(gw))
263 return;
264
265 flags |= RTF_GATEWAY;
266 }
267 else
268 gw = 0;
269
270 /*
271 * Allocate an entry and fill it in.
272 */
273
274 rt = (struct rtable *) kmalloc(sizeof(struct rtable), GFP_ATOMIC);
275 if (rt == NULL)
276 {
277 return;
278 }
279 memset(rt, 0, sizeof(struct rtable));
280 rt->rt_flags = flags | RTF_UP;
281 rt->rt_dst = dst;
282 rt->rt_dev = dev;
283 rt->rt_gateway = gw;
284 rt->rt_mask = mask;
285 rt->rt_mss = dev->mtu - HEADER_SIZE;
286 rt->rt_metric = metric;
287 rt->rt_window = 0; /* Default is no clamping */
288
289 /* Are the MSS/Window valid ? */
290
291 if(rt->rt_flags & RTF_MSS)
292 rt->rt_mss = mtu;
293
294 if(rt->rt_flags & RTF_WINDOW)
295 rt->rt_window = window;
296 if(rt->rt_flags & RTF_IRTT)
297 rt->rt_irtt = irtt;
298
299 /*
300 * What we have to do is loop though this until we have
301 * found the first address which has a higher generality than
302 * the one in rt. Then we can put rt in right before it.
303 * The interrupts must be off for this process.
304 */
305
306 save_flags(cpuflags);
307 cli();
308
309 /*
310 * Remove old route if we are getting a duplicate.
311 */
312
313 rp = &rt_base;
314 while ((r = *rp) != NULL)
315 {
316 if (r->rt_dst != dst ||
317 r->rt_mask != mask ||
318 r->rt_metric < metric)
319 {
320 rp = &r->rt_next;
321 continue;
322 }
323 *rp = r->rt_next;
324 if (rt_loopback == r)
325 rt_loopback = NULL;
326 kfree_s(r, sizeof(struct rtable));
327 }
328
329 /*
330 * Add the new route
331 */
332
333 rp = &rt_base;
334 while ((r = *rp) != NULL) {
335 if ((r->rt_mask & mask) != mask)
336 break;
337 rp = &r->rt_next;
338 }
339 rt->rt_next = r;
340 *rp = rt;
341
342 /*
343 * Update the loopback route
344 */
345
346 if ((rt->rt_dev->flags & IFF_LOOPBACK) && !rt_loopback)
347 rt_loopback = rt;
348
349 rt_stamp++; /* New table revision */
350
351 /*
352 * Restore the interrupts and return
353 */
354
355 restore_flags(cpuflags);
356 return;
357 }
358
359
/*
 *	Check if a mask is acceptable for an address.
 *
 *	mask	- netmask in network byte order.
 *	addr	- address in network byte order.
 *
 *	Returns 1 if the address has bits set outside the mask, or if
 *	the inverted mask, taken in host byte order, is not of the form
 *	2^n - 1 (so the mask is not a contiguous run of leading one
 *	bits). Returns 0 when the mask is acceptable.
 */

static inline int bad_mask(unsigned long mask, unsigned long addr)
{
	unsigned long host_bits = ~mask;

	/* The address may not carry any host bits */
	if ((addr & host_bits) != 0)
		return 1;

	/* A contiguous low run of ones plus one shares no bit with itself */
	host_bits = ntohl(host_bits);
	return (host_bits & (host_bits + 1)) ? 1 : 0;
}
373
374 /*
375 * Process a route add request from the user
376 */
377
378 static int rt_new(struct rtentry *r)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
379 {
380 int err;
381 char * devname;
382 struct device * dev = NULL;
383 unsigned long flags, daddr, mask, gw;
384
385 /*
386 * If a device is specified find it.
387 */
388
389 if ((devname = r->rt_dev) != NULL)
390 {
391 err = getname(devname, &devname);
392 if (err)
393 return err;
394 dev = dev_get(devname);
395 putname(devname);
396 if (!dev)
397 return -EINVAL;
398 }
399
400 /*
401 * If the device isn't INET, don't allow it
402 */
403
404 if (r->rt_dst.sa_family != AF_INET)
405 return -EAFNOSUPPORT;
406
407 /*
408 * Make local copies of the important bits
409 */
410
411 flags = r->rt_flags;
412 daddr = ((struct sockaddr_in *) &r->rt_dst)->sin_addr.s_addr;
413 mask = ((struct sockaddr_in *) &r->rt_genmask)->sin_addr.s_addr;
414 gw = ((struct sockaddr_in *) &r->rt_gateway)->sin_addr.s_addr;
415
416
417 /*
418 * BSD emulation: Permits route add someroute gw one-of-my-addresses
419 * to indicate which iface. Not as clean as the nice Linux dev technique
420 * but people keep using it...
421 */
422
423 if (!dev && (flags & RTF_GATEWAY))
424 {
425 struct device *dev2;
426 for (dev2 = dev_base ; dev2 != NULL ; dev2 = dev2->next)
427 {
428 if ((dev2->flags & IFF_UP) && dev2->pa_addr == gw)
429 {
430 flags &= ~RTF_GATEWAY;
431 dev = dev2;
432 break;
433 }
434 }
435 }
436
437 /*
438 * Ignore faulty masks
439 */
440
441 if (bad_mask(mask, daddr))
442 mask = 0;
443
444 /*
445 * Set the mask to nothing for host routes.
446 */
447
448 if (flags & RTF_HOST)
449 mask = 0xffffffff;
450 else if (mask && r->rt_genmask.sa_family != AF_INET)
451 return -EAFNOSUPPORT;
452
453 /*
454 * You can only gateway IP via IP..
455 */
456
457 if (flags & RTF_GATEWAY)
458 {
459 if (r->rt_gateway.sa_family != AF_INET)
460 return -EAFNOSUPPORT;
461 if (!dev)
462 dev = get_gw_dev(gw);
463 }
464 else if (!dev)
465 dev = ip_dev_check(daddr);
466
467 /*
468 * Unknown device.
469 */
470
471 if (dev == NULL)
472 return -ENETUNREACH;
473
474 /*
475 * Add the route
476 */
477
478 ip_rt_add(flags, daddr, mask, gw, dev, r->rt_mss, r->rt_window, r->rt_irtt, r->rt_metric);
479 return 0;
480 }
481
482
483 /*
484 * Remove a route, as requested by the user.
485 */
486
487 static int rt_kill(struct rtentry *r)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
488 {
489 struct sockaddr_in *trg;
490 char *devname;
491 int err;
492
493 trg = (struct sockaddr_in *) &r->rt_dst;
494 if ((devname = r->rt_dev) != NULL)
495 {
496 err = getname(devname, &devname);
497 if (err)
498 return err;
499 }
500 rt_del(trg->sin_addr.s_addr, devname);
501 if ( devname != NULL )
502 putname(devname);
503 return 0;
504 }
505
506
507 /*
508 * Called from the PROCfs module. This outputs /proc/net/route.
509 */
510
511 int rt_get_info(char *buffer, char **start, off_t offset, int length)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
512 {
513 struct rtable *r;
514 int len=0;
515 off_t pos=0;
516 off_t begin=0;
517 int size;
518
519 len += sprintf(buffer,
520 "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
521 pos=len;
522
523 /*
524 * This isn't quite right -- r->rt_dst is a struct!
525 */
526
527 for (r = rt_base; r != NULL; r = r->rt_next)
528 {
529 size = sprintf(buffer+len, "%s\t%08lX\t%08lX\t%02X\t%d\t%lu\t%d\t%08lX\t%d\t%lu\t%u\n",
530 r->rt_dev->name, r->rt_dst, r->rt_gateway,
531 r->rt_flags, r->rt_refcnt, r->rt_use, r->rt_metric,
532 r->rt_mask, (int)r->rt_mss, r->rt_window, (int)r->rt_irtt);
533 len+=size;
534 pos+=size;
535 if(pos<offset)
536 {
537 len=0;
538 begin=pos;
539 }
540 if(pos>offset+length)
541 break;
542 }
543
544 *start=buffer+(offset-begin);
545 len-=(offset-begin);
546 if(len>length)
547 len=length;
548 return len;
549 }
550
/*
 *	This is hackish, but results in better code. Use "-S" to see why.
 *
 *	early_out is a GCC statement expression that jumps to the
 *	enclosing function's 'no_route' label; the trailing 1 only
 *	gives the expression a value so it can sit in a condition.
 *	It is written as "rt != NULL || early_out" in the lookup
 *	loops, so running off the end of the table leaves the loop
 *	through the goto. Any function using it must define a
 *	'no_route' label.
 */

#define early_out ({ goto no_route; 1; })
556
557 /*
558 * Route a packet. This needs to be fairly quick. Florian & Co.
559 * suggested a unified ARP and IP routing cache. Done right its
560 * probably a brilliant idea. I'd actually suggest a unified
561 * ARP/IP routing/Socket pointer cache. Volunteers welcome
562 */
563
564 struct rtable * ip_rt_route(unsigned long daddr, struct options *opt, unsigned long *src_addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
565 {
566 struct rtable *rt;
567
568 for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
569 {
570 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
571 break;
572 /*
573 * broadcast addresses can be special cases..
574 */
575 if (rt->rt_flags & RTF_GATEWAY)
576 continue;
577 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
578 (rt->rt_dev->pa_brdaddr == daddr))
579 break;
580 }
581
582 if(rt->rt_flags&RTF_REJECT)
583 return NULL;
584
585 if(src_addr!=NULL)
586 *src_addr= rt->rt_dev->pa_addr;
587
588 if (daddr == rt->rt_dev->pa_addr) {
589 if ((rt = rt_loopback) == NULL)
590 goto no_route;
591 }
592 rt->rt_use++;
593 return rt;
594 no_route:
595 return NULL;
596 }
597
598 struct rtable * ip_rt_local(unsigned long daddr, struct options *opt, unsigned long *src_addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
599 {
600 struct rtable *rt;
601
602 for (rt = rt_base; rt != NULL || early_out ; rt = rt->rt_next)
603 {
604 /*
605 * No routed addressing.
606 */
607 if (rt->rt_flags&RTF_GATEWAY)
608 continue;
609
610 if (!((rt->rt_dst ^ daddr) & rt->rt_mask))
611 break;
612 /*
613 * broadcast addresses can be special cases..
614 */
615
616 if ((rt->rt_dev->flags & IFF_BROADCAST) &&
617 rt->rt_dev->pa_brdaddr == daddr)
618 break;
619 }
620
621 if(src_addr!=NULL)
622 *src_addr= rt->rt_dev->pa_addr;
623
624 if (daddr == rt->rt_dev->pa_addr) {
625 if ((rt = rt_loopback) == NULL)
626 goto no_route;
627 }
628 rt->rt_use++;
629 return rt;
630 no_route:
631 return NULL;
632 }
633
634 /*
635 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
636 */
637
638 int ip_rt_ioctl(unsigned int cmd, void *arg)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
639 {
640 int err;
641 struct rtentry rt;
642
643 switch(cmd)
644 {
645 case SIOCADDRT: /* Add a route */
646 case SIOCDELRT: /* Delete a route */
647 if (!suser())
648 return -EPERM;
649 err=verify_area(VERIFY_READ, arg, sizeof(struct rtentry));
650 if (err)
651 return err;
652 memcpy_fromfs(&rt, arg, sizeof(struct rtentry));
653 return (cmd == SIOCDELRT) ? rt_kill(&rt) : rt_new(&rt);
654 }
655
656 return -EINVAL;
657 }