1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The Internet Protocol (IP) module.
7 *
8 * Version: @(#)ip.c 1.0.16b 9/1/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Donald Becker, <becker@super.org>
13 * Alan Cox, <Alan.Cox@linux.org>
14 * Richard Underwood
15 * Stefan Becker, <stefanb@yello.ping.de>
16 * Jorge Cwik, <jorge@laser.satlink.net>
17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 *
19 *
20 * Fixes:
21 * Alan Cox : Commented a couple of minor bits of surplus code
22 * Alan Cox : Undefining IP_FORWARD doesn't include the code
23 * (just stops a compiler warning).
24 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes
25 * are junked rather than corrupting things.
26 * Alan Cox : Frames to bad broadcast subnets are dumped
27 * We used to process them non broadcast and
28 * boy could that cause havoc.
29 * Alan Cox : ip_forward sets the free flag on the
30 * new frame it queues. Still crap because
31 * it copies the frame but at least it
32 * doesn't eat memory too.
33 * Alan Cox : Generic queue code and memory fixes.
34 * Fred Van Kempen : IP fragment support (borrowed from NET2E)
35 * Gerhard Koerting: Forward fragmented frames correctly.
36 * Gerhard Koerting: Fixes to my fix of the above 8-).
37 * Gerhard Koerting: IP interface addressing fix.
38 * Linus Torvalds : More robustness checks
39 * Alan Cox : Even more checks: Still not as robust as it ought to be
40 * Alan Cox : Save IP header pointer for later
41 * Alan Cox : ip option setting
42 * Alan Cox : Use ip_tos/ip_ttl settings
43 * Alan Cox : Fragmentation bogosity removed
44 * (Thanks to Mark.Bush@prg.ox.ac.uk)
45 * Dmitry Gorodchanin : Send of a raw packet crash fix.
46 * Alan Cox : Silly ip bug when an overlength
47 * fragment turns up. Now frees the
48 * queue.
49 * Linus Torvalds/ : Memory leakage on fragmentation
50 * Alan Cox : handling.
51 * Gerhard Koerting: Forwarding uses IP priority hints
52 * Teemu Rantanen : Fragment problems.
53 * Alan Cox : General cleanup, comments and reformat
54 * Alan Cox : SNMP statistics
55 * Alan Cox : BSD address rule semantics. Also see
56 * UDP as there is a nasty checksum issue
57 * if you do things the wrong way.
58 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file
59 * Alan Cox : IP options adjust sk->priority.
60 * Pedro Roque : Fix mtu/length error in ip_forward.
61 * Alan Cox : Avoid ip_chk_addr when possible.
62 * Richard Underwood : IP multicasting.
63 * Alan Cox : Cleaned up multicast handlers.
64 * Alan Cox : RAW sockets demultiplex in the BSD style.
65 * Gunther Mayer : Fix the SNMP reporting typo
66 * Alan Cox : Always in group 224.0.0.1
67 * Pauline Middelink : Fast ip_checksum update when forwarding
68 * Masquerading support.
69 * Alan Cox : Multicast loopback error for 224.0.0.1
70 * Alan Cox : IP_MULTICAST_LOOP option.
71 * Alan Cox : Use notifiers.
72 * Bjorn Ekwall : Removed ip_csum (from slhc.c too)
73 * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!)
74 * Stefan Becker : Send out ICMP HOST REDIRECT
75 * Arnt Gulbrandsen : ip_build_xmit
76 * Alan Cox : Per socket routing cache
77 * Alan Cox : Fixed routing cache, added header cache.
78 * Alan Cox : Loopback didn't work right in original ip_build_xmit - fixed it.
79 * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net.
80 * Alan Cox : Incoming IP option handling.
81 * Alan Cox : Set saddr on raw output frames as per BSD.
82 * Alan Cox : Stopped broadcast source route explosions.
83 * Alan Cox : Can disable source routing
84 * Takeshi Sone : Masquerading didn't work.
85 * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible.
86 * Alan Cox : Memory leaks, tramples, misc debugging.
87 * Alan Cox : Fixed multicast (by popular demand 8))
88 * Alan Cox : Fixed forwarding (by even more popular demand 8))
89 * Alan Cox : Fixed SNMP statistics [I think]
90 * Gerhard Koerting : IP fragmentation forwarding fix
91 * Alan Cox : Device lock against page fault.
92 * Alan Cox : IP_HDRINCL facility.
93 * Werner Almesberger : Zero fragment bug
94 * Alan Cox : RAW IP frame length bug
95 * Alan Cox : Outgoing firewall on build_xmit
96 * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel
97 * Alan Cox : Multicast routing hooks
98 * Jos Vos : Do accounting *before* call_in_firewall
99 *
100 *
101 *
102 * To Fix:
103 * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
104 * and could be made very efficient with the addition of some virtual memory hacks to permit
105 * the allocation of a buffer that can then be 'grown' by twiddling page tables.
106 * Output fragmentation wants updating along with the buffer management to use a single
107 * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
108 * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
109 * fragmentation anyway.
110 *
111 * FIXME: copy frag 0 iph to qp->iph
112 *
113 * This program is free software; you can redistribute it and/or
114 * modify it under the terms of the GNU General Public License
115 * as published by the Free Software Foundation; either version
116 * 2 of the License, or (at your option) any later version.
117 */
118
119 #include <asm/segment.h>
120 #include <asm/system.h>
121 #include <linux/types.h>
122 #include <linux/kernel.h>
123 #include <linux/sched.h>
124 #include <linux/mm.h>
125 #include <linux/string.h>
126 #include <linux/errno.h>
127 #include <linux/config.h>
128
129 #include <linux/socket.h>
130 #include <linux/sockios.h>
131 #include <linux/in.h>
132 #include <linux/inet.h>
133 #include <linux/netdevice.h>
134 #include <linux/etherdevice.h>
135 #include <linux/proc_fs.h>
136 #include <linux/stat.h>
137
138 #include <net/snmp.h>
139 #include <net/ip.h>
140 #include <net/protocol.h>
141 #include <net/route.h>
142 #include <net/tcp.h>
143 #include <net/udp.h>
144 #include <linux/skbuff.h>
145 #include <net/sock.h>
146 #include <net/arp.h>
147 #include <net/icmp.h>
148 #include <net/raw.h>
149 #include <net/checksum.h>
150 #include <linux/igmp.h>
151 #include <linux/ip_fw.h>
152 #include <linux/firewall.h>
153 #include <linux/mroute.h>
154 #include <net/netlink.h>
155 #ifdef CONFIG_NET_ALIAS
156 #include <linux/net_alias.h>
157 #endif
158
159 extern int last_retran;
160 extern void sort_send(struct sock *sk);
161
162 #define min(a,b) ((a)<(b)?(a):(b))
163
164 /*
165 * SNMP management statistics
166 */
167
168 #ifdef CONFIG_IP_FORWARD
169 struct ip_mib ip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */
170 #else
171 struct ip_mib ip_statistics={2,64,}; /* Forwarding=No, Default TTL=64 */
172 #endif
173
/*
 *	Handle the issuing of an ioctl() request
 *	for the ip device. This is scheduled to
 *	disappear.
 *
 *	sk	- socket the request was issued on (not examined)
 *	cmd	- ioctl command number
 *	arg	- command argument (not examined)
 *
 *	No commands are implemented at this level, so every request is
 *	rejected: the function always returns -EINVAL.
 */

int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch(cmd)
	{
		/* Nothing is handled here; the switch is kept as the hook for future commands. */
		default:
			return(-EINVAL);
	}
}
188
189
190
191 /*
192 * This function receives all incoming IP datagrams.
193 *
194 * On entry skb->data points to the start of the IP header and
195 * the MAC header has been removed.
196 */
197
198 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
199 {
200 struct iphdr *iph = skb->h.iph;
201 struct sock *raw_sk=NULL;
202 unsigned char hash;
203 unsigned char flag = 0;
204 struct inet_protocol *ipprot;
205 int brd=IS_MYADDR;
206 struct options * opt = NULL;
207 int is_frag=0;
208 __u32 daddr;
209
210 #ifdef CONFIG_FIREWALL
211 int err;
212 #endif
213 #ifdef CONFIG_IP_MROUTE
214 int mroute_pkt=0;
215 #endif
216
217 #ifdef CONFIG_NET_IPV6
218 /*
219 * Intercept IPv6 frames. We dump ST-II and invalid types just below..
220 */
221
222 if(iph->version == 6)
223 return ipv6_rcv(skb,dev,pt);
224 #endif
225
226 ip_statistics.IpInReceives++;
227
228 /*
229 * Tag the ip header of this packet so we can find it
230 */
231
232 skb->ip_hdr = iph;
233
234 /*
235 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
236 * RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
237 *
238 * Is the datagram acceptable?
239 *
240 * 1. Length at least the size of an ip header
241 * 2. Version of 4
242 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums]
243 * 4. Doesn't have a bogus length
244 * (5. We ought to check for IP multicast addresses and undefined types.. does this matter ?)
245 */
246
247 if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
248 || skb->len < ntohs(iph->tot_len))
249 {
250 ip_statistics.IpInHdrErrors++;
251 kfree_skb(skb, FREE_WRITE);
252 return(0);
253 }
254
255 /*
256 * Our transport medium may have padded the buffer out. Now we know it
257 * is IP we can trim to the true length of the frame.
258 * Note this now means skb->len holds ntohs(iph->tot_len).
259 */
260
261 skb_trim(skb,ntohs(iph->tot_len));
262
263 if (iph->ihl > 5)
264 {
265 skb->ip_summed = 0;
266 if (ip_options_compile(NULL, skb))
267 return(0);
268 opt = (struct options*)skb->proto_priv;
269 #ifdef CONFIG_IP_NOSR
270 if (opt->srr)
271 {
272 kfree_skb(skb, FREE_READ);
273 return -EINVAL;
274 }
275 #endif
276 }
277
278 /*
279 * Try to select closest <src,dst> alias device, if any.
280 * net_alias_dev_rcv_sel32 returns main device if it
281 * fails to found other.
282 */
283
284 #ifdef CONFIG_NET_ALIAS
285 if (iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev))
286 skb->dev = dev = net_alias_dev_rcv_sel32(skb->dev, AF_INET, iph->saddr, iph->daddr);
287 #endif
288
289 /*
290 * Account for the packet (even if the packet is
291 * not accepted by the firewall!).
292 */
293
294 #ifdef CONFIG_IP_ACCT
295 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
296 #endif
297
298 /*
299 * See if the firewall wants to dispose of the packet.
300 */
301
302 #ifdef CONFIG_FIREWALL
303
304 if ((err=call_in_firewall(PF_INET, skb, iph))<FW_ACCEPT)
305 {
306 if(err==FW_REJECT)
307 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
308 kfree_skb(skb, FREE_WRITE);
309 return 0;
310 }
311
312 #endif
313
314 /*
315 * Remember if the frame is fragmented.
316 */
317
318 if(iph->frag_off)
319 {
320 if (iph->frag_off & htons(IP_MF))
321 is_frag|=IPFWD_FRAGMENT;
322 /*
323 * Last fragment ?
324 */
325
326 if (iph->frag_off & htons(IP_OFFSET))
327 is_frag|=IPFWD_LASTFRAG;
328 }
329
330 /*
331 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday.
332 *
333 * This is inefficient. While finding out if it is for us we could also compute
334 * the routing table entry. This is where the great unified cache theory comes
335 * in as and when someone implements it
336 *
337 * For most hosts over 99% of packets match the first conditional
338 * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
339 * function entry.
340 */
341 daddr = iph->daddr;
342 if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
343 {
344 if (opt && opt->srr)
345 {
346 int srrspace, srrptr;
347 __u32 nexthop;
348 unsigned char * optptr = ((unsigned char *)iph) + opt->srr;
349
350 if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST)
351 {
352 kfree_skb(skb, FREE_WRITE);
353 return 0;
354 }
355
356 for ( srrptr=optptr[2], srrspace = optptr[1];
357 srrptr <= srrspace;
358 srrptr += 4
359 )
360 {
361 int brd2;
362 if (srrptr + 3 > srrspace)
363 {
364 icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
365 skb->dev);
366 kfree_skb(skb, FREE_WRITE);
367 return 0;
368 }
369 memcpy(&nexthop, &optptr[srrptr-1], 4);
370 if ((brd2 = ip_chk_addr(nexthop)) == 0)
371 break;
372 if (brd2 != IS_MYADDR)
373 {
374
375 /*
376 * ANK: should we implement weak tunneling of multicasts?
377 * Are they obsolete? DVMRP specs (RFC-1075) is old enough...
378 * [They are obsolete]
379 */
380 kfree_skb(skb, FREE_WRITE);
381 return -EINVAL;
382 }
383 memcpy(&daddr, &optptr[srrptr-1], 4);
384 }
385 if (srrptr <= srrspace)
386 {
387 opt->srr_is_hit = 1;
388 opt->is_changed = 1;
389 #ifdef CONFIG_IP_FORWARD
390 if (ip_forward(skb, dev, is_frag, nexthop))
391 kfree_skb(skb, FREE_WRITE);
392 #else
393 ip_statistics.IpInAddrErrors++;
394 kfree_skb(skb, FREE_WRITE);
395 #endif
396 return 0;
397 }
398 }
399
400 #ifdef CONFIG_IP_MULTICAST
401 if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
402 {
403 /*
404 * Check it is for one of our groups
405 */
406 struct ip_mc_list *ip_mc=dev->ip_mc_list;
407 do
408 {
409 if(ip_mc==NULL)
410 {
411 kfree_skb(skb, FREE_WRITE);
412 return 0;
413 }
414 if(ip_mc->multiaddr==iph->daddr)
415 break;
416 ip_mc=ip_mc->next;
417 }
418 while(1);
419 }
420 #endif
421
422 #ifdef CONFIG_IP_MASQUERADE
423 /*
424 * Do we need to de-masquerade this fragment?
425 */
426 if (ip_fw_demasquerade(skb))
427 {
428 struct iphdr *iph=skb->h.iph;
429 if (ip_forward(skb, dev, is_frag|IPFWD_MASQUERADED, iph->daddr))
430 kfree_skb(skb, FREE_WRITE);
431 return(0);
432 }
433 #endif
434
435 /*
436 * Reassemble IP fragments.
437 */
438
439 if(is_frag)
440 {
441 /* Defragment. Obtain the complete packet if there is one */
442 skb=ip_defrag(iph,skb,dev);
443 if(skb==NULL)
444 return 0;
445 skb->dev = dev;
446 iph=skb->h.iph;
447 }
448
449 /*
450 * Point into the IP datagram, just past the header.
451 */
452
453 skb->ip_hdr = iph;
454 skb->h.raw += iph->ihl*4;
455
456 #ifdef CONFIG_IP_MROUTE
457 /*
458 * Check the state on multicast routing (multicast and not 224.0.0.z)
459 */
460
461 if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
462 mroute_pkt=1;
463
464 #endif
465 /*
466 * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
467 *
468 * RFC 1122: SHOULD pass TOS value up to the transport layer.
469 */
470
471 hash = iph->protocol & (SOCK_ARRAY_SIZE-1);
472
473 /*
474 * If there maybe a raw socket we must check - if not we don't care less
475 */
476
477 if((raw_sk=raw_prot.sock_array[hash])!=NULL)
478 {
479 struct sock *sknext=NULL;
480 struct sk_buff *skb1;
481 raw_sk=get_sock_raw(raw_sk, iph->protocol, iph->saddr, iph->daddr);
482 if(raw_sk) /* Any raw sockets */
483 {
484 do
485 {
486 /* Find the next */
487 sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr);
488 if(sknext)
489 skb1=skb_clone(skb, GFP_ATOMIC);
490 else
491 break; /* One pending raw socket left */
492 if(skb1)
493 raw_rcv(raw_sk, skb1, dev, iph->saddr,daddr);
494 raw_sk=sknext;
495 }
496 while(raw_sk!=NULL);
497
498 /*
499 * Here either raw_sk is the last raw socket, or NULL if none
500 */
501
502 /*
503 * We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy
504 */
505 }
506 }
507
508 /*
509 * skb->h.raw now points at the protocol beyond the IP header.
510 */
511
512 hash = iph->protocol & (MAX_INET_PROTOS -1);
513 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
514 {
515 struct sk_buff *skb2;
516
517 if (ipprot->protocol != iph->protocol)
518 continue;
519 /*
520 * See if we need to make a copy of it. This will
521 * only be set if more than one protocol wants it.
522 * and then not for the last one. If there is a pending
523 * raw delivery wait for that
524 */
525
526 #ifdef CONFIG_IP_MROUTE
527 if (ipprot->copy || raw_sk || mroute_pkt)
528 #else
529 if (ipprot->copy || raw_sk)
530 #endif
531 {
532 skb2 = skb_clone(skb, GFP_ATOMIC);
533 if(skb2==NULL)
534 continue;
535 }
536 else
537 {
538 skb2 = skb;
539 }
540 flag = 1;
541
542 /*
543 * Pass on the datagram to each protocol that wants it,
544 * based on the datagram protocol. We should really
545 * check the protocol handler's return values here...
546 */
547
548 ipprot->handler(skb2, dev, opt, daddr,
549 (ntohs(iph->tot_len) - (iph->ihl * 4)),
550 iph->saddr, 0, ipprot);
551 }
552
553 /*
554 * All protocols checked.
555 * If this packet was a broadcast, we may *not* reply to it, since that
556 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
557 * ICMP reply messages get queued up for transmission...)
558 */
559
560 #ifdef CONFIG_IP_MROUTE
561 /*
562 * Forward the last copy to the multicast router. If
563 * there is a pending raw deliery however make a copy
564 * and forward that.
565 */
566
567 if(mroute_pkt)
568 {
569 flag=1;
570 if(raw_sk==NULL)
571 ipmr_forward(skb, is_frag);
572 else
573 {
574 struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
575 if(skb2)
576 {
577 skb2->free=1;
578 ipmr_forward(skb2, is_frag);
579 }
580 }
581 }
582 #endif
583
584 if(raw_sk!=NULL) /* Shift to last raw user */
585 raw_rcv(raw_sk, skb, dev, iph->saddr, daddr);
586 else if (!flag) /* Free and report errors */
587 {
588 if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
589 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
590 kfree_skb(skb, FREE_WRITE);
591 }
592
593 return(0);
594 }
595
596 /*
597 * Do any unicast IP forwarding required.
598 */
599
600 /*
601 * Don't forward multicast or broadcast frames.
602 */
603
604 if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
605 {
606 kfree_skb(skb,FREE_WRITE);
607 return 0;
608 }
609
610 /*
611 * The packet is for another target. Forward the frame
612 */
613
614 #ifdef CONFIG_IP_FORWARD
615 if (opt && opt->is_strictroute)
616 {
617 icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
618 kfree_skb(skb, FREE_WRITE);
619 return -1;
620 }
621 if (ip_forward(skb, dev, is_frag, iph->daddr))
622 kfree_skb(skb, FREE_WRITE);
623 #else
624 /* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
625 iph->saddr,iph->daddr);*/
626 ip_statistics.IpInAddrErrors++;
627 kfree_skb(skb, FREE_WRITE);
628 #endif
629 return(0);
630 }
631
632