1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <Alan.Cox@linux.org> 14 * Richard Underwood 15 * Stefan Becker, <stefanb@yello.ping.de> 16 * Jorge Cwik, <jorge@laser.satlink.net> 17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 18 * 19 * 20 * Fixes: 21 * Alan Cox : Commented a couple of minor bits of surplus code 22 * Alan Cox : Undefining IP_FORWARD doesn't include the code 23 * (just stops a compiler warning). 24 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 25 * are junked rather than corrupting things. 26 * Alan Cox : Frames to bad broadcast subnets are dumped 27 * We used to process them non broadcast and 28 * boy could that cause havoc. 29 * Alan Cox : ip_forward sets the free flag on the 30 * new frame it queues. Still crap because 31 * it copies the frame but at least it 32 * doesn't eat memory too. 33 * Alan Cox : Generic queue code and memory fixes. 34 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 35 * Gerhard Koerting: Forward fragmented frames correctly. 36 * Gerhard Koerting: Fixes to my fix of the above 8-). 37 * Gerhard Koerting: IP interface addressing fix. 38 * Linus Torvalds : More robustness checks 39 * Alan Cox : Even more checks: Still not as robust as it ought to be 40 * Alan Cox : Save IP header pointer for later 41 * Alan Cox : ip option setting 42 * Alan Cox : Use ip_tos/ip_ttl settings 43 * Alan Cox : Fragmentation bogosity removed 44 * (Thanks to Mark.Bush@prg.ox.ac.uk) 45 * Dmitry Gorodchanin : Send of a raw packet crash fix. 46 * Alan Cox : Silly ip bug when an overlength 47 * fragment turns up. Now frees the 48 * queue. 49 * Linus Torvalds/ : Memory leakage on fragmentation 50 * Alan Cox : handling. 51 * Gerhard Koerting: Forwarding uses IP priority hints 52 * Teemu Rantanen : Fragment problems. 53 * Alan Cox : General cleanup, comments and reformat 54 * Alan Cox : SNMP statistics 55 * Alan Cox : BSD address rule semantics. Also see 56 * UDP as there is a nasty checksum issue 57 * if you do things the wrong way. 58 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 59 * Alan Cox : IP options adjust sk->priority. 60 * Pedro Roque : Fix mtu/length error in ip_forward. 61 * Alan Cox : Avoid ip_chk_addr when possible. 62 * Richard Underwood : IP multicasting. 63 * Alan Cox : Cleaned up multicast handlers. 64 * Alan Cox : RAW sockets demultiplex in the BSD style. 65 * Gunther Mayer : Fix the SNMP reporting typo 66 * Alan Cox : Always in group 224.0.0.1 67 * Pauline Middelink : Fast ip_checksum update when forwarding 68 * Masquerading support. 69 * Alan Cox : Multicast loopback error for 224.0.0.1 70 * Alan Cox : IP_MULTICAST_LOOP option. 71 * Alan Cox : Use notifiers. 72 * Bjorn Ekwall : Removed ip_csum (from slhc.c too) 73 * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) 74 * Stefan Becker : Send out ICMP HOST REDIRECT 75 * Arnt Gulbrandsen : ip_build_xmit 76 * Alan Cox : Per socket routing cache 77 * Alan Cox : Fixed routing cache, added header cache. 78 * Alan Cox : Loopback didnt work right in original ip_build_xmit - fixed it. 79 * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. 80 * Alan Cox : Incoming IP option handling. 81 * Alan Cox : Set saddr on raw output frames as per BSD. 82 * Alan Cox : Stopped broadcast source route explosions. 83 * Alan Cox : Can disable source routing 84 * Takeshi Sone : Masquerading didn't work. 85 * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. 86 * Alan Cox : Memory leaks, tramples, misc debugging. 87 * Alan Cox : Fixed multicast (by popular demand 8)) 88 * Alan Cox : Fixed forwarding (by even more popular demand 8)) 89 * Alan Cox : Fixed SNMP statistics [I think] 90 * Gerhard Koerting : IP fragmentation forwarding fix 91 * Alan Cox : Device lock against page fault. 92 * Alan Cox : IP_HDRINCL facility. 93 * Werner Almesberger : Zero fragment bug 94 * Alan Cox : RAW IP frame length bug 95 * Alan Cox : Outgoing firewall on build_xmit 96 * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel 97 * Alan Cox : Multicast routing hooks 98 * 99 * 100 * 101 * To Fix: 102 * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient 103 * and could be made very efficient with the addition of some virtual memory hacks to permit 104 * the allocation of a buffer that can then be 'grown' by twiddling page tables. 105 * Output fragmentation wants updating along with the buffer management to use a single 106 * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet 107 * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause 108 * fragmentation anyway. 109 * 110 * FIXME: copy frag 0 iph to qp->iph 111 * 112 * This program is free software; you can redistribute it and/or 113 * modify it under the terms of the GNU General Public License 114 * as published by the Free Software Foundation; either version 115 * 2 of the License, or (at your option) any later version. 116 */ 117 118 #include <asm/segment.h> 119 #include <asm/system.h> 120 #include <linux/types.h> 121 #include <linux/kernel.h> 122 #include <linux/sched.h> 123 #include <linux/mm.h> 124 #include <linux/string.h> 125 #include <linux/errno.h> 126 #include <linux/config.h> 127 128 #include <linux/socket.h> 129 #include <linux/sockios.h> 130 #include <linux/in.h> 131 #include <linux/inet.h> 132 #include <linux/netdevice.h> 133 #include <linux/etherdevice.h> 134 #include <linux/proc_fs.h> 135 #include <linux/stat.h> 136 137 #include <net/snmp.h> 138 #include <net/ip.h> 139 #include <net/protocol.h> 140 #include <net/route.h> 141 #include <net/tcp.h> 142 #include <net/udp.h> 143 #include <linux/skbuff.h> 144 #include <net/sock.h> 145 #include <net/arp.h> 146 #include <net/icmp.h> 147 #include <net/raw.h> 148 #include <net/checksum.h> 149 #include <linux/igmp.h> 150 #include <linux/ip_fw.h> 151 #include <linux/firewall.h> 152 #include <linux/mroute.h> 153 #include <net/netlink.h> 154 #ifdef CONFIG_NET_ALIAS 155 #include <linux/net_alias.h> 156 #endif 157 158 extern int last_retran; 159 extern void sort_send(struct sock *sk); 160 161 #define min(a,b) ((a)<(b)?(a):(b)) 162 163 /* 164 * SNMP management statistics 165 */ 166 167 #ifdef CONFIG_IP_FORWARD 168 struct ip_mib ip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ 169 #else 170 struct ip_mib ip_statistics={2,64,}; /* Forwarding=No, Default TTL=64 */ 171 #endif 172 173 /* 174 * Handle the issuing of an ioctl() request 175 * for the ip device. This is scheduled to 176 * disappear 177 */ 178 179 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg) /* */ 180 { 181 switch(cmd) 182 { 183 default: 184 return(-EINVAL); 185 } 186 } 187 188 189 190 /* 191 * This function receives all incoming IP datagrams. 192 * 193 * On entry skb->data points to the start of the IP header and 194 * the MAC header has been removed. 195 */ 196 197 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) /* */ 198 { 199 struct iphdr *iph = skb->h.iph; 200 struct sock *raw_sk=NULL; 201 unsigned char hash; 202 unsigned char flag = 0; 203 struct inet_protocol *ipprot; 204 int brd=IS_MYADDR; 205 struct options * opt = NULL; 206 int is_frag=0; 207 #ifdef CONFIG_FIREWALL 208 int err; 209 #endif 210 #ifdef CONFIG_IP_MROUTE 211 int mroute_pkt=0; 212 #endif 213 214 #ifdef CONFIG_NET_IPV6 215 /* 216 * Intercept IPv6 frames. We dump ST-II and invalid types just below.. 217 */ 218 219 if(iph->version == 6) 220 return ipv6_rcv(skb,dev,pt); 221 #endif 222 223 ip_statistics.IpInReceives++; 224 225 /* 226 * Tag the ip header of this packet so we can find it 227 */ 228 229 skb->ip_hdr = iph; 230 231 /* 232 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. 233 * RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING]. 234 * 235 * Is the datagram acceptable? 236 * 237 * 1. Length at least the size of an ip header 238 * 2. Version of 4 239 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] 240 * 4. Doesn't have a bogus length 241 * (5. We ought to check for IP multicast addresses and undefined types.. does this matter ?) 242 */ 243 244 if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0 245 || skb->len < ntohs(iph->tot_len)) 246 { 247 ip_statistics.IpInHdrErrors++; 248 kfree_skb(skb, FREE_WRITE); 249 return(0); 250 } 251 252 /* 253 * Our transport medium may have padded the buffer out. Now we know it 254 * is IP we can trim to the true length of the frame. 255 * Note this now means skb->len holds ntohs(iph->tot_len). 256 */ 257 258 skb_trim(skb,ntohs(iph->tot_len)); 259 260 if (iph->ihl > 5) 261 { 262 skb->ip_summed = 0; 263 if (ip_options_compile(NULL, skb)) 264 return(0); 265 opt = (struct options*)skb->proto_priv; 266 #ifdef CONFIG_IP_NOSR 267 if (opt->srr) 268 { 269 kfree_skb(skb, FREE_READ); 270 return -EINVAL; 271 } 272 #endif 273 } 274 275 /* 276 * Try to select closest <src,dst> alias device, if any. 277 * net_alias_dev_rcv_sel32 returns main device if it 278 * fails to found other. 279 */ 280 281 #ifdef CONFIG_NET_ALIAS 282 if (iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev)) 283 skb->dev = dev = net_alias_dev_rcv_sel32(skb->dev, AF_INET, iph->saddr, iph->daddr); 284 #endif 285 286 /* 287 * See if the firewall wants to dispose of the packet. 288 */ 289 290 #ifdef CONFIG_FIREWALL 291 292 if ((err=call_in_firewall(PF_INET, skb, iph))<FW_ACCEPT) 293 { 294 if(err==FW_REJECT) 295 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); 296 kfree_skb(skb, FREE_WRITE); 297 return 0; 298 } 299 300 #endif 301 302 /* 303 * Remember if the frame is fragmented. 304 */ 305 306 if(iph->frag_off) 307 { 308 if (iph->frag_off & htons(IP_MF)) 309 is_frag|=1; 310 /* 311 * Last fragment ? 312 */ 313 314 if (iph->frag_off & htons(IP_OFFSET)) 315 is_frag|=2; 316 } 317 318 /* 319 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. 320 * 321 * This is inefficient. While finding out if it is for us we could also compute 322 * the routing table entry. This is where the great unified cache theory comes 323 * in as and when someone implements it 324 * 325 * For most hosts over 99% of packets match the first conditional 326 * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at 327 * function entry. 328 */ 329 330 if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0) 331 { 332 if (opt && opt->srr) 333 { 334 int srrspace, srrptr; 335 __u32 nexthop; 336 unsigned char * optptr = ((unsigned char *)iph) + opt->srr; 337 338 if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST) 339 { 340 kfree_skb(skb, FREE_WRITE); 341 return 0; 342 } 343 344 for ( srrptr=optptr[2], srrspace = optptr[1]; 345 srrptr <= srrspace; 346 srrptr += 4 347 ) 348 { 349 int brd2; 350 if (srrptr + 3 > srrspace) 351 { 352 icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2, 353 skb->dev); 354 kfree_skb(skb, FREE_WRITE); 355 return 0; 356 } 357 memcpy(&nexthop, &optptr[srrptr-1], 4); 358 if ((brd2 = ip_chk_addr(nexthop)) == 0) 359 break; 360 if (brd2 != IS_MYADDR) 361 { 362 363 /* 364 * ANK: should we implement weak tunneling of multicasts? 365 * Are they obsolete? DVMRP specs (RFC-1075) is old enough... 366 * [They are obsolete] 367 */ 368 kfree_skb(skb, FREE_WRITE); 369 return -EINVAL; 370 } 371 } 372 if (srrptr <= srrspace) 373 { 374 opt->srr_is_hit = 1; 375 opt->is_changed = 1; 376 #ifdef CONFIG_IP_FORWARD 377 if (ip_forward(skb, dev, is_frag, nexthop)) 378 kfree_skb(skb, FREE_WRITE); 379 #else 380 ip_statistics.IpInAddrErrors++; 381 kfree_skb(skb, FREE_WRITE); 382 #endif 383 return 0; 384 } 385 } 386 387 #ifdef CONFIG_IP_MULTICAST 388 if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK)) 389 { 390 /* 391 * Check it is for one of our groups 392 */ 393 struct ip_mc_list *ip_mc=dev->ip_mc_list; 394 do 395 { 396 if(ip_mc==NULL) 397 { 398 kfree_skb(skb, FREE_WRITE); 399 return 0; 400 } 401 if(ip_mc->multiaddr==iph->daddr) 402 break; 403 ip_mc=ip_mc->next; 404 } 405 while(1); 406 } 407 #endif 408 409 #ifdef CONFIG_IP_MASQUERADE 410 /* 411 * Do we need to de-masquerade this fragment? 412 */ 413 if (ip_fw_demasquerade(skb)) 414 { 415 struct iphdr *iph=skb->h.iph; 416 if (ip_forward(skb, dev, is_frag|4, iph->daddr)) 417 kfree_skb(skb, FREE_WRITE); 418 return(0); 419 } 420 #endif 421 422 /* 423 * Account for the packet 424 */ 425 426 #ifdef CONFIG_IP_ACCT 427 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1); 428 #endif 429 430 /* 431 * Reassemble IP fragments. 432 */ 433 434 if(is_frag) 435 { 436 /* Defragment. Obtain the complete packet if there is one */ 437 skb=ip_defrag(iph,skb,dev); 438 if(skb==NULL) 439 return 0; 440 skb->dev = dev; 441 iph=skb->h.iph; 442 } 443 444 /* 445 * Point into the IP datagram, just past the header. 446 */ 447 448 skb->ip_hdr = iph; 449 skb->h.raw += iph->ihl*4; 450 451 #ifdef CONFIG_IP_MROUTE 452 /* 453 * Check the state on multicast routing (multicast and not 224.0.0.z) 454 */ 455 456 if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000)) 457 mroute_pkt=1; 458 459 #endif 460 /* 461 * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. 462 * 463 * RFC 1122: SHOULD pass TOS value up to the transport layer. 464 */ 465 466 hash = iph->protocol & (SOCK_ARRAY_SIZE-1); 467 468 /* 469 * If there maybe a raw socket we must check - if not we don't care less 470 */ 471 472 if((raw_sk=raw_prot.sock_array[hash])!=NULL) 473 { 474 struct sock *sknext=NULL; 475 struct sk_buff *skb1; 476 raw_sk=get_sock_raw(raw_sk, iph->protocol, iph->saddr, iph->daddr); 477 if(raw_sk) /* Any raw sockets */ 478 { 479 do 480 { 481 /* Find the next */ 482 sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr); 483 if(sknext) 484 skb1=skb_clone(skb, GFP_ATOMIC); 485 else 486 break; /* One pending raw socket left */ 487 if(skb1) 488 raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr); 489 raw_sk=sknext; 490 } 491 while(raw_sk!=NULL); 492 493 /* 494 * Here either raw_sk is the last raw socket, or NULL if none 495 */ 496 497 /* 498 * We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy 499 */ 500 } 501 } 502 503 /* 504 * skb->h.raw now points at the protocol beyond the IP header. 505 */ 506 507 hash = iph->protocol & (MAX_INET_PROTOS -1); 508 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) 509 { 510 struct sk_buff *skb2; 511 512 if (ipprot->protocol != iph->protocol) 513 continue; 514 /* 515 * See if we need to make a copy of it. This will 516 * only be set if more than one protocol wants it. 517 * and then not for the last one. If there is a pending 518 * raw delivery wait for that 519 */ 520 521 #ifdef CONFIG_IP_MROUTE 522 if (ipprot->copy || raw_sk || mroute_pkt) 523 #else 524 if (ipprot->copy || raw_sk) 525 #endif 526 { 527 skb2 = skb_clone(skb, GFP_ATOMIC); 528 if(skb2==NULL) 529 continue; 530 } 531 else 532 { 533 skb2 = skb; 534 } 535 flag = 1; 536 537 /* 538 * Pass on the datagram to each protocol that wants it, 539 * based on the datagram protocol. We should really 540 * check the protocol handler's return values here... 541 */ 542 543 ipprot->handler(skb2, dev, opt, iph->daddr, 544 (ntohs(iph->tot_len) - (iph->ihl * 4)), 545 iph->saddr, 0, ipprot); 546 } 547 548 /* 549 * All protocols checked. 550 * If this packet was a broadcast, we may *not* reply to it, since that 551 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all 552 * ICMP reply messages get queued up for transmission...) 553 */ 554 555 #ifdef CONFIG_IP_MROUTE 556 /* 557 * Forward the last copy to the multicast router. If 558 * there is a pending raw deliery however make a copy 559 * and forward that. 560 */ 561 562 if(mroute_pkt) 563 { 564 flag=1; 565 if(raw_sk==NULL) 566 ipmr_forward(skb, is_frag); 567 else 568 { 569 struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); 570 if(skb2) 571 { 572 skb2->free=1; 573 ipmr_forward(skb2, is_frag); 574 } 575 } 576 } 577 #endif 578 579 if(raw_sk!=NULL) /* Shift to last raw user */ 580 raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr); 581 else if (!flag) /* Free and report errors */ 582 { 583 if (brd != IS_BROADCAST && brd!=IS_MULTICAST) 584 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev); 585 kfree_skb(skb, FREE_WRITE); 586 } 587 588 return(0); 589 } 590 591 /* 592 * Do any unicast IP forwarding required. 593 */ 594 595 /* 596 * Don't forward multicast or broadcast frames. 597 */ 598 599 if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) 600 { 601 kfree_skb(skb,FREE_WRITE); 602 return 0; 603 } 604 605 /* 606 * The packet is for another target. Forward the frame 607 */ 608 609 #ifdef CONFIG_IP_FORWARD 610 if (opt && opt->is_strictroute) 611 { 612 icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev); 613 kfree_skb(skb, FREE_WRITE); 614 return -1; 615 } 616 if (ip_forward(skb, dev, is_frag, iph->daddr)) 617 kfree_skb(skb, FREE_WRITE); 618 #else 619 /* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", 620 iph->saddr,iph->daddr);*/ 621 ip_statistics.IpInAddrErrors++; 622 kfree_skb(skb, FREE_WRITE); 623 #endif 624 return(0); 625 } 626 627