1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * The Internet Protocol (IP) module. 7 * 8 * Version: @(#)ip.c 1.0.16b 9/1/93 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Donald Becker, <becker@super.org> 13 * Alan Cox, <Alan.Cox@linux.org> 14 * Richard Underwood 15 * Stefan Becker, <stefanb@yello.ping.de> 16 * Jorge Cwik, <jorge@laser.satlink.net> 17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 18 * 19 * 20 * Fixes: 21 * Alan Cox : Commented a couple of minor bits of surplus code 22 * Alan Cox : Undefining IP_FORWARD doesn't include the code 23 * (just stops a compiler warning). 24 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes 25 * are junked rather than corrupting things. 26 * Alan Cox : Frames to bad broadcast subnets are dumped 27 * We used to process them non broadcast and 28 * boy could that cause havoc. 29 * Alan Cox : ip_forward sets the free flag on the 30 * new frame it queues. Still crap because 31 * it copies the frame but at least it 32 * doesn't eat memory too. 33 * Alan Cox : Generic queue code and memory fixes. 34 * Fred Van Kempen : IP fragment support (borrowed from NET2E) 35 * Gerhard Koerting: Forward fragmented frames correctly. 36 * Gerhard Koerting: Fixes to my fix of the above 8-). 37 * Gerhard Koerting: IP interface addressing fix. 38 * Linus Torvalds : More robustness checks 39 * Alan Cox : Even more checks: Still not as robust as it ought to be 40 * Alan Cox : Save IP header pointer for later 41 * Alan Cox : ip option setting 42 * Alan Cox : Use ip_tos/ip_ttl settings 43 * Alan Cox : Fragmentation bogosity removed 44 * (Thanks to Mark.Bush@prg.ox.ac.uk) 45 * Dmitry Gorodchanin : Send of a raw packet crash fix. 46 * Alan Cox : Silly ip bug when an overlength 47 * fragment turns up. Now frees the 48 * queue. 49 * Linus Torvalds/ : Memory leakage on fragmentation 50 * Alan Cox : handling. 51 * Gerhard Koerting: Forwarding uses IP priority hints 52 * Teemu Rantanen : Fragment problems. 53 * Alan Cox : General cleanup, comments and reformat 54 * Alan Cox : SNMP statistics 55 * Alan Cox : BSD address rule semantics. Also see 56 * UDP as there is a nasty checksum issue 57 * if you do things the wrong way. 58 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file 59 * Alan Cox : IP options adjust sk->priority. 60 * Pedro Roque : Fix mtu/length error in ip_forward. 61 * Alan Cox : Avoid ip_chk_addr when possible. 62 * Richard Underwood : IP multicasting. 63 * Alan Cox : Cleaned up multicast handlers. 64 * Alan Cox : RAW sockets demultiplex in the BSD style. 65 * Gunther Mayer : Fix the SNMP reporting typo 66 * Alan Cox : Always in group 224.0.0.1 67 * Pauline Middelink : Fast ip_checksum update when forwarding 68 * Masquerading support. 69 * Alan Cox : Multicast loopback error for 224.0.0.1 70 * Alan Cox : IP_MULTICAST_LOOP option. 71 * Alan Cox : Use notifiers. 72 * Bjorn Ekwall : Removed ip_csum (from slhc.c too) 73 * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) 74 * Stefan Becker : Send out ICMP HOST REDIRECT 75 * Arnt Gulbrandsen : ip_build_xmit 76 * Alan Cox : Per socket routing cache 77 * Alan Cox : Fixed routing cache, added header cache. 78 * Alan Cox : Loopback didnt work right in original ip_build_xmit - fixed it. 79 * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. 80 * Alan Cox : Incoming IP option handling. 81 * Alan Cox : Set saddr on raw output frames as per BSD. 82 * Alan Cox : Stopped broadcast source route explosions. 83 * Alan Cox : Can disable source routing 84 * Takeshi Sone : Masquerading didn't work. 85 * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. 86 * Alan Cox : Memory leaks, tramples, misc debugging. 87 * Alan Cox : Fixed multicast (by popular demand 8)) 88 * Alan Cox : Fixed forwarding (by even more popular demand 8)) 89 * Alan Cox : Fixed SNMP statistics [I think] 90 * Gerhard Koerting : IP fragmentation forwarding fix 91 * Alan Cox : Device lock against page fault. 92 * Alan Cox : IP_HDRINCL facility. 93 * Werner Almesberger : Zero fragment bug 94 * Alan Cox : RAW IP frame length bug 95 * Alan Cox : Outgoing firewall on build_xmit 96 * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel 97 * Alan Cox : Multicast routing hooks 98 * 99 * 100 * 101 * To Fix: 102 * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient 103 * and could be made very efficient with the addition of some virtual memory hacks to permit 104 * the allocation of a buffer that can then be 'grown' by twiddling page tables. 105 * Output fragmentation wants updating along with the buffer management to use a single 106 * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet 107 * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause 108 * fragmentation anyway. 109 * 110 * FIXME: copy frag 0 iph to qp->iph 111 * 112 * This program is free software; you can redistribute it and/or 113 * modify it under the terms of the GNU General Public License 114 * as published by the Free Software Foundation; either version 115 * 2 of the License, or (at your option) any later version. 116 */ 117 118 #include <asm/segment.h> 119 #include <asm/system.h> 120 #include <linux/types.h> 121 #include <linux/kernel.h> 122 #include <linux/sched.h> 123 #include <linux/mm.h> 124 #include <linux/string.h> 125 #include <linux/errno.h> 126 #include <linux/config.h> 127 128 #include <linux/socket.h> 129 #include <linux/sockios.h> 130 #include <linux/in.h> 131 #include <linux/inet.h> 132 #include <linux/netdevice.h> 133 #include <linux/etherdevice.h> 134 #include <linux/proc_fs.h> 135 #include <linux/stat.h> 136 137 #include <net/snmp.h> 138 #include <net/ip.h> 139 #include <net/protocol.h> 140 #include <net/route.h> 141 #include <net/tcp.h> 142 #include <net/udp.h> 143 #include <linux/skbuff.h> 144 #include <net/sock.h> 145 #include <net/arp.h> 146 #include <net/icmp.h> 147 #include <net/raw.h> 148 #include <net/checksum.h> 149 #include <linux/igmp.h> 150 #include <linux/ip_fw.h> 151 #include <linux/firewall.h> 152 #include <linux/mroute.h> 153 #include <net/netlink.h> 154 #ifdef CONFIG_NET_ALIAS 155 #include <linux/net_alias.h> 156 #endif 157 158 extern int last_retran; 159 extern void sort_send(struct sock *sk); 160 161 #define min(a,b) ((a)<(b)?(a):(b)) 162 163 /* 164 * SNMP management statistics 165 */ 166 167 #ifdef CONFIG_IP_FORWARD 168 struct ip_mib ip_statistics={1,64,}; /* Forwarding=Yes, Default TTL=64 */ 169 #else 170 struct ip_mib ip_statistics={2,64,}; /* Forwarding=No, Default TTL=64 */ 171 #endif 172 173 /* 174 * Handle the issuing of an ioctl() request 175 * for the ip device. This is scheduled to 176 * disappear 177 */ 178 179 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg) /* */ 180 { 181 switch(cmd) 182 { 183 default: 184 return(-EINVAL); 185 } 186 } 187 188 189 190 /* 191 * This function receives all incoming IP datagrams. 192 * 193 * On entry skb->data points to the start of the IP header and 194 * the MAC header has been removed. 195 */ 196 197 int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt) /* */ 198 { 199 struct iphdr *iph = skb->h.iph; 200 struct sock *raw_sk=NULL; 201 unsigned char hash; 202 unsigned char flag = 0; 203 struct inet_protocol *ipprot; 204 int brd=IS_MYADDR; 205 struct options * opt = NULL; 206 int is_frag=0; 207 __u32 daddr; 208 209 #ifdef CONFIG_FIREWALL 210 int err; 211 #endif 212 #ifdef CONFIG_IP_MROUTE 213 int mroute_pkt=0; 214 #endif 215 216 #ifdef CONFIG_NET_IPV6 217 /* 218 * Intercept IPv6 frames. We dump ST-II and invalid types just below.. 219 */ 220 221 if(iph->version == 6) 222 return ipv6_rcv(skb,dev,pt); 223 #endif 224 225 ip_statistics.IpInReceives++; 226 227 /* 228 * Tag the ip header of this packet so we can find it 229 */ 230 231 skb->ip_hdr = iph; 232 233 /* 234 * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. 235 * RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING]. 236 * 237 * Is the datagram acceptable? 238 * 239 * 1. Length at least the size of an ip header 240 * 2. Version of 4 241 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] 242 * 4. Doesn't have a bogus length 243 * (5. We ought to check for IP multicast addresses and undefined types.. does this matter ?) 244 */ 245 246 if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0 247 || skb->len < ntohs(iph->tot_len)) 248 { 249 ip_statistics.IpInHdrErrors++; 250 kfree_skb(skb, FREE_WRITE); 251 return(0); 252 } 253 254 /* 255 * Our transport medium may have padded the buffer out. Now we know it 256 * is IP we can trim to the true length of the frame. 257 * Note this now means skb->len holds ntohs(iph->tot_len). 258 */ 259 260 skb_trim(skb,ntohs(iph->tot_len)); 261 262 if (iph->ihl > 5) 263 { 264 skb->ip_summed = 0; 265 if (ip_options_compile(NULL, skb)) 266 return(0); 267 opt = (struct options*)skb->proto_priv; 268 #ifdef CONFIG_IP_NOSR 269 if (opt->srr) 270 { 271 kfree_skb(skb, FREE_READ); 272 return -EINVAL; 273 } 274 #endif 275 } 276 277 /* 278 * Try to select closest <src,dst> alias device, if any. 279 * net_alias_dev_rcv_sel32 returns main device if it 280 * fails to found other. 281 */ 282 283 #ifdef CONFIG_NET_ALIAS 284 if (iph->daddr != skb->dev->pa_addr && net_alias_has(skb->dev)) 285 skb->dev = dev = net_alias_dev_rcv_sel32(skb->dev, AF_INET, iph->saddr, iph->daddr); 286 #endif 287 288 /* 289 * See if the firewall wants to dispose of the packet. 290 */ 291 292 #ifdef CONFIG_FIREWALL 293 294 if ((err=call_in_firewall(PF_INET, skb, iph))<FW_ACCEPT) 295 { 296 if(err==FW_REJECT) 297 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev); 298 kfree_skb(skb, FREE_WRITE); 299 return 0; 300 } 301 302 #endif 303 304 /* 305 * Remember if the frame is fragmented. 306 */ 307 308 if(iph->frag_off) 309 { 310 if (iph->frag_off & htons(IP_MF)) 311 is_frag|=1; 312 /* 313 * Last fragment ? 314 */ 315 316 if (iph->frag_off & htons(IP_OFFSET)) 317 is_frag|=2; 318 } 319 320 /* 321 * Do any IP forwarding required. chk_addr() is expensive -- avoid it someday. 322 * 323 * This is inefficient. While finding out if it is for us we could also compute 324 * the routing table entry. This is where the great unified cache theory comes 325 * in as and when someone implements it 326 * 327 * For most hosts over 99% of packets match the first conditional 328 * and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at 329 * function entry. 330 */ 331 daddr = iph->daddr; 332 if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0) 333 { 334 if (opt && opt->srr) 335 { 336 int srrspace, srrptr; 337 __u32 nexthop; 338 unsigned char * optptr = ((unsigned char *)iph) + opt->srr; 339 340 if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST) 341 { 342 kfree_skb(skb, FREE_WRITE); 343 return 0; 344 } 345 346 for ( srrptr=optptr[2], srrspace = optptr[1]; 347 srrptr <= srrspace; 348 srrptr += 4 349 ) 350 { 351 int brd2; 352 if (srrptr + 3 > srrspace) 353 { 354 icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2, 355 skb->dev); 356 kfree_skb(skb, FREE_WRITE); 357 return 0; 358 } 359 memcpy(&nexthop, &optptr[srrptr-1], 4); 360 if ((brd2 = ip_chk_addr(nexthop)) == 0) 361 break; 362 if (brd2 != IS_MYADDR) 363 { 364 365 /* 366 * ANK: should we implement weak tunneling of multicasts? 367 * Are they obsolete? DVMRP specs (RFC-1075) is old enough... 368 * [They are obsolete] 369 */ 370 kfree_skb(skb, FREE_WRITE); 371 return -EINVAL; 372 } 373 memcpy(&daddr, &optptr[srrptr-1], 4); 374 } 375 if (srrptr <= srrspace) 376 { 377 opt->srr_is_hit = 1; 378 opt->is_changed = 1; 379 #ifdef CONFIG_IP_FORWARD 380 if (ip_forward(skb, dev, is_frag, nexthop)) 381 kfree_skb(skb, FREE_WRITE); 382 #else 383 ip_statistics.IpInAddrErrors++; 384 kfree_skb(skb, FREE_WRITE); 385 #endif 386 return 0; 387 } 388 } 389 390 #ifdef CONFIG_IP_MULTICAST 391 if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK)) 392 { 393 /* 394 * Check it is for one of our groups 395 */ 396 struct ip_mc_list *ip_mc=dev->ip_mc_list; 397 do 398 { 399 if(ip_mc==NULL) 400 { 401 kfree_skb(skb, FREE_WRITE); 402 return 0; 403 } 404 if(ip_mc->multiaddr==iph->daddr) 405 break; 406 ip_mc=ip_mc->next; 407 } 408 while(1); 409 } 410 #endif 411 412 #ifdef CONFIG_IP_MASQUERADE 413 /* 414 * Do we need to de-masquerade this fragment? 415 */ 416 if (ip_fw_demasquerade(skb)) 417 { 418 struct iphdr *iph=skb->h.iph; 419 if (ip_forward(skb, dev, is_frag|4, iph->daddr)) 420 kfree_skb(skb, FREE_WRITE); 421 return(0); 422 } 423 #endif 424 425 /* 426 * Account for the packet 427 */ 428 429 #ifdef CONFIG_IP_ACCT 430 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1); 431 #endif 432 433 /* 434 * Reassemble IP fragments. 435 */ 436 437 if(is_frag) 438 { 439 /* Defragment. Obtain the complete packet if there is one */ 440 skb=ip_defrag(iph,skb,dev); 441 if(skb==NULL) 442 return 0; 443 skb->dev = dev; 444 iph=skb->h.iph; 445 } 446 447 /* 448 * Point into the IP datagram, just past the header. 449 */ 450 451 skb->ip_hdr = iph; 452 skb->h.raw += iph->ihl*4; 453 454 #ifdef CONFIG_IP_MROUTE 455 /* 456 * Check the state on multicast routing (multicast and not 224.0.0.z) 457 */ 458 459 if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000)) 460 mroute_pkt=1; 461 462 #endif 463 /* 464 * Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies. 465 * 466 * RFC 1122: SHOULD pass TOS value up to the transport layer. 467 */ 468 469 hash = iph->protocol & (SOCK_ARRAY_SIZE-1); 470 471 /* 472 * If there maybe a raw socket we must check - if not we don't care less 473 */ 474 475 if((raw_sk=raw_prot.sock_array[hash])!=NULL) 476 { 477 struct sock *sknext=NULL; 478 struct sk_buff *skb1; 479 raw_sk=get_sock_raw(raw_sk, iph->protocol, iph->saddr, iph->daddr); 480 if(raw_sk) /* Any raw sockets */ 481 { 482 do 483 { 484 /* Find the next */ 485 sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr); 486 if(sknext) 487 skb1=skb_clone(skb, GFP_ATOMIC); 488 else 489 break; /* One pending raw socket left */ 490 if(skb1) 491 raw_rcv(raw_sk, skb1, dev, iph->saddr,daddr); 492 raw_sk=sknext; 493 } 494 while(raw_sk!=NULL); 495 496 /* 497 * Here either raw_sk is the last raw socket, or NULL if none 498 */ 499 500 /* 501 * We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy 502 */ 503 } 504 } 505 506 /* 507 * skb->h.raw now points at the protocol beyond the IP header. 508 */ 509 510 hash = iph->protocol & (MAX_INET_PROTOS -1); 511 for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next) 512 { 513 struct sk_buff *skb2; 514 515 if (ipprot->protocol != iph->protocol) 516 continue; 517 /* 518 * See if we need to make a copy of it. This will 519 * only be set if more than one protocol wants it. 520 * and then not for the last one. If there is a pending 521 * raw delivery wait for that 522 */ 523 524 #ifdef CONFIG_IP_MROUTE 525 if (ipprot->copy || raw_sk || mroute_pkt) 526 #else 527 if (ipprot->copy || raw_sk) 528 #endif 529 { 530 skb2 = skb_clone(skb, GFP_ATOMIC); 531 if(skb2==NULL) 532 continue; 533 } 534 else 535 { 536 skb2 = skb; 537 } 538 flag = 1; 539 540 /* 541 * Pass on the datagram to each protocol that wants it, 542 * based on the datagram protocol. We should really 543 * check the protocol handler's return values here... 544 */ 545 546 ipprot->handler(skb2, dev, opt, daddr, 547 (ntohs(iph->tot_len) - (iph->ihl * 4)), 548 iph->saddr, 0, ipprot); 549 } 550 551 /* 552 * All protocols checked. 553 * If this packet was a broadcast, we may *not* reply to it, since that 554 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all 555 * ICMP reply messages get queued up for transmission...) 556 */ 557 558 #ifdef CONFIG_IP_MROUTE 559 /* 560 * Forward the last copy to the multicast router. If 561 * there is a pending raw deliery however make a copy 562 * and forward that. 563 */ 564 565 if(mroute_pkt) 566 { 567 flag=1; 568 if(raw_sk==NULL) 569 ipmr_forward(skb, is_frag); 570 else 571 { 572 struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC); 573 if(skb2) 574 { 575 skb2->free=1; 576 ipmr_forward(skb2, is_frag); 577 } 578 } 579 } 580 #endif 581 582 if(raw_sk!=NULL) /* Shift to last raw user */ 583 raw_rcv(raw_sk, skb, dev, iph->saddr, daddr); 584 else if (!flag) /* Free and report errors */ 585 { 586 if (brd != IS_BROADCAST && brd!=IS_MULTICAST) 587 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev); 588 kfree_skb(skb, FREE_WRITE); 589 } 590 591 return(0); 592 } 593 594 /* 595 * Do any unicast IP forwarding required. 596 */ 597 598 /* 599 * Don't forward multicast or broadcast frames. 600 */ 601 602 if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST) 603 { 604 kfree_skb(skb,FREE_WRITE); 605 return 0; 606 } 607 608 /* 609 * The packet is for another target. Forward the frame 610 */ 611 612 #ifdef CONFIG_IP_FORWARD 613 if (opt && opt->is_strictroute) 614 { 615 icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev); 616 kfree_skb(skb, FREE_WRITE); 617 return -1; 618 } 619 if (ip_forward(skb, dev, is_frag, iph->daddr)) 620 kfree_skb(skb, FREE_WRITE); 621 #else 622 /* printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n", 623 iph->saddr,iph->daddr);*/ 624 ip_statistics.IpInAddrErrors++; 625 kfree_skb(skb, FREE_WRITE); 626 #endif 627 return(0); 628 } 629 630