/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Version:	@(#)ip.c	1.0.16b	9/1/93
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	Commented a couple of minor bits of surplus code
 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
 *					(just stops a compiler warning).
 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
 *					are junked rather than corrupting things.
 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
 *					We used to process them non broadcast and
 *					boy could that cause havoc.
 *		Alan Cox	:	ip_forward sets the free flag on the
 *					new frame it queues. Still crap because
 *					it copies the frame but at least it
 *					doesn't eat memory too.
 *		Alan Cox	:	Generic queue code and memory fixes.
 *		Fred Van Kempen	:	IP fragment support (borrowed from NET2E)
 *		Gerhard Koerting:	Forward fragmented frames correctly.
 *		Gerhard Koerting:	Fixes to my fix of the above 8-).
 *		Gerhard Koerting:	IP interface addressing fix.
 *		Linus Torvalds	:	More robustness checks
 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
 *		Alan Cox	:	Save IP header pointer for later
 *		Alan Cox	:	ip option setting
 *		Alan Cox	:	Use ip_tos/ip_ttl settings
 *		Alan Cox	:	Fragmentation bogosity removed
 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
 *		Alan Cox	:	Silly ip bug when an overlength
 *					fragment turns up. Now frees the
 *					queue.
 *		Linus Torvalds/ :	Memory leakage on fragmentation
 *		Alan Cox	:	handling.
 *		Gerhard Koerting:	Forwarding uses IP priority hints
 *		Teemu Rantanen	:	Fragment problems.
 *		Alan Cox	:	General cleanup, comments and reformat
 *		Alan Cox	:	SNMP statistics
 *		Alan Cox	:	BSD address rule semantics. Also see
 *					UDP as there is a nasty checksum issue
 *					if you do things the wrong way.
 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
 *		Alan Cox	:	IP options adjust sk->priority.
 *		Pedro Roque	:	Fix mtu/length error in ip_forward.
 *
 * To Fix:
 *		IP option processing is mostly not needed. ip_forward needs to know about routing rules
 *		and time stamp but that's about all. Use the route mtu field here too
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <asm/segment.h>
#include <asm/system.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include "snmp.h"
#include "ip.h"
#include "protocol.h"
#include "route.h"
#include "tcp.h"
#include <linux/skbuff.h>
#include "sock.h"
#include "arp.h"
#include "icmp.h"

#define CONFIG_IP_DEFRAG

extern int last_retran;
extern void sort_send(struct sock *sk);

#define min(a,b)	((a)<(b)?(a):(b))
#define LOOPBACK(x)	(((x) & htonl(0xff000000)) == htonl(0x7f000000))

/*
 *	SNMP management statistics
 */

struct ip_mib ip_statistics={1,64,};	/* Forwarding=Yes, Default TTL=64 */

/*
 *	Handle the issuing of an ioctl() request
 *	for the ip device. This is scheduled to
 *	disappear
 */

int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch(cmd)
	{
		default:
			return(-EINVAL);
	}
}


/* these two routines will do routing. */

static void
strict_route(struct iphdr *iph, struct options *opt)
{
}


static void
loose_route(struct iphdr *iph, struct options *opt)
{
}




/* This routine will check to see if we have lost a gateway. */
void
ip_route_check(unsigned long daddr)
{
}


#if 0
/* this routine puts the options at the end of an ip header. */
static int
build_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *ptr;
	/* currently we don't support any options. */
	ptr = (unsigned char *)(iph+1);
	*ptr = 0;
	return (4);
}
#endif


/*
 *	Take an skb, and fill in the MAC header.
 */

static int ip_send(struct sk_buff *skb, unsigned long daddr, int len, struct device *dev, unsigned long saddr)
{
	int mac = 0;

	skb->dev = dev;
	skb->arp = 1;
	if (dev->hard_header)
	{
		/*
		 *	Build a hardware header. Source address is our mac, destination unknown
		 *	(rebuild header will sort this out)
		 */
		mac = dev->hard_header(skb->data, dev, ETH_P_IP, NULL, NULL, len, skb);
		if (mac < 0)
		{
			mac = -mac;
			skb->arp = 0;
			skb->raddr = daddr;	/* next routing address */
		}
	}
	return mac;
}

int ip_id_count = 0;

/*
 *	This routine builds the appropriate hardware/IP headers for
 *	the routine.  It assumes that if *dev != NULL then the
 *	protocol knows what it's doing, otherwise it uses the
 *	routing/ARP tables to select a device struct.
 */
int ip_build_header(struct sk_buff *skb, unsigned long saddr, unsigned long daddr,
		struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
{
	static struct options optmem;
	struct iphdr *iph;
	struct rtable *rt;
	unsigned char *buff;
	unsigned long raddr;
	int tmp;
	unsigned long src;

	/*
	 *	If there is no 'from' address as yet, then make it our loopback
	 */

	if (saddr == 0)
		saddr = ip_my_addr();

	buff = skb->data;

	/*
	 *	See if we need to look up the device.
	 */

	if (*dev == NULL)
	{
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		if (rt == NULL)
		{
			ip_statistics.IpOutNoRoutes++;
			return(-ENETUNREACH);
		}

		*dev = rt->rt_dev;
		/*
		 *	If the frame is from us and going off machine it MUST MUST MUST
		 *	have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/
		raddr = rt->rt_gateway;

		opt = &optmem;
	}
	else
	{
		/*
		 *	We still need the address of the first hop.
		 */
		if(skb->localroute)
			rt = ip_rt_local(daddr, &optmem, &src);
		else
			rt = ip_rt_route(daddr, &optmem, &src);
		/*
		 *	If the frame is from us and going off machine it MUST MUST MUST
		 *	have the output device ip address and never the loopback
		 */
		if (LOOPBACK(saddr) && !LOOPBACK(daddr))
			saddr = src;/*rt->rt_dev->pa_addr;*/

		raddr = (rt == NULL) ? 0 : rt->rt_gateway;
	}

	/*
	 *	No gateway so aim at the real destination
	 */
	if (raddr == 0)
		raddr = daddr;

	/*
	 *	Now build the MAC header.
	 */

	tmp = ip_send(skb, raddr, len, *dev, saddr);
	buff += tmp;
	len -= tmp;

	/*
	 *	Book keeping
	 */

	skb->dev = *dev;
	skb->saddr = saddr;
	if (skb->sk)
		skb->sk->saddr = saddr;

	/*
	 *	Now build the IP header.
	 */

	/*
	 *	If we are using IPPROTO_RAW, then we don't need an IP header, since
	 *	one is being supplied to us by the user
	 */

	if(type == IPPROTO_RAW)
		return (tmp);

	iph = (struct iphdr *)buff;
	iph->version  = 4;
	iph->tos      = tos;
	iph->frag_off = 0;
	iph->ttl      = ttl;
	iph->daddr    = daddr;
	iph->saddr    = saddr;
	iph->protocol = type;
	iph->ihl      = 5;

	/* Setup the IP options. */
#ifdef Not_Yet_Avail
	build_options(iph, opt);
#endif

	return(20 + tmp);	/* IP header plus MAC header size */
}

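/*
 *	Illustrative only: a minimal sketch of how an upper layer might
 *	drive ip_build_header()/ip_queue_xmit(). The helper name, the
 *	MAX_HEADER slack constant and the ready-made ICMP payload are
 *	assumptions for the example, not part of this file; see the real
 *	callers (the ICMP and UDP code) for the authoritative pattern.
 */
#if 0
static void example_ip_output(struct sock *sk, unsigned long daddr,
			      unsigned char *icmp_msg, int len)
{
	struct device *dev = NULL;
	struct sk_buff *skb;
	int tmp;

	/* Room for MAC + IP headers plus the payload (slack is assumed). */
	skb = alloc_skb(MAX_HEADER + len, GFP_ATOMIC);
	if (skb == NULL)
		return;
	skb->sk = sk;
	skb->free = 1;

	/* Build the MAC and IP headers; tmp is their combined size. */
	tmp = ip_build_header(skb, 0, daddr, &dev, IPPROTO_ICMP,
			      NULL, len, sk->ip_tos, sk->ip_ttl);
	if (tmp < 0)
	{
		kfree_skb(skb, FREE_WRITE);
		return;
	}

	/* Append a payload that already carries its own checksum. */
	memcpy(skb->data + tmp, icmp_msg, len);
	skb->len = tmp + len;

	/* Hand it to the output engine; free=1 frees after transmit. */
	ip_queue_xmit(sk, dev, skb, 1);
}
#endif
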

static int
do_options(struct iphdr *iph, struct options *opt)
{
	unsigned char *buff;
	int done = 0;
	int i, len = sizeof(struct iphdr);

	/* Zero out the options. */
	opt->record_route.route_size = 0;
	opt->loose_route.route_size = 0;
	opt->strict_route.route_size = 0;
	opt->tstamp.ptr = 0;
	opt->security = 0;
	opt->compartment = 0;
	opt->handling = 0;
	opt->stream = 0;
	opt->tcc = 0;
	/*
	 *	NOTE: option parsing is disabled by this early return; the
	 *	code below is kept for when it is switched back on.
	 */
	return(0);

	/* Advance the pointer to start at the options. */
	buff = (unsigned char *)(iph + 1);

	/* Now start the processing. */
	while (!done && len < iph->ihl*4) switch(*buff) {
	case IPOPT_END:
		done = 1;
		break;
	case IPOPT_NOOP:
		buff++;
		len++;
		break;
	case IPOPT_SEC:
		buff++;
		if (*buff != 11) return(1);
		buff++;
		opt->security = ntohs(*(unsigned short *)buff);
		buff += 2;
		opt->compartment = ntohs(*(unsigned short *)buff);
		buff += 2;
		opt->handling = ntohs(*(unsigned short *)buff);
		buff += 2;
		opt->tcc = ((*buff) << 16) + ntohs(*(unsigned short *)(buff+1));
		buff += 3;
		len += 11;
		break;
	case IPOPT_LSRR:
		buff++;
		if ((*buff - 3) % 4 != 0) return(1);
		len += *buff;
		opt->loose_route.route_size = (*buff - 3)/4;
		buff++;
		if (*buff % 4 != 0) return(1);
		opt->loose_route.pointer = *buff/4 - 1;
		buff++;
		buff++;
		for (i = 0; i < opt->loose_route.route_size; i++) {
			if (i >= MAX_ROUTE)
				return(1);
			opt->loose_route.route[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	case IPOPT_SSRR:
		buff++;
		if ((*buff - 3) % 4 != 0) return(1);
		len += *buff;
		opt->strict_route.route_size = (*buff - 3)/4;
		buff++;
		if (*buff % 4 != 0) return(1);
		opt->strict_route.pointer = *buff/4 - 1;
		buff++;
		buff++;
		for (i = 0; i < opt->strict_route.route_size; i++) {
			if (i >= MAX_ROUTE)
				return(1);
			opt->strict_route.route[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	case IPOPT_RR:
		buff++;
		if ((*buff - 3) % 4 != 0) return(1);
		len += *buff;
		opt->record_route.route_size = (*buff - 3)/4;
		buff++;
		if (*buff % 4 != 0) return(1);
		opt->record_route.pointer = *buff/4 - 1;
		buff++;
		buff++;
		for (i = 0; i < opt->record_route.route_size; i++) {
			if (i >= MAX_ROUTE)
				return(1);
			opt->record_route.route[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	case IPOPT_SID:
		len += 4;
		buff += 2;
		opt->stream = *(unsigned short *)buff;
		buff += 2;
		break;
	case IPOPT_TIMESTAMP:
		buff++;
		len += *buff;
		if (*buff % 4 != 0) return(1);
		opt->tstamp.len = *buff / 4 - 1;
		buff++;
		if ((*buff - 1) % 4 != 0) return(1);
		opt->tstamp.ptr = (*buff-1)/4;
		buff++;
		opt->tstamp.x.full_char = *buff;
		buff++;
		for (i = 0; i < opt->tstamp.len; i++) {
			opt->tstamp.data[i] = *(unsigned long *)buff;
			buff += 4;
		}
		break;
	default:
		return(1);
	}

	if (opt->record_route.route_size == 0) {
		if (opt->strict_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->strict_route),
						sizeof(opt->record_route));
		} else if (opt->loose_route.route_size != 0) {
			memcpy(&(opt->record_route), &(opt->loose_route),
						sizeof(opt->record_route));
		}
	}

	if (opt->strict_route.route_size != 0 &&
		opt->strict_route.route_size != opt->strict_route.pointer) {
		strict_route(iph, opt);
		return(0);
	}

	if (opt->loose_route.route_size != 0 &&
		opt->loose_route.route_size != opt->loose_route.pointer) {
		loose_route(iph, opt);
		return(0);
	}

	return(0);
}

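/*
 *	A standalone sketch of the generic option walk do_options() performs
 *	above: IPOPT_END and IPOPT_NOOP are single octets, every other option
 *	is type/length/data with the length octet covering the whole option.
 *	Illustrative only (it assumes a well-formed header); the routine
 *	above is the real parser.
 */
#if 0
static void example_walk_options(unsigned char *opts, int optlen)
{
	int i = 0;

	while (i < optlen)
	{
		unsigned char type = opts[i];

		if (type == IPOPT_END)		/* end of option list */
			break;
		if (type == IPOPT_NOOP)		/* one octet of padding */
		{
			i++;
			continue;
		}
		/* All remaining options: opts[i+1] is the option length. */
		if (i + 1 >= optlen || opts[i + 1] < 2)
			break;			/* malformed, give up */
		/* opts[i+2] .. opts[i+opts[i+1]-1] hold the option data. */
		i += opts[i + 1];
	}
}
#endif
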
/*
 *	This is a version of ip_compute_csum() optimized for IP headers, which
 *	always checksum on 4 octet boundaries.
 */

static inline unsigned short ip_fast_csum(unsigned char * buff, int wlen)
{
	unsigned long sum = 0;

	if (wlen)
	{
		unsigned long bogus;
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %3, %0\n\t"
		"decl %2\n\t"
		"jne 1b\n\t"
		"adcl $0, %0\n\t"
		"movl %0, %3\n\t"
		"shrl $16, %3\n\t"
		"addw %w3, %w0\n\t"
		"adcw $0, %w0"
		: "=r" (sum), "=S" (buff), "=r" (wlen), "=a" (bogus)
		: "0" (sum), "1" (buff), "2" (wlen));
	}
	return (~sum) & 0xffff;
}

/*
 *	This routine does all the checksum computations that don't
 *	require anything special (like copying or special headers).
 */

unsigned short ip_compute_csum(unsigned char * buff, int len)
{
	unsigned long sum = 0;

	/* Do the first multiple of 4 bytes and convert to 16 bits. */
	if (len > 3)
	{
		__asm__("clc\n"
		"1:\t"
		"lodsl\n\t"
		"adcl %%eax, %%ebx\n\t"
		"loop 1b\n\t"
		"adcl $0, %%ebx\n\t"
		"movl %%ebx, %%eax\n\t"
		"shrl $16, %%eax\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum) , "=S" (buff)
		: "0" (sum), "c" (len >> 2) ,"1" (buff)
		: "ax", "cx", "si", "bx" );
	}
	if (len & 2)
	{
		__asm__("lodsw\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	if (len & 1)
	{
		__asm__("lodsb\n\t"
		"movb $0, %%ah\n\t"
		"addw %%ax, %%bx\n\t"
		"adcw $0, %%bx"
		: "=b" (sum), "=S" (buff)
		: "0" (sum), "1" (buff)
		: "bx", "ax", "si");
	}
	sum =~sum;
	return(sum & 0xffff);
}

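/*
 *	For reference: a portable C rendering of the same ones-complement
 *	sum the assembly above computes (cf. RFC 1071). A sketch only; note
 *	that the result here comes out in host byte order, while the
 *	assembly versions produce it in the order it is stored into the
 *	header, and they also advance the buffer pointer as a side effect
 *	of lodsl/lodsw.
 */
#if 0
static unsigned short example_csum_portable(unsigned char *buff, int len)
{
	unsigned long sum = 0;

	while (len > 1)				/* sum 16-bit words */
	{
		sum += (buff[0] << 8) | buff[1];
		buff += 2;
		len -= 2;
	}
	if (len)				/* odd trailing byte, zero padded */
		sum += buff[0] << 8;
	while (sum >> 16)			/* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (unsigned short) (~sum & 0xffff);
}
#endif
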
/*
 *	Check the header of an incoming IP datagram. This version is still used in slhc.c.
 */

int ip_csum(struct iphdr *iph)
{
	return ip_fast_csum((unsigned char *)iph, iph->ihl);
}

/*
 *	Generate a checksum for an outgoing IP datagram.
 */

static void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}

/************************ Fragment Handlers From NET2E not yet with tweaks to beat 4K **********************************/


/*
 *	This fragment handler is a bit of a heap. On the other hand it works quite
 *	happily and handles things quite well.
 */

static struct ipq *ipqueue = NULL;		/* IP fragment queue	*/

/*
 *	Create a new fragment entry.
 */

static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
{
	struct ipfrag *fp;

	fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
	if (fp == NULL)
	{
		printk("IP: frag_create: no memory left !\n");
		return(NULL);
	}
	memset(fp, 0, sizeof(struct ipfrag));

	/* Fill in the structure. */
	fp->offset = offset;
	fp->end = end;
	fp->len = end - offset;
	fp->skb = skb;
	fp->ptr = ptr;

	return(fp);
}


/*
 *	Find the correct entry in the "incomplete datagrams" queue for
 *	this IP datagram, and return the queue entry address if found.
 */

static struct ipq *ip_find(struct iphdr *iph)
{
	struct ipq *qp;
	struct ipq *qplast;

	cli();
	qplast = NULL;
	for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
	{
		if (iph->id == qp->iph->id && iph->saddr == qp->iph->saddr &&
			iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
		{
			del_timer(&qp->timer);	/* So it doesn't vanish on us. The timer will be reset anyway */
			sti();
			return(qp);
		}
	}
	sti();
	return(NULL);
}


/*
 *	Remove an entry from the "incomplete datagrams" queue, either
 *	because we completed, reassembled and processed it, or because
 *	it timed out.
 */

static void ip_free(struct ipq *qp)
{
	struct ipfrag *fp;
	struct ipfrag *xp;

	/*
	 *	Stop the timer for this entry.
	 */

	del_timer(&qp->timer);

	/* Remove this entry from the "incomplete datagrams" queue. */
	cli();
	if (qp->prev == NULL)
	{
		ipqueue = qp->next;
		if (ipqueue != NULL)
			ipqueue->prev = NULL;
	}
	else
	{
		qp->prev->next = qp->next;
		if (qp->next != NULL)
			qp->next->prev = qp->prev;
	}

	/* Release all fragment data. */

	fp = qp->fragments;
	while (fp != NULL)
	{
		xp = fp->next;
		IS_SKB(fp->skb);
		kfree_skb(fp->skb,FREE_READ);
		kfree_s(fp, sizeof(struct ipfrag));
		fp = xp;
	}

	/* Release the MAC header. */
	kfree_s(qp->mac, qp->maclen);

	/* Release the IP header. */
	kfree_s(qp->iph, qp->ihlen + 8);

	/* Finally, release the queue descriptor itself. */
	kfree_s(qp, sizeof(struct ipq));
	sti();
}


/*
 *	Oops- a fragment queue timed out. Kill it and send an ICMP reply.
 */

static void ip_expire(unsigned long arg)
{
	struct ipq *qp;

	qp = (struct ipq *)arg;

	/*
	 *	Send an ICMP "Fragment Reassembly Timeout" message.
	 */

	ip_statistics.IpReasmTimeout++;
	ip_statistics.IpReasmFails++;
	/* This if is always true... shrug */
	if(qp->fragments!=NULL)
		icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
				ICMP_EXC_FRAGTIME, qp->dev);

	/*
	 *	Nuke the fragment queue.
	 */
	ip_free(qp);
}


/*
 *	Add an entry to the 'ipq' queue for a newly received IP datagram.
 *	We will (hopefully :-) receive all other fragments of this datagram
 *	in time, so we just create a queue for this datagram, in which we
 *	will insert the received fragments at their respective positions.
 */

static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
{
	struct ipq *qp;
	int maclen;
	int ihlen;

	qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
	if (qp == NULL)
	{
		printk("IP: create: no memory left !\n");
		return(NULL);
	}
	memset(qp, 0, sizeof(struct ipq));

	/*
	 *	Allocate memory for the MAC header.
	 *
	 *	FIXME: We have a maximum MAC address size limit and define
	 *	elsewhere. We should use it here and avoid the 3 kmalloc() calls
	 */

	maclen = ((unsigned long) iph) - ((unsigned long) skb->data);
	qp->mac = (unsigned char *) kmalloc(maclen, GFP_ATOMIC);
	if (qp->mac == NULL)
	{
		printk("IP: create: no memory left !\n");
		kfree_s(qp, sizeof(struct ipq));
		return(NULL);
	}

	/*
	 *	Allocate memory for the IP header (plus 8 octets for ICMP).
	 */

	ihlen = (iph->ihl * sizeof(unsigned long));
	qp->iph = (struct iphdr *) kmalloc(ihlen + 8, GFP_ATOMIC);
	if (qp->iph == NULL)
	{
		printk("IP: create: no memory left !\n");
		kfree_s(qp->mac, maclen);
		kfree_s(qp, sizeof(struct ipq));
		return(NULL);
	}

	/* Fill in the structure. */
	memcpy(qp->mac, skb->data, maclen);
	memcpy(qp->iph, iph, ihlen + 8);
	qp->len = 0;
	qp->ihlen = ihlen;
	qp->maclen = maclen;
	qp->fragments = NULL;
	qp->dev = dev;

	/* Start a timer for this entry. */
	qp->timer.expires = IP_FRAG_TIME;		/* about 30 seconds	*/
	qp->timer.data = (unsigned long) qp;		/* pointer to queue	*/
	qp->timer.function = ip_expire;			/* expire function	*/
	add_timer(&qp->timer);

	/* Add this entry to the queue. */
	qp->prev = NULL;
	cli();
	qp->next = ipqueue;
	if (qp->next != NULL)
		qp->next->prev = qp;
	ipqueue = qp;
	sti();
	return(qp);
}


/*
 *	See if a fragment queue is complete.
 */

static int ip_done(struct ipq *qp)
{
	struct ipfrag *fp;
	int offset;

	/* Only possible if we received the final fragment. */
	if (qp->len == 0)
		return(0);

	/* Check all fragment offsets to see if they connect. */
	fp = qp->fragments;
	offset = 0;
	while (fp != NULL)
	{
		if (fp->offset > offset)
			return(0);	/* fragment(s) missing */
		offset = fp->end;
		fp = fp->next;
	}

	/* All fragments are present. */
	return(1);
}


/*
 *	Build a new IP datagram from all its fragments.
 *
 *	FIXME: We copy here because we lack an effective way of handling lists
 *	of bits on input. Until the new skb data handling is in I'm not going
 *	to touch this with a bargepole. This also causes a 4Kish limit on
 *	packet sizes.
 */

static struct sk_buff *ip_glue(struct ipq *qp)
{
	struct sk_buff *skb;
	struct iphdr *iph;
	struct ipfrag *fp;
	unsigned char *ptr;
	int count, len;

	/*
	 *	Allocate a new buffer for the datagram.
	 */

	len = qp->maclen + qp->ihlen + qp->len;

	if ((skb = alloc_skb(len,GFP_ATOMIC)) == NULL)
	{
		ip_statistics.IpReasmFails++;
		printk("IP: queue_glue: no memory for glueing queue 0x%X\n", (int) qp);
		ip_free(qp);
		return(NULL);
	}

	/* Fill in the basic details. */
	skb->len = (len - qp->maclen);
	skb->h.raw = skb->data;
	skb->free = 1;

	/* Copy the original MAC and IP headers into the new buffer. */
	ptr = (unsigned char *) skb->h.raw;
	memcpy(ptr, ((unsigned char *) qp->mac), qp->maclen);
	ptr += qp->maclen;
	memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
	ptr += qp->ihlen;
	skb->h.raw += qp->maclen;

	count = 0;

	/* Copy the data portions of all fragments into the new buffer. */
	fp = qp->fragments;
	while(fp != NULL)
	{
		if(count+fp->len > skb->len)
		{
			printk("Invalid fragment list: Fragment over size.\n");
			ip_free(qp);
			kfree_skb(skb,FREE_WRITE);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
		memcpy((ptr + fp->offset), fp->ptr, fp->len);
		count += fp->len;
		fp = fp->next;
	}

	/* We glued together all fragments, so remove the queue entry. */
	ip_free(qp);

	/* Done with all fragments. Fixup the new IP header. */
	iph = skb->h.iph;
	iph->frag_off = 0;
	iph->tot_len = htons((iph->ihl * sizeof(unsigned long)) + count);
	skb->ip_hdr = iph;

	ip_statistics.IpReasmOKs++;
	return(skb);
}


/*
 *	Process an incoming IP datagram fragment.
 */

static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
{
	struct ipfrag *prev, *next;
	struct ipfrag *tfp;
	struct ipq *qp;
	struct sk_buff *skb2;
	unsigned char *ptr;
	int flags, offset;
	int i, ihl, end;

	ip_statistics.IpReasmReqds++;

	/* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
	qp = ip_find(iph);

	/* Is this a non-fragmented datagram? */
	offset = ntohs(iph->frag_off);
	flags = offset & ~IP_OFFSET;
	offset &= IP_OFFSET;
	if (((flags & IP_MF) == 0) && (offset == 0))
	{
		if (qp != NULL)
			ip_free(qp);	/* Huh? How could this exist?? */
		return(skb);
	}

	offset <<= 3;		/* offset is in 8-byte chunks */

	/*
	 *	If the queue already existed, keep restarting its timer as long
	 *	as we still are receiving fragments. Otherwise, create a fresh
	 *	queue entry.
	 */

	if (qp != NULL)
	{
		del_timer(&qp->timer);
		qp->timer.expires = IP_FRAG_TIME;	/* about 30 seconds	*/
		qp->timer.data = (unsigned long) qp;	/* pointer to queue	*/
		qp->timer.function = ip_expire;		/* expire function	*/
		add_timer(&qp->timer);
	}
	else
	{
		/*
		 *	If we failed to create it, then discard the frame
		 */
		if ((qp = ip_create(skb, iph, dev)) == NULL)
		{
			skb->sk = NULL;
			kfree_skb(skb, FREE_READ);
			ip_statistics.IpReasmFails++;
			return NULL;
		}
	}

	/*
	 *	Determine the position of this fragment.
	 */

	ihl = (iph->ihl * sizeof(unsigned long));
	end = offset + ntohs(iph->tot_len) - ihl;

	/*
	 *	Point into the IP datagram 'data' part.
	 */

	ptr = skb->data + dev->hard_header_len + ihl;

	/*
	 *	Is this the final fragment?
	 */

	if ((flags & IP_MF) == 0)
		qp->len = end;

	/*
	 *	Find out which fragments are in front and at the back of us
	 *	in the chain of fragments so far. We must know where to put
	 *	this fragment, right?
	 */

	prev = NULL;
	for(next = qp->fragments; next != NULL; next = next->next)
	{
		if (next->offset > offset)
			break;	/* bingo! */
		prev = next;
	}

	/*
	 *	We found where to put this one.
	 *	Check for overlap with preceding fragment, and, if needed,
	 *	align things so that any overlaps are eliminated.
	 */
	if (prev != NULL && offset < prev->end)
	{
		i = prev->end - offset;
		offset += i;	/* ptr into datagram */
		ptr += i;	/* ptr into fragment data */
	}

	/*
	 *	Look for overlap with succeeding segments.
	 *	If we can merge fragments, do it.
	 */

	for(; next != NULL; next = tfp)
	{
		tfp = next->next;
		if (next->offset >= end)
			break;		/* no overlaps at all */

		i = end - next->offset;			/* overlap is 'i' bytes */
		next->len -= i;				/* so reduce size of	*/
		next->offset += i;			/* next fragment	*/
		next->ptr += i;

		/*
		 *	If we get a frag size of <= 0, remove it and the packet
		 *	that it goes with.
		 */
		if (next->len <= 0)
		{
			if (next->prev != NULL)
				next->prev->next = next->next;
			else
				qp->fragments = next->next;

			if (next->next != NULL)
				next->next->prev = next->prev;

			kfree_skb(next->skb,FREE_READ);
			kfree_s(next, sizeof(struct ipfrag));
		}
	}

	/*
	 *	Insert this fragment in the chain of fragments.
	 */

	tfp = ip_frag_create(offset, end, skb, ptr);

	/*
	 *	No memory to save the fragment - so throw the lot
	 */

	if (!tfp)
	{
		skb->sk = NULL;
		kfree_skb(skb, FREE_READ);
		return NULL;
	}
	tfp->prev = prev;
	tfp->next = next;
	if (prev != NULL)
		prev->next = tfp;
	else
		qp->fragments = tfp;

	if (next != NULL)
		next->prev = tfp;

	/*
	 *	OK, so we inserted this new fragment into the chain.
	 *	Check if we now have a full IP datagram which we can
	 *	bump up to the IP layer...
	 */

	if (ip_done(qp))
	{
		skb2 = ip_glue(qp);	/* glue together the fragments */
		return(skb2);
	}
	return(NULL);
}

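/*
 *	The frag_off field that ip_defrag() picks apart above packs three
 *	flag bits and a 13-bit offset counted in 8-octet units. A sketch of
 *	the decode in isolation (IP_DF, IP_MF and IP_OFFSET as in ip.h):
 */
#if 0
static void example_decode_frag_off(unsigned short frag_off_net)
{
	unsigned short v = ntohs(frag_off_net);
	int dont_fragment  = (v & IP_DF) != 0;		/* 0x4000 */
	int more_fragments = (v & IP_MF) != 0;		/* 0x2000 */
	int byte_offset    = (v & IP_OFFSET) << 3;	/* units of 8 octets */

	/*
	 *	Example: v == 0x2001 means MF set and offset 1, i.e. this
	 *	fragment's data starts 8 bytes into the original datagram.
	 */
}
#endif
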

/*
 *	This IP datagram is too large to be sent in one piece. Break it up into
 *	smaller pieces (each of size equal to the MAC header plus IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending by calling the
 *	ip_queue_xmit(). Note that this is recursion, and bad things will happen
 *	if this function causes a loop...
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 *
 *	**Protocol Violation**
 *	We copy all the options to each fragment. !FIXME!
 */
void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 *	Point into the IP datagram header.
	 */

	raw = skb->data;
	iph = (struct iphdr *) (raw + dev->hard_header_len);

	skb->ip_hdr = iph;

	/*
	 *	Setup starting values.
	 */

	hlen = (iph->ihl * sizeof(unsigned long));
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 *	Check for any "DF" flag. [DF means do not fragment]
	 */

	if (ntohs(iph->frag_off) & IP_DF)
	{
		ip_statistics.IpFragFails++;
		icmp_send(skb,ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev);
		return;
	}

	/*
	 *	The protocol doesn't seem to say what to do in the case that the
	 *	frame + options doesn't fit the mtu. As it used to fall down dead
	 *	in this case we were fortunate it didn't happen
	 */

	if(mtu<8)
	{
		/* It's wrong but it's better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 *	Fragment the datagram.
	 */

	/*
	 *	The initial offset is 0 for a complete frame. When
	 *	fragmenting fragments it's wherever this one starts.
	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & 0x1fff) << 3;
	else
		offset = 0;


	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len + hlen,GFP_ATOMIC)) == NULL)
		{
			printk("IP: frag: no memory for new fragment!\n");
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb2->len = len + hlen;
		skb2->h.raw=(char *) skb2->data;
		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->mem_len;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->h.raw, raw, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));
		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *	last fragment then keep the MF bit set on each piece
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}

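/*
 *	Worked example of the size arithmetic above, with illustrative
 *	Ethernet-style numbers: dev->mtu = 1500 and a 20 byte IP header
 *	give mtu = 1480 data bytes per fragment. A 4000 byte data part
 *	then leaves in pieces of 1480, 1480 and 1040 bytes at 8-octet
 *	offsets 0, 185 and 370. Had the data space been, say, 1006, the
 *	len/=8; len*=8 step would round each non-final fragment down to
 *	1000 so that the next offset stays on an 8-octet boundary.
 */
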


#ifdef CONFIG_IP_FORWARD

/*
 *	Forward an IP datagram to its next destination.
 */

static void ip_forward(struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct device *dev2;	/* Output device */
	struct iphdr *iph;	/* Our header */
	struct sk_buff *skb2;	/* Output packet */
	struct rtable *rt;	/* Route we use */
	unsigned char *ptr;	/* Data pointer */
	unsigned long raddr;	/* Router IP address */

	/*
	 *	According to the RFC, we must first decrease the TTL field. If
	 *	that reaches zero, we must reply an ICMP control message telling
	 *	that the packet's lifetime expired.
	 *
	 *	Exception:
	 *	We may not generate an ICMP for an ICMP. icmp_send does the
	 *	enforcement of this so we can forget it here. It is however
	 *	sometimes VERY important.
	 */

	iph = skb->h.iph;
	iph->ttl--;
	if (iph->ttl <= 0)
	{
		/* Tell the sender its packet died... */
		icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, dev);
		return;
	}

	/*
	 *	Re-compute the IP header checksum.
	 *	This is inefficient. We know what has happened to the header
	 *	and could thus adjust the checksum as Phil Karn does in KA9Q
	 */

	ip_send_check(iph);

	/*
	 *	OK, the packet is still valid. Fetch its destination address,
	 *	and give it to the IP sender for further processing.
	 */

	rt = ip_rt_route(iph->daddr, NULL, NULL);
	if (rt == NULL)
	{
		/*
		 *	Tell the sender its packet cannot be delivered. Again
		 *	ICMP is screened later.
		 */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, dev);
		return;
	}


	/*
	 *	Gosh. Not only is the packet valid; we even know how to
	 *	forward it onto its final destination. Can we say this
	 *	is being plain lucky?
	 *	If the router told us that there is no GW, use the dest.
	 *	IP address itself- we seem to be connected directly...
	 */

	raddr = rt->rt_gateway;

	if (raddr != 0)
	{
		/*
		 *	There is a gateway so find the correct route for it.
		 *	Gateways cannot in turn be gatewayed.
		 */
		rt = ip_rt_route(raddr, NULL, NULL);
		if (rt == NULL)
		{
			/*
			 *	Tell the sender its packet cannot be delivered...
			 */
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, dev);
			return;
		}
		if (rt->rt_gateway != 0)
			raddr = rt->rt_gateway;
	}
	else
		raddr = iph->daddr;

	/*
	 *	Having picked a route we can now send the frame out.
	 */

	dev2 = rt->rt_dev;

	/*
	 *	In IP you never forward a frame on the interface that it arrived
	 *	upon. We should generate an ICMP HOST REDIRECT giving the route
	 *	we calculated.
	 *	For now just dropping the packet is an acceptable compromise.
	 */

	if (dev == dev2)
		return;

	/*
	 *	We now allocate a new buffer, and copy the datagram into it.
	 *	If the indicated interface is up and running, kick it.
	 */

	if (dev2->flags & IFF_UP)
	{

		/*
		 *	Current design decrees we copy the packet. For identical header
		 *	lengths we could avoid it. The new skb code will let us push
		 *	data so the problem goes away then.
		 */

		skb2 = alloc_skb(dev2->hard_header_len + skb->len, GFP_ATOMIC);
		/*
		 *	This is rare and since IP is tolerant of network failures
		 *	quite harmless.
		 */
		if (skb2 == NULL)
		{
			printk("\nIP: No memory available for IP forward\n");
			return;
		}
		ptr = skb2->data;
		skb2->free = 1;
		skb2->len = skb->len + dev2->hard_header_len;
		skb2->h.raw = ptr;

		/*
		 *	Copy the packet data into the new buffer.
		 */
		memcpy(ptr + dev2->hard_header_len, skb->h.raw, skb->len);

		/* Now build the MAC header. */
		(void) ip_send(skb2, raddr, skb->len, dev2, dev2->pa_addr);

		ip_statistics.IpForwDatagrams++;

		/*
		 *	See if it needs fragmenting. Note in ip_rcv we tagged
		 *	the fragment type. This must be right so that
		 *	the fragmenter does the right thing.
		 */

		if(skb2->len > dev2->mtu + dev2->hard_header_len)
		{
			ip_fragment(NULL,skb2,dev2, is_frag);
			kfree_skb(skb2,FREE_WRITE);
		}
		else
		{
			/*
			 *	Map service types to priority. We lie about
			 *	throughput being low priority, but it's a good
			 *	choice to help improve general usage.
			 */
			if(iph->tos & IPTOS_LOWDELAY)
				dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
			else if(iph->tos & IPTOS_THROUGHPUT)
				dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
			else
				dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
		}
	}
}


#endif

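/*
 *	The comment in ip_forward() notes that recomputing the whole header
 *	checksum after a TTL decrement is wasteful. For reference, a sketch
 *	of the incremental adjustment (the RFC 1141 technique the KA9Q
 *	remark alludes to; RFC 1624 later refined a corner case of it).
 *	This is not what the code above does today.
 */
#if 0
static void example_decrease_ttl(struct iphdr *iph)
{
	unsigned long sum;

	iph->ttl--;
	/* TTL is the high octet of its 16-bit word, so that word just
	   dropped by 0x0100; add the same amount back into the stored
	   ones-complement checksum and fold the carry. */
	sum = iph->check + htons(0x0100);
	iph->check = sum + (sum >> 16);
}
#endif
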
/*
 *	This function receives all incoming IP datagrams.
 */

int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	unsigned char hash;
	unsigned char flag = 0;
	unsigned char opts_p = 0;	/* Set iff the packet has options. */
	struct inet_protocol *ipprot;
	static struct options opt;	/* since we don't use these yet, and they
					   take up stack space. */
	int brd;
	int is_frag=0;


	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	(4.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0)
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 */

	skb->len=ntohs(iph->tot_len);

	/*
	 *	Next analyse the packet for options. Studies show that under one
	 *	packet in a thousand has options....
	 */

	if (iph->ihl != 5)
	{	/* Options are present; the typical optionless IP packet skips all this. */
		memset((char *) &opt, 0, sizeof(opt));
		if (do_options(iph, &opt) != 0)
			return 0;
		opts_p = 1;
	}

	/*
	 *	Remember if the frame is fragmented.
	 */

	if (iph->frag_off & 0x0020)	/* the MF bit; frag_off is still in network byte order here */
		is_frag|=1;

	/*
	 *	A non-initial fragment (nonzero fragment offset)?
	 */

	if (ntohs(iph->frag_off) & 0x1fff)
		is_frag|=2;

	/*
	 *	Do any IP forwarding required. chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 */

	if ((brd = ip_chk_addr(iph->daddr)) == 0)
	{
		/*
		 *	Don't forward multicast or broadcast frames.
		 */

		if(skb->pkt_type!=PACKET_HOST)
		{
			kfree_skb(skb,FREE_WRITE);
			return 0;
		}

		/*
		 *	The packet is for another target. Forward the frame
		 */

#ifdef CONFIG_IP_FORWARD
		ip_forward(skb, dev, is_frag);
#else
/*		printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
#endif
		/*
		 *	The forwarder is inefficient and copies the packet. We
		 *	free the original now.
		 */

		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Reassemble IP fragments.
	 */

	if(is_frag)
	{
		/* Defragment. Obtain the complete packet if there is one */
		skb=ip_defrag(iph,skb,dev);
		if(skb==NULL)
			return 0;
		iph=skb->h.iph;
	}

	/*
	 *	Point into the IP datagram, just past the header.
	 */

	skb->ip_hdr = iph;
	skb->h.raw += iph->ihl*4;

	/*
	 *	skb->h.raw now points at the protocol beyond the IP header.
	 */

	hash = iph->protocol & (MAX_INET_PROTOS -1);
	for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
	{
		struct sk_buff *skb2;

		if (ipprot->protocol != iph->protocol)
			continue;
		/*
		 *	See if we need to make a copy of it. This will
		 *	only be set if more than one protocol wants it,
		 *	and then not for the last one.
		 *
		 *	This is an artifact of poor upper protocol design.
		 *	Because the upper protocols damage the actual packet
		 *	we must do copying. In actual fact it's even worse
		 *	than this as TCP may hold on to the buffer.
		 */
		if (ipprot->copy)
		{
			skb2 = skb_clone(skb, GFP_ATOMIC);
			if(skb2==NULL)
				continue;
		}
		else
		{
			skb2 = skb;
		}
		flag = 1;

		/*
		 *	Pass on the datagram to each protocol that wants it,
		 *	based on the datagram protocol. We should really
		 *	check the protocol handler's return values here...
		 */
		ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);

	}

	/*
	 *	All protocols checked.
	 *	If this packet was a broadcast, we may *not* reply to it, since that
	 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
	 *	ICMP reply messages get queued up for transmission...)
	 */

	if (!flag)
	{
		if (brd != IS_BROADCAST)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, dev);
		kfree_skb(skb, FREE_WRITE);
	}

	return(0);
}


/*
 *	Queues a packet to be sent, and starts the transmitter
 *	if necessary. if free = 1 then we free the block after
 *	transmit, otherwise we don't. If free==2 we not only
 *	free the block but also don't assign a new ip seq number.
 *	This routine also needs to put in the total length,
 *	and compute the checksum
 */

void ip_queue_xmit(struct sock *sk, struct device *dev,
		   struct sk_buff *skb, int free)
{
	struct iphdr *iph;
	unsigned char *ptr;

	/* Sanity check */
	if (dev == NULL)
	{
		printk("IP: ip_queue_xmit dev = NULL\n");
		return;
	}

	IS_SKB(skb);

	/*
	 *	Do some book-keeping in the packet for later
	 */


	skb->dev = dev;
	skb->when = jiffies;

	/*
	 *	Find the IP header and set the length. This is bad
	 *	but once we get the skb data handling code in the
	 *	hardware will push its header sensibly and we will
	 *	set skb->ip_hdr to avoid this mess and the fixed
	 *	header length problem
	 */

	ptr = skb->data;
	ptr += dev->hard_header_len;
	iph = (struct iphdr *)ptr;
	skb->ip_hdr = iph;
	iph->tot_len = htons(skb->len-dev->hard_header_len);

	/*
	 *	No reassigning numbers to fragments...
	 */

	if(free!=2)
		iph->id = htons(ip_id_count++);
	else
		free=1;

	/* All buffers without an owner socket get freed */
	if (sk == NULL)
		free = 1;

	skb->free = free;

	/*
	 *	Do we need to fragment. Again this is inefficient.
	 *	We need to somehow lock the original buffer and use
	 *	bits of it.
	 */

	if(skb->len > dev->mtu + dev->hard_header_len)
	{
		ip_fragment(sk,skb,dev,0);
		IS_SKB(skb);
		kfree_skb(skb,FREE_WRITE);
		return;
	}

	/*
	 *	Add an IP checksum
	 */

	ip_send_check(iph);

	/*
	 *	Print the frame when debugging
	 */

	/*
	 *	More debugging. You cannot queue a packet already on a list
	 *	Spot this and moan loudly.
	 */
	if (skb->next != NULL)
	{
		printk("ip_queue_xmit: next != NULL\n");
		skb_unlink(skb);
	}

	/*
	 *	If a sender wishes the packet to remain unfreed
	 *	we add it to his send queue. This arguably belongs
	 *	in the TCP level since nobody else uses it. BUT
	 *	remember IPng might change all the rules.
	 */

	if (!free)
	{
		unsigned long flags;
		/* The socket now has more outstanding blocks */

		sk->packets_out++;

		/* Protect the list for a moment */
		save_flags(flags);
		cli();

		if (skb->link3 != NULL)
		{
			printk("ip.c: link3 != NULL\n");
			skb->link3 = NULL;
		}
		if (sk->send_head == NULL)
		{
			sk->send_tail = skb;
			sk->send_head = skb;
		}
		else
		{
			sk->send_tail->link3 = skb;
			sk->send_tail = skb;
		}
		/* skb->link3 is NULL */

		/* Interrupt restore */
		restore_flags(flags);
		/* Set the IP write timeout to the round trip time for the packet.
		   If an acknowledge has not arrived by then we may wish to act */
		reset_timer(sk, TIME_WRITE, sk->rto);
	}
	else
		/* Remember who owns the buffer */
		skb->sk = sk;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	ip_statistics.IpOutRequests++;

	if (dev->flags & IFF_UP)
	{
		/*
		 *	If we have an owner use its priority setting,
		 *	otherwise use NORMAL
		 */

		if (sk != NULL)
		{
			dev_queue_xmit(skb, dev, sk->priority);
		}
		else
		{
			dev_queue_xmit(skb, dev, SOPRI_NORMAL);
		}
	}
	else
	{
		ip_statistics.IpOutDiscards++;
		if (free)
			kfree_skb(skb, FREE_WRITE);
	}
}


/*
 *	A socket has timed out on its send queue and wants to do a
 *	little retransmitting. Currently this means TCP.
 */

void ip_do_retransmit(struct sock *sk, int all)
{
	struct sk_buff * skb;
	struct proto *prot;
	struct device *dev;
	int retransmits;

	prot = sk->prot;
	skb = sk->send_head;
	retransmits = sk->retransmits;

	while (skb != NULL)
	{
		dev = skb->dev;
		IS_SKB(skb);
		skb->when = jiffies;

		/*
		 *	In general it's OK just to use the old packet. However we
		 *	need to use the current ack and window fields. Urg and
		 *	urg_ptr could possibly stand to be updated as well, but we
		 *	don't keep the necessary data. That shouldn't be a problem,
		 *	if the other end is doing the right thing. Since we're
		 *	changing the packet, we have to issue a new IP identifier.
		 */

		/* this check may be unnecessary - retransmit only for TCP */
		if (sk->protocol == IPPROTO_TCP) {
			struct tcphdr *th;
			struct iphdr *iph;
			int size;

			iph = (struct iphdr *)(skb->data + dev->hard_header_len);
			th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
			size = skb->len - (((unsigned char *) th) - skb->data);

			iph->id = htons(ip_id_count++);
			ip_send_check(iph);

			th->ack_seq = ntohl(sk->acked_seq);
			th->window = ntohs(tcp_select_window(sk));
			tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
		}

		/*
		 *	If the interface is (still) up and running, kick it.
		 */

		if (dev->flags & IFF_UP)
		{
			/*
			 *	If the packet is still being sent by the device/protocol
			 *	below then don't retransmit. This is both needed, and good -
			 *	especially with connected mode AX.25 where it stops resends
			 *	occurring of an as yet unsent anyway frame!
			 *	We still add up the counts as the round trip time wants
			 *	adjusting.
			 */
			if (sk && !skb_device_locked(skb))
			{
				/* Remove it from any existing driver queue first! */
				skb_unlink(skb);
				/* Now queue it */
				ip_statistics.IpOutRequests++;
				dev_queue_xmit(skb, dev, sk->priority);
			}
		}

		/*
		 *	Count retransmissions
		 */
		retransmits++;
		sk->prot->retransmits ++;

		/*
		 *	Only one retransmit requested.
		 */
		if (!all)
			break;

		/*
		 *	This should cut it off before we send too many packets.
		 */
		if (sk->retransmits > sk->cong_window)
			break;
		skb = skb->link3;
	}
}

/*
 *	This is the normal code called for timeouts. It does the retransmission
 *	and then does backoff. ip_do_retransmit is separated out because
 *	tcp_ack needs to send stuff from the retransmit queue without
 *	initiating a backoff.
 */

void ip_retransmit(struct sock *sk, int all)
{
	ip_do_retransmit(sk, all);

	/*
	 *	Increase the timeout each time we retransmit. Note that
	 *	we do not increase the rtt estimate. rto is initialized
	 *	from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
	 *	that doubling rto each time is the least we can get away with.
	 *	In KA9Q, Karn uses this for the first few times, and then
	 *	goes to quadratic. netBSD doubles, but only goes up to *64,
	 *	and clamps at 1 to 64 sec afterwards. Note that 120 sec is
	 *	defined in the protocol as the maximum possible RTT. I guess
	 *	we'll have to use something other than TCP to talk to the
	 *	University of Mars.
	 */

	sk->retransmits++;
	sk->backoff++;
	sk->rto = min(sk->rto << 1, 120*HZ);
	reset_timer(sk, TIME_WRITE, sk->rto);
}

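/*
 *	Worked example of the backoff above, assuming HZ = 100 and an
 *	initial rto of one second (100 jiffies): successive timeouts give
 *	200, 400, 800, ... jiffies until min(rto << 1, 120*HZ) clamps the
 *	value at 12000 jiffies, the 120 second maximum RTT noted in the
 *	comment in ip_retransmit().
 */
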
/*
 *	Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
 *	an IP socket.
 *
 *	We implement IP_TOS (type of service), IP_TTL (time to live).
 *
 *	Next release we will sort out IP_OPTIONS since for some people they are kind of important.
 */

int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
	int val,err;

	if (optval == NULL)
		return(-EINVAL);

	err=verify_area(VERIFY_READ, optval, sizeof(int));
	if(err)
		return err;

	val = get_fs_long((unsigned long *)optval);

	if(level!=SOL_IP)
		return -EOPNOTSUPP;

	switch(optname)
	{
		case IP_TOS:
			if(val<0||val>255)
				return -EINVAL;
			sk->ip_tos=val;
			if(val==IPTOS_LOWDELAY)
				sk->priority=SOPRI_INTERACTIVE;
			if(val==IPTOS_THROUGHPUT)
				sk->priority=SOPRI_BACKGROUND;
			return 0;
		case IP_TTL:
			if(val<1||val>255)
				return -EINVAL;
			sk->ip_ttl=val;
			return 0;
		/* IP_OPTIONS and friends go here eventually */
		default:
			return(-ENOPROTOOPT);
	}
}

/*
 *	Get the options. Note for future reference. The GET of IP options gets the
 *	_received_ ones. The set sets the _sent_ ones.
 */

int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
	int val,err;

	if(level!=SOL_IP)
		return -EOPNOTSUPP;

	switch(optname)
	{
		case IP_TOS:
			val=sk->ip_tos;
			break;
		case IP_TTL:
			val=sk->ip_ttl;
			break;
		default:
			return(-ENOPROTOOPT);
	}
	err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
	if(err)
		return err;
	put_fs_long(sizeof(int),(unsigned long *) optlen);

	err=verify_area(VERIFY_WRITE, optval, sizeof(int));
	if(err)
		return err;
	put_fs_long(val,(unsigned long *)optval);

	return(0);
}

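/*
 *	For reference, the matching user-level calls. A userland sketch,
 *	not kernel code: it assumes the usual libc definitions of SOL_IP,
 *	IP_TOS, IP_TTL and IPTOS_LOWDELAY from <netinet/in.h> and
 *	<netinet/ip.h>, and an already created socket descriptor. (Older
 *	libcs took an int * length for getsockopt; newer ones use
 *	socklen_t *.)
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>

int example_tos_ttl(int fd)
{
	int tos = IPTOS_LOWDELAY;	/* also bumps sk->priority above */
	int ttl = 64;
	int got, len = sizeof(got);

	if (setsockopt(fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
		return -1;
	if (setsockopt(fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
		return -1;
	/* ip_getsockopt() above hands back the value we set. */
	if (getsockopt(fd, SOL_IP, IP_TTL, &got, &len) < 0)
		return -1;
	return got;	/* 64 */
}
#endif
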
/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type =
{
	0,	/* MUTTER ntohs(ETH_P_IP),*/
	0,	/* copy */
	ip_rcv,
	NULL,
	NULL,
};


/*
 *	IP registers the packet type and then calls the subprotocol initialisers
 */

void ip_init(void)
{
	ip_packet_type.type=htons(ETH_P_IP);
	dev_add_pack(&ip_packet_type);
/*	ip_raw_init();
	ip_packet_init();
	ip_tcp_init();
	ip_udp_init();*/
}