1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The Internet Protocol (IP) module.
7 *
8 * Version: @(#)ip.c 1.0.16b 9/1/93
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Donald Becker, <becker@super.org>
13 * Alan Cox, <gw4pts@gw4pts.ampr.org>
14 * Richard Underwood
15 * Stefan Becker, <stefanb@yello.ping.de>
16 * Jorge Cwik, <jorge@laser.satlink.net>
17 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
18 *
19 *
20 * Fixes:
21 * Alan Cox : Commented a couple of minor bits of surplus code
22 * Alan Cox : Undefining IP_FORWARD doesn't include the code
23 * (just stops a compiler warning).
24 * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes
25 * are junked rather than corrupting things.
26 * Alan Cox : Frames to bad broadcast subnets are dumped
27 * We used to process them non broadcast and
28 * boy could that cause havoc.
29 * Alan Cox : ip_forward sets the free flag on the
30 * new frame it queues. Still crap because
31 * it copies the frame but at least it
32 * doesn't eat memory too.
33 * Alan Cox : Generic queue code and memory fixes.
34 * Fred Van Kempen : IP fragment support (borrowed from NET2E)
35 * Gerhard Koerting: Forward fragmented frames correctly.
36 * Gerhard Koerting: Fixes to my fix of the above 8-).
37 * Gerhard Koerting: IP interface addressing fix.
38 * Linus Torvalds : More robustness checks
39 * Alan Cox : Even more checks: Still not as robust as it ought to be
40 * Alan Cox : Save IP header pointer for later
41 * Alan Cox : ip option setting
42 * Alan Cox : Use ip_tos/ip_ttl settings
43 * Alan Cox : Fragmentation bogosity removed
44 * (Thanks to Mark.Bush@prg.ox.ac.uk)
45 * Dmitry Gorodchanin : Send of a raw packet crash fix.
46 * Alan Cox : Silly ip bug when an overlength
47 * fragment turns up. Now frees the
48 * queue.
49 * Linus Torvalds/ : Memory leakage on fragmentation
50 * Alan Cox : handling.
51 * Gerhard Koerting: Forwarding uses IP priority hints
52 * Teemu Rantanen : Fragment problems.
53 * Alan Cox : General cleanup, comments and reformat
54 * Alan Cox : SNMP statistics
55 * Alan Cox : BSD address rule semantics. Also see
56 * UDP as there is a nasty checksum issue
57 * if you do things the wrong way.
58 * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file
59 * Alan Cox : IP options adjust sk->priority.
60 * Pedro Roque : Fix mtu/length error in ip_forward.
61 * Alan Cox : Avoid ip_chk_addr when possible.
62 * Richard Underwood : IP multicasting.
63 * Alan Cox : Cleaned up multicast handlers.
64 * Alan Cox : RAW sockets demultiplex in the BSD style.
65 * Gunther Mayer : Fix the SNMP reporting typo
66 * Alan Cox : Always in group 224.0.0.1
67 * Pauline Middelink : Fast ip_checksum update when forwarding
68 * Masquerading support.
69 * Alan Cox : Multicast loopback error for 224.0.0.1
70 * Alan Cox : IP_MULTICAST_LOOP option.
71 * Alan Cox : Use notifiers.
72 * Bjorn Ekwall : Removed ip_csum (from slhc.c too)
73 * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!)
74 * Stefan Becker : Send out ICMP HOST REDIRECT
75 * Arnt Gulbrandsen : ip_build_xmit
76 * Alan Cox : Per socket routing cache
77 * Alan Cox : Fixed routing cache, added header cache.
78 * Alan Cox : Loopback didnt work right in original ip_build_xmit - fixed it.
79 * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net.
80 * Alan Cox : Incoming IP option handling.
81 * Alan Cox : Set saddr on raw output frames as per BSD.
82 * Alan Cox : Stopped broadcast source route explosions.
83 * Alan Cox : Can disable source routing
84 * Takeshi Sone : Masquerading didn't work.
85 * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible.
86 * Alan Cox : Memory leaks, tramples, misc debugging.
87 * Alan Cox : Fixed multicast (by popular demand 8))
88 * Alan Cox : Fixed forwarding (by even more popular demand 8))
89 * Alan Cox : Fixed SNMP statistics [I think]
90 * Gerhard Koerting : IP fragmentation forwarding fix
91 * Alan Cox : Device lock against page fault.
92 * Alan Cox : IP_HDRINCL facility.
93 * Werner Almesberger : Zero fragment bug
94 * Alan Cox : RAW IP frame length bug
95 * Alan Cox : Outgoing firewall on build_xmit
96 * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel
97 * Alan Cox : Multicast routing hooks
98 *
99 *
100 *
101 * To Fix:
102 * IP option processing is mostly not needed. ip_forward needs to know about routing rules
103 * and time stamp but that's about all. Use the route mtu field here too
104 * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
105 * and could be made very efficient with the addition of some virtual memory hacks to permit
106 * the allocation of a buffer that can then be 'grown' by twiddling page tables.
107 * Output fragmentation wants updating along with the buffer management to use a single
108 * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
109 * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
110 * fragmentation anyway.
111 *
112 * FIXME: copy frag 0 iph to qp->iph
113 *
114 * This program is free software; you can redistribute it and/or
115 * modify it under the terms of the GNU General Public License
116 * as published by the Free Software Foundation; either version
117 * 2 of the License, or (at your option) any later version.
118 */
119
120 #include <asm/segment.h>
121 #include <asm/system.h>
122 #include <linux/types.h>
123 #include <linux/kernel.h>
124 #include <linux/sched.h>
125 #include <linux/mm.h>
126 #include <linux/string.h>
127 #include <linux/errno.h>
128 #include <linux/config.h>
129
130 #include <linux/socket.h>
131 #include <linux/sockios.h>
132 #include <linux/in.h>
133 #include <linux/inet.h>
134 #include <linux/netdevice.h>
135 #include <linux/etherdevice.h>
136 #include <linux/proc_fs.h>
137 #include <linux/stat.h>
138
139 #include <net/snmp.h>
140 #include <net/ip.h>
141 #include <net/protocol.h>
142 #include <net/route.h>
143 #include <net/tcp.h>
144 #include <net/udp.h>
145 #include <linux/skbuff.h>
146 #include <net/sock.h>
147 #include <net/arp.h>
148 #include <net/icmp.h>
149 #include <net/raw.h>
150 #include <net/checksum.h>
151 #include <linux/igmp.h>
152 #include <linux/ip_fw.h>
153 #include <linux/mroute.h>
154 #include <net/netlink.h>
155
/* Defragmentation is compiled in unconditionally here; both local
 * delivery and forwarding of fragments depend on it. */
#define CONFIG_IP_DEFRAG

extern int last_retran;
extern void sort_send(struct sock *sk);

/* NB: classic double-evaluation macro - do not pass expressions with
 * side effects as arguments. */
#define min(a,b) ((a)<(b)?(a):(b))

/*
 * SNMP management statistics.
 *
 * The first two initialisers are ipForwarding (1 = gateway, 2 = host)
 * and ipDefaultTTL as defined by the IP MIB (RFC 1213).
 */

#ifdef CONFIG_IP_FORWARD
struct ip_mib ip_statistics={1,64,};	/* Forwarding=Yes, Default TTL=64 */
#else
struct ip_mib ip_statistics={2,64,};	/* Forwarding=No, Default TTL=64 */
#endif
172
173 /*
174 * Write options to IP header, record destination address to
175 * source route option, address of outgoing interface
176 * (we should already know it, so that this function is allowed be
177 * called only after routing decision) and timestamp,
178 * if we originate this datagram.
179 */
180
181 static void ip_options_build(struct sk_buff * skb, struct options * opt,
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
182 __u32 daddr, __u32 saddr,
183 int is_frag) {
184 unsigned char * iph = (unsigned char*)skb->ip_hdr;
185
186 memcpy(skb->proto_priv, opt, sizeof(struct options));
187 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
188 opt = (struct options*)skb->proto_priv;
189 opt->is_data = 0;
190
191 if (opt->srr)
192 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
193
194 if (!is_frag) {
195 if (opt->rr_needaddr)
196 memcpy(iph+opt->rr+iph[opt->rr+2]-5, &saddr, 4);
197 if (opt->ts_needaddr)
198 memcpy(iph+opt->ts+iph[opt->ts+2]-9, &saddr, 4);
199 if (opt->ts_needtime) {
200 struct timeval tv;
201 __u32 midtime;
202 do_gettimeofday(&tv);
203 midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
204 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
205 }
206 return;
207 }
208 if (opt->rr) {
209 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
210 opt->rr = 0;
211 opt->rr_needaddr = 0;
212 }
213 if (opt->ts) {
214 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
215 opt->ts = 0;
216 opt->ts_needaddr = opt->ts_needtime = 0;
217 }
218 }
219
220 int ip_options_echo(struct options * dopt, struct options * sopt,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
221 __u32 daddr, __u32 saddr,
222 struct sk_buff * skb) {
223 unsigned char *sptr, *dptr;
224 int soffset, doffset;
225 int optlen;
226
227 memset(dopt, 0, sizeof(struct options));
228
229 dopt->is_data = 1;
230
231 if (!sopt)
232 sopt = (struct options*)skb->proto_priv;
233
234 if (sopt->optlen == 0) {
235 dopt->optlen = 0;
236 return 0;
237 }
238
239 sptr = (sopt->is_data ? sopt->__data - sizeof(struct iphdr) :
240 (unsigned char *)skb->ip_hdr);
241 dptr = dopt->__data;
242
243 if (sopt->rr) {
244 optlen = sptr[sopt->rr+1];
245 soffset = sptr[sopt->rr+2];
246 dopt->rr = dopt->optlen + sizeof(struct iphdr);
247 memcpy(dptr, sptr+sopt->rr, optlen);
248 if (sopt->rr_needaddr && soffset <= optlen) {
249 if (soffset + 3 > optlen)
250 return -EINVAL;
251 dptr[2] = soffset + 4;
252 dopt->rr_needaddr = 1;
253 }
254 dptr += optlen;
255 dopt->optlen += optlen;
256 }
257 if (sopt->ts) {
258 optlen = sptr[sopt->ts+1];
259 soffset = sptr[sopt->ts+2];
260 dopt->ts = dopt->optlen + sizeof(struct iphdr);
261 memcpy(dptr, sptr+sopt->ts, optlen);
262 if (soffset <= optlen) {
263 if (dopt->ts_needaddr) {
264 if (soffset + 3 > optlen)
265 return -EINVAL;
266 dopt->ts_needaddr = 1;
267 soffset += 4;
268 }
269 if (dopt->ts_needtime) {
270 if (soffset + 3 > optlen)
271 return -EINVAL;
272 dopt->ts_needtime = 1;
273 soffset += 4;
274 }
275 if (((struct timestamp*)(dptr+1))->flags == IPOPT_TS_PRESPEC) {
276 __u32 addr;
277 memcpy(&addr, sptr+soffset-9, 4);
278 if (ip_chk_addr(addr) == 0) {
279 dopt->ts_needtime = 0;
280 dopt->ts_needaddr = 0;
281 soffset -= 8;
282 }
283 }
284 dptr[2] = soffset;
285 }
286 dptr += optlen;
287 dopt->optlen += optlen;
288 }
289 if (sopt->srr) {
290 unsigned char * start = sptr+sopt->srr;
291 __u32 faddr;
292
293 optlen = start[1];
294 soffset = start[2];
295 doffset = 0;
296 if (soffset > optlen)
297 soffset = optlen + 1;
298 soffset -= 4;
299 if (soffset > 3) {
300 memcpy(&faddr, &start[soffset-1], 4);
301 for (soffset-=4, doffset=4; soffset > 3; soffset-=4, doffset+=4)
302 memcpy(&dptr[doffset-1], &start[soffset-1], 4);
303 /*
304 * RFC1812 requires to fix illegal source routes.
305 */
306 if (memcmp(&saddr, &start[soffset+3], 4) == 0)
307 doffset -= 4;
308 }
309 if (doffset > 3) {
310 memcpy(&start[doffset-1], &daddr, 4);
311 dopt->faddr = faddr;
312 dptr[0] = start[0];
313 dptr[1] = doffset+3;
314 dptr[2] = 4;
315 dptr += doffset+3;
316 dopt->srr = dopt->optlen + sizeof(struct iphdr);
317 dopt->optlen += doffset+3;
318 dopt->is_strictroute = sopt->is_strictroute;
319 }
320 }
321 while (dopt->optlen & 3) {
322 *dptr++ = IPOPT_END;
323 dopt->optlen++;
324 }
325 return 0;
326 }
327
328 static void ip_options_fragment(struct sk_buff * skb) {
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
329 unsigned char * optptr = (unsigned char*)skb->ip_hdr;
330 struct options * opt = (struct options*)skb->proto_priv;
331 int l = opt->optlen;
332 int optlen;
333
334 while (l > 0) {
335 switch (*optptr) {
336 case IPOPT_END:
337 return;
338 case IPOPT_NOOP:
339 l--;
340 optptr++;
341 continue;
342 }
343 optlen = optptr[1];
344 if (l<2 || optlen>l)
345 return;
346 if (!(*optptr & 0x80))
347 memset(optptr, IPOPT_NOOP, optlen);
348 l -= optlen;
349 optptr += optlen;
350 }
351 opt->ts = 0;
352 opt->rr = 0;
353 opt->rr_needaddr = 0;
354 opt->ts_needaddr = 0;
355 opt->ts_needtime = 0;
356 return;
357 }
358
359 /*
360 * Verify options and fill pointers in struct optinos.
361 * Caller should clear *opt, and set opt->data.
362 * If opt == NULL, then skb->data should point to IP header.
363 */
364
365 int ip_options_compile(struct options * opt, struct sk_buff * skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
366 {
367 int l;
368 unsigned char * iph;
369 unsigned char * optptr;
370 int optlen;
371 unsigned char * pp_ptr = NULL;
372
373 if (!opt) {
374 opt = (struct options*)skb->proto_priv;
375 memset(opt, 0, sizeof(struct options));
376 iph = (unsigned char*)skb->ip_hdr;
377 opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
378 optptr = iph + sizeof(struct iphdr);
379 opt->is_data = 0;
380 } else {
381 optptr = opt->is_data ? opt->__data : (unsigned char*)&skb->ip_hdr[1];
382 iph = optptr - sizeof(struct iphdr);
383 }
384
385 for (l = opt->optlen; l > 0; ) {
386 switch (*optptr) {
387 case IPOPT_END:
388 for (optptr++, l--; l>0; l--) {
389 if (*optptr != IPOPT_END) {
390 *optptr = IPOPT_END;
391 opt->is_changed = 1;
392 }
393 }
394 goto eol;
395 case IPOPT_NOOP:
396 l--;
397 optptr++;
398 continue;
399 }
400 optlen = optptr[1];
401 if (l<2 || optlen>l) {
402 pp_ptr = optptr;
403 break;
404 }
405 switch (*optptr) {
406 case IPOPT_SSRR:
407 case IPOPT_LSRR:
408 if (optlen < 3) {
409 pp_ptr = optptr + 1;
410 break;
411 }
412 if (optptr[2] < 4) {
413 pp_ptr = optptr + 2;
414 break;
415 }
416 /* NB: cf RFC-1812 5.2.4.1 */
417 if (opt->srr) {
418 pp_ptr = optptr;
419 break;
420 }
421 if (!skb) {
422 if (optptr[2] != 4 || optlen < 7 || ((optlen-3) & 3)) {
423 pp_ptr = optptr + 1;
424 break;
425 }
426 memcpy(&opt->faddr, &optptr[3], 4);
427 if (optlen > 7)
428 memmove(&optptr[3], &optptr[7], optlen-7);
429 }
430 opt->is_strictroute = (optptr[0] == IPOPT_SSRR);
431 opt->srr = optptr - iph;
432 break;
433 case IPOPT_RR:
434 if (opt->rr) {
435 pp_ptr = optptr;
436 break;
437 }
438 if (optlen < 3) {
439 pp_ptr = optptr + 1;
440 break;
441 }
442 if (optptr[2] < 4) {
443 pp_ptr = optptr + 2;
444 break;
445 }
446 if (optptr[2] <= optlen) {
447 if (optptr[2]+3 > optlen) {
448 pp_ptr = optptr + 2;
449 break;
450 }
451 if (skb) {
452 memcpy(&optptr[optptr[2]-1], &skb->dev->pa_addr, 4);
453 opt->is_changed = 1;
454 }
455 optptr[2] += 4;
456 opt->rr_needaddr = 1;
457 }
458 opt->rr = optptr - iph;
459 break;
460 case IPOPT_TIMESTAMP:
461 if (opt->ts) {
462 pp_ptr = optptr;
463 break;
464 }
465 if (optlen < 4) {
466 pp_ptr = optptr + 1;
467 break;
468 }
469 if (optptr[2] < 5) {
470 pp_ptr = optptr + 2;
471 break;
472 }
473 if (optptr[2] <= optlen) {
474 struct timestamp * ts = (struct timestamp*)(optptr+1);
475 __u32 * timeptr = NULL;
476 if (ts->ptr+3 > ts->len) {
477 pp_ptr = optptr + 2;
478 break;
479 }
480 switch (ts->flags) {
481 case IPOPT_TS_TSONLY:
482 opt->ts = optptr - iph;
483 if (skb) {
484 timeptr = (__u32*)&optptr[ts->ptr-1];
485 opt->is_changed = 1;
486 }
487 ts->ptr += 4;
488 break;
489 case IPOPT_TS_TSANDADDR:
490 if (ts->ptr+7 > ts->len) {
491 pp_ptr = optptr + 2;
492 break;
493 }
494 opt->ts = optptr - iph;
495 if (skb) {
496 memcpy(&optptr[ts->ptr-1], &skb->dev->pa_addr, 4);
497 timeptr = (__u32*)&optptr[ts->ptr+3];
498 }
499 opt->ts_needaddr = 1;
500 opt->ts_needtime = 1;
501 ts->ptr += 8;
502 break;
503 case IPOPT_TS_PRESPEC:
504 if (ts->ptr+7 > ts->len) {
505 pp_ptr = optptr + 2;
506 break;
507 }
508 opt->ts = optptr - iph;
509 {
510 __u32 addr;
511 memcpy(&addr, &optptr[ts->ptr-1], 4);
512 if (ip_chk_addr(addr) == 0)
513 break;
514 if (skb)
515 timeptr = (__u32*)&optptr[ts->ptr+3];
516 }
517 opt->ts_needaddr = 1;
518 opt->ts_needtime = 1;
519 ts->ptr += 8;
520 break;
521 default:
522 pp_ptr = optptr + 3;
523 break;
524 }
525 if (timeptr) {
526 struct timeval tv;
527 __u32 midtime;
528 do_gettimeofday(&tv);
529 midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000);
530 memcpy(timeptr, &midtime, sizeof(__u32));
531 opt->is_changed = 1;
532 }
533 } else {
534 struct timestamp * ts = (struct timestamp*)(optptr+1);
535 if (ts->overflow == 15) {
536 pp_ptr = optptr + 3;
537 break;
538 }
539 opt->ts = optptr - iph;
540 if (skb) {
541 ts->overflow++;
542 opt->is_changed = 1;
543 }
544 }
545 break;
546 case IPOPT_SEC:
547 case IPOPT_SID:
548 default:
549 if (!skb) {
550 pp_ptr = optptr;
551 break;
552 }
553 break;
554 }
555 l -= optlen;
556 optptr += optlen;
557 }
558
559 eol:
560 if (!pp_ptr)
561 return 0;
562
563 if (skb) {
564 icmp_send(skb, ICMP_PARAMETERPROB, 0, pp_ptr-iph, skb->dev);
565 kfree_skb(skb, FREE_READ);
566 }
567 return -EINVAL;
568 }
569
570 /*
571 * Handle the issuing of an ioctl() request
572 * for the ip device. This is scheduled to
573 * disappear
574 */
575
576 int ip_ioctl(struct sock *sk, int cmd, unsigned long arg)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
577 {
578 switch(cmd)
579 {
580 default:
581 return(-EINVAL);
582 }
583 }
584
585
586 /*
587 * Take an skb, and fill in the MAC header.
588 */
589
590 static int ip_send(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
591 {
592 int mac = 0;
593
594 skb->dev = dev;
595 skb->arp = 1;
596 if (dev->hard_header)
597 {
598 /*
599 * Build a hardware header. Source address is our mac, destination unknown
600 * (rebuild header will sort this out)
601 */
602 skb_reserve(skb,(dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
603 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
604 if (mac < 0)
605 {
606 mac = -mac;
607 skb->arp = 0;
608 skb->raddr = daddr; /* next routing address */
609 }
610 }
611 return mac;
612 }
613
614 static int ip_send_room(struct sk_buff *skb, __u32 daddr, int len, struct device *dev, __u32 saddr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
615 {
616 int mac = 0;
617
618 skb->dev = dev;
619 skb->arp = 1;
620 if (dev->hard_header)
621 {
622 skb_reserve(skb,MAX_HEADER);
623 mac = dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, len);
624 if (mac < 0)
625 {
626 mac = -mac;
627 skb->arp = 0;
628 skb->raddr = daddr; /* next routing address */
629 }
630 }
631 return mac;
632 }
633
int ip_id_count = 0;	/* shared counter for IP header id generation (consumers outside this chunk) */
635
636 /*
637 * This routine builds the appropriate hardware/IP headers for
638 * the routine. It assumes that if *dev != NULL then the
639 * protocol knows what it's doing, otherwise it uses the
640 * routing/ARP tables to select a device struct.
641 */
642 int ip_build_header(struct sk_buff *skb, __u32 saddr, __u32 daddr,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
643 struct device **dev, int type, struct options *opt, int len, int tos, int ttl)
644 {
645 struct rtable *rt;
646 __u32 raddr;
647 int tmp;
648 __u32 src;
649 struct iphdr *iph;
650 __u32 final_daddr = daddr;
651
652 if (opt && opt->srr)
653 daddr = opt->faddr;
654
655 /*
656 * See if we need to look up the device.
657 */
658
659 #ifdef CONFIG_IP_MULTICAST
660 if(MULTICAST(daddr) && *dev==NULL && skb->sk && *skb->sk->ip_mc_name)
661 *dev=dev_get(skb->sk->ip_mc_name);
662 #endif
663 if (*dev == NULL)
664 {
665 if(skb->localroute)
666 rt = ip_rt_local(daddr, NULL, &src);
667 else
668 rt = ip_rt_route(daddr, NULL, &src);
669 if (rt == NULL)
670 {
671 ip_statistics.IpOutNoRoutes++;
672 return(-ENETUNREACH);
673 }
674
675 *dev = rt->rt_dev;
676 /*
677 * If the frame is from us and going off machine it MUST MUST MUST
678 * have the output device ip address and never the loopback
679 */
680 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
681 saddr = src;/*rt->rt_dev->pa_addr;*/
682 raddr = rt->rt_gateway;
683
684 }
685 else
686 {
687 /*
688 * We still need the address of the first hop.
689 */
690 if(skb->localroute)
691 rt = ip_rt_local(daddr, NULL, &src);
692 else
693 rt = ip_rt_route(daddr, NULL, &src);
694 /*
695 * If the frame is from us and going off machine it MUST MUST MUST
696 * have the output device ip address and never the loopback
697 */
698 if (LOOPBACK(saddr) && !LOOPBACK(daddr))
699 saddr = src;/*rt->rt_dev->pa_addr;*/
700
701 raddr = (rt == NULL) ? 0 : rt->rt_gateway;
702 }
703
704 /*
705 * No source addr so make it our addr
706 */
707 if (saddr == 0)
708 saddr = src;
709
710 /*
711 * No gateway so aim at the real destination
712 */
713 if (raddr == 0)
714 raddr = daddr;
715
716 /*
717 * Now build the MAC header.
718 */
719
720 if(type==IPPROTO_TCP)
721 tmp = ip_send_room(skb, raddr, len, *dev, saddr);
722 else
723 tmp = ip_send(skb, raddr, len, *dev, saddr);
724
725 /*
726 * Book keeping
727 */
728
729 skb->dev = *dev;
730 skb->saddr = saddr;
731
732 /*
733 * Now build the IP header.
734 */
735
736 /*
737 * If we are using IPPROTO_RAW, then we don't need an IP header, since
738 * one is being supplied to us by the user
739 */
740
741 if(type == IPPROTO_RAW)
742 return (tmp);
743
744 /*
745 * Build the IP addresses
746 */
747
748 if (opt)
749 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr) + opt->optlen);
750 else
751 iph=(struct iphdr *)skb_put(skb,sizeof(struct iphdr));
752
753 iph->version = 4;
754 iph->ihl = 5;
755 iph->tos = tos;
756 iph->frag_off = 0;
757 iph->ttl = ttl;
758 iph->daddr = daddr;
759 iph->saddr = saddr;
760 iph->protocol = type;
761 skb->ip_hdr = iph;
762
763 if (!opt || !opt->optlen)
764 return sizeof(struct iphdr) + tmp;
765 if (opt->is_strictroute && rt && rt->rt_gateway) {
766 ip_statistics.IpOutNoRoutes++;
767 return -ENETUNREACH;
768 }
769 iph->ihl += opt->optlen>>2;
770 ip_options_build(skb, opt, final_daddr, (*dev)->pa_addr, 0);
771 return iph->ihl*4 + tmp;
772 }
773
774
775 /*
776 * Generate a checksum for an outgoing IP datagram.
777 */
778
779 void ip_send_check(struct iphdr *iph)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
780 {
781 iph->check = 0;
782 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
783 }
784
785
/************************ Fragment Handlers From NET2E **********************************/


/*
 * This fragment handler is a bit of a heap. On the other hand it works quite
 * happily and handles things quite well.
 */

static struct ipq *ipqueue = NULL;		/* IP fragment queue: head of a doubly-linked
						   list of partially reassembled datagrams,
						   protected by cli()/sti() */
795
796 /*
797 * Create a new fragment entry.
798 */
799
800 static struct ipfrag *ip_frag_create(int offset, int end, struct sk_buff *skb, unsigned char *ptr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
801 {
802 struct ipfrag *fp;
803
804 fp = (struct ipfrag *) kmalloc(sizeof(struct ipfrag), GFP_ATOMIC);
805 if (fp == NULL)
806 {
807 NETDEBUG(printk("IP: frag_create: no memory left !\n"));
808 return(NULL);
809 }
810 memset(fp, 0, sizeof(struct ipfrag));
811
812 /* Fill in the structure. */
813 fp->offset = offset;
814 fp->end = end;
815 fp->len = end - offset;
816 fp->skb = skb;
817 fp->ptr = ptr;
818
819 return(fp);
820 }
821
822
823 /*
824 * Find the correct entry in the "incomplete datagrams" queue for
825 * this IP datagram, and return the queue entry address if found.
826 */
827
828 static struct ipq *ip_find(struct iphdr *iph)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
829 {
830 struct ipq *qp;
831 struct ipq *qplast;
832
833 cli();
834 qplast = NULL;
835 for(qp = ipqueue; qp != NULL; qplast = qp, qp = qp->next)
836 {
837 if (iph->id== qp->iph->id && iph->saddr == qp->iph->saddr &&
838 iph->daddr == qp->iph->daddr && iph->protocol == qp->iph->protocol)
839 {
840 del_timer(&qp->timer); /* So it doesn't vanish on us. The timer will be reset anyway */
841 sti();
842 return(qp);
843 }
844 }
845 sti();
846 return(NULL);
847 }
848
849
850 /*
851 * Remove an entry from the "incomplete datagrams" queue, either
852 * because we completed, reassembled and processed it, or because
853 * it timed out.
854 */
855
856 static void ip_free(struct ipq *qp)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
857 {
858 struct ipfrag *fp;
859 struct ipfrag *xp;
860
861 /*
862 * Stop the timer for this entry.
863 */
864
865 del_timer(&qp->timer);
866
867 /* Remove this entry from the "incomplete datagrams" queue. */
868 cli();
869 if (qp->prev == NULL)
870 {
871 ipqueue = qp->next;
872 if (ipqueue != NULL)
873 ipqueue->prev = NULL;
874 }
875 else
876 {
877 qp->prev->next = qp->next;
878 if (qp->next != NULL)
879 qp->next->prev = qp->prev;
880 }
881
882 /* Release all fragment data. */
883
884 fp = qp->fragments;
885 while (fp != NULL)
886 {
887 xp = fp->next;
888 IS_SKB(fp->skb);
889 kfree_skb(fp->skb,FREE_READ);
890 kfree_s(fp, sizeof(struct ipfrag));
891 fp = xp;
892 }
893
894 /* Release the IP header. */
895 kfree_s(qp->iph, 64 + 8);
896
897 /* Finally, release the queue descriptor itself. */
898 kfree_s(qp, sizeof(struct ipq));
899 sti();
900 }
901
902
903 /*
904 * Oops- a fragment queue timed out. Kill it and send an ICMP reply.
905 */
906
907 static void ip_expire(unsigned long arg)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
908 {
909 struct ipq *qp;
910
911 qp = (struct ipq *)arg;
912
913 /*
914 * Send an ICMP "Fragment Reassembly Timeout" message.
915 */
916
917 ip_statistics.IpReasmTimeout++;
918 ip_statistics.IpReasmFails++;
919 /* This if is always true... shrug */
920 if(qp->fragments!=NULL)
921 icmp_send(qp->fragments->skb,ICMP_TIME_EXCEEDED,
922 ICMP_EXC_FRAGTIME, 0, qp->dev);
923
924 /*
925 * Nuke the fragment queue.
926 */
927 ip_free(qp);
928 }
929
930
931 /*
932 * Add an entry to the 'ipq' queue for a newly received IP datagram.
933 * We will (hopefully :-) receive all other fragments of this datagram
934 * in time, so we just create a queue for this datagram, in which we
935 * will insert the received fragments at their respective positions.
936 */
937
938 static struct ipq *ip_create(struct sk_buff *skb, struct iphdr *iph, struct device *dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
939 {
940 struct ipq *qp;
941 int ihlen;
942
943 qp = (struct ipq *) kmalloc(sizeof(struct ipq), GFP_ATOMIC);
944 if (qp == NULL)
945 {
946 NETDEBUG(printk("IP: create: no memory left !\n"));
947 return(NULL);
948 skb->dev = qp->dev;
949 }
950 memset(qp, 0, sizeof(struct ipq));
951
952 /*
953 * Allocate memory for the IP header (plus 8 octets for ICMP).
954 */
955
956 ihlen = iph->ihl * 4;
957 qp->iph = (struct iphdr *) kmalloc(64 + 8, GFP_ATOMIC);
958 if (qp->iph == NULL)
959 {
960 NETDEBUG(printk("IP: create: no memory left !\n"));
961 kfree_s(qp, sizeof(struct ipq));
962 return(NULL);
963 }
964
965 memcpy(qp->iph, iph, ihlen + 8);
966 qp->len = 0;
967 qp->ihlen = ihlen;
968 qp->fragments = NULL;
969 qp->dev = dev;
970
971 /* Start a timer for this entry. */
972 qp->timer.expires = jiffies + IP_FRAG_TIME; /* about 30 seconds */
973 qp->timer.data = (unsigned long) qp; /* pointer to queue */
974 qp->timer.function = ip_expire; /* expire function */
975 add_timer(&qp->timer);
976
977 /* Add this entry to the queue. */
978 qp->prev = NULL;
979 cli();
980 qp->next = ipqueue;
981 if (qp->next != NULL)
982 qp->next->prev = qp;
983 ipqueue = qp;
984 sti();
985 return(qp);
986 }
987
988
989 /*
990 * See if a fragment queue is complete.
991 */
992
993 static int ip_done(struct ipq *qp)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
994 {
995 struct ipfrag *fp;
996 int offset;
997
998 /* Only possible if we received the final fragment. */
999 if (qp->len == 0)
1000 return(0);
1001
1002 /* Check all fragment offsets to see if they connect. */
1003 fp = qp->fragments;
1004 offset = 0;
1005 while (fp != NULL)
1006 {
1007 if (fp->offset > offset)
1008 return(0); /* fragment(s) missing */
1009 offset = fp->end;
1010 fp = fp->next;
1011 }
1012
1013 /* All fragments are present. */
1014 return(1);
1015 }
1016
1017
1018 /*
1019 * Build a new IP datagram from all its fragments.
1020 *
1021 * FIXME: We copy here because we lack an effective way of handling lists
1022 * of bits on input. Until the new skb data handling is in I'm not going
1023 * to touch this with a bargepole.
1024 */
1025
1026 static struct sk_buff *ip_glue(struct ipq *qp)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1027 {
1028 struct sk_buff *skb;
1029 struct iphdr *iph;
1030 struct ipfrag *fp;
1031 unsigned char *ptr;
1032 int count, len;
1033
1034 /*
1035 * Allocate a new buffer for the datagram.
1036 */
1037 len = qp->ihlen + qp->len;
1038
1039 if ((skb = dev_alloc_skb(len)) == NULL)
1040 {
1041 ip_statistics.IpReasmFails++;
1042 NETDEBUG(printk("IP: queue_glue: no memory for gluing queue %p\n", qp));
1043 ip_free(qp);
1044 return(NULL);
1045 }
1046
1047 /* Fill in the basic details. */
1048 skb_put(skb,len);
1049 skb->h.raw = skb->data;
1050 skb->free = 1;
1051
1052 /* Copy the original IP headers into the new buffer. */
1053 ptr = (unsigned char *) skb->h.raw;
1054 memcpy(ptr, ((unsigned char *) qp->iph), qp->ihlen);
1055 ptr += qp->ihlen;
1056
1057 count = 0;
1058
1059 /* Copy the data portions of all fragments into the new buffer. */
1060 fp = qp->fragments;
1061 while(fp != NULL)
1062 {
1063 if(count+fp->len > skb->len)
1064 {
1065 NETDEBUG(printk("Invalid fragment list: Fragment over size.\n"));
1066 ip_free(qp);
1067 kfree_skb(skb,FREE_WRITE);
1068 ip_statistics.IpReasmFails++;
1069 return NULL;
1070 }
1071 memcpy((ptr + fp->offset), fp->ptr, fp->len);
1072 count += fp->len;
1073 fp = fp->next;
1074 }
1075
1076 /* We glued together all fragments, so remove the queue entry. */
1077 ip_free(qp);
1078
1079 /* Done with all fragments. Fixup the new IP header. */
1080 iph = skb->h.iph;
1081 iph->frag_off = 0;
1082 iph->tot_len = htons((iph->ihl * 4) + count);
1083 skb->ip_hdr = iph;
1084
1085 ip_statistics.IpReasmOKs++;
1086 return(skb);
1087 }
1088
1089
1090 /*
1091 * Process an incoming IP datagram fragment.
1092 */
1093
1094 static struct sk_buff *ip_defrag(struct iphdr *iph, struct sk_buff *skb, struct device *dev)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1095 {
1096 struct ipfrag *prev, *next, *tmp;
1097 struct ipfrag *tfp;
1098 struct ipq *qp;
1099 struct sk_buff *skb2;
1100 unsigned char *ptr;
1101 int flags, offset;
1102 int i, ihl, end;
1103
1104 ip_statistics.IpReasmReqds++;
1105
1106 /* Find the entry of this IP datagram in the "incomplete datagrams" queue. */
1107 qp = ip_find(iph);
1108
1109 /* Is this a non-fragmented datagram? */
1110 offset = ntohs(iph->frag_off);
1111 flags = offset & ~IP_OFFSET;
1112 offset &= IP_OFFSET;
1113 if (((flags & IP_MF) == 0) && (offset == 0))
1114 {
1115 if (qp != NULL)
1116 ip_free(qp); /* Huh? How could this exist?? */
1117 return(skb);
1118 }
1119
1120 offset <<= 3; /* offset is in 8-byte chunks */
1121 ihl = iph->ihl * 4;
1122
1123 /*
1124 * If the queue already existed, keep restarting its timer as long
1125 * as we still are receiving fragments. Otherwise, create a fresh
1126 * queue entry.
1127 */
1128
1129 if (qp != NULL)
1130 {
1131 /* ANK. If the first fragment is received,
1132 * we should remember the correct IP header (with options)
1133 */
1134 if (offset == 0)
1135 {
1136 qp->ihlen = ihl;
1137 memcpy(qp->iph, iph, ihl+8);
1138 }
1139 del_timer(&qp->timer);
1140 qp->timer.expires = jiffies + IP_FRAG_TIME; /* about 30 seconds */
1141 qp->timer.data = (unsigned long) qp; /* pointer to queue */
1142 qp->timer.function = ip_expire; /* expire function */
1143 add_timer(&qp->timer);
1144 }
1145 else
1146 {
1147 /*
1148 * If we failed to create it, then discard the frame
1149 */
1150 if ((qp = ip_create(skb, iph, dev)) == NULL)
1151 {
1152 skb->sk = NULL;
1153 kfree_skb(skb, FREE_READ);
1154 ip_statistics.IpReasmFails++;
1155 return NULL;
1156 }
1157 }
1158
1159 /*
1160 * Determine the position of this fragment.
1161 */
1162
1163 end = offset + ntohs(iph->tot_len) - ihl;
1164
1165 /*
1166 * Point into the IP datagram 'data' part.
1167 */
1168
1169 ptr = skb->data + ihl;
1170
1171 /*
1172 * Is this the final fragment?
1173 */
1174
1175 if ((flags & IP_MF) == 0)
1176 qp->len = end;
1177
1178 /*
1179 * Find out which fragments are in front and at the back of us
1180 * in the chain of fragments so far. We must know where to put
1181 * this fragment, right?
1182 */
1183
1184 prev = NULL;
1185 for(next = qp->fragments; next != NULL; next = next->next)
1186 {
1187 if (next->offset > offset)
1188 break; /* bingo! */
1189 prev = next;
1190 }
1191
1192 /*
1193 * We found where to put this one.
1194 * Check for overlap with preceding fragment, and, if needed,
1195 * align things so that any overlaps are eliminated.
1196 */
1197 if (prev != NULL && offset < prev->end)
1198 {
1199 i = prev->end - offset;
1200 offset += i; /* ptr into datagram */
1201 ptr += i; /* ptr into fragment data */
1202 }
1203
1204 /*
1205 * Look for overlap with succeeding segments.
1206 * If we can merge fragments, do it.
1207 */
1208
1209 for(tmp=next; tmp != NULL; tmp = tfp)
1210 {
1211 tfp = tmp->next;
1212 if (tmp->offset >= end)
1213 break; /* no overlaps at all */
1214
1215 i = end - next->offset; /* overlap is 'i' bytes */
1216 tmp->len -= i; /* so reduce size of */
1217 tmp->offset += i; /* next fragment */
1218 tmp->ptr += i;
1219 /*
1220 * If we get a frag size of <= 0, remove it and the packet
1221 * that it goes with.
1222 */
1223 if (tmp->len <= 0)
1224 {
1225 if (tmp->prev != NULL)
1226 tmp->prev->next = tmp->next;
1227 else
1228 qp->fragments = tmp->next;
1229
1230 if (tfp->next != NULL)
1231 tmp->next->prev = tmp->prev;
1232
1233 next=tfp; /* We have killed the original next frame */
1234
1235 kfree_skb(tmp->skb,FREE_READ);
1236 kfree_s(tmp, sizeof(struct ipfrag));
1237 }
1238 }
1239
1240 /*
1241 * Insert this fragment in the chain of fragments.
1242 */
1243
1244 tfp = NULL;
1245 tfp = ip_frag_create(offset, end, skb, ptr);
1246
1247 /*
1248 * No memory to save the fragment - so throw the lot
1249 */
1250
1251 if (!tfp)
1252 {
1253 skb->sk = NULL;
1254 kfree_skb(skb, FREE_READ);
1255 return NULL;
1256 }
1257 tfp->prev = prev;
1258 tfp->next = next;
1259 if (prev != NULL)
1260 prev->next = tfp;
1261 else
1262 qp->fragments = tfp;
1263
1264 if (next != NULL)
1265 next->prev = tfp;
1266
1267 /*
1268 * OK, so we inserted this new fragment into the chain.
1269 * Check if we now have a full IP datagram which we can
1270 * bump up to the IP layer...
1271 */
1272
1273 if (ip_done(qp))
1274 {
1275 skb2 = ip_glue(qp); /* glue together the fragments */
1276 return(skb2);
1277 }
1278 return(NULL);
1279 }
1280
1281
/*
 *	This IP datagram is too large to be sent in one piece. Break it up into
 *	smaller pieces (each of size equal to the MAC header plus IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending by calling the
 *	ip_queue_xmit(). Note that this is recursion, and bad things will happen
 *	if this function causes a loop...
 *
 *	Yes this is inefficient, feel free to submit a quicker one.
 *
 *	On failure (DF set, tiny MTU, or out of memory) the fragments created
 *	so far have already been queued; the remainder is silently dropped
 *	after bumping IpFragFails.  The caller still owns and frees 'skb'.
 */

static void ip_fragment(struct sock *sk, struct sk_buff *skb, struct device *dev, int is_frag)
{
	struct iphdr *iph;
	unsigned char *raw;
	unsigned char *ptr;
	struct sk_buff *skb2;
	int left, mtu, hlen, len;
	int offset;
	unsigned long flags;

	/*
	 *	Point into the IP datagram header.
	 */

	raw = skb->data;
#if 0
	iph = (struct iphdr *) (raw + dev->hard_header_len);
	skb->ip_hdr = iph;
#else
	iph = skb->ip_hdr;
#endif

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
	left = ntohs(iph->tot_len) - hlen;	/* Space per frame */
	hlen += dev->hard_header_len;		/* Total header size */
	mtu = (dev->mtu - hlen);		/* Size of data space */
	ptr = (raw + hlen);			/* Where to start from */

	/*
	 *	Check for any "DF" flag. [DF means do not fragment]
	 */

	if (ntohs(iph->frag_off) & IP_DF)
	{
		ip_statistics.IpFragFails++;
		printk("ip_queue_xmit: frag needed\n");
		return;
	}

	/*
	 *	The protocol doesn't seem to say what to do in the case that the
	 *	frame + options doesn't fit the mtu. As it used to fall down dead
	 *	in this case we were fortunate it didn't happen
	 */

	if(mtu<8)
	{
		/* It's wrong but it's better than nothing */
		icmp_send(skb,ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED,dev->mtu, dev);
		ip_statistics.IpFragFails++;
		return;
	}

	/*
	 *	Fragment the datagram.
	 */

	/*
	 *	The initial offset is 0 for a complete frame. When
	 *	fragmenting fragments it's wherever this one starts.
	 *	(is_frag & 2 means this skb is itself a non-first fragment.)
	 */

	if (is_frag & 2)
		offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	else
		offset = 0;


	/*
	 *	Keep copying data until we run out.
	 */

	while(left > 0)
	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary
		   (IP fragment offsets are expressed in 8-byte units). */
		if (len < left)
		{
			len/=8;
			len*=8;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len + hlen+15,GFP_ATOMIC)) == NULL)
		{
			NETDEBUG(printk("IP: frag: no memory for new fragment!\n"));
			ip_statistics.IpFragFails++;
			return;
		}

		/*
		 *	Set up data on packet
		 */

		skb2->arp = skb->arp;
		if(skb->free==0)
			printk("IP fragmenter: BUG free!=1 in fragmenter\n");
		skb2->free = 1;
		skb_put(skb2,len + hlen);
		skb2->h.raw=(char *) skb2->data;
		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess (interrupts disabled while touching
		 *	sk->wmem_alloc; restore_flags is a no-op when !sk).
		 */

		save_flags(flags);
		if (sk)
		{
			cli();
			sk->wmem_alloc += skb2->truesize;
			skb2->sk=sk;
		}
		restore_flags(flags);
		skb2->raddr = skb->raddr;	/* For rebuild_header - must be here */

		/*
		 *	Copy the packet header into the new buffer.
		 */

		memcpy(skb2->h.raw, raw, hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		memcpy(skb2->h.raw + hlen, ptr, len);
		left -= len;

		skb2->h.raw+=dev->hard_header_len;

		/*
		 *	Fill in the new header fields.  Note: tot_len and the
		 *	checksum of the fragment are not set here — presumably
		 *	ip_queue_xmit() finalises them (TODO confirm).
		 */
		iph = (struct iphdr *)(skb2->h.raw/*+dev->hard_header_len*/);
		iph->frag_off = htons((offset >> 3));
		skb2->ip_hdr = iph;

		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment thats not the
		 *		   last fragment then keep MF on each bit
		 */
		if (left > 0 || (is_frag & 1))
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */

		ip_statistics.IpFragCreates++;

		ip_queue_xmit(sk, dev, skb2, 2);
	}
	ip_statistics.IpFragOKs++;
}
1468
1469
1470
1471 #ifdef CONFIG_IP_FORWARD
1472 #ifdef CONFIG_IP_MROUTE
1473
1474 /*
1475 * Encapsulate a packet by attaching a valid IPIP header to it.
1476 * This avoids tunnel drivers and other mess and gives us the speed so
1477 * important for multicast video.
1478 */
1479
1480 static void ip_encap(struct sk_buff *skb, int len, struct device *out, __u32 daddr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1481 {
1482 /*
1483 * There is space for the IPIP header and MAC left.
1484 *
1485 * Firstly push down and install the IPIP header.
1486 */
1487 struct iphdr *iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1488 if(len>65515)
1489 len=65515;
1490 iph->version = 4;
1491 iph->tos = skb->ip_hdr->tos;
1492 iph->ttl = skb->ip_hdr->ttl;
1493 iph->frag_off = 0;
1494 iph->daddr = daddr;
1495 iph->saddr = out->pa_addr;
1496 iph->protocol = IPPROTO_IPIP;
1497 iph->ihl = 5;
1498 iph->tot_len = htons(skb->len);
1499 iph->id = htons(ip_id_count++);
1500 ip_send_check(iph);
1501
1502 skb->dev = out;
1503 skb->arp = 1;
1504 skb->raddr=daddr;
1505 /*
1506 * Now add the physical header (driver will push it down).
1507 */
1508 if (out->hard_header && out->hard_header(skb, out, ETH_P_IP, NULL, NULL, len)<0)
1509 skb->arp=0;
1510 /*
1511 * Read to queue for transmission.
1512 */
1513 }
1514
1515 #endif
1516
1517 /*
1518 * Forward an IP datagram to its next destination.
1519 */
1520
1521 int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
1522 __u32 target_addr)
1523 {
1524 struct device *dev2; /* Output device */
1525 struct iphdr *iph; /* Our header */
1526 struct sk_buff *skb2; /* Output packet */
1527 struct rtable *rt; /* Route we use */
1528 unsigned char *ptr; /* Data pointer */
1529 unsigned long raddr; /* Router IP address */
1530 struct options * opt = (struct options*)skb->proto_priv;
1531 int encap = 0; /* Encap length */
1532 #ifdef CONFIG_IP_FIREWALL
1533 int fw_res = 0; /* Forwarding result */
1534 #ifdef CONFIG_IP_MASQUERADE
1535 struct sk_buff *skb_in = skb; /* So we can remember if the masquerader did some swaps */
1536 #endif
1537
1538 /*
1539 * See if we are allowed to forward this.
1540 * Note: demasqueraded fragments are always 'back'warded.
1541 */
1542
1543
1544 if(!(is_frag&4))
1545 {
1546 fw_res=ip_fw_chk(skb->h.iph, dev, ip_fw_fwd_chain, ip_fw_fwd_policy, 0);
1547 switch (fw_res) {
1548 case FW_ACCEPT:
1549 #ifdef CONFIG_IP_MASQUERADE
1550 case FW_MASQUERADE:
1551 #endif
1552 break;
1553 case FW_REJECT:
1554 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
1555 /* fall thru */
1556 default:
1557 return -1;
1558 }
1559 }
1560 #endif
1561 /*
1562 * According to the RFC, we must first decrease the TTL field. If
1563 * that reaches zero, we must reply an ICMP control message telling
1564 * that the packet's lifetime expired.
1565 *
1566 * Exception:
1567 * We may not generate an ICMP for an ICMP. icmp_send does the
1568 * enforcement of this so we can forget it here. It is however
1569 * sometimes VERY important.
1570 */
1571
1572 iph = skb->h.iph;
1573 iph->ttl--;
1574
1575 /*
1576 * Re-compute the IP header checksum.
1577 * This is inefficient. We know what has happened to the header
1578 * and could thus adjust the checksum as Phil Karn does in KA9Q
1579 */
1580
1581 iph->check = ntohs(iph->check) + 0x0100;
1582 if ((iph->check & 0xFF00) == 0)
1583 iph->check++; /* carry overflow */
1584 iph->check = htons(iph->check);
1585
1586 if (iph->ttl <= 0)
1587 {
1588 /* Tell the sender its packet died... */
1589 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
1590 return -1;
1591 }
1592
1593 #ifdef CONFIG_IP_MROUTE
1594 if(!(is_frag&8))
1595 {
1596 #endif
1597 /*
1598 * OK, the packet is still valid. Fetch its destination address,
1599 * and give it to the IP sender for further processing.
1600 */
1601
1602 rt = ip_rt_route(target_addr, NULL, NULL);
1603 if (rt == NULL)
1604 {
1605 /*
1606 * Tell the sender its packet cannot be delivered. Again
1607 * ICMP is screened later.
1608 */
1609 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
1610 return -1;
1611 }
1612
1613
1614 /*
1615 * Gosh. Not only is the packet valid; we even know how to
1616 * forward it onto its final destination. Can we say this
1617 * is being plain lucky?
1618 * If the router told us that there is no GW, use the dest.
1619 * IP address itself- we seem to be connected directly...
1620 */
1621
1622 raddr = rt->rt_gateway;
1623
1624 if (raddr != 0)
1625 {
1626 /*
1627 * Strict routing permits no gatewaying
1628 */
1629
1630 if (opt->is_strictroute)
1631 {
1632 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
1633 return -1;
1634 }
1635
1636 /*
1637 * There is a gateway so find the correct route for it.
1638 * Gateways cannot in turn be gatewayed.
1639 */
1640 }
1641 else
1642 raddr = target_addr;
1643
1644 /*
1645 * Having picked a route we can now send the frame out.
1646 */
1647
1648 dev2 = rt->rt_dev;
1649 /*
1650 * In IP you never have to forward a frame on the interface that it
1651 * arrived upon. We now generate an ICMP HOST REDIRECT giving the route
1652 * we calculated.
1653 */
1654 #ifndef CONFIG_IP_NO_ICMP_REDIRECT
1655 if (dev == dev2 && !((iph->saddr^iph->daddr)&dev->pa_mask) &&
1656 (rt->rt_flags&RTF_MODIFIED) && !opt->srr)
1657 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
1658 #endif
1659 #ifdef CONFIG_IP_MROUTE
1660 }
1661 else
1662 {
1663 /*
1664 * Multicast route forward. Routing is already done
1665 */
1666 dev2=skb->dev;
1667 raddr=skb->raddr;
1668 if(is_frag&16) /* VIFF_TUNNEL mode */
1669 encap=20;
1670 }
1671 #endif
1672
1673
1674 /*
1675 * We now may allocate a new buffer, and copy the datagram into it.
1676 * If the indicated interface is up and running, kick it.
1677 */
1678
1679 if (dev2->flags & IFF_UP)
1680 {
1681 #ifdef CONFIG_IP_MASQUERADE
1682 /*
1683 * If this fragment needs masquerading, make it so...
1684 * (Dont masquerade de-masqueraded fragments)
1685 */
1686 if (!(is_frag&4) && fw_res==FW_MASQUERADE)
1687 ip_fw_masquerade(&skb, dev2);
1688 #endif
1689 IS_SKB(skb);
1690
1691 if (skb->len+encap > dev2->mtu && (ntohs(iph->frag_off) & IP_DF)) {
1692 ip_statistics.IpFragFails++;
1693 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, dev2->mtu, dev);
1694 return -1;
1695 }
1696
1697 #ifdef CONFIG_IP_MROUTE
1698 if(skb_headroom(skb)-encap<dev2->hard_header_len)
1699 {
1700 skb2 = alloc_skb(dev2->hard_header_len + skb->len + encap + 15, GFP_ATOMIC);
1701 #else
1702 if(skb_headroom(skb)<dev2->hard_header_len)
1703 {
1704 skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);
1705 #endif
1706 /*
1707 * This is rare and since IP is tolerant of network failures
1708 * quite harmless.
1709 */
1710
1711 if (skb2 == NULL)
1712 {
1713 NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
1714 return -1;
1715 }
1716
1717 IS_SKB(skb2);
1718 /*
1719 * Add the physical headers.
1720 */
1721 #ifdef CONFIG_IP_MROUTE
1722 if(is_frag&16)
1723 {
1724 skb_reserve(skb,(encap+dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
1725 ip_encap(skb2,skb->len, dev2, raddr);
1726 }
1727 else
1728 #endif
1729 ip_send(skb2,raddr,skb->len,dev2,dev2->pa_addr);
1730
1731 /*
1732 * We have to copy the bytes over as the new header wouldn't fit
1733 * the old buffer. This should be very rare.
1734 */
1735
1736 ptr = skb_put(skb2,skb->len);
1737 skb2->free = 1;
1738 skb2->h.raw = ptr;
1739
1740 /*
1741 * Copy the packet data into the new buffer.
1742 */
1743 memcpy(ptr, skb->h.raw, skb->len);
1744 memcpy(skb2->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
1745 iph = skb2->ip_hdr = skb2->h.iph;
1746 }
1747 else
1748 {
1749 /*
1750 * Build a new MAC header.
1751 */
1752
1753 skb2 = skb;
1754 skb2->dev=dev2;
1755 #ifdef CONFIG_IP_MROUTE
1756 if(is_frag&16)
1757 ip_encap(skb,skb->len, dev2, raddr);
1758 else
1759 {
1760 #endif
1761 skb->arp=1;
1762 skb->raddr=raddr;
1763 if(dev2->hard_header)
1764 {
1765 if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
1766 skb->arp=0;
1767 }
1768 #ifdef CONFIG_IP_MROUTE
1769 }
1770 #endif
1771 ip_statistics.IpForwDatagrams++;
1772 }
1773
1774 if (opt->optlen) {
1775 unsigned char * optptr;
1776 if (opt->rr_needaddr) {
1777 optptr = (unsigned char *)iph + opt->rr;
1778 memcpy(&optptr[optptr[2]-5], &dev2->pa_addr, 4);
1779 opt->is_changed = 1;
1780 }
1781 if (opt->srr_is_hit) {
1782 int srrptr, srrspace;
1783
1784 optptr = (unsigned char *)iph + opt->srr;
1785
1786 for ( srrptr=optptr[2], srrspace = optptr[1];
1787 srrptr <= srrspace;
1788 srrptr += 4
1789 ) {
1790 if (srrptr + 3 > srrspace)
1791 break;
1792 if (memcmp(&target_addr, &optptr[srrptr-1], 4) == 0)
1793 break;
1794 }
1795 if (srrptr + 3 <= srrspace) {
1796 opt->is_changed = 1;
1797 memcpy(&optptr[srrptr-1], &dev2->pa_addr, 4);
1798 iph->daddr = target_addr;
1799 optptr[2] = srrptr+4;
1800 } else
1801 printk("ip_forward(): Argh! Destination lost!\n");
1802 }
1803 if (opt->ts_needaddr) {
1804 optptr = (unsigned char *)iph + opt->ts;
1805 memcpy(&optptr[optptr[2]-9], &dev2->pa_addr, 4);
1806 opt->is_changed = 1;
1807 }
1808 if (opt->is_changed) {
1809 opt->is_changed = 0;
1810 ip_send_check(iph);
1811 }
1812 }
1813 /*
1814 * ANK: this is point of "no return", we cannot send an ICMP,
1815 * because we changed SRR option.
1816 */
1817
1818 /*
1819 * See if it needs fragmenting. Note in ip_rcv we tagged
1820 * the fragment type. This must be right so that
1821 * the fragmenter does the right thing.
1822 */
1823
1824 if(skb2->len > dev2->mtu + dev2->hard_header_len)
1825 {
1826 ip_fragment(NULL,skb2,dev2, is_frag);
1827 kfree_skb(skb2,FREE_WRITE);
1828 }
1829 else
1830 {
1831 #ifdef CONFIG_IP_ACCT
1832 /*
1833 * Count mapping we shortcut
1834 */
1835
1836 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
1837 #endif
1838
1839 /*
1840 * Map service types to priority. We lie about
1841 * throughput being low priority, but it's a good
1842 * choice to help improve general usage.
1843 */
1844 if(iph->tos & IPTOS_LOWDELAY)
1845 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
1846 else if(iph->tos & IPTOS_THROUGHPUT)
1847 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
1848 else
1849 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
1850 }
1851 }
1852 else
1853 return -1;
1854
1855 /*
1856 * Tell the caller if their buffer is free.
1857 */
1858
1859 if(skb==skb2)
1860 return 0;
1861
1862 #ifdef CONFIG_IP_MASQUERADE
1863 /*
1864 * The original is free. Free our copy and
1865 * tell the caller not to free.
1866 */
1867 if(skb!=skb_in)
1868 {
1869 kfree_skb(skb_in, FREE_WRITE);
1870 return 0;
1871 }
1872 #endif
1873 return 1;
1874 }
1875
1876
1877 #endif
1878
1879
/*
 *	This function receives all incoming IP datagrams.
 *
 *	On entry skb->data points to the start of the IP header and
 *	the MAC header has been removed.
 *
 *	Validates the header, runs the firewall, reassembles fragments,
 *	hands local datagrams to raw sockets and transport protocols,
 *	and forwards everything else.  Always consumes skb one way or
 *	another; returns 0 in (almost) all cases.
 */

int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	unsigned char flag = 0;		/* set once any protocol handler took the frame */
	struct inet_protocol *ipprot;
	int brd=IS_MYADDR;		/* address class of iph->daddr; see below */
	struct options * opt = NULL;
	int is_frag=0;			/* bit 0: more frags follow; bit 1: not first frag */
#ifdef CONFIG_IP_FIREWALL
	int err;
#endif
#ifdef CONFIG_IP_MROUTE
	int mroute_pkt=0;
#endif

#ifdef CONFIG_NET_IPV6
	/*
	 *	Intercept IPv6 frames. We dump ST-II and invalid types just below..
	 */

	if(iph->version == 6)
		return ipv6_rcv(skb,dev,pt);
#endif

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
	 *	RFC1122: 3.1.2.3 MUST discard a frame with invalid source address [NEEDS FIXING].
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 *	(5.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */

	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0
		|| skb->len < ntohs(iph->tot_len))
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}

	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 *	Note this now means skb->len holds ntohs(iph->tot_len).
	 */

	skb_trim(skb,ntohs(iph->tot_len));

	if (iph->ihl > 5) {
		skb->ip_summed = 0;
		/* NOTE(review): on failure ip_options_compile presumably
		   frees the skb itself — confirm, else this leaks. */
		if (ip_options_compile(NULL, skb))
			return(0);
		opt = (struct options*)skb->proto_priv;
#ifdef CONFIG_IP_NOSR
		if (opt->srr) {
			kfree_skb(skb, FREE_READ);
			return -EINVAL;
		}
#endif
	}

	/*
	 *	See if the firewall wants to dispose of the packet.
	 */

#ifdef CONFIG_IP_FIREWALL

	if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))<FW_ACCEPT)
	{
		if(err==FW_REJECT)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
		return 0;
	}

#endif

	/*
	 *	Remember if the frame is fragmented.
	 */

	if(iph->frag_off)
	{
		if (iph->frag_off & htons(IP_MF))
			is_frag|=1;
		/*
		 *	Last fragment ?
		 */

		if (iph->frag_off & htons(IP_OFFSET))
			is_frag|=2;
	}

	/*
	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */

	if ( iph->daddr == skb->dev->pa_addr || (brd = ip_chk_addr(iph->daddr)) != 0)
	{
		/*
		 *	Source-routed datagram addressed to us: advance the
		 *	route and forward to the next hop ourselves.
		 */
		if (opt && opt->srr) {
			int srrspace, srrptr;
			__u32 nexthop;
			unsigned char * optptr = ((unsigned char *)iph) + opt->srr;

			if (brd != IS_MYADDR || skb->pkt_type != PACKET_HOST) {
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}

			for ( srrptr=optptr[2], srrspace = optptr[1];
			      srrptr <= srrspace;
			      srrptr += 4
			    )
			{
				int brd2;
				if (srrptr + 3 > srrspace)
				{
					icmp_send(skb, ICMP_PARAMETERPROB, 0, opt->srr+2,
						  skb->dev);
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				memcpy(&nexthop, &optptr[srrptr-1], 4);
				if ((brd2 = ip_chk_addr(nexthop)) == 0)
					break;
				if (brd2 != IS_MYADDR)
				{

					/*
					 *	ANK: should we implement weak tunneling of multicasts?
					 *	Are they obsolete? DVMRP specs (RFC-1075) is old enough...
					 *	[They are obsolete]
					 */
					kfree_skb(skb, FREE_WRITE);
					return -EINVAL;
				}
			}
			if (srrptr <= srrspace)
			{
				opt->srr_is_hit = 1;
				opt->is_changed = 1;
#ifdef CONFIG_IP_FORWARD
				if (ip_forward(skb, dev, is_frag, nexthop))
					kfree_skb(skb, FREE_WRITE);
#else
				ip_statistics.IpInAddrErrors++;
				kfree_skb(skb, FREE_WRITE);
#endif
				return 0;
			}
		}

#ifdef CONFIG_IP_MULTICAST
		if(!(dev->flags&IFF_ALLMULTI) && brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
		{
			/*
			 *	Check it is for one of our groups
			 */
			struct ip_mc_list *ip_mc=dev->ip_mc_list;
			do
			{
				if(ip_mc==NULL)
				{
					kfree_skb(skb, FREE_WRITE);
					return 0;
				}
				if(ip_mc->multiaddr==iph->daddr)
					break;
				ip_mc=ip_mc->next;
			}
			while(1);
		}
#endif

#ifdef CONFIG_IP_MASQUERADE
		/*
		 *	Do we need to de-masquerade this fragment?
		 */
		if (ip_fw_demasquerade(skb))
		{
			struct iphdr *iph=skb->h.iph;
			/* is_frag|4 marks it demasqueraded so ip_forward skips the firewall */
			if (ip_forward(skb, dev, is_frag|4, iph->daddr))
				kfree_skb(skb, FREE_WRITE);
			return(0);
		}
#endif

		/*
		 *	Account for the packet
		 */

#ifdef CONFIG_IP_ACCT
		ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
#endif

		/*
		 *	Reassemble IP fragments.
		 */

		if(is_frag)
		{
			/* Defragment. Obtain the complete packet if there is one */
			skb=ip_defrag(iph,skb,dev);
			if(skb==NULL)
				return 0;
			skb->dev = dev;
			iph=skb->h.iph;
		}

		/*
		 *	Point into the IP datagram, just past the header.
		 */

		skb->ip_hdr = iph;
		skb->h.raw += iph->ihl*4;

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Check the state on multicast routing (multicast and not 224.0.0.z)
		 */

		if(brd==IS_MULTICAST && (iph->daddr&htonl(0xFFFFFF00))!=htonl(0xE0000000))
			mroute_pkt=1;

#endif
		/*
		 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
		 *
		 *	RFC 1122: SHOULD pass TOS value up to the transport layer.
		 */

		hash = iph->protocol & (SOCK_ARRAY_SIZE-1);

		/*
		 *	If there maybe a raw socket we must check - if not we don't care less
		 */

		if((raw_sk=raw_prot.sock_array[hash])!=NULL)
		{
			struct sock *sknext=NULL;
			struct sk_buff *skb1;
			raw_sk=get_sock_raw(raw_sk, iph->protocol, iph->saddr, iph->daddr);
			if(raw_sk)	/* Any raw sockets */
			{
				do
				{
					/* Find the next */
					sknext=get_sock_raw(raw_sk->next, iph->protocol, iph->saddr, iph->daddr);
					if(sknext)
						skb1=skb_clone(skb, GFP_ATOMIC);
					else
						break;	/* One pending raw socket left */
					if(skb1)
						raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
					raw_sk=sknext;
				}
				while(raw_sk!=NULL);

				/*
				 *	Here either raw_sk is the last raw socket, or NULL if none
				 */

				/*
				 *	We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy
				 */
			}
		}

		/*
		 *	skb->h.raw now points at the protocol beyond the IP header.
		 */

		hash = iph->protocol & (MAX_INET_PROTOS -1);
		for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
		{
			struct sk_buff *skb2;

			if (ipprot->protocol != iph->protocol)
				continue;
			/*
			 *	See if we need to make a copy of it. This will
			 *	only be set if more than one protocol wants it.
			 *	and then not for the last one. If there is a pending
			 *	raw delivery wait for that
			 */

#ifdef CONFIG_IP_MROUTE
			if (ipprot->copy || raw_sk || mroute_pkt)
#else
			if (ipprot->copy || raw_sk)
#endif
			{
				skb2 = skb_clone(skb, GFP_ATOMIC);
				if(skb2==NULL)
					continue;
			}
			else
			{
				skb2 = skb;
			}
			flag = 1;

			/*
			 *	Pass on the datagram to each protocol that wants it,
			 *	based on the datagram protocol. We should really
			 *	check the protocol handler's return values here...
			 */

			ipprot->handler(skb2, dev, opt, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);
		}

		/*
		 *	All protocols checked.
		 *	If this packet was a broadcast, we may *not* reply to it, since that
		 *	causes (proven, grin) ARP storms and a leakage of memory (i.e. all
		 *	ICMP reply messages get queued up for transmission...)
		 */

#ifdef CONFIG_IP_MROUTE
		/*
		 *	Forward the last copy to the multicast router. If
		 *	there is a pending raw deliery however make a copy
		 *	and forward that.
		 */

		if(mroute_pkt)
		{
			flag=1;
			if(raw_sk==NULL)
				ipmr_forward(skb, is_frag);
			else
			{
				struct sk_buff *skb2=skb_clone(skb, GFP_ATOMIC);
				if(skb2)
				{
					skb2->free=1;
					ipmr_forward(skb2, is_frag);
				}
			}
		}
#endif

		if(raw_sk!=NULL)	/* Shift to last raw user */
			raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
		else if (!flag)		/* Free and report errors */
		{
			if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
				icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
			kfree_skb(skb, FREE_WRITE);
		}

		return(0);
	}

	/*
	 *	Do any unicast IP forwarding required.
	 */

	/*
	 *	Don't forward multicast or broadcast frames.
	 */

	if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
	{
		kfree_skb(skb,FREE_WRITE);
		return 0;
	}

	/*
	 *	The packet is for another target. Forward the frame
	 */

#ifdef CONFIG_IP_FORWARD
	if (opt && opt->is_strictroute) {
		icmp_send(skb, ICMP_PARAMETERPROB, 0, 16, skb->dev);
		kfree_skb(skb, FREE_WRITE);
		return -1;
	}
	if (ip_forward(skb, dev, is_frag, iph->daddr))
		kfree_skb(skb, FREE_WRITE);
#else
/*	printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
	ip_statistics.IpInAddrErrors++;
	kfree_skb(skb, FREE_WRITE);
#endif
	return(0);
}
2300
2301
2302 /*
2303 * Loop a packet back to the sender.
2304 */
2305
2306 static void ip_loopback(struct device *old_dev, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
2307 {
2308 struct device *dev=&loopback_dev;
2309 int len=ntohs(skb->ip_hdr->tot_len);
2310 struct sk_buff *newskb=dev_alloc_skb(len+dev->hard_header_len+15);
2311
2312 if(newskb==NULL)
2313 return;
2314
2315 newskb->link3=NULL;
2316 newskb->sk=NULL;
2317 newskb->dev=dev;
2318 newskb->saddr=skb->saddr;
2319 newskb->daddr=skb->daddr;
2320 newskb->raddr=skb->raddr;
2321 newskb->free=1;
2322 newskb->lock=0;
2323 newskb->users=0;
2324 newskb->pkt_type=skb->pkt_type;
2325
2326 /*
2327 * Put a MAC header on the packet
2328 */
2329 ip_send(newskb, skb->ip_hdr->daddr, len, dev, skb->ip_hdr->saddr);
2330 /*
2331 * Add the rest of the data space.
2332 */
2333 newskb->ip_hdr=(struct iphdr *)skb_put(newskb, len);
2334 memcpy(newskb->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
2335
2336 /*
2337 * Copy the data
2338 */
2339 memcpy(newskb->ip_hdr,skb->ip_hdr,len);
2340
2341 /* Recurse. The device check against IFF_LOOPBACK will stop infinite recursion */
2342
2343 /*printk("Loopback output queued [%lX to %lX].\n", newskb->ip_hdr->saddr,newskb->ip_hdr->daddr);*/
2344 ip_queue_xmit(NULL, dev, newskb, 1);
2345 }
2346
2347
2348 /*
2349 * Queues a packet to be sent, and starts the transmitter
2350 * if necessary. if free = 1 then we free the block after
2351 * transmit, otherwise we don't. If free==2 we not only
2352 * free the block but also don't assign a new ip seq number.
2353 * This routine also needs to put in the total length,
2354 * and compute the checksum
2355 */
2356
/*
 * Queue an IP frame on 'dev' for transmission.  'sk' may be NULL, in
 * which case the buffer is ownerless and is always freed after send.
 * See the comment above for the meaning of the 'free' argument.
 */
2357 void ip_queue_xmit(struct sock *sk, struct device *dev,
2358 struct sk_buff *skb, int free)
2359 {
2360 struct iphdr *iph;
2361 /* unsigned char *ptr;*/
2362 
2363 /* Sanity check */
2364 if (dev == NULL)
2365 {
2366 NETDEBUG(printk("IP: ip_queue_xmit dev = NULL\n"));
2367 return;
2368 }
2369 
2370 IS_SKB(skb);
2371 
2372 /*
2373 * Do some book-keeping in the packet for later
2374 */
2375 
2376 
2377 skb->dev = dev;
2378 skb->when = jiffies;
2379 
2380 /*
2381 * Find the IP header and set the length. This is bad
2382 * but once we get the skb data handling code in the
2383 * hardware will push its header sensibly and we will
2384 * set skb->ip_hdr to avoid this mess and the fixed
2385 * header length problem
2386 */
2387 
2388 iph = skb->ip_hdr;
2389 iph->tot_len = ntohs(skb->len-(((unsigned char *)iph)-skb->data));
2390 
2391 #ifdef CONFIG_IP_FIREWALL
2392 if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy, 0) < FW_ACCEPT)
/* NOTE(review): the blocked frame is not kfree'd on this path — verify who owns it */
2393 /* just don't send this packet */
2394 return;
2395 #endif
2396 
2397 /*
2398 * No reassigning numbers to fragments...
2399 */
2400 
2401 if(free!=2)
2402 iph->id = htons(ip_id_count++);
2403 else
2404 free=1;
2405 
2406 /* All buffers without an owner socket get freed */
2407 if (sk == NULL)
2408 free = 1;
2409 
2410 skb->free = free;
2411 
2412 /*
2413 * Do we need to fragment. Again this is inefficient.
2414 * We need to somehow lock the original buffer and use
2415 * bits of it.
2416 */
2417 
2418 if(ntohs(iph->tot_len)> dev->mtu)
2419 {
2420 ip_fragment(sk,skb,dev,0);
2421 IS_SKB(skb);
2422 kfree_skb(skb,FREE_WRITE);
2423 return;
2424 }
2425 
2426 /*
2427 * Add an IP checksum
2428 */
2429 
2430 ip_send_check(iph);
2431 
2432 /*
2433 * Print the frame when debugging
2434 */
2435 
2436 /*
2437 * More debugging. You cannot queue a packet already on a list
2438 * Spot this and moan loudly.
2439 */
2440 if (skb->next != NULL)
2441 {
2442 NETDEBUG(printk("ip_queue_xmit: next != NULL\n"));
2443 skb_unlink(skb);
2444 }
2445 
2446 /*
2447 * If a sender wishes the packet to remain unfreed
2448 * we add it to his send queue. This arguably belongs
2449 * in the TCP level since nobody else uses it. BUT
2450 * remember IPng might change all the rules.
2451 */
2452 
2453 if (!free)
2454 {
2455 unsigned long flags;
2456 /* The socket now has more outstanding blocks */
2457 
2458 sk->packets_out++;
2459 
2460 /* Protect the list for a moment */
2461 save_flags(flags);
2462 cli();
2463 
2464 if (skb->link3 != NULL)
2465 {
2466 NETDEBUG(printk("ip.c: link3 != NULL\n"));
2467 skb->link3 = NULL;
2468 }
/* Append to the socket's retransmit queue (send_head..send_tail via link3). */
2469 if (sk->send_head == NULL)
2470 {
2471 sk->send_tail = skb;
2472 sk->send_head = skb;
2473 }
2474 else
2475 {
2476 sk->send_tail->link3 = skb;
2477 sk->send_tail = skb;
2478 }
2479 /* skb->link3 is NULL */
2480 
2481 /* Interrupt restore */
2482 restore_flags(flags);
2483 }
2484 else
2485 /* Remember who owns the buffer */
2486 skb->sk = sk;
2487 
2488 /*
2489 * If the indicated interface is up and running, send the packet.
2490 */
2491 
2492 ip_statistics.IpOutRequests++;
2493 #ifdef CONFIG_IP_ACCT
2494 ip_fw_chk(iph,dev,ip_acct_chain,IP_FW_F_ACCEPT,1);
2495 #endif
2496 
2497 #ifdef CONFIG_IP_MULTICAST
2498 
2499 /*
2500 * Multicasts are looped back for other local users
2501 */
2502 
2503 if (MULTICAST(iph->daddr) && !(dev->flags&IFF_LOOPBACK))
2504 {
2505 if(sk==NULL || sk->ip_mc_loop)
2506 {
2507 if(iph->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
2508 {
2509 ip_loopback(dev,skb);
2510 }
2511 else
2512 {
/* Only loop the frame back if this host is a member of the destination group. */
2513 struct ip_mc_list *imc=dev->ip_mc_list;
2514 while(imc!=NULL)
2515 {
2516 if(imc->multiaddr==iph->daddr)
2517 {
2518 ip_loopback(dev,skb);
2519 break;
2520 }
2521 imc=imc->next;
2522 }
2523 }
2524 }
2525 /* Multicasts with ttl 0 must not go beyond the host */
2526 
2527 if(skb->ip_hdr->ttl==0)
2528 {
2529 kfree_skb(skb, FREE_READ);
2530 return;
2531 }
2532 }
2533 #endif
/* BSD-style behaviour: loop broadcasts back to local listeners as well. */
2534 if((dev->flags&IFF_BROADCAST) && (iph->daddr==dev->pa_brdaddr||iph->daddr==0xFFFFFFFF) && !(dev->flags&IFF_LOOPBACK))
2535 ip_loopback(dev,skb);
2536 
2537 if (dev->flags & IFF_UP)
2538 {
2539 /*
2540 * If we have an owner use its priority setting,
2541 * otherwise use NORMAL
2542 */
2543 
2544 if (sk != NULL)
2545 {
2546 dev_queue_xmit(skb, dev, sk->priority);
2547 }
2548 else
2549 {
2550 dev_queue_xmit(skb, dev, SOPRI_NORMAL);
2551 }
2552 }
2553 else
2554 {
/* Device is down: record the error and discard (only if we own the buffer). */
2555 if(sk)
2556 sk->err = ENETDOWN;
2557 ip_statistics.IpOutDiscards++;
2558 if (free)
2559 kfree_skb(skb, FREE_WRITE);
2560 }
2561 }
2562
2563
2564
2565 #ifdef CONFIG_IP_MULTICAST
2566
2567 /*
2568 * Write an multicast group list table for the IGMP daemon to
2569 * read.
2570 */
2571
/*
 * /proc read handler: dump the per-device multicast group table for the
 * IGMP daemon.  Uses the standard /proc sliding-window protocol: 'offset'
 * and 'length' select the window, '*start' and the return value report
 * what part of 'buffer' satisfies it (tracked via pos/begin/len).
 */
2572 int ip_mc_procinfo(char *buffer, char **start, off_t offset, int length, int dummy)
2573 {
2574 off_t pos=0, begin=0;
2575 struct ip_mc_list *im;
2576 unsigned long flags;
2577 int len=0;
2578 struct device *dev;
2579 
2580 len=sprintf(buffer,"Device : Count\tGroup Users Timer\n");
/* Interrupts off so the device and mc lists cannot change under us. */
2581 save_flags(flags);
2582 cli();
2583 
2584 for(dev = dev_base; dev; dev = dev->next)
2585 {
2586 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST))
2587 {
2588 len+=sprintf(buffer+len,"%-10s: %5d\n",
2589 dev->name, dev->mc_count);
2590 for(im = dev->ip_mc_list; im; im = im->next)
2591 {
2592 len+=sprintf(buffer+len,
2593 "\t\t\t%08lX %5d %d:%08lX\n",
2594 im->multiaddr, im->users,
2595 im->tm_running, im->timer.expires-jiffies);
/* Discard output that falls before the requested window; stop past its end. */
2596 pos=begin+len;
2597 if(pos<offset)
2598 {
2599 len=0;
2600 begin=pos;
2601 }
2602 if(pos>offset+length)
2603 break;
2604 }
2605 }
2606 }
2607 restore_flags(flags);
2608 *start=buffer+(offset-begin);
2609 len-=(offset-begin);
2610 if(len>length)
2611 len=length;
2612 return len;
2613 }
2614
2615
2616 /*
2617 * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
2618 * an IP socket.
2619 *
2620 * We implement IP_TOS (type of service), IP_TTL (time to live).
2621 *
2622 * Next release we will sort out IP_OPTIONS since for some people they are kind of important.
2623 */
2624
2625 static struct device *ip_mc_find_devfor(unsigned long addr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
2626 {
2627 struct device *dev;
2628 for(dev = dev_base; dev; dev = dev->next)
2629 {
2630 if((dev->flags&IFF_UP)&&(dev->flags&IFF_MULTICAST)&&
2631 (dev->pa_addr==addr))
2632 return dev;
2633 }
2634
2635 return NULL;
2636 }
2637
2638 #endif
2639
/*
 * Set a SOL_IP socket option.  Handles IP_OPTIONS, IP_TOS, IP_TTL,
 * IP_HDRINCL, the multicast options and (when configured) the firewall,
 * accounting and multicast-routing controls.  Returns 0 or a negative
 * errno.
 *
 * Fix: the old-options free used 'sizeof(struct optlen)' — a typo for
 * 'sizeof(struct options)'.  The buffer is allocated as
 * sizeof(struct options) + padded option length, and opt->optlen holds
 * that padded length, so the free size must match.
 */
2640 int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
2641 {
2642 int val,err;
2643 unsigned char ucval;
2644 #if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
2645 struct ip_fw tmp_fw;
2646 #endif
/* Fetch the value both as int and as byte; some options use one, some the other. */
2647 if (optval == NULL)
2648 {
2649 val=0;
2650 ucval=0;
2651 }
2652 else
2653 {
2654 err=verify_area(VERIFY_READ, optval, sizeof(int));
2655 if(err)
2656 return err;
2657 val = get_user((int *) optval);
2658 ucval=get_user((unsigned char *) optval);
2659 }
2660 
2661 if(level!=SOL_IP)
2662 return -EOPNOTSUPP;
2663 #ifdef CONFIG_IP_MROUTE
2664 if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2665 {
2666 return ip_mroute_setsockopt(sk,optname,optval,optlen);
2667 }
2668 #endif
2669 
2670 switch(optname)
2671 {
2672 case IP_OPTIONS:
2673 {
2674 struct options * opt = NULL;
2675 struct options * old_opt;
2676 if (optlen > 40 || optlen < 0)
2677 return -EINVAL;
2678 err = verify_area(VERIFY_READ, optval, optlen);
2679 if (err)
2680 return err;
/* Allocate header plus option bytes rounded up to a 4-byte multiple. */
2681 opt = kmalloc(sizeof(struct options)+((optlen+3)&~3), GFP_KERNEL);
2682 if (!opt)
2683 return -ENOMEM;
2684 memset(opt, 0, sizeof(struct options));
2685 if (optlen)
2686 memcpy_fromfs(opt->__data, optval, optlen);
/* Pad to a multiple of 4 with IPOPT_END; optlen becomes the padded length. */
2687 while (optlen & 3)
2688 opt->__data[optlen++] = IPOPT_END;
2689 opt->optlen = optlen;
2690 opt->is_data = 1;
2691 opt->is_setbyuser = 1;
2692 if (optlen && ip_options_compile(opt, NULL)) {
2693 kfree_s(opt, sizeof(struct options) + optlen);
2694 return -EINVAL;
2695 }
2696 /*
2697 * ANK: I'm afraid that receive handler may change
2698 * options from under us.
2699 */
2700 cli();
2701 old_opt = sk->opt;
2702 sk->opt = opt;
2703 sti();
2704 if (old_opt)
/* Was 'sizeof(struct optlen)' — typo; the block was sized by struct options. */
2705 kfree_s(old_opt, sizeof(struct options) + old_opt->optlen);
2706 return 0;
2707 }
2708 case IP_TOS:
2709 if(val<0||val>255)
2710 return -EINVAL;
2711 sk->ip_tos=val;
/* Low delay / throughput TOS values also adjust the queueing priority. */
2712 if(val==IPTOS_LOWDELAY)
2713 sk->priority=SOPRI_INTERACTIVE;
2714 if(val==IPTOS_THROUGHPUT)
2715 sk->priority=SOPRI_BACKGROUND;
2716 return 0;
2717 case IP_TTL:
2718 if(val<1||val>255)
2719 return -EINVAL;
2720 sk->ip_ttl=val;
2721 return 0;
2722 case IP_HDRINCL:
2723 if(sk->type!=SOCK_RAW)
2724 return -ENOPROTOOPT;
2725 sk->ip_hdrincl=val?1:0;
2726 return 0;
2727 #ifdef CONFIG_IP_MULTICAST
2728 case IP_MULTICAST_TTL:
2729 {
2730 sk->ip_mc_ttl=(int)ucval;
2731 return 0;
2732 }
2733 case IP_MULTICAST_LOOP:
2734 {
2735 if(ucval!=0 && ucval!=1)
2736 return -EINVAL;
2737 sk->ip_mc_loop=(int)ucval;
2738 return 0;
2739 }
2740 case IP_MULTICAST_IF:
2741 {
2742 struct in_addr addr;
2743 struct device *dev=NULL;
2744 
2745 /*
2746 * Check the arguments are allowable
2747 */
2748 
2749 err=verify_area(VERIFY_READ, optval, sizeof(addr));
2750 if(err)
2751 return err;
2752 
2753 memcpy_fromfs(&addr,optval,sizeof(addr));
2754 
2755 
2756 /*
2757 * What address has been requested
2758 */
2759 
2760 if(addr.s_addr==INADDR_ANY) /* Default */
2761 {
2762 sk->ip_mc_name[0]=0;
2763 return 0;
2764 }
2765 
2766 /*
2767 * Find the device
2768 */
2769 
2770 dev=ip_mc_find_devfor(addr.s_addr);
2771 
2772 /*
2773 * Did we find one
2774 */
2775 
2776 if(dev)
2777 {
2778 strcpy(sk->ip_mc_name,dev->name);
2779 return 0;
2780 }
2781 return -EADDRNOTAVAIL;
2782 }
2783 
2784 case IP_ADD_MEMBERSHIP:
2785 {
2786 
2787 /*
2788 * FIXME: Add/Del membership should have a semaphore protecting them from re-entry
2789 */
2790 struct ip_mreq mreq;
2791 __u32 route_src;
2792 struct rtable *rt;
2793 struct device *dev=NULL;
2794 
2795 /*
2796 * Check the arguments.
2797 */
2798 
2799 err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2800 if(err)
2801 return err;
2802 
2803 memcpy_fromfs(&mreq,optval,sizeof(mreq));
2804 
2805 /*
2806 * Get device for use later
2807 */
2808 
2809 if(mreq.imr_interface.s_addr==INADDR_ANY)
2810 {
2811 /*
2812 * Not set so scan.
2813 */
2814 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2815 {
2816 dev=rt->rt_dev;
2817 rt->rt_use--;
2818 }
2819 }
2820 else
2821 {
2822 /*
2823 * Find a suitable device.
2824 */
2825 
2826 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2827 }
2828 
2829 /*
2830 * No device, no cookies.
2831 */
2832 
2833 if(!dev)
2834 return -ENODEV;
2835 
2836 /*
2837 * Join group.
2838 */
2839 
2840 return ip_mc_join_group(sk,dev,mreq.imr_multiaddr.s_addr);
2841 }
2842 
2843 case IP_DROP_MEMBERSHIP:
2844 {
2845 struct ip_mreq mreq;
2846 struct rtable *rt;
2847 __u32 route_src;
2848 struct device *dev=NULL;
2849 
2850 /*
2851 * Check the arguments
2852 */
2853 
2854 err=verify_area(VERIFY_READ, optval, sizeof(mreq));
2855 if(err)
2856 return err;
2857 
2858 memcpy_fromfs(&mreq,optval,sizeof(mreq));
2859 
2860 /*
2861 * Get device for use later
2862 */
2863 
2864 if(mreq.imr_interface.s_addr==INADDR_ANY)
2865 {
2866 if((rt=ip_rt_route(mreq.imr_multiaddr.s_addr,NULL, &route_src))!=NULL)
2867 {
2868 dev=rt->rt_dev;
2869 rt->rt_use--;
2870 }
2871 }
2872 else
2873 {
2874 
2875 dev=ip_mc_find_devfor(mreq.imr_interface.s_addr);
2876 }
2877 
2878 /*
2879 * Did we find a suitable device.
2880 */
2881 
2882 if(!dev)
2883 return -ENODEV;
2884 
2885 /*
2886 * Leave group
2887 */
2888 
2889 return ip_mc_leave_group(sk,dev,mreq.imr_multiaddr.s_addr);
2890 }
2891 #endif
2892 #ifdef CONFIG_IP_FIREWALL
2893 case IP_FW_ADD_BLK:
2894 case IP_FW_DEL_BLK:
2895 case IP_FW_ADD_FWD:
2896 case IP_FW_DEL_FWD:
2897 case IP_FW_CHK_BLK:
2898 case IP_FW_CHK_FWD:
2899 case IP_FW_FLUSH_BLK:
2900 case IP_FW_FLUSH_FWD:
2901 case IP_FW_ZERO_BLK:
2902 case IP_FW_ZERO_FWD:
2903 case IP_FW_POLICY_BLK:
2904 case IP_FW_POLICY_FWD:
2905 if(!suser())
2906 return -EPERM;
2907 if(optlen>sizeof(tmp_fw) || optlen<1)
2908 return -EINVAL;
2909 err=verify_area(VERIFY_READ,optval,optlen);
2910 if(err)
2911 return err;
2912 memcpy_fromfs(&tmp_fw,optval,optlen);
2913 err=ip_fw_ctl(optname, &tmp_fw,optlen);
2914 return -err; /* -0 is 0 after all */
2915 
2916 #endif
2917 #ifdef CONFIG_IP_ACCT
2918 case IP_ACCT_DEL:
2919 case IP_ACCT_ADD:
2920 case IP_ACCT_FLUSH:
2921 case IP_ACCT_ZERO:
2922 if(!suser())
2923 return -EPERM;
2924 if(optlen>sizeof(tmp_fw) || optlen<1)
2925 return -EINVAL;
2926 err=verify_area(VERIFY_READ,optval,optlen);
2927 if(err)
2928 return err;
2929 memcpy_fromfs(&tmp_fw, optval,optlen);
2930 err=ip_acct_ctl(optname, &tmp_fw,optlen);
2931 return -err; /* -0 is 0 after all */
2932 #endif
2933 /* IP_OPTIONS and friends go here eventually */
2934 default:
2935 return(-ENOPROTOOPT);
2936 }
2937 }
2938
2939 /*
2940 * Get the options. Note for future reference. The GET of IP options gets the
2941 * _received_ ones. The set sets the _sent_ ones.
2942 */
2943
/*
 * Read a SOL_IP socket option.  For IP_OPTIONS the stored, compiled
 * options are copied under cli() and the transformations made by
 * ip_options_compile() are undone before handing the raw bytes back
 * to user space.  Returns 0 or a negative errno.
 */
2944 int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
2945 {
2946 int val,err;
2947 #ifdef CONFIG_IP_MULTICAST
2948 int len;
2949 #endif
2950 
2951 if(level!=SOL_IP)
2952 return -EOPNOTSUPP;
2953 
2954 #ifdef CONFIG_IP_MROUTE
2955 if(optname>=MRT_BASE && optname <=MRT_BASE+10)
2956 {
2957 return ip_mroute_getsockopt(sk,optname,optval,optlen);
2958 }
2959 #endif
2960 
2961 switch(optname)
2962 {
2963 case IP_OPTIONS:
2964 {
2965 unsigned char optbuf[sizeof(struct options)+40];
2966 struct options * opt = (struct options*)optbuf;
2967 err = verify_area(VERIFY_WRITE, optlen, sizeof(int));
2968 if (err)
2969 return err;
/* Snapshot sk->opt with interrupts off; the receive path may replace it. */
2970 cli();
2971 opt->optlen = 0;
2972 if (sk->opt)
2973 memcpy(optbuf, sk->opt, sizeof(struct options)+sk->opt->optlen);
2974 sti();
2975 if (opt->optlen == 0) {
2976 put_fs_long(0,(unsigned long *) optlen);
2977 return 0;
2978 }
2979 err = verify_area(VERIFY_WRITE, optval, opt->optlen);
2980 if (err)
2981 return err;
2982 /*
2983 * Now we should undo all the changes done by ip_options_compile().
2984 */
/* Re-insert the first source-route hop that compile folded into faddr. */
2985 if (opt->srr) {
2986 unsigned char * optptr = opt->__data+opt->srr-sizeof(struct iphdr);
2987 memmove(optptr+7, optptr+4, optptr[1]-7);
2988 memcpy(optptr+3, &opt->faddr, 4);
2989 }
/* Back the record-route pointer off the slot reserved for our own address. */
2990 if (opt->rr_needaddr) {
2991 unsigned char * optptr = opt->__data+opt->rr-sizeof(struct iphdr);
2992 memset(&optptr[optptr[2]-1], 0, 4);
2993 optptr[2] -= 4;
2994 }
/* Likewise for the timestamp option's reserved time/address slots. */
2995 if (opt->ts) {
2996 unsigned char * optptr = opt->__data+opt->ts-sizeof(struct iphdr);
2997 if (opt->ts_needtime) {
2998 memset(&optptr[optptr[2]-1], 0, 4);
2999 optptr[2] -= 4;
3000 }
3001 if (opt->ts_needaddr) {
3002 memset(&optptr[optptr[2]-1], 0, 4);
3003 optptr[2] -= 4;
3004 }
3005 }
3006 put_fs_long(opt->optlen, (unsigned long *) optlen);
3007 memcpy_tofs(optval, opt->__data, opt->optlen);
3008 }
3009 return 0;
3010 case IP_TOS:
3011 val=sk->ip_tos;
3012 break;
3013 case IP_TTL:
3014 val=sk->ip_ttl;
3015 break;
3016 case IP_HDRINCL:
3017 val=sk->ip_hdrincl;
3018 break;
3019 #ifdef CONFIG_IP_MULTICAST
3020 case IP_MULTICAST_TTL:
3021 val=sk->ip_mc_ttl;
3022 break;
3023 case IP_MULTICAST_LOOP:
3024 val=sk->ip_mc_loop;
3025 break;
3026 case IP_MULTICAST_IF:
/* Returns the interface NAME, not an address, hence the separate path. */
3027 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3028 if(err)
3029 return err;
3030 len=strlen(sk->ip_mc_name);
3031 err=verify_area(VERIFY_WRITE, optval, len);
3032 if(err)
3033 return err;
3034 put_user(len,(int *) optlen);
3035 memcpy_tofs((void *)optval,sk->ip_mc_name, len);
3036 return 0;
3037 #endif
3038 default:
3039 return(-ENOPROTOOPT);
3040 }
/* Common exit for the simple integer-valued options above. */
3041 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
3042 if(err)
3043 return err;
3044 put_user(sizeof(int),(int *) optlen);
3045 
3046 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
3047 if(err)
3048 return err;
3049 put_user(val,(int *) optval);
3050 
3051 return(0);
3052 }
3053
3054 /*
3055 * Build and send a packet, with as little as one copy
3056 *
3057 * Doesn't care much about ip options... option length can be
3058 * different for fragment at 0 and other fragments.
3059 *
3060 * Note that the fragment at the highest offset is sent first,
3061 * so the getfrag routine can fill in the TCP/UDP checksum header
3062 * field in the last fragment it sends... actually it also helps
3063 * the reassemblers, they can put most packets in at the head of
3064 * the fragment queue, and they know the total size in advance. This
3065 * last feature will measurably improve the Linux fragment handler.
3066 *
3067 * The callback has five args, an arbitrary pointer (copy of frag),
3068 * the source IP address (may depend on the routing table), the
3069 * destination address (char *), the offset to copy from, and the
3070 * length to be copied.
3071 *
3072 */
3073
/*
 * Build and transmit a datagram, pulling the payload via the getfrag()
 * callback.  Fast path: a single unfragmented unicast frame.  Slow path:
 * fragments are generated highest-offset first (see the comment above).
 */
3074 int ip_build_xmit(struct sock *sk,
3075 void getfrag (const void *,
3076 __u32,
3077 char *,
3078 unsigned int,
3079 unsigned int),
3080 const void *frag,
3081 unsigned short int length,
3082 __u32 daddr,
3083 __u32 user_saddr,
3084 struct options * opt,
3085 int flags,
3086 int type)
3087 {
3088 struct rtable *rt;
3089 unsigned int fraglen, maxfraglen, fragheaderlen;
3090 int offset, mf;
3091 __u32 saddr;
3092 unsigned short id;
3093 struct iphdr *iph;
3094 int local=0;
3095 struct device *dev;
3096 int nfrags=0;
3097 __u32 true_daddr = daddr;
3098 
/* NOTE(review): sk is dereferenced here and below, yet the multicast loopback
 * code later tests sk==NULL — confirm callers never pass a NULL sk. */
3099 if (opt && opt->srr && !sk->ip_hdrincl)
3100 daddr = opt->faddr;
3101 
3102 ip_statistics.IpOutRequests++;
3103 
3104 #ifdef CONFIG_IP_MULTICAST
/* Multicast with an explicitly bound interface bypasses routing entirely. */
3105 if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
3106 {
3107 dev=dev_get(sk->ip_mc_name);
3108 if(!dev)
3109 return -ENODEV;
3110 rt=NULL;
3111 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
3112 saddr = sk->saddr;
3113 else
3114 saddr = dev->pa_addr;
3115 }
3116 else
3117 {
3118 #endif
3119 /*
3120 * Perform the IP routing decisions
3121 */
3122 
3123 if(sk->localroute || flags&MSG_DONTROUTE)
3124 local=1;
3125 
3126 rt = sk->ip_route_cache;
3127 
3128 /*
3129 * See if the routing cache is outdated. We need to clean this up once we are happy it is reliable
3130 * by doing the invalidation actively in the route change and header change.
3131 */
3132 
3133 saddr=sk->ip_route_saddr;
3134 if(!rt || sk->ip_route_stamp != rt_stamp ||
3135 daddr!=sk->ip_route_daddr || sk->ip_route_local!=local ||
3136 (sk->saddr && sk->saddr != saddr))
3137 {
3138 if(local)
3139 rt = ip_rt_local(daddr, NULL, &saddr);
3140 else
3141 rt = ip_rt_route(daddr, NULL, &saddr);
3142 sk->ip_route_local=local;
3143 sk->ip_route_daddr=daddr;
3144 sk->ip_route_saddr=saddr;
3145 sk->ip_route_stamp=rt_stamp;
3146 sk->ip_route_cache=rt;
3147 sk->ip_hcache_ver=NULL;
3148 sk->ip_hcache_state= 0;
3149 }
3150 else if(rt)
3151 {
3152 /*
3153 * Attempt header caches only if the cached route is being reused. Header cache
3154 * is not ultra cheap to set up. This means we only set it up on the second packet,
3155 * so one shot communications are not slowed. We assume (seems reasonable) that 2 is
3156 * probably going to be a stream of data.
3157 */
3158 if(rt->rt_dev->header_cache && sk->ip_hcache_state!= -1)
3159 {
/* NOTE(review): the build/can't-cache branches here read inverted relative to
 * the comments — verify which condition is meant to trigger the rebuild. */
3160 if(sk->ip_hcache_ver==NULL || sk->ip_hcache_stamp!=*sk->ip_hcache_ver)
3161 rt->rt_dev->header_cache(rt->rt_dev,sk,saddr,daddr);
3162 else
3163 /* Can't cache. Remember this */
3164 sk->ip_hcache_state= -1;
3165 }
3166 }
3167 
3168 if (rt == NULL)
3169 {
3170 ip_statistics.IpOutNoRoutes++;
3171 return(-ENETUNREACH);
3172 }
3173 
3174 if (sk->saddr && (!LOOPBACK(sk->saddr) || LOOPBACK(daddr)))
3175 saddr = sk->saddr;
3176 
3177 dev=rt->rt_dev;
3178 #ifdef CONFIG_IP_MULTICAST
3179 }
3180 #endif
3181 if (user_saddr)
3182 saddr = user_saddr;
3183 
3184 /*
3185 * Now compute the buffer space we require
3186 */
3187 
3188 /*
3189 * Try the simple case first. This leaves broadcast, multicast, fragmented frames, and by
3190 * choice RAW frames within 20 bytes of maximum size(rare) to the long path
3191 */
3192 
3193 length += 20;
3194 if (!sk->ip_hdrincl && opt) {
3195 length += opt->optlen;
3196 if (opt->is_strictroute && rt && rt->rt_gateway) {
3197 ip_statistics.IpOutNoRoutes++;
3198 return -ENETUNREACH;
3199 }
3200 }
3201 if(length <= dev->mtu && !MULTICAST(daddr) && daddr!=0xFFFFFFFF && daddr!=dev->pa_brdaddr)
3202 {
3203 int error;
3204 struct sk_buff *skb=sock_alloc_send_skb(sk, length+15+dev->hard_header_len,0, 0,&error);
3205 if(skb==NULL)
3206 {
3207 ip_statistics.IpOutDiscards++;
3208 return error;
3209 }
3210 skb->dev=dev;
3211 skb->free=1;
3212 skb->when=jiffies;
3213 skb->sk=sk;
3214 skb->arp=0;
3215 skb->saddr=saddr;
3216 skb->raddr=(rt&&rt->rt_gateway)?rt->rt_gateway:daddr;
3217 skb_reserve(skb,(dev->hard_header_len+15)&~15);
/* Use the cached MAC header if valid; otherwise build one (resolved => arp=1). */
3218 if(sk->ip_hcache_state>0)
3219 {
3220 memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data,dev->hard_header_len);
3221 skb->arp=1;
3222 }
3223 else if(dev->hard_header)
3224 {
3225 if(dev->hard_header(skb,dev,ETH_P_IP,NULL,NULL,0)>0)
3226 skb->arp=1;
3227 }
3228 else
3229 skb->arp=1;
3230 skb->ip_hdr=iph=(struct iphdr *)skb_put(skb,length);
3231 dev_lock_list();
3232 if(!sk->ip_hdrincl)
3233 {
3234 iph->version=4;
3235 iph->ihl=5;
3236 iph->tos=sk->ip_tos;
3237 iph->tot_len = htons(length);
3238 iph->id=htons(ip_id_count++);
3239 iph->frag_off = 0;
3240 iph->ttl=sk->ip_ttl;
3241 iph->protocol=type;
3242 iph->saddr=saddr;
3243 iph->daddr=daddr;
3244 if (opt) {
3245 iph->ihl += opt->optlen>>2;
3246 ip_options_build(skb, opt,
3247 true_daddr, dev->pa_addr, 0);
3248 }
3249 iph->check=0;
3250 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
3251 getfrag(frag,saddr,((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
3252 }
3253 else
/* Raw socket with IP_HDRINCL: the caller supplies the whole IP header. */
3254 getfrag(frag,saddr,(void *)iph,0,length-20);
3255 dev_unlock_list();
3256 #ifdef CONFIG_IP_FIREWALL
3257 if(ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
3258 {
3259 kfree_skb(skb, FREE_WRITE);
3260 return -EPERM;
3261 }
3262 #endif
3263 #ifdef CONFIG_IP_ACCT
3264 ip_fw_chk((void *)skb->data,dev,ip_acct_chain, IP_FW_F_ACCEPT,1);
3265 #endif
3266 if(dev->flags&IFF_UP)
3267 dev_queue_xmit(skb,dev,sk->priority);
3268 else
3269 {
3270 ip_statistics.IpOutDiscards++;
3271 kfree_skb(skb, FREE_WRITE);
3272 }
3273 return 0;
3274 }
/* Slow path: work out per-fragment overhead and the 8-byte aligned payload. */
3275 length-=20;
3276 if (sk && !sk->ip_hdrincl && opt) {
3277 length -= opt->optlen;
3278 fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
3279 maxfraglen = ((dev->mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
3280 } else {
3281 fragheaderlen = dev->hard_header_len;
3282 if(!sk->ip_hdrincl)
3283 fragheaderlen += 20;
3284 
3285 /*
3286 * Fragheaderlen is the size of 'overhead' on each buffer. Now work
3287 * out the size of the frames to send.
3288 */
3289 
3290 maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
3291 }
3292 
3293 /*
3294 * Start at the end of the frame by handling the remainder.
3295 */
3296 
3297 offset = length - (length % (maxfraglen - fragheaderlen));
3298 
3299 /*
3300 * Amount of memory to allocate for final fragment.
3301 */
3302 
3303 fraglen = length - offset + fragheaderlen;
3304 
/* Exact multiple: the "remainder" is a full-sized fragment. */
3305 if(length-offset==0)
3306 {
3307 fraglen = maxfraglen;
3308 offset -= maxfraglen-fragheaderlen;
3309 }
3310 
3311 
3312 /*
3313 * The last fragment will not have MF (more fragments) set.
3314 */
3315 
3316 mf = 0;
3317 
3318 /*
3319 * Can't fragment raw packets
3320 */
3321 
3322 if (sk->ip_hdrincl && offset > 0)
3323 return(-EMSGSIZE);
3324 
3325 /*
3326 * Lock the device lists.
3327 */
3328 
3329 dev_lock_list();
3330 
3331 /*
3332 * Get an identifier
3333 */
3334 
3335 id = htons(ip_id_count++);
3336 
3337 /*
3338 * Begin outputting the bytes.
3339 */
3340 
3341 do
3342 {
3343 struct sk_buff * skb;
3344 int error;
3345 char *data;
3346 
3347 /*
3348 * Get the memory we require with some space left for alignment.
3349 */
3350 
3351 skb = sock_alloc_send_skb(sk, fraglen+15, 0, 0, &error);
3352 if (skb == NULL)
3353 {
3354 ip_statistics.IpOutDiscards++;
3355 if(nfrags>1)
3356 ip_statistics.IpFragCreates++;
3357 dev_unlock_list();
3358 return(error);
3359 }
3360 
3361 /*
3362 * Fill in the control structures
3363 */
3364 
3365 skb->next = skb->prev = NULL;
3366 skb->dev = dev;
3367 skb->when = jiffies;
3368 skb->free = 1; /* dubious, this one */
3369 skb->sk = sk;
3370 skb->arp = 0;
3371 skb->saddr = saddr;
3372 skb->raddr = (rt&&rt->rt_gateway) ? rt->rt_gateway : daddr;
3373 skb_reserve(skb,(dev->hard_header_len+15)&~15);
3374 data = skb_put(skb, fraglen-dev->hard_header_len);
3375 
3376 /*
3377 * Save us ARP and stuff. In the optimal case we do no route lookup (route cache ok)
3378 * no ARP lookup (arp cache ok) and output. The cache checks are still too slow but
3379 * this can be fixed later. For gateway routes we ought to have a rt->.. header cache
3380 * pointer to speed header cache builds for identical targets.
3381 */
3382 
3383 if(sk->ip_hcache_state>0)
3384 {
3385 memcpy(skb_push(skb,dev->hard_header_len),sk->ip_hcache_data, dev->hard_header_len);
3386 skb->arp=1;
3387 }
3388 else if (dev->hard_header)
3389 {
3390 if(dev->hard_header(skb, dev, ETH_P_IP,
3391 NULL, NULL, 0)>0)
3392 skb->arp=1;
3393 }
3394 
3395 /*
3396 * Find where to start putting bytes.
3397 */
3398 
3399 skb->ip_hdr = iph = (struct iphdr *)data;
3400 
3401 /*
3402 * Only write IP header onto non-raw packets
3403 */
3404 
3405 if(!sk->ip_hdrincl)
3406 {
3407 
3408 iph->version = 4;
3409 iph->ihl = 5; /* ugh */
3410 if (opt) {
3411 iph->ihl += opt->optlen>>2;
3412 ip_options_build(skb, opt,
3413 true_daddr, dev->pa_addr, offset);
3414 }
3415 iph->tos = sk->ip_tos;
3416 iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
3417 iph->id = id;
3418 iph->frag_off = htons(offset>>3);
3419 iph->frag_off |= mf;
3420 #ifdef CONFIG_IP_MULTICAST
3421 if (MULTICAST(daddr))
3422 iph->ttl = sk->ip_mc_ttl;
3423 else
3424 #endif
3425 iph->ttl = sk->ip_ttl;
3426 iph->protocol = type;
3427 iph->check = 0;
3428 iph->saddr = saddr;
3429 iph->daddr = daddr;
3430 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
3431 data += iph->ihl*4;
3432 
3433 /*
3434 * Any further fragments will have MF set.
3435 */
3436 
3437 mf = htons(IP_MF);
3438 }
3439 
3440 /*
3441 * User data callback
3442 */
3443 
3444 getfrag(frag, saddr, data, offset, fraglen-fragheaderlen);
3445 
3446 /*
3447 * Account for the fragment.
3448 */
3449 
/* Firewall/accounting look only at the first (offset 0) fragment. */
3450 #ifdef CONFIG_IP_FIREWALL
3451 if(!offset && ip_fw_chk(iph, dev, ip_fw_blk_chain, ip_fw_blk_policy,0) < FW_ACCEPT)
3452 {
3453 kfree_skb(skb, FREE_WRITE);
3454 dev_unlock_list();
3455 return -EPERM;
3456 }
3457 #endif
3458 #ifdef CONFIG_IP_ACCT
3459 if(!offset)
3460 ip_fw_chk(iph, dev, ip_acct_chain, IP_FW_F_ACCEPT, 1);
3461 #endif
3462 offset -= (maxfraglen-fragheaderlen);
3463 fraglen = maxfraglen;
3464 
3465 #ifdef CONFIG_IP_MULTICAST
3466 
3467 /*
3468 * Multicasts are looped back for other local users
3469 */
3470 
3471 if (MULTICAST(daddr) && !(dev->flags&IFF_LOOPBACK))
3472 {
3473 /*
3474 * Loop back any frames. The check for IGMP_ALL_HOSTS is because
3475 * you are always magically a member of this group.
3476 *
3477 * Always loop back all host messages when running as a multicast router.
3478 */
3479 
3480 if(sk==NULL || sk->ip_mc_loop)
3481 {
/* NOTE(review): this tests skb->daddr where ip_queue_xmit tests iph->daddr
 * for the same condition — confirm the two fields agree here. */
3482 if(skb->daddr==IGMP_ALL_HOSTS || (dev->flags&IFF_ALLMULTI))
3483 ip_loopback(rt?rt->rt_dev:dev,skb);
3484 else
3485 {
3486 struct ip_mc_list *imc=rt?rt->rt_dev->ip_mc_list:dev->ip_mc_list;
3487 while(imc!=NULL)
3488 {
3489 if(imc->multiaddr==daddr)
3490 {
3491 ip_loopback(rt?rt->rt_dev:dev,skb);
3492 break;
3493 }
3494 imc=imc->next;
3495 }
3496 }
3497 }
3498 
3499 /*
3500 * Multicasts with ttl 0 must not go beyond the host. Fixme: avoid the
3501 * extra clone.
3502 */
3503 
3504 if(skb->ip_hdr->ttl==0)
3505 kfree_skb(skb, FREE_READ);
3506 }
3507 #endif
3508 
3509 nfrags++;
3510 
3511 /*
3512 * BSD loops broadcasts
3513 */
3514 
3515 if((dev->flags&IFF_BROADCAST) && (daddr==0xFFFFFFFF || daddr==dev->pa_brdaddr) && !(dev->flags&IFF_LOOPBACK))
3516 ip_loopback(dev,skb);
3517 
3518 /*
3519 * Now queue the bytes into the device.
3520 */
3521 
3522 if (dev->flags & IFF_UP)
3523 {
3524 dev_queue_xmit(skb, dev, sk->priority);
3525 }
3526 else
3527 {
3528 /*
3529 * Whoops...
3530 */
3531 
3532 ip_statistics.IpOutDiscards++;
3533 if(nfrags>1)
3534 ip_statistics.IpFragCreates+=nfrags;
3535 kfree_skb(skb, FREE_WRITE);
3536 dev_unlock_list();
3537 /*
3538 * BSD behaviour.
3539 */
3540 if(sk!=NULL)
3541 sk->err=ENETDOWN;
3542 return(0); /* lose rest of fragments */
3543 }
3544 }
3545 while (offset >= 0);
3546 if(nfrags>1)
3547 ip_statistics.IpFragCreates+=nfrags;
3548 dev_unlock_list();
3549 return(0);
3550 }
3551
3552
3553 /*
3554 * IP protocol layer initialiser
3555 */
3556
/* Packet-type registration for IP frames.  The 'type' field is left 0
 * here and filled in with htons(ETH_P_IP) by ip_init() at boot. */
3557 static struct packet_type ip_packet_type =
3558 {
3559 0, /* MUTTER ntohs(ETH_P_IP),*/
3560 NULL, /* All devices */
3561 ip_rcv, /* handler invoked for each received IP frame */
3562 NULL,
3563 NULL,
3564 };
3565
3566 #ifdef CONFIG_RTNETLINK
3567
3568 /*
3569 * Netlink hooks for IP
3570 */
3571
3572 void ip_netlink_msg(unsigned long msg, __u32 daddr, __u32 gw, __u32 mask, short flags, short metric, char *name)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
3573 {
3574 struct sk_buff *skb=alloc_skb(sizeof(struct netlink_rtinfo), GFP_ATOMIC);
3575 struct netlink_rtinfo *nrt;
3576 struct sockaddr_in *s;
3577 if(skb==NULL)
3578 return;
3579 nrt=(struct netlink_rtinfo *)skb_put(skb, sizeof(struct netlink_rtinfo));
3580 nrt->rtmsg_type=msg;
3581 s=(struct sockaddr_in *)&nrt->rtmsg_dst;
3582 s->sin_family=AF_INET;
3583 s->sin_addr.s_addr=daddr;
3584 s=(struct sockaddr_in *)&nrt->rtmsg_gateway;
3585 s->sin_family=AF_INET;
3586 s->sin_addr.s_addr=gw;
3587 s=(struct sockaddr_in *)&nrt->rtmsg_genmask;
3588 s->sin_family=AF_INET;
3589 s->sin_addr.s_addr=mask;
3590 nrt->rtmsg_flags=flags;
3591 nrt->rtmsg_metric=metric;
3592 strcpy(nrt->rtmsg_device,name);
3593 netlink_post(NETLINK_ROUTE, skb);
3594 }
3595
3596 #endif
3597
3598 /*
3599 * Device notifier
3600 */
3601
3602 static int ip_rt_event(unsigned long event, void *ptr)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
3603 {
3604 struct device *dev=ptr;
3605 if(event==NETDEV_DOWN)
3606 {
3607 ip_netlink_msg(RTMSG_DELDEVICE, 0,0,0,0,0,dev->name);
3608 ip_rt_flush(dev);
3609 }
3610 /*
3611 * Join the intial group if multicast.
3612 */
3613 if(event==NETDEV_UP)
3614 {
3615 #ifdef CONFIG_IP_MULTICAST
3616 ip_mc_allhost(dev);
3617 #endif
3618 ip_netlink_msg(RTMSG_NEWDEVICE, 0,0,0,0,0,dev->name);
3619 }
3620 return NOTIFY_DONE;
3621 }
3622
3623 struct notifier_block ip_rt_notifier={
3624 ip_rt_event,
3625 NULL,
3626 0
3627 };
3628
3629 /*
3630 * IP registers the packet type and then calls the subprotocol initialisers
3631 */
3632
3633 void ip_init(void)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
3634 {
3635 ip_packet_type.type=htons(ETH_P_IP);
3636 dev_add_pack(&ip_packet_type);
3637
3638 /* So we flush routes when a device is downed */
3639 register_netdevice_notifier(&ip_rt_notifier);
3640
3641 /* ip_raw_init();
3642 ip_packet_init();
3643 ip_tcp_init();
3644 ip_udp_init();*/
3645
3646 #ifdef CONFIG_IP_MULTICAST
3647 proc_net_register(&(struct proc_dir_entry) {
3648 PROC_NET_IGMP, 4, "igmp",
3649 S_IFREG | S_IRUGO, 1, 0, 0,
3650 0, &proc_net_inode_operations,
3651 ip_mc_procinfo
3652 });
3653 #endif
3654 }
3655