1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The IP forwarding functionality.
7 *
8 * Authors: see ip.c
9 *
10 * Fixes:
11 * Many : Split from ip.c , see ip_input.c for history.
12 * Dave Gregorich : NULL ip_rt_put fix for multicast routing.
13 * Jos Vos : Add call_out_firewall before sending,
14 * use output device for accounting.
15 */
16
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/mm.h>
20 #include <linux/sched.h>
21 #include <linux/skbuff.h>
22 #include <linux/ip.h>
23 #include <linux/icmp.h>
24 #include <linux/netdevice.h>
25 #include <net/sock.h>
26 #include <net/ip.h>
27 #include <net/icmp.h>
28 #include <linux/tcp.h>
29 #include <linux/udp.h>
30 #include <linux/firewall.h>
31 #include <linux/ip_fw.h>
32 #include <net/checksum.h>
33 #include <linux/route.h>
34 #include <net/route.h>
35
36 #ifdef CONFIG_IP_FORWARD
37 #ifdef CONFIG_IP_MROUTE
38
39 /*
40 * Encapsulate a packet by attaching a valid IPIP header to it.
41 * This avoids tunnel drivers and other mess and gives us the speed so
42 * important for multicast video.
43 */
44
45 static void ip_encap(struct sk_buff *skb, int len, struct device *out, __u32 daddr)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
46 {
47 /*
48 * There is space for the IPIP header and MAC left.
49 *
50 * Firstly push down and install the IPIP header.
51 */
52 struct iphdr *iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
53 if(len>65515)
54 len=65515;
55 iph->version = 4;
56 iph->tos = skb->ip_hdr->tos;
57 iph->ttl = skb->ip_hdr->ttl;
58 iph->frag_off = 0;
59 iph->daddr = daddr;
60 iph->saddr = out->pa_addr;
61 iph->protocol = IPPROTO_IPIP;
62 iph->ihl = 5;
63 iph->tot_len = htons(skb->len);
64 iph->id = htons(ip_id_count++);
65 ip_send_check(iph);
66
67 skb->dev = out;
68 skb->arp = 1;
69 skb->raddr=daddr;
70 /*
71 * Now add the physical header (driver will push it down).
72 */
73 if (out->hard_header && out->hard_header(skb, out, ETH_P_IP, NULL, NULL, len)<0)
74 skb->arp=0;
75 /*
76 * Read to queue for transmission.
77 */
78 }
79
80 #endif
81
82 /*
83 * Forward an IP datagram to its next destination.
84 */
85
86 int ip_forward(struct sk_buff *skb, struct device *dev, int is_frag,
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
87 __u32 target_addr)
88 {
89 struct device *dev2; /* Output device */
90 struct iphdr *iph; /* Our header */
91 struct sk_buff *skb2; /* Output packet */
92 struct rtable *rt; /* Route we use */
93 unsigned char *ptr; /* Data pointer */
94 unsigned long raddr; /* Router IP address */
95 struct options * opt = (struct options*)skb->proto_priv;
96 struct hh_cache *hh = NULL;
97 int encap = 0; /* Encap length */
98 #ifdef CONFIG_FIREWALL
99 int fw_res = 0; /* Forwarding result */
100 #ifdef CONFIG_IP_MASQUERADE
101 struct sk_buff *skb_in = skb; /* So we can remember if the masquerader did some swaps */
102 #endif
103
104 /*
105 * See if we are allowed to forward this.
106 * Note: demasqueraded fragments are always 'back'warded.
107 */
108
109
110 if(!(is_frag&IPFWD_MASQUERADED))
111 {
112 fw_res=call_fw_firewall(PF_INET, skb, skb->h.iph);
113 switch (fw_res) {
114 case FW_ACCEPT:
115 case FW_MASQUERADE:
116 break;
117 case FW_REJECT:
118 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
119 /* fall thru */
120 default:
121 return -1;
122 }
123 }
124 #endif
125 /*
126 * According to the RFC, we must first decrease the TTL field. If
127 * that reaches zero, we must reply an ICMP control message telling
128 * that the packet's lifetime expired.
129 *
130 * Exception:
131 * We may not generate an ICMP for an ICMP. icmp_send does the
132 * enforcement of this so we can forget it here. It is however
133 * sometimes VERY important.
134 */
135
136 iph = skb->h.iph;
137 iph->ttl--;
138
139 /*
140 * Re-compute the IP header checksum.
141 * This is inefficient. We know what has happened to the header
142 * and could thus adjust the checksum as Phil Karn does in KA9Q
143 */
144
145 iph->check = ntohs(iph->check) + 0x0100;
146 if ((iph->check & 0xFF00) == 0)
147 iph->check++; /* carry overflow */
148 iph->check = htons(iph->check);
149
150 if (iph->ttl <= 0)
151 {
152 /* Tell the sender its packet died... */
153 icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0, dev);
154 return -1;
155 }
156
157 #ifdef CONFIG_IP_MROUTE
158 if(!(is_frag&IPFWD_MULTICASTING))
159 {
160 #endif
161 /*
162 * OK, the packet is still valid. Fetch its destination address,
163 * and give it to the IP sender for further processing.
164 */
165
166 rt = ip_rt_route(target_addr, 0);
167
168 if (rt == NULL)
169 {
170 /*
171 * Tell the sender its packet cannot be delivered. Again
172 * ICMP is screened later.
173 */
174 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0, dev);
175 return -1;
176 }
177
178
179 /*
180 * Gosh. Not only is the packet valid; we even know how to
181 * forward it onto its final destination. Can we say this
182 * is being plain lucky?
183 * If the router told us that there is no GW, use the dest.
184 * IP address itself- we seem to be connected directly...
185 */
186
187 raddr = rt->rt_gateway;
188
189 if (opt->is_strictroute && (rt->rt_flags & RTF_GATEWAY)) {
190 /*
191 * Strict routing permits no gatewaying
192 */
193
194 ip_rt_put(rt);
195 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0, dev);
196 return -1;
197 }
198
199 /*
200 * Having picked a route we can now send the frame out.
201 */
202
203 dev2 = rt->rt_dev;
204 hh = rt->rt_hh;
205 /*
206 * In IP you never have to forward a frame on the interface that it
207 * arrived upon. We now generate an ICMP HOST REDIRECT giving the route
208 * we calculated.
209 */
210 #ifndef CONFIG_IP_NO_ICMP_REDIRECT
211 if (dev == dev2 &&
212 !((iph->saddr^dev->pa_addr)&dev->pa_mask) &&
213 (rt->rt_flags&RTF_MODIFIED) && !opt->srr)
214 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, raddr, dev);
215 #endif
216 #ifdef CONFIG_IP_MROUTE
217 }
218 else
219 {
220 /*
221 * Multicast route forward. Routing is already done
222 */
223 dev2=skb->dev;
224 raddr=skb->raddr;
225 if(is_frag&IPFWD_MULTITUNNEL) /* VIFF_TUNNEL mode */
226 encap=20;
227 rt=NULL;
228 }
229 #endif
230
231
232 /*
233 * We now may allocate a new buffer, and copy the datagram into it.
234 * If the indicated interface is up and running, kick it.
235 */
236
237 if (dev2->flags & IFF_UP)
238 {
239 #ifdef CONFIG_IP_MASQUERADE
240 /*
241 * If this fragment needs masquerading, make it so...
242 * (Dont masquerade de-masqueraded fragments)
243 */
244 if (!(is_frag&IPFWD_MASQUERADED) && fw_res==FW_MASQUERADE)
245 ip_fw_masquerade(&skb, dev2);
246 #endif
247 IS_SKB(skb);
248
249 if (skb->len+encap > dev2->mtu && (ntohs(iph->frag_off) & IP_DF))
250 {
251 ip_statistics.IpFragFails++;
252 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(dev2->mtu), dev);
253 if(rt)
254 ip_rt_put(rt);
255 return -1;
256 }
257
258 #ifdef CONFIG_IP_MROUTE
259 if(skb_headroom(skb)-encap<dev2->hard_header_len)
260 {
261 skb2 = alloc_skb(dev2->hard_header_len + skb->len + encap + 15, GFP_ATOMIC);
262 #else
263 if(skb_headroom(skb)<dev2->hard_header_len)
264 {
265 skb2 = alloc_skb(dev2->hard_header_len + skb->len + 15, GFP_ATOMIC);
266 #endif
267 /*
268 * This is rare and since IP is tolerant of network failures
269 * quite harmless.
270 */
271
272 if (skb2 == NULL)
273 {
274 NETDEBUG(printk("\nIP: No memory available for IP forward\n"));
275 if(rt)
276 ip_rt_put(rt);
277 return -1;
278 }
279
280 IS_SKB(skb2);
281 /*
282 * Add the physical headers.
283 */
284 skb2->protocol=htons(ETH_P_IP);
285 #ifdef CONFIG_IP_MROUTE
286 if(is_frag&IPFWD_MULTITUNNEL)
287 {
288 skb_reserve(skb,(encap+dev->hard_header_len+15)&~15); /* 16 byte aligned IP headers are good */
289 ip_encap(skb2,skb->len, dev2, raddr);
290 }
291 else
292 #endif
293 ip_send(rt,skb2,raddr,skb->len,dev2,dev2->pa_addr);
294
295 /*
296 * We have to copy the bytes over as the new header wouldn't fit
297 * the old buffer. This should be very rare.
298 */
299
300 ptr = skb_put(skb2,skb->len);
301 skb2->free = 1;
302 skb2->h.raw = ptr;
303
304 /*
305 * Copy the packet data into the new buffer.
306 */
307 memcpy(ptr, skb->h.raw, skb->len);
308 memcpy(skb2->proto_priv, skb->proto_priv, sizeof(skb->proto_priv));
309 iph = skb2->ip_hdr = skb2->h.iph;
310 }
311 else
312 {
313 /*
314 * Build a new MAC header.
315 */
316
317 skb2 = skb;
318 skb2->dev=dev2;
319 #ifdef CONFIG_IP_MROUTE
320 if(is_frag&IPFWD_MULTITUNNEL)
321 ip_encap(skb,skb->len, dev2, raddr);
322 else
323 {
324 #endif
325 skb->arp=1;
326 skb->raddr=raddr;
327 if (hh)
328 {
329 memcpy(skb_push(skb, dev2->hard_header_len), hh->hh_data, dev2->hard_header_len);
330 if (!hh->hh_uptodate)
331 {
332 #if RT_CACHE_DEBUG >= 2
333 printk("ip_forward: hh miss %08x via %08x\n", target_addr, rt->rt_gateway);
334 #endif
335 skb->arp = 0;
336 }
337 }
338 else if (dev2->hard_header)
339 {
340 if(dev2->hard_header(skb, dev2, ETH_P_IP, NULL, NULL, skb->len)<0)
341 skb->arp=0;
342 }
343 #ifdef CONFIG_IP_MROUTE
344 }
345 #endif
346 }
347 #ifdef CONFIG_FIREWALL
348 if((fw_res = call_out_firewall(PF_INET, skb2, iph)) < FW_ACCEPT)
349 {
350 /* FW_ACCEPT and FW_MASQUERADE are treated equal:
351 masquerading is only supported via forward rules */
352 if (fw_res == FW_REJECT)
353 icmp_send(skb2, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, dev);
354 if (skb != skb2)
355 kfree_skb(skb2,FREE_WRITE);
356 return -1;
357 }
358 #endif
359 ip_statistics.IpForwDatagrams++;
360
361 if (opt->optlen)
362 {
363 unsigned char * optptr;
364 if (opt->rr_needaddr)
365 {
366 optptr = (unsigned char *)iph + opt->rr;
367 memcpy(&optptr[optptr[2]-5], &dev2->pa_addr, 4);
368 opt->is_changed = 1;
369 }
370 if (opt->srr_is_hit)
371 {
372 int srrptr, srrspace;
373
374 optptr = (unsigned char *)iph + opt->srr;
375
376 for ( srrptr=optptr[2], srrspace = optptr[1];
377 srrptr <= srrspace;
378 srrptr += 4
379 )
380 {
381 if (srrptr + 3 > srrspace)
382 break;
383 if (memcmp(&target_addr, &optptr[srrptr-1], 4) == 0)
384 break;
385 }
386 if (srrptr + 3 <= srrspace)
387 {
388 opt->is_changed = 1;
389 memcpy(&optptr[srrptr-1], &dev2->pa_addr, 4);
390 iph->daddr = target_addr;
391 optptr[2] = srrptr+4;
392 }
393 else
394 printk("ip_forward(): Argh! Destination lost!\n");
395 }
396 if (opt->ts_needaddr)
397 {
398 optptr = (unsigned char *)iph + opt->ts;
399 memcpy(&optptr[optptr[2]-9], &dev2->pa_addr, 4);
400 opt->is_changed = 1;
401 }
402 if (opt->is_changed)
403 {
404 opt->is_changed = 0;
405 ip_send_check(iph);
406 }
407 }
408 /*
409 * ANK: this is point of "no return", we cannot send an ICMP,
410 * because we changed SRR option.
411 */
412
413 /*
414 * See if it needs fragmenting. Note in ip_rcv we tagged
415 * the fragment type. This must be right so that
416 * the fragmenter does the right thing.
417 */
418
419 if(skb2->len > dev2->mtu + dev2->hard_header_len)
420 {
421 ip_fragment(NULL,skb2,dev2, is_frag);
422 kfree_skb(skb2,FREE_WRITE);
423 }
424 else
425 {
426 #ifdef CONFIG_IP_ACCT
427 /*
428 * Count mapping we shortcut
429 */
430
431 ip_fw_chk(iph,dev2,ip_acct_chain,IP_FW_F_ACCEPT,1);
432 #endif
433
434 /*
435 * Map service types to priority. We lie about
436 * throughput being low priority, but it's a good
437 * choice to help improve general usage.
438 */
439 if(iph->tos & IPTOS_LOWDELAY)
440 dev_queue_xmit(skb2, dev2, SOPRI_INTERACTIVE);
441 else if(iph->tos & IPTOS_THROUGHPUT)
442 dev_queue_xmit(skb2, dev2, SOPRI_BACKGROUND);
443 else
444 dev_queue_xmit(skb2, dev2, SOPRI_NORMAL);
445 }
446 }
447 else
448 {
449 if(rt)
450 ip_rt_put(rt);
451 return -1;
452 }
453 if(rt)
454 ip_rt_put(rt);
455
456 /*
457 * Tell the caller if their buffer is free.
458 */
459
460 if(skb==skb2)
461 return 0;
462
463 #ifdef CONFIG_IP_MASQUERADE
464 /*
465 * The original is free. Free our copy and
466 * tell the caller not to free.
467 */
468 if(skb!=skb_in)
469 {
470 kfree_skb(skb_in, FREE_WRITE);
471 return 0;
472 }
473 #endif
474 return 1;
475 }
476
477
478 #endif