1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, sk->inuse/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Version: @(#)sock.c 1.0.17 06/02/93
11 *
12 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
37 * code. The ACK stuff can wait and needs major
38 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 *
71 * To Fix:
72 *
73 *
74 * This program is free software; you can redistribute it and/or
75 * modify it under the terms of the GNU General Public License
76 * as published by the Free Software Foundation; either version
77 * 2 of the License, or (at your option) any later version.
78 */
79
80 #include <linux/config.h>
81 #include <linux/errno.h>
82 #include <linux/types.h>
83 #include <linux/socket.h>
84 #include <linux/in.h>
85 #include <linux/kernel.h>
86 #include <linux/major.h>
87 #include <linux/sched.h>
88 #include <linux/timer.h>
89 #include <linux/string.h>
90 #include <linux/sockios.h>
91 #include <linux/net.h>
92 #include <linux/fcntl.h>
93 #include <linux/mm.h>
94 #include <linux/interrupt.h>
95
96 #include <asm/segment.h>
97 #include <asm/system.h>
98
99 #include <linux/inet.h>
100 #include <linux/netdevice.h>
101 #include <net/ip.h>
102 #include <net/protocol.h>
103 #include <net/arp.h>
104 #include <net/rarp.h>
105 #include <net/route.h>
106 #include <net/tcp.h>
107 #include <net/udp.h>
108 #include <linux/skbuff.h>
109 #include <net/sock.h>
110 #include <net/raw.h>
111 #include <net/icmp.h>
112
/*
 *	Smaller of two values. This is a plain function-like macro, so
 *	either argument may be evaluated twice: do not pass expressions
 *	with side effects.
 */
#define min(a, b)	(((a) < (b)) ? (a) : (b))
114
/*
 *	The routines below are meant for all protocols to use and cover
 *	what goes on at the socket level. Everything here is generic.
 */
119
120 int sock_setsockopt(struct sock *sk, int level, int optname,
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
121 char *optval, int optlen)
122 {
123 int val;
124 int valbool;
125 int err;
126 struct linger ling;
127
128 if (optval == NULL)
129 return(-EINVAL);
130
131 err=verify_area(VERIFY_READ, optval, sizeof(int));
132 if(err)
133 return err;
134
135 val = get_user((int *)optval);
136 valbool = val?1:0;
137
138 switch(optname)
139 {
140 case SO_DEBUG:
141 if(val && !suser())
142 return(-EPERM);
143 sk->debug=valbool;
144 return 0;
145 case SO_REUSEADDR:
146 sk->reuse = valbool;
147 return(0);
148 case SO_TYPE:
149 case SO_ERROR:
150 return(-ENOPROTOOPT);
151 case SO_DONTROUTE:
152 sk->localroute=valbool;
153 return 0;
154 case SO_BROADCAST:
155 sk->broadcast=valbool;
156 return 0;
157 case SO_SNDBUF:
158 if(val>32767)
159 val=32767;
160 if(val<256)
161 val=256;
162 sk->sndbuf=val;
163 return 0;
164
165 case SO_RCVBUF:
166 if(val>32767)
167 val=32767;
168 if(val<256)
169 val=256;
170 sk->rcvbuf=val;
171 return(0);
172
173 case SO_KEEPALIVE:
174 sk->keepopen = valbool;
175 return(0);
176
177 case SO_OOBINLINE:
178 sk->urginline = valbool;
179 return(0);
180
181 case SO_NO_CHECK:
182 sk->no_check = valbool;
183 return(0);
184
185 case SO_PRIORITY:
186 if (val >= 0 && val < DEV_NUMBUFFS)
187 {
188 sk->priority = val;
189 }
190 else
191 {
192 return(-EINVAL);
193 }
194 return(0);
195
196
197 case SO_LINGER:
198 err=verify_area(VERIFY_READ,optval,sizeof(ling));
199 if(err)
200 return err;
201 memcpy_fromfs(&ling,optval,sizeof(ling));
202 if(ling.l_onoff==0)
203 sk->linger=0;
204 else
205 {
206 sk->lingertime=ling.l_linger;
207 sk->linger=1;
208 }
209 return 0;
210
211 case SO_BSDCOMPAT:
212 sk->bsdism = valbool;
213 return 0;
214
215 default:
216 return(-ENOPROTOOPT);
217 }
218 }
219
220
221 int sock_getsockopt(struct sock *sk, int level, int optname,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
222 char *optval, int *optlen)
223 {
224 int val;
225 int err;
226 struct linger ling;
227
228 switch(optname)
229 {
230 case SO_DEBUG:
231 val = sk->debug;
232 break;
233
234 case SO_DONTROUTE:
235 val = sk->localroute;
236 break;
237
238 case SO_BROADCAST:
239 val= sk->broadcast;
240 break;
241
242 case SO_SNDBUF:
243 val=sk->sndbuf;
244 break;
245
246 case SO_RCVBUF:
247 val =sk->rcvbuf;
248 break;
249
250 case SO_REUSEADDR:
251 val = sk->reuse;
252 break;
253
254 case SO_KEEPALIVE:
255 val = sk->keepopen;
256 break;
257
258 case SO_TYPE:
259 val = sk->type;
260 break;
261
262 case SO_ERROR:
263 val = sock_error(sk);
264 if(val==0)
265 val=xchg(&sk->err_soft,0);
266 break;
267
268 case SO_OOBINLINE:
269 val = sk->urginline;
270 break;
271
272 case SO_NO_CHECK:
273 val = sk->no_check;
274 break;
275
276 case SO_PRIORITY:
277 val = sk->priority;
278 break;
279
280 case SO_LINGER:
281 err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
282 if(err)
283 return err;
284 err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
285 if(err)
286 return err;
287 put_fs_long(sizeof(ling),(unsigned long *)optlen);
288 ling.l_onoff=sk->linger;
289 ling.l_linger=sk->lingertime;
290 memcpy_tofs(optval,&ling,sizeof(ling));
291 return 0;
292
293 case SO_BSDCOMPAT:
294 val = sk->bsdism;
295 break;
296
297 default:
298 return(-ENOPROTOOPT);
299 }
300 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
301 if(err)
302 return err;
303 put_fs_long(sizeof(int),(unsigned long *) optlen);
304
305 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
306 if(err)
307 return err;
308 put_fs_long(val,(unsigned long *)optval);
309
310 return(0);
311 }
312
313
314 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
315 {
316 if (sk)
317 {
318 if (sk->wmem_alloc + size < sk->sndbuf || force)
319 {
320 struct sk_buff * c = alloc_skb(size, priority);
321 if (c)
322 {
323 unsigned long flags;
324 save_flags(flags);
325 cli();
326 sk->wmem_alloc+= c->truesize;
327 restore_flags(flags); /* was sti(); */
328 }
329 return c;
330 }
331 return(NULL);
332 }
333 return(alloc_skb(size, priority));
334 }
335
336
337 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
338 {
339 if (sk)
340 {
341 if (sk->rmem_alloc + size < sk->rcvbuf || force)
342 {
343 struct sk_buff *c = alloc_skb(size, priority);
344 if (c)
345 {
346 unsigned long flags;
347 save_flags(flags);
348 cli();
349 sk->rmem_alloc += c->truesize;
350 restore_flags(flags); /* was sti(); */
351 }
352 return(c);
353 }
354 return(NULL);
355 }
356 return(alloc_skb(size, priority));
357 }
358
359
360 unsigned long sock_rspace(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
361 {
362 int amt;
363
364 if (sk != NULL)
365 {
366 if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW)
367 return(0);
368 amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);
369 if (amt < 0)
370 return(0);
371 return(amt);
372 }
373 return(0);
374 }
375
376
377 unsigned long sock_wspace(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
378 {
379 if (sk != NULL)
380 {
381 if (sk->shutdown & SEND_SHUTDOWN)
382 return(0);
383 if (sk->wmem_alloc >= sk->sndbuf)
384 return(0);
385 return(sk->sndbuf-sk->wmem_alloc );
386 }
387 return(0);
388 }
389
390
391 void sock_wfree(struct sock *sk, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
392 {
393 int s=skb->truesize;
394 #if CONFIG_SKB_CHECK
395 IS_SKB(skb);
396 #endif
397 kfree_skbmem(skb);
398 if (sk)
399 {
400 unsigned long flags;
401 save_flags(flags);
402 cli();
403 sk->wmem_alloc -= s;
404 restore_flags(flags);
405 /* In case it might be waiting for more memory. */
406 sk->write_space(sk);
407 return;
408 }
409 }
410
411
412 void sock_rfree(struct sock *sk, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
413 {
414 int s=skb->truesize;
415 #if CONFIG_SKB_CHECK
416 IS_SKB(skb);
417 #endif
418 kfree_skbmem(skb);
419 if (sk)
420 {
421 unsigned long flags;
422 save_flags(flags);
423 cli();
424 sk->rmem_alloc -= s;
425 restore_flags(flags);
426 }
427 }
428
/*
 *	Generic send/receive buffer handlers.
 */
432
433 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
434 {
435 struct sk_buff *skb;
436 int err;
437
438 sk->inuse=1;
439
440 do
441 {
442 if(sk->err!=0)
443 {
444 cli();
445 err= -sk->err;
446 sk->err=0;
447 sti();
448 *errcode=err;
449 return NULL;
450 }
451
452 if(sk->shutdown&SEND_SHUTDOWN)
453 {
454 *errcode=-EPIPE;
455 return NULL;
456 }
457
458 if(!fallback)
459 skb = sock_wmalloc(sk, size, 0, sk->allocation);
460 else
461 {
462 /* The buffer get won't block, or use the atomic queue. It does
463 produce annoying no free page messages still.... */
464 skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
465 if(!skb)
466 skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL);
467 }
468
469 /*
470 * This means we have too many buffers for this socket already.
471 */
472
473 if(skb==NULL)
474 {
475 unsigned long tmp;
476
477 sk->socket->flags |= SO_NOSPACE;
478 if(noblock)
479 {
480 *errcode=-EAGAIN;
481 return NULL;
482 }
483 if(sk->shutdown&SEND_SHUTDOWN)
484 {
485 *errcode=-EPIPE;
486 return NULL;
487 }
488 tmp = sk->wmem_alloc;
489 cli();
490 if(sk->shutdown&SEND_SHUTDOWN)
491 {
492 sti();
493 *errcode=-EPIPE;
494 return NULL;
495 }
496
497 #if 1
498 if( tmp <= sk->wmem_alloc)
499 #else
500 /* ANK: Line above seems either incorrect
501 * or useless. sk->wmem_alloc has a tiny chance to change
502 * between tmp = sk->w... and cli(),
503 * but it might(?) change earlier. In real life
504 * it does not (I never seen the message).
505 * In any case I'd delete this check at all, or
506 * change it to:
507 */
508 if (sk->wmem_alloc + size >= sk->sndbuf)
509 #endif
510 {
511 if (sk->wmem_alloc <= 0)
512 printk("sock.c: Look where I am %ld<%ld\n", tmp, sk->wmem_alloc);
513 sk->socket->flags &= ~SO_NOSPACE;
514 interruptible_sleep_on(sk->sleep);
515 if (current->signal & ~current->blocked)
516 {
517 sti();
518 *errcode = -ERESTARTSYS;
519 return NULL;
520 }
521 }
522 sti();
523 }
524 }
525 while(skb==NULL);
526
527 return skb;
528 }
529
530
531 void release_sock(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
532 {
533 unsigned long flags;
534 #ifdef CONFIG_INET
535 struct sk_buff *skb;
536 #endif
537
538 if (!sk->prot)
539 return;
540 /*
541 * Make the backlog atomic. If we don't do this there is a tiny
542 * window where a packet may arrive between the sk->blog being
543 * tested and then set with sk->inuse still 0 causing an extra
544 * unwanted re-entry into release_sock().
545 */
546
547 save_flags(flags);
548 cli();
549 if (sk->blog)
550 {
551 restore_flags(flags);
552 return;
553 }
554 sk->blog=1;
555 sk->inuse = 1;
556 restore_flags(flags);
557 #ifdef CONFIG_INET
558 /* See if we have any packets built up. */
559 while((skb = skb_dequeue(&sk->back_log)) != NULL)
560 {
561 sk->blog = 1;
562 if (sk->prot->rcv)
563 sk->prot->rcv(skb, skb->dev, (struct options*)skb->proto_priv,
564 skb->saddr, skb->len, skb->daddr, 1,
565 /* Only used for/by raw sockets. */
566 (struct inet_protocol *)sk->pair);
567 }
568 #endif
569 sk->blog = 0;
570 sk->inuse = 0;
571 #ifdef CONFIG_INET
572 if (sk->dead && sk->state == TCP_CLOSE)
573 {
574 /* Should be about 2 rtt's */
575 reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME));
576 }
577 #endif
578 }