1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, sk->inuse/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Version: @(#)sock.c 1.0.17 06/02/93
11 *
12 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
37 * code. The ACK stuff can wait and needs major
38 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 * Alan Cox : Allow NULL arguments on some SO_ opts
71 *
72 * To Fix:
73 *
74 *
75 * This program is free software; you can redistribute it and/or
76 * modify it under the terms of the GNU General Public License
77 * as published by the Free Software Foundation; either version
78 * 2 of the License, or (at your option) any later version.
79 */
80
81 #include <linux/config.h>
82 #include <linux/errno.h>
83 #include <linux/types.h>
84 #include <linux/socket.h>
85 #include <linux/in.h>
86 #include <linux/kernel.h>
87 #include <linux/major.h>
88 #include <linux/sched.h>
89 #include <linux/timer.h>
90 #include <linux/string.h>
91 #include <linux/sockios.h>
92 #include <linux/net.h>
93 #include <linux/fcntl.h>
94 #include <linux/mm.h>
95 #include <linux/interrupt.h>
96
97 #include <asm/segment.h>
98 #include <asm/system.h>
99
100 #include <linux/inet.h>
101 #include <linux/netdevice.h>
102 #include <net/ip.h>
103 #include <net/protocol.h>
104 #include <net/arp.h>
105 #include <net/rarp.h>
106 #include <net/route.h>
107 #include <net/tcp.h>
108 #include <net/udp.h>
109 #include <linux/skbuff.h>
110 #include <net/sock.h>
111 #include <net/raw.h>
112 #include <net/icmp.h>
113
/*
 *	Smaller of two values. Each argument is expanded twice, so do not
 *	pass expressions that have side effects.
 */
#define min(a, b)	(((a) < (b)) ? (a) : (b))
115
116 /*
117 * This is meant for all protocols to use and covers goings on
118 * at the socket level. Everything here is generic.
119 */
120
121 int sock_setsockopt(struct sock *sk, int level, int optname,
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
122 char *optval, int optlen)
123 {
124 int val;
125 int valbool;
126 int err;
127 struct linger ling;
128
129 /*
130 * Options without arguments
131 */
132
133 #ifdef SO_DONTLINGER /* Compatibility item... */
134 switch(optname)
135 {
136 case SO_DONTLINGER:
137 sk->linger=0;
138 return 0;
139 }
140 #endif
141
142 if (optval == NULL)
143 return(-EINVAL);
144
145 err=verify_area(VERIFY_READ, optval, sizeof(int));
146 if(err)
147 return err;
148
149 val = get_user((int *)optval);
150 valbool = val?1:0;
151
152 switch(optname)
153 {
154 case SO_DEBUG:
155 if(val && !suser())
156 return(-EPERM);
157 sk->debug=valbool;
158 return 0;
159 case SO_REUSEADDR:
160 sk->reuse = valbool;
161 return(0);
162 case SO_TYPE:
163 case SO_ERROR:
164 return(-ENOPROTOOPT);
165 case SO_DONTROUTE:
166 sk->localroute=valbool;
167 return 0;
168 case SO_BROADCAST:
169 sk->broadcast=valbool;
170 return 0;
171 case SO_SNDBUF:
172 if(val > SK_WMEM_MAX*2)
173 val = SK_WMEM_MAX*2;
174 if(val < 256)
175 val = 256;
176 sk->sndbuf = val;
177 return 0;
178
179 case SO_RCVBUF:
180 if(val > SK_RMEM_MAX*2)
181 val = SK_RMEM_MAX*2;
182 if(val < 256)
183 val = 256;
184 sk->rcvbuf = val;
185 return(0);
186
187 case SO_KEEPALIVE:
188 sk->keepopen = valbool;
189 return(0);
190
191 case SO_OOBINLINE:
192 sk->urginline = valbool;
193 return(0);
194
195 case SO_NO_CHECK:
196 sk->no_check = valbool;
197 return(0);
198
199 case SO_PRIORITY:
200 if (val >= 0 && val < DEV_NUMBUFFS)
201 {
202 sk->priority = val;
203 }
204 else
205 {
206 return(-EINVAL);
207 }
208 return(0);
209
210
211 case SO_LINGER:
212 err=verify_area(VERIFY_READ,optval,sizeof(ling));
213 if(err)
214 return err;
215 memcpy_fromfs(&ling,optval,sizeof(ling));
216 if(ling.l_onoff==0)
217 sk->linger=0;
218 else
219 {
220 sk->lingertime=ling.l_linger;
221 sk->linger=1;
222 }
223 return 0;
224
225 case SO_BSDCOMPAT:
226 sk->bsdism = valbool;
227 return 0;
228
229 default:
230 return(-ENOPROTOOPT);
231 }
232 }
233
234
235 int sock_getsockopt(struct sock *sk, int level, int optname,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
236 char *optval, int *optlen)
237 {
238 int val;
239 int err;
240 struct linger ling;
241
242 switch(optname)
243 {
244 case SO_DEBUG:
245 val = sk->debug;
246 break;
247
248 case SO_DONTROUTE:
249 val = sk->localroute;
250 break;
251
252 case SO_BROADCAST:
253 val= sk->broadcast;
254 break;
255
256 case SO_SNDBUF:
257 val=sk->sndbuf;
258 break;
259
260 case SO_RCVBUF:
261 val =sk->rcvbuf;
262 break;
263
264 case SO_REUSEADDR:
265 val = sk->reuse;
266 break;
267
268 case SO_KEEPALIVE:
269 val = sk->keepopen;
270 break;
271
272 case SO_TYPE:
273 val = sk->type;
274 break;
275
276 case SO_ERROR:
277 val = sock_error(sk);
278 if(val==0)
279 val=xchg(&sk->err_soft,0);
280 break;
281
282 case SO_OOBINLINE:
283 val = sk->urginline;
284 break;
285
286 case SO_NO_CHECK:
287 val = sk->no_check;
288 break;
289
290 case SO_PRIORITY:
291 val = sk->priority;
292 break;
293
294 case SO_LINGER:
295 err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
296 if(err)
297 return err;
298 err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
299 if(err)
300 return err;
301 put_fs_long(sizeof(ling),(unsigned long *)optlen);
302 ling.l_onoff=sk->linger;
303 ling.l_linger=sk->lingertime;
304 memcpy_tofs(optval,&ling,sizeof(ling));
305 return 0;
306
307 case SO_BSDCOMPAT:
308 val = sk->bsdism;
309 break;
310
311 default:
312 return(-ENOPROTOOPT);
313 }
314 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
315 if(err)
316 return err;
317 put_fs_long(sizeof(int),(unsigned long *) optlen);
318
319 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
320 if(err)
321 return err;
322 put_fs_long(val,(unsigned long *)optval);
323
324 return(0);
325 }
326
327
328 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
329 {
330 if (sk)
331 {
332 if (sk->wmem_alloc + size < sk->sndbuf || force)
333 {
334 struct sk_buff * c = alloc_skb(size, priority);
335 if (c)
336 {
337 unsigned long flags;
338 save_flags(flags);
339 cli();
340 sk->wmem_alloc+= c->truesize;
341 restore_flags(flags); /* was sti(); */
342 }
343 return c;
344 }
345 return(NULL);
346 }
347 return(alloc_skb(size, priority));
348 }
349
350
351 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
352 {
353 if (sk)
354 {
355 if (sk->rmem_alloc + size < sk->rcvbuf || force)
356 {
357 struct sk_buff *c = alloc_skb(size, priority);
358 if (c)
359 {
360 unsigned long flags;
361 save_flags(flags);
362 cli();
363 sk->rmem_alloc += c->truesize;
364 restore_flags(flags); /* was sti(); */
365 }
366 return(c);
367 }
368 return(NULL);
369 }
370 return(alloc_skb(size, priority));
371 }
372
373
374 unsigned long sock_rspace(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
375 {
376 int amt;
377
378 if (sk != NULL)
379 {
380 if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW)
381 return(0);
382 amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);
383 if (amt < 0)
384 return(0);
385 return(amt);
386 }
387 return(0);
388 }
389
390
391 unsigned long sock_wspace(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
392 {
393 if (sk != NULL)
394 {
395 if (sk->shutdown & SEND_SHUTDOWN)
396 return(0);
397 if (sk->wmem_alloc >= sk->sndbuf)
398 return(0);
399 return(sk->sndbuf-sk->wmem_alloc );
400 }
401 return(0);
402 }
403
404
405 void sock_wfree(struct sock *sk, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
406 {
407 int s=skb->truesize;
408 #if CONFIG_SKB_CHECK
409 IS_SKB(skb);
410 #endif
411 kfree_skbmem(skb);
412 if (sk)
413 {
414 unsigned long flags;
415 save_flags(flags);
416 cli();
417 sk->wmem_alloc -= s;
418 restore_flags(flags);
419 /* In case it might be waiting for more memory. */
420 sk->write_space(sk);
421 return;
422 }
423 }
424
425
426 void sock_rfree(struct sock *sk, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
427 {
428 int s=skb->truesize;
429 #if CONFIG_SKB_CHECK
430 IS_SKB(skb);
431 #endif
432 kfree_skbmem(skb);
433 if (sk)
434 {
435 unsigned long flags;
436 save_flags(flags);
437 cli();
438 sk->rmem_alloc -= s;
439 restore_flags(flags);
440 }
441 }
442
443 /*
444 * Generic send/receive buffer handlers
445 */
446
447 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
448 {
449 struct sk_buff *skb;
450 int err;
451
452 sk->inuse=1;
453
454 do
455 {
456 if(sk->err!=0)
457 {
458 cli();
459 err= -sk->err;
460 sk->err=0;
461 sti();
462 *errcode=err;
463 return NULL;
464 }
465
466 if(sk->shutdown&SEND_SHUTDOWN)
467 {
468 *errcode=-EPIPE;
469 return NULL;
470 }
471
472 if(!fallback)
473 skb = sock_wmalloc(sk, size, 0, sk->allocation);
474 else
475 {
476 /* The buffer get won't block, or use the atomic queue. It does
477 produce annoying no free page messages still.... */
478 skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
479 if(!skb)
480 skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL);
481 }
482
483 /*
484 * This means we have too many buffers for this socket already.
485 */
486
487 if(skb==NULL)
488 {
489 unsigned long tmp;
490
491 sk->socket->flags |= SO_NOSPACE;
492 if(noblock)
493 {
494 *errcode=-EAGAIN;
495 return NULL;
496 }
497 if(sk->shutdown&SEND_SHUTDOWN)
498 {
499 *errcode=-EPIPE;
500 return NULL;
501 }
502 tmp = sk->wmem_alloc;
503 cli();
504 if(sk->shutdown&SEND_SHUTDOWN)
505 {
506 sti();
507 *errcode=-EPIPE;
508 return NULL;
509 }
510
511 #if 1
512 if( tmp <= sk->wmem_alloc)
513 #else
514 /* ANK: Line above seems either incorrect
515 * or useless. sk->wmem_alloc has a tiny chance to change
516 * between tmp = sk->w... and cli(),
517 * but it might(?) change earlier. In real life
518 * it does not (I never seen the message).
519 * In any case I'd delete this check at all, or
520 * change it to:
521 */
522 if (sk->wmem_alloc + size >= sk->sndbuf)
523 #endif
524 {
525 if (sk->wmem_alloc < 0)
526 printk("sock.c: Look where I am %ld<%ld\n", tmp, sk->wmem_alloc);
527 sk->socket->flags &= ~SO_NOSPACE;
528 interruptible_sleep_on(sk->sleep);
529 if (current->signal & ~current->blocked)
530 {
531 sti();
532 *errcode = -ERESTARTSYS;
533 return NULL;
534 }
535 }
536 sti();
537 }
538 }
539 while(skb==NULL);
540
541 return skb;
542 }
543
544
545 void release_sock(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
546 {
547 #ifdef CONFIG_INET
548 struct sk_buff *skb;
549 #endif
550
551 /*
552 * First, mark it not in use: this ensures that we will not
553 * get any new backlog packets..
554 */
555 sk->inuse = 0;
556
557 #ifdef CONFIG_INET
558 if (!sk->prot)
559 return;
560
561 /*
562 * This is only ever called from a user process context, hence
563 * (until fine grained SMP) its safe. sk->inuse must be volatile
564 * so the compiler doesn't do anything unfortunate with it.
565 */
566
567 /* See if we have any packets built up. */
568 while ((skb = __skb_dequeue(&sk->back_log)) != NULL)
569 {
570 sk->inuse = 1; /* Very important.. */
571 if (sk->prot->rcv)
572 sk->prot->rcv(skb, skb->dev, (struct options*)skb->proto_priv,
573 skb->saddr, skb->len, skb->daddr, 1,
574 /* Only used for/by raw sockets. */
575 (struct inet_protocol *)sk->pair);
576 sk->inuse = 0;
577 }
578 if (sk->dead && sk->state == TCP_CLOSE)
579 {
580 /* Should be about 2 rtt's */
581 reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME));
582 }
583 #endif
584 }