1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Version: @(#)sock.c 1.0.17 06/02/93
11 *
12 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
37 * code. The ACK stuff can wait and needs major
38 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 * Alan Cox : Allow NULL arguments on some SO_ opts
71 * Alan Cox : Generic socket allocation to make hooks
72 * easier (suggested by Craig Metz).
73 *
74 * To Fix:
75 *
76 *
77 * This program is free software; you can redistribute it and/or
78 * modify it under the terms of the GNU General Public License
79 * as published by the Free Software Foundation; either version
80 * 2 of the License, or (at your option) any later version.
81 */
82
83 #include <linux/config.h>
84 #include <linux/errno.h>
85 #include <linux/types.h>
86 #include <linux/socket.h>
87 #include <linux/in.h>
88 #include <linux/kernel.h>
89 #include <linux/major.h>
90 #include <linux/sched.h>
91 #include <linux/timer.h>
92 #include <linux/string.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/fcntl.h>
96 #include <linux/mm.h>
97 #include <linux/interrupt.h>
98
99 #include <asm/segment.h>
100 #include <asm/system.h>
101
102 #include <linux/inet.h>
103 #include <linux/netdevice.h>
104 #include <net/ip.h>
105 #include <net/protocol.h>
106 #include <net/arp.h>
107 #include <net/rarp.h>
108 #include <net/route.h>
109 #include <net/tcp.h>
110 #include <net/udp.h>
111 #include <linux/skbuff.h>
112 #include <net/sock.h>
113 #include <net/raw.h>
114 #include <net/icmp.h>
115
/*
 *	Smaller of two values. This is a function-like macro, so each
 *	argument may be evaluated twice: do not pass expressions that
 *	have side effects.
 */
#define min(a,b)	(((a) < (b)) ? (a) : (b))
117
118 /*
119 * This is meant for all protocols to use and covers goings on
120 * at the socket level. Everything here is generic.
121 */
122
123 int sock_setsockopt(struct sock *sk, int level, int optname,
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
124 char *optval, int optlen)
125 {
126 int val;
127 int valbool;
128 int err;
129 struct linger ling;
130
131 /*
132 * Options without arguments
133 */
134
135 #ifdef SO_DONTLINGER /* Compatibility item... */
136 switch(optname)
137 {
138 case SO_DONTLINGER:
139 sk->linger=0;
140 return 0;
141 }
142 #endif
143
144 if (optval == NULL)
145 return(-EINVAL);
146
147 err=verify_area(VERIFY_READ, optval, sizeof(int));
148 if(err)
149 return err;
150
151 val = get_user((int *)optval);
152 valbool = val?1:0;
153
154 switch(optname)
155 {
156 case SO_DEBUG:
157 if(val && !suser())
158 return(-EPERM);
159 sk->debug=valbool;
160 return 0;
161 case SO_REUSEADDR:
162 sk->reuse = valbool;
163 return(0);
164 case SO_TYPE:
165 case SO_ERROR:
166 return(-ENOPROTOOPT);
167 case SO_DONTROUTE:
168 sk->localroute=valbool;
169 return 0;
170 case SO_BROADCAST:
171 sk->broadcast=valbool;
172 return 0;
173 case SO_SNDBUF:
174 if(val > SK_WMEM_MAX*2)
175 val = SK_WMEM_MAX*2;
176 if(val < 256)
177 val = 256;
178 sk->sndbuf = val;
179 return 0;
180
181 case SO_RCVBUF:
182 if(val > SK_RMEM_MAX*2)
183 val = SK_RMEM_MAX*2;
184 if(val < 256)
185 val = 256;
186 sk->rcvbuf = val;
187 return(0);
188
189 case SO_KEEPALIVE:
190 sk->keepopen = valbool;
191 return(0);
192
193 case SO_OOBINLINE:
194 sk->urginline = valbool;
195 return(0);
196
197 case SO_NO_CHECK:
198 sk->no_check = valbool;
199 return(0);
200
201 case SO_PRIORITY:
202 if (val >= 0 && val < DEV_NUMBUFFS)
203 {
204 sk->priority = val;
205 }
206 else
207 {
208 return(-EINVAL);
209 }
210 return(0);
211
212
213 case SO_LINGER:
214 err=verify_area(VERIFY_READ,optval,sizeof(ling));
215 if(err)
216 return err;
217 memcpy_fromfs(&ling,optval,sizeof(ling));
218 if(ling.l_onoff==0)
219 sk->linger=0;
220 else
221 {
222 sk->lingertime=ling.l_linger;
223 sk->linger=1;
224 }
225 return 0;
226
227 case SO_BSDCOMPAT:
228 sk->bsdism = valbool;
229 return 0;
230
231 default:
232 return(-ENOPROTOOPT);
233 }
234 }
235
236
237 int sock_getsockopt(struct sock *sk, int level, int optname,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
238 char *optval, int *optlen)
239 {
240 int val;
241 int err;
242 struct linger ling;
243
244 switch(optname)
245 {
246 case SO_DEBUG:
247 val = sk->debug;
248 break;
249
250 case SO_DONTROUTE:
251 val = sk->localroute;
252 break;
253
254 case SO_BROADCAST:
255 val= sk->broadcast;
256 break;
257
258 case SO_SNDBUF:
259 val=sk->sndbuf;
260 break;
261
262 case SO_RCVBUF:
263 val =sk->rcvbuf;
264 break;
265
266 case SO_REUSEADDR:
267 val = sk->reuse;
268 break;
269
270 case SO_KEEPALIVE:
271 val = sk->keepopen;
272 break;
273
274 case SO_TYPE:
275 val = sk->type;
276 break;
277
278 case SO_ERROR:
279 val = sock_error(sk);
280 if(val==0)
281 val=xchg(&sk->err_soft,0);
282 break;
283
284 case SO_OOBINLINE:
285 val = sk->urginline;
286 break;
287
288 case SO_NO_CHECK:
289 val = sk->no_check;
290 break;
291
292 case SO_PRIORITY:
293 val = sk->priority;
294 break;
295
296 case SO_LINGER:
297 err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
298 if(err)
299 return err;
300 err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
301 if(err)
302 return err;
303 put_fs_long(sizeof(ling),(unsigned long *)optlen);
304 ling.l_onoff=sk->linger;
305 ling.l_linger=sk->lingertime;
306 memcpy_tofs(optval,&ling,sizeof(ling));
307 return 0;
308
309 case SO_BSDCOMPAT:
310 val = sk->bsdism;
311 break;
312
313 default:
314 return(-ENOPROTOOPT);
315 }
316 err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
317 if(err)
318 return err;
319 put_fs_long(sizeof(int),(unsigned long *) optlen);
320
321 err=verify_area(VERIFY_WRITE, optval, sizeof(int));
322 if(err)
323 return err;
324 put_fs_long(val,(unsigned long *)optval);
325
326 return(0);
327 }
328
329 struct sock *sk_alloc(int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
330 {
331 struct sock *sk=(struct sock *)kmalloc(sizeof(*sk), priority);
332 if(!sk)
333 return NULL;
334 memset(sk, 0, sizeof(*sk));
335 return sk;
336 }
337
338 void sk_free(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
339 {
340 kfree_s(sk,sizeof(*sk));
341 }
342
343
344 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
345 {
346 if (sk) {
347 if (force || sk->wmem_alloc + size < sk->sndbuf) {
348 struct sk_buff * skb = alloc_skb(size, priority);
349 if (skb)
350 atomic_add(skb->truesize, &sk->wmem_alloc);
351 return skb;
352 }
353 return NULL;
354 }
355 return alloc_skb(size, priority);
356 }
357
358 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
359 {
360 if (sk) {
361 if (force || sk->rmem_alloc + size < sk->rcvbuf) {
362 struct sk_buff *skb = alloc_skb(size, priority);
363 if (skb)
364 atomic_add(skb->truesize, &sk->rmem_alloc);
365 return skb;
366 }
367 return NULL;
368 }
369 return alloc_skb(size, priority);
370 }
371
372
373 unsigned long sock_rspace(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
374 {
375 int amt;
376
377 if (sk != NULL)
378 {
379 if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW)
380 return(0);
381 amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);
382 if (amt < 0)
383 return(0);
384 return(amt);
385 }
386 return(0);
387 }
388
389
390 unsigned long sock_wspace(struct sock *sk)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
391 {
392 if (sk != NULL)
393 {
394 if (sk->shutdown & SEND_SHUTDOWN)
395 return(0);
396 if (sk->wmem_alloc >= sk->sndbuf)
397 return(0);
398 return sk->sndbuf - sk->wmem_alloc;
399 }
400 return(0);
401 }
402
403
404 void sock_wfree(struct sock *sk, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
405 {
406 int s=skb->truesize;
407 #if CONFIG_SKB_CHECK
408 IS_SKB(skb);
409 #endif
410 kfree_skbmem(skb);
411 if (sk)
412 {
413 /* In case it might be waiting for more memory. */
414 sk->write_space(sk);
415 atomic_sub(s, &sk->wmem_alloc);
416 }
417 }
418
419
420 void sock_rfree(struct sock *sk, struct sk_buff *skb)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
421 {
422 int s=skb->truesize;
423 #if CONFIG_SKB_CHECK
424 IS_SKB(skb);
425 #endif
426 kfree_skbmem(skb);
427 if (sk)
428 {
429 atomic_sub(s, &sk->rmem_alloc);
430 }
431 }
432
433 /*
434 * Generic send/receive buffer handlers
435 */
436
437 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
438 {
439 struct sk_buff *skb;
440 int err;
441
442 do
443 {
444 if(sk->err!=0)
445 {
446 cli();
447 err= -sk->err;
448 sk->err=0;
449 sti();
450 *errcode=err;
451 return NULL;
452 }
453
454 if(sk->shutdown&SEND_SHUTDOWN)
455 {
456 *errcode=-EPIPE;
457 return NULL;
458 }
459
460 if(!fallback)
461 skb = sock_wmalloc(sk, size, 0, sk->allocation);
462 else
463 {
464 /* The buffer get won't block, or use the atomic queue. It does
465 produce annoying no free page messages still.... */
466 skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
467 if(!skb)
468 skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL);
469 }
470
471 /*
472 * This means we have too many buffers for this socket already.
473 */
474
475 if(skb==NULL)
476 {
477 unsigned long tmp;
478
479 sk->socket->flags |= SO_NOSPACE;
480 if(noblock)
481 {
482 *errcode=-EAGAIN;
483 return NULL;
484 }
485 if(sk->shutdown&SEND_SHUTDOWN)
486 {
487 *errcode=-EPIPE;
488 return NULL;
489 }
490 tmp = sk->wmem_alloc;
491 cli();
492 if(sk->shutdown&SEND_SHUTDOWN)
493 {
494 sti();
495 *errcode=-EPIPE;
496 return NULL;
497 }
498
499 #if 1
500 if( tmp <= sk->wmem_alloc)
501 #else
502 /* ANK: Line above seems either incorrect
503 * or useless. sk->wmem_alloc has a tiny chance to change
504 * between tmp = sk->w... and cli(),
505 * but it might(?) change earlier. In real life
506 * it does not (I never seen the message).
507 * In any case I'd delete this check at all, or
508 * change it to:
509 */
510 if (sk->wmem_alloc + size >= sk->sndbuf)
511 #endif
512 {
513 sk->socket->flags &= ~SO_NOSPACE;
514 interruptible_sleep_on(sk->sleep);
515 if (current->signal & ~current->blocked)
516 {
517 sti();
518 *errcode = -ERESTARTSYS;
519 return NULL;
520 }
521 }
522 sti();
523 }
524 }
525 while(skb==NULL);
526
527 return skb;
528 }
529
530
/*
 *	Pass every buffer queued on sk->back_log to the protocol's rcv
 *	handler. Does nothing when CONFIG_INET is not defined, or when
 *	the socket has no protocol or the protocol has no rcv handler.
 */
void __release_sock(struct sock *sk)
{
#ifdef CONFIG_INET
	struct sk_buff *skb;

	if (sk->prot == NULL || sk->prot->rcv == NULL)
		return;

	/* Drain the backlog between start_bh_atomic()/end_bh_atomic(). */
	start_bh_atomic();
	while (!skb_queue_empty(&sk->back_log))
	{
		skb = sk->back_log.next;
		__skb_unlink(skb, &sk->back_log);
		sk->prot->rcv(skb, skb->dev, (struct options *)skb->proto_priv,
			      skb->saddr, skb->len, skb->daddr, 1,
			      /* Only used for/by raw sockets. */
			      (struct inet_protocol *)sk->pair);
	}
	end_bh_atomic();
#endif
}