root/net/core/sock.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sock_setsockopt
  2. sock_getsockopt
  3. sk_alloc
  4. sk_free
  5. sock_wmalloc
  6. sock_rmalloc
  7. sock_rspace
  8. sock_wspace
  9. sock_wfree
  10. sock_rfree
  11. sock_alloc_send_skb
  12. __release_sock

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Generic socket support routines. Memory allocators, socket lock/release
   7  *              handler for protocols to use and generic option handler.
   8  *
   9  *
  10  * Version:     @(#)sock.c      1.0.17  06/02/93
  11  *
  12  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  13  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Alan Cox, <A.Cox@swansea.ac.uk>
  16  *
  17  * Fixes:
  18  *              Alan Cox        :       Numerous verify_area() problems
  19  *              Alan Cox        :       Connecting on a connecting socket
  20  *                                      now returns an error for tcp.
  21  *              Alan Cox        :       sock->protocol is set correctly.
  22  *                                      and is not sometimes left as 0.
  23  *              Alan Cox        :       connect handles icmp errors on a
  24  *                                      connect properly. Unfortunately there
  25  *                                      is a restart syscall nasty there. I
  26  *                                      can't match BSD without hacking the C
  27  *                                      library. Ideas urgently sought!
  28  *              Alan Cox        :       Disallow bind() to addresses that are
  29  *                                      not ours - especially broadcast ones!!
  30  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
  31  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
  32  *                                      instead they leave that for the DESTROY timer.
  33  *              Alan Cox        :       Clean up error flag in accept
  34  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
  35  *                                      was buggy. Put a remove_sock() in the handler
  36  *                                      for memory when we hit 0. Also altered the timer
  37  *                                      code. The ACK stuff can wait and needs major 
  38  *                                      TCP layer surgery.
  39  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
  40  *                                      and fixed timer/inet_bh race.
  41  *              Alan Cox        :       Added zapped flag for TCP
  42  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
  43  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
  44  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
  45  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
  46  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
  47  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
  48  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
  49  *      Pauline Middelink       :       identd support
  50  *              Alan Cox        :       Fixed connect() taking signals I think.
  51  *              Alan Cox        :       SO_LINGER supported
  52  *              Alan Cox        :       Error reporting fixes
  53  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
  54  *              Alan Cox        :       inet sockets don't set sk->type!
  55  *              Alan Cox        :       Split socket option code
  56  *              Alan Cox        :       Callbacks
  57  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
  58  *              Alex            :       Removed restriction on inet fioctl
  59  *              Alan Cox        :       Splitting INET from NET core
  60  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
  61  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
  62  *              Alan Cox        :       Split IP from generic code
  63  *              Alan Cox        :       New kfree_skbmem()
  64  *              Alan Cox        :       Make SO_DEBUG superuser only.
  65  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
  66  *                                      (compatibility fix)
  67  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
  68  *              Alan Cox        :       Allocator for a socket is settable.
  69  *              Alan Cox        :       SO_ERROR includes soft errors.
  70  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
  71  *              Alan Cox        :       Generic socket allocation to make hooks
  72  *                                      easier (suggested by Craig Metz).
  73  *
  74  * To Fix:
  75  *
  76  *
  77  *              This program is free software; you can redistribute it and/or
  78  *              modify it under the terms of the GNU General Public License
  79  *              as published by the Free Software Foundation; either version
  80  *              2 of the License, or (at your option) any later version.
  81  */
  82 
  83 #include <linux/config.h>
  84 #include <linux/errno.h>
  85 #include <linux/types.h>
  86 #include <linux/socket.h>
  87 #include <linux/in.h>
  88 #include <linux/kernel.h>
  89 #include <linux/major.h>
  90 #include <linux/sched.h>
  91 #include <linux/timer.h>
  92 #include <linux/string.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/fcntl.h>
  96 #include <linux/mm.h>
  97 #include <linux/interrupt.h>
  98 
  99 #include <asm/segment.h>
 100 #include <asm/system.h>
 101 
 102 #include <linux/inet.h>
 103 #include <linux/netdevice.h>
 104 #include <net/ip.h>
 105 #include <net/protocol.h>
 106 #include <net/arp.h>
 107 #include <net/rarp.h>
 108 #include <net/route.h>
 109 #include <net/tcp.h>
 110 #include <net/udp.h>
 111 #include <linux/skbuff.h>
 112 #include <net/sock.h>
 113 #include <net/raw.h>
 114 #include <net/icmp.h>
 115 
 116 #define min(a,b)        ((a)<(b)?(a):(b))
 117 
 118 /*
 119  *      This is meant for all protocols to use and covers goings on
 120  *      at the socket level. Everything here is generic.
 121  */
 122 
 123 int sock_setsockopt(struct sock *sk, int level, int optname,
     /* [previous][next][first][last][top][bottom][index][help] */
 124                 char *optval, int optlen)
 125 {
 126         int val;
 127         int valbool;
 128         int err;
 129         struct linger ling;
 130 
 131         /*
 132          *      Options without arguments
 133          */
 134 
 135 #ifdef SO_DONTLINGER            /* Compatibility item... */
 136         switch(optname)
 137         {
 138                 case SO_DONTLINGER:
 139                         sk->linger=0;
 140                         return 0;
 141         }
 142 #endif  
 143                 
 144         if (optval == NULL) 
 145                 return(-EINVAL);
 146 
 147         err=verify_area(VERIFY_READ, optval, sizeof(int));
 148         if(err)
 149                 return err;
 150         
 151         val = get_user((int *)optval);
 152         valbool = val?1:0;
 153         
 154         switch(optname) 
 155         {
 156                 case SO_DEBUG:  
 157                         if(val && !suser())
 158                                 return(-EPERM);
 159                         sk->debug=valbool;
 160                         return 0;
 161                 case SO_REUSEADDR:
 162                         sk->reuse = valbool;
 163                         return(0);
 164                 case SO_TYPE:
 165                 case SO_ERROR:
 166                         return(-ENOPROTOOPT);
 167                 case SO_DONTROUTE:
 168                         sk->localroute=valbool;
 169                         return 0;
 170                 case SO_BROADCAST:
 171                         sk->broadcast=valbool;
 172                         return 0;
 173                 case SO_SNDBUF:
 174                         if(val > SK_WMEM_MAX*2)
 175                                 val = SK_WMEM_MAX*2;
 176                         if(val < 256)
 177                                 val = 256;
 178                         sk->sndbuf = val;
 179                         return 0;
 180 
 181                 case SO_RCVBUF:
 182                         if(val > SK_RMEM_MAX*2)
 183                                 val = SK_RMEM_MAX*2;
 184                         if(val < 256)
 185                                 val = 256;
 186                         sk->rcvbuf = val;
 187                         return(0);
 188 
 189                 case SO_KEEPALIVE:
 190                         sk->keepopen = valbool;
 191                         return(0);
 192 
 193                 case SO_OOBINLINE:
 194                         sk->urginline = valbool;
 195                         return(0);
 196 
 197                 case SO_NO_CHECK:
 198                         sk->no_check = valbool;
 199                         return(0);
 200 
 201                 case SO_PRIORITY:
 202                         if (val >= 0 && val < DEV_NUMBUFFS) 
 203                         {
 204                                 sk->priority = val;
 205                         } 
 206                         else 
 207                         {
 208                                 return(-EINVAL);
 209                         }
 210                         return(0);
 211 
 212 
 213                 case SO_LINGER:
 214                         err=verify_area(VERIFY_READ,optval,sizeof(ling));
 215                         if(err)
 216                                 return err;
 217                         memcpy_fromfs(&ling,optval,sizeof(ling));
 218                         if(ling.l_onoff==0)
 219                                 sk->linger=0;
 220                         else
 221                         {
 222                                 sk->lingertime=ling.l_linger;
 223                                 sk->linger=1;
 224                         }
 225                         return 0;
 226 
 227                 case SO_BSDCOMPAT:
 228                         sk->bsdism = valbool;
 229                         return 0;
 230                         
 231                 default:
 232                         return(-ENOPROTOOPT);
 233         }
 234 }
 235 
 236 
 237 int sock_getsockopt(struct sock *sk, int level, int optname,
     /* [previous][next][first][last][top][bottom][index][help] */
 238                    char *optval, int *optlen)
 239 {               
 240         int val;
 241         int err;
 242         struct linger ling;
 243 
 244         switch(optname) 
 245         {
 246                 case SO_DEBUG:          
 247                         val = sk->debug;
 248                         break;
 249                 
 250                 case SO_DONTROUTE:
 251                         val = sk->localroute;
 252                         break;
 253                 
 254                 case SO_BROADCAST:
 255                         val= sk->broadcast;
 256                         break;
 257 
 258                 case SO_SNDBUF:
 259                         val=sk->sndbuf;
 260                         break;
 261                 
 262                 case SO_RCVBUF:
 263                         val =sk->rcvbuf;
 264                         break;
 265 
 266                 case SO_REUSEADDR:
 267                         val = sk->reuse;
 268                         break;
 269 
 270                 case SO_KEEPALIVE:
 271                         val = sk->keepopen;
 272                         break;
 273 
 274                 case SO_TYPE:
 275                         val = sk->type;                         
 276                         break;
 277 
 278                 case SO_ERROR:
 279                         val = sock_error(sk);
 280                         if(val==0)
 281                                 val=xchg(&sk->err_soft,0);
 282                         break;
 283 
 284                 case SO_OOBINLINE:
 285                         val = sk->urginline;
 286                         break;
 287         
 288                 case SO_NO_CHECK:
 289                         val = sk->no_check;
 290                         break;
 291 
 292                 case SO_PRIORITY:
 293                         val = sk->priority;
 294                         break;
 295                 
 296                 case SO_LINGER: 
 297                         err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
 298                         if(err)
 299                                 return err;
 300                         err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
 301                         if(err)
 302                                 return err;
 303                         put_fs_long(sizeof(ling),(unsigned long *)optlen);
 304                         ling.l_onoff=sk->linger;
 305                         ling.l_linger=sk->lingertime;
 306                         memcpy_tofs(optval,&ling,sizeof(ling));
 307                         return 0;
 308                 
 309                 case SO_BSDCOMPAT:
 310                         val = sk->bsdism;
 311                         break;
 312 
 313                 default:
 314                         return(-ENOPROTOOPT);
 315         }
 316         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
 317         if(err)
 318                 return err;
 319         put_fs_long(sizeof(int),(unsigned long *) optlen);
 320 
 321         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
 322         if(err)
 323                 return err;
 324         put_fs_long(val,(unsigned long *)optval);
 325 
 326         return(0);
 327 }
 328 
 329 struct sock *sk_alloc(int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 330 {
 331         struct sock *sk=(struct sock *)kmalloc(sizeof(*sk), priority);
 332         if(!sk)
 333                 return NULL;
 334         memset(sk, 0, sizeof(*sk));
 335         return sk;
 336 }
 337 
 338 void sk_free(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 339 {
 340         kfree_s(sk,sizeof(*sk));
 341 }
 342 
 343 
 344 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 345 {
 346         if (sk) {
 347                 if (force || sk->wmem_alloc + size < sk->sndbuf) {
 348                         struct sk_buff * skb = alloc_skb(size, priority);
 349                         if (skb)
 350                                 atomic_add(skb->truesize, &sk->wmem_alloc);
 351                         return skb;
 352                 }
 353                 return NULL;
 354         }
 355         return alloc_skb(size, priority);
 356 }
 357 
 358 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 359 {
 360         if (sk) {
 361                 if (force || sk->rmem_alloc + size < sk->rcvbuf) {
 362                         struct sk_buff *skb = alloc_skb(size, priority);
 363                         if (skb)
 364                                 atomic_add(skb->truesize, &sk->rmem_alloc);
 365                         return skb;
 366                 }
 367                 return NULL;
 368         }
 369         return alloc_skb(size, priority);
 370 }
 371 
 372 
 373 unsigned long sock_rspace(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 374 {
 375         int amt;
 376 
 377         if (sk != NULL) 
 378         {
 379                 if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW) 
 380                         return(0);
 381                 amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);
 382                 if (amt < 0) 
 383                         return(0);
 384                 return(amt);
 385         }
 386         return(0);
 387 }
 388 
 389 
 390 unsigned long sock_wspace(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 391 {
 392         if (sk != NULL) 
 393         {
 394                 if (sk->shutdown & SEND_SHUTDOWN)
 395                         return(0);
 396                 if (sk->wmem_alloc >= sk->sndbuf)
 397                         return(0);
 398                 return sk->sndbuf - sk->wmem_alloc;
 399         }
 400         return(0);
 401 }
 402 
 403 
 404 void sock_wfree(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 405 {
 406         int s=skb->truesize;
 407 #if CONFIG_SKB_CHECK
 408         IS_SKB(skb);
 409 #endif
 410         kfree_skbmem(skb);
 411         if (sk) 
 412         {
 413                 /* In case it might be waiting for more memory. */
 414                 sk->write_space(sk);
 415                 atomic_sub(s, &sk->wmem_alloc);
 416         }
 417 }
 418 
 419 
 420 void sock_rfree(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 421 {
 422         int s=skb->truesize;
 423 #if CONFIG_SKB_CHECK
 424         IS_SKB(skb);
 425 #endif  
 426         kfree_skbmem(skb);
 427         if (sk) 
 428         {
 429                 atomic_sub(s, &sk->rmem_alloc);
 430         }
 431 }
 432 
 433 /*
 434  *      Generic send/receive buffer handlers
 435  */
 436 
 437 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 438 {
 439         struct sk_buff *skb;
 440         int err;
 441 
 442         do
 443         {
 444                 if(sk->err!=0)
 445                 {
 446                         cli();
 447                         err= -sk->err;
 448                         sk->err=0;
 449                         sti();
 450                         *errcode=err;
 451                         return NULL;
 452                 }
 453                 
 454                 if(sk->shutdown&SEND_SHUTDOWN)
 455                 {
 456                         *errcode=-EPIPE;
 457                         return NULL;
 458                 }
 459                 
 460                 if(!fallback)
 461                         skb = sock_wmalloc(sk, size, 0, sk->allocation);
 462                 else
 463                 {
 464                         /* The buffer get won't block, or use the atomic queue. It does
 465                            produce annoying no free page messages still.... */
 466                         skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
 467                         if(!skb)
 468                                 skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL);
 469                 }
 470                 
 471                 /*
 472                  *      This means we have too many buffers for this socket already.
 473                  */
 474                  
 475                 if(skb==NULL)
 476                 {
 477                         unsigned long tmp;
 478 
 479                         sk->socket->flags |= SO_NOSPACE;
 480                         if(noblock)
 481                         {
 482                                 *errcode=-EAGAIN;
 483                                 return NULL;
 484                         }
 485                         if(sk->shutdown&SEND_SHUTDOWN)
 486                         {
 487                                 *errcode=-EPIPE;
 488                                 return NULL;
 489                         }
 490                         tmp = sk->wmem_alloc;
 491                         cli();
 492                         if(sk->shutdown&SEND_SHUTDOWN)
 493                         {
 494                                 sti();
 495                                 *errcode=-EPIPE;
 496                                 return NULL;
 497                         }
 498                         
 499 #if 1
 500                         if( tmp <= sk->wmem_alloc)
 501 #else
 502                         /* ANK: Line above seems either incorrect
 503                          *      or useless. sk->wmem_alloc has a tiny chance to change
 504                          *      between tmp = sk->w... and cli(),
 505                          *      but it might(?) change earlier. In real life
 506                          *      it does not (I never seen the message).
 507                          *      In any case I'd delete this check at all, or
 508                          *      change it to:
 509                          */
 510                         if (sk->wmem_alloc + size >= sk->sndbuf) 
 511 #endif
 512                         {
 513                                 sk->socket->flags &= ~SO_NOSPACE;
 514                                 interruptible_sleep_on(sk->sleep);
 515                                 if (current->signal & ~current->blocked) 
 516                                 {
 517                                         sti();
 518                                         *errcode = -ERESTARTSYS;
 519                                         return NULL;
 520                                 }
 521                         }
 522                         sti();
 523                 }
 524         }
 525         while(skb==NULL);
 526                 
 527         return skb;
 528 }
 529 
 530 
 531 void __release_sock(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 532 {
 533 #ifdef CONFIG_INET
 534         if (!sk->prot || !sk->prot->rcv)
 535                 return;
 536                 
 537         /* See if we have any packets built up. */
 538         start_bh_atomic();
 539         while (!skb_queue_empty(&sk->back_log)) {
 540                 struct sk_buff * skb = sk->back_log.next;
 541                 __skb_unlink(skb, &sk->back_log);
 542                 sk->prot->rcv(skb, skb->dev, (struct options*)skb->proto_priv,
 543                               skb->saddr, skb->len, skb->daddr, 1,
 544                               /* Only used for/by raw sockets. */
 545                               (struct inet_protocol *)sk->pair); 
 546         }
 547         end_bh_atomic();
 548 #endif  
 549 }

/* [previous][next][first][last][top][bottom][index][help] */