root/net/core/sock.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sock_setsockopt
  2. sock_getsockopt
  3. sock_wmalloc
  4. sock_rmalloc
  5. sock_rspace
  6. sock_wspace
  7. sock_wfree
  8. sock_rfree
  9. sock_alloc_send_skb
  10. __release_sock

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Generic socket support routines. Memory allocators, socket lock/release
   7  *              handler for protocols to use and generic option handler.
   8  *
   9  *
  10  * Version:     @(#)sock.c      1.0.17  06/02/93
  11  *
  12  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  13  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Alan Cox, <A.Cox@swansea.ac.uk>
  16  *
  17  * Fixes:
  18  *              Alan Cox        :       Numerous verify_area() problems
  19  *              Alan Cox        :       Connecting on a connecting socket
  20  *                                      now returns an error for tcp.
  21  *              Alan Cox        :       sock->protocol is set correctly.
  22  *                                      and is not sometimes left as 0.
  23  *              Alan Cox        :       connect handles icmp errors on a
  24  *                                      connect properly. Unfortunately there
  25  *                                      is a restart syscall nasty there. I
  26  *                                      can't match BSD without hacking the C
  27  *                                      library. Ideas urgently sought!
  28  *              Alan Cox        :       Disallow bind() to addresses that are
  29  *                                      not ours - especially broadcast ones!!
  30  *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
  31  *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
  32  *                                      instead they leave that for the DESTROY timer.
  33  *              Alan Cox        :       Clean up error flag in accept
  34  *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
  35  *                                      was buggy. Put a remove_sock() in the handler
  36  *                                      for memory when we hit 0. Also altered the timer
  37  *                                      code. The ACK stuff can wait and needs major 
  38  *                                      TCP layer surgery.
  39  *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
  40  *                                      and fixed timer/inet_bh race.
  41  *              Alan Cox        :       Added zapped flag for TCP
  42  *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
  43  *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
  44  *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
  45  *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
  46  *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
  47  *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
  48  *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
  49  *      Pauline Middelink       :       identd support
  50  *              Alan Cox        :       Fixed connect() taking signals I think.
  51  *              Alan Cox        :       SO_LINGER supported
  52  *              Alan Cox        :       Error reporting fixes
  53  *              Anonymous       :       inet_create tidied up (sk->reuse setting)
  54  *              Alan Cox        :       inet sockets don't set sk->type!
  55  *              Alan Cox        :       Split socket option code
  56  *              Alan Cox        :       Callbacks
  57  *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
  58  *              Alex            :       Removed restriction on inet fioctl
  59  *              Alan Cox        :       Splitting INET from NET core
  60  *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
  61  *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
  62  *              Alan Cox        :       Split IP from generic code
  63  *              Alan Cox        :       New kfree_skbmem()
  64  *              Alan Cox        :       Make SO_DEBUG superuser only.
  65  *              Alan Cox        :       Allow anyone to clear SO_DEBUG
  66  *                                      (compatibility fix)
  67  *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
  68  *              Alan Cox        :       Allocator for a socket is settable.
  69  *              Alan Cox        :       SO_ERROR includes soft errors.
  70  *              Alan Cox        :       Allow NULL arguments on some SO_ opts
  71  *
  72  * To Fix:
  73  *
  74  *
  75  *              This program is free software; you can redistribute it and/or
  76  *              modify it under the terms of the GNU General Public License
  77  *              as published by the Free Software Foundation; either version
  78  *              2 of the License, or (at your option) any later version.
  79  */
  80 
  81 #include <linux/config.h>
  82 #include <linux/errno.h>
  83 #include <linux/types.h>
  84 #include <linux/socket.h>
  85 #include <linux/in.h>
  86 #include <linux/kernel.h>
  87 #include <linux/major.h>
  88 #include <linux/sched.h>
  89 #include <linux/timer.h>
  90 #include <linux/string.h>
  91 #include <linux/sockios.h>
  92 #include <linux/net.h>
  93 #include <linux/fcntl.h>
  94 #include <linux/mm.h>
  95 #include <linux/interrupt.h>
  96 
  97 #include <asm/segment.h>
  98 #include <asm/system.h>
  99 
 100 #include <linux/inet.h>
 101 #include <linux/netdevice.h>
 102 #include <net/ip.h>
 103 #include <net/protocol.h>
 104 #include <net/arp.h>
 105 #include <net/rarp.h>
 106 #include <net/route.h>
 107 #include <net/tcp.h>
 108 #include <net/udp.h>
 109 #include <linux/skbuff.h>
 110 #include <net/sock.h>
 111 #include <net/raw.h>
 112 #include <net/icmp.h>
 113 
 114 #define min(a,b)        ((a)<(b)?(a):(b))
 115 
 116 /*
 117  *      This is meant for all protocols to use and covers goings on
 118  *      at the socket level. Everything here is generic.
 119  */
 120 
 121 int sock_setsockopt(struct sock *sk, int level, int optname,
     /* [previous][next][first][last][top][bottom][index][help] */
 122                 char *optval, int optlen)
 123 {
 124         int val;
 125         int valbool;
 126         int err;
 127         struct linger ling;
 128 
 129         /*
 130          *      Options without arguments
 131          */
 132 
 133 #ifdef SO_DONTLINGER            /* Compatibility item... */
 134         switch(optname)
 135         {
 136                 case SO_DONTLINGER:
 137                         sk->linger=0;
 138                         return 0;
 139         }
 140 #endif  
 141                 
 142         if (optval == NULL) 
 143                 return(-EINVAL);
 144 
 145         err=verify_area(VERIFY_READ, optval, sizeof(int));
 146         if(err)
 147                 return err;
 148         
 149         val = get_user((int *)optval);
 150         valbool = val?1:0;
 151         
 152         switch(optname) 
 153         {
 154                 case SO_DEBUG:  
 155                         if(val && !suser())
 156                                 return(-EPERM);
 157                         sk->debug=valbool;
 158                         return 0;
 159                 case SO_REUSEADDR:
 160                         sk->reuse = valbool;
 161                         return(0);
 162                 case SO_TYPE:
 163                 case SO_ERROR:
 164                         return(-ENOPROTOOPT);
 165                 case SO_DONTROUTE:
 166                         sk->localroute=valbool;
 167                         return 0;
 168                 case SO_BROADCAST:
 169                         sk->broadcast=valbool;
 170                         return 0;
 171                 case SO_SNDBUF:
 172                         if(val > SK_WMEM_MAX*2)
 173                                 val = SK_WMEM_MAX*2;
 174                         if(val < 256)
 175                                 val = 256;
 176                         sk->sndbuf = val;
 177                         return 0;
 178 
 179                 case SO_RCVBUF:
 180                         if(val > SK_RMEM_MAX*2)
 181                                 val = SK_RMEM_MAX*2;
 182                         if(val < 256)
 183                                 val = 256;
 184                         sk->rcvbuf = val;
 185                         return(0);
 186 
 187                 case SO_KEEPALIVE:
 188                         sk->keepopen = valbool;
 189                         return(0);
 190 
 191                 case SO_OOBINLINE:
 192                         sk->urginline = valbool;
 193                         return(0);
 194 
 195                 case SO_NO_CHECK:
 196                         sk->no_check = valbool;
 197                         return(0);
 198 
 199                 case SO_PRIORITY:
 200                         if (val >= 0 && val < DEV_NUMBUFFS) 
 201                         {
 202                                 sk->priority = val;
 203                         } 
 204                         else 
 205                         {
 206                                 return(-EINVAL);
 207                         }
 208                         return(0);
 209 
 210 
 211                 case SO_LINGER:
 212                         err=verify_area(VERIFY_READ,optval,sizeof(ling));
 213                         if(err)
 214                                 return err;
 215                         memcpy_fromfs(&ling,optval,sizeof(ling));
 216                         if(ling.l_onoff==0)
 217                                 sk->linger=0;
 218                         else
 219                         {
 220                                 sk->lingertime=ling.l_linger;
 221                                 sk->linger=1;
 222                         }
 223                         return 0;
 224 
 225                 case SO_BSDCOMPAT:
 226                         sk->bsdism = valbool;
 227                         return 0;
 228                         
 229                 default:
 230                         return(-ENOPROTOOPT);
 231         }
 232 }
 233 
 234 
 235 int sock_getsockopt(struct sock *sk, int level, int optname,
     /* [previous][next][first][last][top][bottom][index][help] */
 236                    char *optval, int *optlen)
 237 {               
 238         int val;
 239         int err;
 240         struct linger ling;
 241 
 242         switch(optname) 
 243         {
 244                 case SO_DEBUG:          
 245                         val = sk->debug;
 246                         break;
 247                 
 248                 case SO_DONTROUTE:
 249                         val = sk->localroute;
 250                         break;
 251                 
 252                 case SO_BROADCAST:
 253                         val= sk->broadcast;
 254                         break;
 255 
 256                 case SO_SNDBUF:
 257                         val=sk->sndbuf;
 258                         break;
 259                 
 260                 case SO_RCVBUF:
 261                         val =sk->rcvbuf;
 262                         break;
 263 
 264                 case SO_REUSEADDR:
 265                         val = sk->reuse;
 266                         break;
 267 
 268                 case SO_KEEPALIVE:
 269                         val = sk->keepopen;
 270                         break;
 271 
 272                 case SO_TYPE:
 273                         val = sk->type;                         
 274                         break;
 275 
 276                 case SO_ERROR:
 277                         val = sock_error(sk);
 278                         if(val==0)
 279                                 val=xchg(&sk->err_soft,0);
 280                         break;
 281 
 282                 case SO_OOBINLINE:
 283                         val = sk->urginline;
 284                         break;
 285         
 286                 case SO_NO_CHECK:
 287                         val = sk->no_check;
 288                         break;
 289 
 290                 case SO_PRIORITY:
 291                         val = sk->priority;
 292                         break;
 293                 
 294                 case SO_LINGER: 
 295                         err=verify_area(VERIFY_WRITE,optval,sizeof(ling));
 296                         if(err)
 297                                 return err;
 298                         err=verify_area(VERIFY_WRITE,optlen,sizeof(int));
 299                         if(err)
 300                                 return err;
 301                         put_fs_long(sizeof(ling),(unsigned long *)optlen);
 302                         ling.l_onoff=sk->linger;
 303                         ling.l_linger=sk->lingertime;
 304                         memcpy_tofs(optval,&ling,sizeof(ling));
 305                         return 0;
 306                 
 307                 case SO_BSDCOMPAT:
 308                         val = sk->bsdism;
 309                         break;
 310 
 311                 default:
 312                         return(-ENOPROTOOPT);
 313         }
 314         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
 315         if(err)
 316                 return err;
 317         put_fs_long(sizeof(int),(unsigned long *) optlen);
 318 
 319         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
 320         if(err)
 321                 return err;
 322         put_fs_long(val,(unsigned long *)optval);
 323 
 324         return(0);
 325 }
 326 
 327 
 328 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 329 {
 330         if (sk) 
 331         {
 332                 if (sk->wmem_alloc + size < sk->sndbuf || force) 
 333                 {
 334                         struct sk_buff * c = alloc_skb(size, priority);
 335                         if (c) 
 336                         {
 337                                 unsigned long flags;
 338                                 save_flags(flags);
 339                                 cli();
 340                                 sk->wmem_alloc+= c->truesize;
 341                                 restore_flags(flags); /* was sti(); */
 342                         }
 343                         return c;
 344                 }
 345                 return(NULL);
 346         }
 347         return(alloc_skb(size, priority));
 348 }
 349 
 350 
 351 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 352 {
 353         if (sk) 
 354         {
 355                 if (sk->rmem_alloc + size < sk->rcvbuf || force) 
 356                 {
 357                         struct sk_buff *c = alloc_skb(size, priority);
 358                         if (c) 
 359                         {
 360                                 unsigned long flags;
 361                                 save_flags(flags);
 362                                 cli();
 363                                 sk->rmem_alloc += c->truesize;
 364                                 restore_flags(flags); /* was sti(); */
 365                         }
 366                         return(c);
 367                 }
 368                 return(NULL);
 369         }
 370         return(alloc_skb(size, priority));
 371 }
 372 
 373 
 374 unsigned long sock_rspace(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 375 {
 376         int amt;
 377 
 378         if (sk != NULL) 
 379         {
 380                 if (sk->rmem_alloc >= sk->rcvbuf-2*MIN_WINDOW) 
 381                         return(0);
 382                 amt = min((sk->rcvbuf-sk->rmem_alloc)/2-MIN_WINDOW, MAX_WINDOW);
 383                 if (amt < 0) 
 384                         return(0);
 385                 return(amt);
 386         }
 387         return(0);
 388 }
 389 
 390 
 391 unsigned long sock_wspace(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 392 {
 393         if (sk != NULL) 
 394         {
 395                 if (sk->shutdown & SEND_SHUTDOWN)
 396                         return(0);
 397                 if (sk->wmem_alloc >= sk->sndbuf)
 398                         return(0);
 399                 return(sk->sndbuf-sk->wmem_alloc );
 400         }
 401         return(0);
 402 }
 403 
 404 
 405 void sock_wfree(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 406 {
 407         int s=skb->truesize;
 408 #if CONFIG_SKB_CHECK
 409         IS_SKB(skb);
 410 #endif
 411         kfree_skbmem(skb);
 412         if (sk) 
 413         {
 414                 unsigned long flags;
 415                 save_flags(flags);
 416                 cli();
 417                 sk->wmem_alloc -= s;
 418                 restore_flags(flags);
 419                 /* In case it might be waiting for more memory. */
 420                 sk->write_space(sk);
 421                 return;
 422         }
 423 }
 424 
 425 
 426 void sock_rfree(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 427 {
 428         int s=skb->truesize;
 429 #if CONFIG_SKB_CHECK
 430         IS_SKB(skb);
 431 #endif  
 432         kfree_skbmem(skb);
 433         if (sk) 
 434         {
 435                 unsigned long flags;
 436                 save_flags(flags);
 437                 cli();
 438                 sk->rmem_alloc -= s;
 439                 restore_flags(flags);
 440         }
 441 }
 442 
 443 /*
 444  *      Generic send/receive buffer handlers
 445  */
 446 
 447 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigned long fallback, int noblock, int *errcode)
     /* [previous][next][first][last][top][bottom][index][help] */
 448 {
 449         struct sk_buff *skb;
 450         int err;
 451 
 452         do
 453         {
 454                 if(sk->err!=0)
 455                 {
 456                         cli();
 457                         err= -sk->err;
 458                         sk->err=0;
 459                         sti();
 460                         *errcode=err;
 461                         return NULL;
 462                 }
 463                 
 464                 if(sk->shutdown&SEND_SHUTDOWN)
 465                 {
 466                         *errcode=-EPIPE;
 467                         return NULL;
 468                 }
 469                 
 470                 if(!fallback)
 471                         skb = sock_wmalloc(sk, size, 0, sk->allocation);
 472                 else
 473                 {
 474                         /* The buffer get won't block, or use the atomic queue. It does
 475                            produce annoying no free page messages still.... */
 476                         skb = sock_wmalloc(sk, size, 0 , GFP_BUFFER);
 477                         if(!skb)
 478                                 skb=sock_wmalloc(sk, fallback, 0, GFP_KERNEL);
 479                 }
 480                 
 481                 /*
 482                  *      This means we have too many buffers for this socket already.
 483                  */
 484                  
 485                 if(skb==NULL)
 486                 {
 487                         unsigned long tmp;
 488 
 489                         sk->socket->flags |= SO_NOSPACE;
 490                         if(noblock)
 491                         {
 492                                 *errcode=-EAGAIN;
 493                                 return NULL;
 494                         }
 495                         if(sk->shutdown&SEND_SHUTDOWN)
 496                         {
 497                                 *errcode=-EPIPE;
 498                                 return NULL;
 499                         }
 500                         tmp = sk->wmem_alloc;
 501                         cli();
 502                         if(sk->shutdown&SEND_SHUTDOWN)
 503                         {
 504                                 sti();
 505                                 *errcode=-EPIPE;
 506                                 return NULL;
 507                         }
 508                         
 509 #if 1
 510                         if( tmp <= sk->wmem_alloc)
 511 #else
 512                         /* ANK: Line above seems either incorrect
 513                          *      or useless. sk->wmem_alloc has a tiny chance to change
 514                          *      between tmp = sk->w... and cli(),
 515                          *      but it might(?) change earlier. In real life
 516                          *      it does not (I never seen the message).
 517                          *      In any case I'd delete this check at all, or
 518                          *      change it to:
 519                          */
 520                         if (sk->wmem_alloc + size >= sk->sndbuf) 
 521 #endif
 522                         {
 523                                 sk->socket->flags &= ~SO_NOSPACE;
 524                                 interruptible_sleep_on(sk->sleep);
 525                                 if (current->signal & ~current->blocked) 
 526                                 {
 527                                         sti();
 528                                         *errcode = -ERESTARTSYS;
 529                                         return NULL;
 530                                 }
 531                         }
 532                         sti();
 533                 }
 534         }
 535         while(skb==NULL);
 536                 
 537         return skb;
 538 }
 539 
 540 
 541 void __release_sock(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 542 {
 543 #ifdef CONFIG_INET
 544         if (!sk->prot || !sk->prot->rcv)
 545                 return;
 546                 
 547         /* See if we have any packets built up. */
 548         start_bh_atomic();
 549         while (!skb_queue_empty(&sk->back_log)) {
 550                 struct sk_buff * skb = sk->back_log.next;
 551                 __skb_unlink(skb, &sk->back_log);
 552                 sk->prot->rcv(skb, skb->dev, (struct options*)skb->proto_priv,
 553                               skb->saddr, skb->len, skb->daddr, 1,
 554                               /* Only used for/by raw sockets. */
 555                               (struct inet_protocol *)sk->pair); 
 556         }
 557         end_bh_atomic();
 558 #endif  
 559 }

/* [previous][next][first][last][top][bottom][index][help] */