root/fs/select.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. free_wait
  2. check
  3. do_select
  4. __get_fd_set
  5. __set_fd_set
  6. sys_select

   1 /*
   2  * This file contains the procedures for the handling of select
   3  *
   4  * Created for Linux based loosely upon Mathius Lattner's minix
   5  * patches by Peter MacDonald. Heavily edited by Linus.
   6  */
   7 
   8 #include <linux/types.h>
   9 #include <linux/time.h>
  10 #include <linux/fs.h>
  11 #include <linux/kernel.h>
  12 #include <linux/sched.h>
  13 #include <linux/string.h>
  14 #include <linux/stat.h>
  15 #include <linux/signal.h>
  16 #include <linux/errno.h>
  17 
  18 #include <asm/segment.h>
  19 #include <asm/system.h>
  20 
  21 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
  22 
  23 /*
  24  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
  25  * I have rewritten this, taking some shortcuts: This code may not be easy to
  26  * follow, but it should be free of race-conditions, and it's practical. If you
  27  * understand what I'm doing here, then you understand how the linux
  28  * sleep/wakeup mechanism works.
  29  *
   30  * Two very simple procedures, select_wait() and free_wait() do all the work.
   31  * select_wait() is an inline function defined in <linux/sched.h>, as all select
  32  * functions have to call it to add an entry to the select table.
  33  */
  34 
  35 /*
  36  * I rewrote this again to make the select_table size variable, take some
  37  * more shortcuts, improve responsiveness, and remove another race that
  38  * Linus noticed.  -- jrs
  39  */
  40 
  41 static void free_wait(select_table * p)
     /* [previous][next][first][last][top][bottom][index][help] */
  42 {
  43         struct select_table_entry * entry = p->entry + p->nr;
  44 
  45         while (p->nr > 0) {
  46                 p->nr--;
  47                 entry--;
  48                 remove_wait_queue(entry->wait_address,&entry->wait);
  49         }
  50 }
  51 
  52 /*
  53  * The check function checks the ready status of a file using the vfs layer.
  54  *
  55  * If the file was not ready we were added to its wait queue.  But in
  56  * case it became ready just after the check and just before it called
  57  * select_wait, we call it again, knowing we are already on its
  58  * wait queue this time.  The second call is not necessary if the
  59  * select_table is NULL indicating an earlier file check was ready
  60  * and we aren't going to sleep on the select_table.  -- jrs
  61  */
  62 
  63 static int check(int flag, select_table * wait, struct file * file)
     /* [previous][next][first][last][top][bottom][index][help] */
  64 {
  65         struct inode * inode;
  66         struct file_operations *fops;
  67         int (*select) (struct inode *, struct file *, int, select_table *);
  68 
  69         inode = file->f_inode;
  70         if ((fops = file->f_op) && (select = fops->select))
  71                 return select(inode, file, flag, wait)
  72                     || (wait && select(inode, file, flag, NULL));
  73         if (S_ISREG(inode->i_mode))
  74                 return 1;
  75         return 0;
  76 }
  77 
  78 int do_select(int n, fd_set *in, fd_set *out, fd_set *ex,
     /* [previous][next][first][last][top][bottom][index][help] */
  79         fd_set *res_in, fd_set *res_out, fd_set *res_ex)
  80 {
  81         int count;
  82         select_table wait_table, *wait;
  83         struct select_table_entry *entry;
  84         unsigned long set;
  85         int i,j;
  86         int max = -1;
  87 
  88         for (j = 0 ; j < __FDSET_LONGS ; j++) {
  89                 i = j << 5;
  90                 if (i >= n)
  91                         break;
  92                 set = in->fds_bits[j] | out->fds_bits[j] | ex->fds_bits[j];
  93                 for ( ; set ; i++,set >>= 1) {
  94                         if (i >= n)
  95                                 goto end_check;
  96                         if (!(set & 1))
  97                                 continue;
  98                         if (!current->filp[i])
  99                                 return -EBADF;
 100                         if (!current->filp[i]->f_inode)
 101                                 return -EBADF;
 102                         max = i;
 103                 }
 104         }
 105 end_check:
 106         n = max + 1;
 107         entry = (struct select_table_entry *) __get_free_page(GFP_KERNEL);
 108         if (!entry)
 109                 return -ENOMEM;
 110         FD_ZERO(res_in);
 111         FD_ZERO(res_out);
 112         FD_ZERO(res_ex);
 113         count = 0;
 114         wait_table.nr = 0;
 115         wait_table.entry = entry;
 116         wait = &wait_table;
 117 repeat:
 118         current->state = TASK_INTERRUPTIBLE;
 119         for (i = 0 ; i < n ; i++) {
 120                 if (FD_ISSET(i,in) && check(SEL_IN,wait,current->filp[i])) {
 121                         FD_SET(i, res_in);
 122                         count++;
 123                         wait = NULL;
 124                 }
 125                 if (FD_ISSET(i,out) && check(SEL_OUT,wait,current->filp[i])) {
 126                         FD_SET(i, res_out);
 127                         count++;
 128                         wait = NULL;
 129                 }
 130                 if (FD_ISSET(i,ex) && check(SEL_EX,wait,current->filp[i])) {
 131                         FD_SET(i, res_ex);
 132                         count++;
 133                         wait = NULL;
 134                 }
 135         }
 136         wait = NULL;
 137         if (!count && current->timeout && !(current->signal & ~current->blocked)) {
 138                 schedule();
 139                 goto repeat;
 140         }
 141         free_wait(&wait_table);
 142         free_page((unsigned long) entry);
 143         current->state = TASK_RUNNING;
 144         return count;
 145 }
 146 
 147 /*
 148  * We do a VERIFY_WRITE here even though we are only reading this time:
 149  * we'll write to it eventually..
 150  */
 151 static int __get_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
     /* [previous][next][first][last][top][bottom][index][help] */
 152 {
 153         int error;
 154 
 155         FD_ZERO(fdset);
 156         if (!fs_pointer)
 157                 return 0;
 158         error = verify_area(VERIFY_WRITE,fs_pointer,sizeof(fd_set));
 159         if (error)
 160                 return error;
 161         while (nr > 0) {
 162                 *fdset = get_fs_long(fs_pointer);
 163                 fdset++;
 164                 fs_pointer++;
 165                 nr -= 32;
 166         }
 167         return 0;
 168 }
 169 
 170 static void __set_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
     /* [previous][next][first][last][top][bottom][index][help] */
 171 {
 172         if (!fs_pointer)
 173                 return;
 174         while (nr > 0) {
 175                 put_fs_long(*fdset, fs_pointer);
 176                 fdset++;
 177                 fs_pointer++;
 178                 nr -= 32;
 179         }
 180 }
 181 
 182 #define get_fd_set(nr,fsp,fdp) \
 183 __get_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
 184 
 185 #define set_fd_set(nr,fsp,fdp) \
 186 __set_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
 187 
 188 /*
  189  * We can actually return ERESTARTSYS instead of EINTR, but I'd
 190  * like to be certain this leads to no problems. So I return
 191  * EINTR just for safety.
 192  *
 193  * Update: ERESTARTSYS breaks at least the xview clock binary, so
 194  * I'm trying ERESTARTNOHAND which restart only when you want to.
 195  */
 196 int sys_select( unsigned long *buffer )
     /* [previous][next][first][last][top][bottom][index][help] */
 197 {
 198 /* Perform the select(nd, in, out, ex, tv) system call. */
 199         int i;
 200         fd_set res_in, in, *inp;
 201         fd_set res_out, out, *outp;
 202         fd_set res_ex, ex, *exp;
 203         int n;
 204         struct timeval *tvp;
 205         unsigned long timeout;
 206 
 207         i = verify_area(VERIFY_READ, buffer, 20);
 208         if (i)
 209                 return i;
 210         n = get_fs_long(buffer++);
 211         if (n < 0)
 212                 return -EINVAL;
 213         if (n > NR_OPEN)
 214                 n = NR_OPEN;
 215         inp = (fd_set *) get_fs_long(buffer++);
 216         outp = (fd_set *) get_fs_long(buffer++);
 217         exp = (fd_set *) get_fs_long(buffer++);
 218         tvp = (struct timeval *) get_fs_long(buffer);
 219         if ((i = get_fd_set(n, inp, &in)) ||
 220             (i = get_fd_set(n, outp, &out)) ||
 221             (i = get_fd_set(n, exp, &ex))) return i;
 222         timeout = 0xffffffff;
 223         if (tvp) {
 224                 i = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
 225                 if (i)
 226                         return i;
 227                 timeout = jiffies;
 228                 timeout += ROUND_UP(get_fs_long((unsigned long *)&tvp->tv_usec),(1000000/HZ));
 229                 timeout += get_fs_long((unsigned long *)&tvp->tv_sec) * HZ;
 230                 if (timeout <= jiffies)
 231                         timeout = 0;
 232         }
 233         current->timeout = timeout;
 234         i = do_select(n, &in, &out, &ex, &res_in, &res_out, &res_ex);
 235         if (current->timeout > jiffies)
 236                 timeout = current->timeout - jiffies;
 237         else
 238                 timeout = 0;
 239         current->timeout = 0;
 240         if (tvp) {
 241                 put_fs_long(timeout/HZ, (unsigned long *) &tvp->tv_sec);
 242                 timeout %= HZ;
 243                 timeout *= (1000000/HZ);
 244                 put_fs_long(timeout, (unsigned long *) &tvp->tv_usec);
 245         }
 246         if (i < 0)
 247                 return i;
 248         if (!i && (current->signal & ~current->blocked))
 249                 return -ERESTARTNOHAND;
 250         set_fd_set(n, inp, &res_in);
 251         set_fd_set(n, outp, &res_out);
 252         set_fd_set(n, exp, &res_ex);
 253         return i;
 254 }

/* [previous][next][first][last][top][bottom][index][help] */