root/fs/select.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. free_wait
  2. check
  3. do_select
  4. __get_fd_set
  5. __set_fd_set
  6. sys_select

   1 /*
   2  * This file contains the procedures for the handling of select
   3  *
   4  * Created for Linux based loosely upon Mathius Lattner's minix
   5  * patches by Peter MacDonald. Heavily edited by Linus.
   6  */
   7 
   8 #include <linux/types.h>
   9 #include <linux/time.h>
  10 #include <linux/fs.h>
  11 #include <linux/kernel.h>
  12 #include <linux/sched.h>
  13 #include <linux/string.h>
  14 #include <linux/stat.h>
  15 #include <linux/signal.h>
  16 #include <linux/errno.h>
  17 
  18 #include <asm/segment.h>
  19 #include <asm/system.h>
  20 
  21 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
  22 
  23 /*
  24  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
  25  * I have rewritten this, taking some shortcuts: This code may not be easy to
  26  * follow, but it should be free of race-conditions, and it's practical. If you
  27  * understand what I'm doing here, then you understand how the linux
  28  * sleep/wakeup mechanism works.
  29  *
  30  * Two very simple procedures, select_wait() and free_wait() make all the work.
  31  * select_wait() is an inline function defined in <linux/sched.h>, as all select
  32  * functions have to call it to add an entry to the select table.
  33  */
  34 
  35 /*
  36  * I rewrote this again to make the select_table size variable, take some
  37  * more shortcuts, improve responsiveness, and remove another race that
  38  * Linus noticed.  -- jrs
  39  */
  40 
  41 static void free_wait(select_table * p)
     /* [previous][next][first][last][top][bottom][index][help] */
  42 {
  43         struct select_table_entry * entry = p->entry + p->nr;
  44 
  45         while (p->nr > 0) {
  46                 p->nr--;
  47                 entry--;
  48                 remove_wait_queue(entry->wait_address,&entry->wait);
  49         }
  50 }
  51 
  52 /*
  53  * The check function checks the ready status of a file using the vfs layer.
  54  *
  55  * If the file was not ready we were added to its wait queue.  But in
  56  * case it became ready just after the check and just before it called
  57  * select_wait, we call it again, knowing we are already on its
  58  * wait queue this time.  The second call is not necessary if the
  59  * select_table is NULL indicating an earlier file check was ready
  60  * and we aren't going to sleep on the select_table.  -- jrs
  61  */
  62 
  63 static int check(int flag, select_table * wait, struct file * file)
     /* [previous][next][first][last][top][bottom][index][help] */
  64 {
  65         struct inode * inode;
  66         struct file_operations *fops;
  67         int (*select) (struct inode *, struct file *, int, select_table *);
  68 
  69         inode = file->f_inode;
  70         if ((fops = file->f_op) && (select = fops->select))
  71                 return select(inode, file, flag, wait)
  72                     || (wait && select(inode, file, flag, NULL));
  73         if (S_ISREG(inode->i_mode))
  74                 return 1;
  75         return 0;
  76 }
  77 
  78 int do_select(int n, fd_set *in, fd_set *out, fd_set *ex,
     /* [previous][next][first][last][top][bottom][index][help] */
  79         fd_set *res_in, fd_set *res_out, fd_set *res_ex)
  80 {
  81         int count;
  82         select_table wait_table, *wait;
  83         struct select_table_entry *entry;
  84         unsigned long set;
  85         int i,j;
  86         int max = -1;
  87 
  88         for (j = 0 ; j < __FDSET_LONGS ; j++) {
  89                 i = j << 5;
  90                 if (i >= n)
  91                         break;
  92                 set = in->fds_bits[j] | out->fds_bits[j] | ex->fds_bits[j];
  93                 for ( ; set ; i++,set >>= 1) {
  94                         if (i >= n)
  95                                 goto end_check;
  96                         if (!(set & 1))
  97                                 continue;
  98                         if (!current->filp[i])
  99                                 return -EBADF;
 100                         if (!current->filp[i]->f_inode)
 101                                 return -EBADF;
 102                         max = i;
 103                 }
 104         }
 105 end_check:
 106         n = max + 1;
 107         if(!(entry = (struct select_table_entry*) __get_free_page(GFP_KERNEL)))
 108                 return -ENOMEM;
 109         FD_ZERO(res_in);
 110         FD_ZERO(res_out);
 111         FD_ZERO(res_ex);
 112         count = 0;
 113         wait_table.nr = 0;
 114         wait_table.entry = entry;
 115         wait = &wait_table;
 116 repeat:
 117         current->state = TASK_INTERRUPTIBLE;
 118         for (i = 0 ; i < n ; i++) {
 119                 if (FD_ISSET(i,in) && check(SEL_IN,wait,current->filp[i])) {
 120                         FD_SET(i, res_in);
 121                         count++;
 122                         wait = NULL;
 123                 }
 124                 if (FD_ISSET(i,out) && check(SEL_OUT,wait,current->filp[i])) {
 125                         FD_SET(i, res_out);
 126                         count++;
 127                         wait = NULL;
 128                 }
 129                 if (FD_ISSET(i,ex) && check(SEL_EX,wait,current->filp[i])) {
 130                         FD_SET(i, res_ex);
 131                         count++;
 132                         wait = NULL;
 133                 }
 134         }
 135         wait = NULL;
 136         if (!count && current->timeout && !(current->signal & ~current->blocked)) {
 137                 schedule();
 138                 goto repeat;
 139         }
 140         free_wait(&wait_table);
 141         free_page((unsigned long) entry);
 142         current->state = TASK_RUNNING;
 143         return count;
 144 }
 145 
 146 /*
 147  * We do a VERIFY_WRITE here even though we are only reading this time:
 148  * we'll write to it eventually..
 149  */
 150 static int __get_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152         int error;
 153 
 154         FD_ZERO(fdset);
 155         if (!fs_pointer)
 156                 return 0;
 157         error = verify_area(VERIFY_WRITE,fs_pointer,sizeof(fd_set));
 158         if (error)
 159                 return error;
 160         while (nr > 0) {
 161                 *fdset = get_fs_long(fs_pointer);
 162                 fdset++;
 163                 fs_pointer++;
 164                 nr -= 32;
 165         }
 166         return 0;
 167 }
 168 
 169 static void __set_fd_set(int nr, unsigned long * fs_pointer, unsigned long * fdset)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171         if (!fs_pointer)
 172                 return;
 173         while (nr > 0) {
 174                 put_fs_long(*fdset, fs_pointer);
 175                 fdset++;
 176                 fs_pointer++;
 177                 nr -= 32;
 178         }
 179 }
 180 
 181 #define get_fd_set(nr,fsp,fdp) \
 182 __get_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
 183 
 184 #define set_fd_set(nr,fsp,fdp) \
 185 __set_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp))
 186 
 187 /*
 188  * We can actually return ERESTARTSYS instead of EINTR, but I'd
 189  * like to be certain this leads to no problems. So I return
 190  * EINTR just for safety.
 191  *
 192  * Update: ERESTARTSYS breaks at least the xview clock binary, so
 193  * I'm trying ERESTARTNOHAND which restarts only when you want to.
 194  */
 195 asmlinkage int sys_select( unsigned long *buffer )
     /* [previous][next][first][last][top][bottom][index][help] */
 196 {
 197 /* Perform the select(nd, in, out, ex, tv) system call. */
 198         int i;
 199         fd_set res_in, in, *inp;
 200         fd_set res_out, out, *outp;
 201         fd_set res_ex, ex, *exp;
 202         int n;
 203         struct timeval *tvp;
 204         unsigned long timeout;
 205 
 206         i = verify_area(VERIFY_READ, buffer, 20);
 207         if (i)
 208                 return i;
 209         n = get_fs_long(buffer++);
 210         if (n < 0)
 211                 return -EINVAL;
 212         if (n > NR_OPEN)
 213                 n = NR_OPEN;
 214         inp = (fd_set *) get_fs_long(buffer++);
 215         outp = (fd_set *) get_fs_long(buffer++);
 216         exp = (fd_set *) get_fs_long(buffer++);
 217         tvp = (struct timeval *) get_fs_long(buffer);
 218         if ((i = get_fd_set(n, inp, &in)) ||
 219             (i = get_fd_set(n, outp, &out)) ||
 220             (i = get_fd_set(n, exp, &ex))) return i;
 221         timeout = ~0UL;
 222         if (tvp) {
 223                 i = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp));
 224                 if (i)
 225                         return i;
 226                 timeout = jiffies;
 227                 timeout += ROUND_UP(get_fs_long((unsigned long *)&tvp->tv_usec),(1000000/HZ));
 228                 timeout += get_fs_long((unsigned long *)&tvp->tv_sec) * HZ;
 229                 if (timeout <= jiffies)
 230                         timeout = 0;
 231         }
 232         current->timeout = timeout;
 233         i = do_select(n, &in, &out, &ex, &res_in, &res_out, &res_ex);
 234         if (current->timeout > jiffies)
 235                 timeout = current->timeout - jiffies;
 236         else
 237                 timeout = 0;
 238         current->timeout = 0;
 239         if (tvp) {
 240                 put_fs_long(timeout/HZ, (unsigned long *) &tvp->tv_sec);
 241                 timeout %= HZ;
 242                 timeout *= (1000000/HZ);
 243                 put_fs_long(timeout, (unsigned long *) &tvp->tv_usec);
 244         }
 245         if (i < 0)
 246                 return i;
 247         if (!i && (current->signal & ~current->blocked))
 248                 return -ERESTARTNOHAND;
 249         set_fd_set(n, inp, &res_in);
 250         set_fd_set(n, outp, &res_out);
 251         set_fd_set(n, exp, &res_ex);
 252         return i;
 253 }

/* [previous][next][first][last][top][bottom][index][help] */