root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. brw_page
  34. mark_buffer_uptodate
  35. unlock_buffer
  36. generic_readpage
  37. grow_buffers
  38. try_to_free_buffer
  39. age_buffer
  40. maybe_shrink_lav_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush
  51. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 5
  39 static char buffersize_index[17] =
  40 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
  41 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
  42 
  43 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  44 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
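      /*
       * Worked example: BUFSIZE_INDEX(1024) evaluates buffersize_index[1024 >> 9],
       * i.e. buffersize_index[2] == 1, and bufferindex_size[1] == 1024 maps that
       * index back to the block size; sizes that are not one of the five
       * supported powers of two hit the -1 entries.
       */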
  45 
  46 static int grow_buffers(int pri, int size);
  47 static int shrink_specific_buffers(unsigned int priority, int size);
  48 static int maybe_shrink_lav_buffers(int);
  49 
  50 static int nr_hash = 0;  /* Size of hash table */
  51 static struct buffer_head ** hash_table;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 
  59 static struct buffer_head * unused_list = NULL;
  60 struct buffer_head * reuse_list = NULL;
  61 static struct wait_queue * buffer_wait = NULL;
  62 
  63 int nr_buffers = 0;
  64 int nr_buffers_type[NR_LIST] = {0,};
  65 int nr_buffers_size[NR_SIZES] = {0,};
  66 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  67 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  68 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  69 int nr_free[NR_SIZES] = {0,};
  70 int buffermem = 0;
  71 int nr_buffer_heads = 0;
  72 extern int *blksize_size[];
  73 
  74 /* Here is the parameter block for the bdflush process. If you add or
  75  * remove any of the parameters, make sure to update kernel/sysctl.c.
  76  */
  77 
  78 static void wakeup_bdflush(int);
  79 
  80 #define N_PARAM 9
  81 #define LAV
  82 
  83 union bdflush_param{
  84         struct {
  85                 int nfract;  /* Percentage of buffer cache dirty to 
  86                                 activate bdflush */
  87                 int ndirty;  /* Maximum number of dirty blocks to write out per
  88                                 wake-cycle */
  89                 int nrefill; /* Number of clean buffers to try and obtain
  90                                 each time we call refill */
  91                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  92                                   when trying to refill buffers. */
  93                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  94                                     search for free clusters */
  95                 int age_buffer;  /* Time for normal buffer to age before 
  96                                     we flush it */
  97                 int age_super;  /* Time for superblock to age before we 
  98                                    flush it */
  99                 int lav_const;  /* Constant used for load average (time
  100                                    constant) */
 101                 int lav_ratio;  /* Used to determine how low a lav for a
 102                                    particular size can go before we start to
 103                                    trim back the buffers */
 104         } b_un;
 105         unsigned int data[N_PARAM];
 106 } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 107 
 108 /* The lav constant is set for 1 minute, as long as the update process runs
 109    every 5 seconds.  If you change the frequency of update, the time
 110    constant will also change. */
 111 
 112 
 113 /* These are the min and max parameter values that we will allow to be assigned */
 114 int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 115 int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
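      /*
       * The union above lets tuning code address the parameters by index:
       * bdf_prm.data[0] aliases b_un.nfract (60 by default), data[1] aliases
       * b_un.ndirty (500), and so on in declaration order.  A bounded update
       * of one parameter then looks roughly like this sketch (the real code
       * lives in sys_bdflush() further down the file):
       *
       *      if (data < bdflush_min[i] || data > bdflush_max[i])
       *              return -EINVAL;
       *      bdf_prm.data[i] = data;
       */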
 116 
 117 /*
 118  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 119  * and getting rid of the cli-sti pairs. The wait-queue routines still
 120  * need cli-sti, but now it's just a couple of 386 instructions or so.
 121  *
 122  * Note that the real wait_on_buffer() is an inline function that checks
  123  * whether the buffer is locked before calling this, so that the wait
  124  * queues aren't set up unnecessarily.
 125  */
 126 void __wait_on_buffer(struct buffer_head * bh)
 127 {
 128         struct wait_queue wait = { current, NULL };
 129 
 130         bh->b_count++;
 131         add_wait_queue(&bh->b_wait, &wait);
 132 repeat:
 133         current->state = TASK_UNINTERRUPTIBLE;
 134         if (buffer_locked(bh)) {
 135                 schedule();
 136                 goto repeat;
 137         }
 138         remove_wait_queue(&bh->b_wait, &wait);
 139         bh->b_count--;
 140         current->state = TASK_RUNNING;
 141 }
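      /*
       * For reference, the inline wrapper mentioned above lives in
       * <linux/locks.h>; in this kernel generation it is roughly the
       * following sketch:
       *
       *      extern inline void wait_on_buffer(struct buffer_head * bh)
       *      {
       *              if (buffer_locked(bh))
       *                      __wait_on_buffer(bh);
       *      }
       */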
 142 
 143 /* Call sync_buffers with wait!=0 to ensure that the call does not
 144    return until all buffer writes have completed.  Sync() may return
 145    before the writes have finished; fsync() may not. */
 146 
 147 
 148 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 149    spontaneously dirty themselves without ever brelse being called.
 150    We will ultimately want to put these in a separate list, but for
 151    now we search all of the lists for dirty buffers */
 152 
 153 static int sync_buffers(kdev_t dev, int wait)
 154 {
 155         int i, retry, pass = 0, err = 0;
 156         int nlist, ncount;
 157         struct buffer_head * bh, *next;
 158 
 159         /* One pass for no-wait, three for wait:
 160            0) write out all dirty, unlocked buffers;
 161            1) write out all dirty buffers, waiting if locked;
 162            2) wait for completion by waiting for all buffers to unlock. */
 163  repeat:
 164         retry = 0;
 165  repeat2:
 166         ncount = 0;
 167         /* We search all lists as a failsafe mechanism, not because we expect
 168            there to be dirty buffers on any of the other lists. */
 169         for(nlist = 0; nlist < NR_LIST; nlist++)
 170          {
 171          repeat1:
 172                  bh = lru_list[nlist];
 173                  if(!bh) continue;
 174                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 175                          if(bh->b_list != nlist) goto repeat1;
 176                          next = bh->b_next_free;
 177                          if(!lru_list[nlist]) break;
 178                          if (dev && bh->b_dev != dev)
 179                                   continue;
 180                          if (buffer_locked(bh))
 181                           {
 182                                   /* Buffer is locked; skip it unless wait is
 183                                      requested AND pass > 0. */
 184                                   if (!wait || !pass) {
 185                                           retry = 1;
 186                                           continue;
 187                                   }
 188                                   wait_on_buffer (bh);
 189                                   goto repeat2;
 190                           }
 191                          /* If an unlocked buffer is not uptodate, there has
 192                              been an IO error. Skip it. */
 193                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 194                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 195                                   err = 1;
 196                                   continue;
 197                           }
 198                          /* Don't write clean buffers.  Don't write ANY buffers
 199                             on the third pass. */
 200                          if (!buffer_dirty(bh) || pass>=2)
 201                                   continue;
 202                          /* don't bother about locked buffers */
 203                          if (buffer_locked(bh))
 204                                  continue;
 205                          bh->b_count++;
 206                          bh->b_flushtime = 0;
 207                          ll_rw_block(WRITE, 1, &bh);
 208 
 209                          if(nlist != BUF_DIRTY) { 
 210                                  printk("[%d %s %ld] ", nlist,
 211                                         kdevname(bh->b_dev), bh->b_blocknr);
 212                                  ncount++;
 213                          };
 214                          bh->b_count--;
 215                          retry = 1;
 216                  }
 217          }
 218         if (ncount)
 219           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 220         
 221         /* If we are waiting for the sync to succeed, and if any dirty
 222            blocks were written, then repeat; on the second pass, only
 223            wait for buffers being written (do not pass to write any
 224            more buffers on the second pass). */
 225         if (wait && retry && ++pass<=2)
 226                  goto repeat;
 227         return err;
 228 }
 229 
 230 void sync_dev(kdev_t dev)
 231 {
 232         sync_buffers(dev, 0);
 233         sync_supers(dev);
 234         sync_inodes(dev);
 235         sync_buffers(dev, 0);
 236         sync_dquots(dev, -1);
 237 }
 238 
 239 int fsync_dev(kdev_t dev)
 240 {
 241         sync_buffers(dev, 0);
 242         sync_supers(dev);
 243         sync_inodes(dev);
 244         sync_dquots(dev, -1);
 245         return sync_buffers(dev, 1);
 246 }
 247 
 248 asmlinkage int sys_sync(void)
 249 {
 250         fsync_dev(0);
 251         return 0;
 252 }
 253 
 254 int file_fsync (struct inode *inode, struct file *filp)
 255 {
 256         return fsync_dev(inode->i_dev);
 257 }
 258 
 259 asmlinkage int sys_fsync(unsigned int fd)
 260 {
 261         struct file * file;
 262         struct inode * inode;
 263 
 264         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 265                 return -EBADF;
 266         if (!file->f_op || !file->f_op->fsync)
 267                 return -EINVAL;
 268         if (file->f_op->fsync(inode,file))
 269                 return -EIO;
 270         return 0;
 271 }
 272 
 273 asmlinkage int sys_fdatasync(unsigned int fd)
 274 {
 275         struct file * file;
 276         struct inode * inode;
 277 
 278         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 279                 return -EBADF;
 280         if (!file->f_op || !file->f_op->fsync)
 281                 return -EINVAL;
 282         /* this needs further work, at the moment it is identical to fsync() */
 283         if (file->f_op->fsync(inode,file))
 284                 return -EIO;
 285         return 0;
 286 }
 287 
 288 void invalidate_buffers(kdev_t dev)
 289 {
 290         int i;
 291         int nlist;
 292         struct buffer_head * bh;
 293 
 294         for(nlist = 0; nlist < NR_LIST; nlist++) {
 295                 bh = lru_list[nlist];
 296                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 297                         if (bh->b_dev != dev)
 298                                 continue;
 299                         wait_on_buffer(bh);
 300                         if (bh->b_dev != dev)
 301                                 continue;
 302                         if (bh->b_count)
 303                                 continue;
 304                         bh->b_flushtime = 0;
 305                         clear_bit(BH_Protected, &bh->b_state);
 306                         clear_bit(BH_Uptodate, &bh->b_state);
 307                         clear_bit(BH_Dirty, &bh->b_state);
 308                         clear_bit(BH_Req, &bh->b_state);
 309                 }
 310         }
 311 }
 312 
 313 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 314 #define hash(dev,block) hash_table[_hashfn(dev,block)]
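      /*
       * For example, a lookup of (dev, block) starts at
       * hash_table[((unsigned)(HASHDEV(dev) ^ block)) % nr_hash] and walks
       * the b_next chain from there; see find_buffer() below.
       */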
 315 
 316 static inline void remove_from_hash_queue(struct buffer_head * bh)
 317 {
 318         if (bh->b_next)
 319                 bh->b_next->b_prev = bh->b_prev;
 320         if (bh->b_prev)
 321                 bh->b_prev->b_next = bh->b_next;
 322         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 323                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 324         bh->b_next = bh->b_prev = NULL;
 325 }
 326 
 327 static inline void remove_from_lru_list(struct buffer_head * bh)
 328 {
 329         if (!(bh->b_prev_free) || !(bh->b_next_free))
 330                 panic("VFS: LRU block list corrupted");
 331         if (bh->b_dev == B_FREE)
 332                 panic("LRU list corrupted");
 333         bh->b_prev_free->b_next_free = bh->b_next_free;
 334         bh->b_next_free->b_prev_free = bh->b_prev_free;
 335 
 336         if (lru_list[bh->b_list] == bh)
 337                  lru_list[bh->b_list] = bh->b_next_free;
 338         if (lru_list[bh->b_list] == bh)
 339                  lru_list[bh->b_list] = NULL;
 340         if (next_to_age[bh->b_list] == bh)
 341                 next_to_age[bh->b_list] = bh->b_next_free;
 342         if (next_to_age[bh->b_list] == bh)
 343                 next_to_age[bh->b_list] = NULL;
 344 
 345         bh->b_next_free = bh->b_prev_free = NULL;
 346 }
 347 
 348 static inline void remove_from_free_list(struct buffer_head * bh)
 349 {
 350         int isize = BUFSIZE_INDEX(bh->b_size);
 351         if (!(bh->b_prev_free) || !(bh->b_next_free))
 352                 panic("VFS: Free block list corrupted");
 353         if(bh->b_dev != B_FREE)
 354                 panic("Free list corrupted");
 355         if(!free_list[isize])
 356                 panic("Free list empty");
 357         nr_free[isize]--;
 358         if(bh->b_next_free == bh)
 359                  free_list[isize] = NULL;
 360         else {
 361                 bh->b_prev_free->b_next_free = bh->b_next_free;
 362                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 363                 if (free_list[isize] == bh)
 364                          free_list[isize] = bh->b_next_free;
 365         }
 366         bh->b_next_free = bh->b_prev_free = NULL;
 367 }
 368 
 369 static inline void remove_from_queues(struct buffer_head * bh)
 370 {
 371         if(bh->b_dev == B_FREE) {
 372                 remove_from_free_list(bh); /* Free list entries should not be
 373                                               in the hash queue */
 374                 return;
 375         };
 376         nr_buffers_type[bh->b_list]--;
 377         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 378         remove_from_hash_queue(bh);
 379         remove_from_lru_list(bh);
 380 }
 381 
 382 static inline void put_last_lru(struct buffer_head * bh)
 383 {
 384         if (!bh)
 385                 return;
 386         if (bh == lru_list[bh->b_list]) {
 387                 lru_list[bh->b_list] = bh->b_next_free;
 388                 if (next_to_age[bh->b_list] == bh)
 389                         next_to_age[bh->b_list] = bh->b_next_free;
 390                 return;
 391         }
 392         if(bh->b_dev == B_FREE)
 393                 panic("Wrong block for lru list");
 394         remove_from_lru_list(bh);
  395 /* add to back of the lru list */
 396 
 397         if(!lru_list[bh->b_list]) {
 398                 lru_list[bh->b_list] = bh;
 399                 lru_list[bh->b_list]->b_prev_free = bh;
 400         }
 401         if (!next_to_age[bh->b_list])
 402                 next_to_age[bh->b_list] = bh;
 403 
 404         bh->b_next_free = lru_list[bh->b_list];
 405         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 406         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 407         lru_list[bh->b_list]->b_prev_free = bh;
 408 }
 409 
 410 static inline void put_last_free(struct buffer_head * bh)
 411 {
 412         int isize;
 413         if (!bh)
 414                 return;
 415 
 416         isize = BUFSIZE_INDEX(bh->b_size);      
 417         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 418         /* add to back of free list */
 419         if(!free_list[isize]) {
 420                 free_list[isize] = bh;
 421                 bh->b_prev_free = bh;
 422         };
 423 
 424         nr_free[isize]++;
 425         bh->b_next_free = free_list[isize];
 426         bh->b_prev_free = free_list[isize]->b_prev_free;
 427         free_list[isize]->b_prev_free->b_next_free = bh;
 428         free_list[isize]->b_prev_free = bh;
 429 }
 430 
 431 static inline void insert_into_queues(struct buffer_head * bh)
 432 {
 433         /* put at end of free list */
 434         if(bh->b_dev == B_FREE) {
 435                 put_last_free(bh);
 436                 return;
 437         }
 438         if(!lru_list[bh->b_list]) {
 439                 lru_list[bh->b_list] = bh;
 440                 bh->b_prev_free = bh;
 441         }
 442         if (!next_to_age[bh->b_list])
 443                 next_to_age[bh->b_list] = bh;
 444         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 445         bh->b_next_free = lru_list[bh->b_list];
 446         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 447         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 448         lru_list[bh->b_list]->b_prev_free = bh;
 449         nr_buffers_type[bh->b_list]++;
 450         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 451 /* put the buffer in new hash-queue if it has a device */
 452         bh->b_prev = NULL;
 453         bh->b_next = NULL;
 454         if (!(bh->b_dev))
 455                 return;
 456         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 457         hash(bh->b_dev,bh->b_blocknr) = bh;
 458         if (bh->b_next)
 459                 bh->b_next->b_prev = bh;
 460 }
 461 
 462 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 463 {               
 464         struct buffer_head * tmp;
 465 
 466         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 467                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 468                         if (tmp->b_size == size)
 469                                 return tmp;
 470                         else {
 471                                 printk("VFS: Wrong blocksize on device %s\n",
 472                                         kdevname(dev));
 473                                 return NULL;
 474                         }
 475         return NULL;
 476 }
 477 
 478 /*
 479  * Why like this, I hear you say... The reason is race-conditions.
 480  * As we don't lock buffers (unless we are reading them, that is),
 481  * something might happen to it while we sleep (ie a read-error
 482  * will force it bad). This shouldn't really happen currently, but
 483  * the code is ready.
 484  */
 485 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 486 {
 487         struct buffer_head * bh;
 488 
 489         for (;;) {
 490                 if (!(bh=find_buffer(dev,block,size)))
 491                         return NULL;
 492                 bh->b_count++;
 493                 wait_on_buffer(bh);
 494                 if (bh->b_dev == dev && bh->b_blocknr == block
 495                                              && bh->b_size == size)
 496                         return bh;
 497                 bh->b_count--;
 498         }
 499 }
 500 
 501 void set_blocksize(kdev_t dev, int size)
 502 {
 503         int i, nlist;
 504         struct buffer_head * bh, *bhnext;
 505 
 506         if (!blksize_size[MAJOR(dev)])
 507                 return;
 508 
 509         if (size > PAGE_SIZE)
 510                 size = 0;
 511 
 512         switch (size) {
 513                 default: panic("Invalid blocksize passed to set_blocksize");
 514                 case 512: case 1024: case 2048: case 4096: case 8192: ;
 515         }
 516 
 517         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 518                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 519                 return;
 520         }
 521         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 522                 return;
 523         sync_buffers(dev, 2);
 524         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 525 
 526   /* We need to be quite careful how we do this - we are moving entries
 527      around on the free list, and we can get in a loop if we are not careful.*/
 528 
 529         for(nlist = 0; nlist < NR_LIST; nlist++) {
 530                 bh = lru_list[nlist];
 531                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 532                         if(!bh) break;
 533                         bhnext = bh->b_next_free; 
 534                         if (bh->b_dev != dev)
 535                                  continue;
 536                         if (bh->b_size == size)
 537                                  continue;
 538                         
 539                         wait_on_buffer(bh);
 540                         if (bh->b_dev == dev && bh->b_size != size) {
 541                                 clear_bit(BH_Dirty, &bh->b_state);
 542                                 clear_bit(BH_Uptodate, &bh->b_state);
 543                                 clear_bit(BH_Req, &bh->b_state);
 544                                 bh->b_flushtime = 0;
 545                         }
 546                         remove_from_hash_queue(bh);
 547                 }
 548         }
 549 }
 550 
 551 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
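      /* A buffer is a "bad" reclaim candidate while it is dirty or locked;
         refill_freelist() below uses this to skip buffers it cannot steal
         cheaply. */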
 552 
 553 void refill_freelist(int size)
 554 {
 555         struct buffer_head * bh, * tmp;
 556         struct buffer_head * candidate[NR_LIST];
 557         unsigned int best_time, winner;
 558         int isize = BUFSIZE_INDEX(size);
 559         int buffers[NR_LIST];
 560         int i;
 561         int needed;
 562 
 563         /* First see if we even need this.  Sometimes it is advantageous
  564          to request some blocks in a filesystem that we know we will
 565          be needing ahead of time. */
 566 
 567         if (nr_free[isize] > 100)
 568                 return;
 569 
 570         /* If there are too many dirty buffers, we wake up the update process
 571            now so as to ensure that there are still clean buffers available
 572            for user processes to use (and dirty) */
 573         
 574         /* We are going to try and locate this much memory */
 575         needed =bdf_prm.b_un.nrefill * size;  
 576 
 577         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 578                grow_buffers(GFP_BUFFER, size)) {
 579                 needed -= PAGE_SIZE;
 580         }
 581 
 582         if(needed <= 0) return;
 583 
 584         /* See if there are too many buffers of a different size.
 585            If so, victimize them */
 586 
 587         while(maybe_shrink_lav_buffers(size))
 588          {
 589                  if(!grow_buffers(GFP_BUFFER, size)) break;
 590                  needed -= PAGE_SIZE;
 591                  if(needed <= 0) return;
 592          };
 593 
 594         /* OK, we cannot grow the buffer cache, now try and get some
 595            from the lru list */
 596 
 597         /* First set the candidate pointers to usable buffers.  This
 598            should be quick nearly all of the time. */
 599 
 600 repeat0:
 601         for(i=0; i<NR_LIST; i++){
 602                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 603                    nr_buffers_type[i] == 0) {
 604                         candidate[i] = NULL;
 605                         buffers[i] = 0;
 606                         continue;
 607                 }
 608                 buffers[i] = nr_buffers_type[i];
 609                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 610                  {
 611                          if(buffers[i] < 0) panic("Here is the problem");
 612                          tmp = bh->b_next_free;
 613                          if (!bh) break;
 614                          
 615                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 616                              buffer_dirty(bh)) {
 617                                  refile_buffer(bh);
 618                                  continue;
 619                          }
 620                          
 621                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 622                                   continue;
 623                          
 624                          /* Buffers are written in the order they are placed 
 625                             on the locked list. If we encounter a locked
 626                             buffer here, this means that the rest of them
 627                             are also locked */
 628                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 629                                  buffers[i] = 0;
 630                                  break;
 631                          }
 632                          
 633                          if (BADNESS(bh)) continue;
 634                          break;
 635                  };
 636                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 637                 else candidate[i] = bh;
 638                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 639         }
 640         
 641  repeat:
 642         if(needed <= 0) return;
 643         
 644         /* Now see which candidate wins the election */
 645         
 646         winner = best_time = UINT_MAX;  
 647         for(i=0; i<NR_LIST; i++){
 648                 if(!candidate[i]) continue;
 649                 if(candidate[i]->b_lru_time < best_time){
 650                         best_time = candidate[i]->b_lru_time;
 651                         winner = i;
 652                 }
 653         }
 654         
 655         /* If we have a winner, use it, and then get a new candidate from that list */
 656         if(winner != UINT_MAX) {
 657                 i = winner;
 658                 bh = candidate[i];
 659                 candidate[i] = bh->b_next_free;
 660                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 661                 if (bh->b_count || bh->b_size != size)
 662                          panic("Busy buffer in candidate list\n");
 663                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 664                          panic("Shared buffer in candidate list\n");
 665                 if (buffer_protected(bh))
 666                         panic("Protected buffer in candidate list\n");
 667                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 668                 
 669                 if(bh->b_dev == B_FREE)
 670                         panic("Wrong list");
 671                 remove_from_queues(bh);
 672                 bh->b_dev = B_FREE;
 673                 put_last_free(bh);
 674                 needed -= bh->b_size;
 675                 buffers[i]--;
 676                 if(buffers[i] < 0) panic("Here is the problem");
 677                 
 678                 if(buffers[i] == 0) candidate[i] = NULL;
 679                 
 680                 /* Now all we need to do is advance the candidate pointer
 681                    from the winner list to the next usable buffer */
 682                 if(candidate[i] && buffers[i] > 0){
 683                         if(buffers[i] <= 0) panic("Here is another problem");
 684                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 685                                 if(buffers[i] < 0) panic("Here is the problem");
 686                                 tmp = bh->b_next_free;
 687                                 if (!bh) break;
 688                                 
 689                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 690                                     buffer_dirty(bh)) {
 691                                         refile_buffer(bh);
 692                                         continue;
 693                                 };
 694                                 
 695                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 696                                          continue;
 697                                 
 698                                 /* Buffers are written in the order they are
 699                                    placed on the locked list.  If we encounter
 700                                    a locked buffer here, this means that the
 701                                    rest of them are also locked */
 702                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 703                                         buffers[i] = 0;
 704                                         break;
 705                                 }
 706               
 707                                 if (BADNESS(bh)) continue;
 708                                 break;
 709                         };
 710                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 711                         else candidate[i] = bh;
 712                         if(candidate[i] && candidate[i]->b_count) 
 713                                  panic("Here is the problem");
 714                 }
 715                 
 716                 goto repeat;
 717         }
 718         
 719         if(needed <= 0) return;
 720         
 721         /* Too bad, that was not enough. Try a little harder to grow some. */
 722         
 723         if (nr_free_pages > min_free_pages + 5) {
 724                 if (grow_buffers(GFP_BUFFER, size)) {
 725                         needed -= PAGE_SIZE;
 726                         goto repeat0;
 727                 };
 728         }
 729         
 730         /* and repeat until we find something good */
 731         if (!grow_buffers(GFP_ATOMIC, size))
 732                 wakeup_bdflush(1);
 733         needed -= PAGE_SIZE;
 734         goto repeat0;
 735 }
 736 
 737 /*
 738  * Ok, this is getblk, and it isn't very clear, again to hinder
 739  * race-conditions. Most of the code is seldom used, (ie repeating),
 740  * so it should be much more efficient than it looks.
 741  *
 742  * The algorithm is changed: hopefully better, and an elusive bug removed.
 743  *
 744  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 745  * when the filesystem starts to get full of dirty blocks (I hope).
 746  */
 747 struct buffer_head * getblk(kdev_t dev, int block, int size)
 748 {
 749         struct buffer_head * bh;
 750         int isize = BUFSIZE_INDEX(size);
 751 
 752         /* Update this for the buffer size lav. */
 753         buffer_usage[isize]++;
 754 
 755         /* If there are too many dirty buffers, we wake up the update process
 756            now so as to ensure that there are still clean buffers available
 757            for user processes to use (and dirty) */
 758 repeat:
 759         bh = get_hash_table(dev, block, size);
 760         if (bh) {
 761                 if (!buffer_dirty(bh)) {
 762                         if (buffer_uptodate(bh))
 763                                  put_last_lru(bh);
 764                         bh->b_flushtime = 0;
 765                 }
 766                 set_bit(BH_Touched, &bh->b_state);
 767                 return bh;
 768         }
 769 
 770         while(!free_list[isize]) refill_freelist(size);
 771         
 772         if (find_buffer(dev,block,size))
 773                  goto repeat;
 774 
 775         bh = free_list[isize];
 776         remove_from_free_list(bh);
 777 
 778 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 779 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 780         bh->b_count=1;
 781         bh->b_flushtime=0;
 782         bh->b_state=(1<<BH_Touched);
 783         bh->b_dev=dev;
 784         bh->b_blocknr=block;
 785         insert_into_queues(bh);
 786         return bh;
 787 }
 788 
 789 void set_writetime(struct buffer_head * buf, int flag)
 790 {
 791         int newtime;
 792 
 793         if (buffer_dirty(buf)) {
  794                 /* Set the writeback deadline, or pull an existing one earlier */
 795                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 796                                      bdf_prm.b_un.age_buffer);
 797                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 798                          buf->b_flushtime = newtime;
 799         } else {
 800                 buf->b_flushtime = 0;
 801         }
 802 }
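      /*
       * With the defaults in bdf_prm above (age_buffer = 30*HZ, age_super =
       * 5*HZ) an ordinary dirty buffer becomes eligible for writeback 30
       * seconds after it was dirtied and a superblock after 5 seconds; an
       * existing, earlier b_flushtime is never pushed back.
       */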
 803 
 804 
 805 void refile_buffer(struct buffer_head * buf)
 806 {
 807         int dispose;
 808 
 809         if(buf->b_dev == B_FREE) {
 810                 printk("Attempt to refile free buffer\n");
 811                 return;
 812         }
 813         if (buffer_dirty(buf))
 814                 dispose = BUF_DIRTY;
 815         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 816                 dispose = BUF_SHARED;
 817         else if (buffer_locked(buf))
 818                 dispose = BUF_LOCKED;
 819         else if (buf->b_list == BUF_SHARED)
 820                 dispose = BUF_UNSHARED;
 821         else
 822                 dispose = BUF_CLEAN;
 823         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 824         if(dispose != buf->b_list)  {
 825                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 826                          buf->b_lru_time = jiffies;
 827                 if(dispose == BUF_LOCKED && 
 828                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 829                          dispose = BUF_LOCKED1;
 830                 remove_from_queues(buf);
 831                 buf->b_list = dispose;
 832                 insert_into_queues(buf);
 833                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 834                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 835                    bdf_prm.b_un.nfract/100)
 836                          wakeup_bdflush(0);
 837         }
 838 }
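      /*
       * With the default nfract of 60, the wakeup above fires once more than
       * 60% of the non-shared buffers in the cache are dirty.
       */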
 839 
 840 /*
 841  * Release a buffer head
 842  */
 843 void __brelse(struct buffer_head * buf)
 844 {
 845         wait_on_buffer(buf);
 846 
 847         /* If dirty, mark the time this buffer should be written back */
 848         set_writetime(buf, 0);
 849         refile_buffer(buf);
 850 
 851         if (buf->b_count) {
 852                 buf->b_count--;
 853                 return;
 854         }
 855         printk("VFS: brelse: Trying to free free buffer\n");
 856 }
 857 
 858 /*
 859  * bforget() is like brelse(), except it removes the buffer
 860  * from the hash-queues (so that it won't be re-used if it's
 861  * shared).
 862  */
 863 void __bforget(struct buffer_head * buf)
 864 {
 865         wait_on_buffer(buf);
 866         mark_buffer_clean(buf);
 867         clear_bit(BH_Protected, &buf->b_state);
 868         buf->b_count--;
 869         remove_from_hash_queue(buf);
 870         buf->b_dev = NODEV;
 871         refile_buffer(buf);
 872 }
 873 
 874 /*
 875  * bread() reads a specified block and returns the buffer that contains
 876  * it. It returns NULL if the block was unreadable.
 877  */
 878 struct buffer_head * bread(kdev_t dev, int block, int size)
 879 {
 880         struct buffer_head * bh;
 881 
 882         if (!(bh = getblk(dev, block, size))) {
 883                 printk("VFS: bread: READ error on device %s\n",
 884                         kdevname(dev));
 885                 return NULL;
 886         }
 887         if (buffer_uptodate(bh))
 888                 return bh;
 889         ll_rw_block(READ, 1, &bh);
 890         wait_on_buffer(bh);
 891         if (buffer_uptodate(bh))
 892                 return bh;
 893         brelse(bh);
 894         return NULL;
 895 }
 896 
 897 /*
 898  * Ok, breada can be used as bread, but additionally to mark other
 899  * blocks for reading as well. End the argument list with a negative
 900  * number.
 901  */
 902 
 903 #define NBUF 16
 904 
 905 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 906         unsigned int pos, unsigned int filesize)
 907 {
 908         struct buffer_head * bhlist[NBUF];
 909         unsigned int blocks;
 910         struct buffer_head * bh;
 911         int index;
 912         int i, j;
 913 
 914         if (pos >= filesize)
 915                 return NULL;
 916 
 917         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 918                 return NULL;
 919 
 920         index = BUFSIZE_INDEX(bh->b_size);
 921 
 922         if (buffer_uptodate(bh))
 923                 return bh;
 924 
 925         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 926 
 927         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 928                 blocks = read_ahead[MAJOR(dev)] >> index;
 929         if (blocks > NBUF)
 930                 blocks = NBUF;
 931         
 932         bhlist[0] = bh;
 933         j = 1;
 934         for(i=1; i<blocks; i++) {
 935                 bh = getblk(dev,block+i,bufsize);
 936                 if (buffer_uptodate(bh)) {
 937                         brelse(bh);
 938                         break;
 939                 }
 940                 bhlist[j++] = bh;
 941         }
 942 
 943         /* Request the read for these buffers, and then release them */
 944         ll_rw_block(READ, j, bhlist);
 945 
 946         for(i=1; i<j; i++)
 947                 brelse(bhlist[i]);
 948 
 949         /* Wait for this buffer, and then continue on */
 950         bh = bhlist[0];
 951         wait_on_buffer(bh);
 952         if (buffer_uptodate(bh))
 953                 return bh;
 954         brelse(bh);
 955         return NULL;
 956 }
 957 
 958 /*
 959  * See fs/inode.c for the weird use of volatile..
 960  */
 961 static void put_unused_buffer_head(struct buffer_head * bh)
 962 {
 963         struct wait_queue * wait;
 964 
 965         wait = ((volatile struct buffer_head *) bh)->b_wait;
 966         memset(bh,0,sizeof(*bh));
 967         ((volatile struct buffer_head *) bh)->b_wait = wait;
 968         bh->b_next_free = unused_list;
 969         unused_list = bh;
 970         wake_up(&buffer_wait);
 971 }
 972 
 973 static void get_more_buffer_heads(void)
 974 {
 975         int i;
 976         struct buffer_head * bh;
 977 
 978         for (;;) {
 979                 if (unused_list)
 980                         return;
 981 
 982                 /*
 983                  * This is critical.  We can't swap out pages to get
 984                  * more buffer heads, because the swap-out may need
 985                  * more buffer-heads itself.  Thus GFP_ATOMIC.
 986                  */
 987                 bh = (struct buffer_head *) get_free_page(GFP_ATOMIC);
 988                 if (bh)
 989                         break;
 990 
 991                 /*
 992                  * Uhhuh. We're _really_ low on memory. Now we just
 993                  * wait for old buffer heads to become free due to
 994                  * finishing IO..
 995                  */
 996                 sleep_on(&buffer_wait);
 997         }
 998 
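              /* Carve the fresh page into buffer heads: bump nr_buffer_heads
                 by PAGE_SIZE/sizeof(*bh) in one go, then push each head onto
                 the unused_list. */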
 999         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
1000                 bh->b_next_free = unused_list;  /* only make link */
1001                 unused_list = bh++;
1002         }
1003 }
1004 
1005 /* 
1006  * We can't put completed temporary IO buffer_heads directly onto the
1007  * unused_list when they become unlocked, since the device driver
1008  * end_request routines still expect access to the buffer_head's
1009  * fields after the final unlock.  So, the device driver puts them on
1010  * the reuse_list instead once IO completes, and we recover these to
1011  * the unused_list here.
1012  *
1013  * The reuse_list receives buffers from interrupt routines, so we need
1014  * to be IRQ-safe here (but note that interrupts only _add_ to the
1015  * reuse_list, never take away. So we don't need to worry about the
1016  * reuse_list magically emptying).
1017  */
1018 static inline void recover_reusable_buffer_heads(void)
1019 {
1020         if (reuse_list) {
1021                 struct buffer_head *bh;
1022                 unsigned long flags;
1023         
1024                 save_flags(flags);
1025                 do {
1026                         cli();
1027                         bh = reuse_list;
1028                         reuse_list = bh->b_next_free;
1029                         restore_flags(flags);
1030                         put_unused_buffer_head(bh);
1031                 } while (reuse_list);
1032         }
1033 }
1034 
1035 static struct buffer_head * get_unused_buffer_head(void)
1036 {
1037         struct buffer_head * bh;
1038 
1039         recover_reusable_buffer_heads();
1040         get_more_buffer_heads();
1041         if (!unused_list)
1042                 return NULL;
1043         bh = unused_list;
1044         unused_list = bh->b_next_free;
1045         bh->b_next_free = NULL;
1046         bh->b_data = NULL;
1047         bh->b_size = 0;
1048         bh->b_state = 0;
1049         return bh;
1050 }
1051 
1052 /*
1053  * Create the appropriate buffers when given a page for data area and
1054  * the size of each buffer.. Use the bh->b_this_page linked list to
1055  * follow the buffers created.  Return NULL if unable to create more
1056  * buffers.
1057  */
1058 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1059 {
1060         struct buffer_head *bh, *head;
1061         unsigned long offset;
1062 
1063         head = NULL;
1064         offset = PAGE_SIZE;
1065         while ((offset -= size) < PAGE_SIZE) {
1066                 bh = get_unused_buffer_head();
1067                 if (!bh)
1068                         goto no_grow;
1069                 bh->b_this_page = head;
1070                 head = bh;
1071                 bh->b_data = (char *) (page+offset);
1072                 bh->b_size = size;
1073                 bh->b_dev = B_FREE;  /* Flag as unused */
1074         }
1075         return head;
1076 /*
1077  * In case anything failed, we just free everything we got.
1078  */
1079 no_grow:
1080         bh = head;
1081         while (bh) {
1082                 head = bh;
1083                 bh = bh->b_this_page;
1084                 put_unused_buffer_head(head);
1085         }
1086         return NULL;
1087 }
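      /*
       * Note that create_buffers() leaves the b_this_page chain
       * NULL-terminated (the head is the buffer at offset 0); callers such
       * as brw_page() and grow_buffers() close it into a ring with
       * "...->b_this_page = bh" once all of the heads are in place.
       */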
1088 
1089 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1090 {
1091         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1092         int block, nr;
1093         struct page *page;
1094 
1095         page = mem_map + MAP_NR(address);
1096         page->uptodate = 0;
1097         bh = create_buffers(address, size);
1098         if (!bh)
1099                 return -ENOMEM;
1100         nr = 0;
1101         next = bh;
1102         do {
1103                 struct buffer_head * tmp;
1104                 block = *(b++);
1105 
1106                 set_bit(BH_FreeOnIO, &next->b_state);
1107                 next->b_list = BUF_CLEAN;
1108                 next->b_dev = dev;
1109                 next->b_blocknr = block;
1110                 next->b_count = 1;
1111                 next->b_flushtime = 0;
1112                 set_bit(BH_Uptodate, &next->b_state);
1113 
1114                 /* When we use bmap, we define block zero to represent
1115                    a hole.  ll_rw_page, however, may legitimately
1116                    access block zero, and we need to distinguish the
1117                    two cases. 
1118                    */
1119                 if (bmap && !block) {
1120                         memset(next->b_data, 0, size);
1121                         next->b_count--;
1122                         continue;
1123                 }
1124                 tmp = get_hash_table(dev, block, size);
1125                 if (tmp) {
1126                         if (!buffer_uptodate(tmp)) {
1127                                 if (rw == READ)
1128                                         ll_rw_block(READ, 1, &tmp);
1129                                 wait_on_buffer(tmp);
1130                         }
1131                         if (rw == READ) 
1132                                 memcpy(next->b_data, tmp->b_data, size);
1133                         else {
1134                                 memcpy(tmp->b_data, next->b_data, size);
1135                                 mark_buffer_dirty(tmp, 0);
1136                         }
1137                         brelse(tmp);
1138                         next->b_count--;
1139                         continue;
1140                 }
1141                 if (rw == READ)
1142                         clear_bit(BH_Uptodate, &next->b_state);
1143                 else
1144                         set_bit(BH_Dirty, &next->b_state);
1145                 arr[nr++] = next;
1146         } while (prev = next, (next = next->b_this_page) != NULL);
1147         prev->b_this_page = bh;
1148         
1149         if (nr)
1150                 ll_rw_block(rw, nr, arr);
1151         else {
1152                 unsigned long flags;
1153                 page->locked = 0;
1154                 page->uptodate = 1;
1155                 wake_up(&page->wait);
1156                 next = bh;
1157                 save_flags(flags);
1158                 cli();
1159                 do {
1160                         next->b_next_free = reuse_list;
1161                         reuse_list = next;
1162                         next = next->b_this_page;
1163                 } while (next != bh);
1164                 restore_flags(flags);
1165         }
1166         ++current->maj_flt;
1167         return 0;
1168 }
1169 
1170 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1171 {
1172         if (on) {
1173                 struct buffer_head *tmp = bh;
1174                 int page_uptodate = 1;
1175                 set_bit(BH_Uptodate, &bh->b_state);
1176                 do {
1177                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1178                                 page_uptodate = 0;
1179                                 break;
1180                         }
1181                         tmp=tmp->b_this_page;
1182                 } while (tmp && tmp != bh);
1183                 if (page_uptodate)
1184                         mem_map[MAP_NR(bh->b_data)].uptodate = 1;
1185         } else
1186                 clear_bit(BH_Uptodate, &bh->b_state);
1187 }
1188 
1189 void unlock_buffer(struct buffer_head * bh)
1190 {
1191         struct buffer_head *tmp;
1192         unsigned long flags;
1193         struct page *page;
1194 
1195         clear_bit(BH_Lock, &bh->b_state);
1196         wake_up(&bh->b_wait);
1197 
1198         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1199                 return;
1200         page = mem_map + MAP_NR(bh->b_data);
1201         if (!page->locked) {
1202                 printk ("Whoops: unlock_buffer: "
1203                         "async io complete on unlocked page\n");
1204                 return;
1205         }
1206         if (bh->b_count != 1) {
1207                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1208                 return;
1209         }
1210         /* Async buffer_heads are here only as labels for IO, and get
1211            thrown away once the IO for this page is complete.  IO is
1212            deemed complete once all buffers have been visited
1213            (b_count==0) and are now unlocked. */
1214         bh->b_count--;
1215         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1216                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1217                         return;
1218         }
1219 
1220         /* OK, go ahead and complete the async IO on this page. */
1221         save_flags(flags);
1222         page->locked = 0;
1223         wake_up(&page->wait);
1224         cli();
1225         tmp = bh;
1226         do {
1227                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1228                         printk ("Whoops: unlock_buffer: "
1229                                 "async IO mismatch on page.\n");
1230                         restore_flags(flags);
1231                         return;
1232                 }
1233                 tmp->b_next_free = reuse_list;
1234                 reuse_list = tmp;
1235                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1236                 tmp = tmp->b_this_page;
1237         } while (tmp != bh);
1238         restore_flags(flags);
1239         if (page->free_after) {
1240                 extern int nr_async_pages;
1241                 nr_async_pages--;
1242                 page->free_after = 0;
1243                 free_page(page_address(page));
1244         }
1245         wake_up(&buffer_wait);
1246 }
1247 
1248 /*
1249  * Generic "readpage" function for block devices that have the normal
1250  * bmap functionality. This is most of the block device filesystems.
1251  * Reads the page asynchronously --- the unlock_buffer() and
 1252  *  mark_buffer_uptodate() functions propagate buffer state into the
1253  * page struct once IO has completed.
1254  */
1255 int generic_readpage(struct inode * inode, struct page * page)
1256 {
1257         unsigned long block, address;
1258         int *p, nr[PAGE_SIZE/512];
1259         int i;
1260 
1261         address = page_address(page);
1262         page->count++;
1263         page->locked = 1;
1264         
1265         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1266         block = page->offset >> inode->i_sb->s_blocksize_bits;
1267         p = nr;
1268         do {
1269                 *p = inode->i_op->bmap(inode, block);
1270                 i--;
1271                 block++;
1272                 p++;
1273         } while (i > 0);
1274 
1275         /* IO start */
1276         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1277         free_page(address);
1278         return 0;
1279 }
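      /*
       * A filesystem whose bmap() behaves conventionally can use this
       * routine directly as its readpage operation; the usual hookup
       * (sketch, with ext2 as one example) amounts to
       *
       *      inode->i_op->readpage == generic_readpage
       *
       * so the page-cache read path ends up in the routine above.
       */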
1280 
1281 /*
1282  * Try to increase the number of buffers available: the size argument
1283  * is used to determine what kind of buffers we want.
1284  */
1285 static int grow_buffers(int pri, int size)
1286 {
1287         unsigned long page;
1288         struct buffer_head *bh, *tmp;
1289         struct buffer_head * insert_point;
1290         int isize;
1291 
1292         if ((size & 511) || (size > PAGE_SIZE)) {
1293                 printk("VFS: grow_buffers: size = %d\n",size);
1294                 return 0;
1295         }
1296 
1297         isize = BUFSIZE_INDEX(size);
1298 
1299         if (!(page = __get_free_page(pri)))
1300                 return 0;
1301         bh = create_buffers(page, size);
1302         if (!bh) {
1303                 free_page(page);
1304                 return 0;
1305         }
1306 
1307         insert_point = free_list[isize];
1308 
1309         tmp = bh;
1310         while (1) {
1311                 nr_free[isize]++;
1312                 if (insert_point) {
1313                         tmp->b_next_free = insert_point->b_next_free;
1314                         tmp->b_prev_free = insert_point;
1315                         insert_point->b_next_free->b_prev_free = tmp;
1316                         insert_point->b_next_free = tmp;
1317                 } else {
1318                         tmp->b_prev_free = tmp;
1319                         tmp->b_next_free = tmp;
1320                 }
1321                 insert_point = tmp;
1322                 ++nr_buffers;
1323                 if (tmp->b_this_page)
1324                         tmp = tmp->b_this_page;
1325                 else
1326                         break;
1327         }
1328         free_list[isize] = bh;
1329         mem_map[MAP_NR(page)].buffers = bh;
1330         tmp->b_this_page = bh;
1331         buffermem += PAGE_SIZE;
1332         return 1;
1333 }
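/*
 * Editor's sketch (not part of the original source): grow_buffers() takes
 * an allocation priority and a buffer size, and returns 1 on success or 0
 * if the size is bad or no page could be allocated, so callers such as
 * refill_freelist() and buffer_init() check the outcome themselves.  The
 * helper name "example_add_page" is hypothetical.
 */
#if 0
static void example_add_page(void)
{
        if (!grow_buffers(GFP_KERNEL, BLOCK_SIZE))
                printk("example_add_page: no page for new buffers\n");
}
#endif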
1334 
1335 
1336 /* =========== Reduce the buffer memory ============= */
1337 
1338 /*
1339  * try_to_free_buffer() checks if all the buffers on this particular page
1340  * are unused, and frees the page if so.
1341  */
1342 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1343                        int priority)
1344 {
1345         unsigned long page;
1346         struct buffer_head * tmp, * p;
1347         int isize = BUFSIZE_INDEX(bh->b_size);
1348 
1349         *bhp = bh;
1350         page = (unsigned long) bh->b_data;
1351         page &= PAGE_MASK;
1352         tmp = bh;
1353         do {
1354                 if (!tmp)
1355                         return 0;
1356                 if (tmp->b_count || buffer_protected(tmp) ||
1357                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1358                         return 0;
1359                 if (priority && buffer_touched(tmp))
1360                         return 0;
1361                 tmp = tmp->b_this_page;
1362         } while (tmp != bh);
1363         tmp = bh;
1364         do {
1365                 p = tmp;
1366                 tmp = tmp->b_this_page;
1367                 nr_buffers--;
1368                 nr_buffers_size[isize]--;
1369                 if (p == *bhp)
1370                   {
1371                     *bhp = p->b_prev_free;
1372                     if (p == *bhp) /* Was this the last in the list? */
1373                       *bhp = NULL;
1374                   }
1375                 remove_from_queues(p);
1376                 put_unused_buffer_head(p);
1377         } while (tmp != bh);
1378         buffermem -= PAGE_SIZE;
1379         mem_map[MAP_NR(page)].buffers = NULL;
1380         free_page(page);
1381         return !mem_map[MAP_NR(page)].count;
1382 }
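/*
 * Editor's sketch (not part of the original source): callers scan a list
 * with the same buffer head they pass in, so try_to_free_buffer() hands a
 * replacement cursor back through *bhp -- the previous free-list entry, or
 * NULL if the freed page held the whole list.  The hypothetical loop below
 * mirrors the free-list scan in shrink_specific_buffers(), minus the aging
 * and busy-buffer pre-checks made there (try_to_free_buffer() re-checks
 * busy buffers itself and simply returns 0).
 */
#if 0
static int example_reclaim_from_free_list(int isize)
{
        struct buffer_head *bh = free_list[isize];
        int i;

        if (!bh)
                return 0;
        for (i = 0; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
                if (try_to_free_buffer(bh, &bh, 6))
                        return 1;       /* a whole page was released */
                if (!bh)
                        break;          /* the free list emptied under us */
        }
        return 0;
}
#endif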
1383 
1384 /* Age buffers on a given page, according to whether they have been
1385    visited recently or not. */
1386 static inline void age_buffer(struct buffer_head *bh)
1387 {
1388         struct buffer_head *tmp = bh;
1389         int touched = 0;
1390 
1391         /*
1392          * When we age a page, we mark all other buffers in the page
1393          * with the "has_aged" flag.  Then, when these aliased buffers
1394          * come up for aging, we skip them until next pass.  This
1395          * ensures that a page full of multiple buffers only gets aged
1396          * once per pass through the lru lists. 
1397          */
1398         if (clear_bit(BH_Has_aged, &bh->b_state))
1399                 return;
1400         
1401         do {
1402                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1403                 tmp = tmp->b_this_page;
1404                 set_bit(BH_Has_aged, &tmp->b_state);
1405         } while (tmp != bh);
1406         clear_bit(BH_Has_aged, &bh->b_state);
1407 
1408         if (touched) 
1409                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1410         else
1411                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1412 }
1413 
1414 /*
1415  * Consult the load average for buffers and decide whether or not
1416  * we should shrink the buffers of one size.  If we decide yes,
1417  * do it and return 1.  Else return 0.  Do not attempt to shrink the
1418  * size that is specified.
1419  *
1420  * I would prefer not to use a load average, but the way things are now it
1421  * seems unavoidable.  The way to get rid of it would be to force clustering
1422  * universally, so that when we reclaim buffers we always reclaim an entire
1423  * page.  Doing this would mean that we all need to move towards QMAGIC.
1424  */
1425 
1426 static int maybe_shrink_lav_buffers(int size)
1427 {          
1428         int nlist;
1429         int isize;
1430         int total_lav, total_n_buffers, n_sizes;
1431         
1432         /* Do not consider the shared buffers since they would not tend
1433            to have getblk called very often, and this would throw off
1434            the lav.  They are not easily reclaimable anyway (let the swapper
1435            make the first move). */
1436   
1437         total_lav = total_n_buffers = n_sizes = 0;
1438         for(nlist = 0; nlist < NR_SIZES; nlist++)
1439          {
1440                  total_lav += buffers_lav[nlist];
1441                  if(nr_buffers_size[nlist]) n_sizes++;
1442                  total_n_buffers += nr_buffers_size[nlist];
1443                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1444          }
1445         
1446         /* See if we have an excessive number of buffers of a particular
1447            size - if so, victimize that bunch. */
1448   
1449         isize = (size ? BUFSIZE_INDEX(size) : -1);
1450         
1451         if (n_sizes > 1)
1452                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1453                   {
1454                           if(nlist == isize) continue;
1455                           if(nr_buffers_size[nlist] &&
1456                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1457                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1458                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1459                                             return 1;
1460                   }
1461         return 0;
1462 }
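/*
 * Editor's note (not part of the original source): rearranging the test
 * above, size s is victimized when its share of the buffer load average
 * is small compared with its share of the (non-shared) buffer population:
 *
 *      buffers_lav[s]       nr_buffers_size[s] - nr_buffers_st[s][BUF_SHARED]
 *      --------------   <   --------------------------------------------------
 *        total_lav             bdf_prm.b_un.lav_const * total_n_buffers
 */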
1463 
1464 /*
1465  * Try to free up some pages by shrinking the buffer-cache
1466  *
1467  * Priority tells the routine how hard to try to shrink the
1468  * buffers: 6 means "don't bother too much", while a value
1469  * of 0 means "we'd better get some free pages now".
1470  *
1471  * A "limit" argument would restrict the shrink-action to pages in
1472  * the 0 - limit address range, for DMA re-allocations; this routine
1473  * does not take such an argument yet, so we ignore that for now.
1474  */
1475 
1476 static int shrink_specific_buffers(unsigned int priority, int size)
1477 {
1478         struct buffer_head *bh;
1479         int nlist;
1480         int i, isize, isize1;
1481 
1482 #ifdef DEBUG
1483         if(size) printk("Shrinking buffers of size %d\n", size);
1484 #endif
1485         /* First try the free lists, and see if we can get a complete page
1486            from here */
1487         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1488 
1489         for(isize = 0; isize<NR_SIZES; isize++){
1490                 if(isize1 != -1 && isize1 != isize) continue;
1491                 bh = free_list[isize];
1492                 if(!bh) continue;
1493                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1494                         if (bh->b_count || buffer_protected(bh) ||
1495                             !bh->b_this_page)
1496                                  continue;
1497                         if (!age_of((unsigned long) bh->b_data) &&
1498                             try_to_free_buffer(bh, &bh, 6))
1499                                  return 1;
1500                         if(!bh) break;
1501                         /* Some interrupt must have used it after we
1502                            freed the page.  No big deal - keep looking */
1503                 }
1504         }
1505         
1506         /* Not enough in the free lists, now try the lru list */
1507         
1508         for(nlist = 0; nlist < NR_LIST; nlist++) {
1509         repeat1:
1510                 if(priority > 2 && nlist == BUF_SHARED) continue;
1511                 i = nr_buffers_type[nlist];
1512                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1513                 for ( ; i > 0; i-- ) {
1514                         bh = next_to_age[nlist];
1515                         if (!bh)
1516                                 break;
1517                         next_to_age[nlist] = bh->b_next_free;
1518 
1519                         /* First, age the buffer. */
1520                         age_buffer(bh);
1521                         /* We may have stalled while waiting for I/O
1522                            to complete. */
1523                         if(bh->b_list != nlist) goto repeat1;
1524                         if (bh->b_count || buffer_protected(bh) ||
1525                             !bh->b_this_page)
1526                                  continue;
1527                         if(size && bh->b_size != size) continue;
1528                         if (buffer_locked(bh))
1529                                  if (priority)
1530                                           continue;
1531                                  else
1532                                           wait_on_buffer(bh);
1533                         if (buffer_dirty(bh)) {
1534                                 bh->b_count++;
1535                                 bh->b_flushtime = 0;
1536                                 ll_rw_block(WRITEA, 1, &bh);
1537                                 bh->b_count--;
1538                                 continue;
1539                         }
1540                         /* At priority 6, only consider really old
1541                            (age==0) buffers for reclaiming.  At
1542                            priority 0, consider any buffers. */
1543                         if ((age_of((unsigned long) bh->b_data) >>
1544                              (6-priority)) > 0)
1545                                 continue;                               
1546                         if (try_to_free_buffer(bh, &bh, 0))
1547                                  return 1;
1548                         if(!bh) break;
1549                 }
1550         }
1551         return 0;
1552 }
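/*
 * Editor's sketch (not part of the original source): a size of 0 means
 * "any size", and the priority runs from 6 (gentle: only untouched,
 * really old buffers, never waiting on locked ones) down to 0 (wait on
 * locked buffers and consider buffers of any age).  The hypothetical
 * calls below show both extremes.
 */
#if 0
        shrink_specific_buffers(6, 0);          /* polite attempt, any size   */
        shrink_specific_buffers(0, BLOCK_SIZE); /* desperate, 1k buffers only */
#endif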
1553 
1554 
1555 /* ================== Debugging =================== */
1556 
1557 void show_buffers(void)
1558 {
1559         struct buffer_head * bh;
1560         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1561         int protected = 0;
1562         int shared;
1563         int nlist, isize;
1564 
1565         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1566         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1567         printk("Buffer blocks:   %6d\n",nr_buffers);
1568 
1569         for(nlist = 0; nlist < NR_LIST; nlist++) {
1570           shared = found = locked = dirty = used = lastused = protected = 0;
1571           bh = lru_list[nlist];
1572           if(!bh) continue;
1573           do {
1574                 found++;
1575                 if (buffer_locked(bh))
1576                         locked++;
1577                 if (buffer_protected(bh))
1578                         protected++;
1579                 if (buffer_dirty(bh))
1580                         dirty++;
1581                 if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1)
1582                         shared++;
1583                 if (bh->b_count)
1584                         used++, lastused = found;
1585                 bh = bh->b_next_free;
1586           } while (bh != lru_list[nlist]);
1587           printk("Buffer[%d] mem: %d buffers, %d used (last=%d), "
1588                  "%d locked, %d protected, %d dirty %d shrd\n",
1589                  nlist, found, used, lastused,
1590                  locked, protected, dirty, shared);
1591         };
1592         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1593         for(isize = 0; isize<NR_SIZES; isize++){
1594                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1595                        buffers_lav[isize], nr_free[isize]);
1596                 for(nlist = 0; nlist < NR_LIST; nlist++)
1597                          printk("%7d ", nr_buffers_st[isize][nlist]);
1598                 printk("\n");
1599         }
1600 }
1601 
1602 
1603 /* ====================== Cluster patches for ext2 ==================== */
1604 
1605 /*
1606  * try_to_reassign() checks if all the buffers on this particular page
1607  * are unused, and reassigns them to a new cluster if so.
1608  */
1609 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1610                            kdev_t dev, unsigned int starting_block)
1611 {
1612         unsigned long page;
1613         struct buffer_head * tmp, * p;
1614 
1615         *bhp = bh;
1616         page = (unsigned long) bh->b_data;
1617         page &= PAGE_MASK;
1618         if(mem_map[MAP_NR(page)].count != 1) return 0;
1619         tmp = bh;
1620         do {
1621                 if (!tmp)
1622                          return 0;
1623                 
1624                 if (tmp->b_count || buffer_protected(tmp) ||
1625                     buffer_dirty(tmp) || buffer_locked(tmp))
1626                          return 0;
1627                 tmp = tmp->b_this_page;
1628         } while (tmp != bh);
1629         tmp = bh;
1630         
1631         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1632                  tmp = tmp->b_this_page;
1633         
1634         /* This is the buffer at the head of the page */
1635         bh = tmp;
1636         do {
1637                 p = tmp;
1638                 tmp = tmp->b_this_page;
1639                 remove_from_queues(p);
1640                 p->b_dev = dev;
1641                 mark_buffer_uptodate(p, 0);
1642                 clear_bit(BH_Req, &p->b_state);
1643                 p->b_blocknr = starting_block++;
1644                 insert_into_queues(p);
1645         } while (tmp != bh);
1646         return 1;
1647 }
1648 
1649 /*
1650  * Try to find a free cluster by locating a page where
1651  * all of the buffers are unused.  We would like this function
1652  * to be atomic, so we do not call anything that might cause
1653  * the process to sleep.  The priority is somewhat similar to
1654  * the priority used in shrink_buffers.
1655  * 
1656  * My thinking is that the kernel should end up using whole
1657  * pages for the buffer cache as much of the time as possible.
1658  * This way the other buffers on a particular page are likely
1659  * to be very near each other on the free list, and we will not
1660  * be expiring data prematurely.  For now we only cannibalize buffers
1661  * of the same size to keep the code simpler.
1662  */
1663 static int reassign_cluster(kdev_t dev, 
1664                      unsigned int starting_block, int size)
1665 {
1666         struct buffer_head *bh;
1667         int isize = BUFSIZE_INDEX(size);
1668         int i;
1669 
1670         /* We want to give ourselves a really good shot at generating
1671            a cluster, and since we only take buffers from the free
1672            list, we "overfill" it a little. */
1673 
1674         while(nr_free[isize] < 32) refill_freelist(size);
1675 
1676         bh = free_list[isize];
1677         if(bh)
1678                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1679                          if (!bh->b_this_page)  continue;
1680                          if (try_to_reassign(bh, &bh, dev, starting_block))
1681                                  return 4;
1682                  }
1683         return 0;
1684 }
1685 
1686 /* This function tries to generate a new cluster of buffers
1687  * from a new page in memory.  We should only do this if we have
1688  * not expanded the buffer cache to the maximum size that we allow.
1689  */
1690 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1691 {
1692         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1693         int isize = BUFSIZE_INDEX(size);
1694         unsigned long offset;
1695         unsigned long page;
1696         int nblock;
1697 
1698         page = get_free_page(GFP_NOBUFFER);
1699         if(!page) return 0;
1700 
1701         bh = create_buffers(page, size);
1702         if (!bh) {
1703                 free_page(page);
1704                 return 0;
1705         };
1706         nblock = block;
1707         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1708                 if (find_buffer(dev, nblock++, size))
1709                          goto not_aligned;
1710         }
1711         tmp = bh;
1712         nblock = 0;
1713         while (1) {
1714                 arr[nblock++] = bh;
1715                 bh->b_count = 1;
1716                 bh->b_flushtime = 0;
1717                 bh->b_state = 0;
1718                 bh->b_dev = dev;
1719                 bh->b_list = BUF_CLEAN;
1720                 bh->b_blocknr = block++;
1721                 nr_buffers++;
1722                 nr_buffers_size[isize]++;
1723                 insert_into_queues(bh);
1724                 if (bh->b_this_page)
1725                         bh = bh->b_this_page;
1726                 else
1727                         break;
1728         }
1729         buffermem += PAGE_SIZE;
1730         mem_map[MAP_NR(page)].buffers = bh;
1731         bh->b_this_page = tmp;
1732         while (nblock-- > 0)
1733                 brelse(arr[nblock]);
1734         return 4; /* ?? */
1735 not_aligned:
1736         while ((tmp = bh) != NULL) {
1737                 bh = bh->b_this_page;
1738                 put_unused_buffer_head(tmp);
1739         }
1740         free_page(page);
1741         return 0;
1742 }
1743 
1744 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1745 {
1746         int i, offset;
1747         
1748         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1749                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1750                 if(find_buffer(dev, b[i], size)) return 0;
1751         };
1752 
1753         /* OK, we have a candidate for a new cluster */
1754         
1755         /* See if one size of buffer is over-represented in the buffer cache,
1756            if so reduce the numbers of buffers */
1757         if(maybe_shrink_lav_buffers(size))
1758          {
1759                  int retval;
1760                  retval = try_to_generate_cluster(dev, b[0], size);
1761                  if(retval) return retval;
1762          };
1763         
1764         if (nr_free_pages > min_free_pages*2) 
1765                  return try_to_generate_cluster(dev, b[0], size);
1766         else
1767                  return reassign_cluster(dev, b[0], size);
1768 }
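/*
 * Editor's sketch (not part of the original source): generate_cluster()
 * expects an array of consecutive block numbers covering one page.  The
 * hypothetical helper below builds such an array for blocks starting at
 * "first" and lets the routine above decide whether to grow the cache or
 * reassign existing free buffers.
 */
#if 0
static unsigned long example_request_cluster(kdev_t dev, int first, int size)
{
        int b[MAX_BUF_PER_PAGE];
        int i, nblocks = PAGE_SIZE / size;

        for (i = 0; i < nblocks; i++)
                b[i] = first + i;
        return generate_cluster(dev, b, size);
}
#endif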
1769 
1770 
1771 /* ===================== Init ======================= */
1772 
1773 /*
1774  * This initializes the initial buffer free list.  nr_buffers_type is set
1775  * to one less than the actual number of buffers, as a sop to backwards
1776  * compatibility --- the old code did this (I think unintentionally,
1777  * but I'm not sure), and programs in the ps package expect it.
1778  *                                      - TYT 8/30/92
1779  */
1780 void buffer_init(void)
1781 {
1782         int i;
1783         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1784         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1785 
1786         if (memsize >= 4*1024*1024) {
1787                 if(memsize >= 16*1024*1024)
1788                          nr_hash = 16381;
1789                 else
1790                          nr_hash = 4093;
1791         } else {
1792                 nr_hash = 997;
1793         };
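        /*
         * Editor's note (not part of the original source): 997, 4093 and
         * 16381 are primes, chosen roughly in proportion to memory size;
         * a prime table size helps spread the hash chains evenly.
         */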
1794         
1795         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1796                                                      sizeof(struct buffer_head *));
1797 
1798 
1799         for (i = 0 ; i < nr_hash ; i++)
1800                 hash_table[i] = NULL;
1801         lru_list[BUF_CLEAN] = 0;
1802         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1803         if (!free_list[isize])
1804                 panic("VFS: Unable to initialize buffer free list!");
1805         return;
1806 }
1807 
1808 
1809 /* ====================== bdflush support =================== */
1810 
1811 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1812  * response to dirty buffers.  Once this process is activated, we write back
1813  * a limited number of buffers to the disks and then go back to sleep again.
1814  */
1815 struct wait_queue * bdflush_wait = NULL;
1816 struct wait_queue * bdflush_done = NULL;
1817 
1818 static void wakeup_bdflush(int wait)
1819 {
1820         wake_up(&bdflush_wait);
1821         if(wait) sleep_on(&bdflush_done);
1822 }
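/*
 * Editor's note (not part of the original source): wakeup_bdflush(0) just
 * pokes the daemon and returns immediately; wakeup_bdflush(1) additionally
 * sleeps on bdflush_done, so the caller does not proceed until bdflush()
 * has finished a flushing pass and issued its wake_up(&bdflush_done).
 */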
1823 
1824 
1825 /* 
1826  * Here we attempt to write back old buffers.  We also try to flush inodes
1827  * and supers, since this function is essentially "update", and
1828  * otherwise there would be no way of ensuring that these quantities ever
1829  * get written back.  Ideally, we would have a timestamp on the inodes
1830  * and superblocks so that we could write back only the old ones as well.
1831  */
1832 
1833 asmlinkage int sync_old_buffers(void)
1834 {
1835         int i, isize;
1836         int ndirty, nwritten;
1837         int nlist;
1838         int ncount;
1839         struct buffer_head * bh, *next;
1840 
1841         sync_supers(0);
1842         sync_inodes(0);
1843 
1844         ncount = 0;
1845 #ifdef DEBUG
1846         for(nlist = 0; nlist < NR_LIST; nlist++)
1847 #else
1848         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1849 #endif
1850         {
1851                 ndirty = 0;
1852                 nwritten = 0;
1853         repeat:
1854                 bh = lru_list[nlist];
1855                 if(bh) 
1856                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1857                                  /* We may have stalled while waiting for I/O to complete. */
1858                                  if(bh->b_list != nlist) goto repeat;
1859                                  next = bh->b_next_free;
1860                                  if(!lru_list[nlist]) {
1861                                          printk("Dirty list empty %d\n", i);
1862                                          break;
1863                                  }
1864                                  
1865                                  /* Clean buffer on dirty list?  Refile it */
1866                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1867                                   {
1868                                           refile_buffer(bh);
1869                                           continue;
1870                                   }
1871                                  
1872                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1873                                           continue;
1874                                  ndirty++;
1875                                  if(bh->b_flushtime > jiffies) continue;
1876                                  nwritten++;
1877                                  bh->b_count++;
1878                                  bh->b_flushtime = 0;
1879 #ifdef DEBUG
1880                                  if(nlist != BUF_DIRTY) ncount++;
1881 #endif
1882                                  ll_rw_block(WRITE, 1, &bh);
1883                                  bh->b_count--;
1884                          }
1885         }
1886 #ifdef DEBUG
1887         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1888         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1889 #endif
1890         
1891         /* We assume that we only come through here on a regular
1892            schedule, like every 5 seconds.  Now update load averages.  
1893            Shift usage counts to prevent overflow. */
1894         for(isize = 0; isize<NR_SIZES; isize++){
1895                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1896                 buffer_usage[isize] = 0;
1897         };
1898         return 0;
1899 }
1900 
1901 
1902 /* This is the interface to bdflush.  As we get more sophisticated, we can
1903  * pass tuning parameters to this "process", to adjust how it behaves. 
1904  * We would want to verify each parameter, however, to make sure that it 
1905  * is reasonable. */
1906 
1907 asmlinkage int sys_bdflush(int func, long data)
1908 {
1909         int i, error;
1910 
1911         if (!suser())
1912                 return -EPERM;
1913 
1914         if (func == 1)
1915                  return sync_old_buffers();
1916 
1917         /* Basically func 2 means read param 1, 3 means write param 1, etc */
1918         if (func >= 2) {
1919                 i = (func-2) >> 1;
1920                 if (i < 0 || i >= N_PARAM)
1921                         return -EINVAL;
1922                 if((func & 1) == 0) {
1923                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1924                         if (error)
1925                                 return error;
1926                         put_user(bdf_prm.data[i], (int*)data);
1927                         return 0;
1928                 };
1929                 if (data < bdflush_min[i] || data > bdflush_max[i])
1930                         return -EINVAL;
1931                 bdf_prm.data[i] = data;
1932                 return 0;
1933         };
1934 
1935         /* Func 0 used to launch the actual bdflush and then never
1936         return (unless explicitly killed). We return zero here to
1937         remain semi-compatible with present update(8) programs. */
1938 
1939         return 0;
1940 }
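/*
 * Editor's sketch (not part of the original source): the func encoding
 * accepted above, seen from the caller's side.  For parameter i (0-based),
 * func 2*i+2 reads it into the int that "data" points at, and func 2*i+3
 * sets it to the value passed in "data".  The wrapper names below are
 * hypothetical; a real caller would go through the bdflush system call
 * rather than calling sys_bdflush() directly.
 */
#if 0
static int example_get_bdflush_param(int i, int *value)
{
        return sys_bdflush(2 * i + 2, (long) value);    /* read param i  */
}

static int example_set_bdflush_param(int i, int value)
{
        return sys_bdflush(2 * i + 3, (long) value);    /* write param i */
}
#endif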
1941 
1942 /* This is the actual bdflush daemon itself. It used to be started from
1943  * the syscall above, but now we launch it ourselves internally with
1944  * kernel_thread(...)  directly after the first thread in init/main.c */
1945 
1946 int bdflush(void * unused) 
1947 {
1948         int i;
1949         int ndirty;
1950         int nlist;
1951         int ncount;
1952         struct buffer_head * bh, *next;
1953 
1954         /*
1955          *      We have a bare-bones task_struct, and really should fill
1956          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1957          *      display semi-sane things. Not real crucial though...  
1958          */
1959 
1960         current->session = 1;
1961         current->pgrp = 1;
1962         sprintf(current->comm, "kflushd");
1963 
1964         /*
1965          *      As a kernel thread we want to tamper with system buffers
1966          *      and other internals and thus be subject to the SMP locking
1967          *      rules. (On a uniprocessor box this does nothing).
1968          */
1969          
1970 #ifdef __SMP__
1971         lock_kernel();
1972         syscall_count++;
1973 #endif
1974                  
1975         for (;;) {
1976 #ifdef DEBUG
1977                 printk("bdflush() activated...");
1978 #endif
1979                 
1980                 ncount = 0;
1981 #ifdef DEBUG
1982                 for(nlist = 0; nlist < NR_LIST; nlist++)
1983 #else
1984                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1985 #endif
1986                  {
1987                          ndirty = 0;
1988                  repeat:
1989                          bh = lru_list[nlist];
1990                          if(bh) 
1991                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1992                                        bh = next) {
1993                                           /* We may have stalled while waiting for I/O to complete. */
1994                                           if(bh->b_list != nlist) goto repeat;
1995                                           next = bh->b_next_free;
1996                                           if(!lru_list[nlist]) {
1997                                                   printk("Dirty list empty %d\n", i);
1998                                                   break;
1999                                           }
2000                                           
2001                                           /* Clean buffer on dirty list?  Refile it */
2002                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
2003                                            {
2004                                                    refile_buffer(bh);
2005                                                    continue;
2006                                            }
2007                                           
2008                                           if (buffer_locked(bh) || !buffer_dirty(bh))
2009                                                    continue;
2010                                           /* Should we write back buffers that are shared or not??
2011                                              currently dirty buffers are not shared, so it does not matter */
2012                                           bh->b_count++;
2013                                           ndirty++;
2014                                           bh->b_flushtime = 0;
2015                                           ll_rw_block(WRITE, 1, &bh);
2016 #ifdef DEBUG
2017                                           if(nlist != BUF_DIRTY) ncount++;
2018 #endif
2019                                           bh->b_count--;
2020                                   }
2021                  }
2022 #ifdef DEBUG
2023                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2024                 printk("sleeping again.\n");
2025 #endif
2026                 wake_up(&bdflush_done);
2027                 
2028                 /* If there are still a lot of dirty buffers around, skip the sleep
2029                    and flush some more */
2030                 
2031                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2032                    bdf_prm.b_un.nfract/100) {
2033                         current->signal = 0;
2034                         interruptible_sleep_on(&bdflush_wait);
2035                 }
2036         }
2037 }
2038 
2039 
2040 /*
2041  * Overrides for Emacs so that we follow Linus's tabbing style.
2042  * Emacs will notice this stuff at the end of the file and automatically
2043  * adjust the settings for this buffer only.  This must remain at the end
2044  * of the file.
2045  * ---------------------------------------------------------------------------
2046  * Local variables:
2047  * c-indent-level: 8
2048  * c-brace-imaginary-offset: 0
2049  * c-brace-offset: -8
2050  * c-argdecl-indent: 8
2051  * c-label-offset: -8
2052  * c-continued-statement-offset: 8
2053  * c-continued-brace-offset: 0
2054  * End:
2055  */
