root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. after_unlock_page
  34. free_async_buffers
  35. brw_page
  36. mark_buffer_uptodate
  37. unlock_buffer
  38. generic_readpage
  39. grow_buffers
  40. try_to_free_buffer
  41. age_buffer
  42. maybe_shrink_lav_buffers
  43. shrink_specific_buffers
  44. show_buffers
  45. try_to_reassign
  46. reassign_cluster
  47. try_to_generate_cluster
  48. generate_cluster
  49. buffer_init
  50. wakeup_bdflush
  51. sync_old_buffers
  52. sys_bdflush
  53. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 5
  39 static char buffersize_index[17] =
  40 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
  41 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
  42 
  43 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  44 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
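      /*
       * For illustration: BUFSIZE_INDEX() maps a buffer size onto its slot in the
       * per-size arrays by indexing buffersize_index[] with size>>9, so
       * BUFSIZE_INDEX(512) == 0, BUFSIZE_INDEX(1024) == 1, ... BUFSIZE_INDEX(8192) == 4,
       * and bufferindex_size[BUFSIZE_INDEX(size)] == size for every legal size.
       * MAX_BUF_PER_PAGE is the most 512-byte buffers one page can hold
       * (8 with 4 kB pages).
       */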
  45 
  46 static int grow_buffers(int pri, int size);
  47 static int shrink_specific_buffers(unsigned int priority, int size);
  48 static int maybe_shrink_lav_buffers(int);
  49 
  50 static int nr_hash = 0;  /* Size of hash table */
  51 static struct buffer_head ** hash_table;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 
  59 static struct buffer_head * unused_list = NULL;
  60 struct buffer_head * reuse_list = NULL;
  61 static struct wait_queue * buffer_wait = NULL;
  62 
  63 int nr_buffers = 0;
  64 int nr_buffers_type[NR_LIST] = {0,};
  65 int nr_buffers_size[NR_SIZES] = {0,};
  66 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  67 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  68 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  69 int nr_free[NR_SIZES] = {0,};
  70 int buffermem = 0;
  71 int nr_buffer_heads = 0;
  72 extern int *blksize_size[];
  73 
  74 /* Here is the parameter block for the bdflush process. If you add or
  75  * remove any of the parameters, make sure to update kernel/sysctl.c.
  76  */
  77 
  78 static void wakeup_bdflush(int);
  79 
  80 #define N_PARAM 9
  81 #define LAV
  82 
  83 union bdflush_param{
  84         struct {
  85                 int nfract;  /* Percentage of buffer cache dirty to 
  86                                 activate bdflush */
  87                 int ndirty;  /* Maximum number of dirty blocks to write out per
  88                                 wake-cycle */
  89                 int nrefill; /* Number of clean buffers to try and obtain
  90                                 each time we call refill */
  91                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  92                                   when trying to refill buffers. */
  93                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  94                                     search for free clusters */
  95                 int age_buffer;  /* Time for normal buffer to age before 
  96                                     we flush it */
  97                 int age_super;  /* Time for superblock to age before we 
  98                                    flush it */
  99                 int lav_const;  /* Constant used for load average (time
 100                                    constant) */
 101                 int lav_ratio;  /* Used to determine how low a lav for a
 102                                    particular size can go before we start to
 103                                    trim back the buffers */
 104         } b_un;
 105         unsigned int data[N_PARAM];
 106 } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
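      /*
       * The union gives two views of the same parameters: the named b_un fields
       * read throughout this file, and the flat data[] array, which is there so
       * code that tweaks the bdflush tunables (sys_bdflush, sysctl) can address
       * them by index and range-check them against bdflush_min[]/bdflush_max[]
       * below.  With the defaults above, for example, bdflush is woken once more
       * than 60% of the non-shared buffers are dirty, and an ordinary dirty
       * buffer ages 30*HZ jiffies (30 seconds) before it must be flushed.
       */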
 107 
 108 /* The lav constant is set for 1 minute, as long as the update process runs
 109    every 5 seconds.  If you change the frequency of update, the time
 110    constant will also change. */
 111 
 112 
 113 /* These are the min and max parameter values that we will allow to be assigned */
 114 int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 115 int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
 116 
 117 /*
 118  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 119  * and getting rid of the cli-sti pairs. The wait-queue routines still
 120  * need cli-sti, but now it's just a couple of 386 instructions or so.
 121  *
 122  * Note that the real wait_on_buffer() is an inline function that checks
 123  * if 'b_wait' is set before calling this, so that the queues aren't set
 124  * up unnecessarily.
 125  */
 126 void __wait_on_buffer(struct buffer_head * bh)
 127 {
 128         struct wait_queue wait = { current, NULL };
 129 
 130         bh->b_count++;
 131         add_wait_queue(&bh->b_wait, &wait);
 132 repeat:
 133         run_task_queue(&tq_disk);
 134         current->state = TASK_UNINTERRUPTIBLE;
 135         if (buffer_locked(bh)) {
 136                 schedule();
 137                 goto repeat;
 138         }
 139         remove_wait_queue(&bh->b_wait, &wait);
 140         bh->b_count--;
 141         current->state = TASK_RUNNING;
 142 }
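      /*
       * Note the shape of the sleep above: b_count is raised so the buffer cannot
       * be reclaimed while we sleep, and buffer_locked() is re-tested after every
       * schedule(), so a wakeup that races with the buffer being locked again
       * simply puts us back to sleep instead of returning early.
       */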
 143 
 144 /* Call sync_buffers with wait!=0 to ensure that the call does not
 145    return until all buffer writes have completed.  Sync() may return
 146    before the writes have finished; fsync() may not. */
 147 
 148 
 149 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 150    spontaneously dirty themselves without ever brelse being called.
 151    We will ultimately want to put these in a separate list, but for
 152    now we search all of the lists for dirty buffers */
 153 
 154 static int sync_buffers(kdev_t dev, int wait)
 155 {
 156         int i, retry, pass = 0, err = 0;
 157         int nlist, ncount;
 158         struct buffer_head * bh, *next;
 159 
 160         /* One pass for no-wait, three for wait:
 161            0) write out all dirty, unlocked buffers;
 162            1) write out all dirty buffers, waiting if locked;
 163            2) wait for completion by waiting for all buffers to unlock. */
 164  repeat:
 165         retry = 0;
 166  repeat2:
 167         ncount = 0;
 168         /* We search all lists as a failsafe mechanism, not because we expect
 169            there to be dirty buffers on any of the other lists. */
 170         for(nlist = 0; nlist < NR_LIST; nlist++)
 171          {
 172          repeat1:
 173                  bh = lru_list[nlist];
 174                  if(!bh) continue;
 175                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 176                          if(bh->b_list != nlist) goto repeat1;
 177                          next = bh->b_next_free;
 178                          if(!lru_list[nlist]) break;
 179                          if (dev && bh->b_dev != dev)
 180                                   continue;
 181                          if (buffer_locked(bh))
 182                           {
 183                                   /* Buffer is locked; skip it unless wait is
 184                                      requested AND pass > 0. */
 185                                   if (!wait || !pass) {
 186                                           retry = 1;
 187                                           continue;
 188                                   }
 189                                   wait_on_buffer (bh);
 190                                   goto repeat2;
 191                           }
 192                          /* If an unlocked buffer is not uptodate, there has
 193                              been an IO error. Skip it. */
 194                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 195                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 196                                   err = 1;
 197                                   continue;
 198                           }
 199                          /* Don't write clean buffers.  Don't write ANY buffers
 200                             on the third pass. */
 201                          if (!buffer_dirty(bh) || pass>=2)
 202                                   continue;
 203                          /* don't bother about locked buffers */
 204                          if (buffer_locked(bh))
 205                                  continue;
 206                          bh->b_count++;
 207                          bh->b_flushtime = 0;
 208                          ll_rw_block(WRITE, 1, &bh);
 209 
 210                          if(nlist != BUF_DIRTY) { 
 211                                  printk("[%d %s %ld] ", nlist,
 212                                         kdevname(bh->b_dev), bh->b_blocknr);
 213                                  ncount++;
 214                          };
 215                          bh->b_count--;
 216                          retry = 1;
 217                  }
 218          }
 219         if (ncount)
 220           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 221         
 222         /* If we are waiting for the sync to succeed, and if any dirty
 223            blocks were written, then repeat; on the second pass, only
 224            wait for buffers being written (do not pass to write any
 225            more buffers on the second pass). */
 226         if (wait && retry && ++pass<=2)
 227                  goto repeat;
 228         return err;
 229 }
 230 
 231 void sync_dev(kdev_t dev)
 232 {
 233         sync_buffers(dev, 0);
 234         sync_supers(dev);
 235         sync_inodes(dev);
 236         sync_buffers(dev, 0);
 237         sync_dquots(dev, -1);
 238 }
 239 
 240 int fsync_dev(kdev_t dev)
 241 {
 242         sync_buffers(dev, 0);
 243         sync_supers(dev);
 244         sync_inodes(dev);
 245         sync_dquots(dev, -1);
 246         return sync_buffers(dev, 1);
 247 }
 248 
 249 asmlinkage int sys_sync(void)
 250 {
 251         fsync_dev(0);
 252         return 0;
 253 }
 254 
 255 int file_fsync (struct inode *inode, struct file *filp)
 256 {
 257         return fsync_dev(inode->i_dev);
 258 }
 259 
 260 asmlinkage int sys_fsync(unsigned int fd)
 261 {
 262         struct file * file;
 263         struct inode * inode;
 264 
 265         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 266                 return -EBADF;
 267         if (!file->f_op || !file->f_op->fsync)
 268                 return -EINVAL;
 269         if (file->f_op->fsync(inode,file))
 270                 return -EIO;
 271         return 0;
 272 }
 273 
 274 asmlinkage int sys_fdatasync(unsigned int fd)
 275 {
 276         struct file * file;
 277         struct inode * inode;
 278 
 279         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 280                 return -EBADF;
 281         if (!file->f_op || !file->f_op->fsync)
 282                 return -EINVAL;
 283         /* this needs further work, at the moment it is identical to fsync() */
 284         if (file->f_op->fsync(inode,file))
 285                 return -EIO;
 286         return 0;
 287 }
 288 
 289 void invalidate_buffers(kdev_t dev)
 290 {
 291         int i;
 292         int nlist;
 293         struct buffer_head * bh;
 294 
 295         for(nlist = 0; nlist < NR_LIST; nlist++) {
 296                 bh = lru_list[nlist];
 297                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 298                         if (bh->b_dev != dev)
 299                                 continue;
 300                         wait_on_buffer(bh);
 301                         if (bh->b_dev != dev)
 302                                 continue;
 303                         if (bh->b_count)
 304                                 continue;
 305                         bh->b_flushtime = 0;
 306                         clear_bit(BH_Protected, &bh->b_state);
 307                         clear_bit(BH_Uptodate, &bh->b_state);
 308                         clear_bit(BH_Dirty, &bh->b_state);
 309                         clear_bit(BH_Req, &bh->b_state);
 310                 }
 311         }
 312 }
 313 
 314 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 315 #define hash(dev,block) hash_table[_hashfn(dev,block)]
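      /*
       * A worked example of the hashing scheme: a buffer for (dev, block) lives on
       * the chain rooted at hash_table[(HASHDEV(dev) ^ block) % nr_hash], linked
       * through b_next/b_prev.  find_buffer() below walks exactly that chain, and
       * insert_into_queues()/remove_from_hash_queue() keep it consistent.
       */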
 316 
 317 static inline void remove_from_hash_queue(struct buffer_head * bh)
 318 {
 319         if (bh->b_next)
 320                 bh->b_next->b_prev = bh->b_prev;
 321         if (bh->b_prev)
 322                 bh->b_prev->b_next = bh->b_next;
 323         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 324                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 325         bh->b_next = bh->b_prev = NULL;
 326 }
 327 
 328 static inline void remove_from_lru_list(struct buffer_head * bh)
 329 {
 330         if (!(bh->b_prev_free) || !(bh->b_next_free))
 331                 panic("VFS: LRU block list corrupted");
 332         if (bh->b_dev == B_FREE)
 333                 panic("LRU list corrupted");
 334         bh->b_prev_free->b_next_free = bh->b_next_free;
 335         bh->b_next_free->b_prev_free = bh->b_prev_free;
 336 
 337         if (lru_list[bh->b_list] == bh)
 338                  lru_list[bh->b_list] = bh->b_next_free;
 339         if (lru_list[bh->b_list] == bh)
 340                  lru_list[bh->b_list] = NULL;
 341         if (next_to_age[bh->b_list] == bh)
 342                 next_to_age[bh->b_list] = bh->b_next_free;
 343         if (next_to_age[bh->b_list] == bh)
 344                 next_to_age[bh->b_list] = NULL;
 345 
 346         bh->b_next_free = bh->b_prev_free = NULL;
 347 }
 348 
 349 static inline void remove_from_free_list(struct buffer_head * bh)
 350 {
 351         int isize = BUFSIZE_INDEX(bh->b_size);
 352         if (!(bh->b_prev_free) || !(bh->b_next_free))
 353                 panic("VFS: Free block list corrupted");
 354         if(bh->b_dev != B_FREE)
 355                 panic("Free list corrupted");
 356         if(!free_list[isize])
 357                 panic("Free list empty");
 358         nr_free[isize]--;
 359         if(bh->b_next_free == bh)
 360                  free_list[isize] = NULL;
 361         else {
 362                 bh->b_prev_free->b_next_free = bh->b_next_free;
 363                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 364                 if (free_list[isize] == bh)
 365                          free_list[isize] = bh->b_next_free;
 366         }
 367         bh->b_next_free = bh->b_prev_free = NULL;
 368 }
 369 
 370 static inline void remove_from_queues(struct buffer_head * bh)
 371 {
 372         if(bh->b_dev == B_FREE) {
 373                 remove_from_free_list(bh); /* Free list entries should not be
 374                                               in the hash queue */
 375                 return;
 376         };
 377         nr_buffers_type[bh->b_list]--;
 378         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 379         remove_from_hash_queue(bh);
 380         remove_from_lru_list(bh);
 381 }
 382 
 383 static inline void put_last_lru(struct buffer_head * bh)
 384 {
 385         if (!bh)
 386                 return;
 387         if (bh == lru_list[bh->b_list]) {
 388                 lru_list[bh->b_list] = bh->b_next_free;
 389                 if (next_to_age[bh->b_list] == bh)
 390                         next_to_age[bh->b_list] = bh->b_next_free;
 391                 return;
 392         }
 393         if(bh->b_dev == B_FREE)
 394                 panic("Wrong block for lru list");
 395         remove_from_lru_list(bh);
 396 /* add to back of free list */
 397 
 398         if(!lru_list[bh->b_list]) {
 399                 lru_list[bh->b_list] = bh;
 400                 lru_list[bh->b_list]->b_prev_free = bh;
 401         }
 402         if (!next_to_age[bh->b_list])
 403                 next_to_age[bh->b_list] = bh;
 404 
 405         bh->b_next_free = lru_list[bh->b_list];
 406         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 407         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 408         lru_list[bh->b_list]->b_prev_free = bh;
 409 }
 410 
 411 static inline void put_last_free(struct buffer_head * bh)
 412 {
 413         int isize;
 414         if (!bh)
 415                 return;
 416 
 417         isize = BUFSIZE_INDEX(bh->b_size);      
 418         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 419         /* add to back of free list */
 420         if(!free_list[isize]) {
 421                 free_list[isize] = bh;
 422                 bh->b_prev_free = bh;
 423         };
 424 
 425         nr_free[isize]++;
 426         bh->b_next_free = free_list[isize];
 427         bh->b_prev_free = free_list[isize]->b_prev_free;
 428         free_list[isize]->b_prev_free->b_next_free = bh;
 429         free_list[isize]->b_prev_free = bh;
 430 }
 431 
 432 static inline void insert_into_queues(struct buffer_head * bh)
 433 {
 434         /* put at end of free list */
 435         if(bh->b_dev == B_FREE) {
 436                 put_last_free(bh);
 437                 return;
 438         }
 439         if(!lru_list[bh->b_list]) {
 440                 lru_list[bh->b_list] = bh;
 441                 bh->b_prev_free = bh;
 442         }
 443         if (!next_to_age[bh->b_list])
 444                 next_to_age[bh->b_list] = bh;
 445         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 446         bh->b_next_free = lru_list[bh->b_list];
 447         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 448         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 449         lru_list[bh->b_list]->b_prev_free = bh;
 450         nr_buffers_type[bh->b_list]++;
 451         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 452 /* put the buffer in new hash-queue if it has a device */
 453         bh->b_prev = NULL;
 454         bh->b_next = NULL;
 455         if (!(bh->b_dev))
 456                 return;
 457         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 458         hash(bh->b_dev,bh->b_blocknr) = bh;
 459         if (bh->b_next)
 460                 bh->b_next->b_prev = bh;
 461 }
 462 
 463 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 464 {               
 465         struct buffer_head * tmp;
 466 
 467         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 468                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 469                         if (tmp->b_size == size)
 470                                 return tmp;
 471                         else {
 472                                 printk("VFS: Wrong blocksize on device %s\n",
 473                                         kdevname(dev));
 474                                 return NULL;
 475                         }
 476         return NULL;
 477 }
 478 
 479 /*
 480  * Why like this, I hear you say... The reason is race-conditions.
 481  * As we don't lock buffers (unless we are reading them, that is),
 482  * something might happen to it while we sleep (ie a read-error
 483  * will force it bad). This shouldn't really happen currently, but
 484  * the code is ready.
 485  */
 486 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 487 {
 488         struct buffer_head * bh;
 489 
 490         for (;;) {
 491                 if (!(bh=find_buffer(dev,block,size)))
 492                         return NULL;
 493                 bh->b_count++;
 494                 wait_on_buffer(bh);
 495                 if (bh->b_dev == dev && bh->b_blocknr == block
 496                                              && bh->b_size == size)
 497                         return bh;
 498                 bh->b_count--;
 499         }
 500 }
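      /*
       * The identity re-check after wait_on_buffer() is the race the comment above
       * alludes to: while we slept the buffer may have been reclaimed and reused
       * for another block or device, so its (dev, blocknr, size) triple is checked
       * again; if it changed we drop our reference and retry the lookup.
       */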
 501 
 502 void set_blocksize(kdev_t dev, int size)
 503 {
 504         int i, nlist;
 505         struct buffer_head * bh, *bhnext;
 506 
 507         if (!blksize_size[MAJOR(dev)])
 508                 return;
 509 
 510         if (size > PAGE_SIZE)
 511                 size = 0;
 512 
 513         switch (size) {
 514                 default: panic("Invalid blocksize passed to set_blocksize");
 515                 case 512: case 1024: case 2048: case 4096: case 8192: ;
 516         }
 517 
 518         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 519                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 520                 return;
 521         }
 522         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 523                 return;
 524         sync_buffers(dev, 2);
 525         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 526 
 527   /* We need to be quite careful how we do this - we are moving entries
 528      around on the free list, and we can get in a loop if we are not careful.*/
 529 
 530         for(nlist = 0; nlist < NR_LIST; nlist++) {
 531                 bh = lru_list[nlist];
 532                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 533                         if(!bh) break;
 534                         bhnext = bh->b_next_free; 
 535                         if (bh->b_dev != dev)
 536                                  continue;
 537                         if (bh->b_size == size)
 538                                  continue;
 539                         
 540                         wait_on_buffer(bh);
 541                         if (bh->b_dev == dev && bh->b_size != size) {
 542                                 clear_bit(BH_Dirty, &bh->b_state);
 543                                 clear_bit(BH_Uptodate, &bh->b_state);
 544                                 clear_bit(BH_Req, &bh->b_state);
 545                                 bh->b_flushtime = 0;
 546                         }
 547                         remove_from_hash_queue(bh);
 548                 }
 549         }
 550 }
 551 
 552 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 553 
 554 void refill_freelist(int size)
 555 {
 556         struct buffer_head * bh, * tmp;
 557         struct buffer_head * candidate[NR_LIST];
 558         unsigned int best_time, winner;
 559         int isize = BUFSIZE_INDEX(size);
 560         int buffers[NR_LIST];
 561         int i;
 562         int needed;
 563 
 564         /* First see if we even need this.  Sometimes it is advantageous
 565          to request some blocks in a filesystem that we know that we will
 566          be needing ahead of time. */
 567 
 568         if (nr_free[isize] > 100)
 569                 return;
 570 
 571         /* If there are too many dirty buffers, we wake up the update process
 572            now so as to ensure that there are still clean buffers available
 573            for user processes to use (and dirty) */
 574         
 575         /* We are going to try and locate this much memory */
 576         needed =bdf_prm.b_un.nrefill * size;  
 577 
 578         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 579                grow_buffers(GFP_BUFFER, size)) {
 580                 needed -= PAGE_SIZE;
 581         }
 582 
 583         if(needed <= 0) return;
 584 
 585         /* See if there are too many buffers of a different size.
 586            If so, victimize them */
 587 
 588         while(maybe_shrink_lav_buffers(size))
 589          {
 590                  if(!grow_buffers(GFP_BUFFER, size)) break;
 591                  needed -= PAGE_SIZE;
 592                  if(needed <= 0) return;
 593          };
 594 
 595         /* OK, we cannot grow the buffer cache, now try and get some
 596            from the lru list */
 597 
 598         /* First set the candidate pointers to usable buffers.  This
 599            should be quick nearly all of the time. */
 600 
 601 repeat0:
 602         for(i=0; i<NR_LIST; i++){
 603                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 604                    nr_buffers_type[i] == 0) {
 605                         candidate[i] = NULL;
 606                         buffers[i] = 0;
 607                         continue;
 608                 }
 609                 buffers[i] = nr_buffers_type[i];
 610                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 611                  {
 612                          if(buffers[i] < 0) panic("Here is the problem");
 613                          tmp = bh->b_next_free;
 614                          if (!bh) break;
 615                          
 616                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 617                              buffer_dirty(bh)) {
 618                                  refile_buffer(bh);
 619                                  continue;
 620                          }
 621                          
 622                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 623                                   continue;
 624                          
 625                          /* Buffers are written in the order they are placed 
 626                             on the locked list. If we encounter a locked
 627                             buffer here, this means that the rest of them
 628                             are also locked */
 629                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 630                                  buffers[i] = 0;
 631                                  break;
 632                          }
 633                          
 634                          if (BADNESS(bh)) continue;
 635                          break;
 636                  };
 637                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 638                 else candidate[i] = bh;
 639                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 640         }
 641         
 642  repeat:
 643         if(needed <= 0) return;
 644         
 645         /* Now see which candidate wins the election */
 646         
 647         winner = best_time = UINT_MAX;  
 648         for(i=0; i<NR_LIST; i++){
 649                 if(!candidate[i]) continue;
 650                 if(candidate[i]->b_lru_time < best_time){
 651                         best_time = candidate[i]->b_lru_time;
 652                         winner = i;
 653                 }
 654         }
 655         
 656         /* If we have a winner, use it, and then get a new candidate from that list */
 657         if(winner != UINT_MAX) {
 658                 i = winner;
 659                 bh = candidate[i];
 660                 candidate[i] = bh->b_next_free;
 661                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 662                 if (bh->b_count || bh->b_size != size)
 663                          panic("Busy buffer in candidate list\n");
 664                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 665                          panic("Shared buffer in candidate list\n");
 666                 if (buffer_protected(bh))
 667                         panic("Protected buffer in candidate list\n");
 668                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 669                 
 670                 if(bh->b_dev == B_FREE)
 671                         panic("Wrong list");
 672                 remove_from_queues(bh);
 673                 bh->b_dev = B_FREE;
 674                 put_last_free(bh);
 675                 needed -= bh->b_size;
 676                 buffers[i]--;
 677                 if(buffers[i] < 0) panic("Here is the problem");
 678                 
 679                 if(buffers[i] == 0) candidate[i] = NULL;
 680                 
 681                 /* Now all we need to do is advance the candidate pointer
 682                    from the winner list to the next usable buffer */
 683                 if(candidate[i] && buffers[i] > 0){
 684                         if(buffers[i] <= 0) panic("Here is another problem");
 685                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 686                                 if(buffers[i] < 0) panic("Here is the problem");
 687                                 tmp = bh->b_next_free;
 688                                 if (!bh) break;
 689                                 
 690                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 691                                     buffer_dirty(bh)) {
 692                                         refile_buffer(bh);
 693                                         continue;
 694                                 };
 695                                 
 696                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 697                                          continue;
 698                                 
 699                                 /* Buffers are written in the order they are
 700                                    placed on the locked list.  If we encounter
 701                                    a locked buffer here, this means that the
 702                                    rest of them are also locked */
 703                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 704                                         buffers[i] = 0;
 705                                         break;
 706                                 }
 707               
 708                                 if (BADNESS(bh)) continue;
 709                                 break;
 710                         };
 711                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 712                         else candidate[i] = bh;
 713                         if(candidate[i] && candidate[i]->b_count) 
 714                                  panic("Here is the problem");
 715                 }
 716                 
 717                 goto repeat;
 718         }
 719         
 720         if(needed <= 0) return;
 721         
 722         /* Too bad, that was not enough. Try a little harder to grow some. */
 723         
 724         if (grow_buffers(GFP_ATOMIC, size)) {
 725                 needed -= PAGE_SIZE;
 726                 goto repeat0;
 727         }
 728         wakeup_bdflush(1);
 729         goto repeat0;
 730 }
 731 
 732 /*
 733  * Ok, this is getblk, and it isn't very clear, again to hinder
 734  * race-conditions. Most of the code is seldom used, (ie repeating),
 735  * so it should be much more efficient than it looks.
 736  *
 737  * The algorithm is changed: hopefully better, and an elusive bug removed.
 738  *
 739  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 740  * when the filesystem starts to get full of dirty blocks (I hope).
 741  */
 742 struct buffer_head * getblk(kdev_t dev, int block, int size)
 743 {
 744         struct buffer_head * bh;
 745         int isize = BUFSIZE_INDEX(size);
 746 
 747         /* Update this for the buffer size lav. */
 748         buffer_usage[isize]++;
 749 
 750         /* If there are too many dirty buffers, we wake up the update process
 751            now so as to ensure that there are still clean buffers available
 752            for user processes to use (and dirty) */
 753 repeat:
 754         bh = get_hash_table(dev, block, size);
 755         if (bh) {
 756                 if (!buffer_dirty(bh)) {
 757                         if (buffer_uptodate(bh))
 758                                  put_last_lru(bh);
 759                         bh->b_flushtime = 0;
 760                 }
 761                 set_bit(BH_Touched, &bh->b_state);
 762                 return bh;
 763         }
 764 
 765         while(!free_list[isize]) refill_freelist(size);
 766         
 767         if (find_buffer(dev,block,size))
 768                  goto repeat;
 769 
 770         bh = free_list[isize];
 771         remove_from_free_list(bh);
 772 
 773 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 774 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 775         bh->b_count=1;
 776         bh->b_flushtime=0;
 777         bh->b_state=(1<<BH_Touched);
 778         bh->b_dev=dev;
 779         bh->b_blocknr=block;
 780         insert_into_queues(bh);
 781         return bh;
 782 }
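      /*
       * Typical caller pattern (names hypothetical): getblk() only guarantees a
       * buffer bound to (dev, block); it does not read the data.  A caller that
       * wants valid contents normally does what bread() below does:
       *
       *      struct buffer_head *bh = getblk(dev, block, size);
       *      if (!buffer_uptodate(bh)) {
       *              ll_rw_block(READ, 1, &bh);
       *              wait_on_buffer(bh);
       *      }
       *      ...
       *      brelse(bh);
       */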
 783 
 784 void set_writetime(struct buffer_head * buf, int flag)
 785 {
 786         int newtime;
 787 
 788         if (buffer_dirty(buf)) {
 789                 /* Set the flush deadline, keeping an earlier one if already set */
 790                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 791                                      bdf_prm.b_un.age_buffer);
 792                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 793                          buf->b_flushtime = newtime;
 794         } else {
 795                 buf->b_flushtime = 0;
 796         }
 797 }
 798 
 799 
 800 /*
 801  * A buffer may need to be moved from one buffer list to another
 802  * (e.g. in case it is not shared any more). Handle this.
 803  */
 804 void refile_buffer(struct buffer_head * buf)
 805 {
 806         int dispose;
 807 
 808         if(buf->b_dev == B_FREE) {
 809                 printk("Attempt to refile free buffer\n");
 810                 return;
 811         }
 812         if (buffer_dirty(buf))
 813                 dispose = BUF_DIRTY;
 814         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 815                 dispose = BUF_SHARED;
 816         else if (buffer_locked(buf))
 817                 dispose = BUF_LOCKED;
 818         else if (buf->b_list == BUF_SHARED)
 819                 dispose = BUF_UNSHARED;
 820         else
 821                 dispose = BUF_CLEAN;
 822         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 823         if(dispose != buf->b_list)  {
 824                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 825                          buf->b_lru_time = jiffies;
 826                 if(dispose == BUF_LOCKED && 
 827                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 828                          dispose = BUF_LOCKED1;
 829                 remove_from_queues(buf);
 830                 buf->b_list = dispose;
 831                 insert_into_queues(buf);
 832                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 833                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 834                    bdf_prm.b_un.nfract/100)
 835                          wakeup_bdflush(0);
 836         }
 837 }
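      /*
       * The list chosen above follows a strict priority: dirty buffers go to
       * BUF_DIRTY, page-shared or protected ones to BUF_SHARED, locked ones to
       * BUF_LOCKED (or BUF_LOCKED1 when their flush deadline is close to their
       * LRU time), buffers leaving the shared list to BUF_UNSHARED, and everything
       * else to BUF_CLEAN.  Crossing the dirty threshold here is also what kicks
       * bdflush via wakeup_bdflush(0).
       */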
 838 
 839 /*
 840  * Release a buffer head
 841  */
 842 void __brelse(struct buffer_head * buf)
 843 {
 844         wait_on_buffer(buf);
 845 
 846         /* If dirty, mark the time this buffer should be written back */
 847         set_writetime(buf, 0);
 848         refile_buffer(buf);
 849 
 850         if (buf->b_count) {
 851                 buf->b_count--;
 852                 return;
 853         }
 854         printk("VFS: brelse: Trying to free free buffer\n");
 855 }
 856 
 857 /*
 858  * bforget() is like brelse(), except it removes the buffer
 859  * from the hash-queues (so that it won't be re-used if it's
 860  * shared).
 861  */
 862 void __bforget(struct buffer_head * buf)
 863 {
 864         wait_on_buffer(buf);
 865         mark_buffer_clean(buf);
 866         clear_bit(BH_Protected, &buf->b_state);
 867         buf->b_count--;
 868         remove_from_hash_queue(buf);
 869         buf->b_dev = NODEV;
 870         refile_buffer(buf);
 871 }
 872 
 873 /*
 874  * bread() reads a specified block and returns the buffer that contains
 875  * it. It returns NULL if the block was unreadable.
 876  */
 877 struct buffer_head * bread(kdev_t dev, int block, int size)
 878 {
 879         struct buffer_head * bh;
 880 
 881         if (!(bh = getblk(dev, block, size))) {
 882                 printk("VFS: bread: READ error on device %s\n",
 883                         kdevname(dev));
 884                 return NULL;
 885         }
 886         if (buffer_uptodate(bh))
 887                 return bh;
 888         ll_rw_block(READ, 1, &bh);
 889         wait_on_buffer(bh);
 890         if (buffer_uptodate(bh))
 891                 return bh;
 892         brelse(bh);
 893         return NULL;
 894 }
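      /*
       * Typical usage (names hypothetical):
       *
       *      struct buffer_head *bh = bread(dev, block, BLOCK_SIZE);
       *      if (!bh)
       *              return -EIO;
       *      ...read or modify bh->b_data, marking the buffer dirty if modified...
       *      brelse(bh);
       */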
 895 
 896 /*
 897  * Ok, breada can be used as bread, but additionally to mark other
 898  * blocks for reading as well. End the argument list with a negative
 899  * number.
 900  */
 901 
 902 #define NBUF 16
 903 
 904 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 905         unsigned int pos, unsigned int filesize)
 906 {
 907         struct buffer_head * bhlist[NBUF];
 908         unsigned int blocks;
 909         struct buffer_head * bh;
 910         int index;
 911         int i, j;
 912 
 913         if (pos >= filesize)
 914                 return NULL;
 915 
 916         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 917                 return NULL;
 918 
 919         index = BUFSIZE_INDEX(bh->b_size);
 920 
 921         if (buffer_uptodate(bh))
 922                 return bh;
 923 
 924         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 925 
 926         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 927                 blocks = read_ahead[MAJOR(dev)] >> index;
 928         if (blocks > NBUF)
 929                 blocks = NBUF;
 930         
 931         bhlist[0] = bh;
 932         j = 1;
 933         for(i=1; i<blocks; i++) {
 934                 bh = getblk(dev,block+i,bufsize);
 935                 if (buffer_uptodate(bh)) {
 936                         brelse(bh);
 937                         break;
 938                 }
 939                 bhlist[j++] = bh;
 940         }
 941 
 942         /* Request the read for these buffers, and then release them */
 943         ll_rw_block(READ, j, bhlist);
 944 
 945         for(i=1; i<j; i++)
 946                 brelse(bhlist[i]);
 947 
 948         /* Wait for this buffer, and then continue on */
 949         bh = bhlist[0];
 950         wait_on_buffer(bh);
 951         if (buffer_uptodate(bh))
 952                 return bh;
 953         brelse(bh);
 954         return NULL;
 955 }
 956 
 957 /*
 958  * See fs/inode.c for the weird use of volatile..
 959  */
 960 static void put_unused_buffer_head(struct buffer_head * bh)
 961 {
 962         struct wait_queue * wait;
 963 
 964         wait = ((volatile struct buffer_head *) bh)->b_wait;
 965         memset(bh,0,sizeof(*bh));
 966         ((volatile struct buffer_head *) bh)->b_wait = wait;
 967         bh->b_next_free = unused_list;
 968         unused_list = bh;
 969         wake_up(&buffer_wait);
 970 }
 971 
 972 static void get_more_buffer_heads(void)
 973 {
 974         int i;
 975         struct buffer_head * bh;
 976 
 977         for (;;) {
 978                 if (unused_list)
 979                         return;
 980 
 981                 /*
 982                  * This is critical.  We can't swap out pages to get
 983                  * more buffer heads, because the swap-out may need
 984                  * more buffer-heads itself.  Thus GFP_ATOMIC.
 985                  */
 986                 bh = (struct buffer_head *) get_free_page(GFP_ATOMIC);
 987                 if (bh)
 988                         break;
 989 
 990                 /*
 991                  * Uhhuh. We're _really_ low on memory. Now we just
 992                  * wait for old buffer heads to become free due to
 993                  * finishing IO..
 994                  */
 995                 run_task_queue(&tq_disk);
 996                 sleep_on(&buffer_wait);
 997         }
 998 
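              /* Carve the freshly allocated page into PAGE_SIZE/sizeof(struct
                 buffer_head) heads, chaining each onto unused_list; the loop
                 below also bumps nr_buffer_heads by that same count in one go. */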
 999         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
1000                 bh->b_next_free = unused_list;  /* only make link */
1001                 unused_list = bh++;
1002         }
1003 }
1004 
1005 /* 
1006  * We can't put completed temporary IO buffer_heads directly onto the
1007  * unused_list when they become unlocked, since the device driver
1008  * end_request routines still expect access to the buffer_head's
1009  * fields after the final unlock.  So, the device driver puts them on
1010  * the reuse_list instead once IO completes, and we recover these to
1011  * the unused_list here.
1012  *
1013  * The reuse_list receives buffers from interrupt routines, so we need
1014  * to be IRQ-safe here (but note that interrupts only _add_ to the
1015  * reuse_list, never take away. So we don't need to worry about the
1016  * reuse_list magically emptying).
1017  */
1018 static inline void recover_reusable_buffer_heads(void)
1019 {
1020         if (reuse_list) {
1021                 struct buffer_head *bh;
1022                 unsigned long flags;
1023         
1024                 save_flags(flags);
1025                 do {
1026                         cli();
1027                         bh = reuse_list;
1028                         reuse_list = bh->b_next_free;
1029                         restore_flags(flags);
1030                         put_unused_buffer_head(bh);
1031                 } while (reuse_list);
1032         }
1033 }
1034 
1035 static struct buffer_head * get_unused_buffer_head(void)
1036 {
1037         struct buffer_head * bh;
1038 
1039         recover_reusable_buffer_heads();
1040         get_more_buffer_heads();
1041         if (!unused_list)
1042                 return NULL;
1043         bh = unused_list;
1044         unused_list = bh->b_next_free;
1045         bh->b_next_free = NULL;
1046         bh->b_data = NULL;
1047         bh->b_size = 0;
1048         bh->b_state = 0;
1049         return bh;
1050 }
1051 
1052 /*
1053  * Create the appropriate buffers when given a page for data area and
1054  * the size of each buffer.. Use the bh->b_this_page linked list to
1055  * follow the buffers created.  Return NULL if unable to create more
1056  * buffers.
1057  */
1058 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1059 {
1060         struct buffer_head *bh, *head;
1061         unsigned long offset;
1062 
1063         head = NULL;
1064         offset = PAGE_SIZE;
1065         while ((offset -= size) < PAGE_SIZE) {
1066                 bh = get_unused_buffer_head();
1067                 if (!bh)
1068                         goto no_grow;
1069                 bh->b_this_page = head;
1070                 head = bh;
1071                 bh->b_data = (char *) (page+offset);
1072                 bh->b_size = size;
1073                 bh->b_dev = B_FREE;  /* Flag as unused */
1074         }
1075         return head;
1076 /*
1077  * In case anything failed, we just free everything we got.
1078  */
1079 no_grow:
1080         bh = head;
1081         while (bh) {
1082                 head = bh;
1083                 bh = bh->b_this_page;
1084                 put_unused_buffer_head(head);
1085         }
1086         return NULL;
1087 }
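      /*
       * The chain handed back here is linked through b_this_page with the last
       * buffer pointing to NULL; the callers that care (brw_page() and
       * grow_buffers()) close it into a ring covering the whole page once they
       * have finished initialising the heads.
       */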
1088 
1089 /* Run the hooks that have to be done when a page I/O has completed. */
1090 static inline void after_unlock_page (struct page * page)
1091 {
1092         if (clear_bit(PG_decr_after, &page->flags))
1093                 nr_async_pages--;
1094         if (clear_bit(PG_free_after, &page->flags))
1095                 free_page(page_address(page));
1096         if (clear_bit(PG_swap_unlock_after, &page->flags))
1097                 swap_after_unlock_page(page->swap_unlock_entry);
1098 }
1099 
1100 /* Free all temporary buffers belonging to a page. */
1101 static inline void free_async_buffers (struct buffer_head * bh)
1102 {
1103         struct buffer_head * tmp;
1104         unsigned long flags;
1105 
1106         tmp = bh;
1107         save_flags(flags);
1108         cli();
1109         do {
1110                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1111                         printk ("Whoops: unlock_buffer: "
1112                                 "async IO mismatch on page.\n");
1113                         restore_flags(flags);
1114                         return;
1115                 }
1116                 tmp->b_next_free = reuse_list;
1117                 reuse_list = tmp;
1118                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1119                 tmp = tmp->b_this_page;
1120         } while (tmp != bh);
1121         restore_flags(flags);
1122 }
1123 
1124 /*
1125  * Start I/O on a page.
1126  * This function expects the page to be locked and may return before I/O is complete.
1127  * You then have to check page->locked, page->uptodate, and maybe wait on page->wait.
1128  */
1129 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1130 {
1131         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1132         int block, nr;
1133         struct page *page;
1134 
1135         page = mem_map + MAP_NR(address);
1136         if (!PageLocked(page))
1137                 panic("brw_page: page not locked for I/O");
1138         clear_bit(PG_uptodate, &page->flags);
1139         /*
1140          * Allocate buffer heads pointing to this page, just for I/O.
1141          * They do _not_ show up in the buffer hash table!
1142          * They are _not_ registered in page->buffers either!
1143          */
1144         bh = create_buffers(address, size);
1145         if (!bh) {
1146                 clear_bit(PG_locked, &page->flags);
1147                 wake_up(&page->wait);
1148                 return -ENOMEM;
1149         }
1150         nr = 0;
1151         next = bh;
1152         do {
1153                 struct buffer_head * tmp;
1154                 block = *(b++);
1155 
1156                 set_bit(BH_FreeOnIO, &next->b_state);
1157                 next->b_list = BUF_CLEAN;
1158                 next->b_dev = dev;
1159                 next->b_blocknr = block;
1160                 next->b_count = 1;
1161                 next->b_flushtime = 0;
1162                 set_bit(BH_Uptodate, &next->b_state);
1163 
1164                 /* When we use bmap, we define block zero to represent
1165                    a hole.  ll_rw_page, however, may legitimately
1166                    access block zero, and we need to distinguish the
1167                    two cases. 
1168                    */
1169                 if (bmap && !block) {
1170                         memset(next->b_data, 0, size);
1171                         next->b_count--;
1172                         continue;
1173                 }
1174                 tmp = get_hash_table(dev, block, size);
1175                 if (tmp) {
1176                         if (!buffer_uptodate(tmp)) {
1177                                 if (rw == READ)
1178                                         ll_rw_block(READ, 1, &tmp);
1179                                 wait_on_buffer(tmp);
1180                         }
1181                         if (rw == READ) 
1182                                 memcpy(next->b_data, tmp->b_data, size);
1183                         else {
1184                                 memcpy(tmp->b_data, next->b_data, size);
1185                                 mark_buffer_dirty(tmp, 0);
1186                         }
1187                         brelse(tmp);
1188                         next->b_count--;
1189                         continue;
1190                 }
1191                 if (rw == READ)
1192                         clear_bit(BH_Uptodate, &next->b_state);
1193                 else
1194                         set_bit(BH_Dirty, &next->b_state);
1195                 arr[nr++] = next;
1196         } while (prev = next, (next = next->b_this_page) != NULL);
1197         prev->b_this_page = bh;
1198         
1199         if (nr) {
1200                 ll_rw_block(rw, nr, arr);
1201                 /* The rest of the work is done in mark_buffer_uptodate()
1202                  * and unlock_buffer(). */
1203         } else {
1204                 clear_bit(PG_locked, &page->flags);
1205                 set_bit(PG_uptodate, &page->flags);
1206                 wake_up(&page->wait);
1207                 free_async_buffers(bh);
1208                 after_unlock_page(page);
1209         }
1210         ++current->maj_flt;
1211         return 0;
1212 }
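      /*
       * Completion of the blocks actually submitted above happens in the request
       * layer: end_request() calls mark_buffer_uptodate() and unlock_buffer(), and
       * unlock_buffer() recognises the BH_FreeOnIO heads, so once the last
       * temporary buffer on the page is unlocked it clears PG_locked, recycles the
       * heads through free_async_buffers() and runs after_unlock_page().
       */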
1213 
1214 /*
1215  * This is called by end_request() when I/O has completed.
1216  */
1217 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1218 {
1219         if (on) {
1220                 struct buffer_head *tmp = bh;
1221                 int page_uptodate = 1;
1222                 set_bit(BH_Uptodate, &bh->b_state);
1223                 /* If a page has buffers and all these buffers are uptodate,
1224                  * then the page is uptodate. */
1225                 do {
1226                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1227                                 page_uptodate = 0;
1228                                 break;
1229                         }
1230                         tmp=tmp->b_this_page;
1231                 } while (tmp && tmp != bh);
1232                 if (page_uptodate)
1233                         set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
1234         } else
1235                 clear_bit(BH_Uptodate, &bh->b_state);
1236 }
1237 
1238 /*
1239  * This is called by end_request() when I/O has completed.
1240  */
1241 void unlock_buffer(struct buffer_head * bh)
1242 {
1243         struct buffer_head *tmp;
1244         struct page *page;
1245 
1246         clear_bit(BH_Lock, &bh->b_state);
1247         wake_up(&bh->b_wait);
1248 
1249         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1250                 return;
1251         /* This is a temporary buffer used for page I/O. */
1252         page = mem_map + MAP_NR(bh->b_data);
1253         if (!PageLocked(page)) {
1254                 printk ("Whoops: unlock_buffer: "
1255                         "async io complete on unlocked page\n");
1256                 return;
1257         }
1258         if (bh->b_count != 1) {
1259                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1260                 return;
1261         }
1262         /* Async buffer_heads are here only as labels for IO, and get
1263            thrown away once the IO for this page is complete.  IO is
1264            deemed complete once all buffers have been visited
1265            (b_count==0) and are now unlocked. */
1266         bh->b_count--;
1267         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1268                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1269                         return;
1270         }
1271         /* OK, the async IO on this page is complete. */
1272         clear_bit(PG_locked, &page->flags);
1273         wake_up(&page->wait);
1274         free_async_buffers(bh);
1275         after_unlock_page(page);
1276         wake_up(&buffer_wait);
1277 }
1278 
1279 /*
1280  * Generic "readpage" function for block devices that have the normal
1281  * bmap functionality. This is most of the block device filesystems.
1282  * Reads the page asynchronously --- the unlock_buffer() and
1283  * mark_buffer_uptodate() functions propagate buffer state into the
1284  * page struct once IO has completed.
1285  */
1286 int generic_readpage(struct inode * inode, struct page * page)
1287 {
1288         unsigned long block, address;
1289         int *p, nr[PAGE_SIZE/512];
1290         int i;
1291 
1292         address = page_address(page);
1293         page->count++;
1294         set_bit(PG_locked, &page->flags);
1295         set_bit(PG_free_after, &page->flags);
1296         
1297         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1298         block = page->offset >> inode->i_sb->s_blocksize_bits;
1299         p = nr;
1300         do {
1301                 *p = inode->i_op->bmap(inode, block);
1302                 i--;
1303                 block++;
1304                 p++;
1305         } while (i > 0);
1306 
1307         /* IO start */
1308         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1309         return 0;
1310 }
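      /*
       * A filesystem whose inodes supply a working bmap() can normally point its
       * readpage operation straight at this function.  The PG_free_after bit set
       * above means the extra page reference taken here is dropped again by
       * after_unlock_page() once the asynchronous I/O finishes.
       */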
1311 
1312 /*
1313  * Try to increase the number of buffers available: the size argument
1314  * is used to determine what kind of buffers we want.
1315  */
1316 static int grow_buffers(int pri, int size)
1317 {
1318         unsigned long page;
1319         struct buffer_head *bh, *tmp;
1320         struct buffer_head * insert_point;
1321         int isize;
1322 
1323         if ((size & 511) || (size > PAGE_SIZE)) {
1324                 printk("VFS: grow_buffers: size = %d\n",size);
1325                 return 0;
1326         }
1327 
1328         isize = BUFSIZE_INDEX(size);
1329 
1330         if (!(page = __get_free_page(pri)))
1331                 return 0;
1332         bh = create_buffers(page, size);
1333         if (!bh) {
1334                 free_page(page);
1335                 return 0;
1336         }
1337 
1338         insert_point = free_list[isize];
1339 
1340         tmp = bh;
1341         while (1) {
1342                 nr_free[isize]++;
1343                 if (insert_point) {
1344                         tmp->b_next_free = insert_point->b_next_free;
1345                         tmp->b_prev_free = insert_point;
1346                         insert_point->b_next_free->b_prev_free = tmp;
1347                         insert_point->b_next_free = tmp;
1348                 } else {
1349                         tmp->b_prev_free = tmp;
1350                         tmp->b_next_free = tmp;
1351                 }
1352                 insert_point = tmp;
1353                 ++nr_buffers;
1354                 if (tmp->b_this_page)
1355                         tmp = tmp->b_this_page;
1356                 else
1357                         break;
1358         }
1359         free_list[isize] = bh;
1360         mem_map[MAP_NR(page)].buffers = bh;
1361         tmp->b_this_page = bh;
1362         buffermem += PAGE_SIZE;
1363         return 1;
1364 }
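
/*
 * The free lists manipulated above are circular, doubly linked rings.
 * A stand-alone sketch of the insert-after step from the loop, using a
 * simplified node type (names are illustrative):
 */
struct demo_node {
        struct demo_node *next, *prev;
};

/* Link "node" into the ring just after "point"; with an empty ring
   (point == NULL) the node becomes a ring of one. */
static struct demo_node *demo_insert_after(struct demo_node *point,
                                           struct demo_node *node)
{
        if (point) {
                node->next = point->next;
                node->prev = point;
                point->next->prev = node;
                point->next = node;
        } else {
                node->next = node;
                node->prev = node;
        }
        return node;
}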
1365 
1366 
1367 /* =========== Reduce the buffer memory ============= */
1368 
1369 /*
1370  * try_to_free_buffer() checks if all the buffers on this particular page
1371  * are unused, and frees the page if so.
1372  */
1373 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1374                        int priority)
1375 {
1376         unsigned long page;
1377         struct buffer_head * tmp, * p;
1378         int isize = BUFSIZE_INDEX(bh->b_size);
1379 
1380         *bhp = bh;
1381         page = (unsigned long) bh->b_data;
1382         page &= PAGE_MASK;
1383         tmp = bh;
1384         do {
1385                 if (!tmp)
1386                         return 0;
1387                 if (tmp->b_count || buffer_protected(tmp) ||
1388                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1389                         return 0;
1390                 if (priority && buffer_touched(tmp))
1391                         return 0;
1392                 tmp = tmp->b_this_page;
1393         } while (tmp != bh);
1394         tmp = bh;
1395         do {
1396                 p = tmp;
1397                 tmp = tmp->b_this_page;
1398                 nr_buffers--;
1399                 nr_buffers_size[isize]--;
1400                 if (p == *bhp)
1401                   {
1402                     *bhp = p->b_prev_free;
1403                     if (p == *bhp) /* Was this the last in the list? */
1404                       *bhp = NULL;
1405                   }
1406                 remove_from_queues(p);
1407                 put_unused_buffer_head(p);
1408         } while (tmp != bh);
1409         buffermem -= PAGE_SIZE;
1410         mem_map[MAP_NR(page)].buffers = NULL;
1411         free_page(page);
1412         return !mem_map[MAP_NR(page)].count;
1413 }
1414 
1415 /* Age buffers on a given page, according to whether they have been
1416    visited recently or not. */
1417 static inline void age_buffer(struct buffer_head *bh)
1418 {
1419         struct buffer_head *tmp = bh;
1420         int touched = 0;
1421 
1422         /*
1423          * When we age a page, we mark all other buffers in the page
1424          * with the "has_aged" flag.  Then, when these aliased buffers
1425          * come up for aging, we skip them until next pass.  This
1426          * ensures that a page full of multiple buffers only gets aged
1427          * once per pass through the lru lists. 
1428          */
1429         if (clear_bit(BH_Has_aged, &bh->b_state))
1430                 return;
1431         
1432         do {
1433                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1434                 tmp = tmp->b_this_page;
1435                 set_bit(BH_Has_aged, &tmp->b_state);
1436         } while (tmp != bh);
1437         clear_bit(BH_Has_aged, &bh->b_state);
1438 
1439         if (touched) 
1440                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1441         else
1442                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1443 }
1444 
1445 /*
1446  * Consult the load average for buffers and decide whether or not
1447  * we should shrink the buffers of one size.  If we decide yes,
1448  * do it and return 1; else return 0.  Do not attempt to shrink
1449  * buffers of the size that is specified.
1450  *
1451  * I would prefer not to use a load average, but the way things are now it
1452  * seems unavoidable.  The way to get rid of it would be to force clustering
1453  * universally, so that when we reclaim buffers we always reclaim an entire
1454  * page.  Doing this would mean that we all need to move towards QMAGIC.
1455  */
1456 
1457 static int maybe_shrink_lav_buffers(int size)
1458 {          
1459         int nlist;
1460         int isize;
1461         int total_lav, total_n_buffers, n_sizes;
1462         
1463         /* Do not consider the shared buffers since they would not tend
1464            to have getblk called very often, and this would throw off
1465            the lav.  They are not easily reclaimable anyway (let the swapper
1466            make the first move). */
1467   
1468         total_lav = total_n_buffers = n_sizes = 0;
1469         for(nlist = 0; nlist < NR_SIZES; nlist++)
1470          {
1471                  total_lav += buffers_lav[nlist];
1472                  if(nr_buffers_size[nlist]) n_sizes++;
1473                  total_n_buffers += nr_buffers_size[nlist];
1474                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1475          }
1476         
1477         /* See if we have an excessive number of buffers of a particular
1478            size - if so, victimize that bunch. */
1479   
1480         isize = (size ? BUFSIZE_INDEX(size) : -1);
1481         
1482         if (n_sizes > 1)
1483                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1484                   {
1485                           if(nlist == isize) continue;
1486                           if(nr_buffers_size[nlist] &&
1487                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1488                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1489                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1490                                             return 1;
1491                   }
1492         return 0;
1493 }
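
/*
 * The test above is a cross-multiplied fraction comparison: a size class
 * is shrunk when its share of the (non-shared) buffer count exceeds
 * lav_const times its share of the total load average.  A user-space
 * sketch with purely illustrative numbers (not the kernel defaults):
 */
#include <stdio.h>

int main(void)
{
        long lav_const = 2;                       /* hypothetical tuning factor */
        long lav_n = 10, total_lav = 100;         /* this size gets 10% of the load */
        long buf_n = 400, shared_n = 0, total_buf = 1000;  /* but holds 40% of buffers */

        if (lav_const * lav_n * total_buf < total_lav * (buf_n - shared_n))
                printf("size is over-represented: try to shrink it\n");
        else
                printf("leave this size alone\n");
        return 0;
}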
1494 
1495 /*
1496  * Try to free up some pages by shrinking the buffer-cache
1497  *
1498  * Priority tells the routine how hard to try to shrink the
1499  * buffers: 6 means "don't bother too much", while a value
1500  * of 0 means "we'd better get some free pages now".
1501  *
1502  * "limit" is meant to limit the shrink-action only to pages
1503  * that are in the 0 - limit address range, for DMA re-allocations.
1504  * We ignore that right now.
1505  */
1506 
1507 static int shrink_specific_buffers(unsigned int priority, int size)
1508 {
1509         struct buffer_head *bh;
1510         int nlist;
1511         int i, isize, isize1;
1512 
1513 #ifdef DEBUG
1514         if(size) printk("Shrinking buffers of size %d\n", size);
1515 #endif
1516         /* First try the free lists, and see if we can get a complete page
1517            from here */
1518         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1519 
1520         for(isize = 0; isize<NR_SIZES; isize++){
1521                 if(isize1 != -1 && isize1 != isize) continue;
1522                 bh = free_list[isize];
1523                 if(!bh) continue;
1524                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1525                         if (bh->b_count || buffer_protected(bh) ||
1526                             !bh->b_this_page)
1527                                  continue;
1528                         if (!age_of((unsigned long) bh->b_data) &&
1529                             try_to_free_buffer(bh, &bh, 6))
1530                                  return 1;
1531                         if(!bh) break;
1532                         /* Some interrupt must have used it after we
1533                            freed the page.  No big deal - keep looking */
1534                 }
1535         }
1536         
1537         /* Not enough in the free lists, now try the lru list */
1538         
1539         for(nlist = 0; nlist < NR_LIST; nlist++) {
1540         repeat1:
1541                 if(priority > 2 && nlist == BUF_SHARED) continue;
1542                 i = nr_buffers_type[nlist];
1543                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1544                 for ( ; i > 0; i-- ) {
1545                         bh = next_to_age[nlist];
1546                         if (!bh)
1547                                 break;
1548                         next_to_age[nlist] = bh->b_next_free;
1549 
1550                         /* First, age the buffer. */
1551                         age_buffer(bh);
1552                         /* We may have stalled while waiting for I/O
1553                            to complete. */
1554                         if(bh->b_list != nlist) goto repeat1;
1555                         if (bh->b_count || buffer_protected(bh) ||
1556                             !bh->b_this_page)
1557                                  continue;
1558                         if(size && bh->b_size != size) continue;
1559                         if (buffer_locked(bh))
1560                                  if (priority)
1561                                           continue;
1562                                  else
1563                                           wait_on_buffer(bh);
1564                         if (buffer_dirty(bh)) {
1565                                 bh->b_count++;
1566                                 bh->b_flushtime = 0;
1567                                 ll_rw_block(WRITEA, 1, &bh);
1568                                 bh->b_count--;
1569                                 continue;
1570                         }
1571                         /* At priority 6, only consider really old
1572                            (age==0) buffers for reclaiming.  At
1573                            priority 0, consider any buffers. */
1574                         if ((age_of((unsigned long) bh->b_data) >>
1575                              (6-priority)) > 0)
1576                                 continue;                               
1577                         if (try_to_free_buffer(bh, &bh, 0))
1578                                  return 1;
1579                         if(!bh) break;
1580                 }
1581         }
1582         return 0;
1583 }
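
/*
 * The age check near the end of the LRU pass becomes more permissive as
 * the priority drops: a buffer is skipped while (age >> (6 - priority))
 * is non-zero.  A small sketch of the resulting behaviour for one
 * illustrative age value:
 */
#include <stdio.h>

int main(void)
{
        int priority, age = 5;          /* whatever age_of() would report */

        for (priority = 0; priority <= 6; priority++)
                printf("priority %d: buffer of age %d is %s\n",
                       priority, age,
                       (age >> (6 - priority)) > 0 ? "skipped" : "reclaimable");
        return 0;
}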
1584 
1585 
1586 /* ================== Debugging =================== */
1587 
1588 void show_buffers(void)
1589 {
1590         struct buffer_head * bh;
1591         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1592         int protected = 0;
1593         int shared;
1594         int nlist, isize;
1595 
1596         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1597         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1598         printk("Buffer blocks:   %6d\n",nr_buffers);
1599 
1600         for(nlist = 0; nlist < NR_LIST; nlist++) {
1601           shared = found = locked = dirty = used = lastused = protected = 0;
1602           bh = lru_list[nlist];
1603           if(!bh) continue;
1604           do {
1605                 found++;
1606                 if (buffer_locked(bh))
1607                         locked++;
1608                 if (buffer_protected(bh))
1609                         protected++;
1610                 if (buffer_dirty(bh))
1611                         dirty++;
1612                 if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1)
1613                         shared++;
1614                 if (bh->b_count)
1615                         used++, lastused = found;
1616                 bh = bh->b_next_free;
1617           } while (bh != lru_list[nlist]);
1618           printk("Buffer[%d] mem: %d buffers, %d used (last=%d), "
1619                  "%d locked, %d protected, %d dirty %d shrd\n",
1620                  nlist, found, used, lastused,
1621                  locked, protected, dirty, shared);
1622         };
1623         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1624         for(isize = 0; isize<NR_SIZES; isize++){
1625                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1626                        buffers_lav[isize], nr_free[isize]);
1627                 for(nlist = 0; nlist < NR_LIST; nlist++)
1628                          printk("%7d ", nr_buffers_st[isize][nlist]);
1629                 printk("\n");
1630         }
1631 }
1632 
1633 
1634 /* ====================== Cluster patches for ext2 ==================== */
1635 
1636 /*
1637  * try_to_reassign() checks if all the buffers on this particular page
1638  * are unused, and reassigns them to a new cluster if this is true.
1639  */
1640 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1641                            kdev_t dev, unsigned int starting_block)
1642 {
1643         unsigned long page;
1644         struct buffer_head * tmp, * p;
1645 
1646         *bhp = bh;
1647         page = (unsigned long) bh->b_data;
1648         page &= PAGE_MASK;
1649         if(mem_map[MAP_NR(page)].count != 1) return 0;
1650         tmp = bh;
1651         do {
1652                 if (!tmp)
1653                          return 0;
1654                 
1655                 if (tmp->b_count || buffer_protected(tmp) ||
1656                     buffer_dirty(tmp) || buffer_locked(tmp))
1657                          return 0;
1658                 tmp = tmp->b_this_page;
1659         } while (tmp != bh);
1660         tmp = bh;
1661         
1662         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1663                  tmp = tmp->b_this_page;
1664         
1665         /* This is the buffer at the head of the page */
1666         bh = tmp;
1667         do {
1668                 p = tmp;
1669                 tmp = tmp->b_this_page;
1670                 remove_from_queues(p);
1671                 p->b_dev = dev;
1672                 mark_buffer_uptodate(p, 0);
1673                 clear_bit(BH_Req, &p->b_state);
1674                 p->b_blocknr = starting_block++;
1675                 insert_into_queues(p);
1676         } while (tmp != bh);
1677         return 1;
1678 }
1679 
1680 /*
1681  * Try to find a free cluster by locating a page where
1682  * all of the buffers are unused.  We would like this function
1683  * to be atomic, so we do not call anything that might cause
1684  * the process to sleep.  The priority is somewhat similar to
1685  * the priority used in shrink_buffers.
1686  * 
1687  * My thinking is that the kernel should end up using whole
1688  * pages for the buffer cache as much of the time as possible.
1689  * This way the other buffers on a particular page are likely
1690  * to be very near each other on the free list, and we will not
1691  * be expiring data prematurely.  For now we only cannibalize buffers
1692  * of the same size to keep the code simpler.
1693  */
1694 static int reassign_cluster(kdev_t dev, 
1695                      unsigned int starting_block, int size)
1696 {
1697         struct buffer_head *bh;
1698         int isize = BUFSIZE_INDEX(size);
1699         int i;
1700 
1701         /* We want to give ourselves a really good shot at generating
1702            a cluster, and since we only take buffers from the free
1703            list, we "overfill" it a little. */
1704 
1705         while(nr_free[isize] < 32) refill_freelist(size);
1706 
1707         bh = free_list[isize];
1708         if(bh)
1709                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1710                          if (!bh->b_this_page)  continue;
1711                          if (try_to_reassign(bh, &bh, dev, starting_block))
1712                                  return 4;
1713                  }
1714         return 0;
1715 }
1716 
1717 /* This function tries to generate a new cluster of buffers
1718  * from a new page in memory.  We should only do this if we have
1719  * not expanded the buffer cache to the maximum size that we allow.
1720  */
1721 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1722 {
1723         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1724         int isize = BUFSIZE_INDEX(size);
1725         unsigned long offset;
1726         unsigned long page;
1727         int nblock;
1728 
1729         page = get_free_page(GFP_NOBUFFER);
1730         if(!page) return 0;
1731 
1732         bh = create_buffers(page, size);
1733         if (!bh) {
1734                 free_page(page);
1735                 return 0;
1736         };
1737         nblock = block;
1738         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1739                 if (find_buffer(dev, nblock++, size))
1740                          goto not_aligned;
1741         }
1742         tmp = bh;
1743         nblock = 0;
1744         while (1) {
1745                 arr[nblock++] = bh;
1746                 bh->b_count = 1;
1747                 bh->b_flushtime = 0;
1748                 bh->b_state = 0;
1749                 bh->b_dev = dev;
1750                 bh->b_list = BUF_CLEAN;
1751                 bh->b_blocknr = block++;
1752                 nr_buffers++;
1753                 nr_buffers_size[isize]++;
1754                 insert_into_queues(bh);
1755                 if (bh->b_this_page)
1756                         bh = bh->b_this_page;
1757                 else
1758                         break;
1759         }
1760         buffermem += PAGE_SIZE;
1761         mem_map[MAP_NR(page)].buffers = bh;
1762         bh->b_this_page = tmp;
1763         while (nblock-- > 0)
1764                 brelse(arr[nblock]);
1765         return 4; /* ?? */
1766 not_aligned:
1767         while ((tmp = bh) != NULL) {
1768                 bh = bh->b_this_page;
1769                 put_unused_buffer_head(tmp);
1770         }
1771         free_page(page);
1772         return 0;
1773 }
1774 
1775 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1776 {
1777         int i, offset;
1778         
1779         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1780                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1781                 if(find_buffer(dev, b[i], size)) return 0;
1782         };
1783 
1784         /* OK, we have a candidate for a new cluster */
1785         
1786         /* See if one size of buffer is over-represented in the buffer cache,
1787            if so reduce the numbers of buffers */
1788         if(maybe_shrink_lav_buffers(size))
1789          {
1790                  int retval;
1791                  retval = try_to_generate_cluster(dev, b[0], size);
1792                  if(retval) return retval;
1793          };
1794         
1795         if (nr_free_pages > min_free_pages*2) 
1796                  return try_to_generate_cluster(dev, b[0], size);
1797         else
1798                  return reassign_cluster(dev, b[0], size);
1799 }
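
/*
 * generate_cluster() only proceeds when the requested block numbers are
 * consecutive and none of them is already in the cache.  A user-space
 * sketch of just the consecutiveness test (helper name is illustrative):
 */
#include <stdio.h>

static int demo_blocks_consecutive(const int *b, int n)
{
        int i;

        for (i = 1; i < n; i++)
                if (b[i] - 1 != b[i - 1])
                        return 0;       /* gap in the run: no point clustering */
        return 1;
}

int main(void)
{
        int good[] = { 100, 101, 102, 103 };
        int bad[]  = { 100, 101, 105, 106 };

        printf("good run: %d, broken run: %d\n",
               demo_blocks_consecutive(good, 4),
               demo_blocks_consecutive(bad, 4));
        return 0;
}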
1800 
1801 
1802 /* ===================== Init ======================= */
1803 
1804 /*
1805  * This initializes the initial buffer free list.  nr_buffers_type is set
1806  * to one less than the actual number of buffers, as a sop to backwards
1807  * compatibility --- the old code did this (I think unintentionally,
1808  * but I'm not sure), and programs in the ps package expect it.
1809  *                                      - TYT 8/30/92
1810  */
1811 void buffer_init(void)
1812 {
1813         int i;
1814         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1815         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1816 
1817         if (memsize >= 4*1024*1024) {
1818                 if(memsize >= 16*1024*1024)
1819                          nr_hash = 16381;
1820                 else
1821                          nr_hash = 4093;
1822         } else {
1823                 nr_hash = 997;
1824         };
1825         
1826         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1827                                                      sizeof(struct buffer_head *));
1828 
1829 
1830         for (i = 0 ; i < nr_hash ; i++)
1831                 hash_table[i] = NULL;
1832         lru_list[BUF_CLEAN] = 0;
1833         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1834         if (!free_list[isize])
1835                 panic("VFS: Unable to initialize buffer free list!");
1836         return;
1837 }
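
/*
 * The bucket counts chosen above (997, 4093, 16381) are primes, scaled
 * by the amount of physical memory.  A stand-alone restatement of that
 * choice (function name is illustrative):
 */
static int demo_pick_nr_hash(unsigned long memsize_bytes)
{
        if (memsize_bytes >= 16UL * 1024 * 1024)
                return 16381;
        if (memsize_bytes >= 4UL * 1024 * 1024)
                return 4093;
        return 997;
}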
1838 
1839 
1840 /* ====================== bdflush support =================== */
1841 
1842 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1843  * response to dirty buffers.  Once this process is activated, we write back
1844  * a limited number of buffers to the disks and then go back to sleep again.
1845  */
1846 struct wait_queue * bdflush_wait = NULL;
1847 struct wait_queue * bdflush_done = NULL;
1848 
1849 static void wakeup_bdflush(int wait)
1850 {
1851         wake_up(&bdflush_wait);
1852         if (wait) {
1853                 run_task_queue(&tq_disk);
1854                 sleep_on(&bdflush_done);
1855         }
1856 }
1857 
1858 
1859 /* 
1860  * Here we attempt to write back old buffers.  We also try to flush
1861  * inodes and superblocks, since this function is essentially "update",
1862  * and otherwise there would be no way of ensuring that these quantities
1863  * ever get written back.  Ideally, we would have a timestamp on the
1864  * inodes and superblocks so that we could write back only the old ones.
1865  */
1866 
1867 asmlinkage int sync_old_buffers(void)
1868 {
1869         int i, isize;
1870         int ndirty, nwritten;
1871         int nlist;
1872         int ncount;
1873         struct buffer_head * bh, *next;
1874 
1875         sync_supers(0);
1876         sync_inodes(0);
1877 
1878         ncount = 0;
1879 #ifdef DEBUG
1880         for(nlist = 0; nlist < NR_LIST; nlist++)
1881 #else
1882         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1883 #endif
1884         {
1885                 ndirty = 0;
1886                 nwritten = 0;
1887         repeat:
1888                 bh = lru_list[nlist];
1889                 if(bh) 
1890                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1891                                  /* We may have stalled while waiting for I/O to complete. */
1892                                  if(bh->b_list != nlist) goto repeat;
1893                                  next = bh->b_next_free;
1894                                  if(!lru_list[nlist]) {
1895                                          printk("Dirty list empty %d\n", i);
1896                                          break;
1897                                  }
1898                                  
1899                                  /* Clean buffer on dirty list?  Refile it */
1900                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1901                                   {
1902                                           refile_buffer(bh);
1903                                           continue;
1904                                   }
1905                                  
1906                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1907                                           continue;
1908                                  ndirty++;
1909                                  if(bh->b_flushtime > jiffies) continue;
1910                                  nwritten++;
1911                                  bh->b_count++;
1912                                  bh->b_flushtime = 0;
1913 #ifdef DEBUG
1914                                  if(nlist != BUF_DIRTY) ncount++;
1915 #endif
1916                                  ll_rw_block(WRITE, 1, &bh);
1917                                  bh->b_count--;
1918                          }
1919         }
1920 #ifdef DEBUG
1921         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1922         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1923 #endif
1924         
1925         /* We assume that we only come through here on a regular
1926            schedule, like every 5 seconds.  Now update load averages.  
1927            Shift usage counts to prevent overflow. */
1928         for(isize = 0; isize<NR_SIZES; isize++){
1929                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1930                 buffer_usage[isize] = 0;
1931         };
1932         return 0;
1933 }
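
/*
 * CALC_LOAD() keeps buffers_lav[] as a fixed-point, exponentially decayed
 * average of buffer_usage[].  A user-space sketch of that style of update;
 * the FSHIFT/FIXED_1 and exp values below are illustrative, not necessarily
 * what the kernel uses:
 */
#include <stdio.h>

#define DEMO_FSHIFT   11
#define DEMO_FIXED_1  (1UL << DEMO_FSHIFT)

static unsigned long demo_calc_load(unsigned long load,
                                    unsigned long exp, unsigned long n)
{
        load *= exp;                            /* decay the old average ...      */
        load += n * (DEMO_FIXED_1 - exp);       /* ... and blend in the new count */
        return load >> DEMO_FSHIFT;
}

int main(void)
{
        unsigned long lav = 0, exp = 1884;      /* exp close to FIXED_1: slow decay */
        int tick;

        for (tick = 0; tick < 10; tick++) {
                lav = demo_calc_load(lav, exp, 100 << DEMO_FSHIFT);
                printf("tick %d: lav = %lu\n", tick, lav >> DEMO_FSHIFT);
        }
        return 0;
}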
1934 
1935 
1936 /* This is the interface to bdflush.  As we get more sophisticated, we can
1937  * pass tuning parameters to this "process", to adjust how it behaves. 
1938  * We would want to verify each parameter, however, to make sure that it 
1939  * is reasonable. */
1940 
1941 asmlinkage int sys_bdflush(int func, long data)
1942 {
1943         int i, error;
1944 
1945         if (!suser())
1946                 return -EPERM;
1947 
1948         if (func == 1)
1949                  return sync_old_buffers();
1950 
1951         /* For func >= 2: func 2 reads param 0, func 3 writes param 0, func 4 reads param 1, etc */
1952         if (func >= 2) {
1953                 i = (func-2) >> 1;
1954                 if (i < 0 || i >= N_PARAM)
1955                         return -EINVAL;
1956                 if((func & 1) == 0) {
1957                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1958                         if (error)
1959                                 return error;
1960                         put_user(bdf_prm.data[i], (int*)data);
1961                         return 0;
1962                 };
1963                 if (data < bdflush_min[i] || data > bdflush_max[i])
1964                         return -EINVAL;
1965                 bdf_prm.data[i] = data;
1966                 return 0;
1967         }
1968 
1969         /* func 0 used to launch the actual bdflush and then never
1970         return (unless it was explicitly killed).  We return zero here to
1971         remain semi-compatible with present update(8) programs. */
1972 
1973         return 0;
1974 }
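
/*
 * For func >= 2 the parameter index and direction are packed into func:
 * 2 + 2*i reads tuning parameter i into *data, 3 + 2*i sets it to data.
 * A tiny user-space sketch of the encoding only (no system call is made;
 * helper names are illustrative):
 */
#include <stdio.h>

static int demo_read_func(int param)  { return 2 + 2 * param; }
static int demo_write_func(int param) { return 3 + 2 * param; }

int main(void)
{
        printf("param 0: read with func %d, write with func %d\n",
               demo_read_func(0), demo_write_func(0));
        printf("param 5: read with func %d, write with func %d\n",
               demo_read_func(5), demo_write_func(5));
        return 0;
}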
1975 
1976 /* This is the actual bdflush daemon itself. It used to be started from
1977  * the syscall above, but now we launch it ourselves internally with
1978  * kernel_thread(...)  directly after the first thread in init/main.c */
1979 
1980 int bdflush(void * unused) 
1981 {
1982         int i;
1983         int ndirty;
1984         int nlist;
1985         int ncount;
1986         struct buffer_head * bh, *next;
1987 
1988         /*
1989          *      We have a bare-bones task_struct, and really should fill
1990          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1991          *      display semi-sane things.  Not really crucial, though...
1992          */
1993 
1994         current->session = 1;
1995         current->pgrp = 1;
1996         sprintf(current->comm, "kflushd");
1997 
1998         /*
1999          *      As a kernel thread we want to tamper with system buffers
2000          *      and other internals and thus be subject to the SMP locking
2001          *      rules. (On a uniprocessor box this does nothing).
2002          */
2003          
2004 #ifdef __SMP__
2005         lock_kernel();
2006         syscall_count++;
2007 #endif
2008                  
2009         for (;;) {
2010 #ifdef DEBUG
2011                 printk("bdflush() activated...");
2012 #endif
2013                 
2014                 ncount = 0;
2015 #ifdef DEBUG
2016                 for(nlist = 0; nlist < NR_LIST; nlist++)
2017 #else
2018                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
2019 #endif
2020                  {
2021                          ndirty = 0;
2022                  repeat:
2023                          bh = lru_list[nlist];
2024                          if(bh) 
2025                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
2026                                        bh = next) {
2027                                           /* We may have stalled while waiting for I/O to complete. */
2028                                           if(bh->b_list != nlist) goto repeat;
2029                                           next = bh->b_next_free;
2030                                           if(!lru_list[nlist]) {
2031                                                   printk("Dirty list empty %d\n", i);
2032                                                   break;
2033                                           }
2034                                           
2035                                           /* Clean buffer on dirty list?  Refile it */
2036                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
2037                                            {
2038                                                    refile_buffer(bh);
2039                                                    continue;
2040                                            }
2041                                           
2042                                           if (buffer_locked(bh) || !buffer_dirty(bh))
2043                                                    continue;
2044                                           /* Should we write back buffers that are shared or not??
2045                                              currently dirty buffers are not shared, so it does not matter */
2046                                           bh->b_count++;
2047                                           ndirty++;
2048                                           bh->b_flushtime = 0;
2049                                           ll_rw_block(WRITE, 1, &bh);
2050 #ifdef DEBUG
2051                                           if(nlist != BUF_DIRTY) ncount++;
2052 #endif
2053                                           bh->b_count--;
2054                                   }
2055                  }
2056 #ifdef DEBUG
2057                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2058                 printk("sleeping again.\n");
2059 #endif
2060                 run_task_queue(&tq_disk);
2061                 wake_up(&bdflush_done);
2062                 
2063                 /* If there are still a lot of dirty buffers around, skip the sleep
2064                    and flush some more */
2065                 
2066                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2067                    bdf_prm.b_un.nfract/100) {
2068                         current->signal = 0;
2069                         interruptible_sleep_on(&bdflush_wait);
2070                 }
2071         }
2072 }
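
/*
 * The sleep test above keeps bdflush running, without sleeping, while the
 * dirty buffers exceed nfract percent of the non-shared buffers.  Worked
 * with illustrative numbers (nfract's real default may differ):
 */
#include <stdio.h>

int main(void)
{
        int nr_buffers = 2000, nr_shared = 200, nr_dirty = 700;
        int nfract = 30;        /* hypothetical: flush again while >30% dirty */
        int threshold = (nr_buffers - nr_shared) * nfract / 100;

        if (nr_dirty <= threshold)
                printf("dirty=%d <= threshold=%d: sleep until woken\n",
                       nr_dirty, threshold);
        else
                printf("dirty=%d > threshold=%d: flush another batch at once\n",
                       nr_dirty, threshold);
        return 0;
}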
2073 
2074 
2075 /*
2076  * Overrides for Emacs so that we follow Linus's tabbing style.
2077  * Emacs will notice this stuff at the end of the file and automatically
2078  * adjust the settings for this buffer only.  This must remain at the end
2079  * of the file.
2080  * ---------------------------------------------------------------------------
2081  * Local variables:
2082  * c-indent-level: 8
2083  * c-brace-imaginary-offset: 0
2084  * c-brace-offset: -8
2085  * c-argdecl-indent: 8
2086  * c-label-offset: -8
2087  * c-continued-statement-offset: 8
2088  * c-continued-brace-offset: 0
2089  * End:
2090  */
