root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. after_unlock_page
  34. free_async_buffers
  35. brw_page
  36. mark_buffer_uptodate
  37. unlock_buffer
  38. generic_readpage
  39. grow_buffers
  40. try_to_free_buffer
  41. age_buffer
  42. maybe_shrink_lav_buffers
  43. shrink_specific_buffers
  44. show_buffers
  45. try_to_reassign
  46. reassign_cluster
  47. try_to_generate_cluster
  48. generate_cluster
  49. buffer_init
  50. wakeup_bdflush
  51. sync_old_buffers
  52. sys_bdflush
  53. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 5
  39 static char buffersize_index[17] =
  40 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
  41 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
  42 
  43 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  44 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
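     /*
      * BUFSIZE_INDEX() maps a buffer size onto its slot in bufferindex_size[]:
      * the size is shifted down by 9 (divided by 512) and looked up in
      * buffersize_index[], giving
      *
      *     BUFSIZE_INDEX(512)  == 0,  BUFSIZE_INDEX(1024) == 1,
      *     BUFSIZE_INDEX(2048) == 2,  BUFSIZE_INDEX(4096) == 3,
      *     BUFSIZE_INDEX(8192) == 4;
      *
      * any other size maps to -1.
      */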
  45 
  46 static int grow_buffers(int pri, int size);
  47 static int shrink_specific_buffers(unsigned int priority, int size);
  48 static int maybe_shrink_lav_buffers(int);
  49 
  50 static int nr_hash = 0;  /* Size of hash table */
  51 static struct buffer_head ** hash_table;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 
  59 static struct buffer_head * unused_list = NULL;
  60 struct buffer_head * reuse_list = NULL;
  61 static struct wait_queue * buffer_wait = NULL;
  62 
  63 int nr_buffers = 0;
  64 int nr_buffers_type[NR_LIST] = {0,};
  65 int nr_buffers_size[NR_SIZES] = {0,};
  66 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  67 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  68 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  69 int nr_free[NR_SIZES] = {0,};
  70 int buffermem = 0;
  71 int nr_buffer_heads = 0;
  72 extern int *blksize_size[];
  73 
  74 /* Here is the parameter block for the bdflush process. If you add or
  75  * remove any of the parameters, make sure to update kernel/sysctl.c.
  76  */
  77 
  78 static void wakeup_bdflush(int);
  79 
  80 #define N_PARAM 9
  81 #define LAV
  82 
  83 union bdflush_param{
  84         struct {
  85                 int nfract;  /* Percentage of buffer cache dirty to 
  86                                 activate bdflush */
  87                 int ndirty;  /* Maximum number of dirty blocks to write out per
  88                                 wake-cycle */
  89                 int nrefill; /* Number of clean buffers to try and obtain
  90                                 each time we call refill */
  91                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  92                                   when trying to refill buffers. */
  93                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  94                                     search for free clusters */
  95                 int age_buffer;  /* Time for normal buffer to age before 
  96                                     we flush it */
  97                 int age_super;  /* Time for superblock to age before we 
  98                                    flush it */
  99                 int lav_const;  /* Constant used for load average (time
 100                                    constant) */
 101                 int lav_ratio;  /* Used to determine how low a lav for a
 102                                    particular size can go before we start to
 103                                    trim back the buffers */
 104         } b_un;
 105         unsigned int data[N_PARAM];
 106 } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
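     /*
      * The union lets the same parameters be reached either by name through
      * bdf_prm.b_un or by index through bdf_prm.data[] (which is how
      * sys_bdflush() gets and sets them); for example, with the defaults
      * above bdf_prm.data[0] is nfract == 60 (percent dirty before bdflush
      * is woken) and bdf_prm.data[1] is ndirty == 500 (blocks written per
      * wake-cycle).
      */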
 107 
 108 /* The lav constant is set for 1 minute, as long as the update process runs
 109    every 5 seconds.  If you change the frequency of update, the time
 110    constant will also change. */
 111 
 112 
 113 /* These are the min and max parameter values that we will allow to be assigned */
 114 int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 115 int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
 116 
 117 /*
 118  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 119  * getting rid of the cli-sti pairs. The wait-queue routines still
 120  * need cli-sti, but now it's just a couple of 386 instructions or so.
 121  *
 122  * Note that the real wait_on_buffer() is an inline function that checks
 123  * whether the buffer is locked before calling this, so that the queues
 124  * aren't set up unnecessarily.
 125  */
 126 void __wait_on_buffer(struct buffer_head * bh)
 127 {
 128         struct wait_queue wait = { current, NULL };
 129 
 130         bh->b_count++;
 131         add_wait_queue(&bh->b_wait, &wait);
 132 repeat:
 133         run_task_queue(&tq_disk);
 134         current->state = TASK_UNINTERRUPTIBLE;
 135         if (buffer_locked(bh)) {
 136                 schedule();
 137                 goto repeat;
 138         }
 139         remove_wait_queue(&bh->b_wait, &wait);
 140         bh->b_count--;
 141         current->state = TASK_RUNNING;
 142 }
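     /*
      * For reference, the fast-path wrapper lives in <linux/locks.h> and
      * looks roughly like this (a sketch, not part of this file):
      *
      *     extern inline void wait_on_buffer(struct buffer_head * bh)
      *     {
      *             if (buffer_locked(bh))
      *                     __wait_on_buffer(bh);
      *     }
      *
      * so the queue setup above only happens for buffers that really are
      * locked.
      */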
 143 
 144 /* Call sync_buffers with wait!=0 to ensure that the call does not
 145    return until all buffer writes have completed.  Sync() may return
 146    before the writes have finished; fsync() may not. */
 147 
 148 
 149 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 150    spontaneously dirty themselves without brelse ever being called.
 151    We will ultimately want to put these in a separate list, but for
 152    now we search all of the lists for dirty buffers */
 153 
 154 static int sync_buffers(kdev_t dev, int wait)
 155 {
 156         int i, retry, pass = 0, err = 0;
 157         int nlist, ncount;
 158         struct buffer_head * bh, *next;
 159 
 160         /* One pass for no-wait, three for wait:
 161            0) write out all dirty, unlocked buffers;
 162            1) write out all dirty buffers, waiting if locked;
 163            2) wait for completion by waiting for all buffers to unlock. */
 164  repeat:
 165         retry = 0;
 166  repeat2:
 167         ncount = 0;
 168         /* We search all lists as a failsafe mechanism, not because we expect
 169            there to be dirty buffers on any of the other lists. */
 170         for(nlist = 0; nlist < NR_LIST; nlist++)
 171          {
 172          repeat1:
 173                  bh = lru_list[nlist];
 174                  if(!bh) continue;
 175                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 176                          if(bh->b_list != nlist) goto repeat1;
 177                          next = bh->b_next_free;
 178                          if(!lru_list[nlist]) break;
 179                          if (dev && bh->b_dev != dev)
 180                                   continue;
 181                          if (buffer_locked(bh))
 182                           {
 183                                   /* Buffer is locked; skip it unless wait is
 184                                      requested AND pass > 0. */
 185                                   if (!wait || !pass) {
 186                                           retry = 1;
 187                                           continue;
 188                                   }
 189                                   wait_on_buffer (bh);
 190                                   goto repeat2;
 191                           }
 192                          /* If an unlocked buffer is not uptodate, there has
 193                              been an IO error. Skip it. */
 194                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 195                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 196                                   err = 1;
 197                                   continue;
 198                           }
 199                          /* Don't write clean buffers.  Don't write ANY buffers
 200                             on the third pass. */
 201                          if (!buffer_dirty(bh) || pass>=2)
 202                                   continue;
 203                          /* don't bother about locked buffers */
 204                          if (buffer_locked(bh))
 205                                  continue;
 206                          bh->b_count++;
 207                          bh->b_flushtime = 0;
 208                          ll_rw_block(WRITE, 1, &bh);
 209 
 210                          if(nlist != BUF_DIRTY) { 
 211                                  printk("[%d %s %ld] ", nlist,
 212                                         kdevname(bh->b_dev), bh->b_blocknr);
 213                                  ncount++;
 214                          }
 215                          bh->b_count--;
 216                          retry = 1;
 217                  }
 218          }
 219         if (ncount)
 220           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 221         
 222         /* If we are waiting for the sync to succeed, and if any dirty
 223            blocks were written, then repeat; on the second pass, only
 224            wait for buffers being written (do not pass to write any
 225            more buffers on the second pass). */
 226         if (wait && retry && ++pass<=2)
 227                  goto repeat;
 228         return err;
 229 }
 230 
 231 void sync_dev(kdev_t dev)
 232 {
 233         sync_buffers(dev, 0);
 234         sync_supers(dev);
 235         sync_inodes(dev);
 236         sync_buffers(dev, 0);
 237         sync_dquots(dev, -1);
 238 }
 239 
 240 int fsync_dev(kdev_t dev)
 241 {
 242         sync_buffers(dev, 0);
 243         sync_supers(dev);
 244         sync_inodes(dev);
 245         sync_dquots(dev, -1);
 246         return sync_buffers(dev, 1);
 247 }
 248 
 249 asmlinkage int sys_sync(void)
 250 {
 251         fsync_dev(0);
 252         return 0;
 253 }
 254 
 255 int file_fsync (struct inode *inode, struct file *filp)
 256 {
 257         return fsync_dev(inode->i_dev);
 258 }
 259 
 260 asmlinkage int sys_fsync(unsigned int fd)
 261 {
 262         struct file * file;
 263         struct inode * inode;
 264 
 265         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 266                 return -EBADF;
 267         if (!file->f_op || !file->f_op->fsync)
 268                 return -EINVAL;
 269         if (file->f_op->fsync(inode,file))
 270                 return -EIO;
 271         return 0;
 272 }
 273 
 274 asmlinkage int sys_fdatasync(unsigned int fd)
 275 {
 276         struct file * file;
 277         struct inode * inode;
 278 
 279         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 280                 return -EBADF;
 281         if (!file->f_op || !file->f_op->fsync)
 282                 return -EINVAL;
 283         /* this needs further work, at the moment it is identical to fsync() */
 284         if (file->f_op->fsync(inode,file))
 285                 return -EIO;
 286         return 0;
 287 }
 288 
 289 void invalidate_buffers(kdev_t dev)
 290 {
 291         int i;
 292         int nlist;
 293         struct buffer_head * bh;
 294 
 295         for(nlist = 0; nlist < NR_LIST; nlist++) {
 296                 bh = lru_list[nlist];
 297                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 298                         if (bh->b_dev != dev)
 299                                 continue;
 300                         wait_on_buffer(bh);
 301                         if (bh->b_dev != dev)
 302                                 continue;
 303                         if (bh->b_count)
 304                                 continue;
 305                         bh->b_flushtime = 0;
 306                         clear_bit(BH_Protected, &bh->b_state);
 307                         clear_bit(BH_Uptodate, &bh->b_state);
 308                         clear_bit(BH_Dirty, &bh->b_state);
 309                         clear_bit(BH_Req, &bh->b_state);
 310                 }
 311         }
 312 }
 313 
 314 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 315 #define hash(dev,block) hash_table[_hashfn(dev,block)]
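     /*
      * hash(dev,block) names the head of the chain that a (device, block)
      * pair hashes onto; find_buffer() below simply walks that chain via
      * b_next.  The chains are doubly linked through b_next/b_prev so that
      * remove_from_hash_queue() can unhash a buffer in O(1).
      */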
 316 
 317 static inline void remove_from_hash_queue(struct buffer_head * bh)
 318 {
 319         if (bh->b_next)
 320                 bh->b_next->b_prev = bh->b_prev;
 321         if (bh->b_prev)
 322                 bh->b_prev->b_next = bh->b_next;
 323         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 324                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 325         bh->b_next = bh->b_prev = NULL;
 326 }
 327 
 328 static inline void remove_from_lru_list(struct buffer_head * bh)
 329 {
 330         if (!(bh->b_prev_free) || !(bh->b_next_free))
 331                 panic("VFS: LRU block list corrupted");
 332         if (bh->b_dev == B_FREE)
 333                 panic("LRU list corrupted");
 334         bh->b_prev_free->b_next_free = bh->b_next_free;
 335         bh->b_next_free->b_prev_free = bh->b_prev_free;
 336 
 337         if (lru_list[bh->b_list] == bh)
 338                  lru_list[bh->b_list] = bh->b_next_free;
 339         if (lru_list[bh->b_list] == bh)
 340                  lru_list[bh->b_list] = NULL;
 341         if (next_to_age[bh->b_list] == bh)
 342                 next_to_age[bh->b_list] = bh->b_next_free;
 343         if (next_to_age[bh->b_list] == bh)
 344                 next_to_age[bh->b_list] = NULL;
 345 
 346         bh->b_next_free = bh->b_prev_free = NULL;
 347 }
 348 
 349 static inline void remove_from_free_list(struct buffer_head * bh)
 350 {
 351         int isize = BUFSIZE_INDEX(bh->b_size);
 352         if (!(bh->b_prev_free) || !(bh->b_next_free))
 353                 panic("VFS: Free block list corrupted");
 354         if(bh->b_dev != B_FREE)
 355                 panic("Free list corrupted");
 356         if(!free_list[isize])
 357                 panic("Free list empty");
 358         nr_free[isize]--;
 359         if(bh->b_next_free == bh)
 360                  free_list[isize] = NULL;
 361         else {
 362                 bh->b_prev_free->b_next_free = bh->b_next_free;
 363                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 364                 if (free_list[isize] == bh)
 365                          free_list[isize] = bh->b_next_free;
 366         }
 367         bh->b_next_free = bh->b_prev_free = NULL;
 368 }
 369 
 370 static inline void remove_from_queues(struct buffer_head * bh)
 371 {
 372         if(bh->b_dev == B_FREE) {
 373                 remove_from_free_list(bh); /* Free list entries should not be
 374                                               in the hash queue */
 375                 return;
 376         }
 377         nr_buffers_type[bh->b_list]--;
 378         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 379         remove_from_hash_queue(bh);
 380         remove_from_lru_list(bh);
 381 }
 382 
 383 static inline void put_last_lru(struct buffer_head * bh)
 384 {
 385         if (!bh)
 386                 return;
 387         if (bh == lru_list[bh->b_list]) {
 388                 lru_list[bh->b_list] = bh->b_next_free;
 389                 if (next_to_age[bh->b_list] == bh)
 390                         next_to_age[bh->b_list] = bh->b_next_free;
 391                 return;
 392         }
 393         if(bh->b_dev == B_FREE)
 394                 panic("Wrong block for lru list");
 395         remove_from_lru_list(bh);
 396 /* add to back of the lru list */
 397 
 398         if(!lru_list[bh->b_list]) {
 399                 lru_list[bh->b_list] = bh;
 400                 lru_list[bh->b_list]->b_prev_free = bh;
 401         }
 402         if (!next_to_age[bh->b_list])
 403                 next_to_age[bh->b_list] = bh;
 404 
 405         bh->b_next_free = lru_list[bh->b_list];
 406         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 407         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 408         lru_list[bh->b_list]->b_prev_free = bh;
 409 }
 410 
 411 static inline void put_last_free(struct buffer_head * bh)
 412 {
 413         int isize;
 414         if (!bh)
 415                 return;
 416 
 417         isize = BUFSIZE_INDEX(bh->b_size);      
 418         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 419         /* add to back of free list */
 420         if(!free_list[isize]) {
 421                 free_list[isize] = bh;
 422                 bh->b_prev_free = bh;
 423         }
 424 
 425         nr_free[isize]++;
 426         bh->b_next_free = free_list[isize];
 427         bh->b_prev_free = free_list[isize]->b_prev_free;
 428         free_list[isize]->b_prev_free->b_next_free = bh;
 429         free_list[isize]->b_prev_free = bh;
 430 }
 431 
 432 static inline void insert_into_queues(struct buffer_head * bh)
 433 {
 434         /* put at end of free list */
 435         if(bh->b_dev == B_FREE) {
 436                 put_last_free(bh);
 437                 return;
 438         }
 439         if(!lru_list[bh->b_list]) {
 440                 lru_list[bh->b_list] = bh;
 441                 bh->b_prev_free = bh;
 442         }
 443         if (!next_to_age[bh->b_list])
 444                 next_to_age[bh->b_list] = bh;
 445         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 446         bh->b_next_free = lru_list[bh->b_list];
 447         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 448         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 449         lru_list[bh->b_list]->b_prev_free = bh;
 450         nr_buffers_type[bh->b_list]++;
 451         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 452 /* put the buffer in new hash-queue if it has a device */
 453         bh->b_prev = NULL;
 454         bh->b_next = NULL;
 455         if (!(bh->b_dev))
 456                 return;
 457         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 458         hash(bh->b_dev,bh->b_blocknr) = bh;
 459         if (bh->b_next)
 460                 bh->b_next->b_prev = bh;
 461 }
 462 
 463 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 464 {               
 465         struct buffer_head * tmp;
 466 
 467         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 468                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 469                         if (tmp->b_size == size)
 470                                 return tmp;
 471                         else {
 472                                 printk("VFS: Wrong blocksize on device %s\n",
 473                                         kdevname(dev));
 474                                 return NULL;
 475                         }
 476         return NULL;
 477 }
 478 
 479 /*
 480  * Why like this, I hear you say... The reason is race-conditions.
 481  * As we don't lock buffers (unless we are reading them, that is),
 482  * something might happen to one of them while we sleep (ie a read-error
 483  * will mark it bad). This shouldn't really happen currently, but
 484  * the code is ready.
 485  */
 486 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 487 {
 488         struct buffer_head * bh;
 489 
 490         for (;;) {
 491                 if (!(bh=find_buffer(dev,block,size)))
 492                         return NULL;
 493                 bh->b_count++;
 494                 wait_on_buffer(bh);
 495                 if (bh->b_dev == dev && bh->b_blocknr == block
 496                                              && bh->b_size == size)
 497                         return bh;
 498                 bh->b_count--;
 499         }
 500 }
 501 
 502 void set_blocksize(kdev_t dev, int size)
 503 {
 504         int i, nlist;
 505         struct buffer_head * bh, *bhnext;
 506 
 507         if (!blksize_size[MAJOR(dev)])
 508                 return;
 509 
 510         if (size > PAGE_SIZE)
 511                 size = 0;
 512 
 513         switch (size) {
 514                 default: panic("Invalid blocksize passed to set_blocksize");
 515                 case 512: case 1024: case 2048: case 4096: case 8192: ;
 516         }
 517 
 518         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 519                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 520                 return;
 521         }
 522         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 523                 return;
 524         sync_buffers(dev, 2);
 525         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 526 
 527   /* We need to be quite careful how we do this - we are moving entries
 528      around on the free list, and we can get in a loop if we are not careful.*/
 529 
 530         for(nlist = 0; nlist < NR_LIST; nlist++) {
 531                 bh = lru_list[nlist];
 532                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 533                         if(!bh) break;
 534                         bhnext = bh->b_next_free; 
 535                         if (bh->b_dev != dev)
 536                                  continue;
 537                         if (bh->b_size == size)
 538                                  continue;
 539                         
 540                         wait_on_buffer(bh);
 541                         if (bh->b_dev == dev && bh->b_size != size) {
 542                                 clear_bit(BH_Dirty, &bh->b_state);
 543                                 clear_bit(BH_Uptodate, &bh->b_state);
 544                                 clear_bit(BH_Req, &bh->b_state);
 545                                 bh->b_flushtime = 0;
 546                         }
 547                         remove_from_hash_queue(bh);
 548                 }
 549         }
 550 }
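     /*
      * Typical caller (a sketch; the real calls live in the individual
      * filesystems, not here): a read_super() routine that wants a
      * non-default block size does something like
      *
      *     set_blocksize(dev, blocksize);
      *     bh = bread(dev, sb_block, blocksize);
      *
      * so that buffers of the old size are flushed and unhashed before any
      * buffers of the new size are created for the device.
      */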
 551 
 552 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 553 
 554 void refill_freelist(int size)
 555 {
 556         struct buffer_head * bh, * tmp;
 557         struct buffer_head * candidate[NR_LIST];
 558         unsigned int best_time, winner;
 559         int isize = BUFSIZE_INDEX(size);
 560         int buffers[NR_LIST];
 561         int i;
 562         int needed;
 563 
 564         /* First see if we even need this.  Sometimes it is advantageous
 565          to request some blocks in a filesystem that we know we will
 566          be needing ahead of time. */
 567 
 568         if (nr_free[isize] > 100)
 569                 return;
 570 
 571         /* If there are too many dirty buffers, we wake up the update process
 572            now so as to ensure that there are still clean buffers available
 573            for user processes to use (and dirty) */
 574         
 575         /* We are going to try and locate this much memory */
 576         needed =bdf_prm.b_un.nrefill * size;  
 577 
 578         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 579                grow_buffers(GFP_BUFFER, size)) {
 580                 needed -= PAGE_SIZE;
 581         }
 582 
 583         if(needed <= 0) return;
 584 
 585         /* See if there are too many buffers of a different size.
 586            If so, victimize them */
 587 
 588         while(maybe_shrink_lav_buffers(size))
 589          {
 590                  if(!grow_buffers(GFP_BUFFER, size)) break;
 591                  needed -= PAGE_SIZE;
 592                  if(needed <= 0) return;
 593          };
 594 
 595         /* OK, we cannot grow the buffer cache, now try and get some
 596            from the lru list */
 597 
 598         /* First set the candidate pointers to usable buffers.  This
 599            should be quick nearly all of the time. */
 600 
 601 repeat0:
 602         for(i=0; i<NR_LIST; i++){
 603                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 604                    nr_buffers_type[i] == 0) {
 605                         candidate[i] = NULL;
 606                         buffers[i] = 0;
 607                         continue;
 608                 }
 609                 buffers[i] = nr_buffers_type[i];
 610                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 611                  {
 612                          if(buffers[i] < 0) panic("Here is the problem");
 613                          tmp = bh->b_next_free;
 614                          if (!bh) break;
 615                          
 616                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 617                              buffer_dirty(bh)) {
 618                                  refile_buffer(bh);
 619                                  continue;
 620                          }
 621                          
 622                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 623                                   continue;
 624                          
 625                          /* Buffers are written in the order they are placed 
 626                             on the locked list. If we encounter a locked
 627                             buffer here, this means that the rest of them
 628                             are also locked */
 629                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 630                                  buffers[i] = 0;
 631                                  break;
 632                          }
 633                          
 634                          if (BADNESS(bh)) continue;
 635                          break;
 636                  };
 637                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 638                 else candidate[i] = bh;
 639                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 640         }
 641         
 642  repeat:
 643         if(needed <= 0) return;
 644         
 645         /* Now see which candidate wins the election */
 646         
 647         winner = best_time = UINT_MAX;  
 648         for(i=0; i<NR_LIST; i++){
 649                 if(!candidate[i]) continue;
 650                 if(candidate[i]->b_lru_time < best_time){
 651                         best_time = candidate[i]->b_lru_time;
 652                         winner = i;
 653                 }
 654         }
 655         
 656         /* If we have a winner, use it, and then get a new candidate from that list */
 657         if(winner != UINT_MAX) {
 658                 i = winner;
 659                 bh = candidate[i];
 660                 candidate[i] = bh->b_next_free;
 661                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 662                 if (bh->b_count || bh->b_size != size)
 663                          panic("Busy buffer in candidate list\n");
 664                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 665                          panic("Shared buffer in candidate list\n");
 666                 if (buffer_protected(bh))
 667                         panic("Protected buffer in candidate list\n");
 668                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 669                 
 670                 if(bh->b_dev == B_FREE)
 671                         panic("Wrong list");
 672                 remove_from_queues(bh);
 673                 bh->b_dev = B_FREE;
 674                 put_last_free(bh);
 675                 needed -= bh->b_size;
 676                 buffers[i]--;
 677                 if(buffers[i] < 0) panic("Here is the problem");
 678                 
 679                 if(buffers[i] == 0) candidate[i] = NULL;
 680                 
 681                 /* Now all we need to do is advance the candidate pointer
 682                    from the winner list to the next usable buffer */
 683                 if(candidate[i] && buffers[i] > 0){
 684                         if(buffers[i] <= 0) panic("Here is another problem");
 685                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 686                                 if(buffers[i] < 0) panic("Here is the problem");
 687                                 tmp = bh->b_next_free;
 688                                 if (!bh) break;
 689                                 
 690                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 691                                     buffer_dirty(bh)) {
 692                                         refile_buffer(bh);
 693                                         continue;
 694                                 };
 695                                 
 696                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 697                                          continue;
 698                                 
 699                                 /* Buffers are written in the order they are
 700                                    placed on the locked list.  If we encounter
 701                                    a locked buffer here, this means that the
 702                                    rest of them are also locked */
 703                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 704                                         buffers[i] = 0;
 705                                         break;
 706                                 }
 707               
 708                                 if (BADNESS(bh)) continue;
 709                                 break;
 710                         };
 711                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 712                         else candidate[i] = bh;
 713                         if(candidate[i] && candidate[i]->b_count) 
 714                                  panic("Here is the problem");
 715                 }
 716                 
 717                 goto repeat;
 718         }
 719         
 720         if(needed <= 0) return;
 721         
 722         /* Too bad, that was not enough. Try a little harder to grow some. */
 723         
 724         if (nr_free_pages > min_free_pages + 5) {
 725                 if (grow_buffers(GFP_BUFFER, size)) {
 726                         needed -= PAGE_SIZE;
 727                         goto repeat0;
 728                 };
 729         }
 730         
 731         /* and repeat until we find something good */
 732         if (!grow_buffers(GFP_ATOMIC, size))
 733                 wakeup_bdflush(1);
 734         needed -= PAGE_SIZE;
 735         goto repeat0;
 736 }
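     /*
      * In short, refill_freelist() tries three things in order: grow the
      * cache with fresh pages while memory is plentiful, shrink buffers of
      * other sizes via maybe_shrink_lav_buffers(), and finally reclaim the
      * least-recently-used clean, unshared, unlocked buffers from the LRU
      * lists onto free_list[isize], waking bdflush if even that falls short.
      */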
 737 
 738 /*
 739  * Ok, this is getblk, and it isn't very clear, again to hinder
 740  * race-conditions. Most of the code is seldom used, (ie repeating),
 741  * so it should be much more efficient than it looks.
 742  *
 743  * The algorithm is changed: hopefully better, and an elusive bug removed.
 744  *
 745  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 746  * when the filesystem starts to get full of dirty blocks (I hope).
 747  */
 748 struct buffer_head * getblk(kdev_t dev, int block, int size)
 749 {
 750         struct buffer_head * bh;
 751         int isize = BUFSIZE_INDEX(size);
 752 
 753         /* Update this for the buffer size lav. */
 754         buffer_usage[isize]++;
 755 
 756         /* If there are too many dirty buffers, we wake up the update process
 757            now so as to ensure that there are still clean buffers available
 758            for user processes to use (and dirty) */
 759 repeat:
 760         bh = get_hash_table(dev, block, size);
 761         if (bh) {
 762                 if (!buffer_dirty(bh)) {
 763                         if (buffer_uptodate(bh))
 764                                  put_last_lru(bh);
 765                         bh->b_flushtime = 0;
 766                 }
 767                 set_bit(BH_Touched, &bh->b_state);
 768                 return bh;
 769         }
 770 
 771         while(!free_list[isize]) refill_freelist(size);
 772         
 773         if (find_buffer(dev,block,size))
 774                  goto repeat;
 775 
 776         bh = free_list[isize];
 777         remove_from_free_list(bh);
 778 
 779 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 780 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 781         bh->b_count=1;
 782         bh->b_flushtime=0;
 783         bh->b_state=(1<<BH_Touched);
 784         bh->b_dev=dev;
 785         bh->b_blocknr=block;
 786         insert_into_queues(bh);
 787         return bh;
 788 }
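     /*
      * Typical write-side usage (a sketch; the real callers are the
      * individual filesystems):
      *
      *     bh = getblk(dev, block, size);
      *     memcpy(bh->b_data, data, size);
      *     mark_buffer_uptodate(bh, 1);
      *     mark_buffer_dirty(bh, 0);
      *     brelse(bh);
      *
      * The dirty block then sits in the cache until bdflush or a sync
      * writes it back.
      */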
 789 
 790 void set_writetime(struct buffer_head * buf, int flag)
 791 {
 792         int newtime;
 793 
 794         if (buffer_dirty(buf)) {
 795                 /* Set the flush time if b_flushtime is clear, or bring it forward */
 796                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 797                                      bdf_prm.b_un.age_buffer);
 798                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 799                          buf->b_flushtime = newtime;
 800         } else {
 801                 buf->b_flushtime = 0;
 802         }
 803 }
 804 
 805 
 806 /*
 807  * A buffer may need to be moved from one buffer list to another
 808  * (e.g. in case it is not shared any more). Handle this.
 809  */
 810 void refile_buffer(struct buffer_head * buf)
 811 {
 812         int dispose;
 813 
 814         if(buf->b_dev == B_FREE) {
 815                 printk("Attempt to refile free buffer\n");
 816                 return;
 817         }
 818         if (buffer_dirty(buf))
 819                 dispose = BUF_DIRTY;
 820         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 821                 dispose = BUF_SHARED;
 822         else if (buffer_locked(buf))
 823                 dispose = BUF_LOCKED;
 824         else if (buf->b_list == BUF_SHARED)
 825                 dispose = BUF_UNSHARED;
 826         else
 827                 dispose = BUF_CLEAN;
 828         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 829         if(dispose != buf->b_list)  {
 830                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 831                          buf->b_lru_time = jiffies;
 832                 if(dispose == BUF_LOCKED && 
 833                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 834                          dispose = BUF_LOCKED1;
 835                 remove_from_queues(buf);
 836                 buf->b_list = dispose;
 837                 insert_into_queues(buf);
 838                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 839                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 840                    bdf_prm.b_un.nfract/100)
 841                          wakeup_bdflush(0);
 842         }
 843 }
 844 
 845 /*
 846  * Release a buffer head
 847  */
 848 void __brelse(struct buffer_head * buf)
 849 {
 850         wait_on_buffer(buf);
 851 
 852         /* If dirty, mark the time this buffer should be written back */
 853         set_writetime(buf, 0);
 854         refile_buffer(buf);
 855 
 856         if (buf->b_count) {
 857                 buf->b_count--;
 858                 return;
 859         }
 860         printk("VFS: brelse: Trying to free free buffer\n");
 861 }
 862 
 863 /*
 864  * bforget() is like brelse(), except it removes the buffer
 865  * from the hash-queues (so that it won't be re-used if it's
 866  * shared).
 867  */
 868 void __bforget(struct buffer_head * buf)
 869 {
 870         wait_on_buffer(buf);
 871         mark_buffer_clean(buf);
 872         clear_bit(BH_Protected, &buf->b_state);
 873         buf->b_count--;
 874         remove_from_hash_queue(buf);
 875         buf->b_dev = NODEV;
 876         refile_buffer(buf);
 877 }
 878 
 879 /*
 880  * bread() reads a specified block and returns the buffer that contains
 881  * it. It returns NULL if the block was unreadable.
 882  */
 883 struct buffer_head * bread(kdev_t dev, int block, int size)
 884 {
 885         struct buffer_head * bh;
 886 
 887         if (!(bh = getblk(dev, block, size))) {
 888                 printk("VFS: bread: READ error on device %s\n",
 889                         kdevname(dev));
 890                 return NULL;
 891         }
 892         if (buffer_uptodate(bh))
 893                 return bh;
 894         ll_rw_block(READ, 1, &bh);
 895         wait_on_buffer(bh);
 896         if (buffer_uptodate(bh))
 897                 return bh;
 898         brelse(bh);
 899         return NULL;
 900 }
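     /*
      * Typical read-side usage (a sketch):
      *
      *     bh = bread(dev, block, size);
      *     if (!bh)
      *             return -EIO;
      *     ... use bh->b_data ...
      *     brelse(bh);
      *
      * The caller owns one reference (b_count) until brelse() drops it.
      */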
 901 
 902 /*
 903  * Ok, breada can be used as bread, but additionally to mark other
 904  * blocks for reading as well. End the argument list with a negative
 905  * number.
 906  */
 907 
 908 #define NBUF 16
 909 
 910 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 911         unsigned int pos, unsigned int filesize)
 912 {
 913         struct buffer_head * bhlist[NBUF];
 914         unsigned int blocks;
 915         struct buffer_head * bh;
 916         int index;
 917         int i, j;
 918 
 919         if (pos >= filesize)
 920                 return NULL;
 921 
 922         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 923                 return NULL;
 924 
 925         index = BUFSIZE_INDEX(bh->b_size);
 926 
 927         if (buffer_uptodate(bh))
 928                 return(bh);   
 929         else ll_rw_block(READ, 1, &bh);
 930 
 931         blocks = (filesize - pos) >> (9+index);
 932 
 933         if (blocks < (read_ahead[MAJOR(dev)] >> index))
 934                 blocks = read_ahead[MAJOR(dev)] >> index;
 935         if (blocks > NBUF) 
 936                 blocks = NBUF;
 937 
 938 /*      if (blocks) printk("breada (new) %d blocks\n",blocks); */
 939 
 940 
 941         bhlist[0] = bh;
 942         j = 1;
 943         for(i=1; i<blocks; i++) {
 944                 bh = getblk(dev,block+i,bufsize);
 945                 if (buffer_uptodate(bh)) {
 946                         brelse(bh);
 947                         break;
 948                 }
 949                 else bhlist[j++] = bh;
 950         }
 951 
 952         /* Request the read for these buffers, and then release them */
 953         if (j>1)  
 954                 ll_rw_block(READA, (j-1), bhlist+1); 
 955         for(i=1; i<j; i++)
 956                 brelse(bhlist[i]);
 957 
 958         /* Wait for this buffer, and then continue on */
 959         bh = bhlist[0];
 960         wait_on_buffer(bh);
 961         if (buffer_uptodate(bh))
 962                 return bh;
 963         brelse(bh);
 964         return NULL;
 965 }
 966 
 967 /*
 968  * See fs/inode.c for the weird use of volatile..
 969  */
 970 static void put_unused_buffer_head(struct buffer_head * bh)
 971 {
 972         struct wait_queue * wait;
 973 
 974         wait = ((volatile struct buffer_head *) bh)->b_wait;
 975         memset(bh,0,sizeof(*bh));
 976         ((volatile struct buffer_head *) bh)->b_wait = wait;
 977         bh->b_next_free = unused_list;
 978         unused_list = bh;
 979         wake_up(&buffer_wait);
 980 }
 981 
 982 static void get_more_buffer_heads(void)
 983 {
 984         int i;
 985         struct buffer_head * bh;
 986 
 987         for (;;) {
 988                 if (unused_list)
 989                         return;
 990 
 991                 /*
 992                  * This is critical.  We can't swap out pages to get
 993                  * more buffer heads, because the swap-out may need
 994                  * more buffer-heads itself.  Thus GFP_ATOMIC.
 995                  */
 996                 bh = (struct buffer_head *) get_free_page(GFP_ATOMIC);
 997                 if (bh)
 998                         break;
 999 
1000                 /*
1001                  * Uhhuh. We're _really_ low on memory. Now we just
1002                  * wait for old buffer heads to become free due to
1003                  * finishing IO..
1004                  */
1005                 run_task_queue(&tq_disk);
1006                 sleep_on(&buffer_wait);
1007         }
1008 
1009         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
1010                 bh->b_next_free = unused_list;  /* only make link */
1011                 unused_list = bh++;
1012         }
1013 }
1014 
1015 /* 
1016  * We can't put completed temporary IO buffer_heads directly onto the
1017  * unused_list when they become unlocked, since the device driver
1018  * end_request routines still expect access to the buffer_head's
1019  * fields after the final unlock.  So, the device driver puts them on
1020  * the reuse_list instead once IO completes, and we recover these to
1021  * the unused_list here.
1022  *
1023  * The reuse_list receives buffers from interrupt routines, so we need
1024  * to be IRQ-safe here (but note that interrupts only _add_ to the
1025  * reuse_list, never take away. So we don't need to worry about the
1026  * reuse_list magically emptying).
1027  */
1028 static inline void recover_reusable_buffer_heads(void)
1029 {
1030         if (reuse_list) {
1031                 struct buffer_head *bh;
1032                 unsigned long flags;
1033         
1034                 save_flags(flags);
1035                 do {
1036                         cli();
1037                         bh = reuse_list;
1038                         reuse_list = bh->b_next_free;
1039                         restore_flags(flags);
1040                         put_unused_buffer_head(bh);
1041                 } while (reuse_list);
1042         }
1043 }
1044 
1045 static struct buffer_head * get_unused_buffer_head(void)
1046 {
1047         struct buffer_head * bh;
1048 
1049         recover_reusable_buffer_heads();
1050         get_more_buffer_heads();
1051         if (!unused_list)
1052                 return NULL;
1053         bh = unused_list;
1054         unused_list = bh->b_next_free;
1055         bh->b_next_free = NULL;
1056         bh->b_data = NULL;
1057         bh->b_size = 0;
1058         bh->b_state = 0;
1059         return bh;
1060 }
1061 
1062 /*
1063  * Create the appropriate buffers when given a page for data area and
1064  * the size of each buffer.. Use the bh->b_this_page linked list to
1065  * follow the buffers created.  Return NULL if unable to create more
1066  * buffers.
1067  */
1068 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1069 {
1070         struct buffer_head *bh, *head;
1071         long offset;
1072 
1073         head = NULL;
1074         offset = PAGE_SIZE;
1075         while ((offset -= size) >= 0) {
1076                 bh = get_unused_buffer_head();
1077                 if (!bh)
1078                         goto no_grow;
1079                 bh->b_this_page = head;
1080                 head = bh;
1081                 bh->b_data = (char *) (page+offset);
1082                 bh->b_size = size;
1083                 bh->b_dev = B_FREE;  /* Flag as unused */
1084         }
1085         return head;
1086 /*
1087  * In case anything failed, we just free everything we got.
1088  */
1089 no_grow:
1090         bh = head;
1091         while (bh) {
1092                 head = bh;
1093                 bh = bh->b_this_page;
1094                 put_unused_buffer_head(head);
1095         }
1096         return NULL;
1097 }
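     /*
      * Note that the b_this_page chain returned above is NULL-terminated;
      * the callers (brw_page() and grow_buffers()) close it into a ring
      * once all the buffer heads for the page have been set up.
      */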
1098 
1099 /* Run the hooks that have to be done when a page I/O has completed. */
1100 static inline void after_unlock_page (struct page * page)
1101 {
1102         if (clear_bit(PG_decr_after, &page->flags))
1103                 nr_async_pages--;
1104         if (clear_bit(PG_free_after, &page->flags))
1105                 free_page(page_address(page));
1106         if (clear_bit(PG_swap_unlock_after, &page->flags))
1107                 swap_after_unlock_page(page->swap_unlock_entry);
1108 }
1109 
1110 /* Free all temporary buffers belonging to a page. */
1111 static inline void free_async_buffers (struct buffer_head * bh)
1112 {
1113         struct buffer_head * tmp;
1114         unsigned long flags;
1115 
1116         tmp = bh;
1117         save_flags(flags);
1118         cli();
1119         do {
1120                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1121                         printk ("Whoops: unlock_buffer: "
1122                                 "async IO mismatch on page.\n");
1123                         restore_flags(flags);
1124                         return;
1125                 }
1126                 tmp->b_next_free = reuse_list;
1127                 reuse_list = tmp;
1128                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1129                 tmp = tmp->b_this_page;
1130         } while (tmp != bh);
1131         restore_flags(flags);
1132 }
1133 
1134 /*
1135  * Start I/O on a page.
1136  * This function expects the page to be locked and may return before I/O is complete.
1137  * You then have to check page->locked, page->uptodate, and maybe wait on page->wait.
1138  */
1139 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1140 {
1141         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1142         int block, nr;
1143         struct page *page;
1144 
1145         page = mem_map + MAP_NR(address);
1146         if (!PageLocked(page))
1147                 panic("brw_page: page not locked for I/O");
1148         clear_bit(PG_uptodate, &page->flags);
1149         /*
1150          * Allocate buffer heads pointing to this page, just for I/O.
1151          * They do _not_ show up in the buffer hash table!
1152          * They are _not_ registered in page->buffers either!
1153          */
1154         bh = create_buffers(address, size);
1155         if (!bh) {
1156                 clear_bit(PG_locked, &page->flags);
1157                 wake_up(&page->wait);
1158                 return -ENOMEM;
1159         }
1160         nr = 0;
1161         next = bh;
1162         do {
1163                 struct buffer_head * tmp;
1164                 block = *(b++);
1165 
1166                 set_bit(BH_FreeOnIO, &next->b_state);
1167                 next->b_list = BUF_CLEAN;
1168                 next->b_dev = dev;
1169                 next->b_blocknr = block;
1170                 next->b_count = 1;
1171                 next->b_flushtime = 0;
1172                 set_bit(BH_Uptodate, &next->b_state);
1173 
1174                 /* When we use bmap, we define block zero to represent
1175                    a hole.  ll_rw_page, however, may legitimately
1176                    access block zero, and we need to distinguish the
1177                    two cases. 
1178                    */
1179                 if (bmap && !block) {
1180                         memset(next->b_data, 0, size);
1181                         next->b_count--;
1182                         continue;
1183                 }
1184                 tmp = get_hash_table(dev, block, size);
1185                 if (tmp) {
1186                         if (!buffer_uptodate(tmp)) {
1187                                 if (rw == READ)
1188                                         ll_rw_block(READ, 1, &tmp);
1189                                 wait_on_buffer(tmp);
1190                         }
1191                         if (rw == READ) 
1192                                 memcpy(next->b_data, tmp->b_data, size);
1193                         else {
1194                                 memcpy(tmp->b_data, next->b_data, size);
1195                                 mark_buffer_dirty(tmp, 0);
1196                         }
1197                         brelse(tmp);
1198                         next->b_count--;
1199                         continue;
1200                 }
1201                 if (rw == READ)
1202                         clear_bit(BH_Uptodate, &next->b_state);
1203                 else
1204                         set_bit(BH_Dirty, &next->b_state);
1205                 arr[nr++] = next;
1206         } while (prev = next, (next = next->b_this_page) != NULL);
1207         prev->b_this_page = bh;
1208         
1209         if (nr) {
1210                 ll_rw_block(rw, nr, arr);
1211                 /* The rest of the work is done in mark_buffer_uptodate()
1212                  * and unlock_buffer(). */
1213         } else {
1214                 clear_bit(PG_locked, &page->flags);
1215                 set_bit(PG_uptodate, &page->flags);
1216                 wake_up(&page->wait);
1217                 free_async_buffers(bh);
1218                 after_unlock_page(page);
1219         }
1220         ++current->maj_flt;
1221         return 0;
1222 }
1223 
1224 /*
1225  * This is called by end_request() when I/O has completed.
1226  */
1227 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1228 {
1229         if (on) {
1230                 struct buffer_head *tmp = bh;
1231                 set_bit(BH_Uptodate, &bh->b_state);
1232                 /* If a page has buffers and all these buffers are uptodate,
1233                  * then the page is uptodate. */
1234                 do {
1235                         if (!test_bit(BH_Uptodate, &tmp->b_state))
1236                                 return;
1237                         tmp=tmp->b_this_page;
1238                 } while (tmp && tmp != bh);
1239                 set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
1240                 return;
1241         }
1242         clear_bit(BH_Uptodate, &bh->b_state);
1243 }
1244 
1245 /*
1246  * This is called by end_request() when I/O has completed.
1247  */
1248 void unlock_buffer(struct buffer_head * bh)
1249 {
1250         struct buffer_head *tmp;
1251         struct page *page;
1252 
1253         clear_bit(BH_Lock, &bh->b_state);
1254         wake_up(&bh->b_wait);
1255 
1256         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1257                 return;
1258         /* This is a temporary buffer used for page I/O. */
1259         page = mem_map + MAP_NR(bh->b_data);
1260         if (!PageLocked(page)) {
1261                 printk ("Whoops: unlock_buffer: "
1262                         "async io complete on unlocked page\n");
1263                 return;
1264         }
1265         if (bh->b_count != 1) {
1266                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1267                 return;
1268         }
1269         /* Async buffer_heads are here only as labels for IO, and get
1270            thrown away once the IO for this page is complete.  IO is
1271            deemed complete once all buffers have been visited
1272            (b_count==0) and are now unlocked. */
1273         bh->b_count--;
1274         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1275                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1276                         return;
1277         }
1278         /* OK, the async IO on this page is complete. */
1279         clear_bit(PG_locked, &page->flags);
1280         wake_up(&page->wait);
1281         free_async_buffers(bh);
1282         after_unlock_page(page);
1283         wake_up(&buffer_wait);
1284 }
1285 
1286 /*
1287  * Generic "readpage" function for block devices that have the normal
1288  * bmap functionality. This is most of the block device filesystems.
1289  * Reads the page asynchronously --- the unlock_buffer() and
1290  * mark_buffer_uptodate() functions propagate buffer state into the
1291  * page struct once IO has completed.
1292  */
1293 int generic_readpage(struct inode * inode, struct page * page)
1294 {
1295         unsigned long block, address;
1296         int *p, nr[PAGE_SIZE/512];
1297         int i;
1298 
1299         address = page_address(page);
1300         page->count++;
1301         set_bit(PG_locked, &page->flags);
1302         set_bit(PG_free_after, &page->flags);
1303         
1304         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1305         block = page->offset >> inode->i_sb->s_blocksize_bits;
1306         p = nr;
1307         do {
1308                 *p = inode->i_op->bmap(inode, block);
1309                 i--;
1310                 block++;
1311                 p++;
1312         } while (i > 0);
1313 
1314         /* IO start */
1315         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1316         return 0;
1317 }
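
/*
 * Example, assuming a 4 kB page and a filesystem with 1 kB blocks:
 * i starts at 4, so nr[0..3] receive the four on-disk block numbers
 * that bmap() resolves for this page's file offset, and the single
 * brw_page() call then issues all four reads asynchronously.
 */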
1318 
1319 /*
1320  * Try to increase the number of buffers available: the size argument
1321  * is used to determine what kind of buffers we want.
1322  */
1323 static int grow_buffers(int pri, int size)
1324 {
1325         unsigned long page;
1326         struct buffer_head *bh, *tmp;
1327         struct buffer_head * insert_point;
1328         int isize;
1329 
1330         if ((size & 511) || (size > PAGE_SIZE)) {
1331                 printk("VFS: grow_buffers: size = %d\n",size);
1332                 return 0;
1333         }
1334 
1335         isize = BUFSIZE_INDEX(size);
1336 
1337         if (!(page = __get_free_page(pri)))
1338                 return 0;
1339         bh = create_buffers(page, size);
1340         if (!bh) {
1341                 free_page(page);
1342                 return 0;
1343         }
1344 
1345         insert_point = free_list[isize];
1346 
1347         tmp = bh;
1348         while (1) {
1349                 nr_free[isize]++;
1350                 if (insert_point) {
1351                         tmp->b_next_free = insert_point->b_next_free;
1352                         tmp->b_prev_free = insert_point;
1353                         insert_point->b_next_free->b_prev_free = tmp;
1354                         insert_point->b_next_free = tmp;
1355                 } else {
1356                         tmp->b_prev_free = tmp;
1357                         tmp->b_next_free = tmp;
1358                 }
1359                 insert_point = tmp;
1360                 ++nr_buffers;
1361                 if (tmp->b_this_page)
1362                         tmp = tmp->b_this_page;
1363                 else
1364                         break;
1365         }
1366         tmp->b_this_page = bh;
1367         free_list[isize] = bh;
1368         mem_map[MAP_NR(page)].buffers = bh;
1369         buffermem += PAGE_SIZE;
1370         return 1;
1371 }
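
/*
 * Example: grow_buffers(GFP_KERNEL, 1024) takes one free page, has
 * create_buffers() carve it into PAGE_SIZE/1024 buffer heads (four on
 * a machine with 4 kB pages) linked through b_this_page, and threads
 * them onto free_list[BUFSIZE_INDEX(1024)], bumping nr_free, nr_buffers
 * and buffermem accordingly.  This is what buffer_init() does later in
 * this file for BLOCK_SIZE.
 */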
1372 
1373 
1374 /* =========== Reduce the buffer memory ============= */
1375 
1376 /*
1377  * try_to_free_buffer() checks if all the buffers on this particular page
1378  * are unused, and frees the page if so.
1379  */
1380 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1381                        int priority)
1382 {
1383         unsigned long page;
1384         struct buffer_head * tmp, * p;
1385         int isize = BUFSIZE_INDEX(bh->b_size);
1386 
1387         *bhp = bh;
1388         page = (unsigned long) bh->b_data;
1389         page &= PAGE_MASK;
1390         tmp = bh;
1391         do {
1392                 if (!tmp)
1393                         return 0;
1394                 if (tmp->b_count || buffer_protected(tmp) ||
1395                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1396                         return 0;
1397                 if (priority && buffer_touched(tmp))
1398                         return 0;
1399                 tmp = tmp->b_this_page;
1400         } while (tmp != bh);
1401         tmp = bh;
1402         do {
1403                 p = tmp;
1404                 tmp = tmp->b_this_page;
1405                 nr_buffers--;
1406                 nr_buffers_size[isize]--;
1407                 if (p == *bhp)
1408                   {
1409                     *bhp = p->b_prev_free;
1410                     if (p == *bhp) /* Was this the last in the list? */
1411                       *bhp = NULL;
1412                   }
1413                 remove_from_queues(p);
1414                 put_unused_buffer_head(p);
1415         } while (tmp != bh);
1416         buffermem -= PAGE_SIZE;
1417         mem_map[MAP_NR(page)].buffers = NULL;
1418         free_page(page);
1419         return !mem_map[MAP_NR(page)].count;
1420 }
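
/*
 * Note on the two passes above: the first loop only verifies that
 * every buffer on the page is idle (no users, not protected, dirty,
 * locked or waited on, and, at non-zero priority, not recently
 * touched); only if the whole ring qualifies does the second loop
 * unhash the heads and release the page.  *bhp is adjusted along the
 * way so the caller's list cursor never points at a freed head.
 */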
1421 
1422 /* Age buffers on a given page, according to whether they have been
1423    visited recently or not. */
1424 static inline void age_buffer(struct buffer_head *bh)
1425 {
1426         struct buffer_head *tmp = bh;
1427         int touched = 0;
1428 
1429         /*
1430          * When we age a page, we mark all other buffers in the page
1431          * with the "has_aged" flag.  Then, when these aliased buffers
1432          * come up for aging, we skip them until next pass.  This
1433          * ensures that a page full of multiple buffers only gets aged
1434          * once per pass through the lru lists. 
1435          */
1436         if (clear_bit(BH_Has_aged, &bh->b_state))
1437                 return;
1438         
1439         do {
1440                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1441                 tmp = tmp->b_this_page;
1442                 set_bit(BH_Has_aged, &tmp->b_state);
1443         } while (tmp != bh);
1444         clear_bit(BH_Has_aged, &bh->b_state);
1445 
1446         if (touched) 
1447                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1448         else
1449                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1450 }
1451 
1452 /*
1453  * Consult the load average for buffers and decide whether we should
1454  * shrink the buffers of one size.  If we decide yes, do it and return 1.
1455  * Else return 0.  Do not attempt to shrink the size that is
1456  * specified.
1457  *
1458  * I would prefer not to use a load average, but the way things are now it
1459  * seems unavoidable.  The way to get rid of it would be to force clustering
1460  * universally, so that when we reclaim buffers we always reclaim an entire
1461  * page.  Doing this would mean that we all need to move towards QMAGIC.
1462  */
1463 
1464 static int maybe_shrink_lav_buffers(int size)
1465 {          
1466         int nlist;
1467         int isize;
1468         int total_lav, total_n_buffers, n_sizes;
1469         
1470         /* Do not consider the shared buffers since they would not tend
1471            to have getblk called very often, and this would throw off
1472            the lav.  They are not easily reclaimable anyway (let the swapper
1473            make the first move). */
1474   
1475         total_lav = total_n_buffers = n_sizes = 0;
1476         for(nlist = 0; nlist < NR_SIZES; nlist++)
1477          {
1478                  total_lav += buffers_lav[nlist];
1479                  if(nr_buffers_size[nlist]) n_sizes++;
1480                  total_n_buffers += nr_buffers_size[nlist];
1481                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1482          }
1483         
1484         /* See if we have an excessive number of buffers of a particular
1485            size - if so, victimize that bunch. */
1486   
1487         isize = (size ? BUFSIZE_INDEX(size) : -1);
1488         
1489         if (n_sizes > 1)
1490                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1491                   {
1492                           if(nlist == isize) continue;
1493                           if(nr_buffers_size[nlist] &&
1494                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1495                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1496                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1497                                             return 1;
1498                   }
1499         return 0;
1500 }
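
/*
 * The comparison above can be read as: victimize size class nlist
 * when its share of the non-shared buffers,
 *     (nr_buffers_size[nlist] - shared) / total_n_buffers,
 * exceeds lav_const times its share of the getblk load average,
 *     buffers_lav[nlist] / total_lav.
 */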
1501 
1502 /*
1503  * Try to free up some pages by shrinking the buffer-cache
1504  *
1505  * Priority tells the routine how hard to try to shrink the
1506  * buffers: 6 means "don't bother too much", while a value
1507  * of 0 means "we'd better get some free pages now".
1508  *
1509  * "limit" is meant to limit the shrink-action only to pages
1510  * that are in the 0 - limit address range, for DMA re-allocations.
1511  * We ignore that right now.
1512  */
1513 
1514 static int shrink_specific_buffers(unsigned int priority, int size)
1515 {
1516         struct buffer_head *bh;
1517         int nlist;
1518         int i, isize, isize1;
1519 
1520 #ifdef DEBUG
1521         if(size) printk("Shrinking buffers of size %d\n", size);
1522 #endif
1523         /* First try the free lists, and see if we can get a complete page
1524            from here */
1525         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1526 
1527         for(isize = 0; isize<NR_SIZES; isize++){
1528                 if(isize1 != -1 && isize1 != isize) continue;
1529                 bh = free_list[isize];
1530                 if(!bh) continue;
1531                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1532                         if (bh->b_count || buffer_protected(bh) ||
1533                             !bh->b_this_page)
1534                                  continue;
1535                         if (!age_of((unsigned long) bh->b_data) &&
1536                             try_to_free_buffer(bh, &bh, 6))
1537                                  return 1;
1538                         if(!bh) break;
1539                         /* Some interrupt must have used it after we
1540                            freed the page.  No big deal - keep looking */
1541                 }
1542         }
1543         
1544         /* Not enough in the free lists, now try the lru list */
1545         
1546         for(nlist = 0; nlist < NR_LIST; nlist++) {
1547         repeat1:
1548                 if(priority > 2 && nlist == BUF_SHARED) continue;
1549                 i = nr_buffers_type[nlist];
1550                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1551                 for ( ; i > 0; i-- ) {
1552                         bh = next_to_age[nlist];
1553                         if (!bh)
1554                                 break;
1555                         next_to_age[nlist] = bh->b_next_free;
1556 
1557                         /* First, age the buffer. */
1558                         age_buffer(bh);
1559                         /* We may have stalled while waiting for I/O
1560                            to complete. */
1561                         if(bh->b_list != nlist) goto repeat1;
1562                         if (bh->b_count || buffer_protected(bh) ||
1563                             !bh->b_this_page)
1564                                  continue;
1565                         if(size && bh->b_size != size) continue;
1566                         if (buffer_locked(bh))
1567                                  if (priority)
1568                                           continue;
1569                                  else
1570                                           wait_on_buffer(bh);
1571                         if (buffer_dirty(bh)) {
1572                                 bh->b_count++;
1573                                 bh->b_flushtime = 0;
1574                                 ll_rw_block(WRITEA, 1, &bh);
1575                                 bh->b_count--;
1576                                 continue;
1577                         }
1578                         /* At priority 6, only consider really old
1579                            (age==0) buffers for reclaiming.  At
1580                            priority 0, consider any buffers. */
1581                         if ((age_of((unsigned long) bh->b_data) >>
1582                              (6-priority)) > 0)
1583                                 continue;                               
1584                         if (try_to_free_buffer(bh, &bh, 0))
1585                                  return 1;
1586                         if(!bh) break;
1587                 }
1588         }
1589         return 0;
1590 }
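
/*
 * How "priority" steers the loop above: each step up in priority
 * halves the number of lru entries examined per list (the extra
 * right shift after the BUFFEROUT_WEIGHT scaling), and the age test
 * (age >> (6 - priority)) means that at priority 6 only completely
 * aged-out (age == 0) buffers are reclaimed, while at priority 0
 * any buffer whose age is below 64 is fair game.
 */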
1591 
1592 
1593 /* ================== Debugging =================== */
1594 
1595 void show_buffers(void)
1596 {
1597         struct buffer_head * bh;
1598         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1599         int protected = 0;
1600         int shared;
1601         int nlist, isize;
1602 
1603         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1604         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1605         printk("Buffer blocks:   %6d\n",nr_buffers);
1606 
1607         for(nlist = 0; nlist < NR_LIST; nlist++) {
1608           shared = found = locked = dirty = used = lastused = protected = 0;
1609           bh = lru_list[nlist];
1610           if(!bh) continue;
1611           do {
1612                 found++;
1613                 if (buffer_locked(bh))
1614                         locked++;
1615                 if (buffer_protected(bh))
1616                         protected++;
1617                 if (buffer_dirty(bh))
1618                         dirty++;
1619                 if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1)
1620                         shared++;
1621                 if (bh->b_count)
1622                         used++, lastused = found;
1623                 bh = bh->b_next_free;
1624           } while (bh != lru_list[nlist]);
1625           printk("Buffer[%d] mem: %d buffers, %d used (last=%d), "
1626                  "%d locked, %d protected, %d dirty %d shrd\n",
1627                  nlist, found, used, lastused,
1628                  locked, protected, dirty, shared);
1629         };
1630         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1631         for(isize = 0; isize<NR_SIZES; isize++){
1632                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1633                        buffers_lav[isize], nr_free[isize]);
1634                 for(nlist = 0; nlist < NR_LIST; nlist++)
1635                          printk("%7d ", nr_buffers_st[isize][nlist]);
1636                 printk("\n");
1637         }
1638 }
1639 
1640 
1641 /* ====================== Cluster patches for ext2 ==================== */
1642 
1643 /*
1644  * try_to_reassign() checks if all the buffers on this particular page
1645  * are unused, and reassigns them to a new cluster if so.
1646  */
1647 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1648                            kdev_t dev, unsigned int starting_block)
1649 {
1650         unsigned long page;
1651         struct buffer_head * tmp, * p;
1652 
1653         *bhp = bh;
1654         page = (unsigned long) bh->b_data;
1655         page &= PAGE_MASK;
1656         if(mem_map[MAP_NR(page)].count != 1) return 0;
1657         tmp = bh;
1658         do {
1659                 if (!tmp)
1660                          return 0;
1661                 
1662                 if (tmp->b_count || buffer_protected(tmp) ||
1663                     buffer_dirty(tmp) || buffer_locked(tmp))
1664                          return 0;
1665                 tmp = tmp->b_this_page;
1666         } while (tmp != bh);
1667         tmp = bh;
1668         
1669         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1670                  tmp = tmp->b_this_page;
1671         
1672         /* This is the buffer at the head of the page */
1673         bh = tmp;
1674         do {
1675                 p = tmp;
1676                 tmp = tmp->b_this_page;
1677                 remove_from_queues(p);
1678                 p->b_dev = dev;
1679                 mark_buffer_uptodate(p, 0);
1680                 clear_bit(BH_Req, &p->b_state);
1681                 p->b_blocknr = starting_block++;
1682                 insert_into_queues(p);
1683         } while (tmp != bh);
1684         return 1;
1685 }
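
/*
 * In other words: once the page is known to be idle, every buffer
 * head on it is pulled off the hash and lru queues, re-labelled with
 * consecutive block numbers starting at starting_block on dev, marked
 * not uptodate, and reinserted, so the whole page now caches the new
 * cluster.
 */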
1686 
1687 /*
1688  * Try to find a free cluster by locating a page where
1689  * all of the buffers are unused.  We would like this function
1690  * to be atomic, so we do not call anything that might cause
1691  * the process to sleep.  The priority is somewhat similar to
1692  * the priority used in shrink_buffers.
1693  * 
1694  * My thinking is that the kernel should end up using whole
1695  * pages for the buffer cache as much of the time as possible.
1696  * This way the other buffers on a particular page are likely
1697  * to be very near each other on the free list, and we will not
1698  * be expiring data prematurely.  For now we only cannibalize buffers
1699  * of the same size to keep the code simpler.
1700  */
1701 static int reassign_cluster(kdev_t dev, 
1702                      unsigned int starting_block, int size)
1703 {
1704         struct buffer_head *bh;
1705         int isize = BUFSIZE_INDEX(size);
1706         int i;
1707 
1708         /* We want to give ourselves a really good shot at generating
1709            a cluster, and since we only take buffers from the free
1710            list, we "overfill" it a little. */
1711 
1712         while(nr_free[isize] < 32) refill_freelist(size);
1713 
1714         bh = free_list[isize];
1715         if(bh)
1716                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1717                          if (!bh->b_this_page)  continue;
1718                          if (try_to_reassign(bh, &bh, dev, starting_block))
1719                                  return 4;
1720                  }
1721         return 0;
1722 }
1723 
1724 /* This function tries to generate a new cluster of buffers
1725  * from a new page in memory.  We should only do this if we have
1726  * not expanded the buffer cache to the maximum size that we allow.
1727  */
1728 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1729 {
1730         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1731         int isize = BUFSIZE_INDEX(size);
1732         unsigned long offset;
1733         unsigned long page;
1734         int nblock;
1735 
1736         page = get_free_page(GFP_NOBUFFER);
1737         if(!page) return 0;
1738 
1739         bh = create_buffers(page, size);
1740         if (!bh) {
1741                 free_page(page);
1742                 return 0;
1743         };
1744         nblock = block;
1745         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1746                 if (find_buffer(dev, nblock++, size))
1747                          goto not_aligned;
1748         }
1749         tmp = bh;
1750         nblock = 0;
1751         while (1) {
1752                 arr[nblock++] = bh;
1753                 bh->b_count = 1;
1754                 bh->b_flushtime = 0;
1755                 bh->b_state = 0;
1756                 bh->b_dev = dev;
1757                 bh->b_list = BUF_CLEAN;
1758                 bh->b_blocknr = block++;
1759                 nr_buffers++;
1760                 nr_buffers_size[isize]++;
1761                 insert_into_queues(bh);
1762                 if (bh->b_this_page)
1763                         bh = bh->b_this_page;
1764                 else
1765                         break;
1766         }
1767         buffermem += PAGE_SIZE;
1768         mem_map[MAP_NR(page)].buffers = bh;
1769         bh->b_this_page = tmp;
1770         while (nblock-- > 0)
1771                 brelse(arr[nblock]);
1772         return 4; /* ?? */
1773 not_aligned:
1774         while ((tmp = bh) != NULL) {
1775                 bh = bh->b_this_page;
1776                 put_unused_buffer_head(tmp);
1777         }
1778         free_page(page);
1779         return 0;
1780 }
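
/*
 * The find_buffer() scan above is the "not_aligned" check: if any
 * block of the prospective cluster already has a buffer in the cache,
 * the freshly created heads are discarded and the page given back,
 * presumably to avoid ending up with two buffers caching the same
 * block.
 */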
1781 
1782 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1783 {
1784         int i, offset;
1785         
1786         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1787                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1788                 if(find_buffer(dev, b[i], size)) return 0;
1789         };
1790 
1791         /* OK, we have a candidate for a new cluster */
1792         
1793         /* See if one size of buffer is over-represented in the buffer cache,
1794            if so, reduce the number of buffers */
1795         if(maybe_shrink_lav_buffers(size))
1796          {
1797                  int retval;
1798                  retval = try_to_generate_cluster(dev, b[0], size);
1799                  if(retval) return retval;
1800          };
1801         
1802         if (nr_free_pages > min_free_pages*2) 
1803                  return try_to_generate_cluster(dev, b[0], size);
1804         else
1805                  return reassign_cluster(dev, b[0], size);
1806 }
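
/*
 * generate_cluster() only goes ahead when b[] holds strictly
 * consecutive block numbers covering a whole page, none of which is
 * already in the buffer cache.  It then either builds the cluster on
 * a brand-new page (when free memory is comfortably above
 * min_free_pages) or cannibalizes an idle page from the free list
 * via reassign_cluster().
 */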
1807 
1808 
1809 /* ===================== Init ======================= */
1810 
1811 /*
1812  * This initializes the initial buffer free list.  nr_buffers_type is set
1813  * to one less than the actual number of buffers, as a sop to backwards
1814  * compatibility --- the old code did this (I think unintentionally,
1815  * but I'm not sure), and programs in the ps package expect it.
1816  *                                      - TYT 8/30/92
1817  */
1818 void buffer_init(void)
1819 {
1820         int i;
1821         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1822         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1823 
1824         if (memsize >= 64*1024*1024)
1825                 nr_hash = 65521;
1826         else if (memsize >= 32*1024*1024)
1827                 nr_hash = 32749;
1828         else if (memsize >= 16*1024*1024)
1829                 nr_hash = 16381;
1830         else if (memsize >= 8*1024*1024)
1831                 nr_hash = 8191;
1832         else if (memsize >= 4*1024*1024)
1833                 nr_hash = 4093;
1834         else nr_hash = 997;
1835         
1836         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1837                                                      sizeof(struct buffer_head *));
1838 
1839 
1840         for (i = 0 ; i < nr_hash ; i++)
1841                 hash_table[i] = NULL;
1842         lru_list[BUF_CLEAN] = 0;
1843         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1844         if (!free_list[isize])
1845                 panic("VFS: Unable to initialize buffer free list!");
1846         return;
1847 }
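
/*
 * The hash sizes chosen above (997, 4093, 8191, 16381, 32749, 65521)
 * are all primes just below a power of two, giving roughly one hash
 * chain per kilobyte of memory on the larger configurations, which
 * presumably helps the block-number hash spread buffers evenly over
 * the table.
 */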
1848 
1849 
1850 /* ====================== bdflush support =================== */
1851 
1852 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1853  * response to dirty buffers.  Once this process is activated, we write back
1854  * a limited number of buffers to the disks and then go back to sleep again.
1855  */
1856 struct wait_queue * bdflush_wait = NULL;
1857 struct wait_queue * bdflush_done = NULL;
1858 
1859 static void wakeup_bdflush(int wait)
1860 {
1861         wake_up(&bdflush_wait);
1862         if (wait) {
1863                 run_task_queue(&tq_disk);
1864                 sleep_on(&bdflush_done);
1865         }
1866 }
1867 
1868 
1869 /* 
1870  * Here we attempt to write back old buffers.  We also try to flush
1871  * inodes and superblocks, since this function is essentially "update", and
1872  * otherwise there would be no way of ensuring that these quantities ever
1873  * get written back.  Ideally, we would have a timestamp on the inodes
1874  * and superblocks so that we could write back only the old ones as well.
1875  */
1876 
1877 asmlinkage int sync_old_buffers(void)
1878 {
1879         int i, isize;
1880         int ndirty, nwritten;
1881         int nlist;
1882         int ncount;
1883         struct buffer_head * bh, *next;
1884 
1885         sync_supers(0);
1886         sync_inodes(0);
1887 
1888         ncount = 0;
1889 #ifdef DEBUG
1890         for(nlist = 0; nlist < NR_LIST; nlist++)
1891 #else
1892         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1893 #endif
1894         {
1895                 ndirty = 0;
1896                 nwritten = 0;
1897         repeat:
1898                 bh = lru_list[nlist];
1899                 if(bh) 
1900                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1901                                  /* We may have stalled while waiting for I/O to complete. */
1902                                  if(bh->b_list != nlist) goto repeat;
1903                                  next = bh->b_next_free;
1904                                  if(!lru_list[nlist]) {
1905                                          printk("Dirty list empty %d\n", i);
1906                                          break;
1907                                  }
1908                                  
1909                                  /* Clean buffer on dirty list?  Refile it */
1910                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1911                                   {
1912                                           refile_buffer(bh);
1913                                           continue;
1914                                   }
1915                                  
1916                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1917                                           continue;
1918                                  ndirty++;
1919                                  if(bh->b_flushtime > jiffies) continue;
1920                                  nwritten++;
1921                                  bh->b_count++;
1922                                  bh->b_flushtime = 0;
1923 #ifdef DEBUG
1924                                  if(nlist != BUF_DIRTY) ncount++;
1925 #endif
1926                                  ll_rw_block(WRITE, 1, &bh);
1927                                  bh->b_count--;
1928                          }
1929         }
1930 #ifdef DEBUG
1931         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1932         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1933 #endif
1934         
1935         /* We assume that we only come through here on a regular
1936            schedule, like every 5 seconds.  Now update load averages.  
1937            Shift usage counts to prevent overflow. */
1938         for(isize = 0; isize<NR_SIZES; isize++){
1939                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1940                 buffer_usage[isize] = 0;
1941         }
1942         return 0;
1943 }
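
/*
 * The CALC_LOAD() step folds the per-size usage counts accumulated in
 * buffer_usage[] since the last pass into buffers_lav[] as an
 * exponentially decaying average, with bdf_prm.b_un.lav_const as the
 * decay constant (the same macro that maintains the system load
 * average), and then clears the counters for the next interval.
 */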
1944 
1945 
1946 /* This is the interface to bdflush.  As we get more sophisticated, we can
1947  * pass tuning parameters to this "process", to adjust how it behaves. 
1948  * We would want to verify each parameter, however, to make sure that it 
1949  * is reasonable. */
1950 
1951 asmlinkage int sys_bdflush(int func, long data)
1952 {
1953         int i, error;
1954 
1955         if (!suser())
1956                 return -EPERM;
1957 
1958         if (func == 1)
1959                  return sync_old_buffers();
1960 
1961         /* Basically func 2 means read param 0, func 3 means write param 0, etc */
1962         if (func >= 2) {
1963                 i = (func-2) >> 1;
1964                 if (i < 0 || i >= N_PARAM)
1965                         return -EINVAL;
1966                 if((func & 1) == 0) {
1967                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1968                         if (error)
1969                                 return error;
1970                         put_user(bdf_prm.data[i], (int*)data);
1971                         return 0;
1972                 };
1973                 if (data < bdflush_min[i] || data > bdflush_max[i])
1974                         return -EINVAL;
1975                 bdf_prm.data[i] = data;
1976                 return 0;
1977         };
1978 
1979         /* Func 0 used to launch the actual bdflush and then never
1980            return (unless explicitly killed). We return zero here to
1981            remain semi-compatible with present update(8) programs. */
1982 
1983         return 0;
1984 }
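
/*
 * Usage sketch for the encoding above (the meaning of each parameter
 * slot is given by bdf_prm earlier in this file): a caller such as
 * update(8) can use bdflush(2, (long) &val) to read bdf_prm.data[0],
 * bdflush(3, newval) to set it after the bdflush_min/bdflush_max
 * range check, bdflush(4, ...) to read data[1], and so on; in general
 * i = (func - 2) >> 1, even func reads, odd func writes.
 */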
1985 
1986 /* This is the actual bdflush daemon itself. It used to be started from
1987  * the syscall above, but now we launch it ourselves internally with
1988  * kernel_thread(...)  directly after the first thread in init/main.c */
1989 
1990 int bdflush(void * unused) 
1991 {
1992         int i;
1993         int ndirty;
1994         int nlist;
1995         int ncount;
1996         struct buffer_head * bh, *next;
1997 
1998         /*
1999          *      We have a bare-bones task_struct, and really should fill
2000          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
2001          *      display semi-sane things. Not really crucial, though...
2002          */
2003 
2004         current->session = 1;
2005         current->pgrp = 1;
2006         sprintf(current->comm, "kflushd");
2007 
2008         /*
2009          *      As a kernel thread we want to tamper with system buffers
2010          *      and other internals and thus be subject to the SMP locking
2011          *      rules. (On a uniprocessor box this does nothing).
2012          */
2013          
2014 #ifdef __SMP__
2015         lock_kernel();
2016         syscall_count++;
2017 #endif
2018                  
2019         for (;;) {
2020 #ifdef DEBUG
2021                 printk("bdflush() activated...");
2022 #endif
2023                 
2024                 ncount = 0;
2025 #ifdef DEBUG
2026                 for(nlist = 0; nlist < NR_LIST; nlist++)
2027 #else
2028                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
2029 #endif
2030                  {
2031                          ndirty = 0;
2032                  repeat:
2033                          bh = lru_list[nlist];
2034                          if(bh) 
2035                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
2036                                        bh = next) {
2037                                           /* We may have stalled while waiting for I/O to complete. */
2038                                           if(bh->b_list != nlist) goto repeat;
2039                                           next = bh->b_next_free;
2040                                           if(!lru_list[nlist]) {
2041                                                   printk("Dirty list empty %d\n", i);
2042                                                   break;
2043                                           }
2044                                           
2045                                           /* Clean buffer on dirty list?  Refile it */
2046                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
2047                                            {
2048                                                    refile_buffer(bh);
2049                                                    continue;
2050                                            }
2051                                           
2052                                           if (buffer_locked(bh) || !buffer_dirty(bh))
2053                                                    continue;
2054                                           /* Should we write back buffers that are shared or not??
2055                                              Currently dirty buffers are not shared, so it does not matter. */
2056                                           bh->b_count++;
2057                                           ndirty++;
2058                                           bh->b_flushtime = 0;
2059                                           ll_rw_block(WRITE, 1, &bh);
2060 #ifdef DEBUG
2061                                           if(nlist != BUF_DIRTY) ncount++;
2062 #endif
2063                                           bh->b_count--;
2064                                   }
2065                  }
2066 #ifdef DEBUG
2067                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2068                 printk("sleeping again.\n");
2069 #endif
2070                 run_task_queue(&tq_disk);
2071                 wake_up(&bdflush_done);
2072                 
2073                 /* If there are still a lot of dirty buffers around, skip the sleep
2074                    and flush some more */
2075                 
2076                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2077                    bdf_prm.b_un.nfract/100) {
2078                         current->signal = 0;
2079                         interruptible_sleep_on(&bdflush_wait);
2080                 }
2081         }
2082 }
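
/*
 * Sleep/wakeup behaviour of the loop above: after each flushing pass
 * bdflush wakes anyone sleeping on bdflush_done, and only goes back
 * to sleep on bdflush_wait once the dirty buffers have dropped to at
 * most nfract per cent of the non-shared buffers; otherwise it starts
 * another pass immediately.
 */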
2083 
2084 
2085 /*
2086  * Overrides for Emacs so that we follow Linus's tabbing style.
2087  * Emacs will notice this stuff at the end of the file and automatically
2088  * adjust the settings for this buffer only.  This must remain at the end
2089  * of the file.
2090  * ---------------------------------------------------------------------------
2091  * Local variables:
2092  * c-indent-level: 8
2093  * c-brace-imaginary-offset: 0
2094  * c-brace-offset: -8
2095  * c-argdecl-indent: 8
2096  * c-label-offset: -8
2097  * c-continued-statement-offset: 8
2098  * c-continued-brace-offset: 0
2099  * End:
2100  */
