root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. brw_page
  34. mark_buffer_uptodate
  35. unlock_buffer
  36. generic_readpage
  37. grow_buffers
  38. try_to_free_buffer
  39. age_buffer
  40. maybe_shrink_lav_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush
  51. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 5
  39 static char buffersize_index[17] =
  40 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
  41 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
  42 
  43 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  44 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
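
/*
 * Worked example of the size mapping above: a 4096-byte buffer gives
 * 4096 >> 9 == 8, buffersize_index[8] == 3, so BUFSIZE_INDEX(4096) == 3
 * and bufferindex_size[3] == 4096.  Only the power-of-two sizes 512,
 * 1024, 2048, 4096 and 8192 map to a valid slot; any other size yields -1.
 */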
  45 
  46 static int grow_buffers(int pri, int size);
  47 static int shrink_specific_buffers(unsigned int priority, int size);
  48 static int maybe_shrink_lav_buffers(int);
  49 
  50 static int nr_hash = 0;  /* Size of hash table */
  51 static struct buffer_head ** hash_table;
  52 struct buffer_head ** buffer_pages;
  53 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  54 /* next_to_age is an array of pointers into the lru lists, used to
  55    cycle through the buffers aging their contents when deciding which
  56    buffers to discard when more memory is needed */
  57 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  58 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  59 static struct buffer_head * unused_list = NULL;
  60 struct buffer_head * reuse_list = NULL;
  61 static struct wait_queue * buffer_wait = NULL;
  62 
  63 int nr_buffers = 0;
  64 int nr_buffers_type[NR_LIST] = {0,};
  65 int nr_buffers_size[NR_SIZES] = {0,};
  66 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  67 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  68 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  69 int nr_free[NR_SIZES] = {0,};
  70 int buffermem = 0;
  71 int nr_buffer_heads = 0;
  72 extern int *blksize_size[];
  73 
  74 /* Here is the parameter block for the bdflush process. */
  75 static void wakeup_bdflush(int);
  76 
  77 #define N_PARAM 9
  78 #define LAV
  79 
  80 static union bdflush_param{
  81         struct {
  82                 int nfract;  /* Percentage of buffer cache dirty to 
  83                                 activate bdflush */
  84                 int ndirty;  /* Maximum number of dirty blocks to write out per
  85                                 wake-cycle */
  86                 int nrefill; /* Number of clean buffers to try and obtain
  87                                 each time we call refill */
  88                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  89                                   when trying to refill buffers. */
  90                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  91                                     search for free clusters */
  92                 int age_buffer;  /* Time for normal buffer to age before 
  93                                     we flush it */
  94                 int age_super;  /* Time for superblock to age before we 
  95                                    flush it */
  96                 int lav_const;  /* Constant used for load average (time
   97                                    constant) */
  98                 int lav_ratio;  /* Used to determine how low a lav for a
  99                                    particular size can go before we start to
 100                                    trim back the buffers */
 101         } b_un;
 102         unsigned int data[N_PARAM];
 103 } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 104 
 105 /* The lav constant is set for 1 minute, as long as the update process runs
 106    every 5 seconds.  If you change the frequency of update, the time
 107    constant will also change. */
 108 
 109 
 110 /* These are the min and max parameter values that we will allow to be assigned */
 111 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 112 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
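
/*
 * Because every field of b_un is an int, bdf_prm.data[i] aliases the i-th
 * named parameter: data[0] is nfract, data[1] is ndirty, and so on up to
 * data[8] (lav_ratio).  This is what allows the parameters to be read and
 * assigned by index, bounded by bdflush_min[] and bdflush_max[].  A sketch,
 * using hypothetical locals 'number' and 'data':
 *
 *	if (data >= bdflush_min[number] && data <= bdflush_max[number])
 *		bdf_prm.data[number] = data;
 */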
 113 
 114 /*
 115  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  116  * and got rid of the cli-sti pairs. The wait-queue routines still
 117  * need cli-sti, but now it's just a couple of 386 instructions or so.
 118  *
 119  * Note that the real wait_on_buffer() is an inline function that checks
 120  * if 'b_wait' is set before calling this, so that the queues aren't set
 121  * up unnecessarily.
 122  */
 123 void __wait_on_buffer(struct buffer_head * bh)
 124 {
 125         struct wait_queue wait = { current, NULL };
 126 
 127         bh->b_count++;
 128         add_wait_queue(&bh->b_wait, &wait);
 129 repeat:
 130         current->state = TASK_UNINTERRUPTIBLE;
 131         if (buffer_locked(bh)) {
 132                 schedule();
 133                 goto repeat;
 134         }
 135         remove_wait_queue(&bh->b_wait, &wait);
 136         bh->b_count--;
 137         current->state = TASK_RUNNING;
 138 }
 139 
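/*
 * A minimal sketch of the inline wrapper mentioned above (the real one
 * lives in <linux/locks.h>, and the exact test has varied between
 * versions):
 *
 *	extern inline void wait_on_buffer(struct buffer_head * bh)
 *	{
 *		if (buffer_locked(bh))
 *			__wait_on_buffer(bh);
 *	}
 */
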
 140 /* Call sync_buffers with wait!=0 to ensure that the call does not
 141    return until all buffer writes have completed.  Sync() may return
 142    before the writes have finished; fsync() may not. */
 143 
 144 
 145 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
  146    spontaneously dirty themselves without brelse() ever being called.
  147    We will ultimately want to put these in a separate list, but for
  148    now we search all of the lists for dirty buffers. */
 149 
 150 static int sync_buffers(kdev_t dev, int wait)
 151 {
 152         int i, retry, pass = 0, err = 0;
 153         int nlist, ncount;
 154         struct buffer_head * bh, *next;
 155 
 156         /* One pass for no-wait, three for wait:
 157            0) write out all dirty, unlocked buffers;
 158            1) write out all dirty buffers, waiting if locked;
 159            2) wait for completion by waiting for all buffers to unlock. */
 160  repeat:
 161         retry = 0;
 162  repeat2:
 163         ncount = 0;
 164         /* We search all lists as a failsafe mechanism, not because we expect
 165            there to be dirty buffers on any of the other lists. */
 166         for(nlist = 0; nlist < NR_LIST; nlist++)
 167          {
 168          repeat1:
 169                  bh = lru_list[nlist];
 170                  if(!bh) continue;
 171                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 172                          if(bh->b_list != nlist) goto repeat1;
 173                          next = bh->b_next_free;
 174                          if(!lru_list[nlist]) break;
 175                          if (dev && bh->b_dev != dev)
 176                                   continue;
 177                          if (buffer_locked(bh))
 178                           {
 179                                   /* Buffer is locked; skip it unless wait is
 180                                      requested AND pass > 0. */
 181                                   if (!wait || !pass) {
 182                                           retry = 1;
 183                                           continue;
 184                                   }
 185                                   wait_on_buffer (bh);
 186                                   goto repeat2;
 187                           }
 188                          /* If an unlocked buffer is not uptodate, there has
 189                              been an IO error. Skip it. */
 190                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 191                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 192                                   err = 1;
 193                                   continue;
 194                           }
 195                          /* Don't write clean buffers.  Don't write ANY buffers
 196                             on the third pass. */
 197                          if (!buffer_dirty(bh) || pass>=2)
 198                                   continue;
 199                          /* don't bother about locked buffers */
 200                          if (buffer_locked(bh))
 201                                  continue;
 202                          bh->b_count++;
 203                          bh->b_flushtime = 0;
 204                          ll_rw_block(WRITE, 1, &bh);
 205 
 206                          if(nlist != BUF_DIRTY) { 
 207                                  printk("[%d %s %ld] ", nlist,
 208                                         kdevname(bh->b_dev), bh->b_blocknr);
 209                                  ncount++;
 210                          };
 211                          bh->b_count--;
 212                          retry = 1;
 213                  }
 214          }
 215         if (ncount)
 216           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 217         
 218         /* If we are waiting for the sync to succeed, and if any dirty
 219            blocks were written, then repeat; on the second pass, only
 220            wait for buffers being written (do not pass to write any
 221            more buffers on the second pass). */
 222         if (wait && retry && ++pass<=2)
 223                  goto repeat;
 224         return err;
 225 }
 226 
 227 void sync_dev(kdev_t dev)
 228 {
 229         sync_buffers(dev, 0);
 230         sync_supers(dev);
 231         sync_inodes(dev);
 232         sync_buffers(dev, 0);
 233         sync_dquots(dev, -1);
 234 }
 235 
 236 int fsync_dev(kdev_t dev)
 237 {
 238         sync_buffers(dev, 0);
 239         sync_supers(dev);
 240         sync_inodes(dev);
 241         sync_dquots(dev, -1);
 242         return sync_buffers(dev, 1);
 243 }
 244 
 245 asmlinkage int sys_sync(void)
 246 {
 247         fsync_dev(0);
 248         return 0;
 249 }
 250 
 251 int file_fsync (struct inode *inode, struct file *filp)
 252 {
 253         return fsync_dev(inode->i_dev);
 254 }
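
/*
 * Simple filesystems with no finer-grained way to flush a single file
 * can plug file_fsync() straight into the fsync slot of their
 * file_operations; it simply synchronizes the whole device the inode
 * lives on via fsync_dev() above.
 */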
 255 
 256 asmlinkage int sys_fsync(unsigned int fd)
 257 {
 258         struct file * file;
 259         struct inode * inode;
 260 
 261         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 262                 return -EBADF;
 263         if (!file->f_op || !file->f_op->fsync)
 264                 return -EINVAL;
 265         if (file->f_op->fsync(inode,file))
 266                 return -EIO;
 267         return 0;
 268 }
 269 
 270 asmlinkage int sys_fdatasync(unsigned int fd)
 271 {
 272         struct file * file;
 273         struct inode * inode;
 274 
 275         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 276                 return -EBADF;
 277         if (!file->f_op || !file->f_op->fsync)
 278                 return -EINVAL;
 279         /* this needs further work, at the moment it is identical to fsync() */
 280         if (file->f_op->fsync(inode,file))
 281                 return -EIO;
 282         return 0;
 283 }
 284 
 285 void invalidate_buffers(kdev_t dev)
 286 {
 287         int i;
 288         int nlist;
 289         struct buffer_head * bh;
 290 
 291         for(nlist = 0; nlist < NR_LIST; nlist++) {
 292                 bh = lru_list[nlist];
 293                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 294                         if (bh->b_dev != dev)
 295                                 continue;
 296                         wait_on_buffer(bh);
 297                         if (bh->b_dev != dev)
 298                                 continue;
 299                         if (bh->b_count)
 300                                 continue;
 301                         bh->b_flushtime = 0;
 302                         clear_bit(BH_Protected, &bh->b_state);
 303                         clear_bit(BH_Uptodate, &bh->b_state);
 304                         clear_bit(BH_Dirty, &bh->b_state);
 305                         clear_bit(BH_Req, &bh->b_state);
 306                 }
 307         }
 308 }
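
/*
 * Typical caller (sketch): when a removable-media driver reports a media
 * change, check_disk_change() throws away everything cached for that
 * device, roughly
 *
 *	invalidate_inodes(dev);
 *	invalidate_buffers(dev);
 *
 * which is the floppy disk-change case mentioned at the top of this file.
 */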
 309 
 310 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 311 #define hash(dev,block) hash_table[_hashfn(dev,block)]
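
/*
 * A buffer lives in bucket (HASHDEV(dev) ^ block) % nr_hash, where
 * HASHDEV() is normally just the numeric device value.  Buffers that hash
 * to the same bucket are chained through b_next/b_prev; find_buffer()
 * below walks that chain.
 */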
 312 
 313 static inline void remove_from_hash_queue(struct buffer_head * bh)
 314 {
 315         if (bh->b_next)
 316                 bh->b_next->b_prev = bh->b_prev;
 317         if (bh->b_prev)
 318                 bh->b_prev->b_next = bh->b_next;
 319         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 320                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 321         bh->b_next = bh->b_prev = NULL;
 322 }
 323 
 324 static inline void remove_from_lru_list(struct buffer_head * bh)
 325 {
 326         if (!(bh->b_prev_free) || !(bh->b_next_free))
 327                 panic("VFS: LRU block list corrupted");
 328         if (bh->b_dev == B_FREE)
 329                 panic("LRU list corrupted");
 330         bh->b_prev_free->b_next_free = bh->b_next_free;
 331         bh->b_next_free->b_prev_free = bh->b_prev_free;
 332 
 333         if (lru_list[bh->b_list] == bh)
 334                  lru_list[bh->b_list] = bh->b_next_free;
 335         if (lru_list[bh->b_list] == bh)
 336                  lru_list[bh->b_list] = NULL;
 337         if (next_to_age[bh->b_list] == bh)
 338                 next_to_age[bh->b_list] = bh->b_next_free;
 339         if (next_to_age[bh->b_list] == bh)
 340                 next_to_age[bh->b_list] = NULL;
 341 
 342         bh->b_next_free = bh->b_prev_free = NULL;
 343 }
 344 
 345 static inline void remove_from_free_list(struct buffer_head * bh)
 346 {
 347         int isize = BUFSIZE_INDEX(bh->b_size);
 348         if (!(bh->b_prev_free) || !(bh->b_next_free))
 349                 panic("VFS: Free block list corrupted");
 350         if(bh->b_dev != B_FREE)
 351                 panic("Free list corrupted");
 352         if(!free_list[isize])
 353                 panic("Free list empty");
 354         nr_free[isize]--;
 355         if(bh->b_next_free == bh)
 356                  free_list[isize] = NULL;
 357         else {
 358                 bh->b_prev_free->b_next_free = bh->b_next_free;
 359                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 360                 if (free_list[isize] == bh)
 361                          free_list[isize] = bh->b_next_free;
 362         };
 363         bh->b_next_free = bh->b_prev_free = NULL;
 364 }
 365 
 366 static inline void remove_from_queues(struct buffer_head * bh)
 367 {
 368         if(bh->b_dev == B_FREE) {
 369                 remove_from_free_list(bh); /* Free list entries should not be
 370                                               in the hash queue */
 371                 return;
 372         };
 373         nr_buffers_type[bh->b_list]--;
 374         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 375         remove_from_hash_queue(bh);
 376         remove_from_lru_list(bh);
 377 }
 378 
 379 static inline void put_last_lru(struct buffer_head * bh)
 380 {
 381         if (!bh)
 382                 return;
 383         if (bh == lru_list[bh->b_list]) {
 384                 lru_list[bh->b_list] = bh->b_next_free;
 385                 if (next_to_age[bh->b_list] == bh)
 386                         next_to_age[bh->b_list] = bh->b_next_free;
 387                 return;
 388         }
 389         if(bh->b_dev == B_FREE)
 390                 panic("Wrong block for lru list");
 391         remove_from_lru_list(bh);
 392 /* add to back of free list */
 393 
 394         if(!lru_list[bh->b_list]) {
 395                 lru_list[bh->b_list] = bh;
 396                 lru_list[bh->b_list]->b_prev_free = bh;
 397         };
 398         if (!next_to_age[bh->b_list])
 399                 next_to_age[bh->b_list] = bh;
 400 
 401         bh->b_next_free = lru_list[bh->b_list];
 402         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 403         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 404         lru_list[bh->b_list]->b_prev_free = bh;
 405 }
 406 
 407 static inline void put_last_free(struct buffer_head * bh)
 408 {
 409         int isize;
 410         if (!bh)
 411                 return;
 412 
 413         isize = BUFSIZE_INDEX(bh->b_size);      
 414         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 415         /* add to back of free list */
 416         if(!free_list[isize]) {
 417                 free_list[isize] = bh;
 418                 bh->b_prev_free = bh;
 419         };
 420 
 421         nr_free[isize]++;
 422         bh->b_next_free = free_list[isize];
 423         bh->b_prev_free = free_list[isize]->b_prev_free;
 424         free_list[isize]->b_prev_free->b_next_free = bh;
 425         free_list[isize]->b_prev_free = bh;
 426 }
 427 
 428 static inline void insert_into_queues(struct buffer_head * bh)
 429 {
 430         /* put at end of free list */
 431         if(bh->b_dev == B_FREE) {
 432                 put_last_free(bh);
 433                 return;
 434         }
 435         if(!lru_list[bh->b_list]) {
 436                 lru_list[bh->b_list] = bh;
 437                 bh->b_prev_free = bh;
 438         }
 439         if (!next_to_age[bh->b_list])
 440                 next_to_age[bh->b_list] = bh;
 441         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 442         bh->b_next_free = lru_list[bh->b_list];
 443         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 444         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 445         lru_list[bh->b_list]->b_prev_free = bh;
 446         nr_buffers_type[bh->b_list]++;
 447         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 448 /* put the buffer in new hash-queue if it has a device */
 449         bh->b_prev = NULL;
 450         bh->b_next = NULL;
 451         if (!(bh->b_dev))
 452                 return;
 453         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 454         hash(bh->b_dev,bh->b_blocknr) = bh;
 455         if (bh->b_next)
 456                 bh->b_next->b_prev = bh;
 457 }
 458 
 459 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 460 {               
 461         struct buffer_head * tmp;
 462 
 463         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 464                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 465                         if (tmp->b_size == size)
 466                                 return tmp;
 467                         else {
 468                                 printk("VFS: Wrong blocksize on device %s\n",
 469                                         kdevname(dev));
 470                                 return NULL;
 471                         }
 472         return NULL;
 473 }
 474 
 475 /*
 476  * Why like this, I hear you say... The reason is race-conditions.
 477  * As we don't lock buffers (unless we are reading them, that is),
 478  * something might happen to it while we sleep (ie a read-error
 479  * will force it bad). This shouldn't really happen currently, but
 480  * the code is ready.
 481  */
 482 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 483 {
 484         struct buffer_head * bh;
 485 
 486         for (;;) {
 487                 if (!(bh=find_buffer(dev,block,size)))
 488                         return NULL;
 489                 bh->b_count++;
 490                 wait_on_buffer(bh);
 491                 if (bh->b_dev == dev && bh->b_blocknr == block
 492                                              && bh->b_size == size)
 493                         return bh;
 494                 bh->b_count--;
 495         }
 496 }
 497 
 498 void set_blocksize(kdev_t dev, int size)
 499 {
 500         int i, nlist;
 501         struct buffer_head * bh, *bhnext;
 502 
 503         if (!blksize_size[MAJOR(dev)])
 504                 return;
 505 
 506         if (size > PAGE_SIZE)
 507                 size = 0;
 508 
 509         switch(size) {
 510                 default: panic("Invalid blocksize passed to set_blocksize");
 511                 case 512: case 1024: case 2048: case 4096: case 8192: ;
 512         }
 513 
 514         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 515                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 516                 return;
 517         }
 518         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 519                 return;
 520         sync_buffers(dev, 2);
 521         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 522 
 523   /* We need to be quite careful how we do this - we are moving entries
 524      around on the free list, and we can get in a loop if we are not careful.*/
 525 
 526         for(nlist = 0; nlist < NR_LIST; nlist++) {
 527                 bh = lru_list[nlist];
 528                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 529                         if(!bh) break;
 530                         bhnext = bh->b_next_free; 
 531                         if (bh->b_dev != dev)
 532                                  continue;
 533                         if (bh->b_size == size)
 534                                  continue;
 535                         
 536                         wait_on_buffer(bh);
 537                         if (bh->b_dev == dev && bh->b_size != size) {
 538                                 clear_bit(BH_Dirty, &bh->b_state);
 539                                 clear_bit(BH_Uptodate, &bh->b_state);
 540                                 clear_bit(BH_Req, &bh->b_state);
 541                                 bh->b_flushtime = 0;
 542                         }
 543                         remove_from_hash_queue(bh);
 544                 }
 545         }
 546 }
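
/*
 * Typical caller (sketch): a filesystem whose block size differs from the
 * default switches the device over in its read_super routine before
 * reading the superblock, with 'blocksize' being whatever the filesystem
 * settled on:
 *
 *	set_blocksize(dev, blocksize);
 *	bh = bread(dev, block, blocksize);
 */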
 547 
 548 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 549 
 550 void refill_freelist(int size)
 551 {
 552         struct buffer_head * bh, * tmp;
 553         struct buffer_head * candidate[NR_LIST];
 554         unsigned int best_time, winner;
 555         int isize = BUFSIZE_INDEX(size);
 556         int buffers[NR_LIST];
 557         int i;
 558         int needed;
 559 
 560         /* First see if we even need this.  Sometimes it is advantageous
  561          to request some blocks in a filesystem that we know we will
 562          be needing ahead of time. */
 563 
 564         if (nr_free[isize] > 100)
 565                 return;
 566 
 567         /* If there are too many dirty buffers, we wake up the update process
 568            now so as to ensure that there are still clean buffers available
 569            for user processes to use (and dirty) */
 570         
 571         /* We are going to try and locate this much memory */
 572         needed =bdf_prm.b_un.nrefill * size;  
 573 
 574         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 575                grow_buffers(GFP_BUFFER, size)) {
 576                 needed -= PAGE_SIZE;
 577         }
 578 
 579         if(needed <= 0) return;
 580 
 581         /* See if there are too many buffers of a different size.
 582            If so, victimize them */
 583 
 584         while(maybe_shrink_lav_buffers(size))
 585          {
 586                  if(!grow_buffers(GFP_BUFFER, size)) break;
 587                  needed -= PAGE_SIZE;
 588                  if(needed <= 0) return;
 589          };
 590 
 591         /* OK, we cannot grow the buffer cache, now try and get some
 592            from the lru list */
 593 
 594         /* First set the candidate pointers to usable buffers.  This
 595            should be quick nearly all of the time. */
 596 
 597 repeat0:
 598         for(i=0; i<NR_LIST; i++){
 599                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 600                    nr_buffers_type[i] == 0) {
 601                         candidate[i] = NULL;
 602                         buffers[i] = 0;
 603                         continue;
 604                 }
 605                 buffers[i] = nr_buffers_type[i];
 606                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 607                  {
 608                          if(buffers[i] < 0) panic("Here is the problem");
 609                          tmp = bh->b_next_free;
 610                          if (!bh) break;
 611                          
 612                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 613                              buffer_dirty(bh)) {
 614                                  refile_buffer(bh);
 615                                  continue;
 616                          }
 617                          
 618                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 619                                   continue;
 620                          
 621                          /* Buffers are written in the order they are placed 
 622                             on the locked list. If we encounter a locked
 623                             buffer here, this means that the rest of them
 624                             are also locked */
 625                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 626                                  buffers[i] = 0;
 627                                  break;
 628                          }
 629                          
 630                          if (BADNESS(bh)) continue;
 631                          break;
 632                  };
 633                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 634                 else candidate[i] = bh;
 635                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 636         }
 637         
 638  repeat:
 639         if(needed <= 0) return;
 640         
 641         /* Now see which candidate wins the election */
 642         
 643         winner = best_time = UINT_MAX;  
 644         for(i=0; i<NR_LIST; i++){
 645                 if(!candidate[i]) continue;
 646                 if(candidate[i]->b_lru_time < best_time){
 647                         best_time = candidate[i]->b_lru_time;
 648                         winner = i;
 649                 }
 650         }
 651         
 652         /* If we have a winner, use it, and then get a new candidate from that list */
 653         if(winner != UINT_MAX) {
 654                 i = winner;
 655                 bh = candidate[i];
 656                 candidate[i] = bh->b_next_free;
 657                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 658                 if (bh->b_count || bh->b_size != size)
 659                          panic("Busy buffer in candidate list\n");
 660                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 661                          panic("Shared buffer in candidate list\n");
 662                 if (buffer_protected(bh))
 663                         panic("Protected buffer in candidate list\n");
 664                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 665                 
 666                 if(bh->b_dev == B_FREE)
 667                         panic("Wrong list");
 668                 remove_from_queues(bh);
 669                 bh->b_dev = B_FREE;
 670                 put_last_free(bh);
 671                 needed -= bh->b_size;
 672                 buffers[i]--;
 673                 if(buffers[i] < 0) panic("Here is the problem");
 674                 
 675                 if(buffers[i] == 0) candidate[i] = NULL;
 676                 
 677                 /* Now all we need to do is advance the candidate pointer
 678                    from the winner list to the next usable buffer */
 679                 if(candidate[i] && buffers[i] > 0){
 680                         if(buffers[i] <= 0) panic("Here is another problem");
 681                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 682                                 if(buffers[i] < 0) panic("Here is the problem");
 683                                 tmp = bh->b_next_free;
 684                                 if (!bh) break;
 685                                 
 686                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 687                                     buffer_dirty(bh)) {
 688                                         refile_buffer(bh);
 689                                         continue;
 690                                 };
 691                                 
 692                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 693                                          continue;
 694                                 
 695                                 /* Buffers are written in the order they are
 696                                    placed on the locked list.  If we encounter
 697                                    a locked buffer here, this means that the
 698                                    rest of them are also locked */
 699                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 700                                         buffers[i] = 0;
 701                                         break;
 702                                 }
 703               
 704                                 if (BADNESS(bh)) continue;
 705                                 break;
 706                         };
 707                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 708                         else candidate[i] = bh;
 709                         if(candidate[i] && candidate[i]->b_count) 
 710                                  panic("Here is the problem");
 711                 }
 712                 
 713                 goto repeat;
 714         }
 715         
 716         if(needed <= 0) return;
 717         
 718         /* Too bad, that was not enough. Try a little harder to grow some. */
 719         
 720         if (nr_free_pages > min_free_pages + 5) {
 721                 if (grow_buffers(GFP_BUFFER, size)) {
 722                         needed -= PAGE_SIZE;
 723                         goto repeat0;
 724                 };
 725         }
 726         
 727         /* and repeat until we find something good */
 728         if (!grow_buffers(GFP_ATOMIC, size))
 729                 wakeup_bdflush(1);
 730         needed -= PAGE_SIZE;
 731         goto repeat0;
 732 }
 733 
 734 /*
 735  * Ok, this is getblk, and it isn't very clear, again to hinder
 736  * race-conditions. Most of the code is seldom used, (ie repeating),
 737  * so it should be much more efficient than it looks.
 738  *
 739  * The algorithm is changed: hopefully better, and an elusive bug removed.
 740  *
 741  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 742  * when the filesystem starts to get full of dirty blocks (I hope).
 743  */
 744 struct buffer_head * getblk(kdev_t dev, int block, int size)
 745 {
 746         struct buffer_head * bh;
 747         int isize = BUFSIZE_INDEX(size);
 748 
 749         /* Update this for the buffer size lav. */
 750         buffer_usage[isize]++;
 751 
 752         /* If there are too many dirty buffers, we wake up the update process
 753            now so as to ensure that there are still clean buffers available
 754            for user processes to use (and dirty) */
 755 repeat:
 756         bh = get_hash_table(dev, block, size);
 757         if (bh) {
 758                 if (!buffer_dirty(bh)) {
 759                         if (buffer_uptodate(bh))
 760                                  put_last_lru(bh);
 761                         bh->b_flushtime = 0;
 762                 }
 763                 set_bit(BH_Touched, &bh->b_state);
 764                 return bh;
 765         }
 766 
 767         while(!free_list[isize]) refill_freelist(size);
 768         
 769         if (find_buffer(dev,block,size))
 770                  goto repeat;
 771 
 772         bh = free_list[isize];
 773         remove_from_free_list(bh);
 774 
 775 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 776 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 777         bh->b_count=1;
 778         bh->b_flushtime=0;
 779         bh->b_state=(1<<BH_Touched);
 780         bh->b_dev=dev;
 781         bh->b_blocknr=block;
 782         insert_into_queues(bh);
 783         return bh;
 784 }
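
/*
 * Typical write-side usage (sketch): a filesystem creating a brand-new
 * block fills the buffer itself rather than reading it, then marks it
 * dirty and releases it:
 *
 *	bh = getblk(dev, block, blocksize);
 *	memset(bh->b_data, 0, blocksize);
 *	mark_buffer_uptodate(bh, 1);
 *	mark_buffer_dirty(bh, 0);
 *	brelse(bh);
 */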
 785 
 786 void set_writetime(struct buffer_head * buf, int flag)
 787 {
 788         int newtime;
 789 
 790         if (buffer_dirty(buf)) {
 791                 /* Move buffer to dirty list if jiffies is clear */
 792                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 793                                      bdf_prm.b_un.age_buffer);
 794                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 795                          buf->b_flushtime = newtime;
 796         } else {
 797                 buf->b_flushtime = 0;
 798         }
 799 }
 800 
 801 
 802 void refile_buffer(struct buffer_head * buf)
 803 {
 804         int dispose;
 805 
 806         if(buf->b_dev == B_FREE) {
 807                 printk("Attempt to refile free buffer\n");
 808                 return;
 809         }
 810         if (buffer_dirty(buf))
 811                 dispose = BUF_DIRTY;
 812         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 813                 dispose = BUF_SHARED;
 814         else if (buffer_locked(buf))
 815                 dispose = BUF_LOCKED;
 816         else if (buf->b_list == BUF_SHARED)
 817                 dispose = BUF_UNSHARED;
 818         else
 819                 dispose = BUF_CLEAN;
 820         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 821         if(dispose != buf->b_list)  {
 822                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 823                          buf->b_lru_time = jiffies;
 824                 if(dispose == BUF_LOCKED && 
 825                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 826                          dispose = BUF_LOCKED1;
 827                 remove_from_queues(buf);
 828                 buf->b_list = dispose;
 829                 insert_into_queues(buf);
 830                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 831                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 832                    bdf_prm.b_un.nfract/100)
 833                          wakeup_bdflush(0);
 834         }
 835 }
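
/*
 * set_writetime() and refile_buffer() are normally reached through the
 * mark_buffer_dirty() inline in <linux/fs.h>, roughly:
 *
 *	extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag)
 *	{
 *		if (!set_bit(BH_Dirty, &bh->b_state)) {
 *			set_writetime(bh, flag);
 *			refile_buffer(bh);
 *		}
 *	}
 */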
 836 
 837 /*
 838  * Release a buffer head
 839  */
 840 void __brelse(struct buffer_head * buf)
 841 {
 842         wait_on_buffer(buf);
 843 
 844         /* If dirty, mark the time this buffer should be written back */
 845         set_writetime(buf, 0);
 846         refile_buffer(buf);
 847 
 848         if (buf->b_count) {
 849                 if (!--buf->b_count)
 850                         wake_up(&buffer_wait);
 851                 return;
 852         }
 853         printk("VFS: brelse: Trying to free free buffer\n");
 854 }
 855 
 856 /*
 857  * bforget() is like brelse(), except it removes the buffer
 858  * from the hash-queues (so that it won't be re-used if it's
 859  * shared).
 860  */
 861 void __bforget(struct buffer_head * buf)
 862 {
 863         wait_on_buffer(buf);
 864         mark_buffer_clean(buf);
 865         clear_bit(BH_Protected, &buf->b_state);
 866         buf->b_count--;
 867         remove_from_hash_queue(buf);
 868         buf->b_dev = NODEV;
 869         refile_buffer(buf);
 870         wake_up(&buffer_wait);
 871 }
 872 
 873 /*
 874  * bread() reads a specified block and returns the buffer that contains
 875  * it. It returns NULL if the block was unreadable.
 876  */
 877 struct buffer_head * bread(kdev_t dev, int block, int size)
 878 {
 879         struct buffer_head * bh;
 880 
 881         if (!(bh = getblk(dev, block, size))) {
 882                 printk("VFS: bread: READ error on device %s\n",
 883                         kdevname(dev));
 884                 return NULL;
 885         }
 886         if (buffer_uptodate(bh))
 887                 return bh;
 888         ll_rw_block(READ, 1, &bh);
 889         wait_on_buffer(bh);
 890         if (buffer_uptodate(bh))
 891                 return bh;
 892         brelse(bh);
 893         return NULL;
 894 }
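
/*
 * Typical read-side usage (sketch): metadata reads check for NULL and
 * balance the reference taken by getblk() with brelse():
 *
 *	bh = bread(dev, block, blocksize);
 *	if (!bh)
 *		return -EIO;
 *	... examine or copy bh->b_data ...
 *	brelse(bh);
 */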
 895 
 896 /*
 897  * Ok, breada can be used as bread, but additionally to mark other
 898  * blocks for reading as well. End the argument list with a negative
 899  * number.
 900  */
 901 
 902 #define NBUF 16
 903 
 904 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 905         unsigned int pos, unsigned int filesize)
 906 {
 907         struct buffer_head * bhlist[NBUF];
 908         unsigned int blocks;
 909         struct buffer_head * bh;
 910         int index;
 911         int i, j;
 912 
 913         if (pos >= filesize)
 914                 return NULL;
 915 
 916         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 917                 return NULL;
 918 
 919         index = BUFSIZE_INDEX(bh->b_size);
 920 
 921         if (buffer_uptodate(bh))
 922                 return bh;
 923 
 924         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 925 
 926         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 927                 blocks = read_ahead[MAJOR(dev)] >> index;
 928         if (blocks > NBUF)
 929                 blocks = NBUF;
 930         
 931         bhlist[0] = bh;
 932         j = 1;
 933         for(i=1; i<blocks; i++) {
 934                 bh = getblk(dev,block+i,bufsize);
 935                 if (buffer_uptodate(bh)) {
 936                         brelse(bh);
 937                         break;
 938                 }
 939                 bhlist[j++] = bh;
 940         }
 941 
 942         /* Request the read for these buffers, and then release them */
 943         ll_rw_block(READ, j, bhlist);
 944 
 945         for(i=1; i<j; i++)
 946                 brelse(bhlist[i]);
 947 
 948         /* Wait for this buffer, and then continue on */
 949         bh = bhlist[0];
 950         wait_on_buffer(bh);
 951         if (buffer_uptodate(bh))
 952                 return bh;
 953         brelse(bh);
 954         return NULL;
 955 }
 956 
 957 /*
 958  * See fs/inode.c for the weird use of volatile..
 959  */
 960 static void put_unused_buffer_head(struct buffer_head * bh)
 961 {
 962         struct wait_queue * wait;
 963 
 964         wait = ((volatile struct buffer_head *) bh)->b_wait;
 965         memset(bh,0,sizeof(*bh));
 966         ((volatile struct buffer_head *) bh)->b_wait = wait;
 967         bh->b_next_free = unused_list;
 968         unused_list = bh;
 969 }
 970 
 971 static void get_more_buffer_heads(void)
 972 {
 973         int i;
 974         struct buffer_head * bh;
 975 
 976         if (unused_list)
 977                 return;
 978 
 979         if (!(bh = (struct buffer_head*) get_free_page(GFP_KERNEL)))
 980                 return;
 981 
 982         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 983                 bh->b_next_free = unused_list;  /* only make link */
 984                 unused_list = bh++;
 985         }
 986 }
 987 
 988 /* 
 989  * We can't put completed temporary IO buffer_heads directly onto the
 990  * unused_list when they become unlocked, since the device driver
 991  * end_request routines still expect access to the buffer_head's
 992  * fields after the final unlock.  So, the device driver puts them on
 993  * the reuse_list instead once IO completes, and we recover these to
 994  * the unused_list here.
 995  *
 996  * The reuse_list receives buffers from interrupt routines, so we need
 997  * to be IRQ-safe here.
 998  */
 999 static inline void recover_reusable_buffer_heads(void)
1000 {
1001         struct buffer_head *bh;
1002         unsigned long flags;
1003         
1004         save_flags(flags);
1005         while (reuse_list) {
1006                 cli();
1007                 bh = reuse_list;
1008                 reuse_list = bh->b_next_free;
1009                 restore_flags(flags);
1010                 put_unused_buffer_head(bh);
1011         }
1012 }
1013 
1014 static struct buffer_head * get_unused_buffer_head(void)
1015 {
1016         struct buffer_head * bh;
1017 
1018         recover_reusable_buffer_heads();
1019         get_more_buffer_heads();
1020         if (!unused_list)
1021                 return NULL;
1022         bh = unused_list;
1023         unused_list = bh->b_next_free;
1024         bh->b_next_free = NULL;
1025         bh->b_data = NULL;
1026         bh->b_size = 0;
1027         bh->b_state = 0;
1028         return bh;
1029 }
1030 
1031 /*
1032  * Create the appropriate buffers when given a page for data area and
1033  * the size of each buffer.. Use the bh->b_this_page linked list to
1034  * follow the buffers created.  Return NULL if unable to create more
1035  * buffers.
1036  */
1037 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1038 {
1039         struct buffer_head *bh, *head;
1040         unsigned long offset;
1041 
1042         head = NULL;
1043         offset = PAGE_SIZE;
1044         while ((offset -= size) < PAGE_SIZE) {
1045                 bh = get_unused_buffer_head();
1046                 if (!bh)
1047                         goto no_grow;
1048                 bh->b_this_page = head;
1049                 head = bh;
1050                 bh->b_data = (char *) (page+offset);
1051                 bh->b_size = size;
1052                 bh->b_dev = B_FREE;  /* Flag as unused */
1053         }
1054         return head;
1055 /*
1056  * In case anything failed, we just free everything we got.
1057  */
1058 no_grow:
1059         bh = head;
1060         while (bh) {
1061                 head = bh;
1062                 bh = bh->b_this_page;
1063                 put_unused_buffer_head(head);
1064         }
1065         return NULL;
1066 }
1067 
1068 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1069 {
1070         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1071         int block, nr;
1072         struct page *page;
1073 
1074         page = mem_map + MAP_NR(address);
1075         page->uptodate = 0;
1076         bh = create_buffers(address, size);
1077         if (!bh)
1078                 return -ENOMEM;
1079         nr = 0;
1080         next = bh;
1081         do {
1082                 struct buffer_head * tmp;
1083                 block = *(b++);
1084 
1085                 set_bit(BH_FreeOnIO, &next->b_state);
1086                 next->b_list = BUF_CLEAN;
1087                 next->b_dev = dev;
1088                 next->b_blocknr = block;
1089                 next->b_count = 1;
1090                 next->b_flushtime = 0;
1091                 set_bit(BH_Uptodate, &next->b_state);
1092 
1093                 /* When we use bmap, we define block zero to represent
1094                    a hole.  ll_rw_page, however, may legitimately
1095                    access block zero, and we need to distinguish the
1096                    two cases. 
1097                    */
1098                 if (bmap && !block) {
1099                         memset(next->b_data, 0, size);
1100                         next->b_count--;
1101                         continue;
1102                 }
1103                 tmp = get_hash_table(dev, block, size);
1104                 if (tmp) {
1105                         if (!buffer_uptodate(tmp)) {
1106                                 if (rw == READ)
1107                                         ll_rw_block(READ, 1, &tmp);
1108                                 wait_on_buffer(tmp);
1109                         }
1110                         if (rw == READ) 
1111                                 memcpy(next->b_data, tmp->b_data, size);
1112                         else {
1113                                 memcpy(tmp->b_data, next->b_data, size);
1114                                 set_bit(BH_Dirty, &tmp->b_state);
1115                         }
1116                         brelse(tmp);
1117                         next->b_count--;
1118                         continue;
1119                 }
1120                 if (rw == READ)
1121                         clear_bit(BH_Uptodate, &next->b_state);
1122                 else
1123                         set_bit(BH_Dirty, &next->b_state);
1124                 arr[nr++] = next;
1125         } while (prev = next, (next = next->b_this_page) != NULL);
1126         prev->b_this_page = bh;
1127         
1128         if (nr)
1129                 ll_rw_block(rw, nr, arr);
1130         else {
1131                 page->locked = 0;
1132                 page->uptodate = 1;
1133                 wake_up(&page->wait);
1134                 next = bh;
1135                 do {
1136                         next->b_next_free = reuse_list;
1137                         reuse_list = next;
1138                         next = next->b_this_page;
1139                 } while (next != bh);
1140         }
1141         ++current->maj_flt;
1142         return 0;
1143 }
1144 
1145 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1146 {
1147         if (on) {
1148                 struct buffer_head *tmp = bh;
1149                 int page_uptodate = 1;
1150                 set_bit(BH_Uptodate, &bh->b_state);
1151                 do {
1152                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1153                                 page_uptodate = 0;
1154                                 break;
1155                         }
1156                         tmp=tmp->b_this_page;
1157                 } while (tmp && tmp != bh);
1158                 if (page_uptodate)
1159                         mem_map[MAP_NR(bh->b_data)].uptodate = 1;
1160         } else
1161                 clear_bit(BH_Uptodate, &bh->b_state);
1162 }
1163 
1164 void unlock_buffer(struct buffer_head * bh)
1165 {
1166         struct buffer_head *tmp;
1167         unsigned long flags;
1168         struct page *page;
1169 
1170         clear_bit(BH_Lock, &bh->b_state);
1171         wake_up(&bh->b_wait);
1172 
1173         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1174                 return;
1175         page = mem_map + MAP_NR(bh->b_data);
1176         if (!page->locked) {
1177                 printk ("Whoops: unlock_buffer: "
1178                         "async io complete on unlocked page\n");
1179                 return;
1180         }
1181         if (bh->b_count != 1) {
1182                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1183                 return;
1184         }
1185         /* Async buffer_heads are here only as labels for IO, and get
1186            thrown away once the IO for this page is complete.  IO is
1187            deemed complete once all buffers have been visited
1188            (b_count==0) and are now unlocked. */
1189         bh->b_count--;
1190         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1191                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1192                         return;
1193         }
1194 
1195         /* OK, go ahead and complete the async IO on this page. */
1196         save_flags(flags);
1197         page->locked = 0;
1198         wake_up(&page->wait);
1199         cli();
1200         tmp = bh;
1201         do {
1202                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1203                         printk ("Whoops: unlock_buffer: "
1204                                 "async IO mismatch on page.\n");
1205                         restore_flags(flags);
1206                         return;
1207                 }
1208                 tmp->b_next_free = reuse_list;
1209                 reuse_list = tmp;
1210                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1211                 tmp = tmp->b_this_page;
1212         } while (tmp != bh);
1213         restore_flags(flags);
1214         if (page->free_after) {
1215                 extern int nr_async_pages;
1216                 nr_async_pages--;
1217                 page->free_after = 0;
1218                 free_page(page_address(page));
1219         }
1220 }
1221 
1222 /*
1223  * Generic "readpage" function for block devices that have the normal
1224  * bmap functionality. This is most of the block device filesystems.
1225  * Reads the page asynchronously --- the unlock_buffer() and
 1226  * mark_buffer_uptodate() functions propagate buffer state into the
1227  * page struct once IO has completed.
1228  */
1229 int generic_readpage(struct inode * inode, struct page * page)
1230 {
1231         unsigned long block, address;
1232         int *p, nr[PAGE_SIZE/512];
1233         int i;
1234 
1235         address = page_address(page);
1236         page->count++;
1237         wait_on_page(page);
1238         if (page->uptodate) {
1239                 free_page(address);
1240                 return 0;
1241         }
1242         page->locked = 1;
1243         
1244         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1245         block = page->offset >> inode->i_sb->s_blocksize_bits;
1246         p = nr;
1247         do {
1248                 *p = inode->i_op->bmap(inode, block);
1249                 i--;
1250                 block++;
1251                 p++;
1252         } while (i > 0);
1253 
1254         /* IO start */
1255         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1256         free_page(address);
1257         return 0;
1258 }
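
/*
 * Any block-device filesystem that implements i_op->bmap() can use this
 * function directly as its readpage operation; the page-cache read and
 * mmap paths then invoke inode->i_op->readpage(inode, page), and the
 * temporary buffers set up by brw_page() feed the result back into the
 * page via mark_buffer_uptodate() and unlock_buffer() above.
 */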
1259 
1260 /*
1261  * Try to increase the number of buffers available: the size argument
1262  * is used to determine what kind of buffers we want.
1263  */
1264 static int grow_buffers(int pri, int size)
1265 {
1266         unsigned long page;
1267         struct buffer_head *bh, *tmp;
1268         struct buffer_head * insert_point;
1269         int isize;
1270 
1271         if ((size & 511) || (size > PAGE_SIZE)) {
1272                 printk("VFS: grow_buffers: size = %d\n",size);
1273                 return 0;
1274         }
1275 
1276         isize = BUFSIZE_INDEX(size);
1277 
1278         if (!(page = __get_free_page(pri)))
1279                 return 0;
1280         bh = create_buffers(page, size);
1281         if (!bh) {
1282                 free_page(page);
1283                 return 0;
1284         }
1285 
1286         insert_point = free_list[isize];
1287 
1288         tmp = bh;
1289         while (1) {
1290                 nr_free[isize]++;
1291                 if (insert_point) {
1292                         tmp->b_next_free = insert_point->b_next_free;
1293                         tmp->b_prev_free = insert_point;
1294                         insert_point->b_next_free->b_prev_free = tmp;
1295                         insert_point->b_next_free = tmp;
1296                 } else {
1297                         tmp->b_prev_free = tmp;
1298                         tmp->b_next_free = tmp;
1299                 }
1300                 insert_point = tmp;
1301                 ++nr_buffers;
1302                 if (tmp->b_this_page)
1303                         tmp = tmp->b_this_page;
1304                 else
1305                         break;
1306         }
1307         free_list[isize] = bh;
1308         buffer_pages[MAP_NR(page)] = bh;
1309         tmp->b_this_page = bh;
1310         wake_up(&buffer_wait);
1311         buffermem += PAGE_SIZE;
1312         return 1;
1313 }
1314 
1315 
1316 /* =========== Reduce the buffer memory ============= */
1317 
1318 /*
1319  * try_to_free_buffer() checks if all the buffers on this particular page
 1320  * are unused, and frees the page if so.
1321  */
1322 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1323                        int priority)
1324 {
1325         unsigned long page;
1326         struct buffer_head * tmp, * p;
1327         int isize = BUFSIZE_INDEX(bh->b_size);
1328 
1329         *bhp = bh;
1330         page = (unsigned long) bh->b_data;
1331         page &= PAGE_MASK;
1332         tmp = bh;
1333         do {
1334                 if (!tmp)
1335                         return 0;
1336                 if (tmp->b_count || buffer_protected(tmp) ||
1337                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1338                         return 0;
1339                 if (priority && buffer_touched(tmp))
1340                         return 0;
1341                 tmp = tmp->b_this_page;
1342         } while (tmp != bh);
1343         tmp = bh;
1344         do {
1345                 p = tmp;
1346                 tmp = tmp->b_this_page;
1347                 nr_buffers--;
1348                 nr_buffers_size[isize]--;
1349                 if (p == *bhp)
1350                   {
1351                     *bhp = p->b_prev_free;
1352                     if (p == *bhp) /* Was this the last in the list? */
1353                       *bhp = NULL;
1354                   }
1355                 remove_from_queues(p);
1356                 put_unused_buffer_head(p);
1357         } while (tmp != bh);
1358         buffermem -= PAGE_SIZE;
1359         buffer_pages[MAP_NR(page)] = NULL;
1360         free_page(page);
1361         return !mem_map[MAP_NR(page)].count;
1362 }
1363 
1364 /* Age buffers on a given page, according to whether they have been
1365    visited recently or not. */
1366 static inline void age_buffer(struct buffer_head *bh)
1367 {
1368         struct buffer_head *tmp = bh;
1369         int touched = 0;
1370 
1371         /*
1372          * When we age a page, we mark all other buffers in the page
1373          * with the "has_aged" flag.  Then, when these aliased buffers
1374          * come up for aging, we skip them until next pass.  This
1375          * ensures that a page full of multiple buffers only gets aged
1376          * once per pass through the lru lists. 
1377          */
1378         if (clear_bit(BH_Has_aged, &bh->b_state))
1379                 return;
1380         
1381         do {
1382                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1383                 tmp = tmp->b_this_page;
1384                 set_bit(BH_Has_aged, &tmp->b_state);
1385         } while (tmp != bh);
1386         clear_bit(BH_Has_aged, &bh->b_state);
1387 
1388         if (touched) 
1389                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1390         else
1391                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1392 }
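
/*
 * A worked example of the pass-marking above: on a 4096-byte page holding
 * four 1024-byte buffers, calling age_buffer() on the first one clears
 * BH_Touched on all four and sets BH_Has_aged on the other three.  When the
 * lru scan later reaches those three in the same pass, the clear_bit() at
 * the top returns 1 and they are skipped, so the underlying page is
 * touch_page()d or age_page()d exactly once per pass, depending on whether
 * any of the four buffers had been referenced.
 */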
1393 
1394 /*
1395  * Consult the load average for buffers and decide whether we should
1396  * shrink the buffers of one size.  If we decide yes, do it and return 1.
1397  * Else return 0.  Buffers of the size that is passed in are never
1398  * shrunk.
1399  *
1400  * I would prefer not to use a load average, but the way things are now it
1401  * seems unavoidable.  The way to get rid of it would be to force clustering
1402  * universally, so that when we reclaim buffers we always reclaim an entire
1403  * page.  Doing this would mean that we all need to move towards QMAGIC.
1404  */
1405 
1406 static int maybe_shrink_lav_buffers(int size)
1407 {          
1408         int nlist;
1409         int isize;
1410         int total_lav, total_n_buffers, n_sizes;
1411         
1412         /* Do not consider the shared buffers since they would not tend
1413            to have getblk called very often, and this would throw off
1414            the lav.  They are not easily reclaimable anyway (let the swapper
1415            make the first move). */
1416   
1417         total_lav = total_n_buffers = n_sizes = 0;
1418         for(nlist = 0; nlist < NR_SIZES; nlist++)
1419          {
1420                  total_lav += buffers_lav[nlist];
1421                  if(nr_buffers_size[nlist]) n_sizes++;
1422                  total_n_buffers += nr_buffers_size[nlist];
1423                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1424          }
1425         
1426         /* See if we have an excessive number of buffers of a particular
1427            size - if so, victimize that bunch. */
1428   
1429         isize = (size ? BUFSIZE_INDEX(size) : -1);
1430         
1431         if (n_sizes > 1)
1432                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1433                   {
1434                           if(nlist == isize) continue;
1435                           if(nr_buffers_size[nlist] &&
1436                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1437                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1438                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1439                                             return 1;
1440                   }
1441         return 0;
1442 }
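
/*
 * The victimization test above reads more easily after dividing both sides
 * by (total_lav * total_n_buffers):
 *
 *      lav_const * buffers_lav[nlist] / total_lav
 *              <  (nr_buffers_size[nlist] - shared) / total_n_buffers
 *
 * i.e. a size is shrunk when its share of the recent load average, scaled
 * up by bdf_prm.b_un.lav_const, is still smaller than its share of the
 * (non-shared) buffer population -- it owns far more of the cache than its
 * traffic justifies.
 */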
1443 
1444 /*
1445  * Try to free up some pages by shrinking the buffer-cache
1446  *
1447  * Priority tells the routine how hard to try to shrink the
1448  * buffers: 6 means "don't bother too much", while a value
1449  * of 0 means "we'd better get some free pages now".
1450  *
1451  * "limit" is meant to limit the shrink-action only to pages
1452  * that are in the 0 - limit address range, for DMA re-allocations.
1453  * We ignore that right now.
1454  */
1455 
1456 static int shrink_specific_buffers(unsigned int priority, int size)
1457 {
1458         struct buffer_head *bh;
1459         int nlist;
1460         int i, isize, isize1;
1461 
1462 #ifdef DEBUG
1463         if(size) printk("Shrinking buffers of size %d\n", size);
1464 #endif
1465         /* First try the free lists, and see if we can get a complete page
1466            from here */
1467         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1468 
1469         for(isize = 0; isize<NR_SIZES; isize++){
1470                 if(isize1 != -1 && isize1 != isize) continue;
1471                 bh = free_list[isize];
1472                 if(!bh) continue;
1473                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1474                         if (bh->b_count || buffer_protected(bh) ||
1475                             !bh->b_this_page)
1476                                  continue;
1477                         if (!age_of((unsigned long) bh->b_data) &&
1478                             try_to_free_buffer(bh, &bh, 6))
1479                                  return 1;
1480                         if(!bh) break;
1481                         /* Some interrupt must have used it after we
1482                            freed the page.  No big deal - keep looking */
1483                 }
1484         }
1485         
1486         /* Not enough in the free lists, now try the lru list */
1487         
1488         for(nlist = 0; nlist < NR_LIST; nlist++) {
1489         repeat1:
1490                 if(priority > 2 && nlist == BUF_SHARED) continue;
1491                 i = nr_buffers_type[nlist];
1492                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1493                 for ( ; i > 0; i-- ) {
1494                         bh = next_to_age[nlist];
1495                         if (!bh)
1496                                 break;
1497                         next_to_age[nlist] = bh->b_next_free;
1498 
1499                         /* First, age the buffer. */
1500                         age_buffer(bh);
1501                         /* We may have stalled while waiting for I/O
1502                            to complete. */
1503                         if(bh->b_list != nlist) goto repeat1;
1504                         if (bh->b_count || buffer_protected(bh) ||
1505                             !bh->b_this_page)
1506                                  continue;
1507                         if(size && bh->b_size != size) continue;
1508                         if (buffer_locked(bh))
1509                                  if (priority)
1510                                           continue;
1511                                  else
1512                                           wait_on_buffer(bh);
1513                         if (buffer_dirty(bh)) {
1514                                 bh->b_count++;
1515                                 bh->b_flushtime = 0;
1516                                 ll_rw_block(WRITEA, 1, &bh);
1517                                 bh->b_count--;
1518                                 continue;
1519                         }
1520                         /* At priority 6, only consider really old
1521                            (age==0) buffers for reclaiming.  At
1522                            priority 0, consider any buffers. */
1523                         if ((age_of((unsigned long) bh->b_data) >>
1524                              (6-priority)) > 0)
1525                                 continue;                               
1526                         if (try_to_free_buffer(bh, &bh, 0))
1527                                  return 1;
1528                         if(!bh) break;
1529                 }
1530         }
1531         return 0;
1532 }
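
/*
 * A worked example of the age test above: at priority 6 the shift is
 * (6-6) = 0, so a buffer whose page age is 3 gives 3 > 0 and is skipped --
 * only completely aged (age 0) pages are reclaimed.  At priority 2 the
 * same buffer gives 3 >> 4 == 0 and becomes a candidate.  Only at the
 * lowest priorities does the shift get large enough that practically any
 * page, however recently used, can be taken.
 */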
1533 
1534 
1535 /* ================== Debugging =================== */
1536 
1537 void show_buffers(void)
1538 {
1539         struct buffer_head * bh;
1540         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1541         int protected = 0;
1542         int shared;
1543         int nlist, isize;
1544 
1545         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1546         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1547         printk("Buffer blocks:   %6d\n",nr_buffers);
1548 
1549         for(nlist = 0; nlist < NR_LIST; nlist++) {
1550           shared = found = locked = dirty = used = lastused = protected = 0;
1551           bh = lru_list[nlist];
1552           if(!bh) continue;
1553           do {
1554                 found++;
1555                 if (buffer_locked(bh))
1556                         locked++;
1557                 if (buffer_protected(bh))
1558                         protected++;
1559                 if (buffer_dirty(bh))
1560                         dirty++;
1561                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1562                 if (bh->b_count)
1563                         used++, lastused = found;
1564                 bh = bh->b_next_free;
1565               } while (bh != lru_list[nlist]);
1566         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, "
1567                "%d protected, %d dirty, %d shrd\n",
1568                 nlist, found, used, lastused, locked, protected, dirty, shared);
1569         };
1570         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1571         for(isize = 0; isize<NR_SIZES; isize++){
1572                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1573                        buffers_lav[isize], nr_free[isize]);
1574                 for(nlist = 0; nlist < NR_LIST; nlist++)
1575                          printk("%7d ", nr_buffers_st[isize][nlist]);
1576                 printk("\n");
1577         }
1578 }
1579 
1580 
1581 /* ====================== Cluster patches for ext2 ==================== */
1582 
1583 /*
1584  * try_to_reassign() checks if all the buffers on this particular page
1585  * are unused, and reassigns them to a new cluster if this is true.
1586  */
1587 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1588                            kdev_t dev, unsigned int starting_block)
1589 {
1590         unsigned long page;
1591         struct buffer_head * tmp, * p;
1592 
1593         *bhp = bh;
1594         page = (unsigned long) bh->b_data;
1595         page &= PAGE_MASK;
1596         if(mem_map[MAP_NR(page)].count != 1) return 0;
1597         tmp = bh;
1598         do {
1599                 if (!tmp)
1600                          return 0;
1601                 
1602                 if (tmp->b_count || buffer_protected(tmp) ||
1603                     buffer_dirty(tmp) || buffer_locked(tmp))
1604                          return 0;
1605                 tmp = tmp->b_this_page;
1606         } while (tmp != bh);
1607         tmp = bh;
1608         
1609         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1610                  tmp = tmp->b_this_page;
1611         
1612         /* This is the buffer at the head of the page */
1613         bh = tmp;
1614         do {
1615                 p = tmp;
1616                 tmp = tmp->b_this_page;
1617                 remove_from_queues(p);
1618                 p->b_dev = dev;
1619                 mark_buffer_uptodate(p, 0);
1620                 clear_bit(BH_Req, &p->b_state);
1621                 p->b_blocknr = starting_block++;
1622                 insert_into_queues(p);
1623         } while (tmp != bh);
1624         return 1;
1625 }
1626 
1627 /*
1628  * Try to find a free cluster by locating a page where
1629  * all of the buffers are unused.  We would like this function
1630  * to be atomic, so we do not call anything that might cause
1631  * the process to sleep.  The priority is somewhat similar to
1632  * the priority used in shrink_buffers.
1633  * 
1634  * My thinking is that the kernel should end up using whole
1635  * pages for the buffer cache as much of the time as possible.
1636  * This way the other buffers on a particular page are likely
1637  * to be very near each other on the free list, and we will not
1638  * be expiring data prematurely.  For now we only cannibalize buffers
1639  * of the same size to keep the code simpler.
1640  */
1641 static int reassign_cluster(kdev_t dev, 
1642                      unsigned int starting_block, int size)
1643 {
1644         struct buffer_head *bh;
1645         int isize = BUFSIZE_INDEX(size);
1646         int i;
1647 
1648         /* We want to give ourselves a really good shot at generating
1649            a cluster, and since we only take buffers from the free
1650            list, we "overfill" it a little. */
1651 
1652         while(nr_free[isize] < 32) refill_freelist(size);
1653 
1654         bh = free_list[isize];
1655         if(bh)
1656                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1657                          if (!bh->b_this_page)  continue;
1658                          if (try_to_reassign(bh, &bh, dev, starting_block))
1659                                  return 4;
1660                  }
1661         return 0;
1662 }
1663 
1664 /* This function tries to generate a new cluster of buffers
1665  * from a new page in memory.  We should only do this if we have
1666  * not expanded the buffer cache to the maximum size that we allow.
1667  */
1668 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1669 {
1670         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1671         int isize = BUFSIZE_INDEX(size);
1672         unsigned long offset;
1673         unsigned long page;
1674         int nblock;
1675 
1676         page = get_free_page(GFP_NOBUFFER);
1677         if(!page) return 0;
1678 
1679         bh = create_buffers(page, size);
1680         if (!bh) {
1681                 free_page(page);
1682                 return 0;
1683         };
1684         nblock = block;
1685         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1686                 if (find_buffer(dev, nblock++, size))
1687                          goto not_aligned;
1688         }
1689         tmp = bh;
1690         nblock = 0;
1691         while (1) {
1692                 arr[nblock++] = bh;
1693                 bh->b_count = 1;
1694                 bh->b_flushtime = 0;
1695                 bh->b_state = 0;
1696                 bh->b_dev = dev;
1697                 bh->b_list = BUF_CLEAN;
1698                 bh->b_blocknr = block++;
1699                 nr_buffers++;
1700                 nr_buffers_size[isize]++;
1701                 insert_into_queues(bh);
1702                 if (bh->b_this_page)
1703                         bh = bh->b_this_page;
1704                 else
1705                         break;
1706         }
1707         buffermem += PAGE_SIZE;
1708         buffer_pages[MAP_NR(page)] = bh;
1709         bh->b_this_page = tmp;
1710         while (nblock-- > 0)
1711                 brelse(arr[nblock]);
1712         return 4; /* ?? */
1713 not_aligned:
1714         while ((tmp = bh) != NULL) {
1715                 bh = bh->b_this_page;
1716                 put_unused_buffer_head(tmp);
1717         }
1718         free_page(page);
1719         return 0;
1720 }
1721 
1722 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1723 {
1724         int i, offset;
1725         
1726         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1727                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1728                 if(find_buffer(dev, b[i], size)) return 0;
1729         };
1730 
1731         /* OK, we have a candidate for a new cluster */
1732         
1733         /* See if one size of buffer is over-represented in the buffer cache,
1734            if so reduce the numbers of buffers */
1735         if(maybe_shrink_lav_buffers(size))
1736          {
1737                  int retval;
1738                  retval = try_to_generate_cluster(dev, b[0], size);
1739                  if(retval) return retval;
1740          };
1741         
1742         if (nr_free_pages > min_free_pages*2) 
1743                  return try_to_generate_cluster(dev, b[0], size);
1744         else
1745                  return reassign_cluster(dev, b[0], size);
1746 }
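
/*
 * A worked example of the contiguity check above: with 1024-byte blocks on
 * a 4096-byte page, b[] must hold four consecutive block numbers.  An array
 * such as {120, 121, 122, 123} passes and we go on to look for a cluster;
 * {120, 121, 123, 124} bails out at i == 2 because 123 - 1 != 121, and any
 * block already present in the hash (find_buffer() != NULL) also makes us
 * give up.
 */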
1747 
1748 
1749 /* ===================== Init ======================= */
1750 
1751 /*
1752  * This initializes the initial buffer free list.  nr_buffers_type is set
1753  * to one less than the actual number of buffers, as a sop to backwards
1754  * compatibility --- the old code did this (I think unintentionally,
1755  * but I'm not sure), and programs in the ps package expect it.
1756  *                                      - TYT 8/30/92
1757  */
1758 void buffer_init(void)
1759 {
1760         int i;
1761         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1762         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1763 
1764         if (memsize >= 4*1024*1024) {
1765                 if(memsize >= 16*1024*1024)
1766                          nr_hash = 16381;
1767                 else
1768                          nr_hash = 4093;
1769         } else {
1770                 nr_hash = 997;
1771         };
1772         
1773         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1774                                                      sizeof(struct buffer_head *));
1775 
1776 
1777         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1778                                                      sizeof(struct buffer_head *));
1779         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1780                 buffer_pages[i] = NULL;
1781 
1782         for (i = 0 ; i < nr_hash ; i++)
1783                 hash_table[i] = NULL;
1784         lru_list[BUF_CLEAN] = 0;
1785         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1786         if (!free_list[isize])
1787                 panic("VFS: Unable to initialize buffer free list!");
1788         return;
1789 }
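
/*
 * A worked example of the hash sizing above: a machine with 8MB of physical
 * memory takes the middle branch and gets nr_hash = 4093 buckets, one with
 * 32MB gets 16381, and anything under 4MB falls back to 997.  The hash
 * table itself is then just nr_hash buffer_head pointers obtained with
 * vmalloc() and cleared, alongside one buffer_pages pointer per physical
 * page.
 */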
1790 
1791 
1792 /* ====================== bdflush support =================== */
1793 
1794 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1795  * response to dirty buffers.  Once this process is activated, we write back
1796  * a limited number of buffers to the disks and then go back to sleep again.
1797  */
1798 struct wait_queue * bdflush_wait = NULL;
1799 struct wait_queue * bdflush_done = NULL;
1800 
1801 static void wakeup_bdflush(int wait)
1802 {
1803         wake_up(&bdflush_wait);
1804         if(wait) sleep_on(&bdflush_done);
1805 }
1806 
1807 
1808 /* 
1809  * Here we attempt to write back old buffers.  We also try to flush inodes
1810  * and supers, since this function is essentially "update", and
1811  * otherwise there would be no way of ensuring that these quantities ever
1812  * get written back.  Ideally, we would have a timestamp on the inodes
1813  * and superblocks so that we could write back only the old ones as well.
1814  */
1815 
1816 asmlinkage int sync_old_buffers(void)
1817 {
1818         int i, isize;
1819         int ndirty, nwritten;
1820         int nlist;
1821         int ncount;
1822         struct buffer_head * bh, *next;
1823 
1824         sync_supers(0);
1825         sync_inodes(0);
1826 
1827         ncount = 0;
1828 #ifdef DEBUG
1829         for(nlist = 0; nlist < NR_LIST; nlist++)
1830 #else
1831         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1832 #endif
1833         {
1834                 ndirty = 0;
1835                 nwritten = 0;
1836         repeat:
1837                 bh = lru_list[nlist];
1838                 if(bh) 
1839                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1840                                  /* We may have stalled while waiting for I/O to complete. */
1841                                  if(bh->b_list != nlist) goto repeat;
1842                                  next = bh->b_next_free;
1843                                  if(!lru_list[nlist]) {
1844                                          printk("Dirty list empty %d\n", i);
1845                                          break;
1846                                  }
1847                                  
1848                                  /* Clean buffer on dirty list?  Refile it */
1849                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1850                                   {
1851                                           refile_buffer(bh);
1852                                           continue;
1853                                   }
1854                                  
1855                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1856                                           continue;
1857                                  ndirty++;
1858                                  if(bh->b_flushtime > jiffies) continue;
1859                                  nwritten++;
1860                                  bh->b_count++;
1861                                  bh->b_flushtime = 0;
1862 #ifdef DEBUG
1863                                  if(nlist != BUF_DIRTY) ncount++;
1864 #endif
1865                                  ll_rw_block(WRITE, 1, &bh);
1866                                  bh->b_count--;
1867                          }
1868         }
1869 #ifdef DEBUG
1870         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1871         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1872 #endif
1873         
1874         /* We assume that we only come through here on a regular
1875            schedule, like every 5 seconds.  Now update load averages.  
1876            Shift usage counts to prevent overflow. */
1877         for(isize = 0; isize<NR_SIZES; isize++){
1878                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1879                 buffer_usage[isize] = 0;
1880         };
1881         return 0;
1882 }
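
/*
 * The load-average update above uses CALC_LOAD, which (assuming the usual
 * fixed-point definition in <linux/sched.h>, with FIXED_1 == 1 << FSHIFT)
 * computes an exponentially weighted moving average:
 *
 *      lav = (lav * lav_const + usage * (FIXED_1 - lav_const)) >> FSHIFT;
 *
 * so bdf_prm.b_un.lav_const acts as the per-interval decay factor, and the
 * usage count accumulated since the last pass is folded in and then reset,
 * giving the per-size traffic estimate that maybe_shrink_lav_buffers()
 * consults.
 */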
1883 
1884 
1885 /* This is the interface to bdflush.  As we get more sophisticated, we can
1886  * pass tuning parameters to this "process", to adjust how it behaves. 
1887  * We would want to verify each parameter, however, to make sure that it 
1888  * is reasonable. */
1889 
1890 asmlinkage int sys_bdflush(int func, long data)
1891 {
1892         int i, error;
1893 
1894         if (!suser())
1895                 return -EPERM;
1896 
1897         if (func == 1)
1898                  return sync_old_buffers();
1899 
1900         /* Basically func 2*i+2 reads param i, and func 2*i+3 writes param i */
1901         if (func >= 2) {
1902                 i = (func-2) >> 1;
1903                 if (i < 0 || i >= N_PARAM)
1904                         return -EINVAL;
1905                 if((func & 1) == 0) {
1906                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1907                         if (error)
1908                                 return error;
1909                         put_user(bdf_prm.data[i], (int*)data);
1910                         return 0;
1911                 };
1912                 if (data < bdflush_min[i] || data > bdflush_max[i])
1913                         return -EINVAL;
1914                 bdf_prm.data[i] = data;
1915                 return 0;
1916         };
1917 
1918         /* Func 0 used to launch the actual bdflush and then never
1919         return (unless explicitly killed).  We return zero here to
1920         remain semi-compatible with present update(8) programs. */
1921 
1922         return 0;
1923 }
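
/*
 * A minimal user-space sketch of the encoding above (illustrative only --
 * it assumes syscall(2) and a __NR_bdflush definition are available, and it
 * must run as root to get past the suser() check).  Parameter index 1 is
 * picked arbitrarily: func = 2*i + 2 reads bdf_prm.data[i] through the
 * pointer passed in "data", and func = 2*i + 3 writes it, range-checked
 * against bdflush_min[i]/bdflush_max[i]:
 *
 *      #include <stdio.h>
 *      #include <unistd.h>
 *      #include <sys/syscall.h>
 *
 *      int main(void)
 *      {
 *              int value = 0;
 *
 *              if (syscall(__NR_bdflush, 4, (long) &value) == 0)  // read param 1
 *                      printf("bdf_prm.data[1] = %d\n", value);
 *              syscall(__NR_bdflush, 5, (long) value);            // write it back
 *              return 0;
 *      }
 *
 * func == 1 is the other useful entry point: it runs sync_old_buffers()
 * once, which is essentially what update(8) wants.
 */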
1924 
1925 /* This is the actual bdflush daemon itself. It used to be started from
1926  * the syscall above, but now we launch it ourselves internally with
1927  * kernel_thread(...)  directly after the first thread in init/main.c */
1928 
1929 int bdflush(void * unused) 
1930 {
1931         int i;
1932         int ndirty;
1933         int nlist;
1934         int ncount;
1935         struct buffer_head * bh, *next;
1936 
1937         /*
1938          *      We have a bare-bones task_struct, and really should fill
1939          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1940          *      display semi-sane things. Not really crucial though...
1941          */
1942 
1943         current->session = 1;
1944         current->pgrp = 1;
1945         sprintf(current->comm, "kflushd");
1946 
1947         /*
1948          *      As a kernel thread we want to tamper with system buffers
1949          *      and other internals and thus be subject to the SMP locking
1950          *      rules. (On a uniprocessor box this does nothing).
1951          */
1952          
1953 #ifdef __SMP__
1954         lock_kernel();
1955         syscall_count++;
1956 #endif
1957                  
1958         for (;;) {
1959 #ifdef DEBUG
1960                 printk("bdflush() activated...");
1961 #endif
1962                 
1963                 ncount = 0;
1964 #ifdef DEBUG
1965                 for(nlist = 0; nlist < NR_LIST; nlist++)
1966 #else
1967                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1968 #endif
1969                  {
1970                          ndirty = 0;
1971                  repeat:
1972                          bh = lru_list[nlist];
1973                          if(bh) 
1974                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1975                                        bh = next) {
1976                                           /* We may have stalled while waiting for I/O to complete. */
1977                                           if(bh->b_list != nlist) goto repeat;
1978                                           next = bh->b_next_free;
1979                                           if(!lru_list[nlist]) {
1980                                                   printk("Dirty list empty %d\n", i);
1981                                                   break;
1982                                           }
1983                                           
1984                                           /* Clean buffer on dirty list?  Refile it */
1985                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1986                                            {
1987                                                    refile_buffer(bh);
1988                                                    continue;
1989                                            }
1990                                           
1991                                           if (buffer_locked(bh) || !buffer_dirty(bh))
1992                                                    continue;
1993                                           /* Should we write back buffers that are shared or not??
1994                                              currently dirty buffers are not shared, so it does not matter */
1995                                           bh->b_count++;
1996                                           ndirty++;
1997                                           bh->b_flushtime = 0;
1998                                           ll_rw_block(WRITE, 1, &bh);
1999 #ifdef DEBUG
2000                                           if(nlist != BUF_DIRTY) ncount++;
2001 #endif
2002                                           bh->b_count--;
2003                                   }
2004                  }
2005 #ifdef DEBUG
2006                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2007                 printk("sleeping again.\n");
2008 #endif
2009                 wake_up(&bdflush_done);
2010                 
2011                 /* If there are still a lot of dirty buffers around, skip the sleep
2012                    and flush some more */
2013                 
2014                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2015                    bdf_prm.b_un.nfract/100) {
2016                         current->signal = 0;
2017                         interruptible_sleep_on(&bdflush_wait);
2018                 }
2019         }
2020 }
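
/*
 * A worked example of the sleep test above, with illustrative numbers: if
 * nr_buffers is 2000, 200 of them are on BUF_SHARED and nfract is set to
 * 30, the threshold is (2000 - 200) * 30 / 100 = 540.  After a flushing
 * pass, bdflush goes back to sleep on bdflush_wait only if no more than
 * 540 dirty buffers remain; otherwise it skips the sleep and immediately
 * starts another pass of up to bdf_prm.b_un.ndirty write-backs.
 */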
2021 
2022 
2023 /*
2024  * Overrides for Emacs so that we follow Linus's tabbing style.
2025  * Emacs will notice this stuff at the end of the file and automatically
2026  * adjust the settings for this buffer only.  This must remain at the end
2027  * of the file.
2028  * ---------------------------------------------------------------------------
2029  * Local variables:
2030  * c-indent-level: 8
2031  * c-brace-imaginary-offset: 0
2032  * c-brace-offset: -8
2033  * c-argdecl-indent: 8
2034  * c-label-offset: -8
2035  * c-continued-statement-offset: 8
2036  * c-continued-brace-offset: 0
2037  * End:
2038  */
