root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. brw_page
  34. mark_buffer_uptodate
  35. unlock_buffer
  36. generic_readpage
  37. grow_buffers
  38. try_to_free_buffer
  39. age_buffer
  40. maybe_shrink_lav_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush
  51. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 5
  39 static char buffersize_index[17] =
  40 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
  41 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
  42 
  43 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  44 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
  45 
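/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * the two tables above give a compact mapping between a buffer size and
 * the slot used by the per-size arrays (free_list[], nr_free[],
 * nr_buffers_size[] and so on).  For example, BUFSIZE_INDEX(1024)
 * evaluates to buffersize_index[1024 >> 9] == buffersize_index[2] == 1,
 * and bufferindex_size[1] == 1024 maps back again.  A minimal,
 * hypothetical sanity check using only the definitions above:
 */
#if 0	/* illustrative sketch only */
static void check_bufsize_index(void)
{
        int i;

        /* every supported buffer size must round-trip through the tables */
        for (i = 0; i < NR_SIZES; i++)
                if (BUFSIZE_INDEX(bufferindex_size[i]) != i)
                        printk("buffer size tables disagree at slot %d\n", i);
}
#endif
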
  46 static int grow_buffers(int pri, int size);
  47 static int shrink_specific_buffers(unsigned int priority, int size);
  48 static int maybe_shrink_lav_buffers(int);
  49 
  50 static int nr_hash = 0;  /* Size of hash table */
  51 static struct buffer_head ** hash_table;
  52 struct buffer_head ** buffer_pages;
  53 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  54 /* next_to_age is an array of pointers into the lru lists, used to
  55    cycle through the buffers aging their contents when deciding which
  56    buffers to discard when more memory is needed */
  57 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  58 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  59 
  60 static struct buffer_head * unused_list = NULL;
  61 struct buffer_head * reuse_list = NULL;
  62 static struct wait_queue * buffer_wait = NULL;
  63 
  64 int nr_buffers = 0;
  65 int nr_buffers_type[NR_LIST] = {0,};
  66 int nr_buffers_size[NR_SIZES] = {0,};
  67 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  68 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  69 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  70 int nr_free[NR_SIZES] = {0,};
  71 int buffermem = 0;
  72 int nr_buffer_heads = 0;
  73 extern int *blksize_size[];
  74 
  75 /* Here is the parameter block for the bdflush process. */
  76 static void wakeup_bdflush(int);
  77 
  78 #define N_PARAM 9
  79 #define LAV
  80 
  81 static union bdflush_param{
  82         struct {
  83                 int nfract;  /* Percentage of buffer cache dirty to 
  84                                 activate bdflush */
  85                 int ndirty;  /* Maximum number of dirty blocks to write out per
  86                                 wake-cycle */
  87                 int nrefill; /* Number of clean buffers to try and obtain
  88                                 each time we call refill */
  89                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  90                                   when trying to refill buffers. */
  91                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  92                                     search for free clusters */
  93                 int age_buffer;  /* Time for normal buffer to age before 
  94                                     we flush it */
  95                 int age_super;  /* Time for superblock to age before we 
  96                                    flush it */
  97                 int lav_const;  /* Constant used for load average (time
   98                                    constant) */
  99                 int lav_ratio;  /* Used to determine how low a lav for a
 100                                    particular size can go before we start to
 101                                    trim back the buffers */
 102         } b_un;
 103         unsigned int data[N_PARAM];
 104 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 105 
 106 /* The lav constant is set for 1 minute, as long as the update process runs
 107    every 5 seconds.  If you change the frequency of update, the time
 108    constant will also change. */
 109 
 110 
 111 /* These are the min and max parameter values that we will allow to be assigned */
 112 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 113 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
 114 
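/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * the union above lets the rest of buffer.c read the tuning values by
 * name (bdf_prm.b_un.nfract, bdf_prm.b_un.ndirty, ...) while a tuning
 * interface can treat the same storage as the indexable array
 * bdf_prm.data[], with bdflush_min[]/bdflush_max[] giving the legal
 * range for each slot.  For instance, bdf_prm.data[0] aliases
 * bdf_prm.b_un.nfract (default 25).  A hypothetical, range-checked
 * update helper might look like this:
 */
#if 0	/* illustrative sketch only */
static int set_bdflush_param(unsigned int n, unsigned int value)
{
        if (n >= N_PARAM)
                return -EINVAL;
        if (value < bdflush_min[n] || value > bdflush_max[n])
                return -EINVAL;
        bdf_prm.data[n] = value;
        return 0;
}
#endif
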
 115 /*
 116  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 117  * and getting rid of the cli-sti pairs. The wait-queue routines still
 118  * need cli-sti, but now it's just a couple of 386 instructions or so.
 119  *
 120  * Note that the real wait_on_buffer() is an inline function that checks
 121  * if 'b_wait' is set before calling this, so that the queues aren't set
 122  * up unnecessarily.
 123  */
 124 void __wait_on_buffer(struct buffer_head * bh)
 125 {
 126         struct wait_queue wait = { current, NULL };
 127 
 128         bh->b_count++;
 129         add_wait_queue(&bh->b_wait, &wait);
 130 repeat:
 131         current->state = TASK_UNINTERRUPTIBLE;
 132         if (buffer_locked(bh)) {
 133                 schedule();
 134                 goto repeat;
 135         }
 136         remove_wait_queue(&bh->b_wait, &wait);
 137         bh->b_count--;
 138         current->state = TASK_RUNNING;
 139 }
 140 
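/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * as the comment above explains, callers normally go through the inline
 * wrapper wait_on_buffer() (declared in <linux/locks.h>), which only
 * drops into __wait_on_buffer() when the buffer is actually locked, so
 * no wait queue is set up in the common, unlocked case.  The wrapper is
 * roughly equivalent to this sketch:
 */
#if 0	/* illustrative sketch only */
extern inline void wait_on_buffer(struct buffer_head * bh)
{
        if (buffer_locked(bh))
                __wait_on_buffer(bh);
}
#endif
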
 141 /* Call sync_buffers with wait!=0 to ensure that the call does not
 142    return until all buffer writes have completed.  Sync() may return
 143    before the writes have finished; fsync() may not. */
 144 
 145 
 146 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 147    spontaneously dirty themselves without ever brelse being called.
 148    We will ultimately want to put these in a separate list, but for
 149    now we search all of the lists for dirty buffers */
 150 
 151 static int sync_buffers(kdev_t dev, int wait)
 152 {
 153         int i, retry, pass = 0, err = 0;
 154         int nlist, ncount;
 155         struct buffer_head * bh, *next;
 156 
 157         /* One pass for no-wait, three for wait:
 158            0) write out all dirty, unlocked buffers;
 159            1) write out all dirty buffers, waiting if locked;
 160            2) wait for completion by waiting for all buffers to unlock. */
 161  repeat:
 162         retry = 0;
 163  repeat2:
 164         ncount = 0;
 165         /* We search all lists as a failsafe mechanism, not because we expect
 166            there to be dirty buffers on any of the other lists. */
 167         for(nlist = 0; nlist < NR_LIST; nlist++)
 168          {
 169          repeat1:
 170                  bh = lru_list[nlist];
 171                  if(!bh) continue;
 172                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 173                          if(bh->b_list != nlist) goto repeat1;
 174                          next = bh->b_next_free;
 175                          if(!lru_list[nlist]) break;
 176                          if (dev && bh->b_dev != dev)
 177                                   continue;
 178                          if (buffer_locked(bh))
 179                           {
 180                                   /* Buffer is locked; skip it unless wait is
 181                                      requested AND pass > 0. */
 182                                   if (!wait || !pass) {
 183                                           retry = 1;
 184                                           continue;
 185                                   }
 186                                   wait_on_buffer (bh);
 187                                   goto repeat2;
 188                           }
 189                          /* If an unlocked buffer is not uptodate, there has
 190                              been an IO error. Skip it. */
 191                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 192                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 193                                   err = 1;
 194                                   continue;
 195                           }
 196                          /* Don't write clean buffers.  Don't write ANY buffers
 197                             on the third pass. */
 198                          if (!buffer_dirty(bh) || pass>=2)
 199                                   continue;
 200                          /* don't bother about locked buffers */
 201                          if (buffer_locked(bh))
 202                                  continue;
 203                          bh->b_count++;
 204                          bh->b_flushtime = 0;
 205                          ll_rw_block(WRITE, 1, &bh);
 206 
 207                          if(nlist != BUF_DIRTY) { 
 208                                  printk("[%d %s %ld] ", nlist,
 209                                         kdevname(bh->b_dev), bh->b_blocknr);
 210                                  ncount++;
 211                          };
 212                          bh->b_count--;
 213                          retry = 1;
 214                  }
 215          }
 216         if (ncount)
 217           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 218         
 219         /* If we are waiting for the sync to succeed, and if any dirty
 220            blocks were written, then repeat; on the second pass, only
 221            wait for buffers being written (do not pass to write any
 222            more buffers on the second pass). */
 223         if (wait && retry && ++pass<=2)
 224                  goto repeat;
 225         return err;
 226 }
 227 
 228 void sync_dev(kdev_t dev)
 229 {
 230         sync_buffers(dev, 0);
 231         sync_supers(dev);
 232         sync_inodes(dev);
 233         sync_buffers(dev, 0);
 234         sync_dquots(dev, -1);
 235 }
 236 
 237 int fsync_dev(kdev_t dev)
 238 {
 239         sync_buffers(dev, 0);
 240         sync_supers(dev);
 241         sync_inodes(dev);
 242         sync_dquots(dev, -1);
 243         return sync_buffers(dev, 1);
 244 }
 245 
 246 asmlinkage int sys_sync(void)
 247 {
 248         fsync_dev(0);
 249         return 0;
 250 }
 251 
 252 int file_fsync (struct inode *inode, struct file *filp)
 253 {
 254         return fsync_dev(inode->i_dev);
 255 }
 256 
 257 asmlinkage int sys_fsync(unsigned int fd)
 258 {
 259         struct file * file;
 260         struct inode * inode;
 261 
 262         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 263                 return -EBADF;
 264         if (!file->f_op || !file->f_op->fsync)
 265                 return -EINVAL;
 266         if (file->f_op->fsync(inode,file))
 267                 return -EIO;
 268         return 0;
 269 }
 270 
 271 asmlinkage int sys_fdatasync(unsigned int fd)
 272 {
 273         struct file * file;
 274         struct inode * inode;
 275 
 276         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 277                 return -EBADF;
 278         if (!file->f_op || !file->f_op->fsync)
 279                 return -EINVAL;
 280         /* this needs further work, at the moment it is identical to fsync() */
 281         if (file->f_op->fsync(inode,file))
 282                 return -EIO;
 283         return 0;
 284 }
 285 
 286 void invalidate_buffers(kdev_t dev)
 287 {
 288         int i;
 289         int nlist;
 290         struct buffer_head * bh;
 291 
 292         for(nlist = 0; nlist < NR_LIST; nlist++) {
 293                 bh = lru_list[nlist];
 294                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 295                         if (bh->b_dev != dev)
 296                                 continue;
 297                         wait_on_buffer(bh);
 298                         if (bh->b_dev != dev)
 299                                 continue;
 300                         if (bh->b_count)
 301                                 continue;
 302                         bh->b_flushtime = 0;
 303                         clear_bit(BH_Protected, &bh->b_state);
 304                         clear_bit(BH_Uptodate, &bh->b_state);
 305                         clear_bit(BH_Dirty, &bh->b_state);
 306                         clear_bit(BH_Req, &bh->b_state);
 307                 }
 308         }
 309 }
 310 
 311 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 312 #define hash(dev,block) hash_table[_hashfn(dev,block)]
 313 
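/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * buffers hash to one of nr_hash buckets via _hashfn() and are chained
 * through b_next/b_prev, so a lookup is "pick the bucket, then walk
 * b_next" (see find_buffer() below).  A hypothetical helper that counts
 * the collision chain for one bucket, using only the macros above:
 */
#if 0	/* illustrative sketch only */
static int hash_chain_length(kdev_t dev, int block)
{
        struct buffer_head * tmp;
        int n = 0;

        for (tmp = hash(dev, block); tmp != NULL; tmp = tmp->b_next)
                n++;
        return n;
}
#endif
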
 314 static inline void remove_from_hash_queue(struct buffer_head * bh)
 315 {
 316         if (bh->b_next)
 317                 bh->b_next->b_prev = bh->b_prev;
 318         if (bh->b_prev)
 319                 bh->b_prev->b_next = bh->b_next;
 320         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 321                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 322         bh->b_next = bh->b_prev = NULL;
 323 }
 324 
 325 static inline void remove_from_lru_list(struct buffer_head * bh)
 326 {
 327         if (!(bh->b_prev_free) || !(bh->b_next_free))
 328                 panic("VFS: LRU block list corrupted");
 329         if (bh->b_dev == B_FREE)
 330                 panic("LRU list corrupted");
 331         bh->b_prev_free->b_next_free = bh->b_next_free;
 332         bh->b_next_free->b_prev_free = bh->b_prev_free;
 333 
 334         if (lru_list[bh->b_list] == bh)
 335                  lru_list[bh->b_list] = bh->b_next_free;
 336         if (lru_list[bh->b_list] == bh)
 337                  lru_list[bh->b_list] = NULL;
 338         if (next_to_age[bh->b_list] == bh)
 339                 next_to_age[bh->b_list] = bh->b_next_free;
 340         if (next_to_age[bh->b_list] == bh)
 341                 next_to_age[bh->b_list] = NULL;
 342 
 343         bh->b_next_free = bh->b_prev_free = NULL;
 344 }
 345 
 346 static inline void remove_from_free_list(struct buffer_head * bh)
 347 {
 348         int isize = BUFSIZE_INDEX(bh->b_size);
 349         if (!(bh->b_prev_free) || !(bh->b_next_free))
 350                 panic("VFS: Free block list corrupted");
 351         if(bh->b_dev != B_FREE)
 352                 panic("Free list corrupted");
 353         if(!free_list[isize])
 354                 panic("Free list empty");
 355         nr_free[isize]--;
 356         if(bh->b_next_free == bh)
 357                  free_list[isize] = NULL;
 358         else {
 359                 bh->b_prev_free->b_next_free = bh->b_next_free;
 360                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 361                 if (free_list[isize] == bh)
 362                          free_list[isize] = bh->b_next_free;
 363         };
 364         bh->b_next_free = bh->b_prev_free = NULL;
 365 }
 366 
 367 static inline void remove_from_queues(struct buffer_head * bh)
 368 {
 369         if(bh->b_dev == B_FREE) {
 370                 remove_from_free_list(bh); /* Free list entries should not be
 371                                               in the hash queue */
 372                 return;
 373         };
 374         nr_buffers_type[bh->b_list]--;
 375         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 376         remove_from_hash_queue(bh);
 377         remove_from_lru_list(bh);
 378 }
 379 
 380 static inline void put_last_lru(struct buffer_head * bh)
 381 {
 382         if (!bh)
 383                 return;
 384         if (bh == lru_list[bh->b_list]) {
 385                 lru_list[bh->b_list] = bh->b_next_free;
 386                 if (next_to_age[bh->b_list] == bh)
 387                         next_to_age[bh->b_list] = bh->b_next_free;
 388                 return;
 389         }
 390         if(bh->b_dev == B_FREE)
 391                 panic("Wrong block for lru list");
 392         remove_from_lru_list(bh);
 393 /* add to back of free list */
 394 
 395         if(!lru_list[bh->b_list]) {
 396                 lru_list[bh->b_list] = bh;
 397                 lru_list[bh->b_list]->b_prev_free = bh;
 398         };
 399         if (!next_to_age[bh->b_list])
 400                 next_to_age[bh->b_list] = bh;
 401 
 402         bh->b_next_free = lru_list[bh->b_list];
 403         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 404         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 405         lru_list[bh->b_list]->b_prev_free = bh;
 406 }
 407 
 408 static inline void put_last_free(struct buffer_head * bh)
 409 {
 410         int isize;
 411         if (!bh)
 412                 return;
 413 
 414         isize = BUFSIZE_INDEX(bh->b_size);      
 415         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 416         /* add to back of free list */
 417         if(!free_list[isize]) {
 418                 free_list[isize] = bh;
 419                 bh->b_prev_free = bh;
 420         };
 421 
 422         nr_free[isize]++;
 423         bh->b_next_free = free_list[isize];
 424         bh->b_prev_free = free_list[isize]->b_prev_free;
 425         free_list[isize]->b_prev_free->b_next_free = bh;
 426         free_list[isize]->b_prev_free = bh;
 427 }
 428 
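/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * each free_list[isize] is a circular, doubly linked list threaded
 * through b_next_free/b_prev_free, and nr_free[isize] tracks its length.
 * A hypothetical consistency check that walks one such list:
 */
#if 0	/* illustrative sketch only */
static int count_free_buffers(int isize)
{
        struct buffer_head * bh = free_list[isize];
        int n = 0;

        if (!bh)
                return 0;
        do {
                n++;
                bh = bh->b_next_free;
        } while (bh != free_list[isize]);
        return n;	/* should equal nr_free[isize] */
}
#endif
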
 429 static inline void insert_into_queues(struct buffer_head * bh)
 430 {
 431         /* put at end of free list */
 432         if(bh->b_dev == B_FREE) {
 433                 put_last_free(bh);
 434                 return;
 435         }
 436         if(!lru_list[bh->b_list]) {
 437                 lru_list[bh->b_list] = bh;
 438                 bh->b_prev_free = bh;
 439         }
 440         if (!next_to_age[bh->b_list])
 441                 next_to_age[bh->b_list] = bh;
 442         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 443         bh->b_next_free = lru_list[bh->b_list];
 444         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 445         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 446         lru_list[bh->b_list]->b_prev_free = bh;
 447         nr_buffers_type[bh->b_list]++;
 448         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 449 /* put the buffer in new hash-queue if it has a device */
 450         bh->b_prev = NULL;
 451         bh->b_next = NULL;
 452         if (!(bh->b_dev))
 453                 return;
 454         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 455         hash(bh->b_dev,bh->b_blocknr) = bh;
 456         if (bh->b_next)
 457                 bh->b_next->b_prev = bh;
 458 }
 459 
 460 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 461 {               
 462         struct buffer_head * tmp;
 463 
 464         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 465                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 466                         if (tmp->b_size == size)
 467                                 return tmp;
 468                         else {
 469                                 printk("VFS: Wrong blocksize on device %s\n",
 470                                         kdevname(dev));
 471                                 return NULL;
 472                         }
 473         return NULL;
 474 }
 475 
 476 /*
 477  * Why like this, I hear you say... The reason is race-conditions.
 478  * As we don't lock buffers (unless we are reading them, that is),
 479  * something might happen to it while we sleep (ie a read-error
 480  * will force it bad). This shouldn't really happen currently, but
 481  * the code is ready.
 482  */
 483 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 484 {
 485         struct buffer_head * bh;
 486 
 487         for (;;) {
 488                 if (!(bh=find_buffer(dev,block,size)))
 489                         return NULL;
 490                 bh->b_count++;
 491                 wait_on_buffer(bh);
 492                 if (bh->b_dev == dev && bh->b_blocknr == block
 493                                              && bh->b_size == size)
 494                         return bh;
 495                 bh->b_count--;
 496         }
 497 }
 498 
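/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * get_hash_table() returns the buffer with b_count already incremented,
 * so a caller that only wants to test for presence in the cache must
 * balance that reference with brelse().  A hypothetical sketch:
 */
#if 0	/* illustrative sketch only */
static int block_is_cached(kdev_t dev, int block, int size)
{
        struct buffer_head * bh = get_hash_table(dev, block, size);

        if (!bh)
                return 0;
        brelse(bh);	/* drop the reference get_hash_table() took */
        return 1;
}
#endif
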
 499 void set_blocksize(kdev_t dev, int size)
 500 {
 501         int i, nlist;
 502         struct buffer_head * bh, *bhnext;
 503 
 504         if (!blksize_size[MAJOR(dev)])
 505                 return;
 506 
 507         if (size > PAGE_SIZE)
 508                 size = 0;
 509 
 510         switch (size) {
 511                 default: panic("Invalid blocksize passed to set_blocksize");
 512                 case 512: case 1024: case 2048: case 4096: case 8192: ;
 513         }
 514 
 515         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 516                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 517                 return;
 518         }
 519         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 520                 return;
 521         sync_buffers(dev, 2);
 522         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 523 
 524   /* We need to be quite careful how we do this - we are moving entries
 525      around on the free list, and we can get in a loop if we are not careful.*/
 526 
 527         for(nlist = 0; nlist < NR_LIST; nlist++) {
 528                 bh = lru_list[nlist];
 529                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 530                         if(!bh) break;
 531                         bhnext = bh->b_next_free; 
 532                         if (bh->b_dev != dev)
 533                                  continue;
 534                         if (bh->b_size == size)
 535                                  continue;
 536                         
 537                         wait_on_buffer(bh);
 538                         if (bh->b_dev == dev && bh->b_size != size) {
 539                                 clear_bit(BH_Dirty, &bh->b_state);
 540                                 clear_bit(BH_Uptodate, &bh->b_state);
 541                                 clear_bit(BH_Req, &bh->b_state);
 542                                 bh->b_flushtime = 0;
 543                         }
 544                         remove_from_hash_queue(bh);
 545                 }
 546         }
 547 }
 548 
 549 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 550 
 551 void refill_freelist(int size)
 552 {
 553         struct buffer_head * bh, * tmp;
 554         struct buffer_head * candidate[NR_LIST];
 555         unsigned int best_time, winner;
 556         int isize = BUFSIZE_INDEX(size);
 557         int buffers[NR_LIST];
 558         int i;
 559         int needed;
 560 
 561         /* First see if we even need this.  Sometimes it is advantageous
  562          to request some blocks in a filesystem that we know we will
 563          be needing ahead of time. */
 564 
 565         if (nr_free[isize] > 100)
 566                 return;
 567 
 568         /* If there are too many dirty buffers, we wake up the update process
 569            now so as to ensure that there are still clean buffers available
 570            for user processes to use (and dirty) */
 571         
 572         /* We are going to try and locate this much memory */
 573         needed =bdf_prm.b_un.nrefill * size;  
 574 
 575         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 576                grow_buffers(GFP_BUFFER, size)) {
 577                 needed -= PAGE_SIZE;
 578         }
 579 
 580         if(needed <= 0) return;
 581 
 582         /* See if there are too many buffers of a different size.
 583            If so, victimize them */
 584 
 585         while(maybe_shrink_lav_buffers(size))
 586          {
 587                  if(!grow_buffers(GFP_BUFFER, size)) break;
 588                  needed -= PAGE_SIZE;
 589                  if(needed <= 0) return;
 590          };
 591 
 592         /* OK, we cannot grow the buffer cache, now try and get some
 593            from the lru list */
 594 
 595         /* First set the candidate pointers to usable buffers.  This
 596            should be quick nearly all of the time. */
 597 
 598 repeat0:
 599         for(i=0; i<NR_LIST; i++){
 600                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 601                    nr_buffers_type[i] == 0) {
 602                         candidate[i] = NULL;
 603                         buffers[i] = 0;
 604                         continue;
 605                 }
 606                 buffers[i] = nr_buffers_type[i];
 607                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 608                  {
 609                          if(buffers[i] < 0) panic("Here is the problem");
 610                          tmp = bh->b_next_free;
 611                          if (!bh) break;
 612                          
 613                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 614                              buffer_dirty(bh)) {
 615                                  refile_buffer(bh);
 616                                  continue;
 617                          }
 618                          
 619                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 620                                   continue;
 621                          
 622                          /* Buffers are written in the order they are placed 
 623                             on the locked list. If we encounter a locked
 624                             buffer here, this means that the rest of them
 625                             are also locked */
 626                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 627                                  buffers[i] = 0;
 628                                  break;
 629                          }
 630                          
 631                          if (BADNESS(bh)) continue;
 632                          break;
 633                  };
 634                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 635                 else candidate[i] = bh;
 636                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 637         }
 638         
 639  repeat:
 640         if(needed <= 0) return;
 641         
 642         /* Now see which candidate wins the election */
 643         
 644         winner = best_time = UINT_MAX;  
 645         for(i=0; i<NR_LIST; i++){
 646                 if(!candidate[i]) continue;
 647                 if(candidate[i]->b_lru_time < best_time){
 648                         best_time = candidate[i]->b_lru_time;
 649                         winner = i;
 650                 }
 651         }
 652         
 653         /* If we have a winner, use it, and then get a new candidate from that list */
 654         if(winner != UINT_MAX) {
 655                 i = winner;
 656                 bh = candidate[i];
 657                 candidate[i] = bh->b_next_free;
 658                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 659                 if (bh->b_count || bh->b_size != size)
 660                          panic("Busy buffer in candidate list\n");
 661                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 662                          panic("Shared buffer in candidate list\n");
 663                 if (buffer_protected(bh))
 664                         panic("Protected buffer in candidate list\n");
 665                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 666                 
 667                 if(bh->b_dev == B_FREE)
 668                         panic("Wrong list");
 669                 remove_from_queues(bh);
 670                 bh->b_dev = B_FREE;
 671                 put_last_free(bh);
 672                 needed -= bh->b_size;
 673                 buffers[i]--;
 674                 if(buffers[i] < 0) panic("Here is the problem");
 675                 
 676                 if(buffers[i] == 0) candidate[i] = NULL;
 677                 
 678                 /* Now all we need to do is advance the candidate pointer
 679                    from the winner list to the next usable buffer */
 680                 if(candidate[i] && buffers[i] > 0){
 681                         if(buffers[i] <= 0) panic("Here is another problem");
 682                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 683                                 if(buffers[i] < 0) panic("Here is the problem");
 684                                 tmp = bh->b_next_free;
 685                                 if (!bh) break;
 686                                 
 687                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 688                                     buffer_dirty(bh)) {
 689                                         refile_buffer(bh);
 690                                         continue;
 691                                 };
 692                                 
 693                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 694                                          continue;
 695                                 
 696                                 /* Buffers are written in the order they are
 697                                    placed on the locked list.  If we encounter
 698                                    a locked buffer here, this means that the
 699                                    rest of them are also locked */
 700                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 701                                         buffers[i] = 0;
 702                                         break;
 703                                 }
 704               
 705                                 if (BADNESS(bh)) continue;
 706                                 break;
 707                         };
 708                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 709                         else candidate[i] = bh;
 710                         if(candidate[i] && candidate[i]->b_count) 
 711                                  panic("Here is the problem");
 712                 }
 713                 
 714                 goto repeat;
 715         }
 716         
 717         if(needed <= 0) return;
 718         
 719         /* Too bad, that was not enough. Try a little harder to grow some. */
 720         
 721         if (nr_free_pages > min_free_pages + 5) {
 722                 if (grow_buffers(GFP_BUFFER, size)) {
 723                         needed -= PAGE_SIZE;
 724                         goto repeat0;
 725                 };
 726         }
 727         
 728         /* and repeat until we find something good */
 729         if (!grow_buffers(GFP_ATOMIC, size))
 730                 wakeup_bdflush(1);
 731         needed -= PAGE_SIZE;
 732         goto repeat0;
 733 }
 734 
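/*
 * Editor's note -- illustrative worked example, not part of the original
 * file: with the default tuning above, a refill request for 1024-byte
 * buffers starts from needed = nrefill * size = 64 * 1024 = 65536 bytes.
 * On a machine with a 4096-byte PAGE_SIZE each successful grow_buffers()
 * call reduces that deficit by one page, so at most 16 pages are added
 * before the routine falls back to reclaiming buffers from the LRU lists.
 */
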
 735 /*
 736  * Ok, this is getblk, and it isn't very clear, again to hinder
 737  * race-conditions. Most of the code is seldom used, (ie repeating),
 738  * so it should be much more efficient than it looks.
 739  *
 740  * The algorithm is changed: hopefully better, and an elusive bug removed.
 741  *
 742  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 743  * when the filesystem starts to get full of dirty blocks (I hope).
 744  */
 745 struct buffer_head * getblk(kdev_t dev, int block, int size)
 746 {
 747         struct buffer_head * bh;
 748         int isize = BUFSIZE_INDEX(size);
 749 
 750         /* Update this for the buffer size lav. */
 751         buffer_usage[isize]++;
 752 
 753         /* If there are too many dirty buffers, we wake up the update process
 754            now so as to ensure that there are still clean buffers available
 755            for user processes to use (and dirty) */
 756 repeat:
 757         bh = get_hash_table(dev, block, size);
 758         if (bh) {
 759                 if (!buffer_dirty(bh)) {
 760                         if (buffer_uptodate(bh))
 761                                  put_last_lru(bh);
 762                         bh->b_flushtime = 0;
 763                 }
 764                 set_bit(BH_Touched, &bh->b_state);
 765                 return bh;
 766         }
 767 
 768         while(!free_list[isize]) refill_freelist(size);
 769         
 770         if (find_buffer(dev,block,size))
 771                  goto repeat;
 772 
 773         bh = free_list[isize];
 774         remove_from_free_list(bh);
 775 
 776 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 777 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 778         bh->b_count=1;
 779         bh->b_flushtime=0;
 780         bh->b_state=(1<<BH_Touched);
 781         bh->b_dev=dev;
 782         bh->b_blocknr=block;
 783         insert_into_queues(bh);
 784         return bh;
 785 }
 786 
 787 void set_writetime(struct buffer_head * buf, int flag)
 788 {
 789         int newtime;
 790 
 791         if (buffer_dirty(buf)) {
 792                 /* Move buffer to dirty list if jiffies is clear */
 793                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 794                                      bdf_prm.b_un.age_buffer);
 795                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 796                          buf->b_flushtime = newtime;
 797         } else {
 798                 buf->b_flushtime = 0;
 799         }
 800 }
 801 
 802 
 803 void refile_buffer(struct buffer_head * buf)
 804 {
 805         int dispose;
 806 
 807         if(buf->b_dev == B_FREE) {
 808                 printk("Attempt to refile free buffer\n");
 809                 return;
 810         }
 811         if (buffer_dirty(buf))
 812                 dispose = BUF_DIRTY;
 813         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 814                 dispose = BUF_SHARED;
 815         else if (buffer_locked(buf))
 816                 dispose = BUF_LOCKED;
 817         else if (buf->b_list == BUF_SHARED)
 818                 dispose = BUF_UNSHARED;
 819         else
 820                 dispose = BUF_CLEAN;
 821         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 822         if(dispose != buf->b_list)  {
 823                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 824                          buf->b_lru_time = jiffies;
 825                 if(dispose == BUF_LOCKED && 
 826                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 827                          dispose = BUF_LOCKED1;
 828                 remove_from_queues(buf);
 829                 buf->b_list = dispose;
 830                 insert_into_queues(buf);
 831                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 832                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 833                    bdf_prm.b_un.nfract/100)
 834                          wakeup_bdflush(0);
 835         }
 836 }
 837 
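/*
 * Editor's note -- illustrative worked example, not part of the original
 * file: the wakeup_bdflush(0) call above fires once the dirty buffers
 * exceed nfract percent (default 25) of the non-shared buffers.  For
 * example, with nr_buffers == 2000 and nr_buffers_type[BUF_SHARED] == 400,
 * the threshold is (2000 - 400) * 25 / 100 == 400 dirty buffers.
 */
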
 838 /*
 839  * Release a buffer head
 840  */
 841 void __brelse(struct buffer_head * buf)
 842 {
 843         wait_on_buffer(buf);
 844 
 845         /* If dirty, mark the time this buffer should be written back */
 846         set_writetime(buf, 0);
 847         refile_buffer(buf);
 848 
 849         if (buf->b_count) {
 850                 buf->b_count--;
 851                 return;
 852         }
 853         printk("VFS: brelse: Trying to free free buffer\n");
 854 }
 855 
 856 /*
 857  * bforget() is like brelse(), except it removes the buffer
 858  * from the hash-queues (so that it won't be re-used if it's
 859  * shared).
 860  */
 861 void __bforget(struct buffer_head * buf)
 862 {
 863         wait_on_buffer(buf);
 864         mark_buffer_clean(buf);
 865         clear_bit(BH_Protected, &buf->b_state);
 866         buf->b_count--;
 867         remove_from_hash_queue(buf);
 868         buf->b_dev = NODEV;
 869         refile_buffer(buf);
 870 }
 871 
 872 /*
 873  * bread() reads a specified block and returns the buffer that contains
 874  * it. It returns NULL if the block was unreadable.
 875  */
 876 struct buffer_head * bread(kdev_t dev, int block, int size)
 877 {
 878         struct buffer_head * bh;
 879 
 880         if (!(bh = getblk(dev, block, size))) {
 881                 printk("VFS: bread: READ error on device %s\n",
 882                         kdevname(dev));
 883                 return NULL;
 884         }
 885         if (buffer_uptodate(bh))
 886                 return bh;
 887         ll_rw_block(READ, 1, &bh);
 888         wait_on_buffer(bh);
 889         if (buffer_uptodate(bh))
 890                 return bh;
 891         brelse(bh);
 892         return NULL;
 893 }
 894 
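/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * a typical bread() caller uses the returned buffer's b_data and then
 * releases the reference with brelse().  A hypothetical example for a
 * 1024-byte block:
 */
#if 0	/* illustrative sketch only */
static int read_first_byte(kdev_t dev, int block)
{
        struct buffer_head * bh = bread(dev, block, 1024);
        int byte;

        if (!bh)
                return -EIO;	/* block was unreadable */
        byte = ((unsigned char *) bh->b_data)[0];
        brelse(bh);		/* drop the reference bread() took */
        return byte;
}
#endif
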
 895 /*
 896  * Ok, breada can be used as bread, but additionally to mark other
 897  * blocks for reading as well. End the argument list with a negative
 898  * number.
 899  */
 900 
 901 #define NBUF 16
 902 
 903 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 904         unsigned int pos, unsigned int filesize)
 905 {
 906         struct buffer_head * bhlist[NBUF];
 907         unsigned int blocks;
 908         struct buffer_head * bh;
 909         int index;
 910         int i, j;
 911 
 912         if (pos >= filesize)
 913                 return NULL;
 914 
 915         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 916                 return NULL;
 917 
 918         index = BUFSIZE_INDEX(bh->b_size);
 919 
 920         if (buffer_uptodate(bh))
 921                 return bh;
 922 
 923         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 924 
 925         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 926                 blocks = read_ahead[MAJOR(dev)] >> index;
 927         if (blocks > NBUF)
 928                 blocks = NBUF;
 929         
 930         bhlist[0] = bh;
 931         j = 1;
 932         for(i=1; i<blocks; i++) {
 933                 bh = getblk(dev,block+i,bufsize);
 934                 if (buffer_uptodate(bh)) {
 935                         brelse(bh);
 936                         break;
 937                 }
 938                 bhlist[j++] = bh;
 939         }
 940 
 941         /* Request the read for these buffers, and then release them */
 942         ll_rw_block(READ, j, bhlist);
 943 
 944         for(i=1; i<j; i++)
 945                 brelse(bhlist[i]);
 946 
 947         /* Wait for this buffer, and then continue on */
 948         bh = bhlist[0];
 949         wait_on_buffer(bh);
 950         if (buffer_uptodate(bh))
 951                 return bh;
 952         brelse(bh);
 953         return NULL;
 954 }
 955 
 956 /*
 957  * See fs/inode.c for the weird use of volatile..
 958  */
 959 static void put_unused_buffer_head(struct buffer_head * bh)
 960 {
 961         struct wait_queue * wait;
 962 
 963         wait = ((volatile struct buffer_head *) bh)->b_wait;
 964         memset(bh,0,sizeof(*bh));
 965         ((volatile struct buffer_head *) bh)->b_wait = wait;
 966         bh->b_next_free = unused_list;
 967         unused_list = bh;
 968         wake_up(&buffer_wait);
 969 }
 970 
 971 static void get_more_buffer_heads(void)
 972 {
 973         int i;
 974         struct buffer_head * bh;
 975 
 976         for (;;) {
 977                 if (unused_list)
 978                         return;
 979 
 980                 /*
 981                  * This is critical.  We can't swap out pages to get
 982                  * more buffer heads, because the swap-out may need
 983                  * more buffer-heads itself.  Thus GFP_ATOMIC.
 984                  */
 985                 bh = (struct buffer_head *) get_free_page(GFP_ATOMIC);
 986                 if (bh)
 987                         break;
 988 
 989                 /*
 990                  * Uhhuh. We're _really_ low on memory. Now we just
 991                  * wait for old buffer heads to become free due to
 992                  * finishing IO..
 993                  */
 994                 sleep_on(&buffer_wait);
 995         }
 996 
 997         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 998                 bh->b_next_free = unused_list;  /* only make link */
 999                 unused_list = bh++;
1000         }
1001 }
1002 
1003 /* 
1004  * We can't put completed temporary IO buffer_heads directly onto the
1005  * unused_list when they become unlocked, since the device driver
1006  * end_request routines still expect access to the buffer_head's
1007  * fields after the final unlock.  So, the device driver puts them on
1008  * the reuse_list instead once IO completes, and we recover these to
1009  * the unused_list here.
1010  *
1011  * The reuse_list receives buffers from interrupt routines, so we need
1012  * to be IRQ-safe here.
1013  */
1014 static inline void recover_reusable_buffer_heads(void)
1015 {
1016         struct buffer_head *bh;
1017         unsigned long flags;
1018         
1019         save_flags(flags);
1020         while (reuse_list) {
1021                 cli();
1022                 bh = reuse_list;
1023                 reuse_list = bh->b_next_free;
1024                 restore_flags(flags);
1025                 put_unused_buffer_head(bh);
1026         }
1027 }
1028 
1029 static struct buffer_head * get_unused_buffer_head(void)
1030 {
1031         struct buffer_head * bh;
1032 
1033         recover_reusable_buffer_heads();
1034         get_more_buffer_heads();
1035         if (!unused_list)
1036                 return NULL;
1037         bh = unused_list;
1038         unused_list = bh->b_next_free;
1039         bh->b_next_free = NULL;
1040         bh->b_data = NULL;
1041         bh->b_size = 0;
1042         bh->b_state = 0;
1043         return bh;
1044 }
1045 
1046 /*
1047  * Create the appropriate buffers when given a page for data area and
1048  * the size of each buffer.. Use the bh->b_this_page linked list to
1049  * follow the buffers created.  Return NULL if unable to create more
1050  * buffers.
1051  */
1052 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1053 {
1054         struct buffer_head *bh, *head;
1055         unsigned long offset;
1056 
1057         head = NULL;
1058         offset = PAGE_SIZE;
1059         while ((offset -= size) < PAGE_SIZE) {
1060                 bh = get_unused_buffer_head();
1061                 if (!bh)
1062                         goto no_grow;
1063                 bh->b_this_page = head;
1064                 head = bh;
1065                 bh->b_data = (char *) (page+offset);
1066                 bh->b_size = size;
1067                 bh->b_dev = B_FREE;  /* Flag as unused */
1068         }
1069         return head;
1070 /*
1071  * In case anything failed, we just free everything we got.
1072  */
1073 no_grow:
1074         bh = head;
1075         while (bh) {
1076                 head = bh;
1077                 bh = bh->b_this_page;
1078                 put_unused_buffer_head(head);
1079         }
1080         return NULL;
1081 }
1082 
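/*
 * Editor's note -- illustrative sketch, not part of the original file:
 * the chain returned by create_buffers() is linked through b_this_page
 * and is NULL-terminated; callers such as brw_page() and grow_buffers()
 * close it into a ring (last->b_this_page = first) before using it.  The
 * usual traversal is a do/while that stops on NULL or when it returns to
 * the starting buffer, as in this hypothetical helper:
 */
#if 0	/* illustrative sketch only */
static int count_buffers_on_page(struct buffer_head * bh)
{
        struct buffer_head * tmp = bh;
        int n = 0;

        if (!bh)
                return 0;
        do {
                n++;
                tmp = tmp->b_this_page;
        } while (tmp && tmp != bh);
        return n;
}
#endif
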
1083 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1084 {
1085         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1086         int block, nr;
1087         struct page *page;
1088 
1089         page = mem_map + MAP_NR(address);
1090         page->uptodate = 0;
1091         bh = create_buffers(address, size);
1092         if (!bh)
1093                 return -ENOMEM;
1094         nr = 0;
1095         next = bh;
1096         do {
1097                 struct buffer_head * tmp;
1098                 block = *(b++);
1099 
1100                 set_bit(BH_FreeOnIO, &next->b_state);
1101                 next->b_list = BUF_CLEAN;
1102                 next->b_dev = dev;
1103                 next->b_blocknr = block;
1104                 next->b_count = 1;
1105                 next->b_flushtime = 0;
1106                 set_bit(BH_Uptodate, &next->b_state);
1107 
1108                 /* When we use bmap, we define block zero to represent
1109                    a hole.  ll_rw_page, however, may legitimately
1110                    access block zero, and we need to distinguish the
1111                    two cases. 
1112                    */
1113                 if (bmap && !block) {
1114                         memset(next->b_data, 0, size);
1115                         next->b_count--;
1116                         continue;
1117                 }
1118                 tmp = get_hash_table(dev, block, size);
1119                 if (tmp) {
1120                         if (!buffer_uptodate(tmp)) {
1121                                 if (rw == READ)
1122                                         ll_rw_block(READ, 1, &tmp);
1123                                 wait_on_buffer(tmp);
1124                         }
1125                         if (rw == READ) 
1126                                 memcpy(next->b_data, tmp->b_data, size);
1127                         else {
1128                                 memcpy(tmp->b_data, next->b_data, size);
1129                                 mark_buffer_dirty(tmp, 0);
1130                         }
1131                         brelse(tmp);
1132                         next->b_count--;
1133                         continue;
1134                 }
1135                 if (rw == READ)
1136                         clear_bit(BH_Uptodate, &next->b_state);
1137                 else
1138                         set_bit(BH_Dirty, &next->b_state);
1139                 arr[nr++] = next;
1140         } while (prev = next, (next = next->b_this_page) != NULL);
1141         prev->b_this_page = bh;
1142         
1143         if (nr)
1144                 ll_rw_block(rw, nr, arr);
1145         else {
1146                 page->locked = 0;
1147                 page->uptodate = 1;
1148                 wake_up(&page->wait);
1149                 next = bh;
1150                 do {
1151                         next->b_next_free = reuse_list;
1152                         reuse_list = next;
1153                         next = next->b_this_page;
1154                 } while (next != bh);
1155         }
1156         ++current->maj_flt;
1157         return 0;
1158 }
1159 
1160 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1161 {
1162         if (on) {
1163                 struct buffer_head *tmp = bh;
1164                 int page_uptodate = 1;
1165                 set_bit(BH_Uptodate, &bh->b_state);
1166                 do {
1167                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1168                                 page_uptodate = 0;
1169                                 break;
1170                         }
1171                         tmp=tmp->b_this_page;
1172                 } while (tmp && tmp != bh);
1173                 if (page_uptodate)
1174                         mem_map[MAP_NR(bh->b_data)].uptodate = 1;
1175         } else
1176                 clear_bit(BH_Uptodate, &bh->b_state);
1177 }
1178 
1179 void unlock_buffer(struct buffer_head * bh)
1180 {
1181         struct buffer_head *tmp;
1182         unsigned long flags;
1183         struct page *page;
1184 
1185         clear_bit(BH_Lock, &bh->b_state);
1186         wake_up(&bh->b_wait);
1187 
1188         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1189                 return;
1190         page = mem_map + MAP_NR(bh->b_data);
1191         if (!page->locked) {
1192                 printk ("Whoops: unlock_buffer: "
1193                         "async io complete on unlocked page\n");
1194                 return;
1195         }
1196         if (bh->b_count != 1) {
1197                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1198                 return;
1199         }
1200         /* Async buffer_heads are here only as labels for IO, and get
1201            thrown away once the IO for this page is complete.  IO is
1202            deemed complete once all buffers have been visited
1203            (b_count==0) and are now unlocked. */
1204         bh->b_count--;
1205         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1206                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1207                         return;
1208         }
1209 
1210         /* OK, go ahead and complete the async IO on this page. */
1211         save_flags(flags);
1212         page->locked = 0;
1213         wake_up(&page->wait);
1214         cli();
1215         tmp = bh;
1216         do {
1217                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1218                         printk ("Whoops: unlock_buffer: "
1219                                 "async IO mismatch on page.\n");
1220                         restore_flags(flags);
1221                         return;
1222                 }
1223                 tmp->b_next_free = reuse_list;
1224                 reuse_list = tmp;
1225                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1226                 tmp = tmp->b_this_page;
1227         } while (tmp != bh);
1228         restore_flags(flags);
1229         if (page->free_after) {
1230                 extern int nr_async_pages;
1231                 nr_async_pages--;
1232                 page->free_after = 0;
1233                 free_page(page_address(page));
1234         }
1235         wake_up(&buffer_wait);
1236 }
1237 
1238 /*
1239  * Generic "readpage" function for block devices that have the normal
1240  * bmap functionality. This is most of the block device filesystems.
1241  * Reads the page asynchronously --- the unlock_buffer() and
 1242  *  mark_buffer_uptodate() functions propagate buffer state into the
1243  * page struct once IO has completed.
1244  */
1245 int generic_readpage(struct inode * inode, struct page * page)
1246 {
1247         unsigned long block, address;
1248         int *p, nr[PAGE_SIZE/512];
1249         int i;
1250 
1251         address = page_address(page);
1252         page->count++;
1253         wait_on_page(page);
1254         if (page->uptodate) {
1255                 free_page(address);
1256                 return 0;
1257         }
1258         page->locked = 1;
1259         
1260         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1261         block = page->offset >> inode->i_sb->s_blocksize_bits;
1262         p = nr;
1263         do {
1264                 *p = inode->i_op->bmap(inode, block);
1265                 i--;
1266                 block++;
1267                 p++;
1268         } while (i > 0);
1269 
1270         /* IO start */
1271         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1272         free_page(address);
1273         return 0;
1274 }
1275 
1276 /*
1277  * Try to increase the number of buffers available: the size argument
1278  * is used to determine what kind of buffers we want.
1279  */
1280 static int grow_buffers(int pri, int size)
1281 {
1282         unsigned long page;
1283         struct buffer_head *bh, *tmp;
1284         struct buffer_head * insert_point;
1285         int isize;
1286 
1287         if ((size & 511) || (size > PAGE_SIZE)) {
1288                 printk("VFS: grow_buffers: size = %d\n",size);
1289                 return 0;
1290         }
1291 
1292         isize = BUFSIZE_INDEX(size);
1293 
1294         if (!(page = __get_free_page(pri)))
1295                 return 0;
1296         bh = create_buffers(page, size);
1297         if (!bh) {
1298                 free_page(page);
1299                 return 0;
1300         }
1301 
1302         insert_point = free_list[isize];
1303 
1304         tmp = bh;
1305         while (1) {
1306                 nr_free[isize]++;
1307                 if (insert_point) {
1308                         tmp->b_next_free = insert_point->b_next_free;
1309                         tmp->b_prev_free = insert_point;
1310                         insert_point->b_next_free->b_prev_free = tmp;
1311                         insert_point->b_next_free = tmp;
1312                 } else {
1313                         tmp->b_prev_free = tmp;
1314                         tmp->b_next_free = tmp;
1315                 }
1316                 insert_point = tmp;
1317                 ++nr_buffers;
1318                 if (tmp->b_this_page)
1319                         tmp = tmp->b_this_page;
1320                 else
1321                         break;
1322         }
1323         free_list[isize] = bh;
1324         buffer_pages[MAP_NR(page)] = bh;
1325         tmp->b_this_page = bh;
1326         buffermem += PAGE_SIZE;
1327         return 1;
1328 }
1329 
1330 
1331 /* =========== Reduce the buffer memory ============= */
1332 
1333 /*
1334  * try_to_free_buffer() checks if all the buffers on this particular page
1335  * are unused, and frees the page if so.
1336  */
1337 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1338                        int priority)
1339 {
1340         unsigned long page;
1341         struct buffer_head * tmp, * p;
1342         int isize = BUFSIZE_INDEX(bh->b_size);
1343 
1344         *bhp = bh;
1345         page = (unsigned long) bh->b_data;
1346         page &= PAGE_MASK;
1347         tmp = bh;
1348         do {
1349                 if (!tmp)
1350                         return 0;
1351                 if (tmp->b_count || buffer_protected(tmp) ||
1352                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1353                         return 0;
1354                 if (priority && buffer_touched(tmp))
1355                         return 0;
1356                 tmp = tmp->b_this_page;
1357         } while (tmp != bh);
1358         tmp = bh;
1359         do {
1360                 p = tmp;
1361                 tmp = tmp->b_this_page;
1362                 nr_buffers--;
1363                 nr_buffers_size[isize]--;
1364                 if (p == *bhp)
1365                   {
1366                     *bhp = p->b_prev_free;
1367                     if (p == *bhp) /* Was this the last in the list? */
1368                       *bhp = NULL;
1369                   }
1370                 remove_from_queues(p);
1371                 put_unused_buffer_head(p);
1372         } while (tmp != bh);
1373         buffermem -= PAGE_SIZE;
1374         buffer_pages[MAP_NR(page)] = NULL;
1375         free_page(page);
1376         return !mem_map[MAP_NR(page)].count;
1377 }
1378 
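/*
 * A minimal sketch of the calling convention, kept out of the build with
 * #if 0: try_to_free_buffer() may free the very buffer the caller is
 * iterating over, so the caller passes &bh and must re-test it
 * afterwards, exactly as the scan loops in shrink_specific_buffers()
 * further down do.
 */
#if 0
        if (try_to_free_buffer(bh, &bh, 0))
                return 1;       /* the whole page was released */
        if (!bh)
                break;          /* our buffer disappeared; stop scanning this list */
#endif
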
1379 /* Age buffers on a given page, according to whether they have been
1380    visited recently or not. */
1381 static inline void age_buffer(struct buffer_head *bh)
1382 {
1383         struct buffer_head *tmp = bh;
1384         int touched = 0;
1385 
1386         /*
1387          * When we age a page, we mark all other buffers in the page
1388          * with the "has_aged" flag.  Then, when these aliased buffers
1389          * come up for aging, we skip them until next pass.  This
1390          * ensures that a page full of multiple buffers only gets aged
1391          * once per pass through the lru lists. 
1392          */
1393         if (clear_bit(BH_Has_aged, &bh->b_state))
1394                 return;
1395         
1396         do {
1397                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1398                 tmp = tmp->b_this_page;
1399                 set_bit(BH_Has_aged, &tmp->b_state);
1400         } while (tmp != bh);
1401         clear_bit(BH_Has_aged, &bh->b_state);
1402 
1403         if (touched) 
1404                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1405         else
1406                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1407 }
1408 
1409 /*
1410  * Consult the load average for buffers and decide whether or not
1411  * we should shrink the buffers of one size.  If we decide yes, do it
1412  * and return 1; else return 0.  We never attempt to shrink buffers
1413  * of the size that is specified.
1414  *
1415  * I would prefer not to use a load average, but the way things are now it
1416  * seems unavoidable.  The way to get rid of it would be to force clustering
1417  * universally, so that when we reclaim buffers we always reclaim an entire
1418  * page.  Doing this would mean that we all need to move towards QMAGIC.
1419  */
1420 
1421 static int maybe_shrink_lav_buffers(int size)
1422 {          
1423         int nlist;
1424         int isize;
1425         int total_lav, total_n_buffers, n_sizes;
1426         
1427         /* Do not consider the shared buffers since they would not tend
1428            to have getblk called very often, and this would throw off
1429            the lav.  They are not easily reclaimable anyway (let the swapper
1430            make the first move). */
1431   
1432         total_lav = total_n_buffers = n_sizes = 0;
1433         for(nlist = 0; nlist < NR_SIZES; nlist++)
1434          {
1435                  total_lav += buffers_lav[nlist];
1436                  if(nr_buffers_size[nlist]) n_sizes++;
1437                  total_n_buffers += nr_buffers_size[nlist];
1438                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1439          }
1440         
1441         /* See if we have an excessive number of buffers of a particular
1442            size - if so, victimize that bunch. */
1443   
1444         isize = (size ? BUFSIZE_INDEX(size) : -1);
1445         
1446         if (n_sizes > 1)
1447                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1448                   {
1449                           if(nlist == isize) continue;
1450                           if(nr_buffers_size[nlist] &&
1451                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1452                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1453                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1454                                             return 1;
1455                   }
1456         return 0;
1457 }
1458 
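/*
 * A minimal sketch of the shrink test above, kept out of the build with
 * #if 0.  Dividing the cross-multiplied comparison through by
 * (total_lav * total_n_buffers) gives
 *
 *      lav_const * (buffers_lav[n] / total_lav)
 *              <  (nr_buffers_size[n] - shared_n) / total_n_buffers
 *
 * i.e. a size class is victimized when its share of the unshared buffer
 * population exceeds lav_const times its share of the recent getblk()
 * load.  The helper name and the numbers below are made up for
 * illustration only.
 */
#if 0
static int example_should_shrink(int lav_const, int lav_n, int total_lav,
                                 int unshared_n, int total_n)
{
        /* same cross-multiplied form as above, so no division is needed */
        return lav_const * lav_n * total_n < total_lav * unshared_n;
}

/* example_should_shrink(2, 10, 100, 30, 100) -> 1:
   this size holds 30% of the buffers but generates only 10% of the load. */
#endif
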
1459 /*
1460  * Try to free up some pages by shrinking the buffer-cache
1461  *
1462  * Priority tells the routine how hard to try to shrink the
1463  * buffers: 6 means "don't bother too much", while a value
1464  * of 0 means "we'd better get some free pages now".
1465  *
1466  * "limit" is meant to limit the shrink-action only to pages
1467  * that are in the 0 - limit address range, for DMA re-allocations.
1468  * We ignore that right now.
1469  */
1470 
1471 static int shrink_specific_buffers(unsigned int priority, int size)
1472 {
1473         struct buffer_head *bh;
1474         int nlist;
1475         int i, isize, isize1;
1476 
1477 #ifdef DEBUG
1478         if(size) printk("Shrinking buffers of size %d\n", size);
1479 #endif
1480         /* First try the free lists, and see if we can get a complete page
1481            from here */
1482         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1483 
1484         for(isize = 0; isize<NR_SIZES; isize++){
1485                 if(isize1 != -1 && isize1 != isize) continue;
1486                 bh = free_list[isize];
1487                 if(!bh) continue;
1488                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1489                         if (bh->b_count || buffer_protected(bh) ||
1490                             !bh->b_this_page)
1491                                  continue;
1492                         if (!age_of((unsigned long) bh->b_data) &&
1493                             try_to_free_buffer(bh, &bh, 6))
1494                                  return 1;
1495                         if(!bh) break;
1496                         /* Some interrupt must have used it after we
1497                            freed the page.  No big deal - keep looking */
1498                 }
1499         }
1500         
1501         /* Not enough in the free lists, now try the lru list */
1502         
1503         for(nlist = 0; nlist < NR_LIST; nlist++) {
1504         repeat1:
1505                 if(priority > 2 && nlist == BUF_SHARED) continue;
1506                 i = nr_buffers_type[nlist];
1507                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1508                 for ( ; i > 0; i-- ) {
1509                         bh = next_to_age[nlist];
1510                         if (!bh)
1511                                 break;
1512                         next_to_age[nlist] = bh->b_next_free;
1513 
1514                         /* First, age the buffer. */
1515                         age_buffer(bh);
1516                         /* We may have stalled while waiting for I/O
1517                            to complete. */
1518                         if(bh->b_list != nlist) goto repeat1;
1519                         if (bh->b_count || buffer_protected(bh) ||
1520                             !bh->b_this_page)
1521                                  continue;
1522                         if(size && bh->b_size != size) continue;
1523                         if (buffer_locked(bh))
1524                                  if (priority)
1525                                           continue;
1526                                  else
1527                                           wait_on_buffer(bh);
1528                         if (buffer_dirty(bh)) {
1529                                 bh->b_count++;
1530                                 bh->b_flushtime = 0;
1531                                 ll_rw_block(WRITEA, 1, &bh);
1532                                 bh->b_count--;
1533                                 continue;
1534                         }
1535                         /* At priority 6, only consider really old
1536                            (age==0) buffers for reclaiming.  At
1537                            priority 0, consider any buffers. */
1538                         if ((age_of((unsigned long) bh->b_data) >>
1539                              (6-priority)) > 0)
1540                                 continue;                               
1541                         if (try_to_free_buffer(bh, &bh, 0))
1542                                  return 1;
1543                         if(!bh) break;
1544                 }
1545         }
1546         return 0;
1547 }
1548 
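/*
 * A minimal sketch of the age test above, kept out of the build with
 * #if 0 and using a made-up helper name: a buffer is skipped unless
 * (age >> (6 - priority)) == 0, so the lazier the priority, the older
 * (i.e. the closer to age 0) a buffer has to be before it is reclaimed.
 */
#if 0
static int example_reclaimable(int age, unsigned int priority)
{
        /* priority 6: only age == 0;  priority 3: age < 8;  priority 0: age < 64 */
        return (age >> (6 - priority)) == 0;
}
#endif
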
1549 
1550 /* ================== Debugging =================== */
1551 
1552 void show_buffers(void)
1553 {
1554         struct buffer_head * bh;
1555         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1556         int protected = 0;
1557         int shared;
1558         int nlist, isize;
1559 
1560         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1561         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1562         printk("Buffer blocks:   %6d\n",nr_buffers);
1563 
1564         for(nlist = 0; nlist < NR_LIST; nlist++) {
1565           shared = found = locked = dirty = used = lastused = protected = 0;
1566           bh = lru_list[nlist];
1567           if(!bh) continue;
1568           do {
1569                 found++;
1570                 if (buffer_locked(bh))
1571                         locked++;
1572                 if (buffer_protected(bh))
1573                         protected++;
1574                 if (buffer_dirty(bh))
1575                         dirty++;
1576                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1577                 if (bh->b_count)
1578                         used++, lastused = found;
1579                 bh = bh->b_next_free;
1580               } while (bh != lru_list[nlist]);
1581         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, "
1582                "%d protected, %d dirty %d shrd\n",
1583                 nlist, found, used, lastused, locked, protected, dirty, shared);
1584         };
1585         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1586         for(isize = 0; isize<NR_SIZES; isize++){
1587                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1588                        buffers_lav[isize], nr_free[isize]);
1589                 for(nlist = 0; nlist < NR_LIST; nlist++)
1590                          printk("%7d ", nr_buffers_st[isize][nlist]);
1591                 printk("\n");
1592         }
1593 }
1594 
1595 
1596 /* ====================== Cluster patches for ext2 ==================== */
1597 
1598 /*
1599  * try_to_reassign() checks if all the buffers on this particular page
1600  * are unused, and reassigns them to a new cluster if so.
1601  */
1602 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1603                            kdev_t dev, unsigned int starting_block)
1604 {
1605         unsigned long page;
1606         struct buffer_head * tmp, * p;
1607 
1608         *bhp = bh;
1609         page = (unsigned long) bh->b_data;
1610         page &= PAGE_MASK;
1611         if(mem_map[MAP_NR(page)].count != 1) return 0;
1612         tmp = bh;
1613         do {
1614                 if (!tmp)
1615                          return 0;
1616                 
1617                 if (tmp->b_count || buffer_protected(tmp) ||
1618                     buffer_dirty(tmp) || buffer_locked(tmp))
1619                          return 0;
1620                 tmp = tmp->b_this_page;
1621         } while (tmp != bh);
1622         tmp = bh;
1623         
1624         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1625                  tmp = tmp->b_this_page;
1626         
1627         /* This is the buffer at the head of the page */
1628         bh = tmp;
1629         do {
1630                 p = tmp;
1631                 tmp = tmp->b_this_page;
1632                 remove_from_queues(p);
1633                 p->b_dev = dev;
1634                 mark_buffer_uptodate(p, 0);
1635                 clear_bit(BH_Req, &p->b_state);
1636                 p->b_blocknr = starting_block++;
1637                 insert_into_queues(p);
1638         } while (tmp != bh);
1639         return 1;
1640 }
1641 
1642 /*
1643  * Try to find a free cluster by locating a page where
1644  * all of the buffers are unused.  We would like this function
1645  * to be atomic, so we do not call anything that might cause
1646  * the process to sleep.  The priority is somewhat similar to
1647  * the priority used in shrink_buffers.
1648  * 
1649  * My thinking is that the kernel should end up using whole
1650  * pages for the buffer cache as much of the time as possible.
1651  * This way the other buffers on a particular page are likely
1652  * to be very near each other on the free list, and we will not
1653  * be expiring data prematurely.  For now we only cannibalize buffers
1654  * of the same size to keep the code simpler.
1655  */
1656 static int reassign_cluster(kdev_t dev, 
1657                      unsigned int starting_block, int size)
1658 {
1659         struct buffer_head *bh;
1660         int isize = BUFSIZE_INDEX(size);
1661         int i;
1662 
1663         /* We want to give ourselves a really good shot at generating
1664            a cluster, and since we only take buffers from the free
1665            list, we "overfill" it a little. */
1666 
1667         while(nr_free[isize] < 32) refill_freelist(size);
1668 
1669         bh = free_list[isize];
1670         if(bh)
1671                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1672                          if (!bh->b_this_page)  continue;
1673                          if (try_to_reassign(bh, &bh, dev, starting_block))
1674                                  return 4;
1675                  }
1676         return 0;
1677 }
1678 
1679 /* This function tries to generate a new cluster of buffers
1680  * from a new page in memory.  We should only do this if we have
1681  * not expanded the buffer cache to the maximum size that we allow.
1682  */
1683 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1684 {
1685         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1686         int isize = BUFSIZE_INDEX(size);
1687         unsigned long offset;
1688         unsigned long page;
1689         int nblock;
1690 
1691         page = get_free_page(GFP_NOBUFFER);
1692         if(!page) return 0;
1693 
1694         bh = create_buffers(page, size);
1695         if (!bh) {
1696                 free_page(page);
1697                 return 0;
1698         };
1699         nblock = block;
1700         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1701                 if (find_buffer(dev, nblock++, size))
1702                          goto not_aligned;
1703         }
1704         tmp = bh;
1705         nblock = 0;
1706         while (1) {
1707                 arr[nblock++] = bh;
1708                 bh->b_count = 1;
1709                 bh->b_flushtime = 0;
1710                 bh->b_state = 0;
1711                 bh->b_dev = dev;
1712                 bh->b_list = BUF_CLEAN;
1713                 bh->b_blocknr = block++;
1714                 nr_buffers++;
1715                 nr_buffers_size[isize]++;
1716                 insert_into_queues(bh);
1717                 if (bh->b_this_page)
1718                         bh = bh->b_this_page;
1719                 else
1720                         break;
1721         }
1722         buffermem += PAGE_SIZE;
1723         buffer_pages[MAP_NR(page)] = bh;
1724         bh->b_this_page = tmp;
1725         while (nblock-- > 0)
1726                 brelse(arr[nblock]);
1727         return 4; /* ?? */
1728 not_aligned:
1729         while ((tmp = bh) != NULL) {
1730                 bh = bh->b_this_page;
1731                 put_unused_buffer_head(tmp);
1732         }
1733         free_page(page);
1734         return 0;
1735 }
1736 
1737 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1738 {
1739         int i, offset;
1740         
1741         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1742                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1743                 if(find_buffer(dev, b[i], size)) return 0;
1744         };
1745 
1746         /* OK, we have a candidate for a new cluster */
1747         
1748         /* See if one size of buffer is over-represented in the buffer cache,
1749            if so reduce the numbers of buffers */
1750         if(maybe_shrink_lav_buffers(size))
1751          {
1752                  int retval;
1753                  retval = try_to_generate_cluster(dev, b[0], size);
1754                  if(retval) return retval;
1755          };
1756         
1757         if (nr_free_pages > min_free_pages*2) 
1758                  return try_to_generate_cluster(dev, b[0], size);
1759         else
1760                  return reassign_cluster(dev, b[0], size);
1761 }
1762 
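/*
 * A minimal usage sketch, kept out of the build with #if 0: the b[]
 * array handed to generate_cluster() must name consecutive, currently
 * uncached blocks covering one full page.  Assuming a 4096-byte page and
 * 1024-byte blocks, { 200, 201, 202, 203 } qualifies, while
 * { 200, 201, 203, 204 } fails the b[i]-1 != b[i-1] test and no cluster
 * is built.  The helper name and block numbers are made up.
 */
#if 0
static unsigned long example_make_cluster(kdev_t dev)
{
        int b[4] = { 200, 201, 202, 203 };      /* consecutive 1K blocks */

        return generate_cluster(dev, b, 1024);
}
#endif
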
1763 
1764 /* ===================== Init ======================= */
1765 
1766 /*
1767  * This initializes the initial buffer free list.  nr_buffers_type is set
1768  * to one less than the actual number of buffers, as a sop to backwards
1769  * compatibility --- the old code did this (I think unintentionally,
1770  * but I'm not sure), and programs in the ps package expect it.
1771  *                                      - TYT 8/30/92
1772  */
1773 void buffer_init(void)
1774 {
1775         int i;
1776         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1777         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1778 
1779         if (memsize >= 4*1024*1024) {
1780                 if(memsize >= 16*1024*1024)
1781                          nr_hash = 16381;
1782                 else
1783                          nr_hash = 4093;
1784         } else {
1785                 nr_hash = 997;
1786         };
1787         
1788         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1789                                                      sizeof(struct buffer_head *));
1790 
1791 
1792         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1793                                                      sizeof(struct buffer_head *));
1794         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1795                 buffer_pages[i] = NULL;
1796 
1797         for (i = 0 ; i < nr_hash ; i++)
1798                 hash_table[i] = NULL;
1799         lru_list[BUF_CLEAN] = 0;
1800         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1801         if (!free_list[isize])
1802                 panic("VFS: Unable to initialize buffer free list!");
1803         return;
1804 }
1805 
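/*
 * A minimal sketch of the hash sizing above, kept out of the build with
 * #if 0 and using a made-up helper name.  The three table sizes are
 * primes chosen by physical memory size, which helps keep the hash
 * chains evenly spread:
 *
 *      memsize <  4 MB          -> nr_hash =   997
 *      4 MB <= memsize < 16 MB  -> nr_hash =  4093
 *      memsize >= 16 MB         -> nr_hash = 16381
 */
#if 0
static int example_pick_nr_hash(long memsize)
{
        if (memsize >= 16*1024*1024)
                return 16381;
        if (memsize >= 4*1024*1024)
                return 4093;
        return 997;
}
#endif
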
1806 
1807 /* ====================== bdflush support =================== */
1808 
1809 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1810  * response to dirty buffers.  Once this process is activated, we write back
1811  * a limited number of buffers to the disks and then go back to sleep again.
1812  */
1813 struct wait_queue * bdflush_wait = NULL;
1814 struct wait_queue * bdflush_done = NULL;
1815 
1816 static void wakeup_bdflush(int wait)
1817 {
1818         wake_up(&bdflush_wait);
1819         if(wait) sleep_on(&bdflush_done);
1820 }
1821 
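/*
 * A minimal usage sketch, kept out of the build with #if 0 and using a
 * made-up wrapper name: callers either just kick the daemon, or kick it
 * and wait until it has finished one flushing pass (bdflush wakes
 * bdflush_done after each pass).
 */
#if 0
static void example_kick_flush(void)
{
        wakeup_bdflush(0);      /* wake the daemon and keep going */
        wakeup_bdflush(1);      /* wake it and sleep until the pass completes */
}
#endif
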
1822 
1823 /* 
1824  * Here we attempt to write back old buffers.  We also try to flush
1825  * inodes and superblocks, since this function is essentially "update",
1826  * and otherwise there would be no way of ensuring that they ever
1827  * get written back.  Ideally, we would have a timestamp on the inodes
1828  * and superblocks so that we could write back only the old ones.
1829  */
1830 
1831 asmlinkage int sync_old_buffers(void)
1832 {
1833         int i, isize;
1834         int ndirty, nwritten;
1835         int nlist;
1836         int ncount;
1837         struct buffer_head * bh, *next;
1838 
1839         sync_supers(0);
1840         sync_inodes(0);
1841 
1842         ncount = 0;
1843 #ifdef DEBUG
1844         for(nlist = 0; nlist < NR_LIST; nlist++)
1845 #else
1846         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1847 #endif
1848         {
1849                 ndirty = 0;
1850                 nwritten = 0;
1851         repeat:
1852                 bh = lru_list[nlist];
1853                 if(bh) 
1854                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1855                                  /* We may have stalled while waiting for I/O to complete. */
1856                                  if(bh->b_list != nlist) goto repeat;
1857                                  next = bh->b_next_free;
1858                                  if(!lru_list[nlist]) {
1859                                          printk("Dirty list empty %d\n", i);
1860                                          break;
1861                                  }
1862                                  
1863                                  /* Clean buffer on dirty list?  Refile it */
1864                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1865                                   {
1866                                           refile_buffer(bh);
1867                                           continue;
1868                                   }
1869                                  
1870                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1871                                           continue;
1872                                  ndirty++;
1873                                  if(bh->b_flushtime > jiffies) continue;
1874                                  nwritten++;
1875                                  bh->b_count++;
1876                                  bh->b_flushtime = 0;
1877 #ifdef DEBUG
1878                                  if(nlist != BUF_DIRTY) ncount++;
1879 #endif
1880                                  ll_rw_block(WRITE, 1, &bh);
1881                                  bh->b_count--;
1882                          }
1883         }
1884 #ifdef DEBUG
1885         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1886         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1887 #endif
1888         
1889         /* We assume that we only come through here on a regular
1890            schedule, like every 5 seconds.  Now update load averages.  
1891            Shift usage counts to prevent overflow. */
1892         for(isize = 0; isize<NR_SIZES; isize++){
1893                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1894                 buffer_usage[isize] = 0;
1895         };
1896         return 0;
1897 }
1898 
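/*
 * A minimal sketch of the load-average update above, kept out of the
 * build with #if 0 and using a made-up helper name, assuming the
 * fixed-point CALC_LOAD macro from <linux/sched.h>: each pass folds the
 * getblk() count of the last interval into a decaying average and then
 * clears the count, so buffers_lav[] tracks how "hot" each buffer size
 * has recently been.
 */
#if 0
static void example_update_lav(unsigned long * lav, int * usage,
                               unsigned long lav_const)
{
        CALC_LOAD(*lav, lav_const, *usage);     /* exponential moving average */
        *usage = 0;                             /* start counting the next interval */
}
#endif
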
1899 
1900 /* This is the interface to bdflush.  As we get more sophisticated, we can
1901  * pass tuning parameters to this "process", to adjust how it behaves. 
1902  * We would want to verify each parameter, however, to make sure that it 
1903  * is reasonable. */
1904 
1905 asmlinkage int sys_bdflush(int func, long data)
1906 {
1907         int i, error;
1908 
1909         if (!suser())
1910                 return -EPERM;
1911 
1912         if (func == 1)
1913                  return sync_old_buffers();
1914 
1915         /* For func >= 2, param i = (func-2)>>1: an even func reads it, an odd func writes it */
1916         if (func >= 2) {
1917                 i = (func-2) >> 1;
1918                 if (i < 0 || i >= N_PARAM)
1919                         return -EINVAL;
1920                 if((func & 1) == 0) {
1921                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1922                         if (error)
1923                                 return error;
1924                         put_user(bdf_prm.data[i], (int*)data);
1925                         return 0;
1926                 };
1927                 if (data < bdflush_min[i] || data > bdflush_max[i])
1928                         return -EINVAL;
1929                 bdf_prm.data[i] = data;
1930                 return 0;
1931         };
1932 
1933         /* Func 0 used to launch the actual bdflush, which would then never
1934         return (unless explicitly killed). We return zero here to
1935         remain semi-compatible with present update(8) programs. */
1936 
1937         return 0;
1938 }
1939 
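/*
 * A minimal sketch of the func encoding accepted above, kept out of the
 * build with #if 0 (the helper names are made up; the mapping follows
 * directly from i = (func-2)>>1 and the even/odd test):
 *
 *      func == 1        -> sync_old_buffers()
 *      func == 2 + 2*i  -> copy bdf_prm.data[i] out to *(int *) data
 *      func == 3 + 2*i  -> set bdf_prm.data[i] from data (range-checked)
 */
#if 0
static int example_read_param_func(int i)  { return 2 + 2*i; }
static int example_write_param_func(int i) { return 3 + 2*i; }
#endif
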
1940 /* This is the actual bdflush daemon itself. It used to be started from
1941  * the syscall above, but now we launch it ourselves internally with
1942  * kernel_thread(...)  directly after the first thread in init/main.c */
1943 
1944 int bdflush(void * unused) 
1945 {
1946         int i;
1947         int ndirty;
1948         int nlist;
1949         int ncount;
1950         struct buffer_head * bh, *next;
1951 
1952         /*
1953          *      We have a bare-bones task_struct, and really should fill
1954          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1955          *      display semi-sane things. Not real crucial though...  
1956          */
1957 
1958         current->session = 1;
1959         current->pgrp = 1;
1960         sprintf(current->comm, "kflushd");
1961 
1962         /*
1963          *      As a kernel thread we want to tamper with system buffers
1964          *      and other internals and thus be subject to the SMP locking
1965          *      rules. (On a uniprocessor box this does nothing).
1966          */
1967          
1968 #ifdef __SMP__
1969         lock_kernel();
1970         syscall_count++;
1971 #endif
1972                  
1973         for (;;) {
1974 #ifdef DEBUG
1975                 printk("bdflush() activated...");
1976 #endif
1977                 
1978                 ncount = 0;
1979 #ifdef DEBUG
1980                 for(nlist = 0; nlist < NR_LIST; nlist++)
1981 #else
1982                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1983 #endif
1984                  {
1985                          ndirty = 0;
1986                  repeat:
1987                          bh = lru_list[nlist];
1988                          if(bh) 
1989                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1990                                        bh = next) {
1991                                           /* We may have stalled while waiting for I/O to complete. */
1992                                           if(bh->b_list != nlist) goto repeat;
1993                                           next = bh->b_next_free;
1994                                           if(!lru_list[nlist]) {
1995                                                   printk("Dirty list empty %d\n", i);
1996                                                   break;
1997                                           }
1998                                           
1999                                           /* Clean buffer on dirty list?  Refile it */
2000                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
2001                                            {
2002                                                    refile_buffer(bh);
2003                                                    continue;
2004                                            }
2005                                           
2006                                           if (buffer_locked(bh) || !buffer_dirty(bh))
2007                                                    continue;
2008                                           /* Should we write back buffers that are shared or not??
2009                                              currently dirty buffers are not shared, so it does not matter */
2010                                           bh->b_count++;
2011                                           ndirty++;
2012                                           bh->b_flushtime = 0;
2013                                           ll_rw_block(WRITE, 1, &bh);
2014 #ifdef DEBUG
2015                                           if(nlist != BUF_DIRTY) ncount++;
2016 #endif
2017                                           bh->b_count--;
2018                                   }
2019                  }
2020 #ifdef DEBUG
2021                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2022                 printk("sleeping again.\n");
2023 #endif
2024                 wake_up(&bdflush_done);
2025                 
2026                 /* If there are still a lot of dirty buffers around, skip the sleep
2027                    and flush some more */
2028                 
2029                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2030                    bdf_prm.b_un.nfract/100) {
2031                         current->signal = 0;
2032                         interruptible_sleep_on(&bdflush_wait);
2033                 }
2034         }
2035 }
2036 
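/*
 * A minimal sketch of the sleep test above, kept out of the build with
 * #if 0: the daemon goes back to sleep only once dirty buffers no longer
 * exceed nfract percent of the non-shared buffer cache; otherwise it
 * immediately starts another pass.  The helper name and the numbers are
 * made up: with 1000 buffers, 100 of them shared and nfract = 60, it
 * sleeps once no more than 540 buffers are dirty.
 */
#if 0
static int example_should_sleep(int dirty, int total, int shared, int nfract)
{
        return dirty <= (total - shared) * nfract / 100;
}
#endif
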
2037 
2038 /*
2039  * Overrides for Emacs so that we follow Linus's tabbing style.
2040  * Emacs will notice this stuff at the end of the file and automatically
2041  * adjust the settings for this buffer only.  This must remain at the end
2042  * of the file.
2043  * ---------------------------------------------------------------------------
2044  * Local variables:
2045  * c-indent-level: 8
2046  * c-brace-imaginary-offset: 0
2047  * c-brace-offset: -8
2048  * c-argdecl-indent: 8
2049  * c-label-offset: -8
2050  * c-continued-statement-offset: 8
2051  * c-continued-brace-offset: 0
2052  * End:
2053  */
