root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. brw_page
  34. mark_buffer_uptodate
  35. unlock_buffer
  36. generic_readpage
  37. grow_buffers
  38. try_to_free_buffer
  39. age_buffer
  40. maybe_shrink_lav_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush
  51. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 4
  39 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  40 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  41 
  42 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  43 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
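/*
 * For illustration: BUFSIZE_INDEX() maps a block size to its slot in the
 * per-size arrays, and bufferindex_size[] is the inverse mapping, e.g.
 *
 *	BUFSIZE_INDEX(512)  == buffersize_index[1] == 0
 *	BUFSIZE_INDEX(1024) == buffersize_index[2] == 1
 *	BUFSIZE_INDEX(4096) == buffersize_index[8] == 3
 *
 * so bufferindex_size[BUFSIZE_INDEX(size)] == size for every supported
 * block size.
 */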
  44 
  45 static int grow_buffers(int pri, int size);
  46 static int shrink_specific_buffers(unsigned int priority, int size);
  47 static int maybe_shrink_lav_buffers(int);
  48 
  49 static int nr_hash = 0;  /* Size of hash table */
  50 static struct buffer_head ** hash_table;
  51 struct buffer_head ** buffer_pages;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 static struct buffer_head * unused_list = NULL;
  59 struct buffer_head * reuse_list = NULL;
  60 static struct wait_queue * buffer_wait = NULL;
  61 
  62 int nr_buffers = 0;
  63 int nr_buffers_type[NR_LIST] = {0,};
  64 int nr_buffers_size[NR_SIZES] = {0,};
  65 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  66 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  67 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  68 int nr_free[NR_SIZES] = {0,};
  69 int buffermem = 0;
  70 int nr_buffer_heads = 0;
  71 extern int *blksize_size[];
  72 
  73 /* Here is the parameter block for the bdflush process. */
  74 static void wakeup_bdflush(int);
  75 
  76 #define N_PARAM 9
  77 #define LAV
  78 
  79 static union bdflush_param{
  80         struct {
  81                 int nfract;  /* Percentage of buffer cache dirty to 
  82                                 activate bdflush */
  83                 int ndirty;  /* Maximum number of dirty blocks to write out per
  84                                 wake-cycle */
  85                 int nrefill; /* Number of clean buffers to try and obtain
  86                                 each time we call refill */
  87                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  88                                   when trying to refill buffers. */
  89                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  90                                     search for free clusters */
  91                 int age_buffer;  /* Time for normal buffer to age before 
  92                                     we flush it */
  93                 int age_super;  /* Time for superblock to age before we 
  94                                    flush it */
  95                 int lav_const;  /* Constant used for load average (time
   96                                    constant) */
  97                 int lav_ratio;  /* Used to determine how low a lav for a
  98                                    particular size can go before we start to
  99                                    trim back the buffers */
 100         } b_un;
 101         unsigned int data[N_PARAM];
 102 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 103 
 104 /* The lav constant is set for 1 minute, as long as the update process runs
 105    every 5 seconds.  If you change the frequency of update, the time
 106    constant will also change. */
 107 
 108 
 109 /* These are the min and max parameter values that we will allow to be assigned */
 110 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 111 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
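/*
 * Not from the original source: a minimal sketch of how a new value for
 * parameter slot i would be range-checked against the tables above before
 * being stored (sys_bdflush(), further down, is what actually performs the
 * tuning; the helper name here is made up):
 *
 *	static int set_bdflush_param(int i, unsigned int value)
 *	{
 *		if (i < 0 || i >= N_PARAM)
 *			return -EINVAL;
 *		if (value < bdflush_min[i] || value > bdflush_max[i])
 *			return -EINVAL;
 *		bdf_prm.data[i] = value;
 *		return 0;
 *	}
 */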
 112 
 113 /*
 114  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 115  * and getting rid of the cli-sti pairs. The wait-queue routines still
 116  * need cli-sti, but now it's just a couple of 386 instructions or so.
 117  *
 118  * Note that the real wait_on_buffer() is an inline function that checks
 119  * if 'b_wait' is set before calling this, so that the queues aren't set
 120  * up unnecessarily.
 121  */
 122 void __wait_on_buffer(struct buffer_head * bh)
 123 {
 124         struct wait_queue wait = { current, NULL };
 125 
 126         bh->b_count++;
 127         add_wait_queue(&bh->b_wait, &wait);
 128 repeat:
 129         current->state = TASK_UNINTERRUPTIBLE;
 130         if (buffer_locked(bh)) {
 131                 schedule();
 132                 goto repeat;
 133         }
 134         remove_wait_queue(&bh->b_wait, &wait);
 135         bh->b_count--;
 136         current->state = TASK_RUNNING;
 137 }
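/*
 * The inline wrapper mentioned above lives in <linux/locks.h>; it is
 * roughly of this shape (a sketch -- see that header for the real test):
 *
 *	extern inline void wait_on_buffer(struct buffer_head * bh)
 *	{
 *		if (buffer_locked(bh))
 *			__wait_on_buffer(bh);
 *	}
 *
 * so the wait-queue machinery is only set up when the buffer really is
 * locked.
 */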
 138 
 139 /* Call sync_buffers with wait!=0 to ensure that the call does not
 140    return until all buffer writes have completed.  Sync() may return
 141    before the writes have finished; fsync() may not. */
 142 
 143 
 144 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 145    spontaneously dirty themselves without ever brelse being called.
 146    We will ultimately want to put these in a separate list, but for
 147    now we search all of the lists for dirty buffers */
 148 
 149 static int sync_buffers(kdev_t dev, int wait)
 150 {
 151         int i, retry, pass = 0, err = 0;
 152         int nlist, ncount;
 153         struct buffer_head * bh, *next;
 154 
 155         /* One pass for no-wait, three for wait:
 156            0) write out all dirty, unlocked buffers;
 157            1) write out all dirty buffers, waiting if locked;
 158            2) wait for completion by waiting for all buffers to unlock. */
 159  repeat:
 160         retry = 0;
 161  repeat2:
 162         ncount = 0;
 163         /* We search all lists as a failsafe mechanism, not because we expect
 164            there to be dirty buffers on any of the other lists. */
 165         for(nlist = 0; nlist < NR_LIST; nlist++)
 166          {
 167          repeat1:
 168                  bh = lru_list[nlist];
 169                  if(!bh) continue;
 170                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 171                          if(bh->b_list != nlist) goto repeat1;
 172                          next = bh->b_next_free;
 173                          if(!lru_list[nlist]) break;
 174                          if (dev && bh->b_dev != dev)
 175                                   continue;
 176                          if (buffer_locked(bh))
 177                           {
 178                                   /* Buffer is locked; skip it unless wait is
 179                                      requested AND pass > 0. */
 180                                   if (!wait || !pass) {
 181                                           retry = 1;
 182                                           continue;
 183                                   }
 184                                   wait_on_buffer (bh);
 185                                   goto repeat2;
 186                           }
 187                          /* If an unlocked buffer is not uptodate, there has
 188                              been an IO error. Skip it. */
 189                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 190                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 191                                   err = 1;
 192                                   continue;
 193                           }
 194                          /* Don't write clean buffers.  Don't write ANY buffers
 195                             on the third pass. */
 196                          if (!buffer_dirty(bh) || pass>=2)
 197                                   continue;
 198                          /* don't bother about locked buffers */
 199                          if (buffer_locked(bh))
 200                                  continue;
 201                          bh->b_count++;
 202                          bh->b_flushtime = 0;
 203                          ll_rw_block(WRITE, 1, &bh);
 204 
 205                          if(nlist != BUF_DIRTY) { 
 206                                  printk("[%d %s %ld] ", nlist,
 207                                         kdevname(bh->b_dev), bh->b_blocknr);
 208                                  ncount++;
 209                          };
 210                          bh->b_count--;
 211                          retry = 1;
 212                  }
 213          }
 214         if (ncount)
 215           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 216         
 217         /* If we are waiting for the sync to succeed, and if any dirty
 218            blocks were written, then repeat; on the second pass, only
 219            wait for buffers being written (do not pass to write any
 220            more buffers on the second pass). */
 221         if (wait && retry && ++pass<=2)
 222                  goto repeat;
 223         return err;
 224 }
 225 
 226 void sync_dev(kdev_t dev)
 227 {
 228         sync_buffers(dev, 0);
 229         sync_supers(dev);
 230         sync_inodes(dev);
 231         sync_buffers(dev, 0);
 232         sync_dquots(dev, -1);
 233 }
 234 
 235 int fsync_dev(kdev_t dev)
 236 {
 237         sync_buffers(dev, 0);
 238         sync_supers(dev);
 239         sync_inodes(dev);
 240         sync_dquots(dev, -1);
 241         return sync_buffers(dev, 1);
 242 }
 243 
 244 asmlinkage int sys_sync(void)
 245 {
 246         fsync_dev(0);
 247         return 0;
 248 }
 249 
 250 int file_fsync (struct inode *inode, struct file *filp)
 251 {
 252         return fsync_dev(inode->i_dev);
 253 }
 254 
 255 asmlinkage int sys_fsync(unsigned int fd)
 256 {
 257         struct file * file;
 258         struct inode * inode;
 259 
 260         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 261                 return -EBADF;
 262         if (!file->f_op || !file->f_op->fsync)
 263                 return -EINVAL;
 264         if (file->f_op->fsync(inode,file))
 265                 return -EIO;
 266         return 0;
 267 }
 268 
 269 asmlinkage int sys_fdatasync(unsigned int fd)
 270 {
 271         struct file * file;
 272         struct inode * inode;
 273 
 274         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 275                 return -EBADF;
 276         if (!file->f_op || !file->f_op->fsync)
 277                 return -EINVAL;
 278         /* this needs further work, at the moment it is identical to fsync() */
 279         if (file->f_op->fsync(inode,file))
 280                 return -EIO;
 281         return 0;
 282 }
 283 
 284 void invalidate_buffers(kdev_t dev)
 285 {
 286         int i;
 287         int nlist;
 288         struct buffer_head * bh;
 289 
 290         for(nlist = 0; nlist < NR_LIST; nlist++) {
 291                 bh = lru_list[nlist];
 292                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 293                         if (bh->b_dev != dev)
 294                                 continue;
 295                         wait_on_buffer(bh);
 296                         if (bh->b_dev != dev)
 297                                 continue;
 298                         if (bh->b_count)
 299                                 continue;
 300                         bh->b_flushtime = 0;
 301                         clear_bit(BH_Protected, &bh->b_state);
 302                         clear_bit(BH_Uptodate, &bh->b_state);
 303                         clear_bit(BH_Dirty, &bh->b_state);
 304                         clear_bit(BH_Req, &bh->b_state);
 305                 }
 306         }
 307 }
 308 
 309 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 310 #define hash(dev,block) hash_table[_hashfn(dev,block)]
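/*
 * For illustration: the buffer for (dev, block) lives on the chain
 * hash_table[(HASHDEV(dev) ^ block) % nr_hash], linked through b_next,
 * so a lookup is simply
 *
 *	struct buffer_head * tmp;
 *	for (tmp = hash(dev, block) ; tmp ; tmp = tmp->b_next)
 *		if (tmp->b_blocknr == block && tmp->b_dev == dev)
 *			break;
 *
 * which is essentially what find_buffer() below does, with a blocksize
 * sanity check added.
 */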
 311 
 312 static inline void remove_from_hash_queue(struct buffer_head * bh)
 313 {
 314         if (bh->b_next)
 315                 bh->b_next->b_prev = bh->b_prev;
 316         if (bh->b_prev)
 317                 bh->b_prev->b_next = bh->b_next;
 318         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 319                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 320         bh->b_next = bh->b_prev = NULL;
 321 }
 322 
 323 static inline void remove_from_lru_list(struct buffer_head * bh)
 324 {
 325         if (!(bh->b_prev_free) || !(bh->b_next_free))
 326                 panic("VFS: LRU block list corrupted");
 327         if (bh->b_dev == B_FREE)
 328                 panic("LRU list corrupted");
 329         bh->b_prev_free->b_next_free = bh->b_next_free;
 330         bh->b_next_free->b_prev_free = bh->b_prev_free;
 331 
 332         if (lru_list[bh->b_list] == bh)
 333                  lru_list[bh->b_list] = bh->b_next_free;
 334         if (lru_list[bh->b_list] == bh)
 335                  lru_list[bh->b_list] = NULL;
 336         if (next_to_age[bh->b_list] == bh)
 337                 next_to_age[bh->b_list] = bh->b_next_free;
 338         if (next_to_age[bh->b_list] == bh)
 339                 next_to_age[bh->b_list] = NULL;
 340 
 341         bh->b_next_free = bh->b_prev_free = NULL;
 342 }
 343 
 344 static inline void remove_from_free_list(struct buffer_head * bh)
 345 {
 346         int isize = BUFSIZE_INDEX(bh->b_size);
 347         if (!(bh->b_prev_free) || !(bh->b_next_free))
 348                 panic("VFS: Free block list corrupted");
 349         if(bh->b_dev != B_FREE)
 350                 panic("Free list corrupted");
 351         if(!free_list[isize])
 352                 panic("Free list empty");
 353         nr_free[isize]--;
 354         if(bh->b_next_free == bh)
 355                  free_list[isize] = NULL;
 356         else {
 357                 bh->b_prev_free->b_next_free = bh->b_next_free;
 358                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 359                 if (free_list[isize] == bh)
 360                          free_list[isize] = bh->b_next_free;
 361         };
 362         bh->b_next_free = bh->b_prev_free = NULL;
 363 }
 364 
 365 static inline void remove_from_queues(struct buffer_head * bh)
 366 {
 367         if(bh->b_dev == B_FREE) {
 368                 remove_from_free_list(bh); /* Free list entries should not be
 369                                               in the hash queue */
 370                 return;
 371         };
 372         nr_buffers_type[bh->b_list]--;
 373         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 374         remove_from_hash_queue(bh);
 375         remove_from_lru_list(bh);
 376 }
 377 
 378 static inline void put_last_lru(struct buffer_head * bh)
 379 {
 380         if (!bh)
 381                 return;
 382         if (bh == lru_list[bh->b_list]) {
 383                 lru_list[bh->b_list] = bh->b_next_free;
 384                 if (next_to_age[bh->b_list] == bh)
 385                         next_to_age[bh->b_list] = bh->b_next_free;
 386                 return;
 387         }
 388         if(bh->b_dev == B_FREE)
 389                 panic("Wrong block for lru list");
 390         remove_from_lru_list(bh);
  391 /* add to back of the lru list */
 392 
 393         if(!lru_list[bh->b_list]) {
 394                 lru_list[bh->b_list] = bh;
 395                 lru_list[bh->b_list]->b_prev_free = bh;
 396         };
 397         if (!next_to_age[bh->b_list])
 398                 next_to_age[bh->b_list] = bh;
 399 
 400         bh->b_next_free = lru_list[bh->b_list];
 401         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 402         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 403         lru_list[bh->b_list]->b_prev_free = bh;
 404 }
 405 
 406 static inline void put_last_free(struct buffer_head * bh)
 407 {
 408         int isize;
 409         if (!bh)
 410                 return;
 411 
 412         isize = BUFSIZE_INDEX(bh->b_size);      
 413         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 414         /* add to back of free list */
 415         if(!free_list[isize]) {
 416                 free_list[isize] = bh;
 417                 bh->b_prev_free = bh;
 418         };
 419 
 420         nr_free[isize]++;
 421         bh->b_next_free = free_list[isize];
 422         bh->b_prev_free = free_list[isize]->b_prev_free;
 423         free_list[isize]->b_prev_free->b_next_free = bh;
 424         free_list[isize]->b_prev_free = bh;
 425 }
 426 
 427 static inline void insert_into_queues(struct buffer_head * bh)
 428 {
 429         /* put at end of free list */
 430         if(bh->b_dev == B_FREE) {
 431                 put_last_free(bh);
 432                 return;
 433         }
 434         if(!lru_list[bh->b_list]) {
 435                 lru_list[bh->b_list] = bh;
 436                 bh->b_prev_free = bh;
 437         }
 438         if (!next_to_age[bh->b_list])
 439                 next_to_age[bh->b_list] = bh;
 440         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 441         bh->b_next_free = lru_list[bh->b_list];
 442         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 443         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 444         lru_list[bh->b_list]->b_prev_free = bh;
 445         nr_buffers_type[bh->b_list]++;
 446         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 447 /* put the buffer in new hash-queue if it has a device */
 448         bh->b_prev = NULL;
 449         bh->b_next = NULL;
 450         if (!(bh->b_dev))
 451                 return;
 452         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 453         hash(bh->b_dev,bh->b_blocknr) = bh;
 454         if (bh->b_next)
 455                 bh->b_next->b_prev = bh;
 456 }
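/*
 * Both the LRU lists and the free lists manipulated above are circular,
 * doubly-linked rings: lru_list[list] (or free_list[isize]) points at the
 * head, and the head's b_prev_free is the tail, so "append at the end" is
 * just an insert immediately before the head.  For a three-buffer ring:
 *
 *	lru_list[list] --> A <-> B <-> C
 *	                   ^___________|   (C->b_next_free == A,
 *	                                    A->b_prev_free == C)
 */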
 457 
 458 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 459 {               
 460         struct buffer_head * tmp;
 461 
 462         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 463                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 464                         if (tmp->b_size == size)
 465                                 return tmp;
 466                         else {
 467                                 printk("VFS: Wrong blocksize on device %s\n",
 468                                         kdevname(dev));
 469                                 return NULL;
 470                         }
 471         return NULL;
 472 }
 473 
 474 /*
 475  * Why like this, I hear you say... The reason is race-conditions.
 476  * As we don't lock buffers (unless we are reading them, that is),
 477  * something might happen to it while we sleep (ie a read-error
 478  * will force it bad). This shouldn't really happen currently, but
 479  * the code is ready.
 480  */
 481 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 482 {
 483         struct buffer_head * bh;
 484 
 485         for (;;) {
 486                 if (!(bh=find_buffer(dev,block,size)))
 487                         return NULL;
 488                 bh->b_count++;
 489                 wait_on_buffer(bh);
 490                 if (bh->b_dev == dev && bh->b_blocknr == block
 491                                              && bh->b_size == size)
 492                         return bh;
 493                 bh->b_count--;
 494         }
 495 }
 496 
 497 void set_blocksize(kdev_t dev, int size)
 498 {
 499         int i, nlist;
 500         struct buffer_head * bh, *bhnext;
 501 
 502         if (!blksize_size[MAJOR(dev)])
 503                 return;
 504 
 505         switch(size) {
 506                 default: panic("Invalid blocksize passed to set_blocksize");
 507                 case 512: case 1024: case 2048: case 4096:;
 508         }
 509 
 510         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 511                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 512                 return;
 513         }
 514         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 515                 return;
 516         sync_buffers(dev, 2);
 517         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 518 
 519   /* We need to be quite careful how we do this - we are moving entries
 520      around on the free list, and we can get in a loop if we are not careful.*/
 521 
 522         for(nlist = 0; nlist < NR_LIST; nlist++) {
 523                 bh = lru_list[nlist];
 524                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 525                         if(!bh) break;
 526                         bhnext = bh->b_next_free; 
 527                         if (bh->b_dev != dev)
 528                                  continue;
 529                         if (bh->b_size == size)
 530                                  continue;
 531                         
 532                         wait_on_buffer(bh);
 533                         if (bh->b_dev == dev && bh->b_size != size) {
 534                                 clear_bit(BH_Dirty, &bh->b_state);
 535                                 clear_bit(BH_Uptodate, &bh->b_state);
 536                                 clear_bit(BH_Req, &bh->b_state);
 537                                 bh->b_flushtime = 0;
 538                         }
 539                         remove_from_hash_queue(bh);
 540                 }
 541         }
 542 }
 543 
 544 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 545 
 546 void refill_freelist(int size)
 547 {
 548         struct buffer_head * bh, * tmp;
 549         struct buffer_head * candidate[NR_LIST];
 550         unsigned int best_time, winner;
 551         int isize = BUFSIZE_INDEX(size);
 552         int buffers[NR_LIST];
 553         int i;
 554         int needed;
 555 
 556         /* First see if we even need this.  Sometimes it is advantageous
 557          to request some blocks in a filesystem that we know that we will
 558          be needing ahead of time. */
 559 
 560         if (nr_free[isize] > 100)
 561                 return;
 562 
 563         /* If there are too many dirty buffers, we wake up the update process
 564            now so as to ensure that there are still clean buffers available
 565            for user processes to use (and dirty) */
 566         
 567         /* We are going to try and locate this much memory */
 568         needed =bdf_prm.b_un.nrefill * size;  
 569 
 570         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 571                grow_buffers(GFP_BUFFER, size)) {
 572                 needed -= PAGE_SIZE;
 573         }
 574 
 575         if(needed <= 0) return;
 576 
 577         /* See if there are too many buffers of a different size.
 578            If so, victimize them */
 579 
 580         while(maybe_shrink_lav_buffers(size))
 581          {
 582                  if(!grow_buffers(GFP_BUFFER, size)) break;
 583                  needed -= PAGE_SIZE;
 584                  if(needed <= 0) return;
 585          };
 586 
 587         /* OK, we cannot grow the buffer cache, now try and get some
 588            from the lru list */
 589 
 590         /* First set the candidate pointers to usable buffers.  This
 591            should be quick nearly all of the time. */
 592 
 593 repeat0:
 594         for(i=0; i<NR_LIST; i++){
 595                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 596                    nr_buffers_type[i] == 0) {
 597                         candidate[i] = NULL;
 598                         buffers[i] = 0;
 599                         continue;
 600                 }
 601                 buffers[i] = nr_buffers_type[i];
 602                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 603                  {
 604                          if(buffers[i] < 0) panic("Here is the problem");
 605                          tmp = bh->b_next_free;
 606                          if (!bh) break;
 607                          
 608                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 609                              buffer_dirty(bh)) {
 610                                  refile_buffer(bh);
 611                                  continue;
 612                          }
 613                          
 614                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 615                                   continue;
 616                          
 617                          /* Buffers are written in the order they are placed 
 618                             on the locked list. If we encounter a locked
 619                             buffer here, this means that the rest of them
 620                             are also locked */
 621                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 622                                  buffers[i] = 0;
 623                                  break;
 624                          }
 625                          
 626                          if (BADNESS(bh)) continue;
 627                          break;
 628                  };
 629                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 630                 else candidate[i] = bh;
 631                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 632         }
 633         
 634  repeat:
 635         if(needed <= 0) return;
 636         
 637         /* Now see which candidate wins the election */
 638         
 639         winner = best_time = UINT_MAX;  
 640         for(i=0; i<NR_LIST; i++){
 641                 if(!candidate[i]) continue;
 642                 if(candidate[i]->b_lru_time < best_time){
 643                         best_time = candidate[i]->b_lru_time;
 644                         winner = i;
 645                 }
 646         }
 647         
 648         /* If we have a winner, use it, and then get a new candidate from that list */
 649         if(winner != UINT_MAX) {
 650                 i = winner;
 651                 bh = candidate[i];
 652                 candidate[i] = bh->b_next_free;
 653                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 654                 if (bh->b_count || bh->b_size != size)
 655                          panic("Busy buffer in candidate list\n");
 656                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 657                          panic("Shared buffer in candidate list\n");
 658                 if (buffer_protected(bh))
 659                         panic("Protected buffer in candidate list\n");
 660                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 661                 
 662                 if(bh->b_dev == B_FREE)
 663                         panic("Wrong list");
 664                 remove_from_queues(bh);
 665                 bh->b_dev = B_FREE;
 666                 put_last_free(bh);
 667                 needed -= bh->b_size;
 668                 buffers[i]--;
 669                 if(buffers[i] < 0) panic("Here is the problem");
 670                 
 671                 if(buffers[i] == 0) candidate[i] = NULL;
 672                 
 673                 /* Now all we need to do is advance the candidate pointer
 674                    from the winner list to the next usable buffer */
 675                 if(candidate[i] && buffers[i] > 0){
 676                         if(buffers[i] <= 0) panic("Here is another problem");
 677                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 678                                 if(buffers[i] < 0) panic("Here is the problem");
 679                                 tmp = bh->b_next_free;
 680                                 if (!bh) break;
 681                                 
 682                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 683                                     buffer_dirty(bh)) {
 684                                         refile_buffer(bh);
 685                                         continue;
 686                                 };
 687                                 
 688                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 689                                          continue;
 690                                 
 691                                 /* Buffers are written in the order they are
 692                                    placed on the locked list.  If we encounter
 693                                    a locked buffer here, this means that the
 694                                    rest of them are also locked */
 695                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 696                                         buffers[i] = 0;
 697                                         break;
 698                                 }
 699               
 700                                 if (BADNESS(bh)) continue;
 701                                 break;
 702                         };
 703                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 704                         else candidate[i] = bh;
 705                         if(candidate[i] && candidate[i]->b_count) 
 706                                  panic("Here is the problem");
 707                 }
 708                 
 709                 goto repeat;
 710         }
 711         
 712         if(needed <= 0) return;
 713         
 714         /* Too bad, that was not enough. Try a little harder to grow some. */
 715         
 716         if (nr_free_pages > min_free_pages + 5) {
 717                 if (grow_buffers(GFP_BUFFER, size)) {
 718                         needed -= PAGE_SIZE;
 719                         goto repeat0;
 720                 };
 721         }
 722         
 723         /* and repeat until we find something good */
 724         if (!grow_buffers(GFP_ATOMIC, size))
 725                 wakeup_bdflush(1);
 726         needed -= PAGE_SIZE;
 727         goto repeat0;
 728 }
 729 
 730 /*
 731  * Ok, this is getblk, and it isn't very clear, again to hinder
 732  * race-conditions. Most of the code is seldom used, (ie repeating),
 733  * so it should be much more efficient than it looks.
 734  *
 735  * The algorithm is changed: hopefully better, and an elusive bug removed.
 736  *
 737  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 738  * when the filesystem starts to get full of dirty blocks (I hope).
 739  */
 740 struct buffer_head * getblk(kdev_t dev, int block, int size)
 741 {
 742         struct buffer_head * bh;
 743         int isize = BUFSIZE_INDEX(size);
 744 
 745         /* Update this for the buffer size lav. */
 746         buffer_usage[isize]++;
 747 
 748         /* If there are too many dirty buffers, we wake up the update process
 749            now so as to ensure that there are still clean buffers available
 750            for user processes to use (and dirty) */
 751 repeat:
 752         bh = get_hash_table(dev, block, size);
 753         if (bh) {
 754                 if (!buffer_dirty(bh)) {
 755                         if (buffer_uptodate(bh))
 756                                  put_last_lru(bh);
 757                         bh->b_flushtime = 0;
 758                 }
 759                 set_bit(BH_Touched, &bh->b_state);
 760                 return bh;
 761         }
 762 
 763         while(!free_list[isize]) refill_freelist(size);
 764         
 765         if (find_buffer(dev,block,size))
 766                  goto repeat;
 767 
 768         bh = free_list[isize];
 769         remove_from_free_list(bh);
 770 
 771 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 772 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 773         bh->b_count=1;
 774         bh->b_flushtime=0;
 775         bh->b_state=(1<<BH_Touched);
 776         bh->b_dev=dev;
 777         bh->b_blocknr=block;
 778         insert_into_queues(bh);
 779         return bh;
 780 }
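/*
 * Typical use by a filesystem that is about to overwrite a block and so
 * does not need to read it first (a sketch, assuming the usual
 * getblk()/brelse() pairing and the mark_buffer_dirty() inline from
 * <linux/fs.h>):
 *
 *	struct buffer_head * bh = getblk(dev, block, blocksize);
 *	memset(bh->b_data, 0, blocksize);
 *	mark_buffer_uptodate(bh, 1);
 *	mark_buffer_dirty(bh, 0);
 *	brelse(bh);
 *
 * Note that getblk() does not return NULL: it loops in refill_freelist()
 * until a free buffer of the right size is available.
 */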
 781 
 782 void set_writetime(struct buffer_head * buf, int flag)
 783 {
 784         int newtime;
 785 
 786         if (buffer_dirty(buf)) {
 787                 /* Move buffer to dirty list if jiffies is clear */
 788                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 789                                      bdf_prm.b_un.age_buffer);
 790                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 791                          buf->b_flushtime = newtime;
 792         } else {
 793                 buf->b_flushtime = 0;
 794         }
 795 }
 796 
 797 
 798 void refile_buffer(struct buffer_head * buf)
 799 {
 800         int dispose;
 801 
 802         if(buf->b_dev == B_FREE) {
 803                 printk("Attempt to refile free buffer\n");
 804                 return;
 805         }
 806         if (buffer_dirty(buf))
 807                 dispose = BUF_DIRTY;
 808         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 809                 dispose = BUF_SHARED;
 810         else if (buffer_locked(buf))
 811                 dispose = BUF_LOCKED;
 812         else if (buf->b_list == BUF_SHARED)
 813                 dispose = BUF_UNSHARED;
 814         else
 815                 dispose = BUF_CLEAN;
 816         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 817         if(dispose != buf->b_list)  {
 818                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 819                          buf->b_lru_time = jiffies;
 820                 if(dispose == BUF_LOCKED && 
 821                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 822                          dispose = BUF_LOCKED1;
 823                 remove_from_queues(buf);
 824                 buf->b_list = dispose;
 825                 insert_into_queues(buf);
 826                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 827                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 828                    bdf_prm.b_un.nfract/100)
 829                          wakeup_bdflush(0);
 830         }
 831 }
 832 
 833 /*
 834  * Release a buffer head
 835  */
 836 void __brelse(struct buffer_head * buf)
 837 {
 838         wait_on_buffer(buf);
 839 
 840         /* If dirty, mark the time this buffer should be written back */
 841         set_writetime(buf, 0);
 842         refile_buffer(buf);
 843 
 844         if (buf->b_count) {
 845                 if (!--buf->b_count)
 846                         wake_up(&buffer_wait);
 847                 return;
 848         }
 849         printk("VFS: brelse: Trying to free free buffer\n");
 850 }
 851 
 852 /*
 853  * bforget() is like brelse(), except it removes the buffer
 854  * from the hash-queues (so that it won't be re-used if it's
 855  * shared).
 856  */
 857 void __bforget(struct buffer_head * buf)
 858 {
 859         wait_on_buffer(buf);
 860         mark_buffer_clean(buf);
 861         clear_bit(BH_Protected, &buf->b_state);
 862         buf->b_count--;
 863         remove_from_hash_queue(buf);
 864         buf->b_dev = NODEV;
 865         refile_buffer(buf);
 866         wake_up(&buffer_wait);
 867 }
 868 
 869 /*
 870  * bread() reads a specified block and returns the buffer that contains
 871  * it. It returns NULL if the block was unreadable.
 872  */
 873 struct buffer_head * bread(kdev_t dev, int block, int size)
 874 {
 875         struct buffer_head * bh;
 876 
 877         if (!(bh = getblk(dev, block, size))) {
 878                 printk("VFS: bread: READ error on device %s\n",
 879                         kdevname(dev));
 880                 return NULL;
 881         }
 882         if (buffer_uptodate(bh))
 883                 return bh;
 884         ll_rw_block(READ, 1, &bh);
 885         wait_on_buffer(bh);
 886         if (buffer_uptodate(bh))
 887                 return bh;
 888         brelse(bh);
 889         return NULL;
 890 }
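/*
 * A typical read-modify-write cycle built on bread() -- a sketch, with
 * error handling reduced to the essentials:
 *
 *	struct buffer_head * bh = bread(dev, block, blocksize);
 *	if (!bh)
 *		return -EIO;			-- unreadable block
 *	... examine or modify bh->b_data ...
 *	mark_buffer_dirty(bh, 0);		-- only if modified
 *	brelse(bh);
 *
 * The buffer returned by bread() is up to date and has b_count raised;
 * every successful bread() must be balanced by a brelse() (or bforget()).
 */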
 891 
 892 /*
 893  * Ok, breada can be used as bread, but additionally to mark other
 894  * blocks for reading as well. End the argument list with a negative
 895  * number.
 896  */
 897 
 898 #define NBUF 16
 899 
 900 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 901         unsigned int pos, unsigned int filesize)
 902 {
 903         struct buffer_head * bhlist[NBUF];
 904         unsigned int blocks;
 905         struct buffer_head * bh;
 906         int index;
 907         int i, j;
 908 
 909         if (pos >= filesize)
 910                 return NULL;
 911 
 912         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 913                 return NULL;
 914 
 915         index = BUFSIZE_INDEX(bh->b_size);
 916 
 917         if (buffer_uptodate(bh))
 918                 return bh;
 919 
 920         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 921 
 922         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 923                 blocks = read_ahead[MAJOR(dev)] >> index;
 924         if (blocks > NBUF)
 925                 blocks = NBUF;
 926         
 927         bhlist[0] = bh;
 928         j = 1;
 929         for(i=1; i<blocks; i++) {
 930                 bh = getblk(dev,block+i,bufsize);
 931                 if (buffer_uptodate(bh)) {
 932                         brelse(bh);
 933                         break;
 934                 }
 935                 bhlist[j++] = bh;
 936         }
 937 
 938         /* Request the read for these buffers, and then release them */
 939         ll_rw_block(READ, j, bhlist);
 940 
 941         for(i=1; i<j; i++)
 942                 brelse(bhlist[i]);
 943 
 944         /* Wait for this buffer, and then continue on */
 945         bh = bhlist[0];
 946         wait_on_buffer(bh);
 947         if (buffer_uptodate(bh))
 948                 return bh;
 949         brelse(bh);
 950         return NULL;
 951 }
 952 
 953 /*
 954  * See fs/inode.c for the weird use of volatile..
 955  */
 956 static void put_unused_buffer_head(struct buffer_head * bh)
 957 {
 958         struct wait_queue * wait;
 959 
 960         wait = ((volatile struct buffer_head *) bh)->b_wait;
 961         memset(bh,0,sizeof(*bh));
 962         ((volatile struct buffer_head *) bh)->b_wait = wait;
 963         bh->b_next_free = unused_list;
 964         unused_list = bh;
 965 }
 966 
 967 static void get_more_buffer_heads(void)
 968 {
 969         int i;
 970         struct buffer_head * bh;
 971 
 972         if (unused_list)
 973                 return;
 974 
 975         if (!(bh = (struct buffer_head*) get_free_page(GFP_KERNEL)))
 976                 return;
 977 
 978         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 979                 bh->b_next_free = unused_list;  /* only make link */
 980                 unused_list = bh++;
 981         }
 982 }
 983 
 984 /* 
 985  * We can't put completed temporary IO buffer_heads directly onto the
 986  * unused_list when they become unlocked, since the device driver
 987  * end_request routines still expect access to the buffer_head's
 988  * fields after the final unlock.  So, the device driver puts them on
 989  * the reuse_list instead once IO completes, and we recover these to
 990  * the unused_list here.
 991  *
 992  * The reuse_list receives buffers from interrupt routines, so we need
 993  * to be IRQ-safe here.
 994  */
 995 static inline void recover_reusable_buffer_heads(void)
 996 {
 997         struct buffer_head *bh;
 998         unsigned long flags;
 999         
1000         save_flags(flags);
1001         while (reuse_list) {
1002                 cli();
1003                 bh = reuse_list;
1004                 reuse_list = bh->b_next_free;
1005                 restore_flags(flags);
1006                 put_unused_buffer_head(bh);
1007         }
1008 }
1009 
1010 static struct buffer_head * get_unused_buffer_head(void)
1011 {
1012         struct buffer_head * bh;
1013 
1014         recover_reusable_buffer_heads();
1015         get_more_buffer_heads();
1016         if (!unused_list)
1017                 return NULL;
1018         bh = unused_list;
1019         unused_list = bh->b_next_free;
1020         bh->b_next_free = NULL;
1021         bh->b_data = NULL;
1022         bh->b_size = 0;
1023         bh->b_state = 0;
1024         return bh;
1025 }
1026 
1027 /*
1028  * Create the appropriate buffers when given a page for data area and
 1029  * the size of each buffer. Use the bh->b_this_page linked list to
1030  * follow the buffers created.  Return NULL if unable to create more
1031  * buffers.
1032  */
1033 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1034 {
1035         struct buffer_head *bh, *head;
1036         unsigned long offset;
1037 
1038         head = NULL;
1039         offset = PAGE_SIZE;
1040         while ((offset -= size) < PAGE_SIZE) {
1041                 bh = get_unused_buffer_head();
1042                 if (!bh)
1043                         goto no_grow;
1044                 bh->b_this_page = head;
1045                 head = bh;
1046                 bh->b_data = (char *) (page+offset);
1047                 bh->b_size = size;
1048                 bh->b_dev = B_FREE;  /* Flag as unused */
1049         }
1050         return head;
1051 /*
1052  * In case anything failed, we just free everything we got.
1053  */
1054 no_grow:
1055         bh = head;
1056         while (bh) {
1057                 head = bh;
1058                 bh = bh->b_this_page;
1059                 put_unused_buffer_head(head);
1060         }
1061         return NULL;
1062 }
1063 
1064 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1065 {
1066         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1067         int block, nr;
1068         struct page *page;
1069 
1070         page = mem_map + MAP_NR(address);
1071         page->uptodate = 0;
1072         bh = create_buffers(address, size);
1073         if (!bh)
1074                 return -ENOMEM;
1075         nr = 0;
1076         next = bh;
1077         do {
1078                 struct buffer_head * tmp;
1079                 block = *(b++);
1080 
1081                 set_bit(BH_FreeOnIO, &next->b_state);
1082                 next->b_list = BUF_CLEAN;
1083                 next->b_dev = dev;
1084                 next->b_blocknr = block;
1085                 next->b_count = 1;
1086                 next->b_flushtime = 0;
1087                 set_bit(BH_Uptodate, &next->b_state);
1088 
1089                 /* When we use bmap, we define block zero to represent
1090                    a hole.  ll_rw_page, however, may legitimately
1091                    access block zero, and we need to distinguish the
1092                    two cases. 
1093                    */
1094                 if (bmap && !block) {
1095                         memset(next->b_data, 0, size);
1096                         next->b_count--;
1097                         continue;
1098                 }
1099                 tmp = get_hash_table(dev, block, size);
1100                 if (tmp) {
1101                         if (!buffer_uptodate(tmp)) {
1102                                 if (rw == READ)
1103                                         ll_rw_block(READ, 1, &tmp);
1104                                 wait_on_buffer(tmp);
1105                         }
1106                         if (rw == READ) 
1107                                 memcpy(next->b_data, tmp->b_data, size);
1108                         else {
1109                                 memcpy(tmp->b_data, next->b_data, size);
1110                                 set_bit(BH_Dirty, &tmp->b_state);
1111                         }
1112                         brelse(tmp);
1113                         next->b_count--;
1114                         continue;
1115                 }
1116                 if (rw == READ)
1117                         clear_bit(BH_Uptodate, &next->b_state);
1118                 else
1119                         set_bit(BH_Dirty, &next->b_state);
1120                 arr[nr++] = next;
1121         } while (prev = next, (next = next->b_this_page) != NULL);
1122         prev->b_this_page = bh;
1123         
1124         if (nr)
1125                 ll_rw_block(rw, nr, arr);
1126         else {
1127                 page->locked = 0;
1128                 page->uptodate = 1;
1129                 wake_up(&page->wait);
1130                 next = bh;
1131                 do {
1132                         next->b_next_free = reuse_list;
1133                         reuse_list = next;
1134                         next = next->b_this_page;
1135                 } while (next != bh);
1136         }
1137         ++current->maj_flt;
1138         return 0;
1139 }
1140 
1141 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1142 {
1143         if (on) {
1144                 struct buffer_head *tmp = bh;
1145                 int page_uptodate = 1;
1146                 set_bit(BH_Uptodate, &bh->b_state);
1147                 do {
1148                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1149                                 page_uptodate = 0;
1150                                 break;
1151                         }
1152                         tmp=tmp->b_this_page;
1153                 } while (tmp && tmp != bh);
1154                 if (page_uptodate)
1155                         mem_map[MAP_NR(bh->b_data)].uptodate = 1;
1156         } else
1157                 clear_bit(BH_Uptodate, &bh->b_state);
1158 }
1159 
1160 void unlock_buffer(struct buffer_head * bh)
1161 {
1162         struct buffer_head *tmp;
1163         unsigned long flags;
1164         struct page *page;
1165 
1166         clear_bit(BH_Lock, &bh->b_state);
1167         wake_up(&bh->b_wait);
1168 
1169         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1170                 return;
1171         page = mem_map + MAP_NR(bh->b_data);
1172         if (!page->locked) {
1173                 printk ("Whoops: unlock_buffer: "
1174                         "async io complete on unlocked page\n");
1175                 return;
1176         }
1177         if (bh->b_count != 1) {
1178                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1179                 return;
1180         }
1181         /* Async buffer_heads are here only as labels for IO, and get
1182            thrown away once the IO for this page is complete.  IO is
1183            deemed complete once all buffers have been visited
1184            (b_count==0) and are now unlocked. */
1185         bh->b_count--;
1186         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1187                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1188                         return;
1189         }
1190 
1191         /* OK, go ahead and complete the async IO on this page. */
1192         save_flags(flags);
1193         page->locked = 0;
1194         wake_up(&page->wait);
1195         cli();
1196         tmp = bh;
1197         do {
1198                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1199                         printk ("Whoops: unlock_buffer: "
1200                                 "async IO mismatch on page.\n");
1201                         restore_flags(flags);
1202                         return;
1203                 }
1204                 tmp->b_next_free = reuse_list;
1205                 reuse_list = tmp;
1206                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1207                 tmp = tmp->b_this_page;
1208         } while (tmp != bh);
1209         restore_flags(flags);
1210         if (page->free_after) {
1211                 extern int nr_async_pages;
1212                 nr_async_pages--;
1213                 page->free_after = 0;
1214                 free_page(page_address(page));
1215         }
1216 }
1217 
1218 /*
1219  * Generic "readpage" function for block devices that have the normal
1220  * bmap functionality. This is most of the block device filesystems.
1221  * Reads the page asynchronously --- the unlock_buffer() and
 1222  * mark_buffer_uptodate() functions propagate buffer state into the
1223  * page struct once IO has completed.
1224  */
1225 int generic_readpage(struct inode * inode, struct page * page)
1226 {
1227         unsigned long block, address;
1228         int *p, nr[PAGE_SIZE/512];
1229         int i;
1230 
1231         address = page_address(page);
1232         page->count++;
1233         wait_on_page(page);
1234         if (page->uptodate) {
1235                 free_page(address);
1236                 return 0;
1237         }
1238         page->locked = 1;
1239         
1240         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1241         block = page->offset >> inode->i_sb->s_blocksize_bits;
1242         p = nr;
1243         do {
1244                 *p = inode->i_op->bmap(inode, block);
1245                 i--;
1246                 block++;
1247                 p++;
1248         } while (i > 0);
1249 
1250         /* IO start */
1251         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1252         free_page(address);
1253         return 0;
1254 }
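/*
 * Filesystems whose bmap() works on a per-block basis can point their
 * readpage operation straight at this function.  The page-cache code then
 * drives it as, roughly,
 *
 *	error = inode->i_op->readpage(inode, page);
 *
 * and, if it needs the data synchronously, follows up with
 * wait_on_page(page) and a check of page->uptodate, which brw_page()
 * and mark_buffer_uptodate() fill in as the per-buffer IO completes.
 */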
1255 
1256 /*
1257  * Try to increase the number of buffers available: the size argument
1258  * is used to determine what kind of buffers we want.
1259  */
1260 static int grow_buffers(int pri, int size)
1261 {
1262         unsigned long page;
1263         struct buffer_head *bh, *tmp;
1264         struct buffer_head * insert_point;
1265         int isize;
1266 
1267         if ((size & 511) || (size > PAGE_SIZE)) {
1268                 printk("VFS: grow_buffers: size = %d\n",size);
1269                 return 0;
1270         }
1271 
1272         isize = BUFSIZE_INDEX(size);
1273 
1274         if (!(page = __get_free_page(pri)))
1275                 return 0;
1276         bh = create_buffers(page, size);
1277         if (!bh) {
1278                 free_page(page);
1279                 return 0;
1280         }
1281 
1282         insert_point = free_list[isize];
1283 
1284         tmp = bh;
1285         while (1) {
1286                 nr_free[isize]++;
1287                 if (insert_point) {
1288                         tmp->b_next_free = insert_point->b_next_free;
1289                         tmp->b_prev_free = insert_point;
1290                         insert_point->b_next_free->b_prev_free = tmp;
1291                         insert_point->b_next_free = tmp;
1292                 } else {
1293                         tmp->b_prev_free = tmp;
1294                         tmp->b_next_free = tmp;
1295                 }
1296                 insert_point = tmp;
1297                 ++nr_buffers;
1298                 if (tmp->b_this_page)
1299                         tmp = tmp->b_this_page;
1300                 else
1301                         break;
1302         }
1303         free_list[isize] = bh;
1304         buffer_pages[MAP_NR(page)] = bh;
1305         tmp->b_this_page = bh;
1306         wake_up(&buffer_wait);
1307         buffermem += PAGE_SIZE;
1308         return 1;
1309 }
1310 
1311 
1312 /* =========== Reduce the buffer memory ============= */
1313 
1314 /*
1315  * try_to_free_buffer() checks if all the buffers on this particular page
 1316  * are unused, and frees the page if so.
1317  */
1318 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1319                        int priority)
1320 {
1321         unsigned long page;
1322         struct buffer_head * tmp, * p;
1323         int isize = BUFSIZE_INDEX(bh->b_size);
1324 
1325         *bhp = bh;
1326         page = (unsigned long) bh->b_data;
1327         page &= PAGE_MASK;
1328         tmp = bh;
1329         do {
1330                 if (!tmp)
1331                         return 0;
1332                 if (tmp->b_count || buffer_protected(tmp) ||
1333                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1334                         return 0;
1335                 if (priority && buffer_touched(tmp))
1336                         return 0;
1337                 tmp = tmp->b_this_page;
1338         } while (tmp != bh);
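             /* Every buffer on the page is idle: unhash each one, drop it
                from the lists and recycle its buffer head before the page
                itself is freed below. */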
1339         tmp = bh;
1340         do {
1341                 p = tmp;
1342                 tmp = tmp->b_this_page;
1343                 nr_buffers--;
1344                 nr_buffers_size[isize]--;
1345                 if (p == *bhp)
1346                   {
1347                     *bhp = p->b_prev_free;
1348                     if (p == *bhp) /* Was this the last in the list? */
1349                       *bhp = NULL;
1350                   }
1351                 remove_from_queues(p);
1352                 put_unused_buffer_head(p);
1353         } while (tmp != bh);
1354         buffermem -= PAGE_SIZE;
1355         buffer_pages[MAP_NR(page)] = NULL;
1356         free_page(page);
1357         return !mem_map[MAP_NR(page)].count;
1358 }
1359 
1360 /* Age buffers on a given page, according to whether they have been
1361    visited recently or not. */
1362 static inline void age_buffer(struct buffer_head *bh)
1363 {
1364         struct buffer_head *tmp = bh;
1365         int touched = 0;
1366 
1367         /*
1368          * When we age a page, we mark all other buffers in the page
1369          * with the "has_aged" flag.  Then, when these aliased buffers
1370          * come up for aging, we skip them until next pass.  This
1371          * ensures that a page full of multiple buffers only gets aged
1372          * once per pass through the lru lists. 
1373          */
1374         if (clear_bit(BH_Has_aged, &bh->b_state))
1375                 return;
1376         
1377         do {
1378                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1379                 tmp = tmp->b_this_page;
1380                 set_bit(BH_Has_aged, &tmp->b_state);
1381         } while (tmp != bh);
1382         clear_bit(BH_Has_aged, &bh->b_state);
1383 
1384         if (touched) 
1385                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1386         else
1387                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1388 }
1389 
1390 /*
1391  * Consult the load average for buffers and decide whether or not
1392  * we should shrink the buffers of one size.  If we decide yes,
1393  * do it and return 1.  Else return 0.  Do not attempt to shrink the
1394  * size that is specified.
1395  *
1396  * I would prefer not to use a load average, but the way things are now it
1397  * seems unavoidable.  The way to get rid of it would be to force clustering
1398  * universally, so that when we reclaim buffers we always reclaim an entire
1399  * page.  Doing this would mean that we all need to move towards QMAGIC.
1400  */
1401 
1402 static int maybe_shrink_lav_buffers(int size)
1403 {          
1404         int nlist;
1405         int isize;
1406         int total_lav, total_n_buffers, n_sizes;
1407         
1408         /* Do not consider the shared buffers since they would not tend
1409            to have getblk called very often, and this would throw off
1410            the lav.  They are not easily reclaimable anyway (let the swapper
1411            make the first move). */
1412   
1413         total_lav = total_n_buffers = n_sizes = 0;
1414         for(nlist = 0; nlist < NR_SIZES; nlist++)
1415          {
1416                  total_lav += buffers_lav[nlist];
1417                  if(nr_buffers_size[nlist]) n_sizes++;
1418                  total_n_buffers += nr_buffers_size[nlist];
1419                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1420          }
1421         
1422         /* See if we have an excessive number of buffers of a particular
1423            size - if so, victimize that bunch. */
1424   
1425         isize = (size ? BUFSIZE_INDEX(size) : -1);
1426         
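             /* Victimize size nlist when its share of the (non-shared)
                buffers exceeds lav_const times its share of the buffer
                load average, i.e. it holds more memory than its recent
                getblk traffic justifies. */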
1427         if (n_sizes > 1)
1428                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1429                   {
1430                           if(nlist == isize) continue;
1431                           if(nr_buffers_size[nlist] &&
1432                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1433                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1434                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1435                                             return 1;
1436                   }
1437         return 0;
1438 }
1439 
1440 /*
1441  * Try to free up some pages by shrinking the buffer-cache
1442  *
1443  * Priority tells the routine how hard to try to shrink the
1444  * buffers: 6 means "don't bother too much", while a value
1445  * of 0 means "we'd better get some free pages now".
1446  *
1447  * "limit" is meant to limit the shrink-action only to pages
1448  * that are in the 0 - limit address range, for DMA re-allocations.
1449  * We ignore that right now.
1450  */
1451 
1452 static int shrink_specific_buffers(unsigned int priority, int size)
1453 {
1454         struct buffer_head *bh;
1455         int nlist;
1456         int i, isize, isize1;
1457 
1458 #ifdef DEBUG
1459         if(size) printk("Shrinking buffers of size %d\n", size);
1460 #endif
1461         /* First try the free lists, and see if we can get a complete page
1462            from here */
1463         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1464 
1465         for(isize = 0; isize<NR_SIZES; isize++){
1466                 if(isize1 != -1 && isize1 != isize) continue;
1467                 bh = free_list[isize];
1468                 if(!bh) continue;
1469                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1470                         if (bh->b_count || buffer_protected(bh) ||
1471                             !bh->b_this_page)
1472                                  continue;
1473                         if (!age_of((unsigned long) bh->b_data) &&
1474                             try_to_free_buffer(bh, &bh, 6))
1475                                  return 1;
1476                         if(!bh) break;
1477                         /* Some interrupt must have used it after we
1478                            freed the page.  No big deal - keep looking */
1479                 }
1480         }
1481         
1482         /* Not enough in the free lists, now try the lru list */
1483         
1484         for(nlist = 0; nlist < NR_LIST; nlist++) {
1485         repeat1:
1486                 if(priority > 2 && nlist == BUF_SHARED) continue;
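                     /* Only scan a fraction of this list per call; the budget
                        shrinks as the priority value rises (6 = "don't bother
                        too much", 0 = "we need free pages now"). */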
1487                 i = nr_buffers_type[nlist];
1488                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1489                 for ( ; i > 0; i-- ) {
1490                         bh = next_to_age[nlist];
1491                         if (!bh)
1492                                 break;
1493                         next_to_age[nlist] = bh->b_next_free;
1494 
1495                         /* First, age the buffer. */
1496                         age_buffer(bh);
1497                         /* We may have stalled while waiting for I/O
1498                            to complete. */
1499                         if(bh->b_list != nlist) goto repeat1;
1500                         if (bh->b_count || buffer_protected(bh) ||
1501                             !bh->b_this_page)
1502                                  continue;
1503                         if(size && bh->b_size != size) continue;
1504                         if (buffer_locked(bh))
1505                                  if (priority)
1506                                           continue;
1507                                  else
1508                                           wait_on_buffer(bh);
1509                         if (buffer_dirty(bh)) {
1510                                 bh->b_count++;
1511                                 bh->b_flushtime = 0;
1512                                 ll_rw_block(WRITEA, 1, &bh);
1513                                 bh->b_count--;
1514                                 continue;
1515                         }
1516                         /* At priority 6, only consider really old
1517                            (age==0) buffers for reclaiming.  At
1518                            priority 0, consider any buffers. */
1519                         if ((age_of((unsigned long) bh->b_data) >>
1520                              (6-priority)) > 0)
1521                                 continue;                               
1522                         if (try_to_free_buffer(bh, &bh, 0))
1523                                  return 1;
1524                         if(!bh) break;
1525                 }
1526         }
1527         return 0;
1528 }
1529 
1530 
1531 /* ================== Debugging =================== */
1532 
1533 void show_buffers(void)
1534 {
1535         struct buffer_head * bh;
1536         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1537         int protected = 0;
1538         int shared;
1539         int nlist, isize;
1540 
1541         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1542         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1543         printk("Buffer blocks:   %6d\n",nr_buffers);
1544 
1545         for(nlist = 0; nlist < NR_LIST; nlist++) {
1546           shared = found = locked = dirty = used = lastused = protected = 0;
1547           bh = lru_list[nlist];
1548           if(!bh) continue;
1549           do {
1550                 found++;
1551                 if (buffer_locked(bh))
1552                         locked++;
1553                 if (buffer_protected(bh))
1554                         protected++;
1555                 if (buffer_dirty(bh))
1556                         dirty++;
1557                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1558                 if (bh->b_count)
1559                         used++, lastused = found;
1560                 bh = bh->b_next_free;
1561               } while (bh != lru_list[nlist]);
1562         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, "
1563                "%d protected, %d dirty %d shrd\n",
1564                 nlist, found, used, lastused, locked, protected, dirty, shared);
1565         };
1566         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1567         for(isize = 0; isize<NR_SIZES; isize++){
1568                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1569                        buffers_lav[isize], nr_free[isize]);
1570                 for(nlist = 0; nlist < NR_LIST; nlist++)
1571                          printk("%7d ", nr_buffers_st[isize][nlist]);
1572                 printk("\n");
1573         }
1574 }
1575 
1576 
1577 /* ====================== Cluster patches for ext2 ==================== */
1578 
1579 /*
1580  * try_to_reassign() checks if all the buffers on this particular page
1581  * are unused, and reassigns them to a new cluster if this is true.
1582  */
1583 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1584                            kdev_t dev, unsigned int starting_block)
1585 {
1586         unsigned long page;
1587         struct buffer_head * tmp, * p;
1588 
1589         *bhp = bh;
1590         page = (unsigned long) bh->b_data;
1591         page &= PAGE_MASK;
1592         if(mem_map[MAP_NR(page)].count != 1) return 0;
1593         tmp = bh;
1594         do {
1595                 if (!tmp)
1596                          return 0;
1597                 
1598                 if (tmp->b_count || buffer_protected(tmp) ||
1599                     buffer_dirty(tmp) || buffer_locked(tmp))
1600                          return 0;
1601                 tmp = tmp->b_this_page;
1602         } while (tmp != bh);
1603         tmp = bh;
1604         
1605         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1606                  tmp = tmp->b_this_page;
1607         
1608         /* This is the buffer at the head of the page */
1609         bh = tmp;
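             /* All buffers on the page are idle: re-key each one to the
                target device and to consecutive block numbers, rehashing
                it under its new identity. */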
1610         do {
1611                 p = tmp;
1612                 tmp = tmp->b_this_page;
1613                 remove_from_queues(p);
1614                 p->b_dev = dev;
1615                 mark_buffer_uptodate(p, 0);
1616                 clear_bit(BH_Req, &p->b_state);
1617                 p->b_blocknr = starting_block++;
1618                 insert_into_queues(p);
1619         } while (tmp != bh);
1620         return 1;
1621 }
1622 
1623 /*
1624  * Try to find a free cluster by locating a page where
1625  * all of the buffers are unused.  We would like this function
1626  * to be atomic, so we do not call anything that might cause
1627  * the process to sleep.  The priority is somewhat similar to
1628  * the priority used in shrink_buffers.
1629  * 
1630  * My thinking is that the kernel should end up using whole
1631  * pages for the buffer cache as much of the time as possible.
1632  * This way the other buffers on a particular page are likely
1633  * to be very near each other on the free list, and we will not
1634  * be expiring data prematurely.  For now we only cannibalize buffers
1635  * of the same size to keep the code simpler.
1636  */
1637 static int reassign_cluster(kdev_t dev, 
1638                      unsigned int starting_block, int size)
1639 {
1640         struct buffer_head *bh;
1641         int isize = BUFSIZE_INDEX(size);
1642         int i;
1643 
1644         /* We want to give ourselves a really good shot at generating
1645            a cluster, and since we only take buffers from the free
1646            list, we "overfill" it a little. */
1647 
1648         while(nr_free[isize] < 32) refill_freelist(size);
1649 
1650         bh = free_list[isize];
1651         if(bh)
1652                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1653                          if (!bh->b_this_page)  continue;
1654                          if (try_to_reassign(bh, &bh, dev, starting_block))
1655                                  return 4;
1656                  }
1657         return 0;
1658 }
1659 
1660 /* This function tries to generate a new cluster of buffers
1661  * from a new page in memory.  We should only do this if we have
1662  * not expanded the buffer cache to the maximum size that we allow.
1663  */
1664 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1665 {
1666         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1667         int isize = BUFSIZE_INDEX(size);
1668         unsigned long offset;
1669         unsigned long page;
1670         int nblock;
1671 
1672         page = get_free_page(GFP_NOBUFFER);
1673         if(!page) return 0;
1674 
1675         bh = create_buffers(page, size);
1676         if (!bh) {
1677                 free_page(page);
1678                 return 0;
1679         };
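             /* Give up if any block that would share this page is already
                present in the cache; a cluster page must own all of its
                blocks. */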
1680         nblock = block;
1681         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1682                 if (find_buffer(dev, nblock++, size))
1683                          goto not_aligned;
1684         }
1685         tmp = bh;
1686         nblock = 0;
1687         while (1) {
1688                 arr[nblock++] = bh;
1689                 bh->b_count = 1;
1690                 bh->b_flushtime = 0;
1691                 bh->b_state = 0;
1692                 bh->b_dev = dev;
1693                 bh->b_list = BUF_CLEAN;
1694                 bh->b_blocknr = block++;
1695                 nr_buffers++;
1696                 nr_buffers_size[isize]++;
1697                 insert_into_queues(bh);
1698                 if (bh->b_this_page)
1699                         bh = bh->b_this_page;
1700                 else
1701                         break;
1702         }
1703         buffermem += PAGE_SIZE;
1704         buffer_pages[MAP_NR(page)] = bh;
1705         bh->b_this_page = tmp;
1706         while (nblock-- > 0)
1707                 brelse(arr[nblock]);
1708         return 4; /* ?? */
1709 not_aligned:
1710         while ((tmp = bh) != NULL) {
1711                 bh = bh->b_this_page;
1712                 put_unused_buffer_head(tmp);
1713         }
1714         free_page(page);
1715         return 0;
1716 }
1717 
1718 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1719 {
1720         int i, offset;
1721         
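             /* Clustering only pays off if the blocks are consecutive on
                disk and none of them is already in the buffer cache. */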
1722         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1723                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1724                 if(find_buffer(dev, b[i], size)) return 0;
1725         };
1726 
1727         /* OK, we have a candidate for a new cluster */
1728         
1729         /* See if one size of buffer is over-represented in the buffer cache,
1730            if so reduce the numbers of buffers */
1731         if(maybe_shrink_lav_buffers(size))
1732          {
1733                  int retval;
1734                  retval = try_to_generate_cluster(dev, b[0], size);
1735                  if(retval) return retval;
1736          };
1737         
1738         if (nr_free_pages > min_free_pages*2) 
1739                  return try_to_generate_cluster(dev, b[0], size);
1740         else
1741                  return reassign_cluster(dev, b[0], size);
1742 }
1743 
1744 
1745 /* ===================== Init ======================= */
1746 
1747 /*
1748  * This initializes the initial buffer free list.  nr_buffers_type is set
1749  * to one less than the actual number of buffers, as a sop to backwards
1750  * compatibility --- the old code did this (I think unintentionally,
1751  * but I'm not sure), and programs in the ps package expect it.
1752  *                                      - TYT 8/30/92
1753  */
1754 void buffer_init(void)
1755 {
1756         int i;
1757         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1758         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1759 
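             /* Scale the hash table with total memory; 997, 4093 and 16381
                are all prime, which helps spread the hash chains. */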
1760         if (memsize >= 4*1024*1024) {
1761                 if(memsize >= 16*1024*1024)
1762                          nr_hash = 16381;
1763                 else
1764                          nr_hash = 4093;
1765         } else {
1766                 nr_hash = 997;
1767         };
1768         
1769         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1770                                                      sizeof(struct buffer_head *));
1771 
1772 
1773         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1774                                                      sizeof(struct buffer_head *));
1775         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1776                 buffer_pages[i] = NULL;
1777 
1778         for (i = 0 ; i < nr_hash ; i++)
1779                 hash_table[i] = NULL;
1780         lru_list[BUF_CLEAN] = 0;
1781         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1782         if (!free_list[isize])
1783                 panic("VFS: Unable to initialize buffer free list!");
1784         return;
1785 }
1786 
1787 
1788 /* ====================== bdflush support =================== */
1789 
1790 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1791  * response to dirty buffers.  Once this process is activated, we write back
1792  * a limited number of buffers to the disks and then go back to sleep again.
1793  */
1794 struct wait_queue * bdflush_wait = NULL;
1795 struct wait_queue * bdflush_done = NULL;
1796 
1797 static void wakeup_bdflush(int wait)
1798 {
1799         wake_up(&bdflush_wait);
1800         if(wait) sleep_on(&bdflush_done);
1801 }
1802 
1803 
1804 /* 
1805  * Here we attempt to write back old buffers.  We also try to flush inodes
1806  * and superblocks, since this function is essentially "update", and
1807  * otherwise there would be no way of ensuring that these quantities ever
1808  * get written back.  Ideally, we would have a timestamp on the inodes
1809  * and superblocks so that we could write back only the old ones as well.
1810  */
1811 
1812 asmlinkage int sync_old_buffers(void)
1813 {
1814         int i, isize;
1815         int ndirty, nwritten;
1816         int nlist;
1817         int ncount;
1818         struct buffer_head * bh, *next;
1819 
1820         sync_supers(0);
1821         sync_inodes(0);
1822 
1823         ncount = 0;
1824 #ifdef DEBUG
1825         for(nlist = 0; nlist < NR_LIST; nlist++)
1826 #else
1827         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1828 #endif
1829         {
1830                 ndirty = 0;
1831                 nwritten = 0;
1832         repeat:
1833                 bh = lru_list[nlist];
1834                 if(bh) 
1835                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1836                                  /* We may have stalled while waiting for I/O to complete. */
1837                                  if(bh->b_list != nlist) goto repeat;
1838                                  next = bh->b_next_free;
1839                                  if(!lru_list[nlist]) {
1840                                          printk("Dirty list empty %d\n", i);
1841                                          break;
1842                                  }
1843                                  
1844                                  /* Clean buffer on dirty list?  Refile it */
1845                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1846                                   {
1847                                           refile_buffer(bh);
1848                                           continue;
1849                                   }
1850                                  
1851                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1852                                           continue;
1853                                  ndirty++;
1854                                  if(bh->b_flushtime > jiffies) continue;
1855                                  nwritten++;
1856                                  bh->b_count++;
1857                                  bh->b_flushtime = 0;
1858 #ifdef DEBUG
1859                                  if(nlist != BUF_DIRTY) ncount++;
1860 #endif
1861                                  ll_rw_block(WRITE, 1, &bh);
1862                                  bh->b_count--;
1863                          }
1864         }
1865 #ifdef DEBUG
1866         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1867         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1868 #endif
1869         
1870         /* We assume that we only come through here on a regular
1871            schedule, like every 5 seconds.  Now update load averages.  
1872            Shift usage counts to prevent overflow. */
1873         for(isize = 0; isize<NR_SIZES; isize++){
1874                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1875                 buffer_usage[isize] = 0;
1876         };
1877         return 0;
1878 }
1879 
1880 
1881 /* This is the interface to bdflush.  As we get more sophisticated, we can
1882  * pass tuning parameters to this "process", to adjust how it behaves. 
1883  * We would want to verify each parameter, however, to make sure that it 
1884  * is reasonable. */
1885 
1886 asmlinkage int sys_bdflush(int func, long data)
1887 {
1888         int i, error;
1889 
1890         if (!suser())
1891                 return -EPERM;
1892 
1893         if (func == 1)
1894                  return sync_old_buffers();
1895 
1896         /* Basically func 2 means read param 1, 3 means write param 1, etc */
1897         if (func >= 2) {
1898                 i = (func-2) >> 1;
1899                 if (i < 0 || i >= N_PARAM)
1900                         return -EINVAL;
1901                 if((func & 1) == 0) {
1902                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1903                         if (error)
1904                                 return error;
1905                         put_user(bdf_prm.data[i], (int*)data);
1906                         return 0;
1907                 };
1908                 if (data < bdflush_min[i] || data > bdflush_max[i])
1909                         return -EINVAL;
1910                 bdf_prm.data[i] = data;
1911                 return 0;
1912         };
1913 
1914         /* Calling this with func 0 used to launch the actual bdflush and
1915         then never return (unless it was explicitly killed). We return zero
1916         here to remain semi-compatible with present update(8) programs. */
1917 
1918         return 0;
1919 }
1920 
1921 /* This is the actual bdflush daemon itself. It used to be started from
1922  * the syscall above, but now we launch it ourselves internally with
1923  * kernel_thread(...)  directly after the first thread in init/main.c */
1924 
1925 int bdflush(void * unused) 
1926 {
1927         int i;
1928         int ndirty;
1929         int nlist;
1930         int ncount;
1931         struct buffer_head * bh, *next;
1932 
1933         /*
1934          *      We have a bare-bones task_struct, and really should fill
1935          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1936          *      display semi-sane things. Not real crucial though...  
1937          */
1938 
1939         current->session = 1;
1940         current->pgrp = 1;
1941         sprintf(current->comm, "kflushd");
1942 
1943         /*
1944          *      As a kernel thread we want to tamper with system buffers
1945          *      and other internals and thus be subject to the SMP locking
1946          *      rules. (On a uniprocessor box this does nothing).
1947          */
1948          
1949 #ifdef __SMP__
1950         lock_kernel();
1951         syscall_count++;
1952 #endif
1953                  
1954         for (;;) {
1955 #ifdef DEBUG
1956                 printk("bdflush() activated...");
1957 #endif
1958                 
1959                 ncount = 0;
1960 #ifdef DEBUG
1961                 for(nlist = 0; nlist < NR_LIST; nlist++)
1962 #else
1963                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1964 #endif
1965                  {
1966                          ndirty = 0;
1967                  repeat:
1968                          bh = lru_list[nlist];
1969                          if(bh) 
1970                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1971                                        bh = next) {
1972                                           /* We may have stalled while waiting for I/O to complete. */
1973                                           if(bh->b_list != nlist) goto repeat;
1974                                           next = bh->b_next_free;
1975                                           if(!lru_list[nlist]) {
1976                                                   printk("Dirty list empty %d\n", i);
1977                                                   break;
1978                                           }
1979                                           
1980                                           /* Clean buffer on dirty list?  Refile it */
1981                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1982                                            {
1983                                                    refile_buffer(bh);
1984                                                    continue;
1985                                            }
1986                                           
1987                                           if (buffer_locked(bh) || !buffer_dirty(bh))
1988                                                    continue;
1989                                           /* Should we write back buffers that are shared or not??
1990                                              currently dirty buffers are not shared, so it does not matter */
1991                                           bh->b_count++;
1992                                           ndirty++;
1993                                           bh->b_flushtime = 0;
1994                                           ll_rw_block(WRITE, 1, &bh);
1995 #ifdef DEBUG
1996                                           if(nlist != BUF_DIRTY) ncount++;
1997 #endif
1998                                           bh->b_count--;
1999                                   }
2000                  }
2001 #ifdef DEBUG
2002                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2003                 printk("sleeping again.\n");
2004 #endif
2005                 wake_up(&bdflush_done);
2006                 
2007                 /* If there are still a lot of dirty buffers around, skip the sleep
2008                    and flush some more */
2009                 
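                /* Sleep only while dirty buffers stay at or below nfract per
                   cent of the non-shared buffers; otherwise loop straight
                   back and flush another batch. */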
2010                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2011                    bdf_prm.b_un.nfract/100) {
2012                         current->signal = 0;
2013                         interruptible_sleep_on(&bdflush_wait);
2014                 }
2015         }
2016 }
2017 
2018 
2019 /*
2020  * Overrides for Emacs so that we follow Linus's tabbing style.
2021  * Emacs will notice this stuff at the end of the file and automatically
2022  * adjust the settings for this buffer only.  This must remain at the end
2023  * of the file.
2024  * ---------------------------------------------------------------------------
2025  * Local variables:
2026  * c-indent-level: 8
2027  * c-brace-imaginary-offset: 0
2028  * c-brace-offset: -8
2029  * c-argdecl-indent: 8
2030  * c-label-offset: -8
2031  * c-continued-statement-offset: 8
2032  * c-continued-brace-offset: 0
2033  * End:
2034  */
