root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. brw_page
  34. mark_buffer_uptodate
  35. unlock_buffer
  36. generic_readpage
  37. grow_buffers
  38. try_to_free_buffer
  39. age_buffer
  40. maybe_shrink_lav_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush
  51. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 5
  39 static char buffersize_index[17] =
  40 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1, 4};
  41 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096, 8192};
  42 
  43 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  44 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
  45 
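/*
 * Editorial sketch (not part of the original file): how the two tables
 * above fit together.  buffersize_index[] maps a block size in 512-byte
 * units to a small index, and bufferindex_size[] is the inverse mapping,
 * so for the supported sizes:
 *
 *	BUFSIZE_INDEX(512)  == 0,  bufferindex_size[0] == 512
 *	BUFSIZE_INDEX(1024) == 1,  bufferindex_size[1] == 1024
 *	BUFSIZE_INDEX(4096) == 3,  bufferindex_size[3] == 4096
 *
 * Unsupported sizes (1536, say) land on a -1 entry, so a hypothetical
 * sanity check would be:
 *
 *	int isize = BUFSIZE_INDEX(size);
 *	if (isize < 0)
 *		panic("unsupported buffer size");
 */
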
  46 static int grow_buffers(int pri, int size);
  47 static int shrink_specific_buffers(unsigned int priority, int size);
  48 static int maybe_shrink_lav_buffers(int);
  49 
  50 static int nr_hash = 0;  /* Size of hash table */
  51 static struct buffer_head ** hash_table;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 
  59 static struct buffer_head * unused_list = NULL;
  60 struct buffer_head * reuse_list = NULL;
  61 static struct wait_queue * buffer_wait = NULL;
  62 
  63 int nr_buffers = 0;
  64 int nr_buffers_type[NR_LIST] = {0,};
  65 int nr_buffers_size[NR_SIZES] = {0,};
  66 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  67 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  68 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  69 int nr_free[NR_SIZES] = {0,};
  70 int buffermem = 0;
  71 int nr_buffer_heads = 0;
  72 extern int *blksize_size[];
  73 
  74 /* Here is the parameter block for the bdflush process. If you add or
  75  * remove any of the parameters, make sure to update kernel/sysctl.c.
  76  */
  77 
  78 static void wakeup_bdflush(int);
  79 
  80 #define N_PARAM 9
  81 #define LAV
  82 
  83 union bdflush_param{
  84         struct {
  85                 int nfract;  /* Percentage of buffer cache dirty to 
  86                                 activate bdflush */
  87                 int ndirty;  /* Maximum number of dirty blocks to write out per
  88                                 wake-cycle */
  89                 int nrefill; /* Number of clean buffers to try and obtain
  90                                 each time we call refill */
  91                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  92                                   when trying to refill buffers. */
  93                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  94                                     search for free clusters */
  95                 int age_buffer;  /* Time for normal buffer to age before 
  96                                     we flush it */
  97                 int age_super;  /* Time for superblock to age before we 
  98                                    flush it */
  99                 int lav_const;  /* Constant used for load average (time
 100                                    constant) */
 101                 int lav_ratio;  /* Used to determine how low a lav for a
 102                                    particular size can go before we start to
 103                                    trim back the buffers */
 104         } b_un;
 105         unsigned int data[N_PARAM];
 106 } bdf_prm = {{60, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 107 
 108 /* The lav constant is set for 1 minute, as long as the update process runs
 109    every 5 seconds.  If you change the frequency of update, the time
 110    constant will also change. */
 111 
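/*
 * Editorial sketch: where the default lav_const of 1884 (above) comes
 * from, assuming the usual fixed-point exponential decay used for kernel
 * load averages with a scale of FIXED_1 == 2048.  With one sample every
 * 5 seconds and a 1-minute time constant the per-sample decay factor is
 *
 *	2048 * exp(-5/60) ~= 2048 * 0.920 ~= 1884
 *
 * which is why changing the update interval also changes the effective
 * time constant unless lav_const is re-derived.
 */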
 112 
 113 /* These are the min and max parameter values that we will allow to be assigned */
 114 int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 115 int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
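
/*
 * Editorial sketch (illustration only): because bdf_prm is a union, the
 * named fields and the flat data[] array alias each other; for example
 * data[1] is b_un.ndirty.  A hypothetical setter enforcing the bounds
 * above might look like:
 *
 *	static int set_bdflush_param(int i, unsigned int value)
 *	{
 *		if (i < 0 || i >= N_PARAM)
 *			return -EINVAL;
 *		if (value < bdflush_min[i] || value > bdflush_max[i])
 *			return -EINVAL;
 *		bdf_prm.data[i] = value;
 *		return 0;
 *	}
 */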
 116 
 117 /*
 118  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 119  * and got rid of the cli-sti pairs. The wait-queue routines still
 120  * need cli-sti, but now it's just a couple of 386 instructions or so.
 121  *
 122  * Note that the real wait_on_buffer() is an inline function that checks
 123  * whether the buffer is locked before calling this, so that the queues
 124  * aren't set up unnecessarily.
 125  */
 126 void __wait_on_buffer(struct buffer_head * bh)
 127 {
 128         struct wait_queue wait = { current, NULL };
 129 
 130         bh->b_count++;
 131         add_wait_queue(&bh->b_wait, &wait);
 132 repeat:
 133         run_task_queue(&tq_disk);
 134         current->state = TASK_UNINTERRUPTIBLE;
 135         if (buffer_locked(bh)) {
 136                 schedule();
 137                 goto repeat;
 138         }
 139         remove_wait_queue(&bh->b_wait, &wait);
 140         bh->b_count--;
 141         current->state = TASK_RUNNING;
 142 }
 143 
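/*
 * Editorial sketch: the inline wrapper referred to in the comment above
 * lives in <linux/locks.h> (included by this file) and, roughly, only
 * drops into the slow path when the buffer is actually locked:
 *
 *	extern inline void wait_on_buffer(struct buffer_head * bh)
 *	{
 *		if (buffer_locked(bh))
 *			__wait_on_buffer(bh);
 *	}
 */
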
 144 /* Call sync_buffers with wait!=0 to ensure that the call does not
 145    return until all buffer writes have completed.  Sync() may return
 146    before the writes have finished; fsync() may not. */
 147 
 148 
 149 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 150    spontaneously dirty themselves without ever brelse being called.
 151    We will ultimately want to put these in a separate list, but for
 152    now we search all of the lists for dirty buffers */
 153 
 154 static int sync_buffers(kdev_t dev, int wait)
 155 {
 156         int i, retry, pass = 0, err = 0;
 157         int nlist, ncount;
 158         struct buffer_head * bh, *next;
 159 
 160         /* One pass for no-wait, three for wait:
 161            0) write out all dirty, unlocked buffers;
 162            1) write out all dirty buffers, waiting if locked;
 163            2) wait for completion by waiting for all buffers to unlock. */
 164  repeat:
 165         retry = 0;
 166  repeat2:
 167         ncount = 0;
 168         /* We search all lists as a failsafe mechanism, not because we expect
 169            there to be dirty buffers on any of the other lists. */
 170         for(nlist = 0; nlist < NR_LIST; nlist++)
 171          {
 172          repeat1:
 173                  bh = lru_list[nlist];
 174                  if(!bh) continue;
 175                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 176                          if(bh->b_list != nlist) goto repeat1;
 177                          next = bh->b_next_free;
 178                          if(!lru_list[nlist]) break;
 179                          if (dev && bh->b_dev != dev)
 180                                   continue;
 181                          if (buffer_locked(bh))
 182                           {
 183                                   /* Buffer is locked; skip it unless wait is
 184                                      requested AND pass > 0. */
 185                                   if (!wait || !pass) {
 186                                           retry = 1;
 187                                           continue;
 188                                   }
 189                                   wait_on_buffer (bh);
 190                                   goto repeat2;
 191                           }
 192                          /* If an unlocked buffer is not uptodate, there has
 193                              been an IO error. Skip it. */
 194                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 195                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 196                                   err = 1;
 197                                   continue;
 198                           }
 199                          /* Don't write clean buffers.  Don't write ANY buffers
 200                             on the third pass. */
 201                          if (!buffer_dirty(bh) || pass>=2)
 202                                   continue;
 203                          /* don't bother about locked buffers */
 204                          if (buffer_locked(bh))
 205                                  continue;
 206                          bh->b_count++;
 207                          bh->b_flushtime = 0;
 208                          ll_rw_block(WRITE, 1, &bh);
 209 
 210                          if(nlist != BUF_DIRTY) { 
 211                                  printk("[%d %s %ld] ", nlist,
 212                                         kdevname(bh->b_dev), bh->b_blocknr);
 213                                  ncount++;
 214                          };
 215                          bh->b_count--;
 216                          retry = 1;
 217                  }
 218          }
 219         if (ncount)
 220           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 221         
 222         /* If we are waiting for the sync to succeed, and if any dirty
 223            blocks were written, then repeat; on the second pass, only
 224            wait for buffers being written (do not pass to write any
 225            more buffers on the second pass). */
 226         if (wait && retry && ++pass<=2)
 227                  goto repeat;
 228         return err;
 229 }
 230 
 231 void sync_dev(kdev_t dev)
 232 {
 233         sync_buffers(dev, 0);
 234         sync_supers(dev);
 235         sync_inodes(dev);
 236         sync_buffers(dev, 0);
 237         sync_dquots(dev, -1);
 238 }
 239 
 240 int fsync_dev(kdev_t dev)
 241 {
 242         sync_buffers(dev, 0);
 243         sync_supers(dev);
 244         sync_inodes(dev);
 245         sync_dquots(dev, -1);
 246         return sync_buffers(dev, 1);
 247 }
 248 
 249 asmlinkage int sys_sync(void)
 250 {
 251         fsync_dev(0);
 252         return 0;
 253 }
 254 
 255 int file_fsync (struct inode *inode, struct file *filp)
 256 {
 257         return fsync_dev(inode->i_dev);
 258 }
 259 
 260 asmlinkage int sys_fsync(unsigned int fd)
 261 {
 262         struct file * file;
 263         struct inode * inode;
 264 
 265         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 266                 return -EBADF;
 267         if (!file->f_op || !file->f_op->fsync)
 268                 return -EINVAL;
 269         if (file->f_op->fsync(inode,file))
 270                 return -EIO;
 271         return 0;
 272 }
 273 
 274 asmlinkage int sys_fdatasync(unsigned int fd)
 275 {
 276         struct file * file;
 277         struct inode * inode;
 278 
 279         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 280                 return -EBADF;
 281         if (!file->f_op || !file->f_op->fsync)
 282                 return -EINVAL;
 283         /* this needs further work, at the moment it is identical to fsync() */
 284         if (file->f_op->fsync(inode,file))
 285                 return -EIO;
 286         return 0;
 287 }
 288 
 289 void invalidate_buffers(kdev_t dev)
 290 {
 291         int i;
 292         int nlist;
 293         struct buffer_head * bh;
 294 
 295         for(nlist = 0; nlist < NR_LIST; nlist++) {
 296                 bh = lru_list[nlist];
 297                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 298                         if (bh->b_dev != dev)
 299                                 continue;
 300                         wait_on_buffer(bh);
 301                         if (bh->b_dev != dev)
 302                                 continue;
 303                         if (bh->b_count)
 304                                 continue;
 305                         bh->b_flushtime = 0;
 306                         clear_bit(BH_Protected, &bh->b_state);
 307                         clear_bit(BH_Uptodate, &bh->b_state);
 308                         clear_bit(BH_Dirty, &bh->b_state);
 309                         clear_bit(BH_Req, &bh->b_state);
 310                 }
 311         }
 312 }
 313 
 314 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 315 #define hash(dev,block) hash_table[_hashfn(dev,block)]
 316 
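/*
 * Editorial note: an illustrative walk through the hashing above, with
 * example values not taken from the original file.  For dev == 0x0301
 * and block == 1234 the bucket is
 *
 *	hash_table[(HASHDEV(0x0301) ^ 1234) % nr_hash]
 *
 * and find_buffer()/get_hash_table() below simply walk that bucket's
 * b_next chain looking for an exact (dev, block, size) match.
 */
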
 317 static inline void remove_from_hash_queue(struct buffer_head * bh)
 318 {
 319         if (bh->b_next)
 320                 bh->b_next->b_prev = bh->b_prev;
 321         if (bh->b_prev)
 322                 bh->b_prev->b_next = bh->b_next;
 323         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 324                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 325         bh->b_next = bh->b_prev = NULL;
 326 }
 327 
 328 static inline void remove_from_lru_list(struct buffer_head * bh)
 329 {
 330         if (!(bh->b_prev_free) || !(bh->b_next_free))
 331                 panic("VFS: LRU block list corrupted");
 332         if (bh->b_dev == B_FREE)
 333                 panic("LRU list corrupted");
 334         bh->b_prev_free->b_next_free = bh->b_next_free;
 335         bh->b_next_free->b_prev_free = bh->b_prev_free;
 336 
 337         if (lru_list[bh->b_list] == bh)
 338                  lru_list[bh->b_list] = bh->b_next_free;
 339         if (lru_list[bh->b_list] == bh)
 340                  lru_list[bh->b_list] = NULL;
 341         if (next_to_age[bh->b_list] == bh)
 342                 next_to_age[bh->b_list] = bh->b_next_free;
 343         if (next_to_age[bh->b_list] == bh)
 344                 next_to_age[bh->b_list] = NULL;
 345 
 346         bh->b_next_free = bh->b_prev_free = NULL;
 347 }
 348 
 349 static inline void remove_from_free_list(struct buffer_head * bh)
 350 {
 351         int isize = BUFSIZE_INDEX(bh->b_size);
 352         if (!(bh->b_prev_free) || !(bh->b_next_free))
 353                 panic("VFS: Free block list corrupted");
 354         if(bh->b_dev != B_FREE)
 355                 panic("Free list corrupted");
 356         if(!free_list[isize])
 357                 panic("Free list empty");
 358         nr_free[isize]--;
 359         if(bh->b_next_free == bh)
 360                  free_list[isize] = NULL;
 361         else {
 362                 bh->b_prev_free->b_next_free = bh->b_next_free;
 363                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 364                 if (free_list[isize] == bh)
 365                          free_list[isize] = bh->b_next_free;
 366         }
 367         bh->b_next_free = bh->b_prev_free = NULL;
 368 }
 369 
 370 static inline void remove_from_queues(struct buffer_head * bh)
 371 {
 372         if(bh->b_dev == B_FREE) {
 373                 remove_from_free_list(bh); /* Free list entries should not be
 374                                               in the hash queue */
 375                 return;
 376         };
 377         nr_buffers_type[bh->b_list]--;
 378         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 379         remove_from_hash_queue(bh);
 380         remove_from_lru_list(bh);
 381 }
 382 
 383 static inline void put_last_lru(struct buffer_head * bh)
 384 {
 385         if (!bh)
 386                 return;
 387         if (bh == lru_list[bh->b_list]) {
 388                 lru_list[bh->b_list] = bh->b_next_free;
 389                 if (next_to_age[bh->b_list] == bh)
 390                         next_to_age[bh->b_list] = bh->b_next_free;
 391                 return;
 392         }
 393         if(bh->b_dev == B_FREE)
 394                 panic("Wrong block for lru list");
 395         remove_from_lru_list(bh);
 396 /* add to back of the lru list */
 397 
 398         if(!lru_list[bh->b_list]) {
 399                 lru_list[bh->b_list] = bh;
 400                 lru_list[bh->b_list]->b_prev_free = bh;
 401         }
 402         if (!next_to_age[bh->b_list])
 403                 next_to_age[bh->b_list] = bh;
 404 
 405         bh->b_next_free = lru_list[bh->b_list];
 406         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 407         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 408         lru_list[bh->b_list]->b_prev_free = bh;
 409 }
 410 
 411 static inline void put_last_free(struct buffer_head * bh)
 412 {
 413         int isize;
 414         if (!bh)
 415                 return;
 416 
 417         isize = BUFSIZE_INDEX(bh->b_size);      
 418         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 419         /* add to back of free list */
 420         if(!free_list[isize]) {
 421                 free_list[isize] = bh;
 422                 bh->b_prev_free = bh;
 423         };
 424 
 425         nr_free[isize]++;
 426         bh->b_next_free = free_list[isize];
 427         bh->b_prev_free = free_list[isize]->b_prev_free;
 428         free_list[isize]->b_prev_free->b_next_free = bh;
 429         free_list[isize]->b_prev_free = bh;
 430 }
 431 
 432 static inline void insert_into_queues(struct buffer_head * bh)
 433 {
 434         /* put at end of free list */
 435         if(bh->b_dev == B_FREE) {
 436                 put_last_free(bh);
 437                 return;
 438         }
 439         if(!lru_list[bh->b_list]) {
 440                 lru_list[bh->b_list] = bh;
 441                 bh->b_prev_free = bh;
 442         }
 443         if (!next_to_age[bh->b_list])
 444                 next_to_age[bh->b_list] = bh;
 445         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 446         bh->b_next_free = lru_list[bh->b_list];
 447         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 448         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 449         lru_list[bh->b_list]->b_prev_free = bh;
 450         nr_buffers_type[bh->b_list]++;
 451         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 452 /* put the buffer in new hash-queue if it has a device */
 453         bh->b_prev = NULL;
 454         bh->b_next = NULL;
 455         if (!(bh->b_dev))
 456                 return;
 457         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 458         hash(bh->b_dev,bh->b_blocknr) = bh;
 459         if (bh->b_next)
 460                 bh->b_next->b_prev = bh;
 461 }
 462 
 463 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 464 {               
 465         struct buffer_head * tmp;
 466 
 467         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 468                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 469                         if (tmp->b_size == size)
 470                                 return tmp;
 471                         else {
 472                                 printk("VFS: Wrong blocksize on device %s\n",
 473                                         kdevname(dev));
 474                                 return NULL;
 475                         }
 476         return NULL;
 477 }
 478 
 479 /*
 480  * Why like this, I hear you say... The reason is race-conditions.
 481  * As we don't lock buffers (unless we are reading them, that is),
 482  * something might happen to it while we sleep (ie a read-error
 483  * will force it bad). This shouldn't really happen currently, but
 484  * the code is ready.
 485  */
 486 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 487 {
 488         struct buffer_head * bh;
 489 
 490         for (;;) {
 491                 if (!(bh=find_buffer(dev,block,size)))
 492                         return NULL;
 493                 bh->b_count++;
 494                 wait_on_buffer(bh);
 495                 if (bh->b_dev == dev && bh->b_blocknr == block
 496                                              && bh->b_size == size)
 497                         return bh;
 498                 bh->b_count--;
 499         }
 500 }
 501 
 502 void set_blocksize(kdev_t dev, int size)
 503 {
 504         int i, nlist;
 505         struct buffer_head * bh, *bhnext;
 506 
 507         if (!blksize_size[MAJOR(dev)])
 508                 return;
 509 
 510         if (size > PAGE_SIZE)
 511                 size = 0;
 512 
 513         switch (size) {
 514                 default: panic("Invalid blocksize passed to set_blocksize");
 515                 case 512: case 1024: case 2048: case 4096: case 8192: ;
 516         }
 517 
 518         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 519                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 520                 return;
 521         }
 522         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 523                 return;
 524         sync_buffers(dev, 2);
 525         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 526 
 527   /* We need to be quite careful how we do this - we are moving entries
 528      around on the free list, and we can get in a loop if we are not careful. */
 529 
 530         for(nlist = 0; nlist < NR_LIST; nlist++) {
 531                 bh = lru_list[nlist];
 532                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 533                         if(!bh) break;
 534                         bhnext = bh->b_next_free; 
 535                         if (bh->b_dev != dev)
 536                                  continue;
 537                         if (bh->b_size == size)
 538                                  continue;
 539                         
 540                         wait_on_buffer(bh);
 541                         if (bh->b_dev == dev && bh->b_size != size) {
 542                                 clear_bit(BH_Dirty, &bh->b_state);
 543                                 clear_bit(BH_Uptodate, &bh->b_state);
 544                                 clear_bit(BH_Req, &bh->b_state);
 545                                 bh->b_flushtime = 0;
 546                         }
 547                         remove_from_hash_queue(bh);
 548                 }
 549         }
 550 }
 551 
 552 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 553 
 554 void refill_freelist(int size)
 555 {
 556         struct buffer_head * bh, * tmp;
 557         struct buffer_head * candidate[NR_LIST];
 558         unsigned int best_time, winner;
 559         int isize = BUFSIZE_INDEX(size);
 560         int buffers[NR_LIST];
 561         int i;
 562         int needed;
 563 
 564         /* First see if we even need this.  Sometimes it is advantageous
 565          to request some blocks in a filesystem that we know we will
 566          be needing ahead of time. */
 567 
 568         if (nr_free[isize] > 100)
 569                 return;
 570 
 571         /* If there are too many dirty buffers, we wake up the update process
 572            now so as to ensure that there are still clean buffers available
 573            for user processes to use (and dirty) */
 574         
 575         /* We are going to try and locate this much memory */
 576         needed = bdf_prm.b_un.nrefill * size;
 577 
 578         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 579                grow_buffers(GFP_BUFFER, size)) {
 580                 needed -= PAGE_SIZE;
 581         }
 582 
 583         if(needed <= 0) return;
 584 
 585         /* See if there are too many buffers of a different size.
 586            If so, victimize them */
 587 
 588         while(maybe_shrink_lav_buffers(size))
 589          {
 590                  if(!grow_buffers(GFP_BUFFER, size)) break;
 591                  needed -= PAGE_SIZE;
 592                  if(needed <= 0) return;
 593          };
 594 
 595         /* OK, we cannot grow the buffer cache, now try and get some
 596            from the lru list */
 597 
 598         /* First set the candidate pointers to usable buffers.  This
 599            should be quick nearly all of the time. */
 600 
 601 repeat0:
 602         for(i=0; i<NR_LIST; i++){
 603                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 604                    nr_buffers_type[i] == 0) {
 605                         candidate[i] = NULL;
 606                         buffers[i] = 0;
 607                         continue;
 608                 }
 609                 buffers[i] = nr_buffers_type[i];
 610                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 611                  {
 612                          if(buffers[i] < 0) panic("Here is the problem");
 613                          tmp = bh->b_next_free;
 614                          if (!bh) break;
 615                          
 616                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 617                              buffer_dirty(bh)) {
 618                                  refile_buffer(bh);
 619                                  continue;
 620                          }
 621                          
 622                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 623                                   continue;
 624                          
 625                          /* Buffers are written in the order they are placed 
 626                             on the locked list. If we encounter a locked
 627                             buffer here, this means that the rest of them
 628                             are also locked */
 629                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 630                                  buffers[i] = 0;
 631                                  break;
 632                          }
 633                          
 634                          if (BADNESS(bh)) continue;
 635                          break;
 636                  };
 637                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 638                 else candidate[i] = bh;
 639                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 640         }
 641         
 642  repeat:
 643         if(needed <= 0) return;
 644         
 645         /* Now see which candidate wins the election */
 646         
 647         winner = best_time = UINT_MAX;  
 648         for(i=0; i<NR_LIST; i++){
 649                 if(!candidate[i]) continue;
 650                 if(candidate[i]->b_lru_time < best_time){
 651                         best_time = candidate[i]->b_lru_time;
 652                         winner = i;
 653                 }
 654         }
 655         
 656         /* If we have a winner, use it, and then get a new candidate from that list */
 657         if(winner != UINT_MAX) {
 658                 i = winner;
 659                 bh = candidate[i];
 660                 candidate[i] = bh->b_next_free;
 661                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 662                 if (bh->b_count || bh->b_size != size)
 663                          panic("Busy buffer in candidate list\n");
 664                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 665                          panic("Shared buffer in candidate list\n");
 666                 if (buffer_protected(bh))
 667                         panic("Protected buffer in candidate list\n");
 668                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 669                 
 670                 if(bh->b_dev == B_FREE)
 671                         panic("Wrong list");
 672                 remove_from_queues(bh);
 673                 bh->b_dev = B_FREE;
 674                 put_last_free(bh);
 675                 needed -= bh->b_size;
 676                 buffers[i]--;
 677                 if(buffers[i] < 0) panic("Here is the problem");
 678                 
 679                 if(buffers[i] == 0) candidate[i] = NULL;
 680                 
 681                 /* Now all we need to do is advance the candidate pointer
 682                    from the winner list to the next usable buffer */
 683                 if(candidate[i] && buffers[i] > 0){
 684                         if(buffers[i] <= 0) panic("Here is another problem");
 685                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 686                                 if(buffers[i] < 0) panic("Here is the problem");
 687                                 tmp = bh->b_next_free;
 688                                 if (!bh) break;
 689                                 
 690                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 691                                     buffer_dirty(bh)) {
 692                                         refile_buffer(bh);
 693                                         continue;
 694                                 };
 695                                 
 696                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 697                                          continue;
 698                                 
 699                                 /* Buffers are written in the order they are
 700                                    placed on the locked list.  If we encounter
 701                                    a locked buffer here, this means that the
 702                                    rest of them are also locked */
 703                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 704                                         buffers[i] = 0;
 705                                         break;
 706                                 }
 707               
 708                                 if (BADNESS(bh)) continue;
 709                                 break;
 710                         };
 711                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 712                         else candidate[i] = bh;
 713                         if(candidate[i] && candidate[i]->b_count) 
 714                                  panic("Here is the problem");
 715                 }
 716                 
 717                 goto repeat;
 718         }
 719         
 720         if(needed <= 0) return;
 721         
 722         /* Too bad, that was not enough. Try a little harder to grow some. */
 723         
 724         if (nr_free_pages > min_free_pages + 5) {
 725                 if (grow_buffers(GFP_BUFFER, size)) {
 726                         needed -= PAGE_SIZE;
 727                         goto repeat0;
 728                 };
 729         }
 730         
 731         /* and repeat until we find something good */
 732         if (!grow_buffers(GFP_ATOMIC, size))
 733                 wakeup_bdflush(1);
 734         needed -= PAGE_SIZE;
 735         goto repeat0;
 736 }
 737 
 738 /*
 739  * Ok, this is getblk, and it isn't very clear, again to hinder
 740  * race-conditions. Most of the code is seldom used, (ie repeating),
 741  * so it should be much more efficient than it looks.
 742  *
 743  * The algorithm is changed: hopefully better, and an elusive bug removed.
 744  *
 745  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 746  * when the filesystem starts to get full of dirty blocks (I hope).
 747  */
 748 struct buffer_head * getblk(kdev_t dev, int block, int size)
 749 {
 750         struct buffer_head * bh;
 751         int isize = BUFSIZE_INDEX(size);
 752 
 753         /* Update this for the buffer size lav. */
 754         buffer_usage[isize]++;
 755 
 756         /* If there are too many dirty buffers, we wake up the update process
 757            now so as to ensure that there are still clean buffers available
 758            for user processes to use (and dirty) */
 759 repeat:
 760         bh = get_hash_table(dev, block, size);
 761         if (bh) {
 762                 if (!buffer_dirty(bh)) {
 763                         if (buffer_uptodate(bh))
 764                                  put_last_lru(bh);
 765                         bh->b_flushtime = 0;
 766                 }
 767                 set_bit(BH_Touched, &bh->b_state);
 768                 return bh;
 769         }
 770 
 771         while(!free_list[isize]) refill_freelist(size);
 772         
 773         if (find_buffer(dev,block,size))
 774                  goto repeat;
 775 
 776         bh = free_list[isize];
 777         remove_from_free_list(bh);
 778 
 779 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 780 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 781         bh->b_count=1;
 782         bh->b_flushtime=0;
 783         bh->b_state=(1<<BH_Touched);
 784         bh->b_dev=dev;
 785         bh->b_blocknr=block;
 786         insert_into_queues(bh);
 787         return bh;
 788 }
 789 
 790 void set_writetime(struct buffer_head * buf, int flag)
 791 {
 792         int newtime;
 793 
 794         if (buffer_dirty(buf)) {
 795                 /* Set the flush time if it is unset or later than the new deadline */
 796                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 797                                      bdf_prm.b_un.age_buffer);
 798                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 799                          buf->b_flushtime = newtime;
 800         } else {
 801                 buf->b_flushtime = 0;
 802         }
 803 }
 804 
 805 
 806 void refile_buffer(struct buffer_head * buf)
 807 {
 808         int dispose;
 809 
 810         if(buf->b_dev == B_FREE) {
 811                 printk("Attempt to refile free buffer\n");
 812                 return;
 813         }
 814         if (buffer_dirty(buf))
 815                 dispose = BUF_DIRTY;
 816         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 817                 dispose = BUF_SHARED;
 818         else if (buffer_locked(buf))
 819                 dispose = BUF_LOCKED;
 820         else if (buf->b_list == BUF_SHARED)
 821                 dispose = BUF_UNSHARED;
 822         else
 823                 dispose = BUF_CLEAN;
 824         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 825         if(dispose != buf->b_list)  {
 826                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 827                          buf->b_lru_time = jiffies;
 828                 if(dispose == BUF_LOCKED && 
 829                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 830                          dispose = BUF_LOCKED1;
 831                 remove_from_queues(buf);
 832                 buf->b_list = dispose;
 833                 insert_into_queues(buf);
 834                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 835                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 836                    bdf_prm.b_un.nfract/100)
 837                          wakeup_bdflush(0);
 838         }
 839 }
 840 
 841 /*
 842  * Release a buffer head
 843  */
 844 void __brelse(struct buffer_head * buf)
 845 {
 846         wait_on_buffer(buf);
 847 
 848         /* If dirty, mark the time this buffer should be written back */
 849         set_writetime(buf, 0);
 850         refile_buffer(buf);
 851 
 852         if (buf->b_count) {
 853                 buf->b_count--;
 854                 return;
 855         }
 856         printk("VFS: brelse: Trying to free free buffer\n");
 857 }
 858 
 859 /*
 860  * bforget() is like brelse(), except it removes the buffer
 861  * from the hash-queues (so that it won't be re-used if it's
 862  * shared).
 863  */
 864 void __bforget(struct buffer_head * buf)
 865 {
 866         wait_on_buffer(buf);
 867         mark_buffer_clean(buf);
 868         clear_bit(BH_Protected, &buf->b_state);
 869         buf->b_count--;
 870         remove_from_hash_queue(buf);
 871         buf->b_dev = NODEV;
 872         refile_buffer(buf);
 873 }
 874 
 875 /*
 876  * bread() reads a specified block and returns the buffer that contains
 877  * it. It returns NULL if the block was unreadable.
 878  */
 879 struct buffer_head * bread(kdev_t dev, int block, int size)
 880 {
 881         struct buffer_head * bh;
 882 
 883         if (!(bh = getblk(dev, block, size))) {
 884                 printk("VFS: bread: READ error on device %s\n",
 885                         kdevname(dev));
 886                 return NULL;
 887         }
 888         if (buffer_uptodate(bh))
 889                 return bh;
 890         ll_rw_block(READ, 1, &bh);
 891         wait_on_buffer(bh);
 892         if (buffer_uptodate(bh))
 893                 return bh;
 894         brelse(bh);
 895         return NULL;
 896 }
 897 
 898 /*
 899  * Ok, breada can be used as bread, but additionally starts read-ahead
 900  * on the blocks that follow, limited by the device's read_ahead value
 901  * and by the current file position and size.
 902  */
 903 
 904 #define NBUF 16
 905 
 906 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 907         unsigned int pos, unsigned int filesize)
 908 {
 909         struct buffer_head * bhlist[NBUF];
 910         unsigned int blocks;
 911         struct buffer_head * bh;
 912         int index;
 913         int i, j;
 914 
 915         if (pos >= filesize)
 916                 return NULL;
 917 
 918         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 919                 return NULL;
 920 
 921         index = BUFSIZE_INDEX(bh->b_size);
 922 
 923         if (buffer_uptodate(bh))
 924                 return bh;
 925 
 926         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 927 
 928         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 929                 blocks = read_ahead[MAJOR(dev)] >> index;
 930         if (blocks > NBUF)
 931                 blocks = NBUF;
 932         
 933         bhlist[0] = bh;
 934         j = 1;
 935         for(i=1; i<blocks; i++) {
 936                 bh = getblk(dev,block+i,bufsize);
 937                 if (buffer_uptodate(bh)) {
 938                         brelse(bh);
 939                         break;
 940                 }
 941                 bhlist[j++] = bh;
 942         }
 943 
 944         /* Request the read for these buffers, and then release them */
 945         ll_rw_block(READ, j, bhlist);
 946 
 947         for(i=1; i<j; i++)
 948                 brelse(bhlist[i]);
 949 
 950         /* Wait for this buffer, and then continue on */
 951         bh = bhlist[0];
 952         wait_on_buffer(bh);
 953         if (buffer_uptodate(bh))
 954                 return bh;
 955         brelse(bh);
 956         return NULL;
 957 }
 958 
 959 /*
 960  * See fs/inode.c for the weird use of volatile..
 961  */
 962 static void put_unused_buffer_head(struct buffer_head * bh)
 963 {
 964         struct wait_queue * wait;
 965 
 966         wait = ((volatile struct buffer_head *) bh)->b_wait;
 967         memset(bh,0,sizeof(*bh));
 968         ((volatile struct buffer_head *) bh)->b_wait = wait;
 969         bh->b_next_free = unused_list;
 970         unused_list = bh;
 971         wake_up(&buffer_wait);
 972 }
 973 
 974 static void get_more_buffer_heads(void)
 975 {
 976         int i;
 977         struct buffer_head * bh;
 978 
 979         for (;;) {
 980                 if (unused_list)
 981                         return;
 982 
 983                 /*
 984                  * This is critical.  We can't swap out pages to get
 985                  * more buffer heads, because the swap-out may need
 986                  * more buffer-heads itself.  Thus GFP_ATOMIC.
 987                  */
 988                 bh = (struct buffer_head *) get_free_page(GFP_ATOMIC);
 989                 if (bh)
 990                         break;
 991 
 992                 /*
 993                  * Uhhuh. We're _really_ low on memory. Now we just
 994                  * wait for old buffer heads to become free due to
 995                  * finishing IO..
 996                  */
 997                 run_task_queue(&tq_disk);
 998                 sleep_on(&buffer_wait);
 999         }
1000 
1001         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
1002                 bh->b_next_free = unused_list;  /* only make link */
1003                 unused_list = bh++;
1004         }
1005 }
1006 
1007 /* 
1008  * We can't put completed temporary IO buffer_heads directly onto the
1009  * unused_list when they become unlocked, since the device driver
1010  * end_request routines still expect access to the buffer_head's
1011  * fields after the final unlock.  So, the device driver puts them on
1012  * the reuse_list instead once IO completes, and we recover these to
1013  * the unused_list here.
1014  *
1015  * The reuse_list receives buffers from interrupt routines, so we need
1016  * to be IRQ-safe here (but note that interrupts only _add_ to the
1017  * reuse_list, never take away. So we don't need to worry about the
1018  * reuse_list magically emptying).
1019  */
1020 static inline void recover_reusable_buffer_heads(void)
1021 {
1022         if (reuse_list) {
1023                 struct buffer_head *bh;
1024                 unsigned long flags;
1025         
1026                 save_flags(flags);
1027                 do {
1028                         cli();
1029                         bh = reuse_list;
1030                         reuse_list = bh->b_next_free;
1031                         restore_flags(flags);
1032                         put_unused_buffer_head(bh);
1033                 } while (reuse_list);
1034         }
1035 }
1036 
1037 static struct buffer_head * get_unused_buffer_head(void)
1038 {
1039         struct buffer_head * bh;
1040 
1041         recover_reusable_buffer_heads();
1042         get_more_buffer_heads();
1043         if (!unused_list)
1044                 return NULL;
1045         bh = unused_list;
1046         unused_list = bh->b_next_free;
1047         bh->b_next_free = NULL;
1048         bh->b_data = NULL;
1049         bh->b_size = 0;
1050         bh->b_state = 0;
1051         return bh;
1052 }
1053 
1054 /*
1055  * Create the appropriate buffers when given a page for data area and
1056  * the size of each buffer.. Use the bh->b_this_page linked list to
1057  * follow the buffers created.  Return NULL if unable to create more
1058  * buffers.
1059  */
1060 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1061 {
1062         struct buffer_head *bh, *head;
1063         unsigned long offset;
1064 
1065         head = NULL;
1066         offset = PAGE_SIZE;
1067         while ((offset -= size) < PAGE_SIZE) {
1068                 bh = get_unused_buffer_head();
1069                 if (!bh)
1070                         goto no_grow;
1071                 bh->b_this_page = head;
1072                 head = bh;
1073                 bh->b_data = (char *) (page+offset);
1074                 bh->b_size = size;
1075                 bh->b_dev = B_FREE;  /* Flag as unused */
1076         }
1077         return head;
1078 /*
1079  * In case anything failed, we just free everything we got.
1080  */
1081 no_grow:
1082         bh = head;
1083         while (bh) {
1084                 head = bh;
1085                 bh = bh->b_this_page;
1086                 put_unused_buffer_head(head);
1087         }
1088         return NULL;
1089 }
1090 
1091 int brw_page(int rw, unsigned long address, kdev_t dev, int b[], int size, int bmap)
1092 {
1093         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1094         int block, nr;
1095         struct page *page;
1096 
1097         page = mem_map + MAP_NR(address);
1098         clear_bit(PG_uptodate, &page->flags);
1099         bh = create_buffers(address, size);
1100         if (!bh)
1101                 return -ENOMEM;
1102         nr = 0;
1103         next = bh;
1104         do {
1105                 struct buffer_head * tmp;
1106                 block = *(b++);
1107 
1108                 set_bit(BH_FreeOnIO, &next->b_state);
1109                 next->b_list = BUF_CLEAN;
1110                 next->b_dev = dev;
1111                 next->b_blocknr = block;
1112                 next->b_count = 1;
1113                 next->b_flushtime = 0;
1114                 set_bit(BH_Uptodate, &next->b_state);
1115 
1116                 /* When we use bmap, we define block zero to represent
1117                    a hole.  ll_rw_page, however, may legitimately
1118                    access block zero, and we need to distinguish the
1119                    two cases. 
1120                    */
1121                 if (bmap && !block) {
1122                         memset(next->b_data, 0, size);
1123                         next->b_count--;
1124                         continue;
1125                 }
1126                 tmp = get_hash_table(dev, block, size);
1127                 if (tmp) {
1128                         if (!buffer_uptodate(tmp)) {
1129                                 if (rw == READ)
1130                                         ll_rw_block(READ, 1, &tmp);
1131                                 wait_on_buffer(tmp);
1132                         }
1133                         if (rw == READ) 
1134                                 memcpy(next->b_data, tmp->b_data, size);
1135                         else {
1136                                 memcpy(tmp->b_data, next->b_data, size);
1137                                 mark_buffer_dirty(tmp, 0);
1138                         }
1139                         brelse(tmp);
1140                         next->b_count--;
1141                         continue;
1142                 }
1143                 if (rw == READ)
1144                         clear_bit(BH_Uptodate, &next->b_state);
1145                 else
1146                         set_bit(BH_Dirty, &next->b_state);
1147                 arr[nr++] = next;
1148         } while (prev = next, (next = next->b_this_page) != NULL);
1149         prev->b_this_page = bh;
1150         
1151         if (nr)
1152                 ll_rw_block(rw, nr, arr);
1153         else {
1154                 unsigned long flags;
1155                 clear_bit(PG_locked, &page->flags);
1156                 set_bit(PG_uptodate, &page->flags);
1157                 wake_up(&page->wait);
1158                 next = bh;
1159                 save_flags(flags);
1160                 cli();
1161                 do {
1162                         next->b_next_free = reuse_list;
1163                         reuse_list = next;
1164                         next = next->b_this_page;
1165                 } while (next != bh);
1166                 restore_flags(flags);
1167         }
1168         ++current->maj_flt;
1169         return 0;
1170 }
1171 
1172 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1173 {
1174         if (on) {
1175                 struct buffer_head *tmp = bh;
1176                 int page_uptodate = 1;
1177                 set_bit(BH_Uptodate, &bh->b_state);
1178                 do {
1179                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1180                                 page_uptodate = 0;
1181                                 break;
1182                         }
1183                         tmp=tmp->b_this_page;
1184                 } while (tmp && tmp != bh);
1185                 if (page_uptodate)
1186                         set_bit(PG_uptodate, &mem_map[MAP_NR(bh->b_data)].flags);
1187         } else
1188                 clear_bit(BH_Uptodate, &bh->b_state);
1189 }
1190 
1191 void unlock_buffer(struct buffer_head * bh)
1192 {
1193         struct buffer_head *tmp;
1194         unsigned long flags;
1195         struct page *page;
1196 
1197         clear_bit(BH_Lock, &bh->b_state);
1198         wake_up(&bh->b_wait);
1199 
1200         if (!test_bit(BH_FreeOnIO, &bh->b_state))
1201                 return;
1202         page = mem_map + MAP_NR(bh->b_data);
1203         if (!PageLocked(page)) {
1204                 printk ("Whoops: unlock_buffer: "
1205                         "async io complete on unlocked page\n");
1206                 return;
1207         }
1208         if (bh->b_count != 1) {
1209                 printk ("Whoops: unlock_buffer: b_count != 1 on async io.\n");
1210                 return;
1211         }
1212         /* Async buffer_heads are here only as labels for IO, and get
1213            thrown away once the IO for this page is complete.  IO is
1214            deemed complete once all buffers have been visited
1215            (b_count==0) and are now unlocked. */
1216         bh->b_count--;
1217         for (tmp = bh; tmp=tmp->b_this_page, tmp!=bh; ) {
1218                 if (test_bit(BH_Lock, &tmp->b_state) || tmp->b_count)
1219                         return;
1220         }
1221 
1222         /* OK, go ahead and complete the async IO on this page. */
1223         save_flags(flags);
1224         clear_bit(PG_locked, &page->flags);
1225         wake_up(&page->wait);
1226         cli();
1227         tmp = bh;
1228         do {
1229                 if (!test_bit(BH_FreeOnIO, &tmp->b_state)) {
1230                         printk ("Whoops: unlock_buffer: "
1231                                 "async IO mismatch on page.\n");
1232                         restore_flags(flags);
1233                         return;
1234                 }
1235                 tmp->b_next_free = reuse_list;
1236                 reuse_list = tmp;
1237                 clear_bit(BH_FreeOnIO, &tmp->b_state);
1238                 tmp = tmp->b_this_page;
1239         } while (tmp != bh);
1240         restore_flags(flags);
1241         if (clear_bit(PG_freeafter, &page->flags)) {
1242                 extern int nr_async_pages;
1243                 nr_async_pages--;
1244                 free_page(page_address(page));
1245         }
1246         wake_up(&buffer_wait);
1247 }
1248 
1249 /*
1250  * Generic "readpage" function for block devices that have the normal
1251  * bmap functionality. This is most of the block device filesystems.
1252  * Reads the page asynchronously --- the unlock_buffer() and
1253  * mark_buffer_uptodate() functions propagate buffer state into the
1254  * page struct once IO has completed.
1255  */
1256 int generic_readpage(struct inode * inode, struct page * page)
1257 {
1258         unsigned long block, address;
1259         int *p, nr[PAGE_SIZE/512];
1260         int i;
1261 
1262         address = page_address(page);
1263         page->count++;
1264         set_bit(PG_locked, &page->flags);
1265         
1266         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1267         block = page->offset >> inode->i_sb->s_blocksize_bits;
1268         p = nr;
1269         do {
1270                 *p = inode->i_op->bmap(inode, block);
1271                 i--;
1272                 block++;
1273                 p++;
1274         } while (i > 0);
1275 
1276         /* IO start */
1277         brw_page(READ, address, inode->i_dev, nr, inode->i_sb->s_blocksize, 1);
1278         free_page(address);
1279         return 0;
1280 }
1281 
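/*
 * Editorial note: a filesystem whose inode_operations provide a working
 * bmap() can normally point its readpage operation at generic_readpage()
 * and let brw_page() above do the asynchronous IO.  The pointer below is
 * purely illustrative (the name is hypothetical) and just shows the
 * expected signature:
 *
 *	int (*example_readpage)(struct inode *, struct page *) = generic_readpage;
 */
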
1282 /*
1283  * Try to increase the number of buffers available: the size argument
1284  * is used to determine what kind of buffers we want.
1285  */
1286 static int grow_buffers(int pri, int size)
1287 {
1288         unsigned long page;
1289         struct buffer_head *bh, *tmp;
1290         struct buffer_head * insert_point;
1291         int isize;
1292 
1293         if ((size & 511) || (size > PAGE_SIZE)) {
1294                 printk("VFS: grow_buffers: size = %d\n",size);
1295                 return 0;
1296         }
1297 
1298         isize = BUFSIZE_INDEX(size);
1299 
1300         if (!(page = __get_free_page(pri)))
1301                 return 0;
1302         bh = create_buffers(page, size);
1303         if (!bh) {
1304                 free_page(page);
1305                 return 0;
1306         }
1307 
1308         insert_point = free_list[isize];
1309 
1310         tmp = bh;
1311         while (1) {
1312                 nr_free[isize]++;
1313                 if (insert_point) {
1314                         tmp->b_next_free = insert_point->b_next_free;
1315                         tmp->b_prev_free = insert_point;
1316                         insert_point->b_next_free->b_prev_free = tmp;
1317                         insert_point->b_next_free = tmp;
1318                 } else {
1319                         tmp->b_prev_free = tmp;
1320                         tmp->b_next_free = tmp;
1321                 }
1322                 insert_point = tmp;
1323                 ++nr_buffers;
1324                 if (tmp->b_this_page)
1325                         tmp = tmp->b_this_page;
1326                 else
1327                         break;
1328         }
1329         free_list[isize] = bh;
1330         mem_map[MAP_NR(page)].buffers = bh;
1331         tmp->b_this_page = bh;
1332         buffermem += PAGE_SIZE;
1333         return 1;
1334 }
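
/*
 * The free list maintained above is a circular, doubly linked ring
 * threaded through b_next_free/b_prev_free.  Below is a stand-alone
 * sketch of the same insertion pattern using a hypothetical node type,
 * not kernel code; a ring of one node points at itself, exactly like
 * the empty-free-list case handled above.  Kept out of the build with #if 0.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct node {
        int val;
        struct node *next, *prev;       /* stand-ins for b_next_free/b_prev_free */
};

/* Insert 'n' after 'point' in the ring and return the new node. */
static struct node *ring_insert(struct node *point, struct node *n)
{
        if (point) {
                n->next = point->next;
                n->prev = point;
                point->next->prev = n;
                point->next = n;
        } else {
                n->next = n->prev = n;  /* first node: ring of one */
        }
        return n;
}

int main(void)
{
        struct node *ring = NULL, *p;
        int i;

        for (i = 0; i < 4; i++) {
                struct node *n = malloc(sizeof(*n));
                n->val = i;
                ring = ring_insert(ring, n);
        }
        p = ring;
        do {                            /* walk once around the ring */
                printf("%d ", p->val);
                p = p->next;
        } while (p != ring);
        printf("\n");
        return 0;
}
#endif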
1335 
1336 
1337 /* =========== Reduce the buffer memory ============= */
1338 
1339 /*
1340  * try_to_free_buffer() checks if all the buffers on this particular page
1341  * are unused, and frees the page if so.
1342  */
1343 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1344                        int priority)
1345 {
1346         unsigned long page;
1347         struct buffer_head * tmp, * p;
1348         int isize = BUFSIZE_INDEX(bh->b_size);
1349 
1350         *bhp = bh;
1351         page = (unsigned long) bh->b_data;
1352         page &= PAGE_MASK;
1353         tmp = bh;
1354         do {
1355                 if (!tmp)
1356                         return 0;
1357                 if (tmp->b_count || buffer_protected(tmp) ||
1358                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1359                         return 0;
1360                 if (priority && buffer_touched(tmp))
1361                         return 0;
1362                 tmp = tmp->b_this_page;
1363         } while (tmp != bh);
1364         tmp = bh;
1365         do {
1366                 p = tmp;
1367                 tmp = tmp->b_this_page;
1368                 nr_buffers--;
1369                 nr_buffers_size[isize]--;
1370                 if (p == *bhp)
1371                   {
1372                     *bhp = p->b_prev_free;
1373                     if (p == *bhp) /* Was this the last in the list? */
1374                       *bhp = NULL;
1375                   }
1376                 remove_from_queues(p);
1377                 put_unused_buffer_head(p);
1378         } while (tmp != bh);
1379         buffermem -= PAGE_SIZE;
1380         mem_map[MAP_NR(page)].buffers = NULL;
1381         free_page(page);
1382         return !mem_map[MAP_NR(page)].count;
1383 }
1384 
1385 /* Age buffers on a given page, according to whether they have been
1386    visited recently or not. */
1387 static inline void age_buffer(struct buffer_head *bh)
1388 {
1389         struct buffer_head *tmp = bh;
1390         int touched = 0;
1391 
1392         /*
1393          * When we age a page, we mark all other buffers in the page
1394          * with the "has_aged" flag.  Then, when these aliased buffers
1395          * come up for aging, we skip them until next pass.  This
1396          * ensures that a page full of multiple buffers only gets aged
1397          * once per pass through the lru lists. 
1398          */
1399         if (clear_bit(BH_Has_aged, &bh->b_state))
1400                 return;
1401         
1402         do {
1403                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1404                 tmp = tmp->b_this_page;
1405                 set_bit(BH_Has_aged, &tmp->b_state);
1406         } while (tmp != bh);
1407         clear_bit(BH_Has_aged, &bh->b_state);
1408 
1409         if (touched) 
1410                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1411         else
1412                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1413 }
1414 
1415 /*
1416  * Consult the load average for buffers and decide whether or not
1417  * we should shrink the buffers of one size.  If we decide yes,
1418  * do it and return 1.  Else return 0.  Do not attempt to shrink the
1419  * size that is specified.
1420  *
1421  * I would prefer not to use a load average, but the way things are now it
1422  * seems unavoidable.  The way to get rid of it would be to force clustering
1423  * universally, so that when we reclaim buffers we always reclaim an entire
1424  * page.  Doing this would mean that we all need to move towards QMAGIC.
1425  */
1426 
1427 static int maybe_shrink_lav_buffers(int size)
1428 {          
1429         int nlist;
1430         int isize;
1431         int total_lav, total_n_buffers, n_sizes;
1432         
1433         /* Do not consider the shared buffers since they would not tend
1434            to have getblk called very often, and this would throw off
1435            the lav.  They are not easily reclaimable anyway (let the swapper
1436            make the first move). */
1437   
1438         total_lav = total_n_buffers = n_sizes = 0;
1439         for(nlist = 0; nlist < NR_SIZES; nlist++)
1440          {
1441                  total_lav += buffers_lav[nlist];
1442                  if(nr_buffers_size[nlist]) n_sizes++;
1443                  total_n_buffers += nr_buffers_size[nlist];
1444                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1445          }
1446         
1447         /* See if we have an excessive number of buffers of a particular
1448            size - if so, victimize that bunch. */
1449   
1450         isize = (size ? BUFSIZE_INDEX(size) : -1);
1451         
1452         if (n_sizes > 1)
1453                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1454                   {
1455                           if(nlist == isize) continue;
1456                           if(nr_buffers_size[nlist] &&
1457                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1458                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1459                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1460                                             return 1;
1461                   }
1462         return 0;
1463 }
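
/*
 * The over-representation test above, restated stand-alone with made-up
 * numbers: a size becomes a shrink candidate when its share of the
 * (non-shared) buffer population exceeds lav_const times its share of
 * the getblk load average.  The values below are illustrative
 * assumptions, not the kernel's tuning defaults.  Kept out of the build
 * with #if 0.
 */
#if 0
#include <stdio.h>

static int over_represented(long lav_const, long size_lav, long total_lav,
                            long size_buffers, long total_buffers)
{
        return lav_const * size_lav * total_buffers <
               total_lav * size_buffers;
}

int main(void)
{
        /* 1k buffers hold 80% of the cache but see only 10% of the load
           average; with lav_const == 1 they qualify for shrinking. */
        printf("%s\n", over_represented(1, 10, 100, 800, 1000)
               ? "shrink candidate" : "leave alone");
        return 0;
}
#endif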
1464 
1465 /*
1466  * Try to free up some pages by shrinking the buffer-cache
1467  *
1468  * Priority tells the routine how hard to try to shrink the
1469  * buffers: 6 means "don't bother too much", while a value
1470  * of 0 means "we'd better get some free pages now".
1471  *
1472  * A "limit" argument (restricting the action to pages in the 0 - limit
1473  * address range, for DMA re-allocations) is not taken by this routine;
1474  * we ignore that restriction right now.
1475  */
1476 
1477 static int shrink_specific_buffers(unsigned int priority, int size)
1478 {
1479         struct buffer_head *bh;
1480         int nlist;
1481         int i, isize, isize1;
1482 
1483 #ifdef DEBUG
1484         if(size) printk("Shrinking buffers of size %d\n", size);
1485 #endif
1486         /* First try the free lists, and see if we can get a complete page
1487            from here */
1488         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1489 
1490         for(isize = 0; isize<NR_SIZES; isize++){
1491                 if(isize1 != -1 && isize1 != isize) continue;
1492                 bh = free_list[isize];
1493                 if(!bh) continue;
1494                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1495                         if (bh->b_count || buffer_protected(bh) ||
1496                             !bh->b_this_page)
1497                                  continue;
1498                         if (!age_of((unsigned long) bh->b_data) &&
1499                             try_to_free_buffer(bh, &bh, 6))
1500                                  return 1;
1501                         if(!bh) break;
1502                         /* Some interrupt must have used it after we
1503                            freed the page.  No big deal - keep looking */
1504                 }
1505         }
1506         
1507         /* Not enough in the free lists, now try the lru list */
1508         
1509         for(nlist = 0; nlist < NR_LIST; nlist++) {
1510         repeat1:
1511                 if(priority > 2 && nlist == BUF_SHARED) continue;
1512                 i = nr_buffers_type[nlist];
1513                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1514                 for ( ; i > 0; i-- ) {
1515                         bh = next_to_age[nlist];
1516                         if (!bh)
1517                                 break;
1518                         next_to_age[nlist] = bh->b_next_free;
1519 
1520                         /* First, age the buffer. */
1521                         age_buffer(bh);
1522                         /* We may have stalled while waiting for I/O
1523                            to complete. */
1524                         if(bh->b_list != nlist) goto repeat1;
1525                         if (bh->b_count || buffer_protected(bh) ||
1526                             !bh->b_this_page)
1527                                  continue;
1528                         if(size && bh->b_size != size) continue;
1529                         if (buffer_locked(bh))
1530                                  if (priority)
1531                                           continue;
1532                                  else
1533                                           wait_on_buffer(bh);
1534                         if (buffer_dirty(bh)) {
1535                                 bh->b_count++;
1536                                 bh->b_flushtime = 0;
1537                                 ll_rw_block(WRITEA, 1, &bh);
1538                                 bh->b_count--;
1539                                 continue;
1540                         }
1541                         /* At priority 6, only consider really old
1542                            (age==0) buffers for reclaiming.  At
1543                            priority 0, consider any buffers. */
1544                         if ((age_of((unsigned long) bh->b_data) >>
1545                              (6-priority)) > 0)
1546                                 continue;                               
1547                         if (try_to_free_buffer(bh, &bh, 0))
1548                                  return 1;
1549                         if(!bh) break;
1550                 }
1551         }
1552         return 0;
1553 }
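
/*
 * The scan budget used in the lru pass above is a fixed fraction of the
 * list length, halved for every priority step away from 0.
 * BUFFEROUT_WEIGHT is defined outside this excerpt; the weight and list
 * length below are assumptions for illustration only.  Kept out of the
 * build with #if 0.
 */
#if 0
#include <stdio.h>

int main(void)
{
        int weight = 100;       /* assumed BUFFEROUT_WEIGHT */
        int n = 2048;           /* assumed buffers on the lru list */
        int priority;

        for (priority = 0; priority <= 6; priority++)
                printf("priority %d: scan %d buffers\n",
                       priority, ((weight * n) >> 10) >> priority);
        return 0;
}
#endif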
1554 
1555 
1556 /* ================== Debugging =================== */
1557 
1558 void show_buffers(void)
1559 {
1560         struct buffer_head * bh;
1561         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1562         int protected = 0;
1563         int shared;
1564         int nlist, isize;
1565 
1566         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1567         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1568         printk("Buffer blocks:   %6d\n",nr_buffers);
1569 
1570         for(nlist = 0; nlist < NR_LIST; nlist++) {
1571           shared = found = locked = dirty = used = lastused = protected = 0;
1572           bh = lru_list[nlist];
1573           if(!bh) continue;
1574           do {
1575                 found++;
1576                 if (buffer_locked(bh))
1577                         locked++;
1578                 if (buffer_protected(bh))
1579                         protected++;
1580                 if (buffer_dirty(bh))
1581                         dirty++;
1582                 if (mem_map[MAP_NR(((unsigned long) bh->b_data))].count != 1)
1583                         shared++;
1584                 if (bh->b_count)
1585                         used++, lastused = found;
1586                 bh = bh->b_next_free;
1587           } while (bh != lru_list[nlist]);
1588           printk("Buffer[%d] mem: %d buffers, %d used (last=%d), "
1589                  "%d locked, %d protected, %d dirty %d shrd\n",
1590                  nlist, found, used, lastused,
1591                  locked, protected, dirty, shared);
1592         };
1593         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1594         for(isize = 0; isize<NR_SIZES; isize++){
1595                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1596                        buffers_lav[isize], nr_free[isize]);
1597                 for(nlist = 0; nlist < NR_LIST; nlist++)
1598                          printk("%7d ", nr_buffers_st[isize][nlist]);
1599                 printk("\n");
1600         }
1601 }
1602 
1603 
1604 /* ====================== Cluster patches for ext2 ==================== */
1605 
1606 /*
1607  * try_to_reassign() checks if all the buffers on this particular page
1608  * are unused, and reassigns them to a new cluster if this is true.
1609  */
1610 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1611                            kdev_t dev, unsigned int starting_block)
1612 {
1613         unsigned long page;
1614         struct buffer_head * tmp, * p;
1615 
1616         *bhp = bh;
1617         page = (unsigned long) bh->b_data;
1618         page &= PAGE_MASK;
1619         if(mem_map[MAP_NR(page)].count != 1) return 0;
1620         tmp = bh;
1621         do {
1622                 if (!tmp)
1623                          return 0;
1624                 
1625                 if (tmp->b_count || buffer_protected(tmp) ||
1626                     buffer_dirty(tmp) || buffer_locked(tmp))
1627                          return 0;
1628                 tmp = tmp->b_this_page;
1629         } while (tmp != bh);
1630         tmp = bh;
1631         
1632         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1633                  tmp = tmp->b_this_page;
1634         
1635         /* This is the buffer at the head of the page */
1636         bh = tmp;
1637         do {
1638                 p = tmp;
1639                 tmp = tmp->b_this_page;
1640                 remove_from_queues(p);
1641                 p->b_dev = dev;
1642                 mark_buffer_uptodate(p, 0);
1643                 clear_bit(BH_Req, &p->b_state);
1644                 p->b_blocknr = starting_block++;
1645                 insert_into_queues(p);
1646         } while (tmp != bh);
1647         return 1;
1648 }
1649 
1650 /*
1651  * Try to find a free cluster by locating a page where
1652  * all of the buffers are unused.  We would like this function
1653  * to be atomic, so we do not call anything that might cause
1654  * the process to sleep.  The priority is somewhat similar to
1655  * the priority used in shrink_buffers.
1656  * 
1657  * My thinking is that the kernel should end up using whole
1658  * pages for the buffer cache as much of the time as possible.
1659  * This way the other buffers on a particular page are likely
1660  * to be very near each other on the free list, and we will not
1661  * be expiring data prematurely.  For now we only cannibalize buffers
1662  * of the same size to keep the code simpler.
1663  */
1664 static int reassign_cluster(kdev_t dev, 
1665                      unsigned int starting_block, int size)
1666 {
1667         struct buffer_head *bh;
1668         int isize = BUFSIZE_INDEX(size);
1669         int i;
1670 
1671         /* We want to give ourselves a really good shot at generating
1672            a cluster, and since we only take buffers from the free
1673            list, we "overfill" it a little. */
1674 
1675         while(nr_free[isize] < 32) refill_freelist(size);
1676 
1677         bh = free_list[isize];
1678         if(bh)
1679                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1680                          if (!bh->b_this_page)  continue;
1681                          if (try_to_reassign(bh, &bh, dev, starting_block))
1682                                  return 4;
1683                  }
1684         return 0;
1685 }
1686 
1687 /* This function tries to generate a new cluster of buffers
1688  * from a new page in memory.  We should only do this if we have
1689  * not expanded the buffer cache to the maximum size that we allow.
1690  */
1691 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1692 {
1693         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1694         int isize = BUFSIZE_INDEX(size);
1695         unsigned long offset;
1696         unsigned long page;
1697         int nblock;
1698 
1699         page = get_free_page(GFP_NOBUFFER);
1700         if(!page) return 0;
1701 
1702         bh = create_buffers(page, size);
1703         if (!bh) {
1704                 free_page(page);
1705                 return 0;
1706         };
1707         nblock = block;
1708         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1709                 if (find_buffer(dev, nblock++, size))
1710                          goto not_aligned;
1711         }
1712         tmp = bh;
1713         nblock = 0;
1714         while (1) {
1715                 arr[nblock++] = bh;
1716                 bh->b_count = 1;
1717                 bh->b_flushtime = 0;
1718                 bh->b_state = 0;
1719                 bh->b_dev = dev;
1720                 bh->b_list = BUF_CLEAN;
1721                 bh->b_blocknr = block++;
1722                 nr_buffers++;
1723                 nr_buffers_size[isize]++;
1724                 insert_into_queues(bh);
1725                 if (bh->b_this_page)
1726                         bh = bh->b_this_page;
1727                 else
1728                         break;
1729         }
1730         buffermem += PAGE_SIZE;
1731         mem_map[MAP_NR(page)].buffers = bh;
1732         bh->b_this_page = tmp;
1733         while (nblock-- > 0)
1734                 brelse(arr[nblock]);
1735         return 4; /* ?? */
1736 not_aligned:
1737         while ((tmp = bh) != NULL) {
1738                 bh = bh->b_this_page;
1739                 put_unused_buffer_head(tmp);
1740         }
1741         free_page(page);
1742         return 0;
1743 }
1744 
1745 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1746 {
1747         int i, offset;
1748         
1749         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1750                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1751                 if(find_buffer(dev, b[i], size)) return 0;
1752         };
1753 
1754         /* OK, we have a candidate for a new cluster */
1755         
1756         /* See if one size of buffer is over-represented in the buffer cache,
1757            if so, reduce the number of buffers */
1758         if(maybe_shrink_lav_buffers(size))
1759          {
1760                  int retval;
1761                  retval = try_to_generate_cluster(dev, b[0], size);
1762                  if(retval) return retval;
1763          };
1764         
1765         if (nr_free_pages > min_free_pages*2) 
1766                  return try_to_generate_cluster(dev, b[0], size);
1767         else
1768                  return reassign_cluster(dev, b[0], size);
1769 }
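
/*
 * The candidate test in generate_cluster(), restated as a stand-alone
 * predicate with illustrative page/block sizes: the block array must
 * hold consecutive block numbers, enough of them to fill one page.  The
 * real code additionally rejects blocks already present in the buffer
 * cache via find_buffer().  Kept out of the build with #if 0.
 */
#if 0
#include <stdio.h>

static int consecutive(const int *b, int page_size, int size)
{
        int i, offset;

        for (i = 0, offset = 0; offset < page_size; i++, offset += size)
                if (i && b[i] - 1 != b[i - 1])
                        return 0;
        return 1;
}

int main(void)
{
        int good[] = { 40, 41, 42, 43 };        /* fills a 4k page of 1k blocks */
        int bad[]  = { 40, 41, 50, 51 };        /* gap, so no cluster */

        printf("good: %d, bad: %d\n",
               consecutive(good, 4096, 1024), consecutive(bad, 4096, 1024));
        return 0;
}
#endif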
1770 
1771 
1772 /* ===================== Init ======================= */
1773 
1774 /*
1775  * This initializes the initial buffer free list.  nr_buffers_type is set
1776  * to one less than the actual number of buffers, as a sop to backwards
1777  * compatibility --- the old code did this (I think unintentionally,
1778  * but I'm not sure), and programs in the ps package expect it.
1779  *                                      - TYT 8/30/92
1780  */
1781 void buffer_init(void)
1782 {
1783         int i;
1784         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1785         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1786 
1787         if (memsize >= 4*1024*1024) {
1788                 if(memsize >= 16*1024*1024)
1789                          nr_hash = 16381;
1790                 else
1791                          nr_hash = 4093;
1792         } else {
1793                 nr_hash = 997;
1794         };
1795         
1796         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1797                                                      sizeof(struct buffer_head *));
1798 
1799 
1800         for (i = 0 ; i < nr_hash ; i++)
1801                 hash_table[i] = NULL;
1802         lru_list[BUF_CLEAN] = 0;
1803         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1804         if (!free_list[isize])
1805                 panic("VFS: Unable to initialize buffer free list!");
1806         return;
1807 }
1808 
1809 
1810 /* ====================== bdflush support =================== */
1811 
1812 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1813  * response to dirty buffers.  Once this process is activated, we write back
1814  * a limited number of buffers to the disks and then go back to sleep again.
1815  */
1816 struct wait_queue * bdflush_wait = NULL;
1817 struct wait_queue * bdflush_done = NULL;
1818 
1819 static void wakeup_bdflush(int wait)
1820 {
1821         run_task_queue(&tq_disk);
1822         wake_up(&bdflush_wait);
1823         if(wait) sleep_on(&bdflush_done);
1824 }
1825 
1826 
1827 /* 
1828  * Here we attempt to write back old buffers.  We also try to flush inodes
1829  * and supers as well, since this function is essentially "update", and 
1830  * otherwise there would be no way of ensuring that these quantities ever 
1831  * get written back.  Ideally, we would have a timestamp on the inodes
1832  * and superblocks so that we could write back only the old ones as well
1833  */
1834 
1835 asmlinkage int sync_old_buffers(void)
1836 {
1837         int i, isize;
1838         int ndirty, nwritten;
1839         int nlist;
1840         int ncount;
1841         struct buffer_head * bh, *next;
1842 
1843         sync_supers(0);
1844         sync_inodes(0);
1845 
1846         ncount = 0;
1847 #ifdef DEBUG
1848         for(nlist = 0; nlist < NR_LIST; nlist++)
1849 #else
1850         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1851 #endif
1852         {
1853                 ndirty = 0;
1854                 nwritten = 0;
1855         repeat:
1856                 bh = lru_list[nlist];
1857                 if(bh) 
1858                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1859                                  /* We may have stalled while waiting for I/O to complete. */
1860                                  if(bh->b_list != nlist) goto repeat;
1861                                  next = bh->b_next_free;
1862                                  if(!lru_list[nlist]) {
1863                                          printk("Dirty list empty %d\n", i);
1864                                          break;
1865                                  }
1866                                  
1867                                  /* Clean buffer on dirty list?  Refile it */
1868                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1869                                   {
1870                                           refile_buffer(bh);
1871                                           continue;
1872                                   }
1873                                  
1874                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1875                                           continue;
1876                                  ndirty++;
1877                                  if(bh->b_flushtime > jiffies) continue;
1878                                  nwritten++;
1879                                  bh->b_count++;
1880                                  bh->b_flushtime = 0;
1881 #ifdef DEBUG
1882                                  if(nlist != BUF_DIRTY) ncount++;
1883 #endif
1884                                  ll_rw_block(WRITE, 1, &bh);
1885                                  bh->b_count--;
1886                          }
1887         }
1888 #ifdef DEBUG
1889         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1890         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1891 #endif
1892         
1893         /* We assume that we only come through here on a regular
1894            schedule, like every 5 seconds.  Now update load averages.  
1895            Shift usage counts to prevent overflow. */
1896         for(isize = 0; isize<NR_SIZES; isize++){
1897                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1898                 buffer_usage[isize] = 0;
1899         };
1900         return 0;
1901 }
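
/*
 * The load-average update above is a fixed-point exponential moving
 * average in the style of CALC_LOAD from <linux/sched.h>.  The shift
 * width and decay constant below are illustrative assumptions, not the
 * kernel's actual bdf_prm defaults; fed a constant usage, the average
 * converges toward that usage.  Kept out of the build with #if 0.
 */
#if 0
#include <stdio.h>

#define SHIFT   11
#define FIXED_1 (1UL << SHIFT)

static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long n)
{
        load *= exp;                    /* decay the old average   */
        load += n * (FIXED_1 - exp);    /* blend in the new sample */
        return load >> SHIFT;
}

int main(void)
{
        unsigned long lav = 0, exp = 1884;      /* assumed decay constant */
        int tick;

        /* Feed a constant usage of 100 getblk()s per interval. */
        for (tick = 0; tick < 20; tick++) {
                lav = calc_load(lav, exp, 100);
                printf("tick %2d: lav = %lu\n", tick, lav);
        }
        return 0;
}
#endif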
1902 
1903 
1904 /* This is the interface to bdflush.  As we get more sophisticated, we can
1905  * pass tuning parameters to this "process", to adjust how it behaves. 
1906  * We would want to verify each parameter, however, to make sure that it 
1907  * is reasonable. */
1908 
1909 asmlinkage int sys_bdflush(int func, long data)
1910 {
1911         int i, error;
1912 
1913         if (!suser())
1914                 return -EPERM;
1915 
1916         if (func == 1)
1917                  return sync_old_buffers();
1918 
1919         /* Even funcs (2, 4, ...) read tuning parameter (func-2)/2; odd funcs (3, 5, ...) write it */
1920         if (func >= 2) {
1921                 i = (func-2) >> 1;
1922                 if (i < 0 || i >= N_PARAM)
1923                         return -EINVAL;
1924                 if((func & 1) == 0) {
1925                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1926                         if (error)
1927                                 return error;
1928                         put_user(bdf_prm.data[i], (int*)data);
1929                         return 0;
1930                 };
1931                 if (data < bdflush_min[i] || data > bdflush_max[i])
1932                         return -EINVAL;
1933                 bdf_prm.data[i] = data;
1934                 return 0;
1935         }
1936 
1937         /* Calling with func 0 used to launch the actual bdflush and then
1938         never return (unless explicitly killed). We return zero here to
1939         remain semi-compatible with present update(8) programs. */
1940 
1941         return 0;
1942 }
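
/*
 * A hedged user-space sketch of the parameter encoding handled above:
 * func 1 writes back old buffers, even funcs >= 2 read tuning parameter
 * (func-2)/2, odd funcs >= 3 write it.  This assumes __NR_bdflush is
 * reachable through <sys/syscall.h> on the platform and must be run as
 * root, since the kernel checks suser() first.  It is not part of the
 * buffer-cache code itself and is kept out of the build with #if 0.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        int ndirty;

        /* Read tuning parameter 0 (func 2), then write it back (func 3). */
        if (syscall(__NR_bdflush, 2, (long) &ndirty) == 0)
                printf("param 0 = %d\n", ndirty);
        syscall(__NR_bdflush, 3, (long) ndirty);

        /* func 1: flush old buffers, much like update(8) would. */
        syscall(__NR_bdflush, 1, 0L);
        return 0;
}
#endif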
1943 
1944 /* This is the actual bdflush daemon itself. It used to be started from
1945  * the syscall above, but now we launch it ourselves internally with
1946  * kernel_thread(...)  directly after the first thread in init/main.c */
1947 
1948 int bdflush(void * unused) 
1949 {
1950         int i;
1951         int ndirty;
1952         int nlist;
1953         int ncount;
1954         struct buffer_head * bh, *next;
1955 
1956         /*
1957          *      We have a bare-bones task_struct, and really should fill
1958          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1959          *      display semi-sane things. Not real crucial though...  
1960          */
1961 
1962         current->session = 1;
1963         current->pgrp = 1;
1964         sprintf(current->comm, "kflushd");
1965 
1966         /*
1967          *      As a kernel thread we want to tamper with system buffers
1968          *      and other internals and thus be subject to the SMP locking
1969          *      rules. (On a uniprocessor box this does nothing).
1970          */
1971          
1972 #ifdef __SMP__
1973         lock_kernel();
1974         syscall_count++;
1975 #endif
1976                  
1977         for (;;) {
1978 #ifdef DEBUG
1979                 printk("bdflush() activated...");
1980 #endif
1981                 
1982                 ncount = 0;
1983 #ifdef DEBUG
1984                 for(nlist = 0; nlist < NR_LIST; nlist++)
1985 #else
1986                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1987 #endif
1988                  {
1989                          ndirty = 0;
1990                  repeat:
1991                          bh = lru_list[nlist];
1992                          if(bh) 
1993                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1994                                        bh = next) {
1995                                           /* We may have stalled while waiting for I/O to complete. */
1996                                           if(bh->b_list != nlist) goto repeat;
1997                                           next = bh->b_next_free;
1998                                           if(!lru_list[nlist]) {
1999                                                   printk("Dirty list empty %d\n", i);
2000                                                   break;
2001                                           }
2002                                           
2003                                           /* Clean buffer on dirty list?  Refile it */
2004                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
2005                                            {
2006                                                    refile_buffer(bh);
2007                                                    continue;
2008                                            }
2009                                           
2010                                           if (buffer_locked(bh) || !buffer_dirty(bh))
2011                                                    continue;
2012                                           /* Should we write back buffers that are shared or not??
2013                                              currently dirty buffers are not shared, so it does not matter */
2014                                           bh->b_count++;
2015                                           ndirty++;
2016                                           bh->b_flushtime = 0;
2017                                           ll_rw_block(WRITE, 1, &bh);
2018 #ifdef DEBUG
2019                                           if(nlist != BUF_DIRTY) ncount++;
2020 #endif
2021                                           bh->b_count--;
2022                                   }
2023                  }
2024 #ifdef DEBUG
2025                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
2026                 printk("sleeping again.\n");
2027 #endif
2028                 run_task_queue(&tq_disk);
2029                 wake_up(&bdflush_done);
2030                 
2031                 /* If there are still a lot of dirty buffers around, skip the sleep
2032                    and flush some more */
2033                 
2034                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2035                    bdf_prm.b_un.nfract/100) {
2036                         current->signal = 0;
2037                         interruptible_sleep_on(&bdflush_wait);
2038                 }
2039         }
2040 }
2041 
2042 
2043 /*
2044  * Overrides for Emacs so that we follow Linus's tabbing style.
2045  * Emacs will notice this stuff at the end of the file and automatically
2046  * adjust the settings for this buffer only.  This must remain at the end
2047  * of the file.
2048  * ---------------------------------------------------------------------------
2049  * Local variables:
2050  * c-indent-level: 8
2051  * c-brace-imaginary-offset: 0
2052  * c-brace-offset: -8
2053  * c-argdecl-indent: 8
2054  * c-label-offset: -8
2055  * c-continued-statement-offset: 8
2056  * c-continued-brace-offset: 0
2057  * End:
2058  */
