root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. remove_from_hash_queue
  10. remove_from_lru_list
  11. remove_from_free_list
  12. remove_from_queues
  13. put_last_lru
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. refill_freelist
  20. getblk
  21. set_writetime
  22. refile_buffer
  23. __brelse
  24. __bforget
  25. bread
  26. breada
  27. put_unused_buffer_head
  28. get_more_buffer_heads
  29. get_unused_buffer_head
  30. create_buffers
  31. read_buffers
  32. try_to_align
  33. check_aligned
  34. try_to_load_aligned
  35. try_to_share_buffers
  36. bread_page
  37. bwrite_page
  38. grow_buffers
  39. try_to_free
  40. age_buffer
  41. maybe_shrink_lav_buffers
  42. shrink_buffers
  43. shrink_specific_buffers
  44. show_buffers
  45. try_to_reassign
  46. reassign_cluster
  47. try_to_generate_cluster
  48. generate_cluster
  49. buffer_init
  50. wakeup_bdflush
  51. sync_old_buffers
  52. sys_bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 #include <linux/sched.h>
  20 #include <linux/kernel.h>
  21 #include <linux/major.h>
  22 #include <linux/string.h>
  23 #include <linux/locks.h>
  24 #include <linux/errno.h>
  25 #include <linux/malloc.h>
  26 #include <linux/swapctl.h>
  27 
  28 #include <asm/system.h>
  29 #include <asm/segment.h>
  30 #include <asm/io.h>
  31 
  32 #define NR_SIZES 4
  33 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  34 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  35 
  36 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  37 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
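/*
 * A worked example of the size-to-index mapping above: BUFSIZE_INDEX()
 * shifts the block size right by 9 (i.e. divides by 512) and looks the
 * result up in buffersize_index[], so only the four supported sizes map
 * to a valid slot:
 *
 *      BUFSIZE_INDEX(512)  == 0        bufferindex_size[0] == 512
 *      BUFSIZE_INDEX(1024) == 1        bufferindex_size[1] == 1024
 *      BUFSIZE_INDEX(2048) == 2        bufferindex_size[2] == 2048
 *      BUFSIZE_INDEX(4096) == 3        bufferindex_size[3] == 4096
 *
 * Any other multiple of 512 (1536, say) yields -1 and is not a legal
 * buffer size for this cache.
 */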
  38 
  39 static int grow_buffers(int pri, int size);
  40 static int shrink_specific_buffers(unsigned int priority, int size);
  41 static int maybe_shrink_lav_buffers(int);
  42 
  43 static int nr_hash = 0;  /* Size of hash table */
  44 static struct buffer_head ** hash_table;
  45 struct buffer_head ** buffer_pages;
  46 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  47 /* next_to_age is an array of pointers into the lru lists, used to
  48    cycle through the buffers aging their contents when deciding which
  49    buffers to discard when more memory is needed */
  50 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  51 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  52 static struct buffer_head * unused_list = NULL;
  53 static struct wait_queue * buffer_wait = NULL;
  54 
  55 int nr_buffers = 0;
  56 int nr_buffers_type[NR_LIST] = {0,};
  57 int nr_buffers_size[NR_SIZES] = {0,};
  58 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  59 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  60 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  61 int nr_free[NR_SIZES] = {0,};
  62 int buffermem = 0;
  63 int nr_buffer_heads = 0;
  64 extern int *blksize_size[];
  65 
  66 /* Here is the parameter block for the bdflush process. */
  67 static void wakeup_bdflush(int);
  68 
  69 #define N_PARAM 9
  70 #define LAV
  71 
  72 static union bdflush_param{
  73         struct {
  74                 int nfract;  /* Percentage of buffer cache dirty to 
  75                                 activate bdflush */
  76                 int ndirty;  /* Maximum number of dirty blocks to write out per
  77                                 wake-cycle */
  78                 int nrefill; /* Number of clean buffers to try and obtain
  79                                 each time we call refill */
  80                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  81                                   when trying to refill buffers. */
  82                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  83                                     search for free clusters */
  84                 int age_buffer;  /* Time for normal buffer to age before 
  85                                     we flush it */
  86                 int age_super;  /* Time for superblock to age before we 
  87                                    flush it */
  88                 int lav_const;  /* Constant used for load average (time
   89                                    constant) */
  90                 int lav_ratio;  /* Used to determine how low a lav for a
  91                                    particular size can go before we start to
  92                                    trim back the buffers */
  93         } b_un;
  94         unsigned int data[N_PARAM];
  95 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
  96 
  97 /* The lav constant is set for 1 minute, as long as the update process runs
  98    every 5 seconds.  If you change the frequency of update, the time
  99    constant will also change. */
 100 
 101 
 102 /* These are the min and max parameter values that we will allow to be assigned */
 103 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 104 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
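/*
 * A worked example of the default parameters above: with nfract = 25,
 * refile_buffer() below wakes bdflush once the number of dirty buffers
 * exceeds 25% of the non-shared buffers in the cache, so with (say) 4000
 * non-shared buffers the wakeup fires after roughly 1000 of them have
 * become dirty, and each wake-cycle then writes out at most ndirty (500)
 * blocks.
 */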
 105 
 106 /*
 107  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 108  * and getting rid of the cli-sti pairs. The wait-queue routines still
 109  * need cli-sti, but now it's just a couple of 386 instructions or so.
 110  *
 111  * Note that the real wait_on_buffer() is an inline function that checks
 112  * if 'b_wait' is set before calling this, so that the queues aren't set
 113  * up unnecessarily.
 114  */
 115 void __wait_on_buffer(struct buffer_head * bh)
 116 {
 117         struct wait_queue wait = { current, NULL };
 118 
 119         bh->b_count++;
 120         add_wait_queue(&bh->b_wait, &wait);
 121 repeat:
 122         current->state = TASK_UNINTERRUPTIBLE;
 123         if (buffer_locked(bh)) {
 124                 schedule();
 125                 goto repeat;
 126         }
 127         remove_wait_queue(&bh->b_wait, &wait);
 128         bh->b_count--;
 129         current->state = TASK_RUNNING;
 130 }
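/*
 * The comment above refers to an inline wrapper defined elsewhere (in
 * <linux/locks.h> in kernels of this vintage).  A minimal sketch of what
 * such a wrapper looks like, assuming buffer_locked() is the fast-path
 * test, is:
 *
 *      extern inline void wait_on_buffer(struct buffer_head * bh)
 *      {
 *              if (buffer_locked(bh))
 *                      __wait_on_buffer(bh);
 *      }
 *
 * so the wait queue is only set up when the buffer really is locked.
 */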
 131 
 132 /* Call sync_buffers with wait!=0 to ensure that the call does not
 133    return until all buffer writes have completed.  Sync() may return
 134    before the writes have finished; fsync() may not. */
 135 
 136 
 137 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
  138    spontaneously dirty themselves without brelse ever being called.
 139    We will ultimately want to put these in a separate list, but for
 140    now we search all of the lists for dirty buffers */
 141 
 142 static int sync_buffers(kdev_t dev, int wait)
 143 {
 144         int i, retry, pass = 0, err = 0;
 145         int nlist, ncount;
 146         struct buffer_head * bh, *next;
 147 
 148         /* One pass for no-wait, three for wait:
 149            0) write out all dirty, unlocked buffers;
 150            1) write out all dirty buffers, waiting if locked;
 151            2) wait for completion by waiting for all buffers to unlock. */
 152  repeat:
 153         retry = 0;
 154  repeat2:
 155         ncount = 0;
 156         /* We search all lists as a failsafe mechanism, not because we expect
 157            there to be dirty buffers on any of the other lists. */
 158         for(nlist = 0; nlist < NR_LIST; nlist++)
 159          {
 160          repeat1:
 161                  bh = lru_list[nlist];
 162                  if(!bh) continue;
 163                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 164                          if(bh->b_list != nlist) goto repeat1;
 165                          next = bh->b_next_free;
 166                          if(!lru_list[nlist]) break;
 167                          if (dev && bh->b_dev != dev)
 168                                   continue;
 169                          if (buffer_locked(bh))
 170                           {
 171                                   /* Buffer is locked; skip it unless wait is
 172                                      requested AND pass > 0. */
 173                                   if (!wait || !pass) {
 174                                           retry = 1;
 175                                           continue;
 176                                   }
 177                                   wait_on_buffer (bh);
 178                                   goto repeat2;
 179                           }
 180                          /* If an unlocked buffer is not uptodate, there has
 181                              been an IO error. Skip it. */
 182                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 183                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 184                                   err = 1;
 185                                   continue;
 186                           }
 187                          /* Don't write clean buffers.  Don't write ANY buffers
 188                             on the third pass. */
 189                          if (!buffer_dirty(bh) || pass>=2)
 190                                   continue;
 191                          /* don't bother about locked buffers */
 192                          if (buffer_locked(bh))
 193                                  continue;
 194                          bh->b_count++;
 195                          bh->b_flushtime = 0;
 196                          ll_rw_block(WRITE, 1, &bh);
 197 
 198                          if(nlist != BUF_DIRTY) { 
 199                                  printk("[%d %s %ld] ", nlist,
 200                                         kdevname(bh->b_dev), bh->b_blocknr);
 201                                  ncount++;
 202                          };
 203                          bh->b_count--;
 204                          retry = 1;
 205                  }
 206          }
 207         if (ncount)
 208           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 209         
 210         /* If we are waiting for the sync to succeed, and if any dirty
 211            blocks were written, then repeat; on the second pass, only
 212            wait for buffers being written (do not pass to write any
 213            more buffers on the second pass). */
 214         if (wait && retry && ++pass<=2)
 215                  goto repeat;
 216         return err;
 217 }
 218 
 219 void sync_dev(kdev_t dev)
 220 {
 221         sync_buffers(dev, 0);
 222         sync_supers(dev);
 223         sync_inodes(dev);
 224         sync_buffers(dev, 0);
 225         sync_dquots(dev, -1);
 226 }
 227 
 228 int fsync_dev(kdev_t dev)
 229 {
 230         sync_buffers(dev, 0);
 231         sync_supers(dev);
 232         sync_inodes(dev);
 233         sync_dquots(dev, -1);
 234         return sync_buffers(dev, 1);
 235 }
 236 
 237 asmlinkage int sys_sync(void)
 238 {
 239         fsync_dev(0);
 240         return 0;
 241 }
 242 
 243 int file_fsync (struct inode *inode, struct file *filp)
 244 {
 245         return fsync_dev(inode->i_dev);
 246 }
 247 
 248 asmlinkage int sys_fsync(unsigned int fd)
 249 {
 250         struct file * file;
 251         struct inode * inode;
 252 
 253         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 254                 return -EBADF;
 255         if (!file->f_op || !file->f_op->fsync)
 256                 return -EINVAL;
 257         if (file->f_op->fsync(inode,file))
 258                 return -EIO;
 259         return 0;
 260 }
 261 
 262 void invalidate_buffers(kdev_t dev)
 263 {
 264         int i;
 265         int nlist;
 266         struct buffer_head * bh;
 267 
 268         for(nlist = 0; nlist < NR_LIST; nlist++) {
 269                 bh = lru_list[nlist];
 270                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 271                         if (bh->b_dev != dev)
 272                                 continue;
 273                         wait_on_buffer(bh);
 274                         if (bh->b_dev != dev)
 275                                 continue;
 276                         if (bh->b_count)
 277                                 continue;
 278                         bh->b_flushtime = 0;
 279                         clear_bit(BH_Uptodate, &bh->b_state);
 280                         clear_bit(BH_Dirty, &bh->b_state);
 281                         clear_bit(BH_Req, &bh->b_state);
 282                 }
 283         }
 284 }
 285 
 286 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 287 #define hash(dev,block) hash_table[_hashfn(dev,block)]
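/*
 * A small worked example of the hashing scheme above, with a hypothetical
 * table size of nr_hash == 617 and HASHDEV(dev) == 769: a buffer for
 * block 1234 hashes to bucket (769 ^ 1234) % 617 == 2003 % 617 == 152,
 * and find_buffer() below walks that bucket's b_next chain comparing
 * device, block number and size.
 */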
 288 
 289 static inline void remove_from_hash_queue(struct buffer_head * bh)
 290 {
 291         if (bh->b_next)
 292                 bh->b_next->b_prev = bh->b_prev;
 293         if (bh->b_prev)
 294                 bh->b_prev->b_next = bh->b_next;
 295         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 296                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 297         bh->b_next = bh->b_prev = NULL;
 298 }
 299 
 300 static inline void remove_from_lru_list(struct buffer_head * bh)
 301 {
 302         if (!(bh->b_prev_free) || !(bh->b_next_free))
 303                 panic("VFS: LRU block list corrupted");
 304         if (bh->b_dev == B_FREE)
 305                 panic("LRU list corrupted");
 306         bh->b_prev_free->b_next_free = bh->b_next_free;
 307         bh->b_next_free->b_prev_free = bh->b_prev_free;
 308 
 309         if (lru_list[bh->b_list] == bh)
 310                  lru_list[bh->b_list] = bh->b_next_free;
 311         if (lru_list[bh->b_list] == bh)
 312                  lru_list[bh->b_list] = NULL;
 313         if (next_to_age[bh->b_list] == bh)
 314                 next_to_age[bh->b_list] = bh->b_next_free;
 315         if (next_to_age[bh->b_list] == bh)
 316                 next_to_age[bh->b_list] = NULL;
 317 
 318         bh->b_next_free = bh->b_prev_free = NULL;
 319 }
 320 
 321 static inline void remove_from_free_list(struct buffer_head * bh)
 322 {
 323         int isize = BUFSIZE_INDEX(bh->b_size);
 324         if (!(bh->b_prev_free) || !(bh->b_next_free))
 325                 panic("VFS: Free block list corrupted");
 326         if(bh->b_dev != B_FREE)
 327                 panic("Free list corrupted");
 328         if(!free_list[isize])
 329                 panic("Free list empty");
 330         nr_free[isize]--;
 331         if(bh->b_next_free == bh)
 332                  free_list[isize] = NULL;
 333         else {
 334                 bh->b_prev_free->b_next_free = bh->b_next_free;
 335                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 336                 if (free_list[isize] == bh)
 337                          free_list[isize] = bh->b_next_free;
 338         };
 339         bh->b_next_free = bh->b_prev_free = NULL;
 340 }
 341 
 342 static inline void remove_from_queues(struct buffer_head * bh)
 343 {
 344         if(bh->b_dev == B_FREE) {
 345                 remove_from_free_list(bh); /* Free list entries should not be
 346                                               in the hash queue */
 347                 return;
 348         };
 349         nr_buffers_type[bh->b_list]--;
 350         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 351         remove_from_hash_queue(bh);
 352         remove_from_lru_list(bh);
 353 }
 354 
 355 static inline void put_last_lru(struct buffer_head * bh)
 356 {
 357         if (!bh)
 358                 return;
 359         if (bh == lru_list[bh->b_list]) {
 360                 lru_list[bh->b_list] = bh->b_next_free;
 361                 if (next_to_age[bh->b_list] == bh)
 362                         next_to_age[bh->b_list] = bh->b_next_free;
 363                 return;
 364         }
 365         if(bh->b_dev == B_FREE)
 366                 panic("Wrong block for lru list");
 367         remove_from_lru_list(bh);
  368 /* add to the back of the lru list */
 369 
 370         if(!lru_list[bh->b_list]) {
 371                 lru_list[bh->b_list] = bh;
 372                 lru_list[bh->b_list]->b_prev_free = bh;
 373         };
 374         if (!next_to_age[bh->b_list])
 375                 next_to_age[bh->b_list] = bh;
 376 
 377         bh->b_next_free = lru_list[bh->b_list];
 378         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 379         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 380         lru_list[bh->b_list]->b_prev_free = bh;
 381 }
 382 
 383 static inline void put_last_free(struct buffer_head * bh)
 384 {
 385         int isize;
 386         if (!bh)
 387                 return;
 388 
 389         isize = BUFSIZE_INDEX(bh->b_size);      
 390         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 391         /* add to back of free list */
 392         if(!free_list[isize]) {
 393                 free_list[isize] = bh;
 394                 bh->b_prev_free = bh;
 395         };
 396 
 397         nr_free[isize]++;
 398         bh->b_next_free = free_list[isize];
 399         bh->b_prev_free = free_list[isize]->b_prev_free;
 400         free_list[isize]->b_prev_free->b_next_free = bh;
 401         free_list[isize]->b_prev_free = bh;
 402 }
 403 
 404 static inline void insert_into_queues(struct buffer_head * bh)
 405 {
 406         /* put at end of free list */
 407         if(bh->b_dev == B_FREE) {
 408                 put_last_free(bh);
 409                 return;
 410         }
 411         if(!lru_list[bh->b_list]) {
 412                 lru_list[bh->b_list] = bh;
 413                 bh->b_prev_free = bh;
 414         }
 415         if (!next_to_age[bh->b_list])
 416                 next_to_age[bh->b_list] = bh;
 417         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 418         bh->b_next_free = lru_list[bh->b_list];
 419         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 420         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 421         lru_list[bh->b_list]->b_prev_free = bh;
 422         nr_buffers_type[bh->b_list]++;
 423         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 424 /* put the buffer in new hash-queue if it has a device */
 425         bh->b_prev = NULL;
 426         bh->b_next = NULL;
 427         if (!(bh->b_dev))
 428                 return;
 429         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 430         hash(bh->b_dev,bh->b_blocknr) = bh;
 431         if (bh->b_next)
 432                 bh->b_next->b_prev = bh;
 433 }
 434 
 435 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 436 {               
 437         struct buffer_head * tmp;
 438 
 439         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 440                 if (tmp->b_dev == dev && tmp->b_blocknr == block)
 441                         if (tmp->b_size == size)
 442                                 return tmp;
 443                         else {
 444                                 printk("VFS: Wrong blocksize on device %s\n",
 445                                         kdevname(dev));
 446                                 return NULL;
 447                         }
 448         return NULL;
 449 }
 450 
 451 /*
 452  * Why like this, I hear you say... The reason is race-conditions.
 453  * As we don't lock buffers (unless we are reading them, that is),
  454  * something might happen to one while we sleep (i.e. a read error
  455  * will mark it bad). This shouldn't really happen currently, but
 456  * the code is ready.
 457  */
 458 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 459 {
 460         struct buffer_head * bh;
 461 
 462         for (;;) {
 463                 if (!(bh=find_buffer(dev,block,size)))
 464                         return NULL;
 465                 bh->b_count++;
 466                 wait_on_buffer(bh);
 467                 if (bh->b_dev == dev && bh->b_blocknr == block
 468                                              && bh->b_size == size)
 469                         return bh;
 470                 bh->b_count--;
 471         }
 472 }
 473 
 474 void set_blocksize(kdev_t dev, int size)
 475 {
 476         int i, nlist;
 477         struct buffer_head * bh, *bhnext;
 478 
 479         if (!blksize_size[MAJOR(dev)])
 480                 return;
 481 
 482         switch(size) {
 483                 default: panic("Invalid blocksize passed to set_blocksize");
 484                 case 512: case 1024: case 2048: case 4096:;
 485         }
 486 
 487         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 488                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 489                 return;
 490         }
 491         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 492                 return;
 493         sync_buffers(dev, 2);
 494         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 495 
 496   /* We need to be quite careful how we do this - we are moving entries
 497      around on the free list, and we can get in a loop if we are not careful.*/
 498 
 499         for(nlist = 0; nlist < NR_LIST; nlist++) {
 500                 bh = lru_list[nlist];
 501                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 502                         if(!bh) break;
 503                         bhnext = bh->b_next_free; 
 504                         if (bh->b_dev != dev)
 505                                  continue;
 506                         if (bh->b_size == size)
 507                                  continue;
 508                         
 509                         wait_on_buffer(bh);
 510                         if (bh->b_dev == dev && bh->b_size != size) {
 511                                 clear_bit(BH_Dirty, &bh->b_state);
 512                                 clear_bit(BH_Uptodate, &bh->b_state);
 513                                 clear_bit(BH_Req, &bh->b_state);
 514                                 bh->b_flushtime = 0;
 515                         }
 516                         remove_from_hash_queue(bh);
 517                 }
 518         }
 519 }
 520 
 521 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 522 
 523 void refill_freelist(int size)
 524 {
 525         struct buffer_head * bh, * tmp;
 526         struct buffer_head * candidate[NR_LIST];
 527         unsigned int best_time, winner;
 528         int isize = BUFSIZE_INDEX(size);
 529         int buffers[NR_LIST];
 530         int i;
 531         int needed;
 532 
 533         /* First see if we even need this.  Sometimes it is advantageous
  534          to request some blocks in a filesystem that we know we will
 535          be needing ahead of time. */
 536 
 537         if (nr_free[isize] > 100)
 538                 return;
 539 
 540         /* If there are too many dirty buffers, we wake up the update process
 541            now so as to ensure that there are still clean buffers available
 542            for user processes to use (and dirty) */
 543         
 544         /* We are going to try and locate this much memory */
  545         needed = bdf_prm.b_un.nrefill * size;
 546 
 547         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 548                grow_buffers(GFP_BUFFER, size)) {
 549                 needed -= PAGE_SIZE;
 550         }
 551 
 552         if(needed <= 0) return;
 553 
 554         /* See if there are too many buffers of a different size.
 555            If so, victimize them */
 556 
 557         while(maybe_shrink_lav_buffers(size))
 558          {
 559                  if(!grow_buffers(GFP_BUFFER, size)) break;
 560                  needed -= PAGE_SIZE;
 561                  if(needed <= 0) return;
 562          };
 563 
 564         /* OK, we cannot grow the buffer cache, now try and get some
 565            from the lru list */
 566 
 567         /* First set the candidate pointers to usable buffers.  This
 568            should be quick nearly all of the time. */
 569 
 570 repeat0:
 571         for(i=0; i<NR_LIST; i++){
 572                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 573                    nr_buffers_type[i] == 0) {
 574                         candidate[i] = NULL;
 575                         buffers[i] = 0;
 576                         continue;
 577                 }
 578                 buffers[i] = nr_buffers_type[i];
 579                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 580                  {
 581                          if(buffers[i] < 0) panic("Here is the problem");
 582                          tmp = bh->b_next_free;
 583                          if (!bh) break;
 584                          
 585                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 586                              buffer_dirty(bh)) {
 587                                  refile_buffer(bh);
 588                                  continue;
 589                          }
 590                          
 591                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 592                                   continue;
 593                          
 594                          /* Buffers are written in the order they are placed 
 595                             on the locked list. If we encounter a locked
 596                             buffer here, this means that the rest of them
 597                             are also locked */
 598                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 599                                  buffers[i] = 0;
 600                                  break;
 601                          }
 602                          
 603                          if (BADNESS(bh)) continue;
 604                          break;
 605                  };
 606                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 607                 else candidate[i] = bh;
 608                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 609         }
 610         
 611  repeat:
 612         if(needed <= 0) return;
 613         
 614         /* Now see which candidate wins the election */
 615         
 616         winner = best_time = UINT_MAX;  
 617         for(i=0; i<NR_LIST; i++){
 618                 if(!candidate[i]) continue;
 619                 if(candidate[i]->b_lru_time < best_time){
 620                         best_time = candidate[i]->b_lru_time;
 621                         winner = i;
 622                 }
 623         }
 624         
 625         /* If we have a winner, use it, and then get a new candidate from that list */
 626         if(winner != UINT_MAX) {
 627                 i = winner;
 628                 bh = candidate[i];
 629                 candidate[i] = bh->b_next_free;
 630                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 631                 if (bh->b_count || bh->b_size != size)
 632                          panic("Busy buffer in candidate list\n");
 633                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 634                          panic("Shared buffer in candidate list\n");
 635                 if (buffer_protected(bh))
 636                         panic("Protected buffer in candidate list\n");
 637                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 638                 
 639                 if(bh->b_dev == B_FREE)
 640                         panic("Wrong list");
 641                 remove_from_queues(bh);
 642                 bh->b_dev = B_FREE;
 643                 put_last_free(bh);
 644                 needed -= bh->b_size;
 645                 buffers[i]--;
 646                 if(buffers[i] < 0) panic("Here is the problem");
 647                 
 648                 if(buffers[i] == 0) candidate[i] = NULL;
 649                 
 650                 /* Now all we need to do is advance the candidate pointer
 651                    from the winner list to the next usable buffer */
 652                 if(candidate[i] && buffers[i] > 0){
 653                         if(buffers[i] <= 0) panic("Here is another problem");
 654                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 655                                 if(buffers[i] < 0) panic("Here is the problem");
 656                                 tmp = bh->b_next_free;
 657                                 if (!bh) break;
 658                                 
 659                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 660                                     buffer_dirty(bh)) {
 661                                         refile_buffer(bh);
 662                                         continue;
 663                                 };
 664                                 
 665                                 if (bh->b_count || bh->b_size != size)
 666                                          continue;
 667                                 
 668                                 /* Buffers are written in the order they are
 669                                    placed on the locked list.  If we encounter
 670                                    a locked buffer here, this means that the
 671                                    rest of them are also locked */
 672                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 673                                         buffers[i] = 0;
 674                                         break;
 675                                 }
 676               
 677                                 if (BADNESS(bh)) continue;
 678                                 break;
 679                         };
 680                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 681                         else candidate[i] = bh;
 682                         if(candidate[i] && candidate[i]->b_count) 
 683                                  panic("Here is the problem");
 684                 }
 685                 
 686                 goto repeat;
 687         }
 688         
 689         if(needed <= 0) return;
 690         
 691         /* Too bad, that was not enough. Try a little harder to grow some. */
 692         
 693         if (nr_free_pages > min_free_pages + 5) {
 694                 if (grow_buffers(GFP_BUFFER, size)) {
 695                         needed -= PAGE_SIZE;
 696                         goto repeat0;
 697                 };
 698         }
 699         
 700         /* and repeat until we find something good */
 701         if (!grow_buffers(GFP_ATOMIC, size))
 702                 wakeup_bdflush(1);
 703         needed -= PAGE_SIZE;
 704         goto repeat0;
 705 }
 706 
 707 /*
 708  * Ok, this is getblk, and it isn't very clear, again to hinder
 709  * race-conditions. Most of the code is seldom used, (ie repeating),
 710  * so it should be much more efficient than it looks.
 711  *
 712  * The algorithm is changed: hopefully better, and an elusive bug removed.
 713  *
 714  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 715  * when the filesystem starts to get full of dirty blocks (I hope).
 716  */
 717 struct buffer_head * getblk(kdev_t dev, int block, int size)
 718 {
 719         struct buffer_head * bh;
 720         int isize = BUFSIZE_INDEX(size);
 721 
 722         /* Update this for the buffer size lav. */
 723         buffer_usage[isize]++;
 724 
 725         /* If there are too many dirty buffers, we wake up the update process
 726            now so as to ensure that there are still clean buffers available
 727            for user processes to use (and dirty) */
 728 repeat:
 729         bh = get_hash_table(dev, block, size);
 730         if (bh) {
 731                 if (!buffer_dirty(bh)) {
 732                         if (buffer_uptodate(bh))
 733                                  put_last_lru(bh);
 734                         bh->b_flushtime = 0;
 735                 }
 736                 set_bit(BH_Touched, &bh->b_state);
 737                 return bh;
 738         }
 739 
 740         while(!free_list[isize]) refill_freelist(size);
 741         
 742         if (find_buffer(dev,block,size))
 743                  goto repeat;
 744 
 745         bh = free_list[isize];
 746         remove_from_free_list(bh);
 747 
 748 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 749 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 750         bh->b_count=1;
 751         bh->b_flushtime=0;
 752         bh->b_state=(1<<BH_Touched);
 753         bh->b_dev=dev;
 754         bh->b_blocknr=block;
 755         insert_into_queues(bh);
 756         return bh;
 757 }
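/*
 * A minimal usage sketch (not taken from this file): getblk() returns the
 * buffer with b_count already raised, but makes no promise that its data
 * is uptodate.  A caller that intends to overwrite the whole block, as a
 * block allocator typically does, might therefore do:
 *
 *      struct buffer_head * bh;
 *
 *      bh = getblk(dev, block, BLOCK_SIZE);
 *      memset(bh->b_data, 0, BLOCK_SIZE);
 *      mark_buffer_uptodate(bh, 1);
 *      mark_buffer_dirty(bh, 0);
 *      brelse(bh);
 *
 * mark_buffer_uptodate() and mark_buffer_dirty() are used with the same
 * signatures as in bwrite_page() further down.
 */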
 758 
 759 void set_writetime(struct buffer_head * buf, int flag)
 760 {
 761         int newtime;
 762 
 763         if (buffer_dirty(buf)) {
  764                 /* Set the flush time, or bring it forward if one is already set */
 765                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 766                                      bdf_prm.b_un.age_buffer);
 767                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 768                          buf->b_flushtime = newtime;
 769         } else {
 770                 buf->b_flushtime = 0;
 771         }
 772 }
 773 
 774 
 775 void refile_buffer(struct buffer_head * buf)
 776 {
 777         int dispose;
 778 
 779         if(buf->b_dev == B_FREE) {
 780                 printk("Attempt to refile free buffer\n");
 781                 return;
 782         }
 783         if (buffer_dirty(buf))
 784                 dispose = BUF_DIRTY;
 785         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 786                 dispose = BUF_SHARED;
 787         else if (buffer_locked(buf))
 788                 dispose = BUF_LOCKED;
 789         else if (buf->b_list == BUF_SHARED)
 790                 dispose = BUF_UNSHARED;
 791         else
 792                 dispose = BUF_CLEAN;
 793         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 794         if(dispose != buf->b_list)  {
 795                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 796                          buf->b_lru_time = jiffies;
 797                 if(dispose == BUF_LOCKED && 
 798                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 799                          dispose = BUF_LOCKED1;
 800                 remove_from_queues(buf);
 801                 buf->b_list = dispose;
 802                 insert_into_queues(buf);
 803                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 804                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 805                    bdf_prm.b_un.nfract/100)
 806                          wakeup_bdflush(0);
 807         }
 808 }
 809 
 810 /*
 811  * Release a buffer head
 812  */
 813 void __brelse(struct buffer_head * buf)
 814 {
 815         wait_on_buffer(buf);
 816 
 817         /* If dirty, mark the time this buffer should be written back */
 818         set_writetime(buf, 0);
 819         refile_buffer(buf);
 820 
 821         if (buf->b_count) {
 822                 if (!--buf->b_count)
 823                         wake_up(&buffer_wait);
 824                 return;
 825         }
 826         printk("VFS: brelse: Trying to free free buffer\n");
 827 }
 828 
 829 /*
  830  * bforget() is like brelse(), except it throws the buffer away
 831  */
 832 void __bforget(struct buffer_head * buf)
 833 {
 834         wait_on_buffer(buf);
 835         if (buf->b_count != 1) {
 836                 printk("Aieee... bforget(): count = %d\n", buf->b_count);
 837                 return;
 838         }
 839         if (mem_map[MAP_NR(buf->b_data)].count != 1) {
 840                 printk("Aieee... bforget(): shared buffer\n");
 841                 return;
 842         }
 843         if (buffer_protected(buf)) {
 844                 printk("Aieee... bforget(): protected buffer\n");
 845                 return;
 846         }
 847         mark_buffer_clean(buf);
 848         buf->b_count = 0;
 849         remove_from_queues(buf);
 850         buf->b_dev = B_FREE;
 851         put_last_free(buf);
 852         wake_up(&buffer_wait);
 853 }
 854 
 855 /*
 856  * bread() reads a specified block and returns the buffer that contains
 857  * it. It returns NULL if the block was unreadable.
 858  */
 859 struct buffer_head * bread(kdev_t dev, int block, int size)
 860 {
 861         struct buffer_head * bh;
 862 
 863         if (!(bh = getblk(dev, block, size))) {
 864                 printk("VFS: bread: READ error on device %s\n",
 865                         kdevname(dev));
 866                 return NULL;
 867         }
 868         if (buffer_uptodate(bh))
 869                 return bh;
 870         ll_rw_block(READ, 1, &bh);
 871         wait_on_buffer(bh);
 872         if (buffer_uptodate(bh))
 873                 return bh;
 874         brelse(bh);
 875         return NULL;
 876 }
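/*
 * A minimal usage sketch (the real callers live in the individual
 * filesystems), assuming a caller that just wants to copy one block out
 * of the cache:
 *
 *      struct buffer_head * bh;
 *
 *      bh = bread(dev, block, BLOCK_SIZE);
 *      if (!bh)
 *              return -EIO;
 *      memcpy(data, bh->b_data, BLOCK_SIZE);
 *      brelse(bh);
 *
 * The buffer comes back with b_count raised, so every successful bread()
 * must eventually be paired with a brelse().
 */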
 877 
 878 /*
  879  * Ok, breada can be used as bread, but additionally requests read-ahead
  880  * on the following blocks as well.  The amount of read-ahead is bounded
  881  * by the file size and by the device's read_ahead setting.
 882  */
 883 
 884 #define NBUF 16
 885 
 886 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 887         unsigned int pos, unsigned int filesize)
 888 {
 889         struct buffer_head * bhlist[NBUF];
 890         unsigned int blocks;
 891         struct buffer_head * bh;
 892         int index;
 893         int i, j;
 894 
 895         if (pos >= filesize)
 896                 return NULL;
 897 
 898         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 899                 return NULL;
 900 
 901         index = BUFSIZE_INDEX(bh->b_size);
 902 
 903         if (buffer_uptodate(bh))
 904                 return bh;
 905 
 906         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 907 
 908         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 909                 blocks = read_ahead[MAJOR(dev)] >> index;
 910         if (blocks > NBUF)
 911                 blocks = NBUF;
 912         
 913         bhlist[0] = bh;
 914         j = 1;
 915         for(i=1; i<blocks; i++) {
 916                 bh = getblk(dev,block+i,bufsize);
 917                 if (buffer_uptodate(bh)) {
 918                         brelse(bh);
 919                         break;
 920                 }
 921                 bhlist[j++] = bh;
 922         }
 923 
 924         /* Request the read for these buffers, and then release them */
 925         ll_rw_block(READ, j, bhlist);
 926 
 927         for(i=1; i<j; i++)
 928                 brelse(bhlist[i]);
 929 
 930         /* Wait for this buffer, and then continue on */
 931         bh = bhlist[0];
 932         wait_on_buffer(bh);
 933         if (buffer_uptodate(bh))
 934                 return bh;
 935         brelse(bh);
 936         return NULL;
 937 }
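/*
 * An illustrative usage sketch: unlike older variadic versions of breada(),
 * this one takes the current file position and the file size so that it can
 * bound its read-ahead.  A filesystem read routine might call it roughly
 * like this (the local names are illustrative only):
 *
 *      struct buffer_head * bh;
 *
 *      bh = breada(inode->i_dev, block, BLOCK_SIZE,
 *                  filp->f_pos, inode->i_size);
 *      if (!bh)
 *              return -EIO;
 *      ...
 *      brelse(bh);
 */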
 938 
 939 /*
 940  * See fs/inode.c for the weird use of volatile..
 941  */
 942 static void put_unused_buffer_head(struct buffer_head * bh)
 943 {
 944         struct wait_queue * wait;
 945 
 946         wait = ((volatile struct buffer_head *) bh)->b_wait;
 947         memset(bh,0,sizeof(*bh));
 948         ((volatile struct buffer_head *) bh)->b_wait = wait;
 949         bh->b_next_free = unused_list;
 950         unused_list = bh;
 951 }
 952 
 953 static void get_more_buffer_heads(void)
 954 {
 955         int i;
 956         struct buffer_head * bh;
 957 
 958         if (unused_list)
 959                 return;
 960 
 961         if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 962                 return;
 963 
 964         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 965                 bh->b_next_free = unused_list;  /* only make link */
 966                 unused_list = bh++;
 967         }
 968 }
 969 
 970 static struct buffer_head * get_unused_buffer_head(void)
 971 {
 972         struct buffer_head * bh;
 973 
 974         get_more_buffer_heads();
 975         if (!unused_list)
 976                 return NULL;
 977         bh = unused_list;
 978         unused_list = bh->b_next_free;
 979         bh->b_next_free = NULL;
 980         bh->b_data = NULL;
 981         bh->b_size = 0;
 982         bh->b_state = 0;
 983         return bh;
 984 }
 985 
 986 /*
  987  * Create the appropriate buffers when given a page for the data area and
  988  * the size of each buffer.  Use the bh->b_this_page linked list to
 989  * follow the buffers created.  Return NULL if unable to create more
 990  * buffers.
 991  */
 992 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 993 {
 994         struct buffer_head *bh, *head;
 995         unsigned long offset;
 996 
 997         head = NULL;
 998         offset = PAGE_SIZE;
 999         while ((offset -= size) < PAGE_SIZE) {
1000                 bh = get_unused_buffer_head();
1001                 if (!bh)
1002                         goto no_grow;
1003                 bh->b_this_page = head;
1004                 head = bh;
1005                 bh->b_data = (char *) (page+offset);
1006                 bh->b_size = size;
1007                 bh->b_dev = B_FREE;  /* Flag as unused */
1008         }
1009         return head;
1010 /*
1011  * In case anything failed, we just free everything we got.
1012  */
1013 no_grow:
1014         bh = head;
1015         while (bh) {
1016                 head = bh;
1017                 bh = bh->b_this_page;
1018                 put_unused_buffer_head(head);
1019         }
1020         return NULL;
1021 }
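/*
 * A minimal sketch of walking the chain that create_buffers() returns.
 * The buffers are linked through b_this_page and, as returned from this
 * function, the list is NULL-terminated (callers such as grow_buffers()
 * below close it into a ring afterwards):
 *
 *      struct buffer_head * bh;
 *      int nr = 0;
 *
 *      for (bh = create_buffers(page, size) ; bh != NULL ; bh = bh->b_this_page)
 *              nr++;
 */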
1022 
1023 static void read_buffers(struct buffer_head * bh[], int nrbuf)
1024 {
1025         int i;
1026         int bhnum = 0;
1027         struct buffer_head * bhr[MAX_BUF_PER_PAGE];
1028 
1029         for (i = 0 ; i < nrbuf ; i++) {
1030                 if (bh[i] && !buffer_uptodate(bh[i]))
1031                         bhr[bhnum++] = bh[i];
1032         }
1033         if (bhnum)
1034                 ll_rw_block(READ, bhnum, bhr);
1035         for (i = nrbuf ; --i >= 0 ; ) {
1036                 if (bh[i]) {
1037                         wait_on_buffer(bh[i]);
1038                 }
1039         }
1040 }
1041 
1042 /*
1043  * This actually gets enough info to try to align the stuff,
1044  * but we don't bother yet.. We'll have to check that nobody
1045  * else uses the buffers etc.
1046  *
1047  * "address" points to the new page we can use to move things
1048  * around..
1049  */
1050 static inline unsigned long try_to_align(struct buffer_head ** bh, int nrbuf,
1051         unsigned long address)
1052 {
1053         while (nrbuf-- > 0)
1054                 brelse(bh[nrbuf]);
1055         return 0;
1056 }
1057 
1058 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
1059         kdev_t dev, int *b, int size)
1060 {
1061         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1062         unsigned long page;
1063         unsigned long offset;
1064         int block;
1065         int nrbuf;
1066         int aligned = 1;
1067 
1068         bh[0] = first;
1069         nrbuf = 1;
1070         page = (unsigned long) first->b_data;
1071         if (page & ~PAGE_MASK)
1072                 aligned = 0;
1073         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
1074                 block = *++b;
1075                 if (!block)
1076                         goto no_go;
1077                 first = get_hash_table(dev, block, size);
1078                 if (!first)
1079                         goto no_go;
1080                 bh[nrbuf++] = first;
1081                 if (page+offset != (unsigned long) first->b_data)
1082                         aligned = 0;
1083         }
1084         if (!aligned)
1085                 return try_to_align(bh, nrbuf, address);
1086         mem_map[MAP_NR(page)].count++;
1087         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
1088         while (nrbuf-- > 0)
1089                 brelse(bh[nrbuf]);
1090         free_page(address);
1091         ++current->min_flt;
1092         return page;
1093 no_go:
1094         while (nrbuf-- > 0)
1095                 brelse(bh[nrbuf]);
1096         return 0;
1097 }
1098 
1099 static unsigned long try_to_load_aligned(unsigned long address,
1100         kdev_t dev, int b[], int size)
1101 {
1102         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1103         unsigned long offset;
1104         int isize = BUFSIZE_INDEX(size);
1105         int * p;
1106         int block;
1107 
1108         bh = create_buffers(address, size);
1109         if (!bh)
1110                 return 0;
1111         /* do any of the buffers already exist? punt if so.. */
1112         p = b;
1113         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1114                 block = *(p++);
1115                 if (!block)
1116                         goto not_aligned;
1117                 if (find_buffer(dev, block, size))
1118                         goto not_aligned;
1119         }
1120         tmp = bh;
1121         p = b;
1122         block = 0;
1123         while (1) {
1124                 arr[block++] = bh;
1125                 bh->b_count = 1;
1126                 bh->b_flushtime = 0;
1127                 clear_bit(BH_Dirty, &bh->b_state);
1128                 clear_bit(BH_Uptodate, &bh->b_state);
1129                 clear_bit(BH_Req, &bh->b_state);
1130                 bh->b_dev = dev;
1131                 bh->b_blocknr = *(p++);
1132                 bh->b_list = BUF_CLEAN;
1133                 nr_buffers++;
1134                 nr_buffers_size[isize]++;
1135                 insert_into_queues(bh);
1136                 if (bh->b_this_page)
1137                         bh = bh->b_this_page;
1138                 else
1139                         break;
1140         }
1141         buffermem += PAGE_SIZE;
1142         bh->b_this_page = tmp;
1143         mem_map[MAP_NR(address)].count++;
1144         buffer_pages[MAP_NR(address)] = bh;
1145         read_buffers(arr,block);
1146         while (block-- > 0)
1147                 brelse(arr[block]);
1148         ++current->maj_flt;
1149         return address;
1150 not_aligned:
1151         while ((tmp = bh) != NULL) {
1152                 bh = bh->b_this_page;
1153                 put_unused_buffer_head(tmp);
1154         }
1155         return 0;
1156 }
1157 
1158 /*
1159  * Try-to-share-buffers tries to minimize memory use by trying to keep
1160  * both code pages and the buffer area in the same page. This is done by
1161  * (a) checking if the buffers are already aligned correctly in memory and
1162  * (b) if none of the buffer heads are in memory at all, trying to load
1163  * them into memory the way we want them.
1164  *
1165  * This doesn't guarantee that the memory is shared, but should under most
1166  * circumstances work very well indeed (ie >90% sharing of code pages on
1167  * demand-loadable executables).
1168  */
1169 static inline unsigned long try_to_share_buffers(unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
1170         kdev_t dev, int *b, int size)
1171 {
1172         struct buffer_head * bh;
1173         int block;
1174 
1175         block = b[0];
1176         if (!block)
1177                 return 0;
1178         bh = get_hash_table(dev, block, size);
1179         if (bh)
1180                 return check_aligned(bh, address, dev, b, size);
1181         return try_to_load_aligned(address, dev, b, size);
1182 }
1183 
1184 /*
 1185  * bread_page reads a page's worth of buffers into memory at the desired
 1186  * address. It's a function of its own, as there is some speed to be gained
 1187  * by reading them all at the same time rather than waiting for one to be
 1188  * read and then another. This also allows us to optimize memory usage by
 1189  * sharing code pages and filesystem buffers.
1190  */
1191 unsigned long bread_page(unsigned long address, kdev_t dev, int b[], int size, int no_share)
1192 {
1193         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1194         unsigned long where;
1195         int i, j;
1196 
1197         if (!no_share) {
1198                 where = try_to_share_buffers(address, dev, b, size);
1199                 if (where)
1200                         return where;
1201         }
1202         ++current->maj_flt;
1203         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1204                 bh[i] = NULL;
1205                 if (b[i])
1206                         bh[i] = getblk(dev, b[i], size);
1207         }
1208         read_buffers(bh,i);
1209         where = address;
1210         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, where += size) {
1211                 if (bh[i]) {
1212                         if (buffer_uptodate(bh[i]))
1213                                 memcpy((void *) where, bh[i]->b_data, size);
1214                         brelse(bh[i]);
1215                 } else
1216                         memset((void *) where, 0, size);
1217         }
1218         return address;
1219 }
1220 
1221 #if 0
1222 /*
1223  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1224  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1225  */
1226 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1227 {
1228         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1229         int i, j;
1230 
1231         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1232                 bh[i] = NULL;
1233                 if (b[i])
1234                         bh[i] = getblk(dev, b[i], size);
1235         }
1236         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1237                 if (bh[i]) {
1238                         memcpy(bh[i]->b_data, (void *) address, size);
1239                         mark_buffer_uptodate(bh[i], 1);
1240                         mark_buffer_dirty(bh[i], 0);
1241                         brelse(bh[i]);
1242                 } else
1243                         memset((void *) address, 0, size); /* ???!?!! */
1244         }       
1245 }
1246 #endif
1247 
1248 /*
1249  * Try to increase the number of buffers available: the size argument
1250  * is used to determine what kind of buffers we want.
1251  */
1252 static int grow_buffers(int pri, int size)
1253 {
1254         unsigned long page;
1255         struct buffer_head *bh, *tmp;
1256         struct buffer_head * insert_point;
1257         int isize;
1258 
1259         if ((size & 511) || (size > PAGE_SIZE)) {
1260                 printk("VFS: grow_buffers: size = %d\n",size);
1261                 return 0;
1262         }
1263 
1264         isize = BUFSIZE_INDEX(size);
1265 
1266         if (!(page = __get_free_page(pri)))
1267                 return 0;
1268         bh = create_buffers(page, size);
1269         if (!bh) {
1270                 free_page(page);
1271                 return 0;
1272         }
1273 
1274         insert_point = free_list[isize];
1275 
1276         tmp = bh;
1277         while (1) {
1278                 nr_free[isize]++;
1279                 if (insert_point) {
1280                         tmp->b_next_free = insert_point->b_next_free;
1281                         tmp->b_prev_free = insert_point;
1282                         insert_point->b_next_free->b_prev_free = tmp;
1283                         insert_point->b_next_free = tmp;
1284                 } else {
1285                         tmp->b_prev_free = tmp;
1286                         tmp->b_next_free = tmp;
1287                 }
1288                 insert_point = tmp;
1289                 ++nr_buffers;
1290                 if (tmp->b_this_page)
1291                         tmp = tmp->b_this_page;
1292                 else
1293                         break;
1294         }
1295         free_list[isize] = bh;
1296         buffer_pages[MAP_NR(page)] = bh;
1297         tmp->b_this_page = bh;
1298         wake_up(&buffer_wait);
1299         buffermem += PAGE_SIZE;
1300         return 1;
1301 }
1302 
1303 
1304 /* =========== Reduce the buffer memory ============= */
1305 
1306 /*
1307  * try_to_free() checks if all the buffers on this particular page
 1308  * are unused, and frees the page if so.
1309  */
1310 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp,
1311                        int priority)
1312 {
1313         unsigned long page;
1314         struct buffer_head * tmp, * p;
1315         int isize = BUFSIZE_INDEX(bh->b_size);
1316 
1317         *bhp = bh;
1318         page = (unsigned long) bh->b_data;
1319         page &= PAGE_MASK;
1320         tmp = bh;
1321         do {
1322                 if (!tmp)
1323                         return 0;
1324                 if (tmp->b_count || buffer_protected(tmp) ||
1325                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1326                         return 0;
1327                 if (priority && buffer_touched(tmp))
1328                         return 0;
1329                 tmp = tmp->b_this_page;
1330         } while (tmp != bh);
1331         tmp = bh;
1332         do {
1333                 p = tmp;
1334                 tmp = tmp->b_this_page;
1335                 nr_buffers--;
1336                 nr_buffers_size[isize]--;
1337                 if (p == *bhp)
1338                   {
1339                     *bhp = p->b_prev_free;
1340                     if (p == *bhp) /* Was this the last in the list? */
1341                       *bhp = NULL;
1342                   }
1343                 remove_from_queues(p);
1344                 put_unused_buffer_head(p);
1345         } while (tmp != bh);
1346         buffermem -= PAGE_SIZE;
1347         buffer_pages[MAP_NR(page)] = NULL;
1348         free_page(page);
1349         return !mem_map[MAP_NR(page)].count;
1350 }
1351 
1352 /* Age buffers on a given page, according to whether they have been
1353    visited recently or not. */
1354 static inline void age_buffer(struct buffer_head *bh)
1355 {
1356         struct buffer_head *tmp = bh;
1357         int touched = 0;
1358 
1359         /*
1360          * When we age a page, we mark all other buffers in the page
1361          * with the "has_aged" flag.  Then, when these aliased buffers
1362          * come up for aging, we skip them until next pass.  This
1363          * ensures that a page full of multiple buffers only gets aged
1364          * once per pass through the lru lists. 
1365          */
1366         if (clear_bit(BH_Has_aged, &bh->b_state))
1367                 return;
1368         
1369         do {
1370                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1371                 tmp = tmp->b_this_page;
1372                 set_bit(BH_Has_aged, &tmp->b_state);
1373         } while (tmp != bh);
1374         clear_bit(BH_Has_aged, &bh->b_state);
1375 
1376         if (touched) 
1377                 touch_page((unsigned long) bh->b_data);
1378         else
1379                 age_page((unsigned long) bh->b_data);
1380 }
1381 
1382 /*
1383  * Consult the load average for buffers and decide whether or not
1384  * we should shrink the buffers of one size.  If we decide yes,
1385  * do it and return 1.  Else return 0.  Do not attempt to shrink the
1386  * size that is specified.
1387  *
1388  * I would prefer not to use a load average, but the way things are now it
1389  * seems unavoidable.  The way to get rid of it would be to force clustering
1390  * universally, so that when we reclaim buffers we always reclaim an entire
1391  * page.  Doing this would mean that we all need to move towards QMAGIC.
1392  */
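     /*
      * Roughly speaking, a size is victimized below when its
      * (lav_const-weighted) share of the buffer-usage load average falls
      * below its share of the unshared buffers, i.e. when
      *
      *   lav_const * buffers_lav[n] * total_n_buffers
      *     < total_lav * (nr_buffers_size[n] - nr_buffers_st[n][BUF_SHARED])
      */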
1393 
1394 static int maybe_shrink_lav_buffers(int size)
1395 {          
1396         int nlist;
1397         int isize;
1398         int total_lav, total_n_buffers, n_sizes;
1399         
1400         /* Do not consider the shared buffers since they would not tend
1401            to have getblk called very often, and this would throw off
1402            the lav.  They are not easily reclaimable anyway (let the swapper
1403            make the first move). */
1404   
1405         total_lav = total_n_buffers = n_sizes = 0;
1406         for(nlist = 0; nlist < NR_SIZES; nlist++)
1407          {
1408                  total_lav += buffers_lav[nlist];
1409                  if(nr_buffers_size[nlist]) n_sizes++;
1410                  total_n_buffers += nr_buffers_size[nlist];
1411                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1412          }
1413         
1414         /* See if we have an excessive number of buffers of a particular
1415            size - if so, victimize that bunch. */
1416   
1417         isize = (size ? BUFSIZE_INDEX(size) : -1);
1418         
1419         if (n_sizes > 1)
1420                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1421                   {
1422                           if(nlist == isize) continue;
1423                           if(nr_buffers_size[nlist] &&
1424                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1425                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1426                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1427                                             return 1;
1428                   }
1429         return 0;
1430 }
1431 
1432 /*
1433  * Try to free up some pages by shrinking the buffer-cache
1434  *
1435  * Priority tells the routine how hard to try to shrink the
1436  * buffers: 6 means "don't bother too much", while a value
1437  * of 0 means "we'd better get some free pages now".
1438  *
1439  * "limit" is meant to limit the shrink-action only to pages
1440  * that are in the 0 - limit address range, for DMA re-allocations.
1441  * We ignore that right now.
1442  */
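     /*
      * At priorities below 2 all dirty buffers are synced first; at
      * priority 2 bdflush is merely woken up.  After that the load-average
      * heuristic above is consulted, and shrink_specific_buffers() is the
      * fallback for any size.
      */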
1443 int shrink_buffers(unsigned int priority, unsigned long limit)
1444 {
1445         if (priority < 2) {
1446                 sync_buffers(0,0);
1447         }
1448 
1449         if(priority == 2) wakeup_bdflush(1);
1450 
1451         if(maybe_shrink_lav_buffers(0)) return 1;
1452 
1453         /* No good candidate size - take any size we can find */
1454         return shrink_specific_buffers(priority, 0);
1455 }
1456 
1457 static int shrink_specific_buffers(unsigned int priority, int size)
1458 {
1459         struct buffer_head *bh;
1460         int nlist;
1461         int i, isize, isize1;
1462 
1463 #ifdef DEBUG
1464         if(size) printk("Shrinking buffers of size %d\n", size);
1465 #endif
1466         /* First try the free lists, and see if we can get a complete page
1467            from here */
1468         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1469 
1470         for(isize = 0; isize<NR_SIZES; isize++){
1471                 if(isize1 != -1 && isize1 != isize) continue;
1472                 bh = free_list[isize];
1473                 if(!bh) continue;
1474                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1475                         if (bh->b_count || buffer_protected(bh) ||
1476                             !bh->b_this_page)
1477                                  continue;
1478                         if (!age_of((unsigned long) bh->b_data) &&
1479                             try_to_free(bh, &bh, 6))
1480                                  return 1;
1481                         if(!bh) break;
1482                         /* Some interrupt must have used it after we
1483                            freed the page.  No big deal - keep looking */
1484                 }
1485         }
1486         
1487         /* Not enough in the free lists, now try the lru list */
1488         
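             /*
              * The number of lru entries examined per list is a fraction of
              * the buffers on that list, scaled by BUFFEROUT_WEIGHT/1024 and
              * halved for each step of priority, so the more urgent (lower)
              * priorities scan more buffers.
              */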
1489         for(nlist = 0; nlist < NR_LIST; nlist++) {
1490         repeat1:
1491                 if(priority > 2 && nlist == BUF_SHARED) continue;
1492                 i = nr_buffers_type[nlist];
1493                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1494                 for ( ; i > 0; i-- ) {
1495                         bh = next_to_age[nlist];
1496                         if (!bh)
1497                                 break;
1498                         next_to_age[nlist] = bh->b_next_free;
1499 
1500                         /* First, age the buffer. */
1501                         age_buffer(bh);
1502                         /* We may have stalled while waiting for I/O
1503                            to complete. */
1504                         if(bh->b_list != nlist) goto repeat1;
1505                         if (bh->b_count || buffer_protected(bh) ||
1506                             !bh->b_this_page)
1507                                  continue;
1508                         if(size && bh->b_size != size) continue;
1509                         if (buffer_locked(bh))
1510                                  if (priority)
1511                                           continue;
1512                                  else
1513                                           wait_on_buffer(bh);
1514                         if (buffer_dirty(bh)) {
1515                                 bh->b_count++;
1516                                 bh->b_flushtime = 0;
1517                                 ll_rw_block(WRITEA, 1, &bh);
1518                                 bh->b_count--;
1519                                 continue;
1520                         }
1521                         /* At priority 6, only consider really old
1522                            (age==0) buffers for reclaiming.  At
1523                            priority 0, consider any buffers. */
1524                         if ((age_of((unsigned long) bh->b_data) >>
1525                              (6-priority)) > 0)
1526                                 continue;                               
1527                         if (try_to_free(bh, &bh, 0))
1528                                  return 1;
1529                         if(!bh) break;
1530                 }
1531         }
1532         return 0;
1533 }
1534 
1535 
1536 /* ================== Debugging =================== */
1537 
1538 void show_buffers(void)
1539 {
1540         struct buffer_head * bh;
1541         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1542         int protected = 0;
1543         int shared;
1544         int nlist, isize;
1545 
1546         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1547         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1548         printk("Buffer blocks:   %6d\n",nr_buffers);
1549 
1550         for(nlist = 0; nlist < NR_LIST; nlist++) {
1551           shared = found = locked = dirty = used = lastused = protected = 0;
1552           bh = lru_list[nlist];
1553           if(!bh) continue;
1554           do {
1555                 found++;
1556                 if (buffer_locked(bh))
1557                         locked++;
1558                 if (buffer_protected(bh))
1559                         protected++;
1560                 if (buffer_dirty(bh))
1561                         dirty++;
1562                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1563                 if (bh->b_count)
1564                         used++, lastused = found;
1565                 bh = bh->b_next_free;
1566               } while (bh != lru_list[nlist]);
1567         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, "
1568                "%d protected, %d dirty %d shrd\n",
1569                 nlist, found, used, lastused, locked, protected, dirty, shared);
1570         };
1571         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1572         for(isize = 0; isize<NR_SIZES; isize++){
1573                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1574                        buffers_lav[isize], nr_free[isize]);
1575                 for(nlist = 0; nlist < NR_LIST; nlist++)
1576                          printk("%7d ", nr_buffers_st[isize][nlist]);
1577                 printk("\n");
1578         }
1579 }
1580 
1581 
1582 /* ====================== Cluster patches for ext2 ==================== */
1583 
1584 /*
1585  * try_to_reassign() checks if all the buffers on this particular page
1586  * are unused, and reassigns them to a new cluster if so.
1587  */
1588 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1589                            kdev_t dev, unsigned int starting_block)
1590 {
1591         unsigned long page;
1592         struct buffer_head * tmp, * p;
1593 
1594         *bhp = bh;
1595         page = (unsigned long) bh->b_data;
1596         page &= PAGE_MASK;
1597         if(mem_map[MAP_NR(page)].count != 1) return 0;
1598         tmp = bh;
1599         do {
1600                 if (!tmp)
1601                          return 0;
1602                 
1603                 if (tmp->b_count || buffer_protected(tmp) ||
1604                     buffer_dirty(tmp) || buffer_locked(tmp))
1605                          return 0;
1606                 tmp = tmp->b_this_page;
1607         } while (tmp != bh);
1608         tmp = bh;
1609         
1610         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1611                  tmp = tmp->b_this_page;
1612         
1613         /* This is the buffer at the head of the page */
1614         bh = tmp;
1615         do {
1616                 p = tmp;
1617                 tmp = tmp->b_this_page;
1618                 remove_from_queues(p);
1619                 p->b_dev = dev;
1620                 mark_buffer_uptodate(p, 0);
1621                 clear_bit(BH_Req, &p->b_state);
1622                 p->b_blocknr = starting_block++;
1623                 insert_into_queues(p);
1624         } while (tmp != bh);
1625         return 1;
1626 }
1627 
1628 /*
1629  * Try to find a free cluster by locating a page where
1630  * all of the buffers are unused.  We would like this function
1631  * to be atomic, so we do not call anything that might cause
1632  * the process to sleep.  The priority is somewhat similar to
1633  * the priority used in shrink_buffers.
1634  * 
1635  * My thinking is that the kernel should end up using whole
1636  * pages for the buffer cache as much of the time as possible.
1637  * This way the other buffers on a particular page are likely
1638  * to be very near each other on the free list, and we will not
1639  * be expiring data prematurely.  For now we only cannibalize buffers
1640  * of the same size to keep the code simpler.
1641  */
1642 static int reassign_cluster(kdev_t dev, 
1643                      unsigned int starting_block, int size)
1644 {
1645         struct buffer_head *bh;
1646         int isize = BUFSIZE_INDEX(size);
1647         int i;
1648 
1649         /* We want to give ourselves a really good shot at generating
1650            a cluster, and since we only take buffers from the free
1651            list, we "overfill" it a little. */
1652 
1653         while(nr_free[isize] < 32) refill_freelist(size);
1654 
1655         bh = free_list[isize];
1656         if(bh)
1657                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1658                          if (!bh->b_this_page)  continue;
1659                          if (try_to_reassign(bh, &bh, dev, starting_block))
1660                                  return 4;
1661                  }
1662         return 0;
1663 }
1664 
1665 /* This function tries to generate a new cluster of buffers
1666  * from a new page in memory.  We should only do this if we have
1667  * not expanded the buffer cache to the maximum size that we allow.
1668  */
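     /*
      * The candidate cluster is abandoned (the "not_aligned" path) as soon
      * as any of the consecutive blocks that would share the page already
      * has a buffer in the cache, since the page must map a contiguous,
      * un-cached run of blocks.
      */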
1669 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1670 {
1671         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1672         int isize = BUFSIZE_INDEX(size);
1673         unsigned long offset;
1674         unsigned long page;
1675         int nblock;
1676 
1677         page = get_free_page(GFP_NOBUFFER);
1678         if(!page) return 0;
1679 
1680         bh = create_buffers(page, size);
1681         if (!bh) {
1682                 free_page(page);
1683                 return 0;
1684         };
1685         nblock = block;
1686         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1687                 if (find_buffer(dev, nblock++, size))
1688                          goto not_aligned;
1689         }
1690         tmp = bh;
1691         nblock = 0;
1692         while (1) {
1693                 arr[nblock++] = bh;
1694                 bh->b_count = 1;
1695                 bh->b_flushtime = 0;
1696                 bh->b_state = 0;
1697                 bh->b_dev = dev;
1698                 bh->b_list = BUF_CLEAN;
1699                 bh->b_blocknr = block++;
1700                 nr_buffers++;
1701                 nr_buffers_size[isize]++;
1702                 insert_into_queues(bh);
1703                 if (bh->b_this_page)
1704                         bh = bh->b_this_page;
1705                 else
1706                         break;
1707         }
1708         buffermem += PAGE_SIZE;
1709         buffer_pages[MAP_NR(page)] = bh;
1710         bh->b_this_page = tmp;
1711         while (nblock-- > 0)
1712                 brelse(arr[nblock]);
1713         return 4; /* ?? */
1714 not_aligned:
1715         while ((tmp = bh) != NULL) {
1716                 bh = bh->b_this_page;
1717                 put_unused_buffer_head(tmp);
1718         }
1719         free_page(page);
1720         return 0;
1721 }
1722 
1723 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1724 {
1725         int i, offset;
1726         
1727         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1728                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1729                 if(find_buffer(dev, b[i], size)) return 0;
1730         };
1731 
1732         /* OK, we have a candidate for a new cluster */
1733         
1734         /* See if one size of buffer is over-represented in the buffer cache,
1735            if so reduce the numbers of buffers */
1736         if(maybe_shrink_lav_buffers(size))
1737          {
1738                  int retval;
1739                  retval = try_to_generate_cluster(dev, b[0], size);
1740                  if(retval) return retval;
1741          };
1742         
1743         if (nr_free_pages > min_free_pages*2) 
1744                  return try_to_generate_cluster(dev, b[0], size);
1745         else
1746                  return reassign_cluster(dev, b[0], size);
1747 }
1748 
1749 
1750 /* ===================== Init ======================= */
1751 
1752 /*
1753  * This initializes the initial buffer free list.  nr_buffers_type is set
1754  * to one less the actual number of buffers, as a sop to backwards
1755  * compatibility --- the old code did this (I think unintentionally,
1756  * but I'm not sure), and programs in the ps package expect it.
1757  *                                      - TYT 8/30/92
1758  */
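     /*
      * The hash table is sized from the amount of physical memory: 997
      * entries below 4MB, 4093 between 4MB and 16MB, and 16381 at 16MB or
      * more (prime sizes, presumably to spread the hash chains evenly).
      */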
1759 void buffer_init(void)
1760 {
1761         int i;
1762         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1763         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1764 
1765         if (memsize >= 4*1024*1024) {
1766                 if(memsize >= 16*1024*1024)
1767                          nr_hash = 16381;
1768                 else
1769                          nr_hash = 4093;
1770         } else {
1771                 nr_hash = 997;
1772         };
1773         
1774         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1775                                                      sizeof(struct buffer_head *));
1776 
1777 
1778         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1779                                                      sizeof(struct buffer_head *));
1780         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1781                 buffer_pages[i] = NULL;
1782 
1783         for (i = 0 ; i < nr_hash ; i++)
1784                 hash_table[i] = NULL;
1785         lru_list[BUF_CLEAN] = 0;
1786         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1787         if (!free_list[isize])
1788                 panic("VFS: Unable to initialize buffer free list!");
1789         return;
1790 }
1791 
1792 
1793 /* ====================== bdflush support =================== */
1794 
1795 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1796  * response to dirty buffers.  Once this process is activated, we write back
1797  * a limited number of buffers to the disks and then go back to sleep again.
1798  * In effect this is a process which never leaves kernel mode, and does not have
1799  * any user memory associated with it except for the stack.  There is also
1800  * a kernel stack page, which obviously must be separate from the user stack.
1801  */
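     /*
      * The handshake with the rest of the kernel uses two wait queues:
      * wakeup_bdflush() wakes the daemon through bdflush_wait and, when
      * asked to wait, sleeps on bdflush_done until the daemon has finished
      * one flushing pass.
      */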
1802 struct wait_queue * bdflush_wait = NULL;
1803 struct wait_queue * bdflush_done = NULL;
1804 
1805 static int bdflush_running = 0;
1806 
1807 static void wakeup_bdflush(int wait)
1808 {
1809         extern int      rd_loading;
1810         
1811         if (!bdflush_running){
1812                 if (!rd_loading)
1813                         printk("Warning - bdflush not running\n");
1814                 sync_buffers(0,0);
1815                 return;
1816         };
1817         wake_up(&bdflush_wait);
1818         if(wait) sleep_on(&bdflush_done);
1819 }
1820 
1821 
1822 
1823 /* 
1824  * Here we attempt to write back old buffers.  We also try to flush inodes
1825  * and superblocks, since this function is essentially "update", and
1826  * otherwise there would be no way of ensuring that these quantities ever
1827  * get written back.  Ideally, we would have a timestamp on the inodes
1828  * and superblocks so that we could write back only the old ones as well.
1829  */
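     /*
      * Only dirty buffers whose b_flushtime has expired are actually
      * written out here; afterwards the per-size buffer load averages are
      * recomputed with CALC_LOAD and the usage counts are reset.
      */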
1830 
1831 asmlinkage int sync_old_buffers(void)
1832 {
1833         int i, isize;
1834         int ndirty, nwritten;
1835         int nlist;
1836         int ncount;
1837         struct buffer_head * bh, *next;
1838 
1839         sync_supers(0);
1840         sync_inodes(0);
1841 
1842         ncount = 0;
1843 #ifdef DEBUG
1844         for(nlist = 0; nlist < NR_LIST; nlist++)
1845 #else
1846         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1847 #endif
1848         {
1849                 ndirty = 0;
1850                 nwritten = 0;
1851         repeat:
1852                 bh = lru_list[nlist];
1853                 if(bh) 
1854                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1855                                  /* We may have stalled while waiting for I/O to complete. */
1856                                  if(bh->b_list != nlist) goto repeat;
1857                                  next = bh->b_next_free;
1858                                  if(!lru_list[nlist]) {
1859                                          printk("Dirty list empty %d\n", i);
1860                                          break;
1861                                  }
1862                                  
1863                                  /* Clean buffer on dirty list?  Refile it */
1864                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1865                                   {
1866                                           refile_buffer(bh);
1867                                           continue;
1868                                   }
1869                                  
1870                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1871                                           continue;
1872                                  ndirty++;
1873                                  if(bh->b_flushtime > jiffies) continue;
1874                                  nwritten++;
1875                                  bh->b_count++;
1876                                  bh->b_flushtime = 0;
1877 #ifdef DEBUG
1878                                  if(nlist != BUF_DIRTY) ncount++;
1879 #endif
1880                                  ll_rw_block(WRITE, 1, &bh);
1881                                  bh->b_count--;
1882                          }
1883         }
1884 #ifdef DEBUG
1885         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1886         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1887 #endif
1888         
1889         /* We assume that we only come through here on a regular
1890            schedule, like every 5 seconds.  Now update load averages.  
1891            Shift usage counts to prevent overflow. */
1892         for(isize = 0; isize<NR_SIZES; isize++){
1893                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1894                 buffer_usage[isize] = 0;
1895         };
1896         return 0;
1897 }
1898 
1899 
1900 /* This is the interface to bdflush.  As we get more sophisticated, we can
1901  * pass tuning parameters to this "process", to adjust how it behaves.  If you
1902  * invoke this again after you have done this once, you would simply modify 
1903  * the tuning parameters.  We would want to verify each parameter, however,
1904  * to make sure that it is reasonable. */
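     /*
      * For example (illustrative only): bdflush(0, 0) turns the calling
      * process into the flush daemon, bdflush(1, 0) behaves like "update",
      * bdflush(2 + 2*n, (long) &value) copies tuning parameter n out to
      * user space, and bdflush(3 + 2*n, value) sets it, subject to the
      * bdflush_min/bdflush_max limits.
      */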
1905 
1906 asmlinkage int sys_bdflush(int func, long data)
1907 {
1908         int i, error;
1909         int ndirty;
1910         int nlist;
1911         int ncount;
1912         struct buffer_head * bh, *next;
1913 
1914         if (!suser())
1915                 return -EPERM;
1916 
1917         if (func == 1)
1918                  return sync_old_buffers();
1919 
1920         /* Basically func 0 starts the daemon, func 1 flushes old buffers, func 2*n+2 reads tuning param n, func 2*n+3 sets it */
1921         if (func >= 2) {
1922                 i = (func-2) >> 1;
1923                 if (i < 0 || i >= N_PARAM)
1924                         return -EINVAL;
1925                 if((func & 1) == 0) {
1926                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1927                         if (error)
1928                                 return error;
1929                         put_user(bdf_prm.data[i], (int*)data);
1930                         return 0;
1931                 };
1932                 if (data < bdflush_min[i] || data > bdflush_max[i])
1933                         return -EINVAL;
1934                 bdf_prm.data[i] = data;
1935                 return 0;
1936         };
1937         
1938         if (bdflush_running)
1939                 return -EBUSY; /* Only one copy of this running at one time */
1940         bdflush_running++;
1941         
1942         /* OK, from here on is the daemon */
1943         
1944         for (;;) {
1945 #ifdef DEBUG
1946                 printk("bdflush() activated...");
1947 #endif
1948                 
1949                 ncount = 0;
1950 #ifdef DEBUG
1951                 for(nlist = 0; nlist < NR_LIST; nlist++)
1952 #else
1953                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1954 #endif
1955                  {
1956                          ndirty = 0;
1957                  repeat:
1958                          bh = lru_list[nlist];
1959                          if(bh) 
1960                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1961                                        bh = next) {
1962                                           /* We may have stalled while waiting for I/O to complete. */
1963                                           if(bh->b_list != nlist) goto repeat;
1964                                           next = bh->b_next_free;
1965                                           if(!lru_list[nlist]) {
1966                                                   printk("Dirty list empty %d\n", i);
1967                                                   break;
1968                                           }
1969                                           
1970                                           /* Clean buffer on dirty list?  Refile it */
1971                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1972                                            {
1973                                                    refile_buffer(bh);
1974                                                    continue;
1975                                            }
1976                                           
1977                                           if (buffer_locked(bh) || !buffer_dirty(bh))
1978                                                    continue;
1979                                           /* Should we write back buffers that are shared or not??
1980                                              currently dirty buffers are not shared, so it does not matter */
1981                                           bh->b_count++;
1982                                           ndirty++;
1983                                           bh->b_flushtime = 0;
1984                                           ll_rw_block(WRITE, 1, &bh);
1985 #ifdef DEBUG
1986                                           if(nlist != BUF_DIRTY) ncount++;
1987 #endif
1988                                           bh->b_count--;
1989                                   }
1990                  }
1991 #ifdef DEBUG
1992                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1993                 printk("sleeping again.\n");
1994 #endif
1995                 wake_up(&bdflush_done);
1996                 
1997                 /* If there are still a lot of dirty buffers around, skip the sleep
1998                    and flush some more */
1999                 
2000                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
2001                    bdf_prm.b_un.nfract/100) {
2002                         if (current->signal & (1 << (SIGKILL-1))) {
2003                                 bdflush_running--;
2004                                 return 0;
2005                         }
2006                         current->signal = 0;
2007                         interruptible_sleep_on(&bdflush_wait);
2008                 }
2009         }
2010 }
2011 
2012 
2013 /*
2014  * Overrides for Emacs so that we follow Linus's tabbing style.
2015  * Emacs will notice this stuff at the end of the file and automatically
2016  * adjust the settings for this buffer only.  This must remain at the end
2017  * of the file.
2018  * ---------------------------------------------------------------------------
2019  * Local variables:
2020  * c-indent-level: 8
2021  * c-brace-imaginary-offset: 0
2022  * c-brace-offset: -8
2023  * c-argdecl-indent: 8
2024  * c-label-offset: -8
2025  * c-continued-statement-offset: 8
2026  * c-continued-brace-offset: 0
2027  * End:
2028  */
