root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. remove_from_hash_queue
  10. remove_from_lru_list
  11. remove_from_free_list
  12. remove_from_queues
  13. put_last_lru
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. refill_freelist
  20. getblk
  21. set_writetime
  22. refile_buffer
  23. __brelse
  24. __bforget
  25. bread
  26. breada
  27. put_unused_buffer_head
  28. get_more_buffer_heads
  29. get_unused_buffer_head
  30. create_buffers
  31. read_buffers
  32. try_to_align
  33. check_aligned
  34. try_to_load_aligned
  35. try_to_share_buffers
  36. bread_page
  37. bwrite_page
  38. grow_buffers
  39. try_to_free
  40. age_buffer
  41. maybe_shrink_lav_buffers
  42. shrink_buffers
  43. shrink_specific_buffers
  44. show_buffers
  45. try_to_reassign
  46. reassign_cluster
  47. try_to_generate_cluster
  48. generate_cluster
  49. buffer_init
  50. wakeup_bdflush
  51. sync_old_buffers
  52. sys_bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 #include <linux/sched.h>
  20 #include <linux/kernel.h>
  21 #include <linux/major.h>
  22 #include <linux/string.h>
  23 #include <linux/locks.h>
  24 #include <linux/errno.h>
  25 #include <linux/malloc.h>
  26 #include <linux/swapctl.h>
  27 
  28 #include <asm/system.h>
  29 #include <asm/segment.h>
  30 #include <asm/io.h>
  31 
  32 #define NR_SIZES 4
  33 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  34 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  35 
  36 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  37 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
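      /* buffersize_index maps a block size (in units of 512 bytes, i.e.
         size>>9) to an index into the per-size arrays: 512->0, 1024->1,
         2048->2, 4096->3, with -1 marking an unsupported size.
         bufferindex_size is the inverse mapping, and MAX_BUF_PER_PAGE is
         the largest number of (512-byte) buffers that fit in one page. */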
  38 
  39 static int grow_buffers(int pri, int size);
  40 static int shrink_specific_buffers(unsigned int priority, int size);
  41 static int maybe_shrink_lav_buffers(int);
  42 
  43 static int nr_hash = 0;  /* Size of hash table */
  44 static struct buffer_head ** hash_table;
  45 struct buffer_head ** buffer_pages;
  46 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  47 /* next_to_age is an array of pointers into the lru lists, used to
  48    cycle through the buffers aging their contents when deciding which
  49    buffers to discard when more memory is needed */
  50 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  51 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  52 static struct buffer_head * unused_list = NULL;
  53 static struct wait_queue * buffer_wait = NULL;
  54 
  55 int nr_buffers = 0;
  56 int nr_buffers_type[NR_LIST] = {0,};
  57 int nr_buffers_size[NR_SIZES] = {0,};
  58 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  59 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  60 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  61 int nr_free[NR_SIZES] = {0,};
  62 int buffermem = 0;
  63 int nr_buffer_heads = 0;
  64 extern int *blksize_size[];
  65 
  66 /* Here is the parameter block for the bdflush process. */
  67 static void wakeup_bdflush(int);
  68 
  69 #define N_PARAM 9
  70 #define LAV
  71 
  72 static union bdflush_param{
  73         struct {
  74                 int nfract;  /* Percentage of buffer cache dirty to 
  75                                 activate bdflush */
  76                 int ndirty;  /* Maximum number of dirty blocks to write out per
  77                                 wake-cycle */
  78                 int nrefill; /* Number of clean buffers to try and obtain
  79                                 each time we call refill */
  80                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  81                                   when trying to refill buffers. */
  82                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  83                                     search for free clusters */
  84                 int age_buffer;  /* Time for normal buffer to age before 
  85                                     we flush it */
  86                 int age_super;  /* Time for superblock to age before we 
  87                                    flush it */
  88                 int lav_const;  /* Constant used for load average (time
  89                                    constant) */
  90                 int lav_ratio;  /* Used to determine how low a lav for a
  91                                    particular size can go before we start to
  92                                    trim back the buffers */
  93         } b_un;
  94         unsigned int data[N_PARAM];
  95 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
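      /* Default tuning, in the same order as the fields of b_un above:
         nfract=25%, ndirty=500, nrefill=64, nref_dirt=256, clu_nfract=15%,
         age_buffer=30*HZ, age_super=5*HZ, lav_const=1884, lav_ratio=2. */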
  96 
  97 /* The lav constant is set for 1 minute, as long as the update process runs
  98    every 5 seconds.  If you change the frequency of update, the time
  99    constant will also change. */
 100 
 101 
 102 /* These are the min and max parameter values that we will allow to be assigned */
 103 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 104 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
 105 
 106 /*
 107  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 108  * and getting rid of the cli-sti pairs. The wait-queue routines still
 109  * need cli-sti, but now it's just a couple of 386 instructions or so.
 110  *
 111  * Note that the real wait_on_buffer() is an inline function that checks
 112  * if 'b_wait' is set before calling this, so that the queues aren't set
 113  * up unnecessarily.
 114  */
 115 void __wait_on_buffer(struct buffer_head * bh)
 116 {
 117         struct wait_queue wait = { current, NULL };
 118 
 119         bh->b_count++;
 120         add_wait_queue(&bh->b_wait, &wait);
 121 repeat:
 122         current->state = TASK_UNINTERRUPTIBLE;
 123         if (bh->b_lock) {
 124                 schedule();
 125                 goto repeat;
 126         }
 127         remove_wait_queue(&bh->b_wait, &wait);
 128         bh->b_count--;
 129         current->state = TASK_RUNNING;
 130 }
 131 
 132 /* Call sync_buffers with wait!=0 to ensure that the call does not
 133    return until all buffer writes have completed.  Sync() may return
 134    before the writes have finished; fsync() may not. */
 135 
 136 
 137 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 138    spontaneously dirty themselves without ever brelse being called.
 139    We will ultimately want to put these in a separate list, but for
 140    now we search all of the lists for dirty buffers */
 141 
 142 static int sync_buffers(kdev_t dev, int wait)
 143 {
 144         int i, retry, pass = 0, err = 0;
 145         int nlist, ncount;
 146         struct buffer_head * bh, *next;
 147 
 148         /* One pass for no-wait, three for wait:
 149            0) write out all dirty, unlocked buffers;
 150            1) write out all dirty buffers, waiting if locked;
 151            2) wait for completion by waiting for all buffers to unlock. */
 152  repeat:
 153         retry = 0;
 154  repeat2:
 155         ncount = 0;
 156         /* We search all lists as a failsafe mechanism, not because we expect
 157            there to be dirty buffers on any of the other lists. */
 158         for(nlist = 0; nlist < NR_LIST; nlist++)
 159          {
 160          repeat1:
 161                  bh = lru_list[nlist];
 162                  if(!bh) continue;
 163                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 164                          if(bh->b_list != nlist) goto repeat1;
 165                          next = bh->b_next_free;
 166                          if(!lru_list[nlist]) break;
 167                          if (dev && bh->b_dev != dev)
 168                                   continue;
 169                          if (bh->b_lock)
 170                           {
 171                                   /* Buffer is locked; skip it unless wait is
 172                                      requested AND pass > 0. */
 173                                   if (!wait || !pass) {
 174                                           retry = 1;
 175                                           continue;
 176                                   }
 177                                   wait_on_buffer (bh);
 178                                   goto repeat2;
 179                           }
 180                          /* If an unlocked buffer is not uptodate, there has
 181                              been an IO error. Skip it. */
 182                          if (wait && bh->b_req && !bh->b_lock &&
 183                              !bh->b_dirt && !bh->b_uptodate) {
 184                                   err = 1;
 185                                   continue;
 186                           }
 187                          /* Don't write clean buffers.  Don't write ANY buffers
 188                             on the third pass. */
 189                          if (!bh->b_dirt || pass>=2)
 190                                   continue;
 191                          /* don't bother about locked buffers */
 192                          if (bh->b_lock)
 193                                  continue;
 194                          bh->b_count++;
 195                          bh->b_flushtime = 0;
 196                          ll_rw_block(WRITE, 1, &bh);
 197 
 198                          if(nlist != BUF_DIRTY) { 
 199                                  printk("[%d %s %ld] ", nlist,
 200                                         kdevname(bh->b_dev), bh->b_blocknr);
 201                                  ncount++;
 202                          };
 203                          bh->b_count--;
 204                          retry = 1;
 205                  }
 206          }
 207         if (ncount)
 208           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 209         
 210         /* If we are waiting for the sync to succeed, and if any dirty
 211            blocks were written, then repeat; on the second pass, only
 212            wait for buffers being written (do not pass to write any
 213            more buffers on the second pass). */
 214         if (wait && retry && ++pass<=2)
 215                  goto repeat;
 216         return err;
 217 }
 218 
 219 void sync_dev(kdev_t dev)
 220 {
 221         sync_buffers(dev, 0);
 222         sync_supers(dev);
 223         sync_inodes(dev);
 224         sync_buffers(dev, 0);
 225 }
 226 
 227 int fsync_dev(kdev_t dev)
 228 {
 229         sync_buffers(dev, 0);
 230         sync_supers(dev);
 231         sync_inodes(dev);
 232         return sync_buffers(dev, 1);
 233 }
 234 
 235 asmlinkage int sys_sync(void)
 236 {
 237         fsync_dev(0);
 238         return 0;
 239 }
 240 
 241 int file_fsync (struct inode *inode, struct file *filp)
 242 {
 243         return fsync_dev(inode->i_dev);
 244 }
 245 
 246 asmlinkage int sys_fsync(unsigned int fd)
 247 {
 248         struct file * file;
 249         struct inode * inode;
 250 
 251         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 252                 return -EBADF;
 253         if (!file->f_op || !file->f_op->fsync)
 254                 return -EINVAL;
 255         if (file->f_op->fsync(inode,file))
 256                 return -EIO;
 257         return 0;
 258 }
 259 
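      /* invalidate_buffers() is called when the media in a device may have
         changed (e.g. a floppy-disk change): every unused (b_count == 0)
         buffer belonging to the device is marked not uptodate, not dirty
         and not requested, so stale cached blocks are never handed back.
         Buffers that are still in use are left untouched. */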
 260 void invalidate_buffers(kdev_t dev)
 261 {
 262         int i;
 263         int nlist;
 264         struct buffer_head * bh;
 265 
 266         for(nlist = 0; nlist < NR_LIST; nlist++) {
 267                 bh = lru_list[nlist];
 268                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 269                         if (bh->b_dev != dev)
 270                                 continue;
 271                         wait_on_buffer(bh);
 272                         if (bh->b_dev != dev)
 273                                 continue;
 274                         if (bh->b_count)
 275                                 continue;
 276                         bh->b_flushtime = bh->b_uptodate = 
 277                                 bh->b_dirt = bh->b_req = 0;
 278                 }
 279         }
 280 }
 281 
 282 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 283 #define hash(dev,block) hash_table[_hashfn(dev,block)]
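      /* A buffer is located by XOR-ing its (packed) device number with its
         block number and reducing the result modulo nr_hash; hash() is the
         head of the resulting collision chain, linked through b_next and
         b_prev. */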
 284 
 285 static inline void remove_from_hash_queue(struct buffer_head * bh)
 286 {
 287         if (bh->b_next)
 288                 bh->b_next->b_prev = bh->b_prev;
 289         if (bh->b_prev)
 290                 bh->b_prev->b_next = bh->b_next;
 291         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 292                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 293         bh->b_next = bh->b_prev = NULL;
 294 }
 295 
 296 static inline void remove_from_lru_list(struct buffer_head * bh)
 297 {
 298         if (!(bh->b_prev_free) || !(bh->b_next_free))
 299                 panic("VFS: LRU block list corrupted");
 300         if (bh->b_dev == B_FREE)
 301                 panic("LRU list corrupted");
 302         bh->b_prev_free->b_next_free = bh->b_next_free;
 303         bh->b_next_free->b_prev_free = bh->b_prev_free;
 304 
 305         if (lru_list[bh->b_list] == bh)
 306                  lru_list[bh->b_list] = bh->b_next_free;
 307         if (lru_list[bh->b_list] == bh)
 308                  lru_list[bh->b_list] = NULL;
 309         if (next_to_age[bh->b_list] == bh)
 310                 next_to_age[bh->b_list] = bh->b_next_free;
 311         if (next_to_age[bh->b_list] == bh)
 312                 next_to_age[bh->b_list] = NULL;
 313 
 314         bh->b_next_free = bh->b_prev_free = NULL;
 315 }
 316 
 317 static inline void remove_from_free_list(struct buffer_head * bh)
 318 {
 319         int isize = BUFSIZE_INDEX(bh->b_size);
 320         if (!(bh->b_prev_free) || !(bh->b_next_free))
 321                 panic("VFS: Free block list corrupted");
 322         if(bh->b_dev != B_FREE)
 323                 panic("Free list corrupted");
 324         if(!free_list[isize])
 325                 panic("Free list empty");
 326         nr_free[isize]--;
 327         if(bh->b_next_free == bh)
 328                  free_list[isize] = NULL;
 329         else {
 330                 bh->b_prev_free->b_next_free = bh->b_next_free;
 331                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 332                 if (free_list[isize] == bh)
 333                          free_list[isize] = bh->b_next_free;
 334         };
 335         bh->b_next_free = bh->b_prev_free = NULL;
 336 }
 337 
 338 static inline void remove_from_queues(struct buffer_head * bh)
 339 {
 340         if(bh->b_dev == B_FREE) {
 341                 remove_from_free_list(bh); /* Free list entries should not be
 342                                               in the hash queue */
 343                 return;
 344         };
 345         nr_buffers_type[bh->b_list]--;
 346         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 347         remove_from_hash_queue(bh);
 348         remove_from_lru_list(bh);
 349 }
 350 
 351 static inline void put_last_lru(struct buffer_head * bh)
 352 {
 353         if (!bh)
 354                 return;
 355         if (bh == lru_list[bh->b_list]) {
 356                 lru_list[bh->b_list] = bh->b_next_free;
 357                 if (next_to_age[bh->b_list] == bh)
 358                         next_to_age[bh->b_list] = bh->b_next_free;
 359                 return;
 360         }
 361         if(bh->b_dev == B_FREE)
 362                 panic("Wrong block for lru list");
 363         remove_from_lru_list(bh);
 364 /* add to back of free list */
 365 
 366         if(!lru_list[bh->b_list]) {
 367                 lru_list[bh->b_list] = bh;
 368                 lru_list[bh->b_list]->b_prev_free = bh;
 369         };
 370         if (!next_to_age[bh->b_list])
 371                 next_to_age[bh->b_list] = bh;
 372 
 373         bh->b_next_free = lru_list[bh->b_list];
 374         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 375         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 376         lru_list[bh->b_list]->b_prev_free = bh;
 377 }
 378 
 379 static inline void put_last_free(struct buffer_head * bh)
 380 {
 381         int isize;
 382         if (!bh)
 383                 return;
 384 
 385         isize = BUFSIZE_INDEX(bh->b_size);      
 386         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 387         /* add to back of free list */
 388         if(!free_list[isize]) {
 389                 free_list[isize] = bh;
 390                 bh->b_prev_free = bh;
 391         };
 392 
 393         nr_free[isize]++;
 394         bh->b_next_free = free_list[isize];
 395         bh->b_prev_free = free_list[isize]->b_prev_free;
 396         free_list[isize]->b_prev_free->b_next_free = bh;
 397         free_list[isize]->b_prev_free = bh;
 398 }
 399 
 400 static inline void insert_into_queues(struct buffer_head * bh)
 401 {
 402         /* put at end of free list */
 403         if(bh->b_dev == B_FREE) {
 404                 put_last_free(bh);
 405                 return;
 406         }
 407         if(!lru_list[bh->b_list]) {
 408                 lru_list[bh->b_list] = bh;
 409                 bh->b_prev_free = bh;
 410         }
 411         if (!next_to_age[bh->b_list])
 412                 next_to_age[bh->b_list] = bh;
 413         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 414         bh->b_next_free = lru_list[bh->b_list];
 415         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 416         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 417         lru_list[bh->b_list]->b_prev_free = bh;
 418         nr_buffers_type[bh->b_list]++;
 419         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 420 /* put the buffer in new hash-queue if it has a device */
 421         bh->b_prev = NULL;
 422         bh->b_next = NULL;
 423         if (!(bh->b_dev))
 424                 return;
 425         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 426         hash(bh->b_dev,bh->b_blocknr) = bh;
 427         if (bh->b_next)
 428                 bh->b_next->b_prev = bh;
 429 }
 430 
 431 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 432 {               
 433         struct buffer_head * tmp;
 434 
 435         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 436                 if (tmp->b_dev == dev && tmp->b_blocknr == block)
 437                         if (tmp->b_size == size)
 438                                 return tmp;
 439                         else {
 440                                 printk("VFS: Wrong blocksize on device %s\n",
 441                                         kdevname(dev));
 442                                 return NULL;
 443                         }
 444         return NULL;
 445 }
 446 
 447 /*
 448  * Why like this, I hear you say... The reason is race-conditions.
 449  * As we don't lock buffers (unless we are reading them, that is),
 450  * something might happen to it while we sleep (ie a read-error
 451  * will force it bad). This shouldn't really happen currently, but
 452  * the code is ready.
 453  */
 454 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 455 {
 456         struct buffer_head * bh;
 457 
 458         for (;;) {
 459                 if (!(bh=find_buffer(dev,block,size)))
 460                         return NULL;
 461                 bh->b_reuse=0;
 462                 bh->b_count++;
 463                 wait_on_buffer(bh);
 464                 if (bh->b_dev == dev && bh->b_blocknr == block
 465                                              && bh->b_size == size)
 466                         return bh;
 467                 bh->b_count--;
 468         }
 469 }
 470 
 471 void set_blocksize(kdev_t dev, int size)
 472 {
 473         int i, nlist;
 474         struct buffer_head * bh, *bhnext;
 475 
 476         if (!blksize_size[MAJOR(dev)])
 477                 return;
 478 
 479         switch(size) {
 480                 default: panic("Invalid blocksize passed to set_blocksize");
 481                 case 512: case 1024: case 2048: case 4096:;
 482         }
 483 
 484         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 485                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 486                 return;
 487         }
 488         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 489                 return;
 490         sync_buffers(dev, 2);
 491         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 492 
 493   /* We need to be quite careful how we do this - we are moving entries
 494      around on the free list, and we can get in a loop if we are not careful.*/
 495 
 496         for(nlist = 0; nlist < NR_LIST; nlist++) {
 497                 bh = lru_list[nlist];
 498                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 499                         if(!bh) break;
 500                         bhnext = bh->b_next_free; 
 501                         if (bh->b_dev != dev)
 502                                  continue;
 503                         if (bh->b_size == size)
 504                                  continue;
 505                         
 506                         wait_on_buffer(bh);
 507                         if (bh->b_dev == dev && bh->b_size != size) {
 508                                 bh->b_uptodate = bh->b_dirt = bh->b_req =
 509                                          bh->b_flushtime = 0;
 510                         };
 511                         remove_from_hash_queue(bh);
 512                 }
 513         }
 514 }
 515 
 516 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
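      /* BADNESS() ranks a buffer as a reclaim victim: 0 = clean and
         unlocked (ideal), 1 = locked, 2 = dirty, 3 = dirty and locked.
         refill_freelist() only steals buffers with BADNESS() == 0. */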
 517 
 518 void refill_freelist(int size)
 519 {
 520         struct buffer_head * bh, * tmp;
 521         struct buffer_head * candidate[NR_LIST];
 522         unsigned int best_time, winner;
 523         int isize = BUFSIZE_INDEX(size);
 524         int buffers[NR_LIST];
 525         int i;
 526         int needed;
 527 
 528         /* First see if we even need this.  Sometimes it is advantageous
  529          to request some blocks in a filesystem that we know we will
 530          be needing ahead of time. */
 531 
 532         if (nr_free[isize] > 100)
 533                 return;
 534 
 535         /* If there are too many dirty buffers, we wake up the update process
 536            now so as to ensure that there are still clean buffers available
 537            for user processes to use (and dirty) */
 538         
 539         /* We are going to try and locate this much memory */
  540         needed = bdf_prm.b_un.nrefill * size;
 541 
 542         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 543                grow_buffers(GFP_BUFFER, size)) {
 544                 needed -= PAGE_SIZE;
 545         }
 546 
 547         if(needed <= 0) return;
 548 
 549         /* See if there are too many buffers of a different size.
 550            If so, victimize them */
 551 
 552         while(maybe_shrink_lav_buffers(size))
 553          {
 554                  if(!grow_buffers(GFP_BUFFER, size)) break;
 555                  needed -= PAGE_SIZE;
 556                  if(needed <= 0) return;
 557          };
 558 
 559         /* OK, we cannot grow the buffer cache, now try and get some
 560            from the lru list */
 561 
 562         /* First set the candidate pointers to usable buffers.  This
 563            should be quick nearly all of the time. */
 564 
 565 repeat0:
 566         for(i=0; i<NR_LIST; i++){
 567                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 568                    nr_buffers_type[i] == 0) {
 569                         candidate[i] = NULL;
 570                         buffers[i] = 0;
 571                         continue;
 572                 }
 573                 buffers[i] = nr_buffers_type[i];
 574                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 575                  {
 576                          if(buffers[i] < 0) panic("Here is the problem");
 577                          tmp = bh->b_next_free;
 578                          if (!bh) break;
 579                          
 580                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 581                              bh->b_dirt) {
 582                                  refile_buffer(bh);
 583                                  continue;
 584                          };
 585                          
 586                          if (bh->b_count || bh->b_size != size)
 587                                   continue;
 588                          
 589                          /* Buffers are written in the order they are placed 
 590                             on the locked list. If we encounter a locked
 591                             buffer here, this means that the rest of them
 592                             are also locked */
 593                          if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 594                                  buffers[i] = 0;
 595                                  break;
 596                          }
 597                          
 598                          if (BADNESS(bh)) continue;
 599                          break;
 600                  };
 601                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 602                 else candidate[i] = bh;
 603                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 604         }
 605         
 606  repeat:
 607         if(needed <= 0) return;
 608         
 609         /* Now see which candidate wins the election */
 610         
 611         winner = best_time = UINT_MAX;  
 612         for(i=0; i<NR_LIST; i++){
 613                 if(!candidate[i]) continue;
 614                 if(candidate[i]->b_lru_time < best_time){
 615                         best_time = candidate[i]->b_lru_time;
 616                         winner = i;
 617                 }
 618         }
 619         
 620         /* If we have a winner, use it, and then get a new candidate from that list */
 621         if(winner != UINT_MAX) {
 622                 i = winner;
 623                 bh = candidate[i];
 624                 candidate[i] = bh->b_next_free;
 625                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 626                 if (bh->b_count || bh->b_size != size)
 627                          panic("Busy buffer in candidate list\n");
 628                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 629                          panic("Shared buffer in candidate list\n");
 630                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 631                 
 632                 if(bh->b_dev == B_FREE)
 633                         panic("Wrong list");
 634                 remove_from_queues(bh);
 635                 bh->b_dev = B_FREE;
 636                 put_last_free(bh);
 637                 needed -= bh->b_size;
 638                 buffers[i]--;
 639                 if(buffers[i] < 0) panic("Here is the problem");
 640                 
 641                 if(buffers[i] == 0) candidate[i] = NULL;
 642                 
 643                 /* Now all we need to do is advance the candidate pointer
 644                    from the winner list to the next usable buffer */
 645                 if(candidate[i] && buffers[i] > 0){
 646                         if(buffers[i] <= 0) panic("Here is another problem");
 647                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 648                                 if(buffers[i] < 0) panic("Here is the problem");
 649                                 tmp = bh->b_next_free;
 650                                 if (!bh) break;
 651                                 
 652                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 653                                     bh->b_dirt) {
 654                                         refile_buffer(bh);
 655                                         continue;
 656                                 };
 657                                 
 658                                 if (bh->b_count || bh->b_size != size)
 659                                          continue;
 660                                 
 661                                 /* Buffers are written in the order they are
 662                                    placed on the locked list.  If we encounter
 663                                    a locked buffer here, this means that the
 664                                    rest of them are also locked */
 665                                 if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 666                                         buffers[i] = 0;
 667                                         break;
 668                                 }
 669               
 670                                 if (BADNESS(bh)) continue;
 671                                 break;
 672                         };
 673                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 674                         else candidate[i] = bh;
 675                         if(candidate[i] && candidate[i]->b_count) 
 676                                  panic("Here is the problem");
 677                 }
 678                 
 679                 goto repeat;
 680         }
 681         
 682         if(needed <= 0) return;
 683         
 684         /* Too bad, that was not enough. Try a little harder to grow some. */
 685         
 686         if (nr_free_pages > min_free_pages + 5) {
 687                 if (grow_buffers(GFP_BUFFER, size)) {
 688                         needed -= PAGE_SIZE;
 689                         goto repeat0;
 690                 };
 691         }
 692         
 693         /* and repeat until we find something good */
 694         if (!grow_buffers(GFP_ATOMIC, size))
 695                 wakeup_bdflush(1);
 696         needed -= PAGE_SIZE;
 697         goto repeat0;
 698 }
 699 
 700 /*
 701  * Ok, this is getblk, and it isn't very clear, again to hinder
 702  * race-conditions. Most of the code is seldom used, (ie repeating),
 703  * so it should be much more efficient than it looks.
 704  *
 705  * The algorithm is changed: hopefully better, and an elusive bug removed.
 706  *
 707  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 708  * when the filesystem starts to get full of dirty blocks (I hope).
 709  */
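      /* Note that this getblk() never returns NULL: it sleeps in
         refill_freelist() until a suitable buffer exists.  The buffer comes
         back with a reference held (b_count raised) but is not necessarily
         uptodate; callers that need the data should use bread() instead. */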
 710 struct buffer_head * getblk(kdev_t dev, int block, int size)
 711 {
 712         struct buffer_head * bh;
 713         int isize = BUFSIZE_INDEX(size);
 714 
 715         /* Update this for the buffer size lav. */
 716         buffer_usage[isize]++;
 717 
 718         /* If there are too many dirty buffers, we wake up the update process
 719            now so as to ensure that there are still clean buffers available
 720            for user processes to use (and dirty) */
 721 repeat:
 722         bh = get_hash_table(dev, block, size);
 723         if (bh) {
 724                 if (bh->b_uptodate && !bh->b_dirt)
 725                          put_last_lru(bh);
 726                 if(!bh->b_dirt) bh->b_flushtime = 0;
 727                 bh->b_touched = 1;
 728                 return bh;
 729         }
 730 
 731         while(!free_list[isize]) refill_freelist(size);
 732         
 733         if (find_buffer(dev,block,size))
 734                  goto repeat;
 735 
 736         bh = free_list[isize];
 737         remove_from_free_list(bh);
 738 
 739 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 740 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 741         bh->b_count=1;
 742         bh->b_dirt=0;
 743         bh->b_lock=0;
 744         bh->b_uptodate=0;
 745         bh->b_flushtime=0;
 746         bh->b_req=0;
 747         bh->b_reuse=0;
 748         bh->b_touched = 1;
 749         bh->b_has_aged = 0;
 750         bh->b_dev=dev;
 751         bh->b_blocknr=block;
 752         insert_into_queues(bh);
 753         return bh;
 754 }
 755 
 756 void set_writetime(struct buffer_head * buf, int flag)
 757 {
 758         int newtime;
 759 
 760         if (buf->b_dirt){
  761                 /* Record when this dirty buffer should be written out, keeping the earlier deadline */
 762                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 763                                      bdf_prm.b_un.age_buffer);
 764                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 765                          buf->b_flushtime = newtime;
 766         } else {
 767                 buf->b_flushtime = 0;
 768         }
 769 }
 770 
 771 
 772 void refile_buffer(struct buffer_head * buf)
 773 {
 774         int dispose;
 775 
 776         if(buf->b_dev == B_FREE) {
 777                 printk("Attempt to refile free buffer\n");
 778                 return;
 779         }
 780         if (buf->b_dirt)
 781                 dispose = BUF_DIRTY;
 782         else if (mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1)
 783                 dispose = BUF_SHARED;
 784         else if (buf->b_lock)
 785                 dispose = BUF_LOCKED;
 786         else if (buf->b_list == BUF_SHARED)
 787                 dispose = BUF_UNSHARED;
 788         else
 789                 dispose = BUF_CLEAN;
 790         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 791         if(dispose != buf->b_list)  {
 792                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 793                          buf->b_lru_time = jiffies;
 794                 if(dispose == BUF_LOCKED && 
 795                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 796                          dispose = BUF_LOCKED1;
 797                 remove_from_queues(buf);
 798                 buf->b_list = dispose;
 799                 insert_into_queues(buf);
 800                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 801                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 802                    bdf_prm.b_un.nfract/100)
 803                          wakeup_bdflush(0);
 804         }
 805 }
 806 
 807 /*
 808  * Release a buffer head
 809  */
 810 void __brelse(struct buffer_head * buf)
 811 {
 812         wait_on_buffer(buf);
 813 
 814         /* If dirty, mark the time this buffer should be written back */
 815         set_writetime(buf, 0);
 816         refile_buffer(buf);
 817 
 818         if (buf->b_count) {
 819                 if (!--buf->b_count)
 820                         wake_up(&buffer_wait);
 821                 return;
 822         }
 823         printk("VFS: brelse: Trying to free free buffer\n");
 824 }
 825 
 826 /*
  827  * bforget() is like brelse(), except it throws the buffer away
 828  */
 829 void __bforget(struct buffer_head * buf)
 830 {
 831         wait_on_buffer(buf);
 832         if (buf->b_count != 1) {
 833                 printk("Aieee... bforget(): count = %d\n", buf->b_count);
 834                 return;
 835         }
 836         if (mem_map[MAP_NR(buf->b_data)].count != 1) {
 837                 printk("Aieee... bforget(): shared buffer\n");
 838                 return;
 839         }
 840         mark_buffer_clean(buf);
 841         buf->b_count = 0;
 842         remove_from_queues(buf);
 843         buf->b_dev = B_FREE;
 844         put_last_free(buf);
 845         wake_up(&buffer_wait);
 846 }
 847 
 848 /*
 849  * bread() reads a specified block and returns the buffer that contains
 850  * it. It returns NULL if the block was unreadable.
 851  */
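      /* Typical usage (illustrative only - error handling is up to the
       * caller):
       *
       *      struct buffer_head * bh;
       *
       *      bh = bread(dev, block, BLOCK_SIZE);
       *      if (!bh)
       *              return -EIO;
       *      ...use bh->b_data...
       *      brelse(bh);
       */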
 852 struct buffer_head * bread(kdev_t dev, int block, int size)
 853 {
 854         struct buffer_head * bh;
 855 
 856         if (!(bh = getblk(dev, block, size))) {
 857                 printk("VFS: bread: READ error on device %s\n",
 858                         kdevname(dev));
 859                 return NULL;
 860         }
 861         if (bh->b_uptodate)
 862                 return bh;
 863         ll_rw_block(READ, 1, &bh);
 864         wait_on_buffer(bh);
 865         if (bh->b_uptodate)
 866                 return bh;
 867         brelse(bh);
 868         return NULL;
 869 }
 870 
 871 /*
  872  * Ok, breada can be used as bread, but additionally it starts
  873  * read-ahead on the blocks that follow, limited by the device's
  874  * read_ahead setting, the end of the file, and NBUF.
 875  */
 876 
 877 #define NBUF 16
 878 
 879 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 880         unsigned int pos, unsigned int filesize)
 881 {
 882         struct buffer_head * bhlist[NBUF];
 883         unsigned int blocks;
 884         struct buffer_head * bh;
 885         int index;
 886         int i, j;
 887 
 888         if (pos >= filesize)
 889                 return NULL;
 890 
 891         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 892                 return NULL;
 893 
 894         index = BUFSIZE_INDEX(bh->b_size);
 895 
 896         if (bh->b_uptodate)
 897                 return bh;
 898 
 899         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 900 
 901         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 902                 blocks = read_ahead[MAJOR(dev)] >> index;
 903         if (blocks > NBUF)
 904                 blocks = NBUF;
 905         
 906         bhlist[0] = bh;
 907         j = 1;
 908         for(i=1; i<blocks; i++) {
 909                 bh = getblk(dev,block+i,bufsize);
 910                 if (bh->b_uptodate) {
 911                         brelse(bh);
 912                         break;
 913                 }
 914                 bhlist[j++] = bh;
 915         }
 916 
 917         /* Request the read for these buffers, and then release them */
 918         ll_rw_block(READ, j, bhlist);
 919 
 920         for(i=1; i<j; i++)
 921                 brelse(bhlist[i]);
 922 
 923         /* Wait for this buffer, and then continue on */
 924         bh = bhlist[0];
 925         wait_on_buffer(bh);
 926         if (bh->b_uptodate)
 927                 return bh;
 928         brelse(bh);
 929         return NULL;
 930 }
 931 
 932 /*
 933  * See fs/inode.c for the weird use of volatile..
 934  */
 935 static void put_unused_buffer_head(struct buffer_head * bh)
 936 {
 937         struct wait_queue * wait;
 938 
 939         wait = ((volatile struct buffer_head *) bh)->b_wait;
 940         memset(bh,0,sizeof(*bh));
 941         ((volatile struct buffer_head *) bh)->b_wait = wait;
 942         bh->b_next_free = unused_list;
 943         unused_list = bh;
 944 }
 945 
 946 static void get_more_buffer_heads(void)
 947 {
 948         int i;
 949         struct buffer_head * bh;
 950 
 951         if (unused_list)
 952                 return;
 953 
 954         if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 955                 return;
 956 
 957         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 958                 bh->b_next_free = unused_list;  /* only make link */
 959                 unused_list = bh++;
 960         }
 961 }
 962 
 963 static struct buffer_head * get_unused_buffer_head(void)
 964 {
 965         struct buffer_head * bh;
 966 
 967         get_more_buffer_heads();
 968         if (!unused_list)
 969                 return NULL;
 970         bh = unused_list;
 971         unused_list = bh->b_next_free;
 972         bh->b_next_free = NULL;
 973         bh->b_data = NULL;
 974         bh->b_size = 0;
 975         bh->b_req = 0;
 976         return bh;
 977 }
 978 
 979 /*
 980  * Create the appropriate buffers when given a page for data area and
 981  * the size of each buffer.. Use the bh->b_this_page linked list to
 982  * follow the buffers created.  Return NULL if unable to create more
 983  * buffers.
 984  */
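      /* The chain returned below is linear (the last buffer's b_this_page
         is NULL); callers such as grow_buffers() and try_to_load_aligned()
         close it into the usual circular per-page ring once the buffers
         have been initialized. */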
 985 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 986 {
 987         struct buffer_head *bh, *head;
 988         unsigned long offset;
 989 
 990         head = NULL;
 991         offset = PAGE_SIZE;
 992         while ((offset -= size) < PAGE_SIZE) {
 993                 bh = get_unused_buffer_head();
 994                 if (!bh)
 995                         goto no_grow;
 996                 bh->b_this_page = head;
 997                 head = bh;
 998                 bh->b_data = (char *) (page+offset);
 999                 bh->b_size = size;
1000                 bh->b_dev = B_FREE;  /* Flag as unused */
1001         }
1002         return head;
1003 /*
1004  * In case anything failed, we just free everything we got.
1005  */
1006 no_grow:
1007         bh = head;
1008         while (bh) {
1009                 head = bh;
1010                 bh = bh->b_this_page;
1011                 put_unused_buffer_head(head);
1012         }
1013         return NULL;
1014 }
1015 
1016 static void read_buffers(struct buffer_head * bh[], int nrbuf)
1017 {
1018         int i;
1019         int bhnum = 0;
1020         struct buffer_head * bhr[MAX_BUF_PER_PAGE];
1021 
1022         for (i = 0 ; i < nrbuf ; i++) {
1023                 if (bh[i] && !bh[i]->b_uptodate)
1024                         bhr[bhnum++] = bh[i];
1025         }
1026         if (bhnum)
1027                 ll_rw_block(READ, bhnum, bhr);
1028         for (i = nrbuf ; --i >= 0 ; ) {
1029                 if (bh[i]) {
1030                         wait_on_buffer(bh[i]);
1031                 }
1032         }
1033 }
1034 
1035 /*
1036  * This actually gets enough info to try to align the stuff,
1037  * but we don't bother yet.. We'll have to check that nobody
1038  * else uses the buffers etc.
1039  *
1040  * "address" points to the new page we can use to move things
1041  * around..
1042  */
1043 static inline unsigned long try_to_align(struct buffer_head ** bh, int nrbuf,
1044         unsigned long address)
1045 {
1046         while (nrbuf-- > 0)
1047                 brelse(bh[nrbuf]);
1048         return 0;
1049 }
1050 
1051 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
1052         kdev_t dev, int *b, int size)
1053 {
1054         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1055         unsigned long page;
1056         unsigned long offset;
1057         int block;
1058         int nrbuf;
1059         int aligned = 1;
1060 
1061         bh[0] = first;
1062         nrbuf = 1;
1063         page = (unsigned long) first->b_data;
1064         if (page & ~PAGE_MASK)
1065                 aligned = 0;
1066         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
1067                 block = *++b;
1068                 if (!block)
1069                         goto no_go;
1070                 first = get_hash_table(dev, block, size);
1071                 if (!first)
1072                         goto no_go;
1073                 bh[nrbuf++] = first;
1074                 if (page+offset != (unsigned long) first->b_data)
1075                         aligned = 0;
1076         }
1077         if (!aligned)
1078                 return try_to_align(bh, nrbuf, address);
1079         mem_map[MAP_NR(page)].count++;
1080         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
1081         while (nrbuf-- > 0)
1082                 brelse(bh[nrbuf]);
1083         free_page(address);
1084         ++current->min_flt;
1085         return page;
1086 no_go:
1087         while (nrbuf-- > 0)
1088                 brelse(bh[nrbuf]);
1089         return 0;
1090 }
1091 
1092 static unsigned long try_to_load_aligned(unsigned long address,
1093         kdev_t dev, int b[], int size)
1094 {
1095         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1096         unsigned long offset;
1097         int isize = BUFSIZE_INDEX(size);
1098         int * p;
1099         int block;
1100 
1101         bh = create_buffers(address, size);
1102         if (!bh)
1103                 return 0;
1104         /* do any of the buffers already exist? punt if so.. */
1105         p = b;
1106         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1107                 block = *(p++);
1108                 if (!block)
1109                         goto not_aligned;
1110                 if (find_buffer(dev, block, size))
1111                         goto not_aligned;
1112         }
1113         tmp = bh;
1114         p = b;
1115         block = 0;
1116         while (1) {
1117                 arr[block++] = bh;
1118                 bh->b_count = 1;
1119                 bh->b_dirt = 0;
1120                 bh->b_reuse = 0;
1121                 bh->b_flushtime = 0;
1122                 bh->b_uptodate = 0;
1123                 bh->b_req = 0;
1124                 bh->b_dev = dev;
1125                 bh->b_blocknr = *(p++);
1126                 bh->b_list = BUF_CLEAN;
1127                 nr_buffers++;
1128                 nr_buffers_size[isize]++;
1129                 insert_into_queues(bh);
1130                 if (bh->b_this_page)
1131                         bh = bh->b_this_page;
1132                 else
1133                         break;
1134         }
1135         buffermem += PAGE_SIZE;
1136         bh->b_this_page = tmp;
1137         mem_map[MAP_NR(address)].count++;
1138         buffer_pages[MAP_NR(address)] = bh;
1139         read_buffers(arr,block);
1140         while (block-- > 0)
1141                 brelse(arr[block]);
1142         ++current->maj_flt;
1143         return address;
1144 not_aligned:
1145         while ((tmp = bh) != NULL) {
1146                 bh = bh->b_this_page;
1147                 put_unused_buffer_head(tmp);
1148         }
1149         return 0;
1150 }
1151 
1152 /*
1153  * Try-to-share-buffers tries to minimize memory use by trying to keep
1154  * both code pages and the buffer area in the same page. This is done by
1155  * (a) checking if the buffers are already aligned correctly in memory and
1156  * (b) if none of the buffer heads are in memory at all, trying to load
1157  * them into memory the way we want them.
1158  *
1159  * This doesn't guarantee that the memory is shared, but should under most
1160  * circumstances work very well indeed (ie >90% sharing of code pages on
1161  * demand-loadable executables).
1162  */
1163 static inline unsigned long try_to_share_buffers(unsigned long address,
1164         kdev_t dev, int *b, int size)
1165 {
1166         struct buffer_head * bh;
1167         int block;
1168 
1169         block = b[0];
1170         if (!block)
1171                 return 0;
1172         bh = get_hash_table(dev, block, size);
1173         if (bh)
1174                 return check_aligned(bh, address, dev, b, size);
1175         return try_to_load_aligned(address, dev, b, size);
1176 }
1177 
1178 /*
 1179  * bread_page reads a page's worth of buffers into memory at the desired address. It's
1180  * a function of its own, as there is some speed to be got by reading them
1181  * all at the same time, not waiting for one to be read, and then another
1182  * etc. This also allows us to optimize memory usage by sharing code pages
1183  * and filesystem buffers..
1184  */
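      /* If sharing is not allowed (no_share) or cannot be arranged, the
         blocks are read through the buffer cache and their contents are
         copied into the page; a zero entry in b[] is a hole and is simply
         zero-filled. */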
1185 unsigned long bread_page(unsigned long address, kdev_t dev, int b[], int size, int no_share)
1186 {
1187         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1188         unsigned long where;
1189         int i, j;
1190 
1191         if (!no_share) {
1192                 where = try_to_share_buffers(address, dev, b, size);
1193                 if (where)
1194                         return where;
1195         }
1196         ++current->maj_flt;
1197         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1198                 bh[i] = NULL;
1199                 if (b[i])
1200                         bh[i] = getblk(dev, b[i], size);
1201         }
1202         read_buffers(bh,i);
1203         where = address;
1204         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, where += size) {
1205                 if (bh[i]) {
1206                         if (bh[i]->b_uptodate)
1207                                 memcpy((void *) where, bh[i]->b_data, size);
1208                         brelse(bh[i]);
1209                 } else
1210                         memset((void *) where, 0, size);
1211         }
1212         return address;
1213 }
1214 
1215 #if 0
1216 /*
1217  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1218  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1219  */
1220 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1221 {
1222         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1223         int i, j;
1224 
1225         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1226                 bh[i] = NULL;
1227                 if (b[i])
1228                         bh[i] = getblk(dev, b[i], size);
1229         }
1230         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1231                 if (bh[i]) {
1232                         memcpy(bh[i]->b_data, (void *) address, size);
1233                         bh[i]->b_uptodate = 1;
1234                         mark_buffer_dirty(bh[i], 0);
1235                         brelse(bh[i]);
1236                 } else
1237                         memset((void *) address, 0, size); /* ???!?!! */
1238         }       
1239 }
1240 #endif
1241 
1242 /*
1243  * Try to increase the number of buffers available: the size argument
1244  * is used to determine what kind of buffers we want.
1245  */
1246 static int grow_buffers(int pri, int size)
1247 {
1248         unsigned long page;
1249         struct buffer_head *bh, *tmp;
1250         struct buffer_head * insert_point;
1251         int isize;
1252 
1253         if ((size & 511) || (size > PAGE_SIZE)) {
1254                 printk("VFS: grow_buffers: size = %d\n",size);
1255                 return 0;
1256         }
1257 
1258         isize = BUFSIZE_INDEX(size);
1259 
1260         if (!(page = __get_free_page(pri)))
1261                 return 0;
1262         bh = create_buffers(page, size);
1263         if (!bh) {
1264                 free_page(page);
1265                 return 0;
1266         }
1267 
1268         insert_point = free_list[isize];
1269 
1270         tmp = bh;
1271         while (1) {
1272                 nr_free[isize]++;
1273                 if (insert_point) {
1274                         tmp->b_next_free = insert_point->b_next_free;
1275                         tmp->b_prev_free = insert_point;
1276                         insert_point->b_next_free->b_prev_free = tmp;
1277                         insert_point->b_next_free = tmp;
1278                 } else {
1279                         tmp->b_prev_free = tmp;
1280                         tmp->b_next_free = tmp;
1281                 }
1282                 insert_point = tmp;
1283                 ++nr_buffers;
1284                 if (tmp->b_this_page)
1285                         tmp = tmp->b_this_page;
1286                 else
1287                         break;
1288         }
1289         free_list[isize] = bh;
1290         buffer_pages[MAP_NR(page)] = bh;
1291         tmp->b_this_page = bh;
1292         wake_up(&buffer_wait);
1293         buffermem += PAGE_SIZE;
1294         return 1;
1295 }
1296 
1297 
1298 /* =========== Reduce the buffer memory ============= */
1299 
1300 /*
1301  * try_to_free() checks if all the buffers on this particular page
1302  * are unused, and free's the page if so.
1303  */
1304 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp,
1305                        int priority)
1306 {
1307         unsigned long page;
1308         struct buffer_head * tmp, * p;
1309         int isize = BUFSIZE_INDEX(bh->b_size);
1310 
1311         *bhp = bh;
1312         page = (unsigned long) bh->b_data;
1313         page &= PAGE_MASK;
1314         tmp = bh;
1315         do {
1316                 if (!tmp)
1317                         return 0;
1318                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
1319                         return 0;
1320                 if (priority && tmp->b_touched)
1321                         return 0;
1322                 tmp = tmp->b_this_page;
1323         } while (tmp != bh);
1324         tmp = bh;
1325         do {
1326                 p = tmp;
1327                 tmp = tmp->b_this_page;
1328                 nr_buffers--;
1329                 nr_buffers_size[isize]--;
1330                 if (p == *bhp)
1331                   {
1332                     *bhp = p->b_prev_free;
1333                     if (p == *bhp) /* Was this the last in the list? */
1334                       *bhp = NULL;
1335                   }
1336                 remove_from_queues(p);
1337                 put_unused_buffer_head(p);
1338         } while (tmp != bh);
1339         buffermem -= PAGE_SIZE;
1340         buffer_pages[MAP_NR(page)] = NULL;
1341         free_page(page);
1342         return !mem_map[MAP_NR(page)].count;
1343 }
1344 
1345 /* Age buffers on a given page, according to whether they have been
1346    visited recently or not. */
1347 static inline void age_buffer(struct buffer_head *bh)
1348 {
1349         struct buffer_head *tmp = bh;
1350         int touched = 0;
1351 
1352         /*
1353          * When we age a page, we mark all other buffers in the page
1354          * with the "has_aged" flag.  Then, when these aliased buffers
1355          * come up for aging, we skip them until next pass.  This
1356          * ensures that a page full of multiple buffers only gets aged
1357          * once per pass through the lru lists. 
1358          */
1359         if (bh->b_has_aged) {
1360                 bh->b_has_aged = 0;
1361                 return;
1362         }
1363         
1364         do {
1365                 touched |= tmp->b_touched;
1366                 tmp->b_touched = 0;
1367                 tmp = tmp->b_this_page;
1368                 tmp->b_has_aged = 1;
1369         } while (tmp != bh);
1370         bh->b_has_aged = 0;
1371 
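             /* If any buffer in the page has been referenced since the last
                pass, promote the whole page; otherwise let its age decay so
                that the shrinking code will eventually reclaim it. */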
1372         if (touched) 
1373                 touch_page((unsigned long) bh->b_data);
1374         else
1375                 age_page((unsigned long) bh->b_data);
1376 }
1377 
1378 /*
1379  * Consult the load average for buffers and decide whether or not
1380  * we should shrink the buffers of one size.  If we decide to, do it
1381  * and return 1; otherwise return 0.  We never try to shrink buffers
1382  * of the size that is specified.
1383  *
1384  * I would prefer not to use a load average, but the way things are now it
1385  * seems unavoidable.  The way to get rid of it would be to force clustering
1386  * universally, so that when we reclaim buffers we always reclaim an entire
1387  * page.  Doing this would mean that we all need to move towards QMAGIC.
1388  */
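/*
 * The test used below can be read as a comparison of two ratios: size s is
 * victimised when its share of the recent load average,
 *
 *      buffers_lav[s] / total_lav
 *
 * drops below its share of the unshared buffer population scaled down by
 * bdf_prm.b_un.lav_const,
 *
 *      (nr_buffers_size[s] - nr_buffers_st[s][BUF_SHARED])
 *      ---------------------------------------------------
 *               lav_const * total_n_buffers
 *
 * i.e. sizes that hold many buffers but see little recent traffic are the
 * ones that get shrunk.
 */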
1389 
1390 static int maybe_shrink_lav_buffers(int size)
1391 {          
1392         int nlist;
1393         int isize;
1394         int total_lav, total_n_buffers, n_sizes;
1395         
1396         /* Do not consider the shared buffers since they would not tend
1397            to have getblk called very often, and this would throw off
1398            the lav.  They are not easily reclaimable anyway (let the swapper
1399            make the first move). */
1400   
1401         total_lav = total_n_buffers = n_sizes = 0;
1402         for(nlist = 0; nlist < NR_SIZES; nlist++)
1403          {
1404                  total_lav += buffers_lav[nlist];
1405                  if(nr_buffers_size[nlist]) n_sizes++;
1406                  total_n_buffers += nr_buffers_size[nlist];
1407                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1408          }
1409         
1410         /* See if we have an excessive number of buffers of a particular
1411            size - if so, victimize that bunch. */
1412   
1413         isize = (size ? BUFSIZE_INDEX(size) : -1);
1414         
1415         if (n_sizes > 1)
1416                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1417                   {
1418                           if(nlist == isize) continue;
1419                           if(nr_buffers_size[nlist] &&
1420                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1421                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1422                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1423                                             return 1;
1424                   }
1425         return 0;
1426 }
1427 
1428 /*
1429  * Try to free up some pages by shrinking the buffer-cache
1430  *
1431  * Priority tells the routine how hard to try to shrink the
1432  * buffers: 6 means "don't bother too much", while a value
1433  * of 0 means "we'd better get some free pages now".
1434  *
1435  * "limit" is meant to limit the shrink-action only to pages
1436  * that are in the 0 - limit address range, for DMA re-allocations.
1437  * We ignore that right now.
1438  */
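/*
 * Purely as an illustration (the real callers live in the memory-management
 * code, not in this file), a page-stealing routine would typically retry at
 * successively lower priorities, along the lines of
 *
 *      int priority = 6;
 *      while (priority >= 0 && !shrink_buffers(priority, limit))
 *              priority--;
 *
 * so that syncing (priority < 2) and waking bdflush (priority == 2) only
 * happen once the gentler attempts have failed.
 */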
1439 int shrink_buffers(unsigned int priority, unsigned long limit)
1440 {
1441         if (priority < 2) {
1442                 sync_buffers(0,0);
1443         }
1444 
1445         if(priority == 2) wakeup_bdflush(1);
1446 
1447         if(maybe_shrink_lav_buffers(0)) return 1;
1448 
1449         /* No good candidate size - take any size we can find */
1450         return shrink_specific_buffers(priority, 0);
1451 }
1452 
1453 static int shrink_specific_buffers(unsigned int priority, int size)
1454 {
1455         struct buffer_head *bh;
1456         int nlist;
1457         int i, isize, isize1;
1458 
1459 #ifdef DEBUG
1460         if(size) printk("Shrinking buffers of size %d\n", size);
1461 #endif
1462         /* First try the free lists, and see if we can get a complete page
1463            from here */
1464         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1465 
1466         for(isize = 0; isize<NR_SIZES; isize++){
1467                 if(isize1 != -1 && isize1 != isize) continue;
1468                 bh = free_list[isize];
1469                 if(!bh) continue;
1470                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1471                         if (bh->b_count || !bh->b_this_page)
1472                                  continue;
1473                         if (!age_of((unsigned long) bh->b_data) &&
1474                             try_to_free(bh, &bh, 6))
1475                                  return 1;
1476                         if(!bh) break;
1477                         /* Some interrupt must have used it after we
1478                            freed the page.  No big deal - keep looking */
1479                 }
1480         }
1481         
1482         /* Not enough in the free lists, now try the lru list */
1483         
1484         for(nlist = 0; nlist < NR_LIST; nlist++) {
1485         repeat1:
1486                 if(priority > 2 && nlist == BUF_SHARED) continue;
1487                 i = nr_buffers_type[nlist];
1488                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1489                 for ( ; i > 0; i-- ) {
1490                         bh = next_to_age[nlist];
1491                         if (!bh)
1492                                 break;
1493                         next_to_age[nlist] = bh->b_next_free;
1494 
1495                         /* First, age the buffer. */
1496                         age_buffer(bh);
1497                         /* We may have stalled while waiting for I/O
1498                            to complete. */
1499                         if(bh->b_list != nlist) goto repeat1;
1500                         if (bh->b_count || !bh->b_this_page)
1501                                  continue;
1502                         if(size && bh->b_size != size) continue;
1503                         if (bh->b_lock)
1504                                  if (priority)
1505                                           continue;
1506                                  else
1507                                           wait_on_buffer(bh);
1508                         if (bh->b_dirt) {
1509                                 bh->b_count++;
1510                                 bh->b_flushtime = 0;
1511                                 ll_rw_block(WRITEA, 1, &bh);
1512                                 bh->b_count--;
1513                                 continue;
1514                         }
1515                         /* At priority 6, only consider really old
1516                            (age==0) buffers for reclaiming.  At
1517                            priority 0, consider any buffers. */
1518                         if ((age_of((unsigned long) bh->b_data) >>
1519                              (6-priority)) > 0)
1520                                 continue;                               
1521                         if (try_to_free(bh, &bh, 0))
1522                                  return 1;
1523                         if(!bh) break;
1524                 }
1525         }
1526         return 0;
1527 }
1528 
1529 
1530 /* ================== Debugging =================== */
1531 
1532 void show_buffers(void)
1533 {
1534         struct buffer_head * bh;
1535         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1536         int shared;
1537         int nlist, isize;
1538 
1539         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1540         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1541         printk("Buffer blocks:   %6d\n",nr_buffers);
1542 
1543         for(nlist = 0; nlist < NR_LIST; nlist++) {
1544           shared = found = locked = dirty = used = lastused = 0;
1545           bh = lru_list[nlist];
1546           if(!bh) continue;
1547           do {
1548                 found++;
1549                 if (bh->b_lock)
1550                         locked++;
1551                 if (bh->b_dirt)
1552                         dirty++;
1553                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1554                 if (bh->b_count)
1555                         used++, lastused = found;
1556                 bh = bh->b_next_free;
1557               } while (bh != lru_list[nlist]);
1558         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
1559                 nlist, found, used, lastused, locked, dirty, shared);
1560         };
1561         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared\n");
1562         for(isize = 0; isize<NR_SIZES; isize++){
1563                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1564                        buffers_lav[isize], nr_free[isize]);
1565                 for(nlist = 0; nlist < NR_LIST; nlist++)
1566                          printk("%7d ", nr_buffers_st[isize][nlist]);
1567                 printk("\n");
1568         }
1569 }
1570 
1571 
1572 /* ====================== Cluster patches for ext2 ==================== */
1573 
1574 /*
1575  * try_to_reassign() checks if all the buffers on this particular page
1576  * are unused, and reassigns them to a new cluster if this is true.
1577  */
1578 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1579                            kdev_t dev, unsigned int starting_block)
1580 {
1581         unsigned long page;
1582         struct buffer_head * tmp, * p;
1583 
1584         *bhp = bh;
1585         page = (unsigned long) bh->b_data;
1586         page &= PAGE_MASK;
1587         if(mem_map[MAP_NR(page)].count != 1) return 0;
1588         tmp = bh;
1589         do {
1590                 if (!tmp)
1591                          return 0;
1592                 
1593                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
1594                          return 0;
1595                 tmp = tmp->b_this_page;
1596         } while (tmp != bh);
1597         tmp = bh;
1598         
1599         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1600                  tmp = tmp->b_this_page;
1601         
1602         /* This is the buffer at the head of the page */
1603         bh = tmp;
1604         do {
1605                 p = tmp;
1606                 tmp = tmp->b_this_page;
1607                 remove_from_queues(p);
1608                 p->b_dev = dev;
1609                 p->b_uptodate = 0;
1610                 p->b_req = 0;
1611                 p->b_blocknr = starting_block++;
1612                 insert_into_queues(p);
1613         } while (tmp != bh);
1614         return 1;
1615 }
1616 
1617 /*
1618  * Try to find a free cluster by locating a page where
1619  * all of the buffers are unused.  We would like this function
1620  * to be atomic, so we do not call anything that might cause
1621  * the process to sleep.  The priority is somewhat similar to
1622  * the priority used in shrink_buffers.
1623  * 
1624  * My thinking is that the kernel should end up using whole
1625  * pages for the buffer cache as much of the time as possible.
1626  * This way the other buffers on a particular page are likely
1627  * to be very near each other on the free list, and we will not
1628  * be expiring data prematurely.  For now we only cannibalize buffers
1629  * of the same size to keep the code simpler.
1630  */
1631 static int reassign_cluster(kdev_t dev, 
1632                      unsigned int starting_block, int size)
1633 {
1634         struct buffer_head *bh;
1635         int isize = BUFSIZE_INDEX(size);
1636         int i;
1637 
1638         /* We want to give ourselves a really good shot at generating
1639            a cluster, and since we only take buffers from the free
1640            list, we "overfill" it a little. */
1641 
1642         while(nr_free[isize] < 32) refill_freelist(size);
1643 
1644         bh = free_list[isize];
1645         if(bh)
1646                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1647                          if (!bh->b_this_page)  continue;
1648                          if (try_to_reassign(bh, &bh, dev, starting_block))
1649                                  return 4;
1650                  }
1651         return 0;
1652 }
1653 
1654 /* This function tries to generate a new cluster of buffers
1655  * from a new page in memory.  We should only do this if we have
1656  * not expanded the buffer cache to the maximum size that we allow.
1657  */
1658 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1659 {
1660         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1661         int isize = BUFSIZE_INDEX(size);
1662         unsigned long offset;
1663         unsigned long page;
1664         int nblock;
1665 
1666         page = get_free_page(GFP_NOBUFFER);
1667         if(!page) return 0;
1668 
1669         bh = create_buffers(page, size);
1670         if (!bh) {
1671                 free_page(page);
1672                 return 0;
1673         };
1674         nblock = block;
1675         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1676                 if (find_buffer(dev, nblock++, size))
1677                          goto not_aligned;
1678         }
1679         tmp = bh;
1680         nblock = 0;
1681         while (1) {
1682                 arr[nblock++] = bh;
1683                 bh->b_count = 1;
1684                 bh->b_dirt = 0;
1685                 bh->b_flushtime = 0;
1686                 bh->b_lock = 0;
1687                 bh->b_uptodate = 0;
1688                 bh->b_req = 0;
1689                 bh->b_dev = dev;
1690                 bh->b_list = BUF_CLEAN;
1691                 bh->b_blocknr = block++;
1692                 nr_buffers++;
1693                 nr_buffers_size[isize]++;
1694                 insert_into_queues(bh);
1695                 if (bh->b_this_page)
1696                         bh = bh->b_this_page;
1697                 else
1698                         break;
1699         }
1700         buffermem += PAGE_SIZE;
1701         buffer_pages[MAP_NR(page)] = bh;
1702         bh->b_this_page = tmp;
1703         while (nblock-- > 0)
1704                 brelse(arr[nblock]);
1705         return 4; /* ?? */
1706 not_aligned:
1707         while ((tmp = bh) != NULL) {
1708                 bh = bh->b_this_page;
1709                 put_unused_buffer_head(tmp);
1710         }
1711         free_page(page);
1712         return 0;
1713 }
1714 
1715 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1716 {
1717         int i, offset;
1718         
1719         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1720                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1721                 if(find_buffer(dev, b[i], size)) return 0;
1722         };
1723 
1724         /* OK, we have a candidate for a new cluster */
1725         
1726         /* See if one size of buffer is over-represented in the buffer cache,
1727            if so reduce the numbers of buffers */
1728         if(maybe_shrink_lav_buffers(size))
1729          {
1730                  int retval;
1731                  retval = try_to_generate_cluster(dev, b[0], size);
1732                  if(retval) return retval;
1733          };
1734         
1735         if (nr_free_pages > min_free_pages*2) 
1736                  return try_to_generate_cluster(dev, b[0], size);
1737         else
1738                  return reassign_cluster(dev, b[0], size);
1739 }
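/*
 * A sketch of how a filesystem might use generate_cluster() (illustrative
 * only; the real ext2 callers are elsewhere): collect the PAGE_SIZE/size
 * consecutive block numbers that are about to be read, and offer them as a
 * cluster before falling back to ordinary getblk()/bread() calls:
 *
 *      int blocks[MAX_BUF_PER_PAGE];
 *      (fill blocks[] with PAGE_SIZE/size consecutive block numbers)
 *      if (generate_cluster(dev, blocks, size))
 *              (the buffers for these blocks now share a single page)
 */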
1740 
1741 
1742 /* ===================== Init ======================= */
1743 
1744 /*
1745  * This initializes the buffer free list.  nr_buffers_type is set
1746  * to one less than the actual number of buffers, as a sop to backwards
1747  * compatibility --- the old code did this (I think unintentionally,
1748  * but I'm not sure), and programs in the ps package expect it.
1749  *                                      - TYT 8/30/92
1750  */
1751 void buffer_init(void)
1752 {
1753         int i;
1754         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1755         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1756 
1757         if (memsize >= 4*1024*1024) {
1758                 if(memsize >= 16*1024*1024)
1759                          nr_hash = 16381;
1760                 else
1761                          nr_hash = 4093;
1762         } else {
1763                 nr_hash = 997;
1764         };
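             /* These hash-table sizes appear to have been picked as primes
                (997, 4093, 16381); a prime modulus helps spread the
                (dev, block) pairs evenly over the buckets. */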
1765         
1766         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1767                                                      sizeof(struct buffer_head *));
1768 
1769 
1770         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1771                                                      sizeof(struct buffer_head *));
1772         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1773                 buffer_pages[i] = NULL;
1774 
1775         for (i = 0 ; i < nr_hash ; i++)
1776                 hash_table[i] = NULL;
1777         lru_list[BUF_CLEAN] = 0;
1778         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1779         if (!free_list[isize])
1780                 panic("VFS: Unable to initialize buffer free list!");
1781         return;
1782 }
1783 
1784 
1785 /* ====================== bdflush support =================== */
1786 
1787 /* This is a simple kernel daemon, whose job is to provide a dynamic
1788  * response to dirty buffers.  Once this process is activated, we write back
1789  * a limited number of buffers to the disks and then go back to sleep again.
1790  * In effect this is a process which never leaves kernel mode, and does not have
1791  * any user memory associated with it except for the stack.  There is also
1792  * a kernel stack page, which obviously must be separate from the user stack.
1793  */
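/*
 * As an illustration only (the bdflush() library wrapper name is assumed
 * here): an "update"-style daemon in user space would typically fork a
 * child that enters the loop below via func == 0 and never returns, while
 * the parent periodically asks for a one-shot flush with func == 1:
 *
 *      if (fork() == 0)
 *              bdflush(0, 0);          -- child becomes the flush daemon
 *      for (;;) {
 *              bdflush(1, 0);          -- write back old dirty buffers
 *              sleep(5);
 *      }
 */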
1794 struct wait_queue * bdflush_wait = NULL;
1795 struct wait_queue * bdflush_done = NULL;
1796 
1797 static int bdflush_running = 0;
1798 
1799 static void wakeup_bdflush(int wait)
1800 {
1801         if(!bdflush_running){
1802                 printk("Warning - bdflush not running\n");
1803                 sync_buffers(0,0);
1804                 return;
1805         };
1806         wake_up(&bdflush_wait);
1807         if(wait) sleep_on(&bdflush_done);
1808 }
1809 
1810 
1811 
1812 /* 
1813  * Here we attempt to write back old buffers.  We also try to flush inodes 
1814  * and supers as well, since this function is essentially "update", and 
1815  * otherwise there would be no way of ensuring that these quantities ever 
1816  * get written back.  Ideally, we would have a timestamp on the inodes
1817  * and superblocks so that we could write back only the old ones as well.
1818  */
1819 
1820 asmlinkage int sync_old_buffers(void)
1821 {
1822         int i, isize;
1823         int ndirty, nwritten;
1824         int nlist;
1825         int ncount;
1826         struct buffer_head * bh, *next;
1827 
1828         sync_supers(0);
1829         sync_inodes(0);
1830 
1831         ncount = 0;
1832 #ifdef DEBUG
1833         for(nlist = 0; nlist < NR_LIST; nlist++)
1834 #else
1835         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1836 #endif
1837         {
1838                 ndirty = 0;
1839                 nwritten = 0;
1840         repeat:
1841                 bh = lru_list[nlist];
1842                 if(bh) 
1843                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1844                                  /* We may have stalled while waiting for I/O to complete. */
1845                                  if(bh->b_list != nlist) goto repeat;
1846                                  next = bh->b_next_free;
1847                                  if(!lru_list[nlist]) {
1848                                          printk("Dirty list empty %d\n", i);
1849                                          break;
1850                                  }
1851                                  
1852                                  /* Clean buffer on dirty list?  Refile it */
1853                                  if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1854                                   {
1855                                           refile_buffer(bh);
1856                                           continue;
1857                                   }
1858                                  
1859                                  if (bh->b_lock || !bh->b_dirt)
1860                                           continue;
1861                                  ndirty++;
1862                                  if(bh->b_flushtime > jiffies) continue;
1863                                  nwritten++;
1864                                  bh->b_count++;
1865                                  bh->b_flushtime = 0;
1866 #ifdef DEBUG
1867                                  if(nlist != BUF_DIRTY) ncount++;
1868 #endif
1869                                  ll_rw_block(WRITE, 1, &bh);
1870                                  bh->b_count--;
1871                          }
1872         }
1873 #ifdef DEBUG
1874         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1875         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1876 #endif
1877         
1878         /* We assume that we only come through here on a regular
1879            schedule, like every 5 seconds.  Now update load averages.  
1880            Shift usage counts to prevent overflow. */
1881         for(isize = 0; isize<NR_SIZES; isize++){
1882                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1883                 buffer_usage[isize] = 0;
1884         };
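             /* CALC_LOAD comes from the load-average code in <linux/sched.h>
                and maintains a fixed-point exponentially decaying average,
                roughly lav = lav * c + usage * (1 - c) with c given by
                lav_const, so sizes that stop being used see their load
                average fade away over a few update intervals. */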
1885         return 0;
1886 }
1887 
1888 
1889 /* This is the interface to bdflush.  As we get more sophisticated, we can
1890  * pass tuning parameters to this "process", to adjust how it behaves.  If you
1891  * invoke it again after the daemon has been started, you simply modify
1892  * the tuning parameters.  We would want to verify each parameter, however,
1893  * to make sure that it is reasonable. */
1894 
1895 asmlinkage int sys_bdflush(int func, long data)
1896 {
1897         int i, error;
1898         int ndirty;
1899         int nlist;
1900         int ncount;
1901         struct buffer_head * bh, *next;
1902 
1903         if (!suser())
1904                 return -EPERM;
1905 
1906         if (func == 1)
1907                  return sync_old_buffers();
1908 
1909         /* Basically func 0 means start the daemon; for func >= 2, even values read tuning parameter (func-2)/2 and odd values write it */
1910         if (func >= 2) {
1911                 i = (func-2) >> 1;
1912                 if (i < 0 || i >= N_PARAM)
1913                         return -EINVAL;
1914                 if((func & 1) == 0) {
1915                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1916                         if (error)
1917                                 return error;
1918                         put_user(bdf_prm.data[i], (int*)data);
1919                         return 0;
1920                 };
1921                 if (data < bdflush_min[i] || data > bdflush_max[i])
1922                         return -EINVAL;
1923                 bdf_prm.data[i] = data;
1924                 return 0;
1925         };
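             /* Concretely (parameter index 0 and the bdflush() wrapper name
                are used purely for illustration):
                        bdflush(2, (long) &value);      -- read parameter 0
                        bdflush(3, new_value);          -- write parameter 0  */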
1926         
1927         if (bdflush_running)
1928                 return -EBUSY; /* Only one copy of this running at one time */
1929         bdflush_running++;
1930         
1931         /* OK, from here on is the daemon */
1932         
1933         for (;;) {
1934 #ifdef DEBUG
1935                 printk("bdflush() activated...");
1936 #endif
1937                 
1938                 ncount = 0;
1939 #ifdef DEBUG
1940                 for(nlist = 0; nlist < NR_LIST; nlist++)
1941 #else
1942                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1943 #endif
1944                  {
1945                          ndirty = 0;
1946                  repeat:
1947                          bh = lru_list[nlist];
1948                          if(bh) 
1949                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1950                                        bh = next) {
1951                                           /* We may have stalled while waiting for I/O to complete. */
1952                                           if(bh->b_list != nlist) goto repeat;
1953                                           next = bh->b_next_free;
1954                                           if(!lru_list[nlist]) {
1955                                                   printk("Dirty list empty %d\n", i);
1956                                                   break;
1957                                           }
1958                                           
1959                                           /* Clean buffer on dirty list?  Refile it */
1960                                           if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1961                                            {
1962                                                    refile_buffer(bh);
1963                                                    continue;
1964                                            }
1965                                           
1966                                           if (bh->b_lock || !bh->b_dirt)
1967                                                    continue;
1968                                           /* Should we write back buffers that are shared or not??
1969                                              currently dirty buffers are not shared, so it does not matter */
1970                                           bh->b_count++;
1971                                           ndirty++;
1972                                           bh->b_flushtime = 0;
1973                                           ll_rw_block(WRITE, 1, &bh);
1974 #ifdef DEBUG
1975                                           if(nlist != BUF_DIRTY) ncount++;
1976 #endif
1977                                           bh->b_count--;
1978                                   }
1979                  }
1980 #ifdef DEBUG
1981                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1982                 printk("sleeping again.\n");
1983 #endif
1984                 wake_up(&bdflush_done);
1985                 
1986                 /* If there are still a lot of dirty buffers around, skip the sleep
1987                    and flush some more */
1988                 
1989                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1990                    bdf_prm.b_un.nfract/100) {
1991                         if (current->signal & (1 << (SIGKILL-1))) {
1992                                 bdflush_running--;
1993                                 return 0;
1994                         }
1995                         current->signal = 0;
1996                         interruptible_sleep_on(&bdflush_wait);
1997                 }
1998         }
1999 }
2000 
2001 
2002 /*
2003  * Overrides for Emacs so that we follow Linus's tabbing style.
2004  * Emacs will notice this stuff at the end of the file and automatically
2005  * adjust the settings for this buffer only.  This must remain at the end
2006  * of the file.
2007  * ---------------------------------------------------------------------------
2008  * Local variables:
2009  * c-indent-level: 8
2010  * c-brace-imaginary-offset: 0
2011  * c-brace-offset: -8
2012  * c-argdecl-indent: 8
2013  * c-label-offset: -8
2014  * c-continued-statement-offset: 8
2015  * c-continued-brace-offset: 0
2016  * End:
2017  */
