root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. remove_from_hash_queue
  10. remove_from_lru_list
  11. remove_from_free_list
  12. remove_from_queues
  13. put_last_lru
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. refill_freelist
  20. getblk
  21. set_writetime
  22. refile_buffer
  23. brelse
  24. bforget
  25. bread
  26. breada
  27. put_unused_buffer_head
  28. get_more_buffer_heads
  29. get_unused_buffer_head
  30. create_buffers
  31. read_buffers
  32. try_to_align
  33. check_aligned
  34. try_to_load_aligned
  35. try_to_share_buffers
  36. bread_page
  37. bwrite_page
  38. grow_buffers
  39. try_to_free
  40. age_buffer
  41. maybe_shrink_lav_buffers
  42. shrink_buffers
  43. shrink_specific_buffers
  44. show_buffers
  45. try_to_reassign
  46. reassign_cluster
  47. try_to_generate_cluster
  48. generate_cluster
  49. buffer_init
  50. wakeup_bdflush
  51. sync_old_buffers
  52. sys_bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 #include <linux/sched.h>
  20 #include <linux/kernel.h>
  21 #include <linux/major.h>
  22 #include <linux/string.h>
  23 #include <linux/locks.h>
  24 #include <linux/errno.h>
  25 #include <linux/malloc.h>
  26 #include <linux/swapctl.h>
  27 
  28 #include <asm/system.h>
  29 #include <asm/segment.h>
  30 #include <asm/io.h>
  31 
  32 #define NR_SIZES 4
  33 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  34 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  35 
  36 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  37 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
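
A quick worked example of the size tables above: BUFSIZE_INDEX(1024) is
buffersize_index[1024>>9] = buffersize_index[2] = 1, so 1024-byte buffers live at
index 1 of free_list[] and the other per-size arrays, and bufferindex_size[1] maps
the index back to 1024.  With a 4096-byte PAGE_SIZE, MAX_BUF_PER_PAGE is 8 (the
512-byte worst case).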
  38 
  39 static int grow_buffers(int pri, int size);
  40 static int shrink_specific_buffers(unsigned int priority, int size);
  41 static int maybe_shrink_lav_buffers(int);
  42 
  43 static int nr_hash = 0;  /* Size of hash table */
  44 static struct buffer_head ** hash_table;
  45 struct buffer_head ** buffer_pages;
  46 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  47 /* next_to_age is an array of pointers into the lru lists, used to
  48    cycle through the buffers aging their contents when deciding which
  49    buffers to discard when more memory is needed */
  50 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  51 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  52 static struct buffer_head * unused_list = NULL;
  53 static struct wait_queue * buffer_wait = NULL;
  54 
  55 int nr_buffers = 0;
  56 int nr_buffers_type[NR_LIST] = {0,};
  57 int nr_buffers_size[NR_SIZES] = {0,};
  58 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  59 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  60 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  61 int nr_free[NR_SIZES] = {0,};
  62 int buffermem = 0;
  63 int nr_buffer_heads = 0;
  64 extern int *blksize_size[];
  65 
  66 /* Here is the parameter block for the bdflush process. */
  67 static void wakeup_bdflush(int);
  68 
  69 #define N_PARAM 9
  70 #define LAV
  71 
  72 static union bdflush_param{
  73         struct {
  74                 int nfract;  /* Percentage of buffer cache dirty to 
  75                                 activate bdflush */
  76                 int ndirty;  /* Maximum number of dirty blocks to write out per
  77                                 wake-cycle */
  78                 int nrefill; /* Number of clean buffers to try and obtain
  79                                 each time we call refill */
  80                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  81                                   when trying to refill buffers. */
  82                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  83                                     search for free clusters */
  84                 int age_buffer;  /* Time for normal buffer to age before 
  85                                     we flush it */
  86                 int age_super;  /* Time for superblock to age before we 
  87                                    flush it */
  88                 int lav_const;  /* Constant used for load average (time
  89                                    constant) */
  90                 int lav_ratio;  /* Used to determine how low a lav for a
  91                                    particular size can go before we start to
  92                                    trim back the buffers */
  93         } b_un;
  94         unsigned int data[N_PARAM];
  95 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
  96 
  97 /* The lav constant is set for 1 minute, as long as the update process runs
  98    every 5 seconds.  If you change the frequency of update, the time
  99    constant will also change. */
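
A plausible reading of the lav_const default of 1884 (an inference from the numbers,
not something the source states): if lav_const is an exponential-decay factor scaled
by 2048, then 2048 * exp(-5 sec / 60 sec) ~= 1884, i.e. one sample every 5 seconds
with a one-minute time constant, which matches the comment above; the corresponding
bdflush_max entry of 2047 below fits the same 2048 scale.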
 100 
 101 
 102 /* These are the min and max parameter values that we will allow to be assigned */
 103 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 104 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
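
With the defaults above, nfract = 25 means refile_buffer() (further down in this
file) wakes bdflush once the dirty buffers exceed 25% of the non-shared buffers; for
example, with 2000 buffers of which 400 are shared, the wakeup fires when more than
(2000 - 400) * 25 / 100 = 400 buffers are dirty, and ndirty = 500 then caps how many
of them one wake-cycle writes back.  age_buffer and age_super are jiffy counts, so
30*HZ and 5*HZ are 30 and 5 seconds.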
 105 
 106 /*
 107  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 108  * and getting rid of the cli-sti pairs. The wait-queue routines still
 109  * need cli-sti, but now it's just a couple of 386 instructions or so.
 110  *
 111  * Note that the real wait_on_buffer() is an inline function that checks
 112  * if 'b_wait' is set before calling this, so that the queues aren't set
 113  * up unnecessarily.
 114  */
 115 void __wait_on_buffer(struct buffer_head * bh)
 116 {
 117         struct wait_queue wait = { current, NULL };
 118 
 119         bh->b_count++;
 120         add_wait_queue(&bh->b_wait, &wait);
 121 repeat:
 122         current->state = TASK_UNINTERRUPTIBLE;
 123         if (bh->b_lock) {
 124                 schedule();
 125                 goto repeat;
 126         }
 127         remove_wait_queue(&bh->b_wait, &wait);
 128         bh->b_count--;
 129         current->state = TASK_RUNNING;
 130 }
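
The inline wait_on_buffer() wrapper mentioned in the comment above lives outside this
file (most likely linux/locks.h, which is included above).  A minimal sketch of what
it presumably looks like, assuming it simply tests the same b_lock condition the loop
above waits on before taking the slow path:

        extern inline void wait_on_buffer(struct buffer_head * bh)
        {
                if (bh->b_lock)
                        __wait_on_buffer(bh);
        }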
 131 
 132 /* Call sync_buffers with wait!=0 to ensure that the call does not
 133    return until all buffer writes have completed.  Sync() may return
 134    before the writes have finished; fsync() may not. */
 135 
 136 
 137 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 138    spontaneously dirty themselves without ever brelse being called.
 139    We will ultimately want to put these in a separate list, but for
 140    now we search all of the lists for dirty buffers */
 141 
 142 static int sync_buffers(kdev_t dev, int wait)
 143 {
 144         int i, retry, pass = 0, err = 0;
 145         int nlist, ncount;
 146         struct buffer_head * bh, *next;
 147 
 148         /* One pass for no-wait, three for wait:
 149            0) write out all dirty, unlocked buffers;
 150            1) write out all dirty buffers, waiting if locked;
 151            2) wait for completion by waiting for all buffers to unlock. */
 152  repeat:
 153         retry = 0;
 154  repeat2:
 155         ncount = 0;
 156         /* We search all lists as a failsafe mechanism, not because we expect
 157            there to be dirty buffers on any of the other lists. */
 158         for(nlist = 0; nlist < NR_LIST; nlist++)
 159          {
 160          repeat1:
 161                  bh = lru_list[nlist];
 162                  if(!bh) continue;
 163                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 164                          if(bh->b_list != nlist) goto repeat1;
 165                          next = bh->b_next_free;
 166                          if(!lru_list[nlist]) break;
 167                          if (dev && bh->b_dev != dev)
 168                                   continue;
 169                          if (bh->b_lock)
 170                           {
 171                                   /* Buffer is locked; skip it unless wait is
 172                                      requested AND pass > 0. */
 173                                   if (!wait || !pass) {
 174                                           retry = 1;
 175                                           continue;
 176                                   }
 177                                   wait_on_buffer (bh);
 178                                   goto repeat2;
 179                           }
 180                          /* If an unlocked buffer is not uptodate, there has
 181                              been an IO error. Skip it. */
 182                          if (wait && bh->b_req && !bh->b_lock &&
 183                              !bh->b_dirt && !bh->b_uptodate) {
 184                                   err = 1;
 185                                   continue;
 186                           }
 187                          /* Don't write clean buffers.  Don't write ANY buffers
 188                             on the third pass. */
 189                          if (!bh->b_dirt || pass>=2)
 190                                   continue;
 191                          /* don't bother about locked buffers */
 192                          if (bh->b_lock)
 193                                  continue;
 194                          bh->b_count++;
 195                          bh->b_flushtime = 0;
 196                          ll_rw_block(WRITE, 1, &bh);
 197 
 198                          if(nlist != BUF_DIRTY) { 
 199                                  printk("[%d %s %ld] ", nlist,
 200                                         kdevname(bh->b_dev), bh->b_blocknr);
 201                                  ncount++;
 202                          };
 203                          bh->b_count--;
 204                          retry = 1;
 205                  }
 206          }
 207         if (ncount)
 208           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 209         
 210         /* If we are waiting for the sync to succeed, and if any dirty
 211            blocks were written, then repeat; on the second pass, only
 212            wait for buffers being written (do not pass to write any
 213            more buffers on the second pass). */
 214         if (wait && retry && ++pass<=2)
 215                  goto repeat;
 216         return err;
 217 }
 218 
 219 void sync_dev(kdev_t dev)
 220 {
 221         sync_buffers(dev, 0);
 222         sync_supers(dev);
 223         sync_inodes(dev);
 224         sync_buffers(dev, 0);
 225 }
 226 
 227 int fsync_dev(kdev_t dev)
 228 {
 229         sync_buffers(dev, 0);
 230         sync_supers(dev);
 231         sync_inodes(dev);
 232         return sync_buffers(dev, 1);
 233 }
 234 
 235 asmlinkage int sys_sync(void)
 236 {
 237         fsync_dev(0);
 238         return 0;
 239 }
 240 
 241 int file_fsync (struct inode *inode, struct file *filp)
 242 {
 243         return fsync_dev(inode->i_dev);
 244 }
 245 
 246 asmlinkage int sys_fsync(unsigned int fd)
 247 {
 248         struct file * file;
 249         struct inode * inode;
 250 
 251         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 252                 return -EBADF;
 253         if (!file->f_op || !file->f_op->fsync)
 254                 return -EINVAL;
 255         if (file->f_op->fsync(inode,file))
 256                 return -EIO;
 257         return 0;
 258 }
 259 
 260 void invalidate_buffers(kdev_t dev)
 261 {
 262         int i;
 263         int nlist;
 264         struct buffer_head * bh;
 265 
 266         for(nlist = 0; nlist < NR_LIST; nlist++) {
 267                 bh = lru_list[nlist];
 268                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 269                         if (bh->b_dev != dev)
 270                                 continue;
 271                         wait_on_buffer(bh);
 272                         if (bh->b_dev != dev)
 273                                 continue;
 274                         if (bh->b_count)
 275                                 continue;
 276                         bh->b_flushtime = bh->b_uptodate = 
 277                                 bh->b_dirt = bh->b_req = 0;
 278                 }
 279         }
 280 }
 281 
 282 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 283 #define hash(dev,block) hash_table[_hashfn(dev,block)]
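
Reading the macros above: _hashfn() folds the device and block numbers together with
an exclusive-or and reduces the result modulo nr_hash (the hash-table size), and
hash() then names that bucket's chain head.  As a purely illustrative example (the
HASHDEV() encoding and the nr_hash value are assumptions): with nr_hash = 997, a
device folding to 0x300 and block 1234, the bucket index would be
(0x300 ^ 1234) % 997 = 8.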
 284 
 285 static inline void remove_from_hash_queue(struct buffer_head * bh)
 286 {
 287         if (bh->b_next)
 288                 bh->b_next->b_prev = bh->b_prev;
 289         if (bh->b_prev)
 290                 bh->b_prev->b_next = bh->b_next;
 291         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 292                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 293         bh->b_next = bh->b_prev = NULL;
 294 }
 295 
 296 static inline void remove_from_lru_list(struct buffer_head * bh)
 297 {
 298         if (!(bh->b_prev_free) || !(bh->b_next_free))
 299                 panic("VFS: LRU block list corrupted");
 300         if (bh->b_dev == B_FREE)
 301                 panic("LRU list corrupted");
 302         bh->b_prev_free->b_next_free = bh->b_next_free;
 303         bh->b_next_free->b_prev_free = bh->b_prev_free;
 304 
 305         if (lru_list[bh->b_list] == bh)
 306                  lru_list[bh->b_list] = bh->b_next_free;
 307         if (lru_list[bh->b_list] == bh)
 308                  lru_list[bh->b_list] = NULL;
 309         if (next_to_age[bh->b_list] == bh)
 310                 next_to_age[bh->b_list] = bh->b_next_free;
 311         if (next_to_age[bh->b_list] == bh)
 312                 next_to_age[bh->b_list] = NULL;
 313 
 314         bh->b_next_free = bh->b_prev_free = NULL;
 315 }
 316 
 317 static inline void remove_from_free_list(struct buffer_head * bh)
 318 {
 319         int isize = BUFSIZE_INDEX(bh->b_size);
 320         if (!(bh->b_prev_free) || !(bh->b_next_free))
 321                 panic("VFS: Free block list corrupted");
 322         if(bh->b_dev != B_FREE)
 323                 panic("Free list corrupted");
 324         if(!free_list[isize])
 325                 panic("Free list empty");
 326         nr_free[isize]--;
 327         if(bh->b_next_free == bh)
 328                  free_list[isize] = NULL;
 329         else {
 330                 bh->b_prev_free->b_next_free = bh->b_next_free;
 331                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 332                 if (free_list[isize] == bh)
 333                          free_list[isize] = bh->b_next_free;
 334         };
 335         bh->b_next_free = bh->b_prev_free = NULL;
 336 }
 337 
 338 static inline void remove_from_queues(struct buffer_head * bh)
 339 {
 340         if(bh->b_dev == B_FREE) {
 341                 remove_from_free_list(bh); /* Free list entries should not be
 342                                               in the hash queue */
 343                 return;
 344         };
 345         nr_buffers_type[bh->b_list]--;
 346         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 347         remove_from_hash_queue(bh);
 348         remove_from_lru_list(bh);
 349 }
 350 
 351 static inline void put_last_lru(struct buffer_head * bh)
 352 {
 353         if (!bh)
 354                 return;
 355         if (bh == lru_list[bh->b_list]) {
 356                 lru_list[bh->b_list] = bh->b_next_free;
 357                 if (next_to_age[bh->b_list] == bh)
 358                         next_to_age[bh->b_list] = bh->b_next_free;
 359                 return;
 360         }
 361         if(bh->b_dev == B_FREE)
 362                 panic("Wrong block for lru list");
 363         remove_from_lru_list(bh);
  364 /* add to back of the lru list */
 365 
 366         if(!lru_list[bh->b_list]) {
 367                 lru_list[bh->b_list] = bh;
 368                 lru_list[bh->b_list]->b_prev_free = bh;
 369         };
 370         if (!next_to_age[bh->b_list])
 371                 next_to_age[bh->b_list] = bh;
 372 
 373         bh->b_next_free = lru_list[bh->b_list];
 374         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 375         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 376         lru_list[bh->b_list]->b_prev_free = bh;
 377 }
 378 
 379 static inline void put_last_free(struct buffer_head * bh)
 380 {
 381         int isize;
 382         if (!bh)
 383                 return;
 384 
 385         isize = BUFSIZE_INDEX(bh->b_size);      
 386         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 387         /* add to back of free list */
 388         if(!free_list[isize]) {
 389                 free_list[isize] = bh;
 390                 bh->b_prev_free = bh;
 391         };
 392 
 393         nr_free[isize]++;
 394         bh->b_next_free = free_list[isize];
 395         bh->b_prev_free = free_list[isize]->b_prev_free;
 396         free_list[isize]->b_prev_free->b_next_free = bh;
 397         free_list[isize]->b_prev_free = bh;
 398 }
 399 
 400 static inline void insert_into_queues(struct buffer_head * bh)
 401 {
 402         /* put at end of free list */
 403         if(bh->b_dev == B_FREE) {
 404                 put_last_free(bh);
 405                 return;
 406         }
 407         if(!lru_list[bh->b_list]) {
 408                 lru_list[bh->b_list] = bh;
 409                 bh->b_prev_free = bh;
 410         }
 411         if (!next_to_age[bh->b_list])
 412                 next_to_age[bh->b_list] = bh;
 413         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 414         bh->b_next_free = lru_list[bh->b_list];
 415         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 416         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 417         lru_list[bh->b_list]->b_prev_free = bh;
 418         nr_buffers_type[bh->b_list]++;
 419         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 420 /* put the buffer in new hash-queue if it has a device */
 421         bh->b_prev = NULL;
 422         bh->b_next = NULL;
 423         if (!(bh->b_dev))
 424                 return;
 425         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 426         hash(bh->b_dev,bh->b_blocknr) = bh;
 427         if (bh->b_next)
 428                 bh->b_next->b_prev = bh;
 429 }
 430 
 431 static struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 432 {               
 433         struct buffer_head * tmp;
 434 
 435         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 436                 if (tmp->b_dev == dev && tmp->b_blocknr == block)
 437                         if (tmp->b_size == size)
 438                                 return tmp;
 439                         else {
 440                                 printk("VFS: Wrong blocksize on device %s\n",
 441                                         kdevname(dev));
 442                                 return NULL;
 443                         }
 444         return NULL;
 445 }
 446 
 447 /*
 448  * Why like this, I hear you say... The reason is race-conditions.
 449  * As we don't lock buffers (unless we are reading them, that is),
 450  * something might happen to it while we sleep (ie a read-error
 451  * will force it bad). This shouldn't really happen currently, but
 452  * the code is ready.
 453  */
 454 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 455 {
 456         struct buffer_head * bh;
 457 
 458         for (;;) {
 459                 if (!(bh=find_buffer(dev,block,size)))
 460                         return NULL;
 461                 bh->b_reuse=0;
 462                 bh->b_count++;
 463                 wait_on_buffer(bh);
 464                 if (bh->b_dev == dev && bh->b_blocknr == block
 465                                              && bh->b_size == size)
 466                         return bh;
 467                 bh->b_count--;
 468         }
 469 }
 470 
 471 void set_blocksize(kdev_t dev, int size)
 472 {
 473         int i, nlist;
 474         struct buffer_head * bh, *bhnext;
 475 
 476         if (!blksize_size[MAJOR(dev)])
 477                 return;
 478 
 479         switch(size) {
 480                 default: panic("Invalid blocksize passed to set_blocksize");
 481                 case 512: case 1024: case 2048: case 4096:;
 482         }
 483 
 484         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 485                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 486                 return;
 487         }
 488         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 489                 return;
 490         sync_buffers(dev, 2);
 491         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 492 
 493   /* We need to be quite careful how we do this - we are moving entries
 494      around on the free list, and we can get in a loop if we are not careful.*/
 495 
 496         for(nlist = 0; nlist < NR_LIST; nlist++) {
 497                 bh = lru_list[nlist];
 498                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 499                         if(!bh) break;
 500                         bhnext = bh->b_next_free; 
 501                         if (bh->b_dev != dev)
 502                                  continue;
 503                         if (bh->b_size == size)
 504                                  continue;
 505                         
 506                         wait_on_buffer(bh);
 507                         if (bh->b_dev == dev && bh->b_size != size) {
 508                                 bh->b_uptodate = bh->b_dirt = bh->b_req =
 509                                          bh->b_flushtime = 0;
 510                         };
 511                         remove_from_hash_queue(bh);
 512                 }
 513         }
 514 }
 515 
 516 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
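
BADNESS() ranks how costly a buffer is to reclaim: 0 for a clean, unlocked buffer,
1 if it is merely locked, 2 if dirty, 3 if both.  refill_freelist() below only steals
buffers whose BADNESS is zero.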
 517 
 518 void refill_freelist(int size)
 519 {
 520         struct buffer_head * bh, * tmp;
 521         struct buffer_head * candidate[NR_LIST];
 522         unsigned int best_time, winner;
 523         int isize = BUFSIZE_INDEX(size);
 524         int buffers[NR_LIST];
 525         int i;
 526         int needed;
 527 
 528         /* First see if we even need this.  Sometimes it is advantageous
  529          to request some blocks in a filesystem that we know we will
 530          be needing ahead of time. */
 531 
 532         if (nr_free[isize] > 100)
 533                 return;
 534 
 535         /* If there are too many dirty buffers, we wake up the update process
 536            now so as to ensure that there are still clean buffers available
 537            for user processes to use (and dirty) */
 538         
 539         /* We are going to try and locate this much memory */
  540         needed = bdf_prm.b_un.nrefill * size;
 541 
 542         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 543                grow_buffers(GFP_BUFFER, size)) {
 544                 needed -= PAGE_SIZE;
 545         }
 546 
 547         if(needed <= 0) return;
 548 
 549         /* See if there are too many buffers of a different size.
 550            If so, victimize them */
 551 
 552         while(maybe_shrink_lav_buffers(size))
 553          {
 554                  if(!grow_buffers(GFP_BUFFER, size)) break;
 555                  needed -= PAGE_SIZE;
 556                  if(needed <= 0) return;
 557          };
 558 
 559         /* OK, we cannot grow the buffer cache, now try and get some
 560            from the lru list */
 561 
 562         /* First set the candidate pointers to usable buffers.  This
 563            should be quick nearly all of the time. */
 564 
 565 repeat0:
 566         for(i=0; i<NR_LIST; i++){
 567                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 568                    nr_buffers_type[i] == 0) {
 569                         candidate[i] = NULL;
 570                         buffers[i] = 0;
 571                         continue;
 572                 }
 573                 buffers[i] = nr_buffers_type[i];
 574                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 575                  {
 576                          if(buffers[i] < 0) panic("Here is the problem");
 577                          tmp = bh->b_next_free;
 578                          if (!bh) break;
 579                          
 580                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 581                              bh->b_dirt) {
 582                                  refile_buffer(bh);
 583                                  continue;
 584                          };
 585                          
 586                          if (bh->b_count || bh->b_size != size)
 587                                   continue;
 588                          
 589                          /* Buffers are written in the order they are placed 
 590                             on the locked list. If we encounter a locked
 591                             buffer here, this means that the rest of them
 592                             are also locked */
 593                          if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 594                                  buffers[i] = 0;
 595                                  break;
 596                          }
 597                          
 598                          if (BADNESS(bh)) continue;
 599                          break;
 600                  };
 601                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 602                 else candidate[i] = bh;
 603                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 604         }
 605         
 606  repeat:
 607         if(needed <= 0) return;
 608         
 609         /* Now see which candidate wins the election */
 610         
 611         winner = best_time = UINT_MAX;  
 612         for(i=0; i<NR_LIST; i++){
 613                 if(!candidate[i]) continue;
 614                 if(candidate[i]->b_lru_time < best_time){
 615                         best_time = candidate[i]->b_lru_time;
 616                         winner = i;
 617                 }
 618         }
 619         
 620         /* If we have a winner, use it, and then get a new candidate from that list */
 621         if(winner != UINT_MAX) {
 622                 i = winner;
 623                 bh = candidate[i];
 624                 candidate[i] = bh->b_next_free;
 625                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 626                 if (bh->b_count || bh->b_size != size)
 627                          panic("Busy buffer in candidate list\n");
 628                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 629                          panic("Shared buffer in candidate list\n");
 630                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 631                 
 632                 if(bh->b_dev == B_FREE)
 633                         panic("Wrong list");
 634                 remove_from_queues(bh);
 635                 bh->b_dev = B_FREE;
 636                 put_last_free(bh);
 637                 needed -= bh->b_size;
 638                 buffers[i]--;
 639                 if(buffers[i] < 0) panic("Here is the problem");
 640                 
 641                 if(buffers[i] == 0) candidate[i] = NULL;
 642                 
 643                 /* Now all we need to do is advance the candidate pointer
 644                    from the winner list to the next usable buffer */
 645                 if(candidate[i] && buffers[i] > 0){
 646                         if(buffers[i] <= 0) panic("Here is another problem");
 647                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 648                                 if(buffers[i] < 0) panic("Here is the problem");
 649                                 tmp = bh->b_next_free;
 650                                 if (!bh) break;
 651                                 
 652                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 653                                     bh->b_dirt) {
 654                                         refile_buffer(bh);
 655                                         continue;
 656                                 };
 657                                 
 658                                 if (bh->b_count || bh->b_size != size)
 659                                          continue;
 660                                 
 661                                 /* Buffers are written in the order they are
 662                                    placed on the locked list.  If we encounter
 663                                    a locked buffer here, this means that the
 664                                    rest of them are also locked */
 665                                 if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 666                                         buffers[i] = 0;
 667                                         break;
 668                                 }
 669               
 670                                 if (BADNESS(bh)) continue;
 671                                 break;
 672                         };
 673                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 674                         else candidate[i] = bh;
 675                         if(candidate[i] && candidate[i]->b_count) 
 676                                  panic("Here is the problem");
 677                 }
 678                 
 679                 goto repeat;
 680         }
 681         
 682         if(needed <= 0) return;
 683         
 684         /* Too bad, that was not enough. Try a little harder to grow some. */
 685         
 686         if (nr_free_pages > min_free_pages + 5) {
 687                 if (grow_buffers(GFP_BUFFER, size)) {
 688                         needed -= PAGE_SIZE;
 689                         goto repeat0;
 690                 };
 691         }
 692         
 693         /* and repeat until we find something good */
 694         if (!grow_buffers(GFP_ATOMIC, size))
 695                 wakeup_bdflush(1);
 696         needed -= PAGE_SIZE;
 697         goto repeat0;
 698 }
 699 
 700 /*
 701  * Ok, this is getblk, and it isn't very clear, again to hinder
 702  * race-conditions. Most of the code is seldom used, (ie repeating),
 703  * so it should be much more efficient than it looks.
 704  *
 705  * The algorithm is changed: hopefully better, and an elusive bug removed.
 706  *
 707  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 708  * when the filesystem starts to get full of dirty blocks (I hope).
 709  */
 710 struct buffer_head * getblk(kdev_t dev, int block, int size)
 711 {
 712         struct buffer_head * bh;
 713         int isize = BUFSIZE_INDEX(size);
 714 
 715         /* Update this for the buffer size lav. */
 716         buffer_usage[isize]++;
 717 
 718         /* If there are too many dirty buffers, we wake up the update process
 719            now so as to ensure that there are still clean buffers available
 720            for user processes to use (and dirty) */
 721 repeat:
 722         bh = get_hash_table(dev, block, size);
 723         if (bh) {
 724                 if (bh->b_uptodate && !bh->b_dirt)
 725                          put_last_lru(bh);
 726                 if(!bh->b_dirt) bh->b_flushtime = 0;
 727                 bh->b_touched = 1;
 728                 return bh;
 729         }
 730 
 731         while(!free_list[isize]) refill_freelist(size);
 732         
 733         if (find_buffer(dev,block,size))
 734                  goto repeat;
 735 
 736         bh = free_list[isize];
 737         remove_from_free_list(bh);
 738 
 739 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 740 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 741         bh->b_count=1;
 742         bh->b_dirt=0;
 743         bh->b_lock=0;
 744         bh->b_uptodate=0;
 745         bh->b_flushtime=0;
 746         bh->b_req=0;
 747         bh->b_reuse=0;
 748         bh->b_touched = 1;
 749         bh->b_has_aged = 0;
 750         bh->b_dev=dev;
 751         bh->b_blocknr=block;
 752         insert_into_queues(bh);
 753         return bh;
 754 }
 755 
 756 void set_writetime(struct buffer_head * buf, int flag)
 757 {
 758         int newtime;
 759 
 760         if (buf->b_dirt){
  761                 /* Set the flush time, pulling an existing one earlier if needed */
 762                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 763                                      bdf_prm.b_un.age_buffer);
 764                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 765                          buf->b_flushtime = newtime;
 766         } else {
 767                 buf->b_flushtime = 0;
 768         }
 769 }
 770 
 771 
 772 void refile_buffer(struct buffer_head * buf)
 773 {
 774         int dispose;
 775 
 776         if(buf->b_dev == B_FREE) {
 777                 printk("Attempt to refile free buffer\n");
 778                 return;
 779         }
 780         if (buf->b_dirt)
 781                 dispose = BUF_DIRTY;
 782         else if (mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1)
 783                 dispose = BUF_SHARED;
 784         else if (buf->b_lock)
 785                 dispose = BUF_LOCKED;
 786         else if (buf->b_list == BUF_SHARED)
 787                 dispose = BUF_UNSHARED;
 788         else
 789                 dispose = BUF_CLEAN;
 790         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 791         if(dispose != buf->b_list)  {
 792                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 793                          buf->b_lru_time = jiffies;
 794                 if(dispose == BUF_LOCKED && 
 795                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 796                          dispose = BUF_LOCKED1;
 797                 remove_from_queues(buf);
 798                 buf->b_list = dispose;
 799                 insert_into_queues(buf);
 800                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 801                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 802                    bdf_prm.b_un.nfract/100)
 803                          wakeup_bdflush(0);
 804         }
 805 }
 806 
 807 /*
 808  * Release a buffer head
 809  */
 810 void brelse(struct buffer_head * buf)
 811 {
 812         if (!buf)
 813                 return;
 814         wait_on_buffer(buf);
 815 
 816         /* If dirty, mark the time this buffer should be written back */
 817         set_writetime(buf, 0);
 818         refile_buffer(buf);
 819 
 820         if (buf->b_count) {
 821                 if (!--buf->b_count)
 822                         wake_up(&buffer_wait);
 823                 return;
 824         }
 825         printk("VFS: brelse: Trying to free free buffer\n");
 826 }
 827 
 828 /*
  829  * bforget() is like brelse(), except it throws the buffer away
 830  */
 831 void bforget(struct buffer_head * buf)
 832 {
 833         if (!buf)
 834                 return;
 835         wait_on_buffer(buf);
 836         if (buf->b_count != 1) {
 837                 printk("Aieee... bforget(): count = %d\n", buf->b_count);
 838                 return;
 839         }
 840         if (mem_map[MAP_NR(buf->b_data)].count != 1) {
 841                 printk("Aieee... bforget(): shared buffer\n");
 842                 return;
 843         }
 844         mark_buffer_clean(buf);
 845         buf->b_count = 0;
 846         remove_from_queues(buf);
 847         buf->b_dev = B_FREE;
 848         put_last_free(buf);
 849         wake_up(&buffer_wait);
 850 }
 851 
 852 /*
 853  * bread() reads a specified block and returns the buffer that contains
 854  * it. It returns NULL if the block was unreadable.
 855  */
 856 struct buffer_head * bread(kdev_t dev, int block, int size)
 857 {
 858         struct buffer_head * bh;
 859 
 860         if (!(bh = getblk(dev, block, size))) {
 861                 printk("VFS: bread: READ error on device %s\n",
 862                         kdevname(dev));
 863                 return NULL;
 864         }
 865         if (bh->b_uptodate)
 866                 return bh;
 867         ll_rw_block(READ, 1, &bh);
 868         wait_on_buffer(bh);
 869         if (bh->b_uptodate)
 870                 return bh;
 871         brelse(bh);
 872         return NULL;
 873 }
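
The usual calling pattern for the interface above, sketched from a hypothetical
caller's point of view (example_read_block, dev and block are illustrative
placeholders, not names from this file):

        static int example_read_block(kdev_t dev, int block)
        {
                struct buffer_head * bh;

                bh = bread(dev, block, BLOCK_SIZE);     /* NULL means the block was unreadable */
                if (!bh)
                        return -EIO;
                /* ... examine or modify the BLOCK_SIZE bytes at bh->b_data ... */
                mark_buffer_dirty(bh, 0);               /* only if b_data was modified */
                brelse(bh);                             /* drop the reference getblk() took */
                return 0;
        }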
 874 
 875 /*
  876  * Ok, breada can be used as bread, but additionally starts read-ahead on
  877  * the following blocks. The amount of read-ahead is bounded by the device's
  878  * read_ahead setting, by NBUF, and by the file position/size passed in.
 879  */
 880 
 881 #define NBUF 16
 882 
 883 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 884         unsigned int pos, unsigned int filesize)
 885 {
 886         struct buffer_head * bhlist[NBUF];
 887         unsigned int blocks;
 888         struct buffer_head * bh;
 889         int index;
 890         int i, j;
 891 
 892         if (pos >= filesize)
 893                 return NULL;
 894 
 895         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 896                 return NULL;
 897 
 898         index = BUFSIZE_INDEX(bh->b_size);
 899 
 900         if (bh->b_uptodate)
 901                 return bh;
 902 
 903         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 904 
 905         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 906                 blocks = read_ahead[MAJOR(dev)] >> index;
 907         if (blocks > NBUF)
 908                 blocks = NBUF;
 909         
 910         bhlist[0] = bh;
 911         j = 1;
 912         for(i=1; i<blocks; i++) {
 913                 bh = getblk(dev,block+i,bufsize);
 914                 if (bh->b_uptodate) {
 915                         brelse(bh);
 916                         break;
 917                 }
 918                 bhlist[j++] = bh;
 919         }
 920 
 921         /* Request the read for these buffers, and then release them */
 922         ll_rw_block(READ, j, bhlist);
 923 
 924         for(i=1; i<j; i++)
 925                 brelse(bhlist[i]);
 926 
 927         /* Wait for this buffer, and then continue on */
 928         bh = bhlist[0];
 929         wait_on_buffer(bh);
 930         if (bh->b_uptodate)
 931                 return bh;
 932         brelse(bh);
 933         return NULL;
 934 }
 935 
 936 /*
 937  * See fs/inode.c for the weird use of volatile..
 938  */
 939 static void put_unused_buffer_head(struct buffer_head * bh)
 940 {
 941         struct wait_queue * wait;
 942 
 943         wait = ((volatile struct buffer_head *) bh)->b_wait;
 944         memset(bh,0,sizeof(*bh));
 945         ((volatile struct buffer_head *) bh)->b_wait = wait;
 946         bh->b_next_free = unused_list;
 947         unused_list = bh;
 948 }
 949 
 950 static void get_more_buffer_heads(void)
 951 {
 952         int i;
 953         struct buffer_head * bh;
 954 
 955         if (unused_list)
 956                 return;
 957 
 958         if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 959                 return;
 960 
 961         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 962                 bh->b_next_free = unused_list;  /* only make link */
 963                 unused_list = bh++;
 964         }
 965 }
 966 
 967 static struct buffer_head * get_unused_buffer_head(void)
 968 {
 969         struct buffer_head * bh;
 970 
 971         get_more_buffer_heads();
 972         if (!unused_list)
 973                 return NULL;
 974         bh = unused_list;
 975         unused_list = bh->b_next_free;
 976         bh->b_next_free = NULL;
 977         bh->b_data = NULL;
 978         bh->b_size = 0;
 979         bh->b_req = 0;
 980         return bh;
 981 }
 982 
 983 /*
 984  * Create the appropriate buffers when given a page for data area and
 985  * the size of each buffer.. Use the bh->b_this_page linked list to
 986  * follow the buffers created.  Return NULL if unable to create more
 987  * buffers.
 988  */
 989 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 990 {
 991         struct buffer_head *bh, *head;
 992         unsigned long offset;
 993 
 994         head = NULL;
 995         offset = PAGE_SIZE;
 996         while ((offset -= size) < PAGE_SIZE) {
 997                 bh = get_unused_buffer_head();
 998                 if (!bh)
 999                         goto no_grow;
1000                 bh->b_this_page = head;
1001                 head = bh;
1002                 bh->b_data = (char *) (page+offset);
1003                 bh->b_size = size;
1004                 bh->b_dev = B_FREE;  /* Flag as unused */
1005         }
1006         return head;
1007 /*
1008  * In case anything failed, we just free everything we got.
1009  */
1010 no_grow:
1011         bh = head;
1012         while (bh) {
1013                 head = bh;
1014                 bh = bh->b_this_page;
1015                 put_unused_buffer_head(head);
1016         }
1017         return NULL;
1018 }
1019 
1020 static void read_buffers(struct buffer_head * bh[], int nrbuf)
1021 {
1022         int i;
1023         int bhnum = 0;
1024         struct buffer_head * bhr[MAX_BUF_PER_PAGE];
1025 
1026         for (i = 0 ; i < nrbuf ; i++) {
1027                 if (bh[i] && !bh[i]->b_uptodate)
1028                         bhr[bhnum++] = bh[i];
1029         }
1030         if (bhnum)
1031                 ll_rw_block(READ, bhnum, bhr);
1032         for (i = nrbuf ; --i >= 0 ; ) {
1033                 if (bh[i]) {
1034                         wait_on_buffer(bh[i]);
1035                 }
1036         }
1037 }
1038 
1039 /*
1040  * This actually gets enough info to try to align the stuff,
1041  * but we don't bother yet.. We'll have to check that nobody
1042  * else uses the buffers etc.
1043  *
1044  * "address" points to the new page we can use to move things
1045  * around..
1046  */
1047 static unsigned long try_to_align(struct buffer_head ** bh, int nrbuf,
1048         unsigned long address)
1049 {
1050         while (nrbuf-- > 0)
1051                 brelse(bh[nrbuf]);
1052         return 0;
1053 }
1054 
1055 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
1056         kdev_t dev, int *b, int size)
1057 {
1058         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1059         unsigned long page;
1060         unsigned long offset;
1061         int block;
1062         int nrbuf;
1063         int aligned = 1;
1064 
1065         bh[0] = first;
1066         nrbuf = 1;
1067         page = (unsigned long) first->b_data;
1068         if (page & ~PAGE_MASK)
1069                 aligned = 0;
1070         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
1071                 block = *++b;
1072                 if (!block)
1073                         goto no_go;
1074                 first = get_hash_table(dev, block, size);
1075                 if (!first)
1076                         goto no_go;
1077                 bh[nrbuf++] = first;
1078                 if (page+offset != (unsigned long) first->b_data)
1079                         aligned = 0;
1080         }
1081         if (!aligned)
1082                 return try_to_align(bh, nrbuf, address);
1083         mem_map[MAP_NR(page)].count++;
1084         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
1085         while (nrbuf-- > 0)
1086                 brelse(bh[nrbuf]);
1087         free_page(address);
1088         ++current->min_flt;
1089         return page;
1090 no_go:
1091         while (nrbuf-- > 0)
1092                 brelse(bh[nrbuf]);
1093         return 0;
1094 }
1095 
1096 static unsigned long try_to_load_aligned(unsigned long address,
1097         kdev_t dev, int b[], int size)
1098 {
1099         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1100         unsigned long offset;
1101         int isize = BUFSIZE_INDEX(size);
1102         int * p;
1103         int block;
1104 
1105         bh = create_buffers(address, size);
1106         if (!bh)
1107                 return 0;
1108         /* do any of the buffers already exist? punt if so.. */
1109         p = b;
1110         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1111                 block = *(p++);
1112                 if (!block)
1113                         goto not_aligned;
1114                 if (find_buffer(dev, block, size))
1115                         goto not_aligned;
1116         }
1117         tmp = bh;
1118         p = b;
1119         block = 0;
1120         while (1) {
1121                 arr[block++] = bh;
1122                 bh->b_count = 1;
1123                 bh->b_dirt = 0;
1124                 bh->b_reuse = 0;
1125                 bh->b_flushtime = 0;
1126                 bh->b_uptodate = 0;
1127                 bh->b_req = 0;
1128                 bh->b_dev = dev;
1129                 bh->b_blocknr = *(p++);
1130                 bh->b_list = BUF_CLEAN;
1131                 nr_buffers++;
1132                 nr_buffers_size[isize]++;
1133                 insert_into_queues(bh);
1134                 if (bh->b_this_page)
1135                         bh = bh->b_this_page;
1136                 else
1137                         break;
1138         }
1139         buffermem += PAGE_SIZE;
1140         bh->b_this_page = tmp;
1141         mem_map[MAP_NR(address)].count++;
1142         buffer_pages[MAP_NR(address)] = bh;
1143         read_buffers(arr,block);
1144         while (block-- > 0)
1145                 brelse(arr[block]);
1146         ++current->maj_flt;
1147         return address;
1148 not_aligned:
1149         while ((tmp = bh) != NULL) {
1150                 bh = bh->b_this_page;
1151                 put_unused_buffer_head(tmp);
1152         }
1153         return 0;
1154 }
1155 
1156 /*
1157  * Try-to-share-buffers tries to minimize memory use by trying to keep
1158  * both code pages and the buffer area in the same page. This is done by
1159  * (a) checking if the buffers are already aligned correctly in memory and
1160  * (b) if none of the buffer heads are in memory at all, trying to load
1161  * them into memory the way we want them.
1162  *
1163  * This doesn't guarantee that the memory is shared, but should under most
1164  * circumstances work very well indeed (ie >90% sharing of code pages on
1165  * demand-loadable executables).
1166  */
1167 static inline unsigned long try_to_share_buffers(unsigned long address,
1168         kdev_t dev, int *b, int size)
1169 {
1170         struct buffer_head * bh;
1171         int block;
1172 
1173         block = b[0];
1174         if (!block)
1175                 return 0;
1176         bh = get_hash_table(dev, block, size);
1177         if (bh)
1178                 return check_aligned(bh, address, dev, b, size);
1179         return try_to_load_aligned(address, dev, b, size);
1180 }
1181 
1182 /*
 1183  * bread_page reads a page's worth of buffers into memory at the desired address. It's
1184  * a function of its own, as there is some speed to be got by reading them
1185  * all at the same time, not waiting for one to be read, and then another
1186  * etc. This also allows us to optimize memory usage by sharing code pages
1187  * and filesystem buffers..
1188  */
1189 unsigned long bread_page(unsigned long address, kdev_t dev, int b[], int size, int no_share)
1190 {
1191         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1192         unsigned long where;
1193         int i, j;
1194 
1195         if (!no_share) {
1196                 where = try_to_share_buffers(address, dev, b, size);
1197                 if (where)
1198                         return where;
1199         }
1200         ++current->maj_flt;
1201         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1202                 bh[i] = NULL;
1203                 if (b[i])
1204                         bh[i] = getblk(dev, b[i], size);
1205         }
1206         read_buffers(bh,i);
1207         where = address;
1208         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, where += size) {
1209                 if (bh[i]) {
1210                         if (bh[i]->b_uptodate)
1211                                 memcpy((void *) where, bh[i]->b_data, size);
1212                         brelse(bh[i]);
1213                 } else
1214                         memset((void *) where, 0, size);
1215         }
1216         return address;
1217 }
1218 
1219 #if 0
1220 /*
1221  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1222  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1223  */
1224 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1225 {
1226         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1227         int i, j;
1228 
1229         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1230                 bh[i] = NULL;
1231                 if (b[i])
1232                         bh[i] = getblk(dev, b[i], size);
1233         }
1234         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1235                 if (bh[i]) {
1236                         memcpy(bh[i]->b_data, (void *) address, size);
1237                         bh[i]->b_uptodate = 1;
1238                         mark_buffer_dirty(bh[i], 0);
1239                         brelse(bh[i]);
1240                 } else
1241                         memset((void *) address, 0, size); /* ???!?!! */
1242         }       
1243 }
1244 #endif
1245 
1246 /*
1247  * Try to increase the number of buffers available: the size argument
1248  * is used to determine what kind of buffers we want.
1249  */
1250 static int grow_buffers(int pri, int size)
1251 {
1252         unsigned long page;
1253         struct buffer_head *bh, *tmp;
1254         struct buffer_head * insert_point;
1255         int isize;
1256 
1257         if ((size & 511) || (size > PAGE_SIZE)) {
1258                 printk("VFS: grow_buffers: size = %d\n",size);
1259                 return 0;
1260         }
1261 
1262         isize = BUFSIZE_INDEX(size);
1263 
1264         if (!(page = __get_free_page(pri)))
1265                 return 0;
1266         bh = create_buffers(page, size);
1267         if (!bh) {
1268                 free_page(page);
1269                 return 0;
1270         }
1271 
1272         insert_point = free_list[isize];
1273 
1274         tmp = bh;
1275         while (1) {
1276                 nr_free[isize]++;
1277                 if (insert_point) {
1278                         tmp->b_next_free = insert_point->b_next_free;
1279                         tmp->b_prev_free = insert_point;
1280                         insert_point->b_next_free->b_prev_free = tmp;
1281                         insert_point->b_next_free = tmp;
1282                 } else {
1283                         tmp->b_prev_free = tmp;
1284                         tmp->b_next_free = tmp;
1285                 }
1286                 insert_point = tmp;
1287                 ++nr_buffers;
1288                 if (tmp->b_this_page)
1289                         tmp = tmp->b_this_page;
1290                 else
1291                         break;
1292         }
1293         free_list[isize] = bh;
1294         buffer_pages[MAP_NR(page)] = bh;
1295         tmp->b_this_page = bh;
1296         wake_up(&buffer_wait);
1297         buffermem += PAGE_SIZE;
1298         return 1;
1299 }
1300 
1301 
1302 /* =========== Reduce the buffer memory ============= */
1303 
1304 /*
1305  * try_to_free() checks if all the buffers on this particular page
 1306  * are unused, and frees the page if so.
1307  */
1308 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp,
1309                        int priority)
1310 {
1311         unsigned long page;
1312         struct buffer_head * tmp, * p;
1313         int isize = BUFSIZE_INDEX(bh->b_size);
1314 
1315         *bhp = bh;
1316         page = (unsigned long) bh->b_data;
1317         page &= PAGE_MASK;
1318         tmp = bh;
1319         do {
1320                 if (!tmp)
1321                         return 0;
1322                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
1323                         return 0;
1324                 if (priority && tmp->b_touched)
1325                         return 0;
1326                 tmp = tmp->b_this_page;
1327         } while (tmp != bh);
1328         tmp = bh;
1329         do {
1330                 p = tmp;
1331                 tmp = tmp->b_this_page;
1332                 nr_buffers--;
1333                 nr_buffers_size[isize]--;
1334                 if (p == *bhp)
1335                   {
1336                     *bhp = p->b_prev_free;
1337                     if (p == *bhp) /* Was this the last in the list? */
1338                       *bhp = NULL;
1339                   }
1340                 remove_from_queues(p);
1341                 put_unused_buffer_head(p);
1342         } while (tmp != bh);
1343         buffermem -= PAGE_SIZE;
1344         buffer_pages[MAP_NR(page)] = NULL;
1345         free_page(page);
1346         return !mem_map[MAP_NR(page)].count;
1347 }
1348 
1349 /* Age buffers on a given page, according to whether they have been
1350    visited recently or not. */
1351 static inline void age_buffer(struct buffer_head *bh)
1352 {
1353         struct buffer_head *tmp = bh;
1354         int touched = 0;
1355 
1356         /*
1357          * When we age a page, we mark all other buffers in the page
1358          * with the "has_aged" flag.  Then, when these aliased buffers
1359          * come up for aging, we skip them until next pass.  This
1360          * ensures that a page full of multiple buffers only gets aged
1361          * once per pass through the lru lists. 
1362          */
1363         if (bh->b_has_aged) {
1364                 bh->b_has_aged = 0;
1365                 return;
1366         }
1367         
1368         do {
1369                 touched |= tmp->b_touched;
1370                 tmp->b_touched = 0;
1371                 tmp = tmp->b_this_page;
1372                 tmp->b_has_aged = 1;
1373         } while (tmp != bh);
1374         bh->b_has_aged = 0;
1375 
1376         if (touched) 
1377                 touch_page((unsigned long) bh->b_data);
1378         else
1379                 age_page((unsigned long) bh->b_data);
1380 }
1381 
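/*
 * Editor's note (not part of the original source): a worked example of the
 * b_has_aged scheme above.  With 1kB buffers on a 4kB page, four buffer
 * heads share one page.  The first of the four that the LRU scan reaches
 * ORs together their b_touched bits, touches or ages the page once, and
 * sets b_has_aged on the other three; when those three come up later in
 * the same pass they merely clear the flag and return, so the page is aged
 * exactly once per pass.
 */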
1382 /*
1383  * Consult the load average for buffers and decide whether or not
1384  * we should shrink the buffers of one size.  If we decide yes,
1385  * do it and return 1.  Else return 0.  Do not attempt to shrink the size
1386  * that is specified.
1387  *
1388  * I would prefer not to use a load average, but the way things are now it
1389  * seems unavoidable.  The way to get rid of it would be to force clustering
1390  * universally, so that when we reclaim buffers we always reclaim an entire
1391  * page.  Doing this would mean that we all need to move towards QMAGIC.
1392  */
1393 
1394 static int maybe_shrink_lav_buffers(int size)
1395 {          
1396         int nlist;
1397         int isize;
1398         int total_lav, total_n_buffers, n_sizes;
1399         
1400         /* Do not consider the shared buffers since they would not tend
1401            to have getblk called very often, and this would throw off
1402            the lav.  They are not easily reclaimable anyway (let the swapper
1403            make the first move). */
1404   
1405         total_lav = total_n_buffers = n_sizes = 0;
1406         for(nlist = 0; nlist < NR_SIZES; nlist++)
1407          {
1408                  total_lav += buffers_lav[nlist];
1409                  if(nr_buffers_size[nlist]) n_sizes++;
1410                  total_n_buffers += nr_buffers_size[nlist];
1411                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1412          }
1413         
1414         /* See if we have an excessive number of buffers of a particular
1415            size - if so, victimize that bunch. */
1416   
1417         isize = (size ? BUFSIZE_INDEX(size) : -1);
1418         
1419         if (n_sizes > 1)
1420                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1421                   {
1422                           if(nlist == isize) continue;
1423                           if(nr_buffers_size[nlist] &&
1424                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1425                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1426                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1427                                             return 1;
1428                   }
1429         return 0;
1430 }
1431 
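/*
 * Editor's note (not part of the original source): the test above is a
 * cross-multiplied form of
 *
 *      lav_const * buffers_lav[n] / total_lav  <  unshared_n / total_n
 *
 * i.e. a size is victimized when its share of the (unshared) buffers
 * exceeds lav_const times its share of the recent buffer activity.
 * Hypothetical numbers: with total_lav = 100, total_n = 1000 and
 * lav_const = 16 (an assumed value), a size whose load average is 2 but
 * which still holds 400 unshared buffers gives 16*2*1000 = 32000 <
 * 100*400 = 40000, so that size is shrunk first.
 */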
1432 /*
1433  * Try to free up some pages by shrinking the buffer-cache
1434  *
1435  * Priority tells the routine how hard to try to shrink the
1436  * buffers: 6 means "don't bother too much", while a value
1437  * of 0 means "we'd better get some free pages now".
1438  *
1439  * "limit" is meant to limit the shrink-action only to pages
1440  * that are in the 0 - limit address range, for DMA re-allocations.
1441  * We ignore that right now.
1442  */
1443 int shrink_buffers(unsigned int priority, unsigned long limit)
1444 {
1445         if (priority < 2) {
1446                 sync_buffers(0,0);
1447         }
1448 
1449         if(priority == 2) wakeup_bdflush(1);
1450 
1451         if(maybe_shrink_lav_buffers(0)) return 1;
1452 
1453         /* No good candidate size - take any size we can find */
1454         return shrink_specific_buffers(priority, 0);
1455 }
1456 
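/*
 * Editor's note (not part of the original source): shrink_buffers() is
 * intended to be called by the memory allocator with a falling priority as
 * the shortage gets worse.  A minimal, hypothetical caller might look like
 * the sketch below ("need_free_page" is an invented name, and the limit
 * argument is passed as 0 since the current code ignores it).
 */
#if 0
static int need_free_page(void)
{
        int priority;

        for (priority = 6; priority >= 0; priority--)
                if (shrink_buffers(priority, 0))
                        return 1;       /* a whole page was released */
        return 0;
}
#endif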
1457 static int shrink_specific_buffers(unsigned int priority, int size)
1458 {
1459         struct buffer_head *bh;
1460         int nlist;
1461         int i, isize, isize1;
1462 
1463 #ifdef DEBUG
1464         if(size) printk("Shrinking buffers of size %d\n", size);
1465 #endif
1466         /* First try the free lists, and see if we can get a complete page
1467            from here */
1468         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1469 
1470         for(isize = 0; isize<NR_SIZES; isize++){
1471                 if(isize1 != -1 && isize1 != isize) continue;
1472                 bh = free_list[isize];
1473                 if(!bh) continue;
1474                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1475                         if (bh->b_count || !bh->b_this_page)
1476                                  continue;
1477                         if (!age_of((unsigned long) bh->b_data) &&
1478                             try_to_free(bh, &bh, 6))
1479                                  return 1;
1480                         if(!bh) break;
1481                         /* Some interrupt must have used it after we
1482                            freed the page.  No big deal - keep looking */
1483                 }
1484         }
1485         
1486         /* Not enough in the free lists, now try the lru list */
1487         
1488         for(nlist = 0; nlist < NR_LIST; nlist++) {
1489         repeat1:
1490                 if(priority > 2 && nlist == BUF_SHARED) continue;
1491                 i = nr_buffers_type[nlist];
1492                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1493                 for ( ; i > 0; i-- ) {
1494                         bh = next_to_age[nlist];
1495                         if (!bh)
1496                                 break;
1497                         next_to_age[nlist] = bh->b_next_free;
1498 
1499                         /* First, age the buffer. */
1500                         age_buffer(bh);
1501                         /* We may have stalled while waiting for I/O
1502                            to complete. */
1503                         if(bh->b_list != nlist) goto repeat1;
1504                         if (bh->b_count || !bh->b_this_page)
1505                                  continue;
1506                         if(size && bh->b_size != size) continue;
1507                         if (bh->b_lock)
1508                                  if (priority)
1509                                           continue;
1510                                  else
1511                                           wait_on_buffer(bh);
1512                         if (bh->b_dirt) {
1513                                 bh->b_count++;
1514                                 bh->b_flushtime = 0;
1515                                 ll_rw_block(WRITEA, 1, &bh);
1516                                 bh->b_count--;
1517                                 continue;
1518                         }
1519                         /* At priority 6, only consider really old
1520                            (age==0) buffers for reclaiming.  At
1521                            priority 0, consider any buffers. */
1522                         if ((age_of((unsigned long) bh->b_data) >>
1523                              (6-priority)) > 0)
1524                                 continue;                               
1525                         if (try_to_free(bh, &bh, 0))
1526                                  return 1;
1527                         if(!bh) break;
1528                 }
1529         }
1530         return 0;
1531 }
1532 
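/*
 * Editor's note (not part of the original source): the age test above,
 * age_of(data) >> (6 - priority) > 0, widens the net as the priority
 * drops.  At priority 6 the shift is 0, so only pages whose age has
 * decayed all the way to 0 may be reclaimed; at priority 3 any age below
 * 8 qualifies; at priority 0 any age below 64 qualifies.
 */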
1533 
1534 /* ================== Debugging =================== */
1535 
1536 void show_buffers(void)
1537 {
1538         struct buffer_head * bh;
1539         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1540         int shared;
1541         int nlist, isize;
1542 
1543         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1544         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1545         printk("Buffer blocks:   %6d\n",nr_buffers);
1546 
1547         for(nlist = 0; nlist < NR_LIST; nlist++) {
1548           shared = found = locked = dirty = used = lastused = 0;
1549           bh = lru_list[nlist];
1550           if(!bh) continue;
1551           do {
1552                 found++;
1553                 if (bh->b_lock)
1554                         locked++;
1555                 if (bh->b_dirt)
1556                         dirty++;
1557                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1558                 if (bh->b_count)
1559                         used++, lastused = found;
1560                 bh = bh->b_next_free;
1561               } while (bh != lru_list[nlist]);
1562         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
1563                 nlist, found, used, lastused, locked, dirty, shared);
1564         };
1565         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared\n");
1566         for(isize = 0; isize<NR_SIZES; isize++){
1567                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1568                        buffers_lav[isize], nr_free[isize]);
1569                 for(nlist = 0; nlist < NR_LIST; nlist++)
1570                          printk("%7d ", nr_buffers_st[isize][nlist]);
1571                 printk("\n");
1572         }
1573 }
1574 
1575 
1576 /* ====================== Cluster patches for ext2 ==================== */
1577 
1578 /*
1579  * try_to_reassign() checks if all the buffers on this particular page
1580  * are unused, and reassigns them to a new cluster if so.
1581  */
1582 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1583                            kdev_t dev, unsigned int starting_block)
1584 {
1585         unsigned long page;
1586         struct buffer_head * tmp, * p;
1587 
1588         *bhp = bh;
1589         page = (unsigned long) bh->b_data;
1590         page &= PAGE_MASK;
1591         if(mem_map[MAP_NR(page)].count != 1) return 0;
1592         tmp = bh;
1593         do {
1594                 if (!tmp)
1595                          return 0;
1596                 
1597                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
1598                          return 0;
1599                 tmp = tmp->b_this_page;
1600         } while (tmp != bh);
1601         tmp = bh;
1602         
1603         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1604                  tmp = tmp->b_this_page;
1605         
1606         /* This is the buffer at the head of the page */
1607         bh = tmp;
1608         do {
1609                 p = tmp;
1610                 tmp = tmp->b_this_page;
1611                 remove_from_queues(p);
1612                 p->b_dev = dev;
1613                 p->b_uptodate = 0;
1614                 p->b_req = 0;
1615                 p->b_blocknr = starting_block++;
1616                 insert_into_queues(p);
1617         } while (tmp != bh);
1618         return 1;
1619 }
1620 
1621 /*
1622  * Try to find a free cluster by locating a page where
1623  * all of the buffers are unused.  We would like this function
1624  * to be atomic, so we do not call anything that might cause
1625  * the process to sleep.  The priority is somewhat similar to
1626  * the priority used in shrink_buffers.
1627  * 
1628  * My thinking is that the kernel should end up using whole
1629  * pages for the buffer cache as much of the time as possible.
1630  * This way the other buffers on a particular page are likely
1631  * to be very near each other on the free list, and we will not
1632  * be expiring data prematurely.  For now we only cannibalize buffers
1633  * of the same size to keep the code simpler.
1634  */
1635 static int reassign_cluster(kdev_t dev, 
1636                      unsigned int starting_block, int size)
1637 {
1638         struct buffer_head *bh;
1639         int isize = BUFSIZE_INDEX(size);
1640         int i;
1641 
1642         /* We want to give ourselves a really good shot at generating
1643            a cluster, and since we only take buffers from the free
1644            list, we "overfill" it a little. */
1645 
1646         while(nr_free[isize] < 32) refill_freelist(size);
1647 
1648         bh = free_list[isize];
1649         if(bh)
1650                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1651                          if (!bh->b_this_page)  continue;
1652                          if (try_to_reassign(bh, &bh, dev, starting_block))
1653                                  return 4;
1654                  }
1655         return 0;
1656 }
1657 
1658 /* This function tries to generate a new cluster of buffers
1659  * from a new page in memory.  We should only do this if we have
1660  * not expanded the buffer cache to the maximum size that we allow.
1661  */
1662 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1663 {
1664         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1665         int isize = BUFSIZE_INDEX(size);
1666         unsigned long offset;
1667         unsigned long page;
1668         int nblock;
1669 
1670         page = get_free_page(GFP_NOBUFFER);
1671         if(!page) return 0;
1672 
1673         bh = create_buffers(page, size);
1674         if (!bh) {
1675                 free_page(page);
1676                 return 0;
1677         };
1678         nblock = block;
1679         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1680                 if (find_buffer(dev, nblock++, size))
1681                          goto not_aligned;
1682         }
1683         tmp = bh;
1684         nblock = 0;
1685         while (1) {
1686                 arr[nblock++] = bh;
1687                 bh->b_count = 1;
1688                 bh->b_dirt = 0;
1689                 bh->b_flushtime = 0;
1690                 bh->b_lock = 0;
1691                 bh->b_uptodate = 0;
1692                 bh->b_req = 0;
1693                 bh->b_dev = dev;
1694                 bh->b_list = BUF_CLEAN;
1695                 bh->b_blocknr = block++;
1696                 nr_buffers++;
1697                 nr_buffers_size[isize]++;
1698                 insert_into_queues(bh);
1699                 if (bh->b_this_page)
1700                         bh = bh->b_this_page;
1701                 else
1702                         break;
1703         }
1704         buffermem += PAGE_SIZE;
1705         buffer_pages[MAP_NR(page)] = bh;
1706         bh->b_this_page = tmp;
1707         while (nblock-- > 0)
1708                 brelse(arr[nblock]);
1709         return 4; /* ?? */
1710 not_aligned:
1711         while ((tmp = bh) != NULL) {
1712                 bh = bh->b_this_page;
1713                 put_unused_buffer_head(tmp);
1714         }
1715         free_page(page);
1716         return 0;
1717 }
1718 
1719 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1720 {
1721         int i, offset;
1722         
1723         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1724                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1725                 if(find_buffer(dev, b[i], size)) return 0;
1726         };
1727 
1728         /* OK, we have a candidate for a new cluster */
1729         
1730         /* See if one size of buffer is over-represented in the buffer cache,
1731            if so reduce the numbers of buffers */
1732         if(maybe_shrink_lav_buffers(size))
1733          {
1734                  int retval;
1735                  retval = try_to_generate_cluster(dev, b[0], size);
1736                  if(retval) return retval;
1737          };
1738         
1739         if (nr_free_pages > min_free_pages*2) 
1740                  return try_to_generate_cluster(dev, b[0], size);
1741         else
1742                  return reassign_cluster(dev, b[0], size);
1743 }
1744 
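/*
 * Editor's note (not part of the original source): generate_cluster()
 * expects b[] to hold PAGE_SIZE/size consecutive block numbers and gives up
 * if any of them is already in the cache.  A hypothetical filesystem
 * caller, wanting its reads to land on one whole page, might set the
 * cluster up before doing the usual bread()/getblk() calls, as sketched
 * below (the function name is invented).
 */
#if 0
static void ext2ish_prefill_cluster(kdev_t dev, int first_block, int blocksize)
{
        int b[MAX_BUF_PER_PAGE];
        int i, nblocks = PAGE_SIZE / blocksize;

        for (i = 0; i < nblocks; i++)
                b[i] = first_block + i;
        /* Harmless if it fails: getblk() falls back to single buffers. */
        generate_cluster(dev, b, blocksize);
}
#endif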
1745 
1746 /* ===================== Init ======================= */
1747 
1748 /*
1749  * This initializes the initial buffer free list.  nr_buffers_type is set
1750  * to one less than the actual number of buffers, as a sop to backwards
1751  * compatibility --- the old code did this (I think unintentionally,
1752  * but I'm not sure), and programs in the ps package expect it.
1753  *                                      - TYT 8/30/92
1754  */
1755 void buffer_init(void)
1756 {
1757         int i;
1758         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1759         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1760 
1761         if (memsize >= 4*1024*1024) {
1762                 if(memsize >= 16*1024*1024)
1763                          nr_hash = 16381;
1764                 else
1765                          nr_hash = 4093;
1766         } else {
1767                 nr_hash = 997;
1768         };
1769         
1770         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1771                                                      sizeof(struct buffer_head *));
1772 
1773 
1774         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1775                                                      sizeof(struct buffer_head *));
1776         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1777                 buffer_pages[i] = NULL;
1778 
1779         for (i = 0 ; i < nr_hash ; i++)
1780                 hash_table[i] = NULL;
1781         lru_list[BUF_CLEAN] = 0;
1782         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1783         if (!free_list[isize])
1784                 panic("VFS: Unable to initialize buffer free list!");
1785         return;
1786 }
1787 
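/*
 * Editor's note (not part of the original source): the hash-table sizes
 * chosen above -- 997 buckets below 4MB of memory, 4093 up to 16MB and
 * 16381 beyond that -- are all prime, which helps the device/block hash
 * used by find_buffer() spread entries evenly across the buckets.
 */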
1788 
1789 /* ====================== bdflush support =================== */
1790 
1791 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1792  * response to dirty buffers.  Once this process is activated, we write back
1793  * a limited number of buffers to the disks and then go back to sleep again.
1794  * In effect this is a process which never leaves kernel mode, and does not have
1795  * any user memory associated with it except for the stack.  There is also
1796  * a kernel stack page, which obviously must be separate from the user stack.
1797  */
1798 struct wait_queue * bdflush_wait = NULL;
1799 struct wait_queue * bdflush_done = NULL;
1800 
1801 static int bdflush_running = 0;
1802 
1803 static void wakeup_bdflush(int wait)
1804 {
1805         if(!bdflush_running){
1806                 printk("Warning - bdflush not running\n");
1807                 sync_buffers(0,0);
1808                 return;
1809         };
1810         wake_up(&bdflush_wait);
1811         if(wait) sleep_on(&bdflush_done);
1812 }
1813 
1814 
1815 
1816 /* 
1817  * Here we attempt to write back old buffers.  We also try to flush inodes
1818  * and superblocks as well, since this function is essentially "update", and
1819  * otherwise there would be no way of ensuring that these quantities ever
1820  * get written back.  Ideally, we would have a timestamp on the inodes
1821  * and superblocks so that we could write back only the old ones as well.
1822  */
1823 
1824 asmlinkage int sync_old_buffers(void)
1825 {
1826         int i, isize;
1827         int ndirty, nwritten;
1828         int nlist;
1829         int ncount;
1830         struct buffer_head * bh, *next;
1831 
1832         sync_supers(0);
1833         sync_inodes(0);
1834 
1835         ncount = 0;
1836 #ifdef DEBUG
1837         for(nlist = 0; nlist < NR_LIST; nlist++)
1838 #else
1839         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1840 #endif
1841         {
1842                 ndirty = 0;
1843                 nwritten = 0;
1844         repeat:
1845                 bh = lru_list[nlist];
1846                 if(bh) 
1847                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1848                                  /* We may have stalled while waiting for I/O to complete. */
1849                                  if(bh->b_list != nlist) goto repeat;
1850                                  next = bh->b_next_free;
1851                                  if(!lru_list[nlist]) {
1852                                          printk("Dirty list empty %d\n", i);
1853                                          break;
1854                                  }
1855                                  
1856                                  /* Clean buffer on dirty list?  Refile it */
1857                                  if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1858                                   {
1859                                           refile_buffer(bh);
1860                                           continue;
1861                                   }
1862                                  
1863                                  if (bh->b_lock || !bh->b_dirt)
1864                                           continue;
1865                                  ndirty++;
1866                                  if(bh->b_flushtime > jiffies) continue;
1867                                  nwritten++;
1868                                  bh->b_count++;
1869                                  bh->b_flushtime = 0;
1870 #ifdef DEBUG
1871                                  if(nlist != BUF_DIRTY) ncount++;
1872 #endif
1873                                  ll_rw_block(WRITE, 1, &bh);
1874                                  bh->b_count--;
1875                          }
1876         }
1877 #ifdef DEBUG
1878         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1879         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1880 #endif
1881         
1882         /* We assume that we only come through here on a regular
1883            schedule, like every 5 seconds.  Now update load averages.  
1884            Shift usage counts to prevent overflow. */
1885         for(isize = 0; isize<NR_SIZES; isize++){
1886                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1887                 buffer_usage[isize] = 0;
1888         };
1889         return 0;
1890 }
1891 
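/*
 * Editor's note (not part of the original source): the CALC_LOAD() pass
 * above maintains buffers_lav[] as an exponentially decaying average of
 * buffer_usage[] -- the raw count of buffer activity for each size since
 * the previous pass -- using bdf_prm.b_un.lav_const as the decay constant,
 * and then clears the raw counts.  Since this function runs on a fixed
 * schedule (nominally every five seconds), buffers_lav[] measures how
 * "hot" each buffer size has been recently, which is exactly what
 * maybe_shrink_lav_buffers() compares against.
 */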
1892 
1893 /* This is the interface to bdflush.  As we get more sophisticated, we can
1894  * pass tuning parameters to this "process", to adjust how it behaves.  If you
1895  * invoke this again after you have done this once, you would simply modify 
1896  * the tuning parameters.  We would want to verify each parameter, however,
1897  * to make sure that it is reasonable. */
1898 
1899 asmlinkage int sys_bdflush(int func, long data)
1900 {
1901         int i, error;
1902         int ndirty;
1903         int nlist;
1904         int ncount;
1905         struct buffer_head * bh, *next;
1906 
1907         if (!suser())
1908                 return -EPERM;
1909 
1910         if (func == 1)
1911                  return sync_old_buffers();
1912 
1913         /* Basically func 0 starts the daemon, func 1 syncs old buffers; for func >= 2, even values read parameter (func-2)/2 and odd values write it */
1914         if (func >= 2) {
1915                 i = (func-2) >> 1;
1916                 if (i < 0 || i >= N_PARAM)
1917                         return -EINVAL;
1918                 if((func & 1) == 0) {
1919                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1920                         if (error)
1921                                 return error;
1922                         put_user(bdf_prm.data[i], (int*)data);
1923                         return 0;
1924                 };
1925                 if (data < bdflush_min[i] || data > bdflush_max[i])
1926                         return -EINVAL;
1927                 bdf_prm.data[i] = data;
1928                 return 0;
1929         };
1930         
1931         if (bdflush_running)
1932                 return -EBUSY; /* Only one copy of this running at one time */
1933         bdflush_running++;
1934         
1935         /* OK, from here on is the daemon */
1936         
1937         for (;;) {
1938 #ifdef DEBUG
1939                 printk("bdflush() activated...");
1940 #endif
1941                 
1942                 ncount = 0;
1943 #ifdef DEBUG
1944                 for(nlist = 0; nlist < NR_LIST; nlist++)
1945 #else
1946                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1947 #endif
1948                  {
1949                          ndirty = 0;
1950                  repeat:
1951                          bh = lru_list[nlist];
1952                          if(bh) 
1953                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1954                                        bh = next) {
1955                                           /* We may have stalled while waiting for I/O to complete. */
1956                                           if(bh->b_list != nlist) goto repeat;
1957                                           next = bh->b_next_free;
1958                                           if(!lru_list[nlist]) {
1959                                                   printk("Dirty list empty %d\n", i);
1960                                                   break;
1961                                           }
1962                                           
1963                                           /* Clean buffer on dirty list?  Refile it */
1964                                           if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1965                                            {
1966                                                    refile_buffer(bh);
1967                                                    continue;
1968                                            }
1969                                           
1970                                           if (bh->b_lock || !bh->b_dirt)
1971                                                    continue;
1972                                           /* Should we write back buffers that are shared or not??
1973                                              currently dirty buffers are not shared, so it does not matter */
1974                                           bh->b_count++;
1975                                           ndirty++;
1976                                           bh->b_flushtime = 0;
1977                                           ll_rw_block(WRITE, 1, &bh);
1978 #ifdef DEBUG
1979                                           if(nlist != BUF_DIRTY) ncount++;
1980 #endif
1981                                           bh->b_count--;
1982                                   }
1983                  }
1984 #ifdef DEBUG
1985                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1986                 printk("sleeping again.\n");
1987 #endif
1988                 wake_up(&bdflush_done);
1989                 
1990                 /* If there are still a lot of dirty buffers around, skip the sleep
1991                    and flush some more */
1992                 
1993                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1994                    bdf_prm.b_un.nfract/100) {
1995                         if (current->signal & (1 << (SIGKILL-1))) {
1996                                 bdflush_running--;
1997                                 return 0;
1998                         }
1999                         current->signal = 0;
2000                         interruptible_sleep_on(&bdflush_wait);
2001                 }
2002         }
2003 }
2004 
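/*
 * Editor's note (not part of the original source): a hypothetical
 * user-space sketch of how this interface is driven; the caller must be
 * root (suser() above).  An "update"-style daemon calls func 0 once to
 * become the flushing daemon; a periodic sync helper calls func 1; tuning
 * parameter i is read with func 2*i+2 and written with func 2*i+3.  The
 * sketch assumes the C library exposes the call as SYS_bdflush via
 * syscall(); the exact parameter layout of bdf_prm.data[] is not shown in
 * this file, so parameter 0 is read and written back unchanged.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        int param0;

        /* func 2 = read tuning parameter 0 into the int that data points at. */
        if (syscall(SYS_bdflush, 2, (long) &param0) == 0)
                printf("bdf_prm.data[0] = %d\n", param0);

        /* func 3 = write tuning parameter 0; the value must lie between
           bdflush_min[0] and bdflush_max[0] or the call fails with EINVAL. */
        syscall(SYS_bdflush, 3, (long) param0);

        /* func 1 = write back old buffers, inodes and superblocks once. */
        syscall(SYS_bdflush, 1, 0L);

        /* func 0 = turn the calling process into the bdflush daemon;
           this call does not return until the process is killed. */
        syscall(SYS_bdflush, 0, 0L);
        return 0;
}
#endif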
2005 
2006 /*
2007  * Overrides for Emacs so that we follow Linus's tabbing style.
2008  * Emacs will notice this stuff at the end of the file and automatically
2009  * adjust the settings for this buffer only.  This must remain at the end
2010  * of the file.
2011  * ---------------------------------------------------------------------------
2012  * Local variables:
2013  * c-indent-level: 8
2014  * c-brace-imaginary-offset: 0
2015  * c-brace-offset: -8
2016  * c-argdecl-indent: 8
2017  * c-label-offset: -8
2018  * c-continued-statement-offset: 8
2019  * c-continued-brace-offset: 0
2020  * End:
2021  */
