root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. remove_from_hash_queue
  10. remove_from_lru_list
  11. remove_from_free_list
  12. remove_from_queues
  13. put_last_lru
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. refill_freelist
  20. getblk
  21. set_writetime
  22. refile_buffer
  23. __brelse
  24. __bforget
  25. bread
  26. breada
  27. put_unused_buffer_head
  28. get_more_buffer_heads
  29. get_unused_buffer_head
  30. create_buffers
  31. read_buffers
  32. try_to_align
  33. check_aligned
  34. try_to_load_aligned
  35. try_to_share_buffers
  36. bread_page
  37. bwrite_page
  38. grow_buffers
  39. try_to_free
  40. age_buffer
  41. maybe_shrink_lav_buffers
  42. shrink_buffers
  43. shrink_specific_buffers
  44. show_buffers
  45. try_to_reassign
  46. reassign_cluster
  47. try_to_generate_cluster
  48. generate_cluster
  49. buffer_init
  50. wakeup_bdflush
  51. sync_old_buffers
  52. sys_bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 #include <linux/sched.h>
  20 #include <linux/kernel.h>
  21 #include <linux/major.h>
  22 #include <linux/string.h>
  23 #include <linux/locks.h>
  24 #include <linux/errno.h>
  25 #include <linux/malloc.h>
  26 #include <linux/swapctl.h>
  27 
  28 #include <asm/system.h>
  29 #include <asm/segment.h>
  30 #include <asm/io.h>
  31 
  32 #define NR_SIZES 4
  33 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  34 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  35 
  36 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  37 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
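
The two tables above are inverses of one another: a legal block size shifted right by nine bits indexes buffersize_index, and bufferindex_size maps the resulting small index back to the size in bytes. A minimal user-space sketch of the same lookup, with the local names index_of and size_of standing in for the arrays above:

#include <stdio.h>

static const signed char index_of[9] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
static const short size_of[4] = {512, 1024, 2048, 4096};

int main(void)
{
        static const int sizes[] = {512, 1024, 2048, 4096};
        int i;

        for (i = 0; i < 4; i++) {
                int idx = index_of[sizes[i] >> 9];   /* same as BUFSIZE_INDEX(size) */
                printf("%4d bytes -> index %d -> %d bytes\n",
                       sizes[i], idx, size_of[idx]);
        }
        return 0;
}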
  38 
  39 static int grow_buffers(int pri, int size);
  40 static int shrink_specific_buffers(unsigned int priority, int size);
  41 static int maybe_shrink_lav_buffers(int);
  42 
  43 static int nr_hash = 0;  /* Size of hash table */
  44 static struct buffer_head ** hash_table;
  45 struct buffer_head ** buffer_pages;
  46 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  47 /* next_to_age is an array of pointers into the lru lists, used to
  48    cycle through the buffers aging their contents when deciding which
  49    buffers to discard when more memory is needed */
  50 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  51 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  52 static struct buffer_head * unused_list = NULL;
  53 static struct wait_queue * buffer_wait = NULL;
  54 
  55 int nr_buffers = 0;
  56 int nr_buffers_type[NR_LIST] = {0,};
  57 int nr_buffers_size[NR_SIZES] = {0,};
  58 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  59 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  60 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  61 int nr_free[NR_SIZES] = {0,};
  62 int buffermem = 0;
  63 int nr_buffer_heads = 0;
  64 extern int *blksize_size[];
  65 
  66 /* Here is the parameter block for the bdflush process. */
  67 static void wakeup_bdflush(int);
  68 
  69 #define N_PARAM 9
  70 #define LAV
  71 
  72 static union bdflush_param{
  73         struct {
  74                 int nfract;  /* Percentage of buffer cache dirty to 
  75                                 activate bdflush */
  76                 int ndirty;  /* Maximum number of dirty blocks to write out per
  77                                 wake-cycle */
  78                 int nrefill; /* Number of clean buffers to try and obtain
  79                                 each time we call refill */
  80                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  81                                   when trying to refill buffers. */
  82                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  83                                     search for free clusters */
  84                 int age_buffer;  /* Time for normal buffer to age before 
  85                                     we flush it */
  86                 int age_super;  /* Time for superblock to age before we 
  87                                    flush it */
  88                 int lav_const;  /* Constant used for load average (time
  89                                    constant) */
  90                 int lav_ratio;  /* Used to determine how low a lav for a
  91                                    particular size can go before we start to
  92                                    trim back the buffers */
  93         } b_un;
  94         unsigned int data[N_PARAM];
  95 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
  96 
  97 /* The lav constant is set for 1 minute, as long as the update process runs
  98    every 5 seconds.  If you change the frequency of update, the time
  99    constant will also change. */
 100 
 101 
 102 /* These are the min and max parameter values that we will allow to be assigned */
 103 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 104 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
 105 
 106 /*
 107  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  108  * and to get rid of the cli-sti pairs. The wait-queue routines still
 109  * need cli-sti, but now it's just a couple of 386 instructions or so.
 110  *
 111  * Note that the real wait_on_buffer() is an inline function that checks
 112  * if 'b_wait' is set before calling this, so that the queues aren't set
 113  * up unnecessarily.
 114  */
 115 void __wait_on_buffer(struct buffer_head * bh)
 116 {
 117         struct wait_queue wait = { current, NULL };
 118 
 119         bh->b_count++;
 120         add_wait_queue(&bh->b_wait, &wait);
 121 repeat:
 122         current->state = TASK_UNINTERRUPTIBLE;
 123         if (buffer_locked(bh)) {
 124                 schedule();
 125                 goto repeat;
 126         }
 127         remove_wait_queue(&bh->b_wait, &wait);
 128         bh->b_count--;
 129         current->state = TASK_RUNNING;
 130 }
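
The inline wait_on_buffer() mentioned in the comment above is not defined in this file; a minimal sketch of the wrapper's likely shape, assuming it performs the same buffer_locked() test used throughout this file (the real definition presumably lives in <linux/locks.h>, which is included above):

/* Sketch only: shows the cheap test that avoids setting up a wait queue
 * when the buffer is not locked; not the authoritative definition. */
extern inline void wait_on_buffer(struct buffer_head * bh)
{
        if (buffer_locked(bh))
                __wait_on_buffer(bh);
}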
 131 
 132 /* Call sync_buffers with wait!=0 to ensure that the call does not
 133    return until all buffer writes have completed.  Sync() may return
 134    before the writes have finished; fsync() may not. */
 135 
 136 
 137 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 138    spontaneously dirty themselves without ever brelse being called.
 139    We will ultimately want to put these in a separate list, but for
 140    now we search all of the lists for dirty buffers */
 141 
 142 static int sync_buffers(kdev_t dev, int wait)
 143 {
 144         int i, retry, pass = 0, err = 0;
 145         int nlist, ncount;
 146         struct buffer_head * bh, *next;
 147 
 148         /* One pass for no-wait, three for wait:
 149            0) write out all dirty, unlocked buffers;
 150            1) write out all dirty buffers, waiting if locked;
 151            2) wait for completion by waiting for all buffers to unlock. */
 152  repeat:
 153         retry = 0;
 154  repeat2:
 155         ncount = 0;
 156         /* We search all lists as a failsafe mechanism, not because we expect
 157            there to be dirty buffers on any of the other lists. */
 158         for(nlist = 0; nlist < NR_LIST; nlist++)
 159          {
 160          repeat1:
 161                  bh = lru_list[nlist];
 162                  if(!bh) continue;
 163                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 164                          if(bh->b_list != nlist) goto repeat1;
 165                          next = bh->b_next_free;
 166                          if(!lru_list[nlist]) break;
 167                          if (dev && bh->b_dev != dev)
 168                                   continue;
 169                          if (buffer_locked(bh))
 170                           {
 171                                   /* Buffer is locked; skip it unless wait is
 172                                      requested AND pass > 0. */
 173                                   if (!wait || !pass) {
 174                                           retry = 1;
 175                                           continue;
 176                                   }
 177                                   wait_on_buffer (bh);
 178                                   goto repeat2;
 179                           }
 180                          /* If an unlocked buffer is not uptodate, there has
 181                              been an IO error. Skip it. */
 182                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 183                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 184                                   err = 1;
 185                                   continue;
 186                           }
 187                          /* Don't write clean buffers.  Don't write ANY buffers
 188                             on the third pass. */
 189                          if (!buffer_dirty(bh) || pass>=2)
 190                                   continue;
 191                          /* don't bother about locked buffers */
 192                          if (buffer_locked(bh))
 193                                  continue;
 194                          bh->b_count++;
 195                          bh->b_flushtime = 0;
 196                          ll_rw_block(WRITE, 1, &bh);
 197 
 198                          if(nlist != BUF_DIRTY) { 
 199                                  printk("[%d %s %ld] ", nlist,
 200                                         kdevname(bh->b_dev), bh->b_blocknr);
 201                                  ncount++;
 202                          };
 203                          bh->b_count--;
 204                          retry = 1;
 205                  }
 206          }
 207         if (ncount)
 208           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 209         
 210         /* If we are waiting for the sync to succeed, and if any dirty
 211            blocks were written, then repeat; on the second pass, only
 212            wait for buffers being written (do not pass to write any
 213            more buffers on the second pass). */
 214         if (wait && retry && ++pass<=2)
 215                  goto repeat;
 216         return err;
 217 }
 218 
 219 void sync_dev(kdev_t dev)
 220 {
 221         sync_buffers(dev, 0);
 222         sync_supers(dev);
 223         sync_inodes(dev);
 224         sync_buffers(dev, 0);
 225 }
 226 
 227 int fsync_dev(kdev_t dev)
 228 {
 229         sync_buffers(dev, 0);
 230         sync_supers(dev);
 231         sync_inodes(dev);
 232         return sync_buffers(dev, 1);
 233 }
 234 
 235 asmlinkage int sys_sync(void)
 236 {
 237         fsync_dev(0);
 238         return 0;
 239 }
 240 
 241 int file_fsync (struct inode *inode, struct file *filp)
 242 {
 243         return fsync_dev(inode->i_dev);
 244 }
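
A filesystem with no finer-grained way to flush a single file can point its file_operations at this generic helper so that sys_fsync() below reaches it. A hedged sketch, with the examplefs names invented for illustration:

/* Sketch (hypothetical examplefs): reuse the generic file_fsync() above,
 * which simply flushes the whole device the inode lives on. */
static struct file_operations examplefs_file_ops;

static void examplefs_setup_fops(void)
{
        examplefs_file_ops.fsync = file_fsync;
}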
 245 
 246 asmlinkage int sys_fsync(unsigned int fd)
 247 {
 248         struct file * file;
 249         struct inode * inode;
 250 
 251         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 252                 return -EBADF;
 253         if (!file->f_op || !file->f_op->fsync)
 254                 return -EINVAL;
 255         if (file->f_op->fsync(inode,file))
 256                 return -EIO;
 257         return 0;
 258 }
 259 
 260 void invalidate_buffers(kdev_t dev)
 261 {
 262         int i;
 263         int nlist;
 264         struct buffer_head * bh;
 265 
 266         for(nlist = 0; nlist < NR_LIST; nlist++) {
 267                 bh = lru_list[nlist];
 268                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 269                         if (bh->b_dev != dev)
 270                                 continue;
 271                         wait_on_buffer(bh);
 272                         if (bh->b_dev != dev)
 273                                 continue;
 274                         if (bh->b_count)
 275                                 continue;
 276                         bh->b_flushtime = 0;
 277                         clear_bit(BH_Uptodate, &bh->b_state);
 278                         clear_bit(BH_Dirty, &bh->b_state);
 279                         clear_bit(BH_Req, &bh->b_state);
 280                 }
 281         }
 282 }
 283 
 284 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 285 #define hash(dev,block) hash_table[_hashfn(dev,block)]
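
A small user-space model of the bucket computation above; nr_hash, the device cookie (standing in for HASHDEV(dev)) and the block numbers are made-up demo values:

#include <stdio.h>

int main(void)
{
        unsigned int nr_hash = 613;             /* demo table size */
        unsigned int dev = 0x0300;              /* stand-in for HASHDEV(dev) */
        unsigned int block;

        for (block = 100; block < 104; block++)
                printf("block %u -> bucket %u\n", block,
                       (dev ^ block) % nr_hash);
        return 0;
}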
 286 
 287 static inline void remove_from_hash_queue(struct buffer_head * bh)
 288 {
 289         if (bh->b_next)
 290                 bh->b_next->b_prev = bh->b_prev;
 291         if (bh->b_prev)
 292                 bh->b_prev->b_next = bh->b_next;
 293         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 294                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 295         bh->b_next = bh->b_prev = NULL;
 296 }
 297 
 298 static inline void remove_from_lru_list(struct buffer_head * bh)
 299 {
 300         if (!(bh->b_prev_free) || !(bh->b_next_free))
 301                 panic("VFS: LRU block list corrupted");
 302         if (bh->b_dev == B_FREE)
 303                 panic("LRU list corrupted");
 304         bh->b_prev_free->b_next_free = bh->b_next_free;
 305         bh->b_next_free->b_prev_free = bh->b_prev_free;
 306 
 307         if (lru_list[bh->b_list] == bh)
 308                  lru_list[bh->b_list] = bh->b_next_free;
 309         if (lru_list[bh->b_list] == bh)
 310                  lru_list[bh->b_list] = NULL;
 311         if (next_to_age[bh->b_list] == bh)
 312                 next_to_age[bh->b_list] = bh->b_next_free;
 313         if (next_to_age[bh->b_list] == bh)
 314                 next_to_age[bh->b_list] = NULL;
 315 
 316         bh->b_next_free = bh->b_prev_free = NULL;
 317 }
 318 
 319 static inline void remove_from_free_list(struct buffer_head * bh)
 320 {
 321         int isize = BUFSIZE_INDEX(bh->b_size);
 322         if (!(bh->b_prev_free) || !(bh->b_next_free))
 323                 panic("VFS: Free block list corrupted");
 324         if(bh->b_dev != B_FREE)
 325                 panic("Free list corrupted");
 326         if(!free_list[isize])
 327                 panic("Free list empty");
 328         nr_free[isize]--;
 329         if(bh->b_next_free == bh)
 330                  free_list[isize] = NULL;
 331         else {
 332                 bh->b_prev_free->b_next_free = bh->b_next_free;
 333                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 334                 if (free_list[isize] == bh)
 335                          free_list[isize] = bh->b_next_free;
 336         };
 337         bh->b_next_free = bh->b_prev_free = NULL;
 338 }
 339 
 340 static inline void remove_from_queues(struct buffer_head * bh)
 341 {
 342         if(bh->b_dev == B_FREE) {
 343                 remove_from_free_list(bh); /* Free list entries should not be
 344                                               in the hash queue */
 345                 return;
 346         };
 347         nr_buffers_type[bh->b_list]--;
 348         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 349         remove_from_hash_queue(bh);
 350         remove_from_lru_list(bh);
 351 }
 352 
 353 static inline void put_last_lru(struct buffer_head * bh)
 354 {
 355         if (!bh)
 356                 return;
 357         if (bh == lru_list[bh->b_list]) {
 358                 lru_list[bh->b_list] = bh->b_next_free;
 359                 if (next_to_age[bh->b_list] == bh)
 360                         next_to_age[bh->b_list] = bh->b_next_free;
 361                 return;
 362         }
 363         if(bh->b_dev == B_FREE)
 364                 panic("Wrong block for lru list");
 365         remove_from_lru_list(bh);
  366 /* add to back of the lru list */
 367 
 368         if(!lru_list[bh->b_list]) {
 369                 lru_list[bh->b_list] = bh;
 370                 lru_list[bh->b_list]->b_prev_free = bh;
 371         };
 372         if (!next_to_age[bh->b_list])
 373                 next_to_age[bh->b_list] = bh;
 374 
 375         bh->b_next_free = lru_list[bh->b_list];
 376         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 377         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 378         lru_list[bh->b_list]->b_prev_free = bh;
 379 }
 380 
 381 static inline void put_last_free(struct buffer_head * bh)
 382 {
 383         int isize;
 384         if (!bh)
 385                 return;
 386 
 387         isize = BUFSIZE_INDEX(bh->b_size);      
 388         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 389         /* add to back of free list */
 390         if(!free_list[isize]) {
 391                 free_list[isize] = bh;
 392                 bh->b_prev_free = bh;
 393         };
 394 
 395         nr_free[isize]++;
 396         bh->b_next_free = free_list[isize];
 397         bh->b_prev_free = free_list[isize]->b_prev_free;
 398         free_list[isize]->b_prev_free->b_next_free = bh;
 399         free_list[isize]->b_prev_free = bh;
 400 }
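
put_last_free() (and put_last_lru() above it) append to a circular, doubly linked ring by going through the head's b_prev_free pointer, which always points at the current tail. A stand-alone user-space sketch of the same pattern, with the node and put_last names made up for the example:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next, *prev; int val; };

static void put_last(struct node **head, struct node *n)
{
        if (!*head) {                   /* empty ring: node will point at itself */
                *head = n;
                n->prev = n;
        }
        n->next = *head;                /* tail is always (*head)->prev */
        n->prev = (*head)->prev;
        (*head)->prev->next = n;
        (*head)->prev = n;
}

int main(void)
{
        struct node *head = NULL, *n;
        int i;

        for (i = 0; i < 3; i++) {
                n = calloc(1, sizeof(*n));
                if (!n)
                        return 1;
                n->val = i;
                put_last(&head, n);
        }
        n = head;
        do {                            /* walk the ring exactly once */
                printf("%d ", n->val);
                n = n->next;
        } while (n != head);
        printf("\n");
        return 0;
}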
 401 
 402 static inline void insert_into_queues(struct buffer_head * bh)
 403 {
 404         /* put at end of free list */
 405         if(bh->b_dev == B_FREE) {
 406                 put_last_free(bh);
 407                 return;
 408         }
 409         if(!lru_list[bh->b_list]) {
 410                 lru_list[bh->b_list] = bh;
 411                 bh->b_prev_free = bh;
 412         }
 413         if (!next_to_age[bh->b_list])
 414                 next_to_age[bh->b_list] = bh;
 415         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 416         bh->b_next_free = lru_list[bh->b_list];
 417         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 418         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 419         lru_list[bh->b_list]->b_prev_free = bh;
 420         nr_buffers_type[bh->b_list]++;
 421         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 422 /* put the buffer in new hash-queue if it has a device */
 423         bh->b_prev = NULL;
 424         bh->b_next = NULL;
 425         if (!(bh->b_dev))
 426                 return;
 427         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 428         hash(bh->b_dev,bh->b_blocknr) = bh;
 429         if (bh->b_next)
 430                 bh->b_next->b_prev = bh;
 431 }
 432 
 433 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 434 {               
 435         struct buffer_head * tmp;
 436 
 437         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 438                 if (tmp->b_dev == dev && tmp->b_blocknr == block)
 439                         if (tmp->b_size == size)
 440                                 return tmp;
 441                         else {
 442                                 printk("VFS: Wrong blocksize on device %s\n",
 443                                         kdevname(dev));
 444                                 return NULL;
 445                         }
 446         return NULL;
 447 }
 448 
 449 /*
 450  * Why like this, I hear you say... The reason is race-conditions.
 451  * As we don't lock buffers (unless we are reading them, that is),
 452  * something might happen to it while we sleep (ie a read-error
 453  * will force it bad). This shouldn't really happen currently, but
 454  * the code is ready.
 455  */
 456 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 457 {
 458         struct buffer_head * bh;
 459 
 460         for (;;) {
 461                 if (!(bh=find_buffer(dev,block,size)))
 462                         return NULL;
 463                 bh->b_count++;
 464                 wait_on_buffer(bh);
 465                 if (bh->b_dev == dev && bh->b_blocknr == block
 466                                              && bh->b_size == size)
 467                         return bh;
 468                 bh->b_count--;
 469         }
 470 }
 471 
 472 void set_blocksize(kdev_t dev, int size)
 473 {
 474         int i, nlist;
 475         struct buffer_head * bh, *bhnext;
 476 
 477         if (!blksize_size[MAJOR(dev)])
 478                 return;
 479 
 480         switch(size) {
 481                 default: panic("Invalid blocksize passed to set_blocksize");
 482                 case 512: case 1024: case 2048: case 4096:;
 483         }
 484 
 485         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 486                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 487                 return;
 488         }
 489         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 490                 return;
 491         sync_buffers(dev, 2);
 492         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 493 
 494   /* We need to be quite careful how we do this - we are moving entries
 495      around on the free list, and we can get in a loop if we are not careful.*/
 496 
 497         for(nlist = 0; nlist < NR_LIST; nlist++) {
 498                 bh = lru_list[nlist];
 499                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 500                         if(!bh) break;
 501                         bhnext = bh->b_next_free; 
 502                         if (bh->b_dev != dev)
 503                                  continue;
 504                         if (bh->b_size == size)
 505                                  continue;
 506                         
 507                         wait_on_buffer(bh);
 508                         if (bh->b_dev == dev && bh->b_size != size) {
 509                                 clear_bit(BH_Dirty, &bh->b_state);
 510                                 clear_bit(BH_Uptodate, &bh->b_state);
 511                                 clear_bit(BH_Req, &bh->b_state);
 512                                 bh->b_flushtime = 0;
 513                         }
 514                         remove_from_hash_queue(bh);
 515                 }
 516         }
 517 }
 518 
 519 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 520 
 521 void refill_freelist(int size)
 522 {
 523         struct buffer_head * bh, * tmp;
 524         struct buffer_head * candidate[NR_LIST];
 525         unsigned int best_time, winner;
 526         int isize = BUFSIZE_INDEX(size);
 527         int buffers[NR_LIST];
 528         int i;
 529         int needed;
 530 
 531         /* First see if we even need this.  Sometimes it is advantageous
  532          to request some blocks in a filesystem that we know we will
 533          be needing ahead of time. */
 534 
 535         if (nr_free[isize] > 100)
 536                 return;
 537 
 538         /* If there are too many dirty buffers, we wake up the update process
 539            now so as to ensure that there are still clean buffers available
 540            for user processes to use (and dirty) */
 541         
 542         /* We are going to try and locate this much memory */
 543         needed =bdf_prm.b_un.nrefill * size;  
 544 
 545         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 546                grow_buffers(GFP_BUFFER, size)) {
 547                 needed -= PAGE_SIZE;
 548         }
 549 
 550         if(needed <= 0) return;
 551 
 552         /* See if there are too many buffers of a different size.
 553            If so, victimize them */
 554 
 555         while(maybe_shrink_lav_buffers(size))
 556          {
 557                  if(!grow_buffers(GFP_BUFFER, size)) break;
 558                  needed -= PAGE_SIZE;
 559                  if(needed <= 0) return;
 560          };
 561 
 562         /* OK, we cannot grow the buffer cache, now try and get some
 563            from the lru list */
 564 
 565         /* First set the candidate pointers to usable buffers.  This
 566            should be quick nearly all of the time. */
 567 
 568 repeat0:
 569         for(i=0; i<NR_LIST; i++){
 570                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 571                    nr_buffers_type[i] == 0) {
 572                         candidate[i] = NULL;
 573                         buffers[i] = 0;
 574                         continue;
 575                 }
 576                 buffers[i] = nr_buffers_type[i];
 577                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 578                  {
 579                          if(buffers[i] < 0) panic("Here is the problem");
 580                          tmp = bh->b_next_free;
 581                          if (!bh) break;
 582                          
 583                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 584                              buffer_dirty(bh)) {
 585                                  refile_buffer(bh);
 586                                  continue;
 587                          }
 588                          
 589                          if (bh->b_count || bh->b_size != size)
 590                                   continue;
 591                          
 592                          /* Buffers are written in the order they are placed 
 593                             on the locked list. If we encounter a locked
 594                             buffer here, this means that the rest of them
 595                             are also locked */
 596                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 597                                  buffers[i] = 0;
 598                                  break;
 599                          }
 600                          
 601                          if (BADNESS(bh)) continue;
 602                          break;
 603                  };
 604                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 605                 else candidate[i] = bh;
 606                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 607         }
 608         
 609  repeat:
 610         if(needed <= 0) return;
 611         
 612         /* Now see which candidate wins the election */
 613         
 614         winner = best_time = UINT_MAX;  
 615         for(i=0; i<NR_LIST; i++){
 616                 if(!candidate[i]) continue;
 617                 if(candidate[i]->b_lru_time < best_time){
 618                         best_time = candidate[i]->b_lru_time;
 619                         winner = i;
 620                 }
 621         }
 622         
 623         /* If we have a winner, use it, and then get a new candidate from that list */
 624         if(winner != UINT_MAX) {
 625                 i = winner;
 626                 bh = candidate[i];
 627                 candidate[i] = bh->b_next_free;
 628                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 629                 if (bh->b_count || bh->b_size != size)
 630                          panic("Busy buffer in candidate list\n");
 631                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 632                          panic("Shared buffer in candidate list\n");
 633                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 634                 
 635                 if(bh->b_dev == B_FREE)
 636                         panic("Wrong list");
 637                 remove_from_queues(bh);
 638                 bh->b_dev = B_FREE;
 639                 put_last_free(bh);
 640                 needed -= bh->b_size;
 641                 buffers[i]--;
 642                 if(buffers[i] < 0) panic("Here is the problem");
 643                 
 644                 if(buffers[i] == 0) candidate[i] = NULL;
 645                 
 646                 /* Now all we need to do is advance the candidate pointer
 647                    from the winner list to the next usable buffer */
 648                 if(candidate[i] && buffers[i] > 0){
 649                         if(buffers[i] <= 0) panic("Here is another problem");
 650                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 651                                 if(buffers[i] < 0) panic("Here is the problem");
 652                                 tmp = bh->b_next_free;
 653                                 if (!bh) break;
 654                                 
 655                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 656                                     buffer_dirty(bh)) {
 657                                         refile_buffer(bh);
 658                                         continue;
 659                                 };
 660                                 
 661                                 if (bh->b_count || bh->b_size != size)
 662                                          continue;
 663                                 
 664                                 /* Buffers are written in the order they are
 665                                    placed on the locked list.  If we encounter
 666                                    a locked buffer here, this means that the
 667                                    rest of them are also locked */
 668                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 669                                         buffers[i] = 0;
 670                                         break;
 671                                 }
 672               
 673                                 if (BADNESS(bh)) continue;
 674                                 break;
 675                         };
 676                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 677                         else candidate[i] = bh;
 678                         if(candidate[i] && candidate[i]->b_count) 
 679                                  panic("Here is the problem");
 680                 }
 681                 
 682                 goto repeat;
 683         }
 684         
 685         if(needed <= 0) return;
 686         
 687         /* Too bad, that was not enough. Try a little harder to grow some. */
 688         
 689         if (nr_free_pages > min_free_pages + 5) {
 690                 if (grow_buffers(GFP_BUFFER, size)) {
 691                         needed -= PAGE_SIZE;
 692                         goto repeat0;
 693                 };
 694         }
 695         
 696         /* and repeat until we find something good */
 697         if (!grow_buffers(GFP_ATOMIC, size))
 698                 wakeup_bdflush(1);
 699         needed -= PAGE_SIZE;
 700         goto repeat0;
 701 }
 702 
 703 /*
 704  * Ok, this is getblk, and it isn't very clear, again to hinder
 705  * race-conditions. Most of the code is seldom used, (ie repeating),
 706  * so it should be much more efficient than it looks.
 707  *
 708  * The algorithm is changed: hopefully better, and an elusive bug removed.
 709  *
 710  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 711  * when the filesystem starts to get full of dirty blocks (I hope).
 712  */
 713 struct buffer_head * getblk(kdev_t dev, int block, int size)
 714 {
 715         struct buffer_head * bh;
 716         int isize = BUFSIZE_INDEX(size);
 717 
 718         /* Update this for the buffer size lav. */
 719         buffer_usage[isize]++;
 720 
 721         /* If there are too many dirty buffers, we wake up the update process
 722            now so as to ensure that there are still clean buffers available
 723            for user processes to use (and dirty) */
 724 repeat:
 725         bh = get_hash_table(dev, block, size);
 726         if (bh) {
 727                 if (!buffer_dirty(bh)) {
 728                         if (buffer_uptodate(bh))
 729                                  put_last_lru(bh);
 730                         bh->b_flushtime = 0;
 731                 }
 732                 set_bit(BH_Touched, &bh->b_state);
 733                 return bh;
 734         }
 735 
 736         while(!free_list[isize]) refill_freelist(size);
 737         
 738         if (find_buffer(dev,block,size))
 739                  goto repeat;
 740 
 741         bh = free_list[isize];
 742         remove_from_free_list(bh);
 743 
 744 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 745 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 746         bh->b_count=1;
 747         bh->b_flushtime=0;
 748         bh->b_state=(1<<BH_Touched);
 749         bh->b_dev=dev;
 750         bh->b_blocknr=block;
 751         insert_into_queues(bh);
 752         return bh;
 753 }
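
A hedged sketch of the usual caller pattern: getblk() hands back a buffer with b_count raised, the caller fills it, marks it up to date and dirty (both helpers are used the same way by bwrite_page() later in this file), and finally drops its reference with brelse(). The function name and block size below are invented for illustration:

/* Sketch (hypothetical caller): allocate a cache block, fill it with
 * zeroes, and leave it to bdflush/sync to write back later. */
static void example_zero_block(kdev_t dev, int block)
{
        struct buffer_head * bh;

        bh = getblk(dev, block, 1024);          /* returns with b_count == 1 */
        if (!bh)
                return;
        memset(bh->b_data, 0, bh->b_size);
        mark_buffer_uptodate(bh, 1);            /* contents are now valid    */
        mark_buffer_dirty(bh, 0);               /* schedule for writeback    */
        brelse(bh);                             /* drop our reference        */
}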
 754 
 755 void set_writetime(struct buffer_head * buf, int flag)
 756 {
 757         int newtime;
 758 
 759         if (buffer_dirty(buf)) {
  760                 /* Set the flushtime if it is not yet set, or bring it earlier */
 761                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 762                                      bdf_prm.b_un.age_buffer);
 763                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 764                          buf->b_flushtime = newtime;
 765         } else {
 766                 buf->b_flushtime = 0;
 767         }
 768 }
 769 
 770 
 771 void refile_buffer(struct buffer_head * buf)
 772 {
 773         int dispose;
 774 
 775         if(buf->b_dev == B_FREE) {
 776                 printk("Attempt to refile free buffer\n");
 777                 return;
 778         }
 779         if (buffer_dirty(buf))
 780                 dispose = BUF_DIRTY;
 781         else if (mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1)
 782                 dispose = BUF_SHARED;
 783         else if (buffer_locked(buf))
 784                 dispose = BUF_LOCKED;
 785         else if (buf->b_list == BUF_SHARED)
 786                 dispose = BUF_UNSHARED;
 787         else
 788                 dispose = BUF_CLEAN;
 789         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 790         if(dispose != buf->b_list)  {
 791                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 792                          buf->b_lru_time = jiffies;
 793                 if(dispose == BUF_LOCKED && 
 794                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 795                          dispose = BUF_LOCKED1;
 796                 remove_from_queues(buf);
 797                 buf->b_list = dispose;
 798                 insert_into_queues(buf);
 799                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 800                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 801                    bdf_prm.b_un.nfract/100)
 802                          wakeup_bdflush(0);
 803         }
 804 }
 805 
 806 /*
 807  * Release a buffer head
 808  */
 809 void __brelse(struct buffer_head * buf)
 810 {
 811         wait_on_buffer(buf);
 812 
 813         /* If dirty, mark the time this buffer should be written back */
 814         set_writetime(buf, 0);
 815         refile_buffer(buf);
 816 
 817         if (buf->b_count) {
 818                 if (!--buf->b_count)
 819                         wake_up(&buffer_wait);
 820                 return;
 821         }
 822         printk("VFS: brelse: Trying to free free buffer\n");
 823 }
 824 
 825 /*
 826  * bforget() is like brelse(), except it throws the buffer away
 827  */
 828 void __bforget(struct buffer_head * buf)
 829 {
 830         wait_on_buffer(buf);
 831         if (buf->b_count != 1) {
 832                 printk("Aieee... bforget(): count = %d\n", buf->b_count);
 833                 return;
 834         }
 835         if (mem_map[MAP_NR(buf->b_data)].count != 1) {
 836                 printk("Aieee... bforget(): shared buffer\n");
 837                 return;
 838         }
 839         mark_buffer_clean(buf);
 840         buf->b_count = 0;
 841         remove_from_queues(buf);
 842         buf->b_dev = B_FREE;
 843         put_last_free(buf);
 844         wake_up(&buffer_wait);
 845 }
 846 
 847 /*
 848  * bread() reads a specified block and returns the buffer that contains
 849  * it. It returns NULL if the block was unreadable.
 850  */
 851 struct buffer_head * bread(kdev_t dev, int block, int size)
 852 {
 853         struct buffer_head * bh;
 854 
 855         if (!(bh = getblk(dev, block, size))) {
 856                 printk("VFS: bread: READ error on device %s\n",
 857                         kdevname(dev));
 858                 return NULL;
 859         }
 860         if (buffer_uptodate(bh))
 861                 return bh;
 862         ll_rw_block(READ, 1, &bh);
 863         wait_on_buffer(bh);
 864         if (buffer_uptodate(bh))
 865                 return bh;
 866         brelse(bh);
 867         return NULL;
 868 }
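
A short hedged sketch of the calling convention the comment above describes: a NULL return means the block could not be read, and a successful return must eventually be paired with brelse(). The helper name and block size are made up:

/* Sketch (hypothetical caller). */
static int example_read_block(kdev_t dev, int block)
{
        struct buffer_head * bh = bread(dev, block, 1024);

        if (!bh)
                return -EIO;            /* unreadable: bread() returned NULL */
        /* ... look at bh->b_data here ... */
        brelse(bh);
        return 0;
}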
 869 
 870 /*
  871  * Ok, breada can be used as bread, but additionally it requests read-ahead
  872  * on the blocks that follow, bounded by the device's read_ahead setting,
  873  * the file size and NBUF.
 874  */
 875 
 876 #define NBUF 16
 877 
 878 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 879         unsigned int pos, unsigned int filesize)
 880 {
 881         struct buffer_head * bhlist[NBUF];
 882         unsigned int blocks;
 883         struct buffer_head * bh;
 884         int index;
 885         int i, j;
 886 
 887         if (pos >= filesize)
 888                 return NULL;
 889 
 890         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 891                 return NULL;
 892 
 893         index = BUFSIZE_INDEX(bh->b_size);
 894 
 895         if (buffer_uptodate(bh))
 896                 return bh;
 897 
 898         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 899 
 900         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 901                 blocks = read_ahead[MAJOR(dev)] >> index;
 902         if (blocks > NBUF)
 903                 blocks = NBUF;
 904         
 905         bhlist[0] = bh;
 906         j = 1;
 907         for(i=1; i<blocks; i++) {
 908                 bh = getblk(dev,block+i,bufsize);
 909                 if (buffer_uptodate(bh)) {
 910                         brelse(bh);
 911                         break;
 912                 }
 913                 bhlist[j++] = bh;
 914         }
 915 
 916         /* Request the read for these buffers, and then release them */
 917         ll_rw_block(READ, j, bhlist);
 918 
 919         for(i=1; i<j; i++)
 920                 brelse(bhlist[i]);
 921 
 922         /* Wait for this buffer, and then continue on */
 923         bh = bhlist[0];
 924         wait_on_buffer(bh);
 925         if (buffer_uptodate(bh))
 926                 return bh;
 927         brelse(bh);
 928         return NULL;
 929 }
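
A hedged sketch of how a file-read path might call breada(): the current file position and the inode size let breada() bound its read-ahead, on top of the per-device read_ahead limit. The inode, filp, block and blocksize parameters are assumed caller state:

/* Sketch (hypothetical caller in a filesystem's file_read path). */
static struct buffer_head * example_read_with_readahead(struct inode * inode,
        struct file * filp, int block, int blocksize)
{
        return breada(inode->i_dev, block, blocksize,
                      (unsigned int) filp->f_pos, (unsigned int) inode->i_size);
}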
 930 
 931 /*
 932  * See fs/inode.c for the weird use of volatile..
 933  */
 934 static void put_unused_buffer_head(struct buffer_head * bh)
 935 {
 936         struct wait_queue * wait;
 937 
 938         wait = ((volatile struct buffer_head *) bh)->b_wait;
 939         memset(bh,0,sizeof(*bh));
 940         ((volatile struct buffer_head *) bh)->b_wait = wait;
 941         bh->b_next_free = unused_list;
 942         unused_list = bh;
 943 }
 944 
 945 static void get_more_buffer_heads(void)
 946 {
 947         int i;
 948         struct buffer_head * bh;
 949 
 950         if (unused_list)
 951                 return;
 952 
 953         if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 954                 return;
 955 
 956         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 957                 bh->b_next_free = unused_list;  /* only make link */
 958                 unused_list = bh++;
 959         }
 960 }
 961 
 962 static struct buffer_head * get_unused_buffer_head(void)
 963 {
 964         struct buffer_head * bh;
 965 
 966         get_more_buffer_heads();
 967         if (!unused_list)
 968                 return NULL;
 969         bh = unused_list;
 970         unused_list = bh->b_next_free;
 971         bh->b_next_free = NULL;
 972         bh->b_data = NULL;
 973         bh->b_size = 0;
 974         bh->b_state = 0;
 975         return bh;
 976 }
 977 
 978 /*
 979  * Create the appropriate buffers when given a page for data area and
 980  * the size of each buffer.. Use the bh->b_this_page linked list to
 981  * follow the buffers created.  Return NULL if unable to create more
 982  * buffers.
 983  */
 984 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 985 {
 986         struct buffer_head *bh, *head;
 987         unsigned long offset;
 988 
 989         head = NULL;
 990         offset = PAGE_SIZE;
 991         while ((offset -= size) < PAGE_SIZE) {
 992                 bh = get_unused_buffer_head();
 993                 if (!bh)
 994                         goto no_grow;
 995                 bh->b_this_page = head;
 996                 head = bh;
 997                 bh->b_data = (char *) (page+offset);
 998                 bh->b_size = size;
 999                 bh->b_dev = B_FREE;  /* Flag as unused */
1000         }
1001         return head;
1002 /*
1003  * In case anything failed, we just free everything we got.
1004  */
1005 no_grow:
1006         bh = head;
1007         while (bh) {
1008                 head = bh;
1009                 bh = bh->b_this_page;
1010                 put_unused_buffer_head(head);
1011         }
1012         return NULL;
1013 }
1014 
1015 static void read_buffers(struct buffer_head * bh[], int nrbuf)
1016 {
1017         int i;
1018         int bhnum = 0;
1019         struct buffer_head * bhr[MAX_BUF_PER_PAGE];
1020 
1021         for (i = 0 ; i < nrbuf ; i++) {
1022                 if (bh[i] && !buffer_uptodate(bh[i]))
1023                         bhr[bhnum++] = bh[i];
1024         }
1025         if (bhnum)
1026                 ll_rw_block(READ, bhnum, bhr);
1027         for (i = nrbuf ; --i >= 0 ; ) {
1028                 if (bh[i]) {
1029                         wait_on_buffer(bh[i]);
1030                 }
1031         }
1032 }
1033 
1034 /*
1035  * This actually gets enough info to try to align the stuff,
1036  * but we don't bother yet.. We'll have to check that nobody
1037  * else uses the buffers etc.
1038  *
1039  * "address" points to the new page we can use to move things
1040  * around..
1041  */
1042 static inline unsigned long try_to_align(struct buffer_head ** bh, int nrbuf,
1043         unsigned long address)
1044 {
1045         while (nrbuf-- > 0)
1046                 brelse(bh[nrbuf]);
1047         return 0;
1048 }
1049 
1050 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
1051         kdev_t dev, int *b, int size)
1052 {
1053         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1054         unsigned long page;
1055         unsigned long offset;
1056         int block;
1057         int nrbuf;
1058         int aligned = 1;
1059 
1060         bh[0] = first;
1061         nrbuf = 1;
1062         page = (unsigned long) first->b_data;
1063         if (page & ~PAGE_MASK)
1064                 aligned = 0;
1065         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
1066                 block = *++b;
1067                 if (!block)
1068                         goto no_go;
1069                 first = get_hash_table(dev, block, size);
1070                 if (!first)
1071                         goto no_go;
1072                 bh[nrbuf++] = first;
1073                 if (page+offset != (unsigned long) first->b_data)
1074                         aligned = 0;
1075         }
1076         if (!aligned)
1077                 return try_to_align(bh, nrbuf, address);
1078         mem_map[MAP_NR(page)].count++;
1079         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
1080         while (nrbuf-- > 0)
1081                 brelse(bh[nrbuf]);
1082         free_page(address);
1083         ++current->min_flt;
1084         return page;
1085 no_go:
1086         while (nrbuf-- > 0)
1087                 brelse(bh[nrbuf]);
1088         return 0;
1089 }
1090 
1091 static unsigned long try_to_load_aligned(unsigned long address,
1092         kdev_t dev, int b[], int size)
1093 {
1094         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1095         unsigned long offset;
1096         int isize = BUFSIZE_INDEX(size);
1097         int * p;
1098         int block;
1099 
1100         bh = create_buffers(address, size);
1101         if (!bh)
1102                 return 0;
1103         /* do any of the buffers already exist? punt if so.. */
1104         p = b;
1105         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1106                 block = *(p++);
1107                 if (!block)
1108                         goto not_aligned;
1109                 if (find_buffer(dev, block, size))
1110                         goto not_aligned;
1111         }
1112         tmp = bh;
1113         p = b;
1114         block = 0;
1115         while (1) {
1116                 arr[block++] = bh;
1117                 bh->b_count = 1;
1118                 bh->b_flushtime = 0;
1119                 clear_bit(BH_Dirty, &bh->b_state);
1120                 clear_bit(BH_Uptodate, &bh->b_state);
1121                 clear_bit(BH_Req, &bh->b_state);
1122                 bh->b_dev = dev;
1123                 bh->b_blocknr = *(p++);
1124                 bh->b_list = BUF_CLEAN;
1125                 nr_buffers++;
1126                 nr_buffers_size[isize]++;
1127                 insert_into_queues(bh);
1128                 if (bh->b_this_page)
1129                         bh = bh->b_this_page;
1130                 else
1131                         break;
1132         }
1133         buffermem += PAGE_SIZE;
1134         bh->b_this_page = tmp;
1135         mem_map[MAP_NR(address)].count++;
1136         buffer_pages[MAP_NR(address)] = bh;
1137         read_buffers(arr,block);
1138         while (block-- > 0)
1139                 brelse(arr[block]);
1140         ++current->maj_flt;
1141         return address;
1142 not_aligned:
1143         while ((tmp = bh) != NULL) {
1144                 bh = bh->b_this_page;
1145                 put_unused_buffer_head(tmp);
1146         }
1147         return 0;
1148 }
1149 
1150 /*
1151  * Try-to-share-buffers tries to minimize memory use by trying to keep
1152  * both code pages and the buffer area in the same page. This is done by
1153  * (a) checking if the buffers are already aligned correctly in memory and
1154  * (b) if none of the buffer heads are in memory at all, trying to load
1155  * them into memory the way we want them.
1156  *
1157  * This doesn't guarantee that the memory is shared, but should under most
1158  * circumstances work very well indeed (ie >90% sharing of code pages on
1159  * demand-loadable executables).
1160  */
1161 static inline unsigned long try_to_share_buffers(unsigned long address,
1162         kdev_t dev, int *b, int size)
1163 {
1164         struct buffer_head * bh;
1165         int block;
1166 
1167         block = b[0];
1168         if (!block)
1169                 return 0;
1170         bh = get_hash_table(dev, block, size);
1171         if (bh)
1172                 return check_aligned(bh, address, dev, b, size);
1173         return try_to_load_aligned(address, dev, b, size);
1174 }
1175 
1176 /*
 1177  * bread_page reads a page worth of buffers into memory at the desired address. It's
1178  * a function of its own, as there is some speed to be got by reading them
1179  * all at the same time, not waiting for one to be read, and then another
1180  * etc. This also allows us to optimize memory usage by sharing code pages
1181  * and filesystem buffers..
1182  */
1183 unsigned long bread_page(unsigned long address, kdev_t dev, int b[], int size, int no_share)
1184 {
1185         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1186         unsigned long where;
1187         int i, j;
1188 
1189         if (!no_share) {
1190                 where = try_to_share_buffers(address, dev, b, size);
1191                 if (where)
1192                         return where;
1193         }
1194         ++current->maj_flt;
1195         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1196                 bh[i] = NULL;
1197                 if (b[i])
1198                         bh[i] = getblk(dev, b[i], size);
1199         }
1200         read_buffers(bh,i);
1201         where = address;
1202         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, where += size) {
1203                 if (bh[i]) {
1204                         if (buffer_uptodate(bh[i]))
1205                                 memcpy((void *) where, bh[i]->b_data, size);
1206                         brelse(bh[i]);
1207                 } else
1208                         memset((void *) where, 0, size);
1209         }
1210         return address;
1211 }
1212 
1213 #if 0
1214 /*
1215  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1216  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1217  */
1218 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1219 {
1220         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1221         int i, j;
1222 
1223         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1224                 bh[i] = NULL;
1225                 if (b[i])
1226                         bh[i] = getblk(dev, b[i], size);
1227         }
1228         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1229                 if (bh[i]) {
1230                         memcpy(bh[i]->b_data, (void *) address, size);
1231                         mark_buffer_uptodate(bh[i], 1);
1232                         mark_buffer_dirty(bh[i], 0);
1233                         brelse(bh[i]);
1234                 } else
1235                         memset((void *) address, 0, size); /* ???!?!! */
1236         }       
1237 }
1238 #endif
1239 
1240 /*
1241  * Try to increase the number of buffers available: the size argument
1242  * is used to determine what kind of buffers we want.
1243  */
1244 static int grow_buffers(int pri, int size)
1245 {
1246         unsigned long page;
1247         struct buffer_head *bh, *tmp;
1248         struct buffer_head * insert_point;
1249         int isize;
1250 
1251         if ((size & 511) || (size > PAGE_SIZE)) {
1252                 printk("VFS: grow_buffers: size = %d\n",size);
1253                 return 0;
1254         }
1255 
1256         isize = BUFSIZE_INDEX(size);
1257 
1258         if (!(page = __get_free_page(pri)))
1259                 return 0;
1260         bh = create_buffers(page, size);
1261         if (!bh) {
1262                 free_page(page);
1263                 return 0;
1264         }
1265 
1266         insert_point = free_list[isize];
1267 
1268         tmp = bh;
1269         while (1) {
1270                 nr_free[isize]++;
1271                 if (insert_point) {
1272                         tmp->b_next_free = insert_point->b_next_free;
1273                         tmp->b_prev_free = insert_point;
1274                         insert_point->b_next_free->b_prev_free = tmp;
1275                         insert_point->b_next_free = tmp;
1276                 } else {
1277                         tmp->b_prev_free = tmp;
1278                         tmp->b_next_free = tmp;
1279                 }
1280                 insert_point = tmp;
1281                 ++nr_buffers;
1282                 if (tmp->b_this_page)
1283                         tmp = tmp->b_this_page;
1284                 else
1285                         break;
1286         }
1287         free_list[isize] = bh;
1288         buffer_pages[MAP_NR(page)] = bh;
1289         tmp->b_this_page = bh;
1290         wake_up(&buffer_wait);
1291         buffermem += PAGE_SIZE;
1292         return 1;
1293 }
1294 
1295 
1296 /* =========== Reduce the buffer memory ============= */
1297 
1298 /*
1299  * try_to_free() checks if all the buffers on this particular page
 1300  * are unused, and frees the page if so.
1301  */
1302 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp,
1303                        int priority)
1304 {
1305         unsigned long page;
1306         struct buffer_head * tmp, * p;
1307         int isize = BUFSIZE_INDEX(bh->b_size);
1308 
1309         *bhp = bh;
1310         page = (unsigned long) bh->b_data;
1311         page &= PAGE_MASK;
1312         tmp = bh;
1313         do {
1314                 if (!tmp)
1315                         return 0;
1316                 if (tmp->b_count || buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1317                         return 0;
1318                 if (priority && buffer_touched(tmp))
1319                         return 0;
1320                 tmp = tmp->b_this_page;
1321         } while (tmp != bh);
1322         tmp = bh;
1323         do {
1324                 p = tmp;
1325                 tmp = tmp->b_this_page;
1326                 nr_buffers--;
1327                 nr_buffers_size[isize]--;
1328                 if (p == *bhp)
1329                   {
1330                     *bhp = p->b_prev_free;
1331                     if (p == *bhp) /* Was this the last in the list? */
1332                       *bhp = NULL;
1333                   }
1334                 remove_from_queues(p);
1335                 put_unused_buffer_head(p);
1336         } while (tmp != bh);
1337         buffermem -= PAGE_SIZE;
1338         buffer_pages[MAP_NR(page)] = NULL;
1339         free_page(page);
1340         return !mem_map[MAP_NR(page)].count;
1341 }
1342 
1343 /* Age buffers on a given page, according to whether they have been
1344    visited recently or not. */
1345 static inline void age_buffer(struct buffer_head *bh)
1346 {
1347         struct buffer_head *tmp = bh;
1348         int touched = 0;
1349 
1350         /*
1351          * When we age a page, we mark all other buffers in the page
1352          * with the "has_aged" flag.  Then, when these aliased buffers
1353          * come up for aging, we skip them until next pass.  This
1354          * ensures that a page full of multiple buffers only gets aged
1355          * once per pass through the lru lists. 
1356          */
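        /*
         * Note: clear_bit() here returns the previous value of the bit;
         * both the early return below and the "touched |=" accumulation
         * rely on that behaviour.
         */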
1357         if (clear_bit(BH_Has_aged, &bh->b_state))
1358                 return;
1359         
1360         do {
1361                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1362                 tmp = tmp->b_this_page;
1363                 set_bit(BH_Has_aged, &tmp->b_state);
1364         } while (tmp != bh);
1365         clear_bit(BH_Has_aged, &bh->b_state);
1366 
1367         if (touched) 
1368                 touch_page((unsigned long) bh->b_data);
1369         else
1370                 age_page((unsigned long) bh->b_data);
1371 }
1372 
1373 /*
1374  * Consult the load average for buffers and decide whether we should
1375  * shrink the buffers of one particular size.  If we decide to, do it
1376  * and return 1; else return 0.  We never attempt to shrink the size
1377  * that was passed in.
1378  *
1379  * I would prefer not to use a load average, but the way things are now it
1380  * seems unavoidable.  The way to get rid of it would be to force clustering
1381  * universally, so that when we reclaim buffers we always reclaim an entire
1382  * page.  Doing this would mean that we would all need to move towards QMAGIC.
1383  */
1384 
1385 static int maybe_shrink_lav_buffers(int size)
1386 {          
1387         int nlist;
1388         int isize;
1389         int total_lav, total_n_buffers, n_sizes;
1390         
1391         /* Do not consider the shared buffers, since getblk is rarely
1392            called on them and they would throw off the lav.  They are not
1393            easily reclaimable anyway (let the swapper make the first
1394            move). */
1395   
1396         total_lav = total_n_buffers = n_sizes = 0;
1397         for(nlist = 0; nlist < NR_SIZES; nlist++)
1398          {
1399                  total_lav += buffers_lav[nlist];
1400                  if(nr_buffers_size[nlist]) n_sizes++;
1401                  total_n_buffers += nr_buffers_size[nlist];
1402                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1403          }
1404         
1405         /* See if we have an excessive number of buffers of a particular
1406            size - if so, victimize that bunch. */
1407   
1408         isize = (size ? BUFSIZE_INDEX(size) : -1);
1409         
1410         if (n_sizes > 1)
1411                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1412                   {
1413                           if(nlist == isize) continue;
1414                           if(nr_buffers_size[nlist] &&
1415                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1416                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1417                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1418                                             return 1;
1419                   }
1420         return 0;
1421 }
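
/*
 * Illustrative sketch (not part of the original source): the decision above
 * compares each size's share of the total load average with its share of the
 * unshared buffer population.  A standalone version of the same test, with
 * plain-int inputs invented for the example, might look like this:
 */
static int lav_suggests_shrinking(int lav_const, int lav_this_size,
                                  int total_lav, int unshared_this_size,
                                  int total_unshared)
{
        /* Shrink this size if it holds proportionally more buffers than
           its recent getblk() activity justifies. */
        return lav_const * lav_this_size * total_unshared <
               total_lav * unshared_this_size;
}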
1422 
1423 /*
1424  * Try to free up some pages by shrinking the buffer-cache
1425  *
1426  * Priority tells the routine how hard to try to shrink the
1427  * buffers: 6 means "don't bother too much", while a value
1428  * of 0 means "we'd better get some free pages now".
1429  *
1430  * "limit" is meant to limit the shrink-action only to pages
1431  * that are in the 0 - limit address range, for DMA re-allocations.
1432  * We ignore that right now.
1433  */
1434 int shrink_buffers(unsigned int priority, unsigned long limit)
1435 {
1436         if (priority < 2) {
1437                 sync_buffers(0,0);
1438         }
1439 
1440         if(priority == 2) wakeup_bdflush(1);
1441 
1442         if(maybe_shrink_lav_buffers(0)) return 1;
1443 
1444         /* No good candidate size - take any size we can find */
1445         return shrink_specific_buffers(priority, 0);
1446 }
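
/*
 * Illustrative sketch (hypothetical caller, not from this file): code that
 * wants a page back could walk the priorities from gentle (6) down to
 * desperate (0), passing ~0UL since "limit" is ignored here anyway.
 */
static int example_shrink_until_freed(void)
{
        int priority;

        for (priority = 6; priority >= 0; priority--)
                if (shrink_buffers(priority, ~0UL))
                        return 1;       /* a whole page was released */
        return 0;
}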
1447 
1448 static int shrink_specific_buffers(unsigned int priority, int size)
1449 {
1450         struct buffer_head *bh;
1451         int nlist;
1452         int i, isize, isize1;
1453 
1454 #ifdef DEBUG
1455         if(size) printk("Shrinking buffers of size %d\n", size);
1456 #endif
1457         /* First try the free lists, and see if we can get a complete page
1458            from here */
1459         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1460 
1461         for(isize = 0; isize<NR_SIZES; isize++){
1462                 if(isize1 != -1 && isize1 != isize) continue;
1463                 bh = free_list[isize];
1464                 if(!bh) continue;
1465                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1466                         if (bh->b_count || !bh->b_this_page)
1467                                  continue;
1468                         if (!age_of((unsigned long) bh->b_data) &&
1469                             try_to_free(bh, &bh, 6))
1470                                  return 1;
1471                         if(!bh) break;
1472                         /* Some interrupt must have used it after we
1473                            freed the page.  No big deal - keep looking */
1474                 }
1475         }
1476         
1477         /* Not enough in the free lists; now try the lru lists */
1478         
1479         for(nlist = 0; nlist < NR_LIST; nlist++) {
1480         repeat1:
1481                 if(priority > 2 && nlist == BUF_SHARED) continue;
1482                 i = nr_buffers_type[nlist];
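                /* Scale how many buffers we examine: roughly
                   BUFFEROUT_WEIGHT/1024 of the list, halved again for each
                   step of lower urgency (higher priority value). */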
1483                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1484                 for ( ; i > 0; i-- ) {
1485                         bh = next_to_age[nlist];
1486                         if (!bh)
1487                                 break;
1488                         next_to_age[nlist] = bh->b_next_free;
1489 
1490                         /* First, age the buffer. */
1491                         age_buffer(bh);
1492                         /* We may have stalled while waiting for I/O
1493                            to complete. */
1494                         if(bh->b_list != nlist) goto repeat1;
1495                         if (bh->b_count || !bh->b_this_page)
1496                                  continue;
1497                         if(size && bh->b_size != size) continue;
1498                         if (buffer_locked(bh))
1499                                  if (priority)
1500                                           continue;
1501                                  else
1502                                           wait_on_buffer(bh);
1503                         if (buffer_dirty(bh)) {
1504                                 bh->b_count++;
1505                                 bh->b_flushtime = 0;
1506                                 ll_rw_block(WRITEA, 1, &bh);
1507                                 bh->b_count--;
1508                                 continue;
1509                         }
1510                         /* At priority 6, only consider really old
1511                            (age==0) buffers for reclaiming.  At
1512                            priority 0, consider any buffers. */
1513                         if ((age_of((unsigned long) bh->b_data) >>
1514                              (6-priority)) > 0)
1515                                 continue;                               
1516                         if (try_to_free(bh, &bh, 0))
1517                                  return 1;
1518                         if(!bh) break;
1519                 }
1520         }
1521         return 0;
1522 }
1523 
1524 
1525 /* ================== Debugging =================== */
1526 
1527 void show_buffers(void)
1528 {
1529         struct buffer_head * bh;
1530         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1531         int shared;
1532         int nlist, isize;
1533 
1534         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1535         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1536         printk("Buffer blocks:   %6d\n",nr_buffers);
1537 
1538         for(nlist = 0; nlist < NR_LIST; nlist++) {
1539           shared = found = locked = dirty = used = lastused = 0;
1540           bh = lru_list[nlist];
1541           if(!bh) continue;
1542           do {
1543                 found++;
1544                 if (buffer_locked(bh))
1545                         locked++;
1546                 if (buffer_dirty(bh))
1547                         dirty++;
1548                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1549                 if (bh->b_count)
1550                         used++, lastused = found;
1551                 bh = bh->b_next_free;
1552               } while (bh != lru_list[nlist]);
1553         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
1554                 nlist, found, used, lastused, locked, dirty, shared);
1555         };
1556         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared\n");
1557         for(isize = 0; isize<NR_SIZES; isize++){
1558                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1559                        buffers_lav[isize], nr_free[isize]);
1560                 for(nlist = 0; nlist < NR_LIST; nlist++)
1561                          printk("%7d ", nr_buffers_st[isize][nlist]);
1562                 printk("\n");
1563         }
1564 }
1565 
1566 
1567 /* ====================== Cluster patches for ext2 ==================== */
1568 
1569 /*
1570  * try_to_reassign() checks if all the buffers on this particular page
1571  * are unused, and reassigns them to a new cluster if this is true.
1572  */
1573 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1574                            kdev_t dev, unsigned int starting_block)
1575 {
1576         unsigned long page;
1577         struct buffer_head * tmp, * p;
1578 
1579         *bhp = bh;
1580         page = (unsigned long) bh->b_data;
1581         page &= PAGE_MASK;
1582         if(mem_map[MAP_NR(page)].count != 1) return 0;
1583         tmp = bh;
1584         do {
1585                 if (!tmp)
1586                          return 0;
1587                 
1588                 if (tmp->b_count || buffer_dirty(tmp) || buffer_locked(tmp))
1589                          return 0;
1590                 tmp = tmp->b_this_page;
1591         } while (tmp != bh);
1592         tmp = bh;
1593         
1594         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1595                  tmp = tmp->b_this_page;
1596         
1597         /* This is the buffer at the head of the page */
1598         bh = tmp;
1599         do {
1600                 p = tmp;
1601                 tmp = tmp->b_this_page;
1602                 remove_from_queues(p);
1603                 p->b_dev = dev;
1604                 mark_buffer_uptodate(p, 0);
1605                 clear_bit(BH_Req, &p->b_state);
1606                 p->b_blocknr = starting_block++;
1607                 insert_into_queues(p);
1608         } while (tmp != bh);
1609         return 1;
1610 }
1611 
1612 /*
1613  * Try to find a free cluster by locating a page where
1614  * all of the buffers are unused.  We would like this function
1615  * to be atomic, so we do not call anything that might cause
1616  * the process to sleep.  The priority is somewhat similar to
1617  * the priority used in shrink_buffers.
1618  * 
1619  * My thinking is that the kernel should end up using whole
1620  * pages for the buffer cache as much of the time as possible.
1621  * This way the other buffers on a particular page are likely
1622  * to be very near each other on the free list, and we will not
1623  * be expiring data prematurely.  For now we only cannibalize buffers
1624  * of the same size to keep the code simpler.
1625  */
1626 static int reassign_cluster(kdev_t dev, 
1627                      unsigned int starting_block, int size)
1628 {
1629         struct buffer_head *bh;
1630         int isize = BUFSIZE_INDEX(size);
1631         int i;
1632 
1633         /* We want to give ourselves a really good shot at generating
1634            a cluster, and since we only take buffers from the free
1635            list, we "overfill" it a little. */
1636 
1637         while(nr_free[isize] < 32) refill_freelist(size);
1638 
1639         bh = free_list[isize];
1640         if(bh)
1641                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1642                          if (!bh->b_this_page)  continue;
1643                          if (try_to_reassign(bh, &bh, dev, starting_block))
1644                                  return 4;
1645                  }
1646         return 0;
1647 }
1648 
1649 /* This function tries to generate a new cluster of buffers
1650  * from a new page in memory.  We should only do this if we have
1651  * not expanded the buffer cache to the maximum size that we allow.
1652  */
1653 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1654 {
1655         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1656         int isize = BUFSIZE_INDEX(size);
1657         unsigned long offset;
1658         unsigned long page;
1659         int nblock;
1660 
1661         page = get_free_page(GFP_NOBUFFER);
1662         if(!page) return 0;
1663 
1664         bh = create_buffers(page, size);
1665         if (!bh) {
1666                 free_page(page);
1667                 return 0;
1668         };
1669         nblock = block;
1670         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1671                 if (find_buffer(dev, nblock++, size))
1672                          goto not_aligned;
1673         }
1674         tmp = bh;
1675         nblock = 0;
1676         while (1) {
1677                 arr[nblock++] = bh;
1678                 bh->b_count = 1;
1679                 bh->b_flushtime = 0;
1680                 bh->b_state = 0;
1681                 bh->b_dev = dev;
1682                 bh->b_list = BUF_CLEAN;
1683                 bh->b_blocknr = block++;
1684                 nr_buffers++;
1685                 nr_buffers_size[isize]++;
1686                 insert_into_queues(bh);
1687                 if (bh->b_this_page)
1688                         bh = bh->b_this_page;
1689                 else
1690                         break;
1691         }
1692         buffermem += PAGE_SIZE;
1693         buffer_pages[MAP_NR(page)] = bh;
1694         bh->b_this_page = tmp;
1695         while (nblock-- > 0)
1696                 brelse(arr[nblock]);
1697         return 4; /* ?? */
1698 not_aligned:
1699         while ((tmp = bh) != NULL) {
1700                 bh = bh->b_this_page;
1701                 put_unused_buffer_head(tmp);
1702         }
1703         free_page(page);
1704         return 0;
1705 }
1706 
1707 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1708 {
1709         int i, offset;
1710         
1711         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1712                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1713                 if(find_buffer(dev, b[i], size)) return 0;
1714         };
1715 
1716         /* OK, we have a candidate for a new cluster */
1717         
1718         /* See if one size of buffer is over-represented in the buffer cache;
1719            if so, reduce the number of buffers of that size */
1720         if(maybe_shrink_lav_buffers(size))
1721          {
1722                  int retval;
1723                  retval = try_to_generate_cluster(dev, b[0], size);
1724                  if(retval) return retval;
1725          };
1726         
1727         if (nr_free_pages > min_free_pages*2) 
1728                  return try_to_generate_cluster(dev, b[0], size);
1729         else
1730                  return reassign_cluster(dev, b[0], size);
1731 }
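
/*
 * Illustrative sketch (hypothetical caller, not from this file): with 1K
 * buffers and 4K pages a filesystem would hand generate_cluster() a run of
 * four consecutive block numbers so that the whole page can become one
 * cluster; the block values below are invented for the example.
 */
static unsigned long example_make_cluster(kdev_t dev)
{
        int blocks[MAX_BUF_PER_PAGE] = { 120, 121, 122, 123 };

        return generate_cluster(dev, blocks, 1024);
}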
1732 
1733 
1734 /* ===================== Init ======================= */
1735 
1736 /*
1737  * This initializes the initial buffer free list.  nr_buffers_type is set
1738  * to one less than the actual number of buffers, as a sop to backwards
1739  * compatibility --- the old code did this (I think unintentionally,
1740  * but I'm not sure), and programs in the ps package expect it.
1741  *                                      - TYT 8/30/92
1742  */
1743 void buffer_init(void)
1744 {
1745         int i;
1746         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1747         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1748 
1749         if (memsize >= 4*1024*1024) {
1750                 if(memsize >= 16*1024*1024)
1751                          nr_hash = 16381;
1752                 else
1753                          nr_hash = 4093;
1754         } else {
1755                 nr_hash = 997;
1756         };
1757         
1758         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1759                                                      sizeof(struct buffer_head *));
1760 
1761 
1762         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1763                                                      sizeof(struct buffer_head *));
1764         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1765                 buffer_pages[i] = NULL;
1766 
1767         for (i = 0 ; i < nr_hash ; i++)
1768                 hash_table[i] = NULL;
1769         lru_list[BUF_CLEAN] = 0;
1770         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1771         if (!free_list[isize])
1772                 panic("VFS: Unable to initialize buffer free list!");
1773         return;
1774 }
1775 
1776 
1777 /* ====================== bdflush support =================== */
1778 
1779 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1780  * response to dirty buffers.  Once this process is activated, we write back
1781  * a limited number of buffers to the disks and then go back to sleep again.
1782  * In effect this is a process which never leaves kernel mode, and does not have
1783  * any user memory associated with it except for the stack.  There is also
1784  * a kernel stack page, which obviously must be separate from the user stack.
1785  */
1786 struct wait_queue * bdflush_wait = NULL;
1787 struct wait_queue * bdflush_done = NULL;
1788 
1789 static int bdflush_running = 0;
1790 
1791 static void wakeup_bdflush(int wait)
1792 {
1793         if(!bdflush_running){
1794                 printk("Warning - bdflush not running\n");
1795                 sync_buffers(0,0);
1796                 return;
1797         };
1798         wake_up(&bdflush_wait);
1799         if(wait) sleep_on(&bdflush_done);
1800 }
1801 
1802 
1803 
1804 /* 
1805  * Here we attempt to write back old buffers.  We also try to flush inodes
1806  * and superblocks, since this function is essentially "update", and
1807  * otherwise there would be no way of ensuring that these quantities ever
1808  * get written back.  Ideally, we would have a timestamp on the inodes
1809  * and superblocks so that we could write back only the old ones as well.
1810  */
1811 
1812 asmlinkage int sync_old_buffers(void)
1813 {
1814         int i, isize;
1815         int ndirty, nwritten;
1816         int nlist;
1817         int ncount;
1818         struct buffer_head * bh, *next;
1819 
1820         sync_supers(0);
1821         sync_inodes(0);
1822 
1823         ncount = 0;
1824 #ifdef DEBUG
1825         for(nlist = 0; nlist < NR_LIST; nlist++)
1826 #else
1827         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1828 #endif
1829         {
1830                 ndirty = 0;
1831                 nwritten = 0;
1832         repeat:
1833                 bh = lru_list[nlist];
1834                 if(bh) 
1835                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1836                                  /* We may have stalled while waiting for I/O to complete. */
1837                                  if(bh->b_list != nlist) goto repeat;
1838                                  next = bh->b_next_free;
1839                                  if(!lru_list[nlist]) {
1840                                          printk("Dirty list empty %d\n", i);
1841                                          break;
1842                                  }
1843                                  
1844                                  /* Clean buffer on dirty list?  Refile it */
1845                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1846                                   {
1847                                           refile_buffer(bh);
1848                                           continue;
1849                                   }
1850                                  
1851                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1852                                           continue;
1853                                  ndirty++;
1854                                  if(bh->b_flushtime > jiffies) continue;
1855                                  nwritten++;
1856                                  bh->b_count++;
1857                                  bh->b_flushtime = 0;
1858 #ifdef DEBUG
1859                                  if(nlist != BUF_DIRTY) ncount++;
1860 #endif
1861                                  ll_rw_block(WRITE, 1, &bh);
1862                                  bh->b_count--;
1863                          }
1864         }
1865 #ifdef DEBUG
1866         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1867         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1868 #endif
1869         
1870         /* We assume that we only come through here on a regular
1871            schedule, like every 5 seconds.  Now update load averages.  
1872            Shift usage counts to prevent overflow. */
1873         for(isize = 0; isize<NR_SIZES; isize++){
1874                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1875                 buffer_usage[isize] = 0;
1876         };
1877         return 0;
1878 }
1879 
1880 
1881 /* This is the interface to bdflush.  As we get more sophisticated, we can
1882  * pass tuning parameters to this "process", to adjust how it behaves.
1883  * Invoking it again once the daemon is already running simply modifies
1884  * the tuning parameters.  We would want to verify each parameter, however,
1885  * to make sure that it is reasonable. */
1886 
1887 asmlinkage int sys_bdflush(int func, long data)
1888 {
1889         int i, error;
1890         int ndirty;
1891         int nlist;
1892         int ncount;
1893         struct buffer_head * bh, *next;
1894 
1895         if (!suser())
1896                 return -EPERM;
1897 
1898         if (func == 1)
1899                  return sync_old_buffers();
1900 
1901         /* Basically func 0 means start the daemon; for func >= 2, an even func reads parameter (func-2)/2 and an odd func writes it */
1902         if (func >= 2) {
1903                 i = (func-2) >> 1;
1904                 if (i < 0 || i >= N_PARAM)
1905                         return -EINVAL;
1906                 if((func & 1) == 0) {
1907                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1908                         if (error)
1909                                 return error;
1910                         put_user(bdf_prm.data[i], (int*)data);
1911                         return 0;
1912                 };
1913                 if (data < bdflush_min[i] || data > bdflush_max[i])
1914                         return -EINVAL;
1915                 bdf_prm.data[i] = data;
1916                 return 0;
1917         };
1918         
1919         if (bdflush_running)
1920                 return -EBUSY; /* Only one copy of this running at one time */
1921         bdflush_running++;
1922         
1923         /* OK, from here on is the daemon */
1924         
1925         for (;;) {
1926 #ifdef DEBUG
1927                 printk("bdflush() activated...");
1928 #endif
1929                 
1930                 ncount = 0;
1931 #ifdef DEBUG
1932                 for(nlist = 0; nlist < NR_LIST; nlist++)
1933 #else
1934                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1935 #endif
1936                  {
1937                          ndirty = 0;
1938                  repeat:
1939                          bh = lru_list[nlist];
1940                          if(bh) 
1941                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1942                                        bh = next) {
1943                                           /* We may have stalled while waiting for I/O to complete. */
1944                                           if(bh->b_list != nlist) goto repeat;
1945                                           next = bh->b_next_free;
1946                                           if(!lru_list[nlist]) {
1947                                                   printk("Dirty list empty %d\n", i);
1948                                                   break;
1949                                           }
1950                                           
1951                                           /* Clean buffer on dirty list?  Refile it */
1952                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1953                                            {
1954                                                    refile_buffer(bh);
1955                                                    continue;
1956                                            }
1957                                           
1958                                           if (buffer_locked(bh) || !buffer_dirty(bh))
1959                                                    continue;
1960                                           /* Should we write back buffers that are shared or not??
1961                                              currently dirty buffers are not shared, so it does not matter */
1962                                           bh->b_count++;
1963                                           ndirty++;
1964                                           bh->b_flushtime = 0;
1965                                           ll_rw_block(WRITE, 1, &bh);
1966 #ifdef DEBUG
1967                                           if(nlist != BUF_DIRTY) ncount++;
1968 #endif
1969                                           bh->b_count--;
1970                                   }
1971                  }
1972 #ifdef DEBUG
1973                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1974                 printk("sleeping again.\n");
1975 #endif
1976                 wake_up(&bdflush_done);
1977                 
1978                 /* If there are still a lot of dirty buffers around, skip the sleep
1979                    and flush some more */
1980                 
1981                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1982                    bdf_prm.b_un.nfract/100) {
1983                         if (current->signal & (1 << (SIGKILL-1))) {
1984                                 bdflush_running--;
1985                                 return 0;
1986                         }
1987                         current->signal = 0;
1988                         interruptible_sleep_on(&bdflush_wait);
1989                 }
1990         }
1991 }
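
/*
 * Illustrative sketch (user-space, not part of buffer.c): the entry point
 * above is reached through the bdflush system call.  Assuming a libc that
 * exposes SYS_bdflush, tuning parameter i is read with func 2*i+2 and
 * written with func 2*i+3; the value 500 below is only an example and must
 * lie within bdflush_min[0]..bdflush_max[0].
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        int value = 0;

        /* func = 2: read parameter 0 into "value" */
        if (syscall(SYS_bdflush, 2, &value) == 0)
                printf("bdflush param 0 = %d\n", value);

        /* func = 3: write parameter 0 (rejected with -EINVAL if out of range) */
        if (syscall(SYS_bdflush, 3, 500L) != 0)
                perror("bdflush write");

        return 0;
}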
1992 
1993 
1994 /*
1995  * Overrides for Emacs so that we follow Linus's tabbing style.
1996  * Emacs will notice this stuff at the end of the file and automatically
1997  * adjust the settings for this buffer only.  This must remain at the end
1998  * of the file.
1999  * ---------------------------------------------------------------------------
2000  * Local variables:
2001  * c-indent-level: 8
2002  * c-brace-imaginary-offset: 0
2003  * c-brace-offset: -8
2004  * c-argdecl-indent: 8
2005  * c-label-offset: -8
2006  * c-continued-statement-offset: 8
2007  * c-continued-brace-offset: 0
2008  * End:
2009  */
