root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. remove_from_hash_queue
  10. remove_from_lru_list
  11. remove_from_free_list
  12. remove_from_queues
  13. put_last_lru
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. refill_freelist
  20. getblk
  21. set_writetime
  22. refile_buffer
  23. brelse
  24. bread
  25. breada
  26. put_unused_buffer_head
  27. get_more_buffer_heads
  28. get_unused_buffer_head
  29. create_buffers
  30. read_buffers
  31. try_to_align
  32. check_aligned
  33. try_to_load_aligned
  34. try_to_share_buffers
  35. bread_page
  36. bwrite_page
  37. grow_buffers
  38. try_to_free
  39. maybe_shrink_lav_buffers
  40. shrink_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 #include <linux/sched.h>
  20 #include <linux/kernel.h>
  21 #include <linux/major.h>
  22 #include <linux/string.h>
  23 #include <linux/locks.h>
  24 #include <linux/errno.h>
  25 #include <linux/malloc.h>
  26 
  27 #include <asm/system.h>
  28 #include <asm/segment.h>
  29 #include <asm/io.h>
  30 
  31 #define NR_SIZES 4
  32 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  33 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  34 
  35 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  36 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
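
/*
 * Worked example of the lookup tables above: BUFSIZE_INDEX(512) == 0,
 * BUFSIZE_INDEX(1024) == 1, BUFSIZE_INDEX(2048) == 2 and
 * BUFSIZE_INDEX(4096) == 3, since (size >> 9) selects entries 1, 2, 4
 * and 8 of buffersize_index[]; every other entry holds -1 for the
 * unsupported sizes.  bufferindex_size[] is the inverse map, from index
 * back to block size.
 */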
  37 
  38 static int grow_buffers(int pri, int size);
  39 static int shrink_specific_buffers(unsigned int priority, int size);
  40 static int maybe_shrink_lav_buffers(int);
  41 
  42 static int nr_hash = 0;  /* Size of hash table */
  43 static struct buffer_head ** hash_table;
  44 struct buffer_head ** buffer_pages;
  45 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  46 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  47 static struct buffer_head * unused_list = NULL;
  48 static struct wait_queue * buffer_wait = NULL;
  49 
  50 int nr_buffers = 0;
  51 int nr_buffers_type[NR_LIST] = {0,};
  52 int nr_buffers_size[NR_SIZES] = {0,};
  53 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  54 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  55 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  56 int nr_free[NR_SIZES] = {0,};
  57 int buffermem = 0;
  58 int nr_buffer_heads = 0;
  59 extern int *blksize_size[];
  60 
  61 /* Here is the parameter block for the bdflush process. */
  62 static void wakeup_bdflush(int);
  63 
  64 #define N_PARAM 9
  65 #define LAV
  66 
  67 static union bdflush_param{
  68         struct {
  69                 int nfract;  /* Percentage of buffer cache dirty to 
  70                                 activate bdflush */
  71                 int ndirty;  /* Maximum number of dirty blocks to write out per
  72                                 wake-cycle */
  73                 int nrefill; /* Number of clean buffers to try and obtain
  74                                 each time we call refill */
  75                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  76                                   when trying to refill buffers. */
  77                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  78                                     search for free clusters */
  79                 int age_buffer;  /* Time for normal buffer to age before 
  80                                     we flush it */
  81                 int age_super;  /* Time for superblock to age before we 
  82                                    flush it */
  83                 int lav_const;  /* Constant used for load average (time
   84                                    constant) */
  85                 int lav_ratio;  /* Used to determine how low a lav for a
  86                                    particular size can go before we start to
  87                                    trim back the buffers */
  88         } b_un;
  89         unsigned int data[N_PARAM];
  90 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
  91 
  92 /* The lav constant is set for 1 minute, as long as the update process runs
  93    every 5 seconds.  If you change the frequency of update, the time
  94    constant will also change. */
  95 
  96 
  97 /* These are the min and max parameter values that we will allow to be assigned */
  98 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
  99 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
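
/*
 * Since the union overlays the named members onto data[] (the ints are
 * assumed to be laid out in declaration order), data[0]..data[8]
 * correspond to nfract, ndirty, nrefill, nref_dirt, clu_nfract,
 * age_buffer, age_super, lav_const and lav_ratio, with the defaults
 * 25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884 and 2.  bdflush_min[i] and
 * bdflush_max[i] give the range a value written to data[i] through
 * sys_bdflush() is allowed to take.
 */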
 100 
 101 /*
 102  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 103  * and getting rid of the cli-sti pairs. The wait-queue routines still
 104  * need cli-sti, but now it's just a couple of 386 instructions or so.
 105  *
 106  * Note that the real wait_on_buffer() is an inline function that checks
  107  * if 'b_lock' is set before calling this, so that the queues aren't set
 108  * up unnecessarily.
 109  */
 110 void __wait_on_buffer(struct buffer_head * bh)
 111 {
 112         struct wait_queue wait = { current, NULL };
 113 
 114         bh->b_count++;
 115         add_wait_queue(&bh->b_wait, &wait);
 116 repeat:
 117         current->state = TASK_UNINTERRUPTIBLE;
 118         if (bh->b_lock) {
 119                 schedule();
 120                 goto repeat;
 121         }
 122         remove_wait_queue(&bh->b_wait, &wait);
 123         bh->b_count--;
 124         current->state = TASK_RUNNING;
 125 }
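
/*
 * For reference, the wait_on_buffer() wrapper mentioned above lives in
 * <linux/locks.h>; a minimal sketch of it (it tests the lock bit before
 * going to the trouble of a wait-queue entry) would be:
 *
 *	extern inline void wait_on_buffer(struct buffer_head * bh)
 *	{
 *		if (bh->b_lock)
 *			__wait_on_buffer(bh);
 *	}
 */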
 126 
 127 /* Call sync_buffers with wait!=0 to ensure that the call does not
 128    return until all buffer writes have completed.  Sync() may return
 129    before the writes have finished; fsync() may not. */
 130 
 131 
 132 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 133    spontaneously dirty themselves without ever brelse being called.
 134    We will ultimately want to put these in a separate list, but for
 135    now we search all of the lists for dirty buffers */
 136 
 137 static int sync_buffers(kdev_t dev, int wait)
 138 {
 139         int i, retry, pass = 0, err = 0;
 140         int nlist, ncount;
 141         struct buffer_head * bh, *next;
 142 
 143         /* One pass for no-wait, three for wait:
 144            0) write out all dirty, unlocked buffers;
 145            1) write out all dirty buffers, waiting if locked;
 146            2) wait for completion by waiting for all buffers to unlock. */
 147  repeat:
 148         retry = 0;
 149  repeat2:
 150         ncount = 0;
 151         /* We search all lists as a failsafe mechanism, not because we expect
 152            there to be dirty buffers on any of the other lists. */
 153         for(nlist = 0; nlist < NR_LIST; nlist++)
 154          {
 155          repeat1:
 156                  bh = lru_list[nlist];
 157                  if(!bh) continue;
 158                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 159                          if(bh->b_list != nlist) goto repeat1;
 160                          next = bh->b_next_free;
 161                          if(!lru_list[nlist]) break;
 162                          if (dev && bh->b_dev != dev)
 163                                   continue;
 164                          if (bh->b_lock)
 165                           {
 166                                   /* Buffer is locked; skip it unless wait is
 167                                      requested AND pass > 0. */
 168                                   if (!wait || !pass) {
 169                                           retry = 1;
 170                                           continue;
 171                                   }
 172                                   wait_on_buffer (bh);
 173                                   goto repeat2;
 174                           }
 175                          /* If an unlocked buffer is not uptodate, there has
 176                              been an IO error. Skip it. */
 177                          if (wait && bh->b_req && !bh->b_lock &&
 178                              !bh->b_dirt && !bh->b_uptodate) {
 179                                   err = 1;
 180                                   printk("Weird - unlocked, clean and not "
 181                                     "uptodate buffer on list %d %s %lu\n",
 182                                     nlist, kdevname(bh->b_dev), bh->b_blocknr);
 183                                   continue;
 184                           }
 185                          /* Don't write clean buffers.  Don't write ANY buffers
 186                             on the third pass. */
 187                          if (!bh->b_dirt || pass>=2)
 188                                   continue;
 189                          /* don't bother about locked buffers */
 190                          if (bh->b_lock)
 191                                  continue;
 192                          bh->b_count++;
 193                          bh->b_flushtime = 0;
 194                          ll_rw_block(WRITE, 1, &bh);
 195 
 196                          if(nlist != BUF_DIRTY) { 
 197                                  printk("[%d %s %ld] ", nlist,
 198                                         kdevname(bh->b_dev), bh->b_blocknr);
 199                                  ncount++;
 200                          };
 201                          bh->b_count--;
 202                          retry = 1;
 203                  }
 204          }
 205         if (ncount)
 206           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 207         
 208         /* If we are waiting for the sync to succeed, and if any dirty
 209            blocks were written, then repeat; on the second pass, only
 210            wait for buffers being written (do not pass to write any
 211            more buffers on the second pass). */
 212         if (wait && retry && ++pass<=2)
 213                  goto repeat;
 214         return err;
 215 }
 216 
 217 void sync_dev(kdev_t dev)
 218 {
 219         sync_buffers(dev, 0);
 220         sync_supers(dev);
 221         sync_inodes(dev);
 222         sync_buffers(dev, 0);
 223 }
 224 
 225 int fsync_dev(kdev_t dev)
 226 {
 227         sync_buffers(dev, 0);
 228         sync_supers(dev);
 229         sync_inodes(dev);
 230         return sync_buffers(dev, 1);
 231 }
 232 
 233 asmlinkage int sys_sync(void)
 234 {
 235         fsync_dev(0);
 236         return 0;
 237 }
 238 
 239 int file_fsync (struct inode *inode, struct file *filp)
 240 {
 241         return fsync_dev(inode->i_dev);
 242 }
 243 
 244 asmlinkage int sys_fsync(unsigned int fd)
 245 {
 246         struct file * file;
 247         struct inode * inode;
 248 
 249         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 250                 return -EBADF;
 251         if (!file->f_op || !file->f_op->fsync)
 252                 return -EINVAL;
 253         if (file->f_op->fsync(inode,file))
 254                 return -EIO;
 255         return 0;
 256 }
 257 
 258 void invalidate_buffers(kdev_t dev)
 259 {
 260         int i;
 261         int nlist;
 262         struct buffer_head * bh;
 263 
 264         for(nlist = 0; nlist < NR_LIST; nlist++) {
 265                 bh = lru_list[nlist];
 266                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 267                         if (bh->b_dev != dev)
 268                                 continue;
 269                         wait_on_buffer(bh);
 270                         if (bh->b_dev != dev)
 271                                 continue;
 272                         if (bh->b_count)
 273                                 continue;
 274                         bh->b_flushtime = bh->b_uptodate = 
 275                                 bh->b_dirt = bh->b_req = 0;
 276                 }
 277         }
 278 }
 279 
 280 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 281 #define hash(dev,block) hash_table[_hashfn(dev,block)]
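
/*
 * Example: block 42 on device dev lands in bucket
 * (HASHDEV(dev) ^ 42) % nr_hash.  All buffers that hash to the same
 * bucket are chained through b_next/b_prev; this is the chain that
 * find_buffer() walks and that remove_from_hash_queue() and
 * insert_into_queues() below keep consistent.
 */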
 282 
 283 static inline void remove_from_hash_queue(struct buffer_head * bh)
 284 {
 285         if (bh->b_next)
 286                 bh->b_next->b_prev = bh->b_prev;
 287         if (bh->b_prev)
 288                 bh->b_prev->b_next = bh->b_next;
 289         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 290                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 291         bh->b_next = bh->b_prev = NULL;
 292 }
 293 
 294 static inline void remove_from_lru_list(struct buffer_head * bh)
 295 {
 296         if (!(bh->b_prev_free) || !(bh->b_next_free))
 297                 panic("VFS: LRU block list corrupted");
 298         if (bh->b_dev == B_FREE)
 299                 panic("LRU list corrupted");
 300         bh->b_prev_free->b_next_free = bh->b_next_free;
 301         bh->b_next_free->b_prev_free = bh->b_prev_free;
 302 
 303         if (lru_list[bh->b_list] == bh)
 304                  lru_list[bh->b_list] = bh->b_next_free;
 305         if(lru_list[bh->b_list] == bh)
 306                  lru_list[bh->b_list] = NULL;
 307         bh->b_next_free = bh->b_prev_free = NULL;
 308 }
 309 
 310 static inline void remove_from_free_list(struct buffer_head * bh)
 311 {
 312         int isize = BUFSIZE_INDEX(bh->b_size);
 313         if (!(bh->b_prev_free) || !(bh->b_next_free))
 314                 panic("VFS: Free block list corrupted");
 315         if(bh->b_dev != B_FREE)
 316                 panic("Free list corrupted");
 317         if(!free_list[isize])
 318                 panic("Free list empty");
 319         nr_free[isize]--;
 320         if(bh->b_next_free == bh)
 321                  free_list[isize] = NULL;
 322         else {
 323                 bh->b_prev_free->b_next_free = bh->b_next_free;
 324                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 325                 if (free_list[isize] == bh)
 326                          free_list[isize] = bh->b_next_free;
 327         };
 328         bh->b_next_free = bh->b_prev_free = NULL;
 329 }
 330 
 331 static inline void remove_from_queues(struct buffer_head * bh)
 332 {
 333         if(bh->b_dev == B_FREE) {
 334                 remove_from_free_list(bh); /* Free list entries should not be
 335                                               in the hash queue */
 336                 return;
 337         };
 338         nr_buffers_type[bh->b_list]--;
 339         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 340         remove_from_hash_queue(bh);
 341         remove_from_lru_list(bh);
 342 }
 343 
 344 static inline void put_last_lru(struct buffer_head * bh)
 345 {
 346         if (!bh)
 347                 return;
 348         if (bh == lru_list[bh->b_list]) {
 349                 lru_list[bh->b_list] = bh->b_next_free;
 350                 return;
 351         }
 352         if(bh->b_dev == B_FREE)
 353                 panic("Wrong block for lru list");
 354         remove_from_lru_list(bh);
  355 /* add to back of the lru list */
 356 
 357         if(!lru_list[bh->b_list]) {
 358                 lru_list[bh->b_list] = bh;
 359                 lru_list[bh->b_list]->b_prev_free = bh;
 360         };
 361 
 362         bh->b_next_free = lru_list[bh->b_list];
 363         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 364         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 365         lru_list[bh->b_list]->b_prev_free = bh;
 366 }
 367 
 368 static inline void put_last_free(struct buffer_head * bh)
 369 {
 370         int isize;
 371         if (!bh)
 372                 return;
 373 
 374         isize = BUFSIZE_INDEX(bh->b_size);      
 375         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 376 /* add to back of free list */
 377 
 378         if(!free_list[isize]) {
 379                 free_list[isize] = bh;
 380                 bh->b_prev_free = bh;
 381         };
 382 
 383         nr_free[isize]++;
 384         bh->b_next_free = free_list[isize];
 385         bh->b_prev_free = free_list[isize]->b_prev_free;
 386         free_list[isize]->b_prev_free->b_next_free = bh;
 387         free_list[isize]->b_prev_free = bh;
 388 }
 389 
 390 static inline void insert_into_queues(struct buffer_head * bh)
 391 {
 392 /* put at end of free list */
 393 
 394         if(bh->b_dev == B_FREE) {
 395                 put_last_free(bh);
 396                 return;
 397         };
 398         if(!lru_list[bh->b_list]) {
 399                 lru_list[bh->b_list] = bh;
 400                 bh->b_prev_free = bh;
 401         };
 402         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 403         bh->b_next_free = lru_list[bh->b_list];
 404         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 405         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 406         lru_list[bh->b_list]->b_prev_free = bh;
 407         nr_buffers_type[bh->b_list]++;
 408         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 409 /* put the buffer in new hash-queue if it has a device */
 410         bh->b_prev = NULL;
 411         bh->b_next = NULL;
 412         if (!(bh->b_dev))
 413                 return;
 414         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 415         hash(bh->b_dev,bh->b_blocknr) = bh;
 416         if (bh->b_next)
 417                 bh->b_next->b_prev = bh;
 418 }
 419 
 420 static struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 421 {               
 422         struct buffer_head * tmp;
 423 
 424         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 425                 if (tmp->b_dev == dev && tmp->b_blocknr == block)
 426                         if (tmp->b_size == size)
 427                                 return tmp;
 428                         else {
 429                                 printk("VFS: Wrong blocksize on device %s\n",
 430                                         kdevname(dev));
 431                                 return NULL;
 432                         }
 433         return NULL;
 434 }
 435 
 436 /*
 437  * Why like this, I hear you say... The reason is race-conditions.
 438  * As we don't lock buffers (unless we are reading them, that is),
  439  * something might happen to one while we sleep (ie a read-error
 440  * will force it bad). This shouldn't really happen currently, but
 441  * the code is ready.
 442  */
 443 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 444 {
 445         struct buffer_head * bh;
 446 
 447         for (;;) {
 448                 if (!(bh=find_buffer(dev,block,size)))
 449                         return NULL;
 450                 bh->b_reuse=0;
 451                 bh->b_count++;
 452                 wait_on_buffer(bh);
 453                 if (bh->b_dev == dev && bh->b_blocknr == block
 454                                              && bh->b_size == size)
 455                         return bh;
 456                 bh->b_count--;
 457         }
 458 }
 459 
 460 void set_blocksize(kdev_t dev, int size)
 461 {
 462         int i, nlist;
 463         struct buffer_head * bh, *bhnext;
 464 
 465         if (!blksize_size[MAJOR(dev)])
 466                 return;
 467 
 468         switch(size) {
 469                 default: panic("Invalid blocksize passed to set_blocksize");
 470                 case 512: case 1024: case 2048: case 4096:;
 471         }
 472 
 473         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 474                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 475                 return;
 476         }
 477         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 478                 return;
 479         sync_buffers(dev, 2);
 480         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 481 
 482   /* We need to be quite careful how we do this - we are moving entries
 483      around on the free list, and we can get in a loop if we are not careful.*/
 484 
 485         for(nlist = 0; nlist < NR_LIST; nlist++) {
 486                 bh = lru_list[nlist];
 487                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 488                         if(!bh) break;
 489                         bhnext = bh->b_next_free; 
 490                         if (bh->b_dev != dev)
 491                                  continue;
 492                         if (bh->b_size == size)
 493                                  continue;
 494                         
 495                         wait_on_buffer(bh);
 496                         if (bh->b_dev == dev && bh->b_size != size) {
 497                                 bh->b_uptodate = bh->b_dirt = bh->b_req =
 498                                          bh->b_flushtime = 0;
 499                         };
 500                         remove_from_hash_queue(bh);
 501                 }
 502         }
 503 }
 504 
 505 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
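
/*
 * BADNESS() folds the dirty and lock bits into a small cost:
 * 0 = clean and unlocked, 1 = locked, 2 = dirty, 3 = dirty and locked.
 * refill_freelist() below only steals buffers whose BADNESS() is 0.
 */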
 506 
 507 void refill_freelist(int size)
 508 {
 509         struct buffer_head * bh, * tmp;
 510         struct buffer_head * candidate[NR_LIST];
 511         unsigned int best_time, winner;
 512         int isize = BUFSIZE_INDEX(size);
 513         int buffers[NR_LIST];
 514         int i;
 515         int needed;
 516 
 517         /* First see if we even need this.  Sometimes it is advantageous
  518          to request some blocks in a filesystem that we know we will
 519          be needing ahead of time. */
 520 
 521         if (nr_free[isize] > 100)
 522                 return;
 523 
 524         /* If there are too many dirty buffers, we wake up the update process
 525            now so as to ensure that there are still clean buffers available
 526            for user processes to use (and dirty) */
 527         
 528         /* We are going to try and locate this much memory */
  529         needed = bdf_prm.b_un.nrefill * size;
 530 
 531         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 532                grow_buffers(GFP_BUFFER, size)) {
 533                 needed -= PAGE_SIZE;
 534         }
 535 
 536         if(needed <= 0) return;
 537 
 538         /* See if there are too many buffers of a different size.
 539            If so, victimize them */
 540 
 541         while(maybe_shrink_lav_buffers(size))
 542          {
 543                  if(!grow_buffers(GFP_BUFFER, size)) break;
 544                  needed -= PAGE_SIZE;
 545                  if(needed <= 0) return;
 546          };
 547 
 548         /* OK, we cannot grow the buffer cache, now try and get some
 549            from the lru list */
 550 
 551         /* First set the candidate pointers to usable buffers.  This
 552            should be quick nearly all of the time. */
 553 
 554 repeat0:
 555         for(i=0; i<NR_LIST; i++){
 556                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 557                    nr_buffers_type[i] == 0) {
 558                         candidate[i] = NULL;
 559                         buffers[i] = 0;
 560                         continue;
 561                 }
 562                 buffers[i] = nr_buffers_type[i];
 563                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 564                  {
 565                          if(buffers[i] < 0) panic("Here is the problem");
 566                          tmp = bh->b_next_free;
 567                          if (!bh) break;
 568                          
 569                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 570                              bh->b_dirt) {
 571                                  refile_buffer(bh);
 572                                  continue;
 573                          };
 574                          
 575                          if (bh->b_count || bh->b_size != size)
 576                                   continue;
 577                          
 578                          /* Buffers are written in the order they are placed 
 579                             on the locked list. If we encounter a locked
 580                             buffer here, this means that the rest of them
 581                             are also locked */
 582                          if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 583                                  buffers[i] = 0;
 584                                  break;
 585                          }
 586                          
 587                          if (BADNESS(bh)) continue;
 588                          break;
 589                  };
 590                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 591                 else candidate[i] = bh;
 592                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 593         }
 594         
 595  repeat:
 596         if(needed <= 0) return;
 597         
 598         /* Now see which candidate wins the election */
 599         
 600         winner = best_time = UINT_MAX;  
 601         for(i=0; i<NR_LIST; i++){
 602                 if(!candidate[i]) continue;
 603                 if(candidate[i]->b_lru_time < best_time){
 604                         best_time = candidate[i]->b_lru_time;
 605                         winner = i;
 606                 }
 607         }
 608         
 609         /* If we have a winner, use it, and then get a new candidate from that list */
 610         if(winner != UINT_MAX) {
 611                 i = winner;
 612                 bh = candidate[i];
 613                 candidate[i] = bh->b_next_free;
 614                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 615                 if (bh->b_count || bh->b_size != size)
 616                          panic("Busy buffer in candidate list\n");
 617                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 618                          panic("Shared buffer in candidate list\n");
 619                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 620                 
 621                 if(bh->b_dev == B_FREE)
 622                         panic("Wrong list");
 623                 remove_from_queues(bh);
 624                 bh->b_dev = B_FREE;
 625                 put_last_free(bh);
 626                 needed -= bh->b_size;
 627                 buffers[i]--;
 628                 if(buffers[i] < 0) panic("Here is the problem");
 629                 
 630                 if(buffers[i] == 0) candidate[i] = NULL;
 631                 
 632                 /* Now all we need to do is advance the candidate pointer
 633                    from the winner list to the next usable buffer */
 634                 if(candidate[i] && buffers[i] > 0){
 635                         if(buffers[i] <= 0) panic("Here is another problem");
 636                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 637                                 if(buffers[i] < 0) panic("Here is the problem");
 638                                 tmp = bh->b_next_free;
 639                                 if (!bh) break;
 640                                 
 641                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 642                                     bh->b_dirt) {
 643                                         refile_buffer(bh);
 644                                         continue;
 645                                 };
 646                                 
 647                                 if (bh->b_count || bh->b_size != size)
 648                                          continue;
 649                                 
 650                                 /* Buffers are written in the order they are
 651                                    placed on the locked list.  If we encounter
 652                                    a locked buffer here, this means that the
 653                                    rest of them are also locked */
 654                                 if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 655                                         buffers[i] = 0;
 656                                         break;
 657                                 }
 658               
 659                                 if (BADNESS(bh)) continue;
 660                                 break;
 661                         };
 662                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 663                         else candidate[i] = bh;
 664                         if(candidate[i] && candidate[i]->b_count) 
 665                                  panic("Here is the problem");
 666                 }
 667                 
 668                 goto repeat;
 669         }
 670         
 671         if(needed <= 0) return;
 672         
 673         /* Too bad, that was not enough. Try a little harder to grow some. */
 674         
 675         if (nr_free_pages > 5) {
 676                 if (grow_buffers(GFP_BUFFER, size)) {
 677                         needed -= PAGE_SIZE;
 678                         goto repeat0;
 679                 };
 680         }
 681         
 682         /* and repeat until we find something good */
 683         if (!grow_buffers(GFP_ATOMIC, size))
 684                 wakeup_bdflush(1);
 685         needed -= PAGE_SIZE;
 686         goto repeat0;
 687 }
 688 
 689 /*
 690  * Ok, this is getblk, and it isn't very clear, again to hinder
 691  * race-conditions. Most of the code is seldom used, (ie repeating),
 692  * so it should be much more efficient than it looks.
 693  *
 694  * The algorithm is changed: hopefully better, and an elusive bug removed.
 695  *
 696  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 697  * when the filesystem starts to get full of dirty blocks (I hope).
 698  */
 699 struct buffer_head * getblk(kdev_t dev, int block, int size)
 700 {
 701         struct buffer_head * bh;
 702         int isize = BUFSIZE_INDEX(size);
 703 
 704         /* Update this for the buffer size lav. */
 705         buffer_usage[isize]++;
 706 
 707         /* If there are too many dirty buffers, we wake up the update process
 708            now so as to ensure that there are still clean buffers available
 709            for user processes to use (and dirty) */
 710 repeat:
 711         bh = get_hash_table(dev, block, size);
 712         if (bh) {
 713                 if (bh->b_uptodate && !bh->b_dirt)
 714                          put_last_lru(bh);
 715                 if(!bh->b_dirt) bh->b_flushtime = 0;
 716                 return bh;
 717         }
 718 
 719         while(!free_list[isize]) refill_freelist(size);
 720         
 721         if (find_buffer(dev,block,size))
 722                  goto repeat;
 723 
 724         bh = free_list[isize];
 725         remove_from_free_list(bh);
 726 
 727 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 728 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 729         bh->b_count=1;
 730         bh->b_dirt=0;
 731         bh->b_lock=0;
 732         bh->b_uptodate=0;
 733         bh->b_flushtime=0;
 734         bh->b_req=0;
 735         bh->b_reuse=0;
 736         bh->b_dev=dev;
 737         bh->b_blocknr=block;
 738         insert_into_queues(bh);
 739         return bh;
 740 }
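
/*
 * getblk() never does I/O itself; it only returns a buffer with an
 * elevated b_count.  A sketch of a typical caller that overwrites a
 * whole block (hypothetical code, not from this file):
 *
 *	struct buffer_head * bh = getblk(dev, block, 1024);
 *	memset(bh->b_data, 0, 1024);
 *	bh->b_uptodate = 1;
 *	mark_buffer_dirty(bh, 0);
 *	brelse(bh);
 *
 * Callers that need the previous contents use bread() below, which
 * adds the read and the wait.
 */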
 741 
 742 void set_writetime(struct buffer_head * buf, int flag)
 743 {
 744         int newtime;
 745 
 746         if (buf->b_dirt){
 747                 /* Move buffer to dirty list if jiffies is clear */
 748                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 749                                      bdf_prm.b_un.age_buffer);
 750                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 751                          buf->b_flushtime = newtime;
 752         } else {
 753                 buf->b_flushtime = 0;
 754         }
 755 }
 756 
 757 
 758 void refile_buffer(struct buffer_head * buf){
 759         int dispose;
 760         if(buf->b_dev == B_FREE)
 761                 panic("Attempt to refile free buffer\n");
 762         if (buf->b_dirt)
 763                 dispose = BUF_DIRTY;
 764         else if (mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1)
 765                 dispose = BUF_SHARED;
 766         else if (buf->b_lock)
 767                 dispose = BUF_LOCKED;
 768         else if (buf->b_list == BUF_SHARED)
 769                 dispose = BUF_UNSHARED;
 770         else
 771                 dispose = BUF_CLEAN;
 772         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 773         if(dispose != buf->b_list)  {
 774                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 775                          buf->b_lru_time = jiffies;
 776                 if(dispose == BUF_LOCKED && 
 777                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 778                          dispose = BUF_LOCKED1;
 779                 remove_from_queues(buf);
 780                 buf->b_list = dispose;
 781                 insert_into_queues(buf);
 782                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 783                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 784                    bdf_prm.b_un.nfract/100)
 785                          wakeup_bdflush(0);
 786         }
 787 }
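
/*
 * In short: refile_buffer() files a buffer as BUF_DIRTY if b_dirt is
 * set, BUF_SHARED if its page is mapped more than once, BUF_LOCKED (or
 * BUF_LOCKED1 when the buffer is due to be flushed soon) if b_lock is
 * set, BUF_UNSHARED if it has just stopped being shared, and BUF_CLEAN
 * otherwise.  Moving a buffer onto the dirty list may wake bdflush once
 * dirty buffers exceed bdf_prm.b_un.nfract percent of the unshared
 * buffers.
 */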
 788 
 789 void brelse(struct buffer_head * buf)
 790 {
 791         if (!buf)
 792                 return;
 793         wait_on_buffer(buf);
 794 
 795         /* If dirty, mark the time this buffer should be written back */
 796         set_writetime(buf, 0);
 797         refile_buffer(buf);
 798 
 799         if (buf->b_count) {
 800                 if (--buf->b_count)
 801                         return;
 802                 wake_up(&buffer_wait);
 803 #if 0
 804                 if (buf->b_reuse) {
 805                         buf->b_reuse = 0;
 806                         if (!buf->b_lock && !buf->b_dirt && !buf->b_wait) {
 807                                 if(buf->b_dev == B_FREE)
 808                                         panic("brelse: Wrong list");
 809                                 remove_from_queues(buf);
 810                                 buf->b_dev = B_FREE;
 811                                 put_last_free(buf);
 812                         }
 813                 }
 814 #endif
 815                 return;
 816         }
 817         printk("VFS: brelse: Trying to free free buffer\n");
 818 }
 819 
 820 /*
 821  * bread() reads a specified block and returns the buffer that contains
 822  * it. It returns NULL if the block was unreadable.
 823  */
 824 struct buffer_head * bread(kdev_t dev, int block, int size)
 825 {
 826         struct buffer_head * bh;
 827 
 828         if (!(bh = getblk(dev, block, size))) {
 829                 printk("VFS: bread: READ error on device %s\n",
 830                         kdevname(dev));
 831                 return NULL;
 832         }
 833         if (bh->b_uptodate)
 834                 return bh;
 835         ll_rw_block(READ, 1, &bh);
 836         wait_on_buffer(bh);
 837         if (bh->b_uptodate)
 838                 return bh;
 839         brelse(bh);
 840         return NULL;
 841 }
 842 
 843 /*
  844  * Ok, breada can be used as bread, but additionally requests read-ahead
  845  * on a number of the following blocks, bounded by the file size and the
  846  * device's read_ahead setting.
 847  */
 848 
 849 #define NBUF 16
 850 
 851 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 852         unsigned int pos, unsigned int filesize)
 853 {
 854         struct buffer_head * bhlist[NBUF];
 855         unsigned int blocks;
 856         struct buffer_head * bh;
 857         int index;
 858         int i, j;
 859 
 860         if (pos >= filesize)
 861                 return NULL;
 862 
 863         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 864                 return NULL;
 865 
 866         index = BUFSIZE_INDEX(bh->b_size);
 867 
 868         if (bh->b_uptodate)
 869                 return bh;
 870 
 871         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 872 
 873         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 874                 blocks = read_ahead[MAJOR(dev)] >> index;
 875         if (blocks > NBUF)
 876                 blocks = NBUF;
 877         
 878         bhlist[0] = bh;
 879         j = 1;
 880         for(i=1; i<blocks; i++) {
 881                 bh = getblk(dev,block+i,bufsize);
 882                 if (bh->b_uptodate) {
 883                         brelse(bh);
 884                         break;
 885                 }
 886                 bhlist[j++] = bh;
 887         }
 888 
 889         /* Request the read for these buffers, and then release them */
 890         ll_rw_block(READ, j, bhlist);
 891 
 892         for(i=1; i<j; i++)
 893                 brelse(bhlist[i]);
 894 
 895         /* Wait for this buffer, and then continue on */
 896         bh = bhlist[0];
 897         wait_on_buffer(bh);
 898         if (bh->b_uptodate)
 899                 return bh;
 900         brelse(bh);
 901         return NULL;
 902 }
 903 
 904 /*
 905  * See fs/inode.c for the weird use of volatile..
 906  */
 907 static void put_unused_buffer_head(struct buffer_head * bh)
 908 {
 909         struct wait_queue * wait;
 910 
 911         wait = ((volatile struct buffer_head *) bh)->b_wait;
 912         memset(bh,0,sizeof(*bh));
 913         ((volatile struct buffer_head *) bh)->b_wait = wait;
 914         bh->b_next_free = unused_list;
 915         unused_list = bh;
 916 }
 917 
 918 static void get_more_buffer_heads(void)
 919 {
 920         int i;
 921         struct buffer_head * bh;
 922 
 923         if (unused_list)
 924                 return;
 925 
 926         if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 927                 return;
 928 
 929         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 930                 bh->b_next_free = unused_list;  /* only make link */
 931                 unused_list = bh++;
 932         }
 933 }
 934 
 935 static struct buffer_head * get_unused_buffer_head(void)
 936 {
 937         struct buffer_head * bh;
 938 
 939         get_more_buffer_heads();
 940         if (!unused_list)
 941                 return NULL;
 942         bh = unused_list;
 943         unused_list = bh->b_next_free;
 944         bh->b_next_free = NULL;
 945         bh->b_data = NULL;
 946         bh->b_size = 0;
 947         bh->b_req = 0;
 948         return bh;
 949 }
 950 
 951 /*
 952  * Create the appropriate buffers when given a page for data area and
 953  * the size of each buffer.. Use the bh->b_this_page linked list to
 954  * follow the buffers created.  Return NULL if unable to create more
 955  * buffers.
 956  */
 957 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 958 {
 959         struct buffer_head *bh, *head;
 960         unsigned long offset;
 961 
 962         head = NULL;
 963         offset = PAGE_SIZE;
 964         while ((offset -= size) < PAGE_SIZE) {
 965                 bh = get_unused_buffer_head();
 966                 if (!bh)
 967                         goto no_grow;
 968                 bh->b_this_page = head;
 969                 head = bh;
 970                 bh->b_data = (char *) (page+offset);
 971                 bh->b_size = size;
 972                 bh->b_dev = B_FREE;  /* Flag as unused */
 973         }
 974         return head;
 975 /*
 976  * In case anything failed, we just free everything we got.
 977  */
 978 no_grow:
 979         bh = head;
 980         while (bh) {
 981                 head = bh;
 982                 bh = bh->b_this_page;
 983                 put_unused_buffer_head(head);
 984         }
 985         return NULL;
 986 }
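
/*
 * Note that create_buffers() leaves the b_this_page chain
 * NULL-terminated; its callers (grow_buffers() and try_to_load_aligned())
 * close the chain into a ring once the last head has been set up, which
 * is what lets try_to_free() visit every buffer of a page with a simple
 * do/while loop.
 */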
 987 
 988 static void read_buffers(struct buffer_head * bh[], int nrbuf)
 989 {
 990         int i;
 991         int bhnum = 0;
 992         struct buffer_head * bhr[MAX_BUF_PER_PAGE];
 993 
 994         for (i = 0 ; i < nrbuf ; i++) {
 995                 if (bh[i] && !bh[i]->b_uptodate)
 996                         bhr[bhnum++] = bh[i];
 997         }
 998         if (bhnum)
 999                 ll_rw_block(READ, bhnum, bhr);
1000         for (i = nrbuf ; --i >= 0 ; ) {
1001                 if (bh[i]) {
1002                         wait_on_buffer(bh[i]);
1003                 }
1004         }
1005 }
1006 
1007 /*
1008  * This actually gets enough info to try to align the stuff,
1009  * but we don't bother yet.. We'll have to check that nobody
1010  * else uses the buffers etc.
1011  *
1012  * "address" points to the new page we can use to move things
1013  * around..
1014  */
1015 static unsigned long try_to_align(struct buffer_head ** bh, int nrbuf,
1016         unsigned long address)
1017 {
1018         while (nrbuf-- > 0)
1019                 brelse(bh[nrbuf]);
1020         return 0;
1021 }
1022 
1023 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
1024         kdev_t dev, int *b, int size)
1025 {
1026         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1027         unsigned long page;
1028         unsigned long offset;
1029         int block;
1030         int nrbuf;
1031         int aligned = 1;
1032 
1033         bh[0] = first;
1034         nrbuf = 1;
1035         page = (unsigned long) first->b_data;
1036         if (page & ~PAGE_MASK)
1037                 aligned = 0;
1038         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
1039                 block = *++b;
1040                 if (!block)
1041                         goto no_go;
1042                 first = get_hash_table(dev, block, size);
1043                 if (!first)
1044                         goto no_go;
1045                 bh[nrbuf++] = first;
1046                 if (page+offset != (unsigned long) first->b_data)
1047                         aligned = 0;
1048         }
1049         if (!aligned)
1050                 return try_to_align(bh, nrbuf, address);
1051         mem_map[MAP_NR(page)].count++;
1052         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
1053         while (nrbuf-- > 0)
1054                 brelse(bh[nrbuf]);
1055         free_page(address);
1056         ++current->min_flt;
1057         return page;
1058 no_go:
1059         while (nrbuf-- > 0)
1060                 brelse(bh[nrbuf]);
1061         return 0;
1062 }
1063 
1064 static unsigned long try_to_load_aligned(unsigned long address,
1065         kdev_t dev, int b[], int size)
1066 {
1067         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1068         unsigned long offset;
1069         int isize = BUFSIZE_INDEX(size);
1070         int * p;
1071         int block;
1072 
1073         bh = create_buffers(address, size);
1074         if (!bh)
1075                 return 0;
1076         /* do any of the buffers already exist? punt if so.. */
1077         p = b;
1078         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1079                 block = *(p++);
1080                 if (!block)
1081                         goto not_aligned;
1082                 if (find_buffer(dev, block, size))
1083                         goto not_aligned;
1084         }
1085         tmp = bh;
1086         p = b;
1087         block = 0;
1088         while (1) {
1089                 arr[block++] = bh;
1090                 bh->b_count = 1;
1091                 bh->b_dirt = 0;
1092                 bh->b_reuse = 0;
1093                 bh->b_flushtime = 0;
1094                 bh->b_uptodate = 0;
1095                 bh->b_req = 0;
1096                 bh->b_dev = dev;
1097                 bh->b_blocknr = *(p++);
1098                 bh->b_list = BUF_CLEAN;
1099                 nr_buffers++;
1100                 nr_buffers_size[isize]++;
1101                 insert_into_queues(bh);
1102                 if (bh->b_this_page)
1103                         bh = bh->b_this_page;
1104                 else
1105                         break;
1106         }
1107         buffermem += PAGE_SIZE;
1108         bh->b_this_page = tmp;
1109         mem_map[MAP_NR(address)].count++;
1110         buffer_pages[MAP_NR(address)] = bh;
1111         read_buffers(arr,block);
1112         while (block-- > 0)
1113                 brelse(arr[block]);
1114         ++current->maj_flt;
1115         return address;
1116 not_aligned:
1117         while ((tmp = bh) != NULL) {
1118                 bh = bh->b_this_page;
1119                 put_unused_buffer_head(tmp);
1120         }
1121         return 0;
1122 }
1123 
1124 /*
1125  * Try-to-share-buffers tries to minimize memory use by trying to keep
1126  * both code pages and the buffer area in the same page. This is done by
1127  * (a) checking if the buffers are already aligned correctly in memory and
1128  * (b) if none of the buffer heads are in memory at all, trying to load
1129  * them into memory the way we want them.
1130  *
1131  * This doesn't guarantee that the memory is shared, but should under most
1132  * circumstances work very well indeed (ie >90% sharing of code pages on
1133  * demand-loadable executables).
1134  */
1135 static inline unsigned long try_to_share_buffers(unsigned long address,
1136         kdev_t dev, int *b, int size)
1137 {
1138         struct buffer_head * bh;
1139         int block;
1140 
1141         block = b[0];
1142         if (!block)
1143                 return 0;
1144         bh = get_hash_table(dev, block, size);
1145         if (bh)
1146                 return check_aligned(bh, address, dev, b, size);
1147         return try_to_load_aligned(address, dev, b, size);
1148 }
1149 
1150 /*
 1151  * bread_page reads a full page worth of buffers into memory at the desired
 1152  * address. It's a function of its own, as there is some speed to be got by
 1153  * reading them all at the same time, not waiting for one and then another
1154  * etc. This also allows us to optimize memory usage by sharing code pages
1155  * and filesystem buffers..
1156  */
1157 unsigned long bread_page(unsigned long address, kdev_t dev, int b[], int size, int no_share)
1158 {
1159         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1160         unsigned long where;
1161         int i, j;
1162 
1163         if (!no_share) {
1164                 where = try_to_share_buffers(address, dev, b, size);
1165                 if (where)
1166                         return where;
1167         }
1168         ++current->maj_flt;
1169         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1170                 bh[i] = NULL;
1171                 if (b[i])
1172                         bh[i] = getblk(dev, b[i], size);
1173         }
1174         read_buffers(bh,i);
1175         where = address;
1176         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, where += size) {
1177                 if (bh[i]) {
1178                         if (bh[i]->b_uptodate)
1179                                 memcpy((void *) where, bh[i]->b_data, size);
1180                         brelse(bh[i]);
1181                 } else
1182                         memset((void *) where, 0, size);
1183         }
1184         return address;
1185 }
1186 
1187 #if 0
1188 /*
1189  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1190  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1191  */
1192 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1193 {
1194         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1195         int i, j;
1196 
1197         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1198                 bh[i] = NULL;
1199                 if (b[i])
1200                         bh[i] = getblk(dev, b[i], size);
1201         }
1202         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1203                 if (bh[i]) {
1204                         memcpy(bh[i]->b_data, (void *) address, size);
1205                         bh[i]->b_uptodate = 1;
1206                         mark_buffer_dirty(bh[i], 0);
1207                         brelse(bh[i]);
1208                 } else
1209                         memset((void *) address, 0, size); /* ???!?!! */
1210         }       
1211 }
1212 #endif
1213 
1214 /*
1215  * Try to increase the number of buffers available: the size argument
1216  * is used to determine what kind of buffers we want.
1217  */
1218 static int grow_buffers(int pri, int size)
1219 {
1220         unsigned long page;
1221         struct buffer_head *bh, *tmp;
1222         struct buffer_head * insert_point;
1223         int isize;
1224 
1225         if ((size & 511) || (size > PAGE_SIZE)) {
1226                 printk("VFS: grow_buffers: size = %d\n",size);
1227                 return 0;
1228         }
1229 
1230         isize = BUFSIZE_INDEX(size);
1231 
1232         if (!(page = __get_free_page(pri)))
1233                 return 0;
1234         bh = create_buffers(page, size);
1235         if (!bh) {
1236                 free_page(page);
1237                 return 0;
1238         }
1239 
1240         insert_point = free_list[isize];
1241 
1242         tmp = bh;
1243         while (1) {
1244                 nr_free[isize]++;
1245                 if (insert_point) {
1246                         tmp->b_next_free = insert_point->b_next_free;
1247                         tmp->b_prev_free = insert_point;
1248                         insert_point->b_next_free->b_prev_free = tmp;
1249                         insert_point->b_next_free = tmp;
1250                 } else {
1251                         tmp->b_prev_free = tmp;
1252                         tmp->b_next_free = tmp;
1253                 }
1254                 insert_point = tmp;
1255                 ++nr_buffers;
1256                 if (tmp->b_this_page)
1257                         tmp = tmp->b_this_page;
1258                 else
1259                         break;
1260         }
1261         free_list[isize] = bh;
1262         buffer_pages[MAP_NR(page)] = bh;
1263         tmp->b_this_page = bh;
1264         wake_up(&buffer_wait);
1265         buffermem += PAGE_SIZE;
1266         return 1;
1267 }
1268 
1269 
1270 /* =========== Reduce the buffer memory ============= */
1271 
1272 /*
1273  * try_to_free() checks if all the buffers on this particular page
 1274  * are unused, and frees the page if so.
1275  */
1276 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
1277 {
1278         unsigned long page;
1279         struct buffer_head * tmp, * p;
1280         int isize = BUFSIZE_INDEX(bh->b_size);
1281 
1282         *bhp = bh;
1283         page = (unsigned long) bh->b_data;
1284         page &= PAGE_MASK;
1285         tmp = bh;
1286         do {
1287                 if (!tmp)
1288                         return 0;
1289                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
1290                         return 0;
1291                 tmp = tmp->b_this_page;
1292         } while (tmp != bh);
1293         tmp = bh;
1294         do {
1295                 p = tmp;
1296                 tmp = tmp->b_this_page;
1297                 nr_buffers--;
1298                 nr_buffers_size[isize]--;
1299                 if (p == *bhp)
1300                   {
1301                     *bhp = p->b_prev_free;
1302                     if (p == *bhp) /* Was this the last in the list? */
1303                       *bhp = NULL;
1304                   }
1305                 remove_from_queues(p);
1306                 put_unused_buffer_head(p);
1307         } while (tmp != bh);
1308         buffermem -= PAGE_SIZE;
1309         buffer_pages[MAP_NR(page)] = NULL;
1310         free_page(page);
1311         return !mem_map[MAP_NR(page)].count;
1312 }
1313 
1314 
1315 /*
1316  * Consult the load average for buffers and decide whether or not
1317  * we should shrink the buffers of one size or not.  If we decide yes,
 1318  * do it and return 1.  Else return 0.  Do not attempt to shrink the size
 1319  * that is specified.
1320  *
1321  * I would prefer not to use a load average, but the way things are now it
1322  * seems unavoidable.  The way to get rid of it would be to force clustering
1323  * universally, so that when we reclaim buffers we always reclaim an entire
1324  * page.  Doing this would mean that we all need to move towards QMAGIC.
1325  */
1326 
1327 static int maybe_shrink_lav_buffers(int size)
1328 {          
1329         int nlist;
1330         int isize;
1331         int total_lav, total_n_buffers, n_sizes;
1332         
1333         /* Do not consider the shared buffers since they would not tend
1334            to have getblk called very often, and this would throw off
1335            the lav.  They are not easily reclaimable anyway (let the swapper
1336            make the first move). */
1337   
1338         total_lav = total_n_buffers = n_sizes = 0;
1339         for(nlist = 0; nlist < NR_SIZES; nlist++)
1340          {
1341                  total_lav += buffers_lav[nlist];
1342                  if(nr_buffers_size[nlist]) n_sizes++;
1343                  total_n_buffers += nr_buffers_size[nlist];
1344                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1345          }
1346         
1347         /* See if we have an excessive number of buffers of a particular
1348            size - if so, victimize that bunch. */
1349   
1350         isize = (size ? BUFSIZE_INDEX(size) : -1);
1351         
1352         if (n_sizes > 1)
1353                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1354                   {
1355                           if(nlist == isize) continue;
1356                           if(nr_buffers_size[nlist] &&
1357                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1358                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1359                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1360                                             return 1;
1361                   }
1362         return 0;
1363 }
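
/*
 * Editorial worked example, not in the original: a size is shrunk when its
 * share of the (unshared) buffer cache exceeds its share of recent usage,
 * scaled by lav_const:
 *
 *      lav_const * buffers_lav[nlist] * total_n_buffers
 *              < total_lav * (nr_buffers_size[nlist] - shared)
 *
 * With hypothetical figures lav_const = 2, buffers_lav = 20, total_lav = 200,
 * and 300 unshared buffers of this size out of 400 in total:
 * 2*20*400 = 16000 < 200*300 = 60000, so this size is over-represented
 * relative to its usage and becomes the victim.
 */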
1364 /*
1365  * Try to free up some pages by shrinking the buffer-cache
1366  *
1367  * Priority tells the routine how hard to try to shrink the
1368  * buffers: 3 means "don't bother too much", while a value
1369  * of 0 means "we'd better get some free pages now".
1370  *
1371  * "limit" is meant to limit the shrink-action only to pages
1372  * that are in the 0 - limit address range, for DMA re-allocations.
1373  * We ignore that right now.
1374  */
1375 int shrink_buffers(unsigned int priority, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
1376 {
1377         if (priority < 2) {
1378                 sync_buffers(0,0);
1379         }
1380 
1381         if(priority == 2) wakeup_bdflush(1);
1382 
1383         if(maybe_shrink_lav_buffers(0)) return 1;
1384 
1385         /* No good candidate size - take any size we can find */
1386         return shrink_specific_buffers(priority, 0);
1387 }
1388 
1389 static int shrink_specific_buffers(unsigned int priority, int size)
     /* [previous][next][first][last][top][bottom][index][help] */
1390 {
1391         struct buffer_head *bh;
1392         int nlist;
1393         int i, isize, isize1;
1394 
1395 #ifdef DEBUG
1396         if(size) printk("Shrinking buffers of size %d\n", size);
1397 #endif
1398         /* First try the free lists, and see if we can get a complete page
1399            from here */
1400         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1401 
1402         for(isize = 0; isize<NR_SIZES; isize++){
1403                 if(isize1 != -1 && isize1 != isize) continue;
1404                 bh = free_list[isize];
1405                 if(!bh) continue;
1406                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1407                         if (bh->b_count || !bh->b_this_page)
1408                                  continue;
1409                         if (try_to_free(bh, &bh))
1410                                  return 1;
1411                         if(!bh) break; /* Some interrupt must have used it after we
1412                                           freed the page.  No big deal - keep looking */
1413                 }
1414         }
1415         
1416         /* Not enough in the free lists, now try the lru list */
1417         
1418         for(nlist = 0; nlist < NR_LIST; nlist++) {
1419         repeat1:
1420                 if(priority > 3 && nlist == BUF_SHARED) continue;
1421                 bh = lru_list[nlist];
1422                 if(!bh) continue;
1423                 i = 2*nr_buffers_type[nlist] >> priority;
1424                 for ( ; i-- > 0 ; bh = bh->b_next_free) {
1425                         /* We may have stalled while waiting for I/O to complete. */
1426                         if(bh->b_list != nlist) goto repeat1;
1427                         if (bh->b_count || !bh->b_this_page)
1428                                  continue;
1429                         if(size && bh->b_size != size) continue;
1430                         if (bh->b_lock)
1431                                  if (priority)
1432                                           continue;
1433                                  else
1434                                           wait_on_buffer(bh);
1435                         if (bh->b_dirt) {
1436                                 bh->b_count++;
1437                                 bh->b_flushtime = 0;
1438                                 ll_rw_block(WRITEA, 1, &bh);
1439                                 bh->b_count--;
1440                                 continue;
1441                         }
1442                         if (try_to_free(bh, &bh))
1443                                  return 1;
1444                         if(!bh) break;
1445                 }
1446         }
1447         return 0;
1448 }
1449 
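/*
 * Editorial note, not in the original: the LRU pass above limits its scan to
 * 2*nr_buffers_type[nlist] >> priority entries, so lower priority numbers
 * scan harder.  With a hypothetical list of 1000 buffers:
 *
 *      priority 0 -> 2000 iterations (the whole list, twice over)
 *      priority 3 ->  250 iterations
 *      priority 6 ->   31 iterations
 */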
1450 
1451 /* ================== Debugging =================== */
1452 
1453 void show_buffers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1454 {
1455         struct buffer_head * bh;
1456         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1457         int shared;
1458         int nlist, isize;
1459 
1460         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1461         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1462         printk("Buffer blocks:   %6d\n",nr_buffers);
1463 
1464         for(nlist = 0; nlist < NR_LIST; nlist++) {
1465           shared = found = locked = dirty = used = lastused = 0;
1466           bh = lru_list[nlist];
1467           if(!bh) continue;
1468           do {
1469                 found++;
1470                 if (bh->b_lock)
1471                         locked++;
1472                 if (bh->b_dirt)
1473                         dirty++;
1474                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1475                 if (bh->b_count)
1476                         used++, lastused = found;
1477                 bh = bh->b_next_free;
1478               } while (bh != lru_list[nlist]);
1479         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
1480                 nlist, found, used, lastused, locked, dirty, shared);
1481         };
1482         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared\n");
1483         for(isize = 0; isize<NR_SIZES; isize++){
1484                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1485                        buffers_lav[isize], nr_free[isize]);
1486                 for(nlist = 0; nlist < NR_LIST; nlist++)
1487                          printk("%7d ", nr_buffers_st[isize][nlist]);
1488                 printk("\n");
1489         }
1490 }
1491 
1492 
1493 /* ====================== Cluster patches for ext2 ==================== */
1494 
1495 /*
1496  * try_to_reassign() checks if all the buffers on this particular page
1497  * are unused, and reassigns them to a new cluster if so.
1498  */
1499 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
     /* [previous][next][first][last][top][bottom][index][help] */
1500                            kdev_t dev, unsigned int starting_block)
1501 {
1502         unsigned long page;
1503         struct buffer_head * tmp, * p;
1504 
1505         *bhp = bh;
1506         page = (unsigned long) bh->b_data;
1507         page &= PAGE_MASK;
1508         if(mem_map[MAP_NR(page)].count != 1) return 0;
1509         tmp = bh;
1510         do {
1511                 if (!tmp)
1512                          return 0;
1513                 
1514                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
1515                          return 0;
1516                 tmp = tmp->b_this_page;
1517         } while (tmp != bh);
1518         tmp = bh;
1519         
1520         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1521                  tmp = tmp->b_this_page;
1522         
1523         /* This is the buffer at the head of the page */
1524         bh = tmp;
1525         do {
1526                 p = tmp;
1527                 tmp = tmp->b_this_page;
1528                 remove_from_queues(p);
1529                 p->b_dev = dev;
1530                 p->b_uptodate = 0;
1531                 p->b_req = 0;
1532                 p->b_blocknr = starting_block++;
1533                 insert_into_queues(p);
1534         } while (tmp != bh);
1535         return 1;
1536 }
1537 
1538 /*
1539  * Try to find a free cluster by locating a page where
1540  * all of the buffers are unused.  We would like this function
1541  * to be atomic, so we do not call anything that might cause
1542  * the process to sleep.  The priority is somewhat similar to
1543  * the priority used in shrink_buffers.
1544  * 
1545  * My thinking is that the kernel should end up using whole
1546  * pages for the buffer cache as much of the time as possible.
1547  * This way the other buffers on a particular page are likely
1548  * to be very near each other on the free list, and we will not
1549  * be expiring data prematurely.  For now we only cannibalize buffers
1550  * of the same size to keep the code simpler.
1551  */
1552 static int reassign_cluster(kdev_t dev, 
     /* [previous][next][first][last][top][bottom][index][help] */
1553                      unsigned int starting_block, int size)
1554 {
1555         struct buffer_head *bh;
1556         int isize = BUFSIZE_INDEX(size);
1557         int i;
1558 
1559         /* We want to give ourselves a really good shot at generating
1560            a cluster, and since we only take buffers from the free
1561            list, we "overfill" it a little. */
1562 
1563         while(nr_free[isize] < 32) refill_freelist(size);
1564 
1565         bh = free_list[isize];
1566         if(bh)
1567                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1568                          if (!bh->b_this_page)  continue;
1569                          if (try_to_reassign(bh, &bh, dev, starting_block))
1570                                  return 4;
1571                  }
1572         return 0;
1573 }
1574 
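/*
 * Editorial note, not in the original: the "overfill" loop above keeps at
 * least 32 free buffers of the requested size on hand before scanning.  With
 * 1K buffers (four per 4K page, hypothetical geometry) those 32 buffers must
 * involve at least eight distinct pages, so the odds that some page has all
 * of its buffers free, and can be handed to try_to_reassign(), are decent.
 */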
1575 /* This function tries to generate a new cluster of buffers
1576  * from a new page in memory.  We should only do this if we have
1577  * not expanded the buffer cache to the maximum size that we allow.
1578  */
1579 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
     /* [previous][next][first][last][top][bottom][index][help] */
1580 {
1581         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1582         int isize = BUFSIZE_INDEX(size);
1583         unsigned long offset;
1584         unsigned long page;
1585         int nblock;
1586 
1587         page = get_free_page(GFP_NOBUFFER);
1588         if(!page) return 0;
1589 
1590         bh = create_buffers(page, size);
1591         if (!bh) {
1592                 free_page(page);
1593                 return 0;
1594         };
1595         nblock = block;
1596         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1597                 if (find_buffer(dev, nblock++, size))
1598                          goto not_aligned;
1599         }
1600         tmp = bh;
1601         nblock = 0;
1602         while (1) {
1603                 arr[nblock++] = bh;
1604                 bh->b_count = 1;
1605                 bh->b_dirt = 0;
1606                 bh->b_flushtime = 0;
1607                 bh->b_lock = 0;
1608                 bh->b_uptodate = 0;
1609                 bh->b_req = 0;
1610                 bh->b_dev = dev;
1611                 bh->b_list = BUF_CLEAN;
1612                 bh->b_blocknr = block++;
1613                 nr_buffers++;
1614                 nr_buffers_size[isize]++;
1615                 insert_into_queues(bh);
1616                 if (bh->b_this_page)
1617                         bh = bh->b_this_page;
1618                 else
1619                         break;
1620         }
1621         buffermem += PAGE_SIZE;
1622         buffer_pages[MAP_NR(page)] = bh;
1623         bh->b_this_page = tmp;
1624         while (nblock-- > 0)
1625                 brelse(arr[nblock]);
1626         return 4; /* ?? */
1627 not_aligned:
1628         while ((tmp = bh) != NULL) {
1629                 bh = bh->b_this_page;
1630                 put_unused_buffer_head(tmp);
1631         }
1632         free_page(page);
1633         return 0;
1634 }
1635 
1636 unsigned long generate_cluster(kdev_t dev, int b[], int size)
     /* [previous][next][first][last][top][bottom][index][help] */
1637 {
1638         int i, offset;
1639         
1640         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1641                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1642                 if(find_buffer(dev, b[i], size)) return 0;
1643         };
1644 
1645         /* OK, we have a candidate for a new cluster */
1646         
1647         /* See if one size of buffer is over-represented in the buffer cache,
1648            if so reduce the numbers of buffers */
1649         if(maybe_shrink_lav_buffers(size))
1650          {
1651                  int retval;
1652                  retval = try_to_generate_cluster(dev, b[0], size);
1653                  if(retval) return retval;
1654          };
1655         
1656         if (nr_free_pages > min_free_pages*2) 
1657                  return try_to_generate_cluster(dev, b[0], size);
1658         else
1659                  return reassign_cluster(dev, b[0], size);
1660 }
1661 
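/*
 * Editorial sketch, not part of the original file: a hypothetical caller
 * passes one page's worth of consecutive block numbers; generate_cluster()
 * bails out if the run is not contiguous or if any of the blocks already has
 * a buffer in the cache.  For 1K blocks on a 4K page that means four entries:
 */
#if 0   /* illustrative only, never compiled */
static void example_cluster_request(kdev_t dev, int first_block, int size)
{
        int b[MAX_BUF_PER_PAGE];
        int i, nblocks = PAGE_SIZE / size;

        for (i = 0; i < nblocks; i++)
                b[i] = first_block + i;         /* must be consecutive */
        (void) generate_cluster(dev, b, size);  /* returns 0 if no cluster was made */
}
#endif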
1662 
1663 /* ===================== Init ======================= */
1664 
1665 /*
1666  * This initializes the initial buffer free list.  nr_buffers_type is set
1667  * to one less than the actual number of buffers, as a sop to backwards
1668  * compatibility --- the old code did this (I think unintentionally,
1669  * but I'm not sure), and programs in the ps package expect it.
1670  *                                      - TYT 8/30/92
1671  */
1672 void buffer_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1673 {
1674         int i;
1675         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1676         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1677 
1678         if (memsize >= 4*1024*1024) {
1679                 if(memsize >= 16*1024*1024)
1680                          nr_hash = 16381;
1681                 else
1682                          nr_hash = 4093;
1683         } else {
1684                 nr_hash = 997;
1685         };
1686         
1687         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1688                                                      sizeof(struct buffer_head *));
1689 
1690 
1691         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1692                                                      sizeof(struct buffer_head *));
1693         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1694                 buffer_pages[i] = NULL;
1695 
1696         for (i = 0 ; i < nr_hash ; i++)
1697                 hash_table[i] = NULL;
1698         lru_list[BUF_CLEAN] = 0;
1699         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1700         if (!free_list[isize])
1701                 panic("VFS: Unable to initialize buffer free list!");
1702         return;
1703 }
1704 
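/*
 * Editorial note, not in the original: nr_hash is a prime chosen from the
 * machine's physical memory -- 997 buckets below 4MB, 4093 from 4MB up to
 * 16MB, and 16381 at 16MB or more.  A hypothetical 8MB machine therefore
 * gets 4093 hash buckets, which keeps the per-bucket chains short even once
 * the cache holds a few thousand blocks.
 */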
1705 
1706 /* ====================== bdflush support =================== */
1707 
1708 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1709  * response to dirty buffers.  Once this process is activated, we write back
1710  * a limited number of buffers to the disks and then go back to sleep again.
1711  * In effect this is a process which never leaves kernel mode, and does not have
1712  * any user memory associated with it except for the stack.  There is also
1713  * a kernel stack page, which obviously must be separate from the user stack.
1714  */
1715 struct wait_queue * bdflush_wait = NULL;
1716 struct wait_queue * bdflush_done = NULL;
1717 
1718 static int bdflush_running = 0;
1719 
1720 static void wakeup_bdflush(int wait)
     /* [previous][next][first][last][top][bottom][index][help] */
1721 {
1722         if(!bdflush_running){
1723                 printk("Warning - bdflush not running\n");
1724                 sync_buffers(0,0);
1725                 return;
1726         };
1727         wake_up(&bdflush_wait);
1728         if(wait) sleep_on(&bdflush_done);
1729 }
1730 
1731 
1732 
1733 /* 
1734  * Here we attempt to write back old buffers.  We also try to flush inodes
1735  * and superblocks, since this function is essentially "update", and
1736  * otherwise there would be no way of ensuring that these quantities ever
1737  * get written back.  Ideally, we would have a timestamp on the inodes
1738  * and superblocks so that we could write back only the old ones as well.
1739  */
1740 
1741 asmlinkage int sync_old_buffers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1742 {
1743         int i, isize;
1744         int ndirty, nwritten;
1745         int nlist;
1746         int ncount;
1747         struct buffer_head * bh, *next;
1748 
1749         sync_supers(0);
1750         sync_inodes(0);
1751 
1752         ncount = 0;
1753 #ifdef DEBUG
1754         for(nlist = 0; nlist < NR_LIST; nlist++)
1755 #else
1756         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1757 #endif
1758         {
1759                 ndirty = 0;
1760                 nwritten = 0;
1761         repeat:
1762                 bh = lru_list[nlist];
1763                 if(bh) 
1764                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1765                                  /* We may have stalled while waiting for I/O to complete. */
1766                                  if(bh->b_list != nlist) goto repeat;
1767                                  next = bh->b_next_free;
1768                                  if(!lru_list[nlist]) {
1769                                          printk("Dirty list empty %d\n", i);
1770                                          break;
1771                                  }
1772                                  
1773                                  /* Clean buffer on dirty list?  Refile it */
1774                                  if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1775                                   {
1776                                           refile_buffer(bh);
1777                                           continue;
1778                                   }
1779                                  
1780                                  if (bh->b_lock || !bh->b_dirt)
1781                                           continue;
1782                                  ndirty++;
1783                                  if(bh->b_flushtime > jiffies) continue;
1784                                  nwritten++;
1785                                  bh->b_count++;
1786                                  bh->b_flushtime = 0;
1787 #ifdef DEBUG
1788                                  if(nlist != BUF_DIRTY) ncount++;
1789 #endif
1790                                  ll_rw_block(WRITE, 1, &bh);
1791                                  bh->b_count--;
1792                          }
1793         }
1794 #ifdef DEBUG
1795         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1796         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1797 #endif
1798         
1799         /* We assume that we only come through here on a regular
1800            schedule, like every 5 seconds.  Now update load averages.  
1801            Reset the usage counts so they cannot overflow. */
1802         for(isize = 0; isize<NR_SIZES; isize++){
1803                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1804                 buffer_usage[isize] = 0;
1805         };
1806         return 0;
1807 }
1808 
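/*
 * Editorial note, not in the original: assuming the classic fixed-point
 * CALC_LOAD macro from <linux/sched.h>, the loop above computes, for each
 * buffer size,
 *
 *      lav = (lav * lav_const + usage * (FIXED_1 - lav_const)) >> FSHIFT
 *
 * i.e. an exponentially decaying average of getblk activity, with
 * bdf_prm.b_un.lav_const controlling how quickly old usage is forgotten.
 * buffer_usage[] is then cleared so the next interval starts from zero.
 */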
1809 
1810 /* This is the interface to bdflush.  As we get more sophisticated, we can
1811  * pass tuning parameters to this "process", to adjust how it behaves.
1812  * Invoking it again once the daemon is running simply modifies the
1813  * tuning parameters.  Each parameter is checked against its allowed
1814  * range before it is accepted. */
1815 
1816 asmlinkage int sys_bdflush(int func, long data)
     /* [previous][next][first][last][top][bottom][index][help] */
1817 {
1818         int i, error;
1819         int ndirty;
1820         int nlist;
1821         int ncount;
1822         struct buffer_head * bh, *next;
1823 
1824         if (!suser())
1825                 return -EPERM;
1826 
1827         if (func == 1)
1828                  return sync_old_buffers();
1829 
1830         /* func 0 starts the daemon, 1 flushes old buffers; for func >= 2, even values read and odd write parameter (func-2)/2 */
1831         if (func >= 2) {
1832                 i = (func-2) >> 1;
1833                 if (i < 0 || i >= N_PARAM)
1834                         return -EINVAL;
1835                 if((func & 1) == 0) {
1836                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1837                         if (error)
1838                                 return error;
1839                         put_user(bdf_prm.data[i], (int*)data);
1840                         return 0;
1841                 };
1842                 if (data < bdflush_min[i] || data > bdflush_max[i])
1843                         return -EINVAL;
1844                 bdf_prm.data[i] = data;
1845                 return 0;
1846         };
1847         
1848         if (bdflush_running)
1849                 return -EBUSY; /* Only one copy of this running at one time */
1850         bdflush_running++;
1851         
1852         /* OK, from here on is the daemon */
1853         
1854         for (;;) {
1855 #ifdef DEBUG
1856                 printk("bdflush() activated...");
1857 #endif
1858                 
1859                 ncount = 0;
1860 #ifdef DEBUG
1861                 for(nlist = 0; nlist < NR_LIST; nlist++)
1862 #else
1863                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1864 #endif
1865                  {
1866                          ndirty = 0;
1867                  repeat:
1868                          bh = lru_list[nlist];
1869                          if(bh) 
1870                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1871                                        bh = next) {
1872                                           /* We may have stalled while waiting for I/O to complete. */
1873                                           if(bh->b_list != nlist) goto repeat;
1874                                           next = bh->b_next_free;
1875                                           if(!lru_list[nlist]) {
1876                                                   printk("Dirty list empty %d\n", i);
1877                                                   break;
1878                                           }
1879                                           
1880                                           /* Clean buffer on dirty list?  Refile it */
1881                                           if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1882                                            {
1883                                                    refile_buffer(bh);
1884                                                    continue;
1885                                            }
1886                                           
1887                                           if (bh->b_lock || !bh->b_dirt)
1888                                                    continue;
1889                                           /* Should we write back buffers that are shared or not??
1890                                              Currently dirty buffers are not shared, so it does not matter. */
1891                                           bh->b_count++;
1892                                           ndirty++;
1893                                           bh->b_flushtime = 0;
1894                                           ll_rw_block(WRITE, 1, &bh);
1895 #ifdef DEBUG
1896                                           if(nlist != BUF_DIRTY) ncount++;
1897 #endif
1898                                           bh->b_count--;
1899                                   }
1900                  }
1901 #ifdef DEBUG
1902                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1903                 printk("sleeping again.\n");
1904 #endif
1905                 wake_up(&bdflush_done);
1906                 
1907                 /* If there are still a lot of dirty buffers around, skip the sleep
1908                    and flush some more */
1909                 
1910                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1911                    bdf_prm.b_un.nfract/100) {
1912                         if (current->signal & (1 << (SIGKILL-1))) {
1913                                 bdflush_running--;
1914                                 return 0;
1915                         }
1916                         current->signal = 0;
1917                         interruptible_sleep_on(&bdflush_wait);
1918                 }
1919         }
1920 }
1921 
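/*
 * Editorial sketch, not part of the original file: from user space the
 * update/bdflush daemon would drive this call roughly as below.  The use of
 * syscall() and the SYS_bdflush constant are assumptions, not taken from
 * this file.  func 0 turns the caller into the flush daemon, func 1 is a
 * one-shot sync_old_buffers(), and for func >= 2 even values read and odd
 * values write tuning parameter (func - 2) / 2.
 */
#if 0   /* illustrative only, never compiled */
#include <unistd.h>
#include <sys/syscall.h>

static long example_read_param0(void)
{
        int value = 0;

        /* func 2 reads bdf_prm.data[0]; func 3 would write it */
        if (syscall(SYS_bdflush, 2, (long) &value) < 0)
                return -1;
        return value;
}
#endif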
1922 
1923 /*
1924  * Overrides for Emacs so that we follow Linus's tabbing style.
1925  * Emacs will notice this stuff at the end of the file and automatically
1926  * adjust the settings for this buffer only.  This must remain at the end
1927  * of the file.
1928  * ---------------------------------------------------------------------------
1929  * Local variables:
1930  * c-indent-level: 8
1931  * c-brace-imaginary-offset: 0
1932  * c-brace-offset: -8
1933  * c-argdecl-indent: 8
1934  * c-label-offset: -8
1935  * c-continued-statement-offset: 8
1936  * c-continued-brace-offset: 0
1937  * End:
1938  */
