root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. recover_reusable_buffer_heads
  31. get_unused_buffer_head
  32. create_buffers
  33. bread_page
  34. mark_buffer_uptodate
  35. generic_readpage
  36. bwrite_page
  37. grow_buffers
  38. try_to_free_buffer
  39. age_buffer
  40. maybe_shrink_lav_buffers
  41. shrink_specific_buffers
  42. show_buffers
  43. try_to_reassign
  44. reassign_cluster
  45. try_to_generate_cluster
  46. generate_cluster
  47. buffer_init
  48. wakeup_bdflush
  49. sync_old_buffers
  50. sys_bdflush
  51. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 4
  39 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  40 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  41 
  42 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  43 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
  44 
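/*
 * Illustration (derived from the two tables above, not part of the original
 * source): for the four supported block sizes the tables are inverses of
 * each other, so
 *
 *      BUFSIZE_INDEX(512)  == 0,  bufferindex_size[0] == 512
 *      BUFSIZE_INDEX(1024) == 1,  bufferindex_size[1] == 1024
 *      BUFSIZE_INDEX(2048) == 2,  bufferindex_size[2] == 2048
 *      BUFSIZE_INDEX(4096) == 3,  bufferindex_size[3] == 4096
 *
 * and any other size lands on a -1 entry in buffersize_index.
 */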
  45 static int grow_buffers(int pri, int size);
  46 static int shrink_specific_buffers(unsigned int priority, int size);
  47 static int maybe_shrink_lav_buffers(int);
  48 
  49 static int nr_hash = 0;  /* Size of hash table */
  50 static struct buffer_head ** hash_table;
  51 struct buffer_head ** buffer_pages;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 static struct buffer_head * unused_list = NULL;
  59 struct buffer_head * reuse_list = NULL;
  60 static struct wait_queue * buffer_wait = NULL;
  61 
  62 int nr_buffers = 0;
  63 int nr_buffers_type[NR_LIST] = {0,};
  64 int nr_buffers_size[NR_SIZES] = {0,};
  65 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  66 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  67 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  68 int nr_free[NR_SIZES] = {0,};
  69 int buffermem = 0;
  70 int nr_buffer_heads = 0;
  71 extern int *blksize_size[];
  72 
  73 /* Here is the parameter block for the bdflush process. */
  74 static void wakeup_bdflush(int);
  75 
  76 #define N_PARAM 9
  77 #define LAV
  78 
  79 static union bdflush_param{
  80         struct {
  81                 int nfract;  /* Percentage of buffer cache dirty to 
  82                                 activate bdflush */
  83                 int ndirty;  /* Maximum number of dirty blocks to write out per
  84                                 wake-cycle */
  85                 int nrefill; /* Number of clean buffers to try and obtain
  86                                 each time we call refill */
  87                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  88                                   when trying to refill buffers. */
  89                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  90                                     search for free clusters */
  91                 int age_buffer;  /* Time for normal buffer to age before 
  92                                     we flush it */
  93                 int age_super;  /* Time for superblock to age before we 
  94                                    flush it */
   95                 int lav_const;  /* Constant used for load average (time
   96                                    constant) */
  97                 int lav_ratio;  /* Used to determine how low a lav for a
  98                                    particular size can go before we start to
  99                                    trim back the buffers */
 100         } b_un;
 101         unsigned int data[N_PARAM];
 102 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 103 
 104 /* The lav constant is set for 1 minute, as long as the update process runs
 105    every 5 seconds.  If you change the frequency of update, the time
 106    constant will also change. */
 107 
 108 
 109 /* These are the min and max parameter values that we will allow to be assigned */
 110 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 111 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
 112 
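/*
 * A minimal sketch of how the parameter block is addressed (illustrative
 * only; "n" and "val" are hypothetical names): data[] aliases the named
 * fields of b_un in declaration order, so data[0] is nfract (25), data[1]
 * is ndirty (500), data[5] is age_buffer (30*HZ), and so on.  A parameter
 * update therefore amounts to roughly
 *
 *      if (val >= bdflush_min[n] && val <= bdflush_max[n])
 *              bdf_prm.data[n] = val;
 *
 * which is how the bdflush_min/bdflush_max tables above are meant to be
 * indexed.
 */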
 113 /*
  114  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  115  * and got rid of the cli-sti pairs. The wait-queue routines still
  116  * need cli-sti, but now it's just a couple of 386 instructions or so.
  117  *
  118  * Note that the real wait_on_buffer() is an inline function that checks
  119  * whether the buffer is locked before calling this, so that the wait
  120  * queue isn't set up unnecessarily.
 121  */
 122 void __wait_on_buffer(struct buffer_head * bh)
 123 {
 124         struct wait_queue wait = { current, NULL };
 125 
 126         bh->b_count++;
 127         add_wait_queue(&bh->b_wait, &wait);
 128 repeat:
 129         current->state = TASK_UNINTERRUPTIBLE;
 130         if (buffer_locked(bh)) {
 131                 schedule();
 132                 goto repeat;
 133         }
 134         remove_wait_queue(&bh->b_wait, &wait);
 135         bh->b_count--;
 136         current->state = TASK_RUNNING;
 137 }
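/*
 * For reference, the inline wrapper mentioned in the comment above lives in
 * <linux/locks.h> and looks roughly like this (a sketch, not copied from
 * that header):
 *
 *      extern inline void wait_on_buffer(struct buffer_head * bh)
 *      {
 *              if (buffer_locked(bh))
 *                      __wait_on_buffer(bh);
 *      }
 */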
 138 
 139 /* Call sync_buffers with wait!=0 to ensure that the call does not
 140    return until all buffer writes have completed.  Sync() may return
 141    before the writes have finished; fsync() may not. */
 142 
 143 
 144 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
  145    spontaneously dirty themselves without brelse ever being called.
 146    We will ultimately want to put these in a separate list, but for
 147    now we search all of the lists for dirty buffers */
 148 
 149 static int sync_buffers(kdev_t dev, int wait)
 150 {
 151         int i, retry, pass = 0, err = 0;
 152         int nlist, ncount;
 153         struct buffer_head * bh, *next;
 154 
 155         /* One pass for no-wait, three for wait:
 156            0) write out all dirty, unlocked buffers;
 157            1) write out all dirty buffers, waiting if locked;
 158            2) wait for completion by waiting for all buffers to unlock. */
 159  repeat:
 160         retry = 0;
 161  repeat2:
 162         ncount = 0;
 163         /* We search all lists as a failsafe mechanism, not because we expect
 164            there to be dirty buffers on any of the other lists. */
 165         for(nlist = 0; nlist < NR_LIST; nlist++)
 166          {
 167          repeat1:
 168                  bh = lru_list[nlist];
 169                  if(!bh) continue;
 170                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 171                          if(bh->b_list != nlist) goto repeat1;
 172                          next = bh->b_next_free;
 173                          if(!lru_list[nlist]) break;
 174                          if (dev && bh->b_dev != dev)
 175                                   continue;
 176                          if (buffer_locked(bh))
 177                           {
 178                                   /* Buffer is locked; skip it unless wait is
 179                                      requested AND pass > 0. */
 180                                   if (!wait || !pass) {
 181                                           retry = 1;
 182                                           continue;
 183                                   }
 184                                   wait_on_buffer (bh);
 185                                   goto repeat2;
 186                           }
 187                          /* If an unlocked buffer is not uptodate, there has
 188                              been an IO error. Skip it. */
 189                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 190                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 191                                   err = 1;
 192                                   continue;
 193                           }
 194                          /* Don't write clean buffers.  Don't write ANY buffers
 195                             on the third pass. */
 196                          if (!buffer_dirty(bh) || pass>=2)
 197                                   continue;
 198                          /* don't bother about locked buffers */
 199                          if (buffer_locked(bh))
 200                                  continue;
 201                          bh->b_count++;
 202                          bh->b_flushtime = 0;
 203                          ll_rw_block(WRITE, 1, &bh);
 204 
 205                          if(nlist != BUF_DIRTY) { 
 206                                  printk("[%d %s %ld] ", nlist,
 207                                         kdevname(bh->b_dev), bh->b_blocknr);
 208                                  ncount++;
 209                          };
 210                          bh->b_count--;
 211                          retry = 1;
 212                  }
 213          }
 214         if (ncount)
 215           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 216         
 217         /* If we are waiting for the sync to succeed, and if any dirty
 218            blocks were written, then repeat; on the second pass, only
 219            wait for buffers being written (do not pass to write any
 220            more buffers on the second pass). */
 221         if (wait && retry && ++pass<=2)
 222                  goto repeat;
 223         return err;
 224 }
 225 
 226 void sync_dev(kdev_t dev)
 227 {
 228         sync_buffers(dev, 0);
 229         sync_supers(dev);
 230         sync_inodes(dev);
 231         sync_buffers(dev, 0);
 232         sync_dquots(dev, -1);
 233 }
 234 
 235 int fsync_dev(kdev_t dev)
 236 {
 237         sync_buffers(dev, 0);
 238         sync_supers(dev);
 239         sync_inodes(dev);
 240         sync_dquots(dev, -1);
 241         return sync_buffers(dev, 1);
 242 }
 243 
 244 asmlinkage int sys_sync(void)
 245 {
 246         fsync_dev(0);
 247         return 0;
 248 }
 249 
 250 int file_fsync (struct inode *inode, struct file *filp)
 251 {
 252         return fsync_dev(inode->i_dev);
 253 }
 254 
 255 asmlinkage int sys_fsync(unsigned int fd)
 256 {
 257         struct file * file;
 258         struct inode * inode;
 259 
 260         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 261                 return -EBADF;
 262         if (!file->f_op || !file->f_op->fsync)
 263                 return -EINVAL;
 264         if (file->f_op->fsync(inode,file))
 265                 return -EIO;
 266         return 0;
 267 }
 268 
 269 asmlinkage int sys_fdatasync(unsigned int fd)
 270 {
 271         struct file * file;
 272         struct inode * inode;
 273 
 274         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 275                 return -EBADF;
 276         if (!file->f_op || !file->f_op->fsync)
 277                 return -EINVAL;
  278         /* This needs further work; at the moment it is identical to fsync(). */
 279         if (file->f_op->fsync(inode,file))
 280                 return -EIO;
 281         return 0;
 282 }
 283 
 284 void invalidate_buffers(kdev_t dev)
 285 {
 286         int i;
 287         int nlist;
 288         struct buffer_head * bh;
 289 
 290         for(nlist = 0; nlist < NR_LIST; nlist++) {
 291                 bh = lru_list[nlist];
 292                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 293                         if (bh->b_dev != dev)
 294                                 continue;
 295                         wait_on_buffer(bh);
 296                         if (bh->b_dev != dev)
 297                                 continue;
 298                         if (bh->b_count)
 299                                 continue;
 300                         bh->b_flushtime = 0;
 301                         clear_bit(BH_Protected, &bh->b_state);
 302                         clear_bit(BH_Uptodate, &bh->b_state);
 303                         clear_bit(BH_Dirty, &bh->b_state);
 304                         clear_bit(BH_Req, &bh->b_state);
 305                 }
 306         }
 307 }
 308 
 309 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 310 #define hash(dev,block) hash_table[_hashfn(dev,block)]
 311 
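/*
 * The hash table is open-chained: hash(dev,block) names the head of a
 * doubly linked chain (b_next/b_prev) of all cached buffers whose device
 * and block number fall into that slot.  find_buffer() below walks such a
 * chain; nr_hash (the number of slots) is fixed once at initialization.
 */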
 312 static inline void remove_from_hash_queue(struct buffer_head * bh)
 313 {
 314         if (bh->b_next)
 315                 bh->b_next->b_prev = bh->b_prev;
 316         if (bh->b_prev)
 317                 bh->b_prev->b_next = bh->b_next;
 318         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 319                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 320         bh->b_next = bh->b_prev = NULL;
 321 }
 322 
 323 static inline void remove_from_lru_list(struct buffer_head * bh)
 324 {
 325         if (!(bh->b_prev_free) || !(bh->b_next_free))
 326                 panic("VFS: LRU block list corrupted");
 327         if (bh->b_dev == B_FREE)
 328                 panic("LRU list corrupted");
 329         bh->b_prev_free->b_next_free = bh->b_next_free;
 330         bh->b_next_free->b_prev_free = bh->b_prev_free;
 331 
 332         if (lru_list[bh->b_list] == bh)
 333                  lru_list[bh->b_list] = bh->b_next_free;
 334         if (lru_list[bh->b_list] == bh)
 335                  lru_list[bh->b_list] = NULL;
 336         if (next_to_age[bh->b_list] == bh)
 337                 next_to_age[bh->b_list] = bh->b_next_free;
 338         if (next_to_age[bh->b_list] == bh)
 339                 next_to_age[bh->b_list] = NULL;
 340 
 341         bh->b_next_free = bh->b_prev_free = NULL;
 342 }
 343 
 344 static inline void remove_from_free_list(struct buffer_head * bh)
 345 {
 346         int isize = BUFSIZE_INDEX(bh->b_size);
 347         if (!(bh->b_prev_free) || !(bh->b_next_free))
 348                 panic("VFS: Free block list corrupted");
 349         if(bh->b_dev != B_FREE)
 350                 panic("Free list corrupted");
 351         if(!free_list[isize])
 352                 panic("Free list empty");
 353         nr_free[isize]--;
 354         if(bh->b_next_free == bh)
 355                  free_list[isize] = NULL;
 356         else {
 357                 bh->b_prev_free->b_next_free = bh->b_next_free;
 358                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 359                 if (free_list[isize] == bh)
 360                          free_list[isize] = bh->b_next_free;
 361         };
 362         bh->b_next_free = bh->b_prev_free = NULL;
 363 }
 364 
 365 static inline void remove_from_queues(struct buffer_head * bh)
 366 {
 367         if(bh->b_dev == B_FREE) {
 368                 remove_from_free_list(bh); /* Free list entries should not be
 369                                               in the hash queue */
 370                 return;
 371         };
 372         nr_buffers_type[bh->b_list]--;
 373         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 374         remove_from_hash_queue(bh);
 375         remove_from_lru_list(bh);
 376 }
 377 
 378 static inline void put_last_lru(struct buffer_head * bh)
 379 {
 380         if (!bh)
 381                 return;
 382         if (bh == lru_list[bh->b_list]) {
 383                 lru_list[bh->b_list] = bh->b_next_free;
 384                 if (next_to_age[bh->b_list] == bh)
 385                         next_to_age[bh->b_list] = bh->b_next_free;
 386                 return;
 387         }
 388         if(bh->b_dev == B_FREE)
 389                 panic("Wrong block for lru list");
 390         remove_from_lru_list(bh);
  391 /* add to back of lru list */
 392 
 393         if(!lru_list[bh->b_list]) {
 394                 lru_list[bh->b_list] = bh;
 395                 lru_list[bh->b_list]->b_prev_free = bh;
 396         };
 397         if (!next_to_age[bh->b_list])
 398                 next_to_age[bh->b_list] = bh;
 399 
 400         bh->b_next_free = lru_list[bh->b_list];
 401         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 402         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 403         lru_list[bh->b_list]->b_prev_free = bh;
 404 }
 405 
 406 static inline void put_last_free(struct buffer_head * bh)
 407 {
 408         int isize;
 409         if (!bh)
 410                 return;
 411 
 412         isize = BUFSIZE_INDEX(bh->b_size);      
 413         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 414         /* add to back of free list */
 415         if(!free_list[isize]) {
 416                 free_list[isize] = bh;
 417                 bh->b_prev_free = bh;
 418         };
 419 
 420         nr_free[isize]++;
 421         bh->b_next_free = free_list[isize];
 422         bh->b_prev_free = free_list[isize]->b_prev_free;
 423         free_list[isize]->b_prev_free->b_next_free = bh;
 424         free_list[isize]->b_prev_free = bh;
 425 }
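/*
 * Both the per-size free lists and the per-type lru lists are circular,
 * doubly linked rings threaded through b_next_free/b_prev_free.  The
 * free_list[isize] and lru_list[type] pointers name one element of the
 * ring (its notional front), so an empty ring is simply a NULL head and a
 * one-element ring points at itself in both directions.
 */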
 426 
 427 static inline void insert_into_queues(struct buffer_head * bh)
 428 {
 429         /* put at end of free list */
 430         if(bh->b_dev == B_FREE) {
 431                 put_last_free(bh);
 432                 return;
 433         }
 434         if(!lru_list[bh->b_list]) {
 435                 lru_list[bh->b_list] = bh;
 436                 bh->b_prev_free = bh;
 437         }
 438         if (!next_to_age[bh->b_list])
 439                 next_to_age[bh->b_list] = bh;
 440         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 441         bh->b_next_free = lru_list[bh->b_list];
 442         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 443         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 444         lru_list[bh->b_list]->b_prev_free = bh;
 445         nr_buffers_type[bh->b_list]++;
 446         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 447 /* put the buffer in new hash-queue if it has a device */
 448         bh->b_prev = NULL;
 449         bh->b_next = NULL;
 450         if (!(bh->b_dev))
 451                 return;
 452         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 453         hash(bh->b_dev,bh->b_blocknr) = bh;
 454         if (bh->b_next)
 455                 bh->b_next->b_prev = bh;
 456 }
 457 
 458 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 459 {               
 460         struct buffer_head * tmp;
 461 
 462         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 463                 if (tmp->b_blocknr == block && tmp->b_dev == dev)
 464                         if (tmp->b_size == size)
 465                                 return tmp;
 466                         else {
 467                                 printk("VFS: Wrong blocksize on device %s\n",
 468                                         kdevname(dev));
 469                                 return NULL;
 470                         }
 471         return NULL;
 472 }
 473 
 474 /*
 475  * Why like this, I hear you say... The reason is race-conditions.
 476  * As we don't lock buffers (unless we are reading them, that is),
  477  * something might happen to the buffer while we sleep (i.e. a read
  478  * error will force it bad). This shouldn't really happen currently, but
 479  * the code is ready.
 480  */
 481 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 482 {
 483         struct buffer_head * bh;
 484 
 485         for (;;) {
 486                 if (!(bh=find_buffer(dev,block,size)))
 487                         return NULL;
 488                 bh->b_count++;
 489                 wait_on_buffer(bh);
 490                 if (bh->b_dev == dev && bh->b_blocknr == block
 491                                              && bh->b_size == size)
 492                         return bh;
 493                 bh->b_count--;
 494         }
 495 }
 496 
 497 void set_blocksize(kdev_t dev, int size)
 498 {
 499         int i, nlist;
 500         struct buffer_head * bh, *bhnext;
 501 
 502         if (!blksize_size[MAJOR(dev)])
 503                 return;
 504 
 505         switch(size) {
 506                 default: panic("Invalid blocksize passed to set_blocksize");
 507                 case 512: case 1024: case 2048: case 4096:;
 508         }
 509 
 510         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 511                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 512                 return;
 513         }
 514         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 515                 return;
 516         sync_buffers(dev, 2);
 517         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 518 
 519   /* We need to be quite careful how we do this - we are moving entries
  520      around on the free list, and we can get in a loop if we are not careful. */
 521 
 522         for(nlist = 0; nlist < NR_LIST; nlist++) {
 523                 bh = lru_list[nlist];
 524                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 525                         if(!bh) break;
 526                         bhnext = bh->b_next_free; 
 527                         if (bh->b_dev != dev)
 528                                  continue;
 529                         if (bh->b_size == size)
 530                                  continue;
 531                         
 532                         wait_on_buffer(bh);
 533                         if (bh->b_dev == dev && bh->b_size != size) {
 534                                 clear_bit(BH_Dirty, &bh->b_state);
 535                                 clear_bit(BH_Uptodate, &bh->b_state);
 536                                 clear_bit(BH_Req, &bh->b_state);
 537                                 bh->b_flushtime = 0;
 538                         }
 539                         remove_from_hash_queue(bh);
 540                 }
 541         }
 542 }
 543 
 544 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
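/* BADNESS(bh) is nonzero exactly when the buffer cannot be reclaimed yet:
   it is still dirty or still locked for I/O. */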
 545 
 546 void refill_freelist(int size)
 547 {
 548         struct buffer_head * bh, * tmp;
 549         struct buffer_head * candidate[NR_LIST];
 550         unsigned int best_time, winner;
 551         int isize = BUFSIZE_INDEX(size);
 552         int buffers[NR_LIST];
 553         int i;
 554         int needed;
 555 
 556         /* First see if we even need this.  Sometimes it is advantageous
  557          to request some blocks in a filesystem that we know we will
 558          be needing ahead of time. */
 559 
 560         if (nr_free[isize] > 100)
 561                 return;
 562 
 563         /* If there are too many dirty buffers, we wake up the update process
 564            now so as to ensure that there are still clean buffers available
 565            for user processes to use (and dirty) */
 566         
  567         /* We are going to try to locate this much memory */
  568         needed = bdf_prm.b_un.nrefill * size;
 569 
 570         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 571                grow_buffers(GFP_BUFFER, size)) {
 572                 needed -= PAGE_SIZE;
 573         }
 574 
 575         if(needed <= 0) return;
 576 
 577         /* See if there are too many buffers of a different size.
 578            If so, victimize them */
 579 
 580         while(maybe_shrink_lav_buffers(size))
 581          {
 582                  if(!grow_buffers(GFP_BUFFER, size)) break;
 583                  needed -= PAGE_SIZE;
 584                  if(needed <= 0) return;
 585          };
 586 
  587         /* OK, we cannot grow the buffer cache, now try to get some
 588            from the lru list */
 589 
 590         /* First set the candidate pointers to usable buffers.  This
 591            should be quick nearly all of the time. */
 592 
 593 repeat0:
 594         for(i=0; i<NR_LIST; i++){
 595                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 596                    nr_buffers_type[i] == 0) {
 597                         candidate[i] = NULL;
 598                         buffers[i] = 0;
 599                         continue;
 600                 }
 601                 buffers[i] = nr_buffers_type[i];
 602                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 603                  {
 604                          if(buffers[i] < 0) panic("Here is the problem");
 605                          tmp = bh->b_next_free;
 606                          if (!bh) break;
 607                          
 608                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 609                              buffer_dirty(bh)) {
 610                                  refile_buffer(bh);
 611                                  continue;
 612                          }
 613                          
 614                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 615                                   continue;
 616                          
 617                          /* Buffers are written in the order they are placed 
 618                             on the locked list. If we encounter a locked
 619                             buffer here, this means that the rest of them
 620                             are also locked */
 621                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 622                                  buffers[i] = 0;
 623                                  break;
 624                          }
 625                          
 626                          if (BADNESS(bh)) continue;
 627                          break;
 628                  };
 629                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 630                 else candidate[i] = bh;
 631                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 632         }
 633         
 634  repeat:
 635         if(needed <= 0) return;
 636         
 637         /* Now see which candidate wins the election */
 638         
 639         winner = best_time = UINT_MAX;  
 640         for(i=0; i<NR_LIST; i++){
 641                 if(!candidate[i]) continue;
 642                 if(candidate[i]->b_lru_time < best_time){
 643                         best_time = candidate[i]->b_lru_time;
 644                         winner = i;
 645                 }
 646         }
 647         
 648         /* If we have a winner, use it, and then get a new candidate from that list */
 649         if(winner != UINT_MAX) {
 650                 i = winner;
 651                 bh = candidate[i];
 652                 candidate[i] = bh->b_next_free;
 653                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 654                 if (bh->b_count || bh->b_size != size)
 655                          panic("Busy buffer in candidate list\n");
 656                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 657                          panic("Shared buffer in candidate list\n");
 658                 if (buffer_protected(bh))
 659                         panic("Protected buffer in candidate list\n");
 660                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 661                 
 662                 if(bh->b_dev == B_FREE)
 663                         panic("Wrong list");
 664                 remove_from_queues(bh);
 665                 bh->b_dev = B_FREE;
 666                 put_last_free(bh);
 667                 needed -= bh->b_size;
 668                 buffers[i]--;
 669                 if(buffers[i] < 0) panic("Here is the problem");
 670                 
 671                 if(buffers[i] == 0) candidate[i] = NULL;
 672                 
 673                 /* Now all we need to do is advance the candidate pointer
 674                    from the winner list to the next usable buffer */
 675                 if(candidate[i] && buffers[i] > 0){
 676                         if(buffers[i] <= 0) panic("Here is another problem");
 677                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 678                                 if(buffers[i] < 0) panic("Here is the problem");
 679                                 tmp = bh->b_next_free;
 680                                 if (!bh) break;
 681                                 
 682                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 683                                     buffer_dirty(bh)) {
 684                                         refile_buffer(bh);
 685                                         continue;
 686                                 };
 687                                 
 688                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 689                                          continue;
 690                                 
 691                                 /* Buffers are written in the order they are
 692                                    placed on the locked list.  If we encounter
 693                                    a locked buffer here, this means that the
 694                                    rest of them are also locked */
 695                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 696                                         buffers[i] = 0;
 697                                         break;
 698                                 }
 699               
 700                                 if (BADNESS(bh)) continue;
 701                                 break;
 702                         };
 703                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 704                         else candidate[i] = bh;
 705                         if(candidate[i] && candidate[i]->b_count) 
 706                                  panic("Here is the problem");
 707                 }
 708                 
 709                 goto repeat;
 710         }
 711         
 712         if(needed <= 0) return;
 713         
 714         /* Too bad, that was not enough. Try a little harder to grow some. */
 715         
 716         if (nr_free_pages > min_free_pages + 5) {
 717                 if (grow_buffers(GFP_BUFFER, size)) {
 718                         needed -= PAGE_SIZE;
 719                         goto repeat0;
 720                 };
 721         }
 722         
 723         /* and repeat until we find something good */
 724         if (!grow_buffers(GFP_ATOMIC, size))
 725                 wakeup_bdflush(1);
 726         needed -= PAGE_SIZE;
 727         goto repeat0;
 728 }
 729 
 730 /*
 731  * Ok, this is getblk, and it isn't very clear, again to hinder
 732  * race-conditions. Most of the code is seldom used, (ie repeating),
 733  * so it should be much more efficient than it looks.
 734  *
 735  * The algorithm is changed: hopefully better, and an elusive bug removed.
 736  *
 737  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 738  * when the filesystem starts to get full of dirty blocks (I hope).
 739  */
 740 struct buffer_head * getblk(kdev_t dev, int block, int size)
 741 {
 742         struct buffer_head * bh;
 743         int isize = BUFSIZE_INDEX(size);
 744 
 745         /* Update this for the buffer size lav. */
 746         buffer_usage[isize]++;
 747 
 748         /* If there are too many dirty buffers, we wake up the update process
 749            now so as to ensure that there are still clean buffers available
 750            for user processes to use (and dirty) */
 751 repeat:
 752         bh = get_hash_table(dev, block, size);
 753         if (bh) {
 754                 if (!buffer_dirty(bh)) {
 755                         if (buffer_uptodate(bh))
 756                                  put_last_lru(bh);
 757                         bh->b_flushtime = 0;
 758                 }
 759                 set_bit(BH_Touched, &bh->b_state);
 760                 return bh;
 761         }
 762 
 763         while(!free_list[isize]) refill_freelist(size);
 764         
 765         if (find_buffer(dev,block,size))
 766                  goto repeat;
 767 
 768         bh = free_list[isize];
 769         remove_from_free_list(bh);
 770 
 771 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 772 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 773         bh->b_count=1;
 774         bh->b_flushtime=0;
 775         bh->b_state=(1<<BH_Touched);
 776         bh->b_dev=dev;
 777         bh->b_blocknr=block;
 778         insert_into_queues(bh);
 779         return bh;
 780 }
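/*
 * Note that getblk() only reserves a unique buffer for (dev,block); it
 * never starts any I/O.  The usual read pattern is the one bread() below
 * follows: getblk(), then ll_rw_block(READ, 1, &bh) if the buffer is not
 * uptodate, wait_on_buffer(), and finally brelse() when the caller is done.
 */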
 781 
 782 void set_writetime(struct buffer_head * buf, int flag)
 783 {
 784         int newtime;
 785 
 786         if (buffer_dirty(buf)) {
  787                 /* Set the write-back deadline if unset, or bring it forward */
 788                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 789                                      bdf_prm.b_un.age_buffer);
 790                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 791                          buf->b_flushtime = newtime;
 792         } else {
 793                 buf->b_flushtime = 0;
 794         }
 795 }
 796 
 797 
 798 void refile_buffer(struct buffer_head * buf)
 799 {
 800         int dispose;
 801 
 802         if(buf->b_dev == B_FREE) {
 803                 printk("Attempt to refile free buffer\n");
 804                 return;
 805         }
 806         if (buffer_dirty(buf))
 807                 dispose = BUF_DIRTY;
 808         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 809                 dispose = BUF_SHARED;
 810         else if (buffer_locked(buf))
 811                 dispose = BUF_LOCKED;
 812         else if (buf->b_list == BUF_SHARED)
 813                 dispose = BUF_UNSHARED;
 814         else
 815                 dispose = BUF_CLEAN;
 816         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 817         if(dispose != buf->b_list)  {
 818                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 819                          buf->b_lru_time = jiffies;
 820                 if(dispose == BUF_LOCKED && 
 821                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 822                          dispose = BUF_LOCKED1;
 823                 remove_from_queues(buf);
 824                 buf->b_list = dispose;
 825                 insert_into_queues(buf);
 826                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 827                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 828                    bdf_prm.b_un.nfract/100)
 829                          wakeup_bdflush(0);
 830         }
 831 }
 832 
 833 /*
 834  * Release a buffer head
 835  */
 836 void __brelse(struct buffer_head * buf)
 837 {
 838         wait_on_buffer(buf);
 839 
 840         /* If dirty, mark the time this buffer should be written back */
 841         set_writetime(buf, 0);
 842         refile_buffer(buf);
 843 
 844         if (buf->b_count) {
 845                 if (!--buf->b_count)
 846                         wake_up(&buffer_wait);
 847                 return;
 848         }
 849         printk("VFS: brelse: Trying to free free buffer\n");
 850 }
 851 
 852 /*
 853  * bforget() is like brelse(), except it removes the buffer
 854  * from the hash-queues (so that it won't be re-used if it's
 855  * shared).
 856  */
 857 void __bforget(struct buffer_head * buf)
 858 {
 859         wait_on_buffer(buf);
 860         mark_buffer_clean(buf);
 861         clear_bit(BH_Protected, &buf->b_state);
 862         buf->b_count--;
 863         remove_from_hash_queue(buf);
 864         buf->b_dev = NODEV;
 865         refile_buffer(buf);
 866         wake_up(&buffer_wait);
 867 }
 868 
 869 /*
 870  * bread() reads a specified block and returns the buffer that contains
 871  * it. It returns NULL if the block was unreadable.
 872  */
 873 struct buffer_head * bread(kdev_t dev, int block, int size)
 874 {
 875         struct buffer_head * bh;
 876 
 877         if (!(bh = getblk(dev, block, size))) {
 878                 printk("VFS: bread: READ error on device %s\n",
 879                         kdevname(dev));
 880                 return NULL;
 881         }
 882         if (buffer_uptodate(bh))
 883                 return bh;
 884         ll_rw_block(READ, 1, &bh);
 885         wait_on_buffer(bh);
 886         if (buffer_uptodate(bh))
 887                 return bh;
 888         brelse(bh);
 889         return NULL;
 890 }
 891 
 892 /*
  893  * Ok, breada can be used as bread, but additionally starts reads on
  894  * the blocks that follow, limited by the device's read-ahead setting
  895  * and the end of the file.
 896  */
 897 
 898 #define NBUF 16
 899 
 900 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 901         unsigned int pos, unsigned int filesize)
 902 {
 903         struct buffer_head * bhlist[NBUF];
 904         unsigned int blocks;
 905         struct buffer_head * bh;
 906         int index;
 907         int i, j;
 908 
 909         if (pos >= filesize)
 910                 return NULL;
 911 
 912         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 913                 return NULL;
 914 
 915         index = BUFSIZE_INDEX(bh->b_size);
 916 
 917         if (buffer_uptodate(bh))
 918                 return bh;
 919 
 920         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 921 
 922         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 923                 blocks = read_ahead[MAJOR(dev)] >> index;
 924         if (blocks > NBUF)
 925                 blocks = NBUF;
 926         
 927         bhlist[0] = bh;
 928         j = 1;
 929         for(i=1; i<blocks; i++) {
 930                 bh = getblk(dev,block+i,bufsize);
 931                 if (buffer_uptodate(bh)) {
 932                         brelse(bh);
 933                         break;
 934                 }
 935                 bhlist[j++] = bh;
 936         }
 937 
 938         /* Request the read for these buffers, and then release them */
 939         ll_rw_block(READ, j, bhlist);
 940 
 941         for(i=1; i<j; i++)
 942                 brelse(bhlist[i]);
 943 
 944         /* Wait for this buffer, and then continue on */
 945         bh = bhlist[0];
 946         wait_on_buffer(bh);
 947         if (buffer_uptodate(bh))
 948                 return bh;
 949         brelse(bh);
 950         return NULL;
 951 }
 952 
 953 /*
 954  * See fs/inode.c for the weird use of volatile..
 955  */
 956 static void put_unused_buffer_head(struct buffer_head * bh)
 957 {
 958         struct wait_queue * wait;
 959 
 960         wait = ((volatile struct buffer_head *) bh)->b_wait;
 961         memset(bh,0,sizeof(*bh));
 962         ((volatile struct buffer_head *) bh)->b_wait = wait;
 963         bh->b_next_free = unused_list;
 964         unused_list = bh;
 965 }
 966 
 967 static void get_more_buffer_heads(void)
 968 {
 969         int i;
 970         struct buffer_head * bh;
 971 
 972         if (unused_list)
 973                 return;
 974 
 975         if (!(bh = (struct buffer_head*) get_free_page(GFP_KERNEL)))
 976                 return;
 977 
 978         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 979                 bh->b_next_free = unused_list;  /* only make link */
 980                 unused_list = bh++;
 981         }
 982 }
 983 
 984 /* 
 985  * We can't put completed temporary IO buffer_heads directly onto the
 986  * unused_list when they become unlocked, since the device driver
 987  * end_request routines still expect access to the buffer_head's
 988  * fields after the final unlock.  So, the device driver puts them on
 989  * the reuse_list instead once IO completes, and we recover these to
 990  * the unused_list here.
 991  *
 992  * The reuse_list receives buffers from interrupt routines, so we need
 993  * to be IRQ-safe here.
 994  */
 995 static inline void recover_reusable_buffer_heads(void)
 996 {
 997         struct buffer_head *bh;
 998         unsigned long flags;
 999         
1000         save_flags(flags);
1001         cli();
1002         while (reuse_list) {
1003                 bh = reuse_list;
1004                 reuse_list = bh->b_next_free;
1005                 restore_flags(flags);
1006                 put_unused_buffer_head(bh);
1007                 cli();
1008         }
1009 }
1010 
1011 static struct buffer_head * get_unused_buffer_head(void)
1012 {
1013         struct buffer_head * bh;
1014 
1015         recover_reusable_buffer_heads();
1016         get_more_buffer_heads();
1017         if (!unused_list)
1018                 return NULL;
1019         bh = unused_list;
1020         unused_list = bh->b_next_free;
1021         bh->b_next_free = NULL;
1022         bh->b_data = NULL;
1023         bh->b_size = 0;
1024         bh->b_state = 0;
1025         return bh;
1026 }
1027 
1028 /*
1029  * Create the appropriate buffers when given a page for data area and
1030  * the size of each buffer.. Use the bh->b_this_page linked list to
1031  * follow the buffers created.  Return NULL if unable to create more
1032  * buffers.
1033  */
1034 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1035 {
1036         struct buffer_head *bh, *head;
1037         unsigned long offset;
1038 
1039         head = NULL;
1040         offset = PAGE_SIZE;
1041         while ((offset -= size) < PAGE_SIZE) {
1042                 bh = get_unused_buffer_head();
1043                 if (!bh)
1044                         goto no_grow;
1045                 bh->b_this_page = head;
1046                 head = bh;
1047                 bh->b_data = (char *) (page+offset);
1048                 bh->b_size = size;
1049                 bh->b_dev = B_FREE;  /* Flag as unused */
1050         }
1051         return head;
1052 /*
1053  * In case anything failed, we just free everything we got.
1054  */
1055 no_grow:
1056         bh = head;
1057         while (bh) {
1058                 head = bh;
1059                 bh = bh->b_this_page;
1060                 put_unused_buffer_head(head);
1061         }
1062         return NULL;
1063 }
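/*
 * The b_this_page list built above is NULL-terminated; callers that want
 * the usual circular ring (bread_page() below, grow_buffers()) close it
 * themselves by pointing the last buffer's b_this_page back at the head.
 */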
1064 
1065 static int bread_page(unsigned long address, kdev_t dev, int b[], int size)
1066 {
1067         struct buffer_head *bh, *prev, *next, *arr[MAX_BUF_PER_PAGE];
1068         int block, nr;
1069         struct page *page;
1070 
1071         page = mem_map + MAP_NR(address);
1072         page->uptodate = 0;
1073         bh = create_buffers(address, size);
1074         if (!bh)
1075                 return -ENOMEM;
1076         nr = 0;
1077         next = bh;
1078         do {
1079                 struct buffer_head * tmp;
1080                 block = *(b++);
1081 
1082                 set_bit(BH_FreeOnIO, &next->b_state);
1083                 next->b_list = BUF_CLEAN;
1084                 next->b_dev = dev;
1085                 next->b_blocknr = block;
1086                 next->b_count = 1;
1087                 next->b_flushtime = 0;
1088                 clear_bit(BH_Dirty, &next->b_state);
1089                 clear_bit(BH_Req, &next->b_state);
1090                 set_bit(BH_Uptodate, &next->b_state);
1091                 
1092                 if (!block) {
1093                         memset(next->b_data, 0, size);
1094                         continue;
1095                 }
1096                 tmp = get_hash_table(dev, block, size);
1097                 if (tmp) {
1098                         if (!buffer_uptodate(tmp)) {
1099                                 ll_rw_block(READ, 1, &tmp);
1100                                 wait_on_buffer(tmp);
1101                         }
1102                         memcpy(next->b_data, tmp->b_data, size);
1103                         brelse(tmp);
1104                         continue;
1105                 }
1106                 clear_bit(BH_Uptodate, &next->b_state);
1107                 arr[nr++] = next;
1108         } while (prev = next, (next = next->b_this_page) != NULL);
1109         prev->b_this_page = bh;
1110         
1111         if (nr)
1112                 ll_rw_block(READ, nr, arr);
1113         else {
1114                 page->locked = 0;
1115                 page->uptodate = 1;
1116                 wake_up(&page->wait);
1117         }
1118         ++current->maj_flt;
1119         return 0;
1120 }
1121 
1122 void mark_buffer_uptodate(struct buffer_head * bh, int on)
1123 {
1124         if (on) {
1125                 struct buffer_head *tmp = bh;
1126                 int page_uptodate = 1;
1127                 set_bit(BH_Uptodate, &bh->b_state);
1128                 do {
1129                         if (!test_bit(BH_Uptodate, &tmp->b_state)) {
1130                                 page_uptodate = 0;
1131                                 break;
1132                         }
1133                         tmp=tmp->b_this_page;
1134                 } while (tmp && tmp != bh);
1135                 if (page_uptodate)
1136                         mem_map[MAP_NR(bh->b_data)].uptodate = 1;
1137         } else
1138                 clear_bit(BH_Uptodate, &bh->b_state);
1139 }
1140 
1141 /*
1142  * Generic "readpage" function for block devices that have the normal
1143  * bmap functionality. This is most of the block device filesystems.
1144  * Reads the page asynchronously --- the unlock_buffer() and
 1145  * mark_buffer_uptodate() functions propagate buffer state into the
1146  * page struct once IO has completed.
1147  */
1148 int generic_readpage(struct inode * inode, struct page * page)
1149 {
1150         unsigned long block, address;
1151         int *p, nr[PAGE_SIZE/512];
1152         int i;
1153 
1154         wait_on_page(page);
1155         page->locked = 1;
1156         
1157         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1158         block = page->offset >> inode->i_sb->s_blocksize_bits;
1159         p = nr;
1160         do {
1161                 *p = inode->i_op->bmap(inode, block);
1162                 i--;
1163                 block++;
1164                 p++;
1165         } while (i > 0);
1166 
1167         /* IO start */
1168         page->count++;
1169         address = page_address(page);
1170         bread_page(address, inode->i_dev, nr, inode->i_sb->s_blocksize);
1171         free_page(address);
1172         return 0;
1173 }
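/*
 * A block filesystem with a working bmap() can normally install this
 * function directly as its readpage operation, e.g. (a sketch; "myfs" is a
 * hypothetical filesystem):
 *
 *      myfs_inode_operations.readpage = generic_readpage;
 */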
1174 
1175 #if 0
1176 /*
1177  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1178  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1179  */
1180 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1181 {
1182         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1183         int i, j;
1184 
1185         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1186                 bh[i] = NULL;
1187                 if (b[i])
1188                         bh[i] = getblk(dev, b[i], size);
1189         }
1190         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1191                 if (bh[i]) {
1192                         memcpy(bh[i]->b_data, (void *) address, size);
1193                         mark_buffer_uptodate(bh[i], 1);
1194                         mark_buffer_dirty(bh[i], 0);
1195                         brelse(bh[i]);
1196                 } else
1197                         memset((void *) address, 0, size); /* ???!?!! */
1198         }       
1199 }
1200 #endif
1201 
1202 /*
1203  * Try to increase the number of buffers available: the size argument
1204  * is used to determine what kind of buffers we want.
1205  */
1206 static int grow_buffers(int pri, int size)
1207 {
1208         unsigned long page;
1209         struct buffer_head *bh, *tmp;
1210         struct buffer_head * insert_point;
1211         int isize;
1212 
1213         if ((size & 511) || (size > PAGE_SIZE)) {
1214                 printk("VFS: grow_buffers: size = %d\n",size);
1215                 return 0;
1216         }
1217 
1218         isize = BUFSIZE_INDEX(size);
1219 
1220         if (!(page = __get_free_page(pri)))
1221                 return 0;
1222         bh = create_buffers(page, size);
1223         if (!bh) {
1224                 free_page(page);
1225                 return 0;
1226         }
1227 
1228         insert_point = free_list[isize];
1229 
1230         tmp = bh;
1231         while (1) {
1232                 nr_free[isize]++;
1233                 if (insert_point) {
1234                         tmp->b_next_free = insert_point->b_next_free;
1235                         tmp->b_prev_free = insert_point;
1236                         insert_point->b_next_free->b_prev_free = tmp;
1237                         insert_point->b_next_free = tmp;
1238                 } else {
1239                         tmp->b_prev_free = tmp;
1240                         tmp->b_next_free = tmp;
1241                 }
1242                 insert_point = tmp;
1243                 ++nr_buffers;
1244                 if (tmp->b_this_page)
1245                         tmp = tmp->b_this_page;
1246                 else
1247                         break;
1248         }
1249         free_list[isize] = bh;
1250         buffer_pages[MAP_NR(page)] = bh;
1251         tmp->b_this_page = bh;
1252         wake_up(&buffer_wait);
1253         buffermem += PAGE_SIZE;
1254         return 1;
1255 }
1256 
1257 
1258 /* =========== Reduce the buffer memory ============= */
1259 
1260 /*
1261  * try_to_free_buffer() checks if all the buffers on this particular page
 1262  * are unused, and frees the page if so.
1263  */
1264 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1265                        int priority)
1266 {
1267         unsigned long page;
1268         struct buffer_head * tmp, * p;
1269         int isize = BUFSIZE_INDEX(bh->b_size);
1270 
1271         *bhp = bh;
1272         page = (unsigned long) bh->b_data;
1273         page &= PAGE_MASK;
1274         tmp = bh;
1275         do {
1276                 if (!tmp)
1277                         return 0;
1278                 if (tmp->b_count || buffer_protected(tmp) ||
1279                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1280                         return 0;
1281                 if (priority && buffer_touched(tmp))
1282                         return 0;
1283                 tmp = tmp->b_this_page;
1284         } while (tmp != bh);
1285         tmp = bh;
1286         do {
1287                 p = tmp;
1288                 tmp = tmp->b_this_page;
1289                 nr_buffers--;
1290                 nr_buffers_size[isize]--;
1291                 if (p == *bhp)
1292                   {
1293                     *bhp = p->b_prev_free;
1294                     if (p == *bhp) /* Was this the last in the list? */
1295                       *bhp = NULL;
1296                   }
1297                 remove_from_queues(p);
1298                 put_unused_buffer_head(p);
1299         } while (tmp != bh);
1300         buffermem -= PAGE_SIZE;
1301         buffer_pages[MAP_NR(page)] = NULL;
1302         free_page(page);
1303         return !mem_map[MAP_NR(page)].count;
1304 }
1305 
1306 /* Age buffers on a given page, according to whether they have been
1307    visited recently or not. */
1308 static inline void age_buffer(struct buffer_head *bh)
1309 {
1310         struct buffer_head *tmp = bh;
1311         int touched = 0;
1312 
1313         /*
1314          * When we age a page, we mark all other buffers in the page
1315          * with the "has_aged" flag.  Then, when these aliased buffers
1316          * come up for aging, we skip them until next pass.  This
1317          * ensures that a page full of multiple buffers only gets aged
1318          * once per pass through the lru lists. 
1319          */
1320         if (clear_bit(BH_Has_aged, &bh->b_state))
1321                 return;
1322         
1323         do {
1324                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1325                 tmp = tmp->b_this_page;
1326                 set_bit(BH_Has_aged, &tmp->b_state);
1327         } while (tmp != bh);
1328         clear_bit(BH_Has_aged, &bh->b_state);
1329 
1330         if (touched) 
1331                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1332         else
1333                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1334 }
1335 
1336 /*
1337  * Consult the load average for buffers and decide whether or not
1338  * we should shrink the buffers of one size.  If we decide yes,
1339  * do it and return 1; else return 0.  Do not attempt to shrink
1340  * buffers of the size that is specified.
1341  *
1342  * I would prefer not to use a load average, but the way things are now it
1343  * seems unavoidable.  The way to get rid of it would be to force clustering
1344  * universally, so that when we reclaim buffers we always reclaim an entire
1345  * page.  Doing this would mean that we all need to move towards QMAGIC.
1346  */
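/*
 * The test inside the loop below rearranges to
 *
 *    (nr_buffers_size[n] - nr_buffers_st[n][BUF_SHARED]) / total_n_buffers
 *        >  lav_const * buffers_lav[n] / total_lav
 *
 * i.e. a size is victimized when its share of the non-shared buffer pool
 * exceeds lav_const times its share of the recent getblk() load.
 */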
1347 
1348 static int maybe_shrink_lav_buffers(int size)
1349 {          
1350         int nlist;
1351         int isize;
1352         int total_lav, total_n_buffers, n_sizes;
1353         
1354         /* Do not consider the shared buffers since they would not tend
1355            to have getblk called very often, and this would throw off
1356            the lav.  They are not easily reclaimable anyway (let the swapper
1357            make the first move). */
1358   
1359         total_lav = total_n_buffers = n_sizes = 0;
1360         for(nlist = 0; nlist < NR_SIZES; nlist++)
1361          {
1362                  total_lav += buffers_lav[nlist];
1363                  if(nr_buffers_size[nlist]) n_sizes++;
1364                  total_n_buffers += nr_buffers_size[nlist];
1365                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1366          }
1367         
1368         /* See if we have an excessive number of buffers of a particular
1369            size - if so, victimize that bunch. */
1370   
1371         isize = (size ? BUFSIZE_INDEX(size) : -1);
1372         
1373         if (n_sizes > 1)
1374                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1375                   {
1376                           if(nlist == isize) continue;
1377                           if(nr_buffers_size[nlist] &&
1378                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1379                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1380                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1381                                             return 1;
1382                   }
1383         return 0;
1384 }
1385 
1386 /*
1387  * Try to free up some pages by shrinking the buffer-cache
1388  *
1389  * Priority tells the routine how hard to try to shrink the
1390  * buffers: 6 means "don't bother too much", while a value
1391  * of 0 means "we'd better get some free pages now".
1392  *
1393  * "limit" is meant to limit the shrink-action only to pages
1394  * that are in the 0 - limit address range, for DMA re-allocations.
1395  * We ignore that right now.
1396  */
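/*
 * Two phases: the per-size free lists are scanned first for a page whose
 * buffers can all be dropped cheaply, and only then are the lru lists
 * walked, aging buffers on the way.  A lower priority lengthens the lru
 * walk and accepts younger pages as victims, and at priority 0 locked
 * buffers are waited on instead of skipped.  Dirty buffers are never
 * freed here; they are only scheduled for write-out with WRITEA.
 */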
1397 
1398 static int shrink_specific_buffers(unsigned int priority, int size)
1399 {
1400         struct buffer_head *bh;
1401         int nlist;
1402         int i, isize, isize1;
1403 
1404 #ifdef DEBUG
1405         if(size) printk("Shrinking buffers of size %d\n", size);
1406 #endif
1407         /* First try the free lists, and see if we can get a complete page
1408            from here */
1409         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1410 
1411         for(isize = 0; isize<NR_SIZES; isize++){
1412                 if(isize1 != -1 && isize1 != isize) continue;
1413                 bh = free_list[isize];
1414                 if(!bh) continue;
1415                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1416                         if (bh->b_count || buffer_protected(bh) ||
1417                             !bh->b_this_page)
1418                                  continue;
1419                         if (!age_of((unsigned long) bh->b_data) &&
1420                             try_to_free_buffer(bh, &bh, 6))
1421                                  return 1;
1422                         if(!bh) break;
1423                         /* Some interrupt must have used it after we
1424                            freed the page.  No big deal - keep looking */
1425                 }
1426         }
1427         
1428         /* Not enough in the free lists, now try the lru list */
1429         
1430         for(nlist = 0; nlist < NR_LIST; nlist++) {
1431         repeat1:
1432                 if(priority > 2 && nlist == BUF_SHARED) continue;
1433                 i = nr_buffers_type[nlist];
1434                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1435                 for ( ; i > 0; i-- ) {
1436                         bh = next_to_age[nlist];
1437                         if (!bh)
1438                                 break;
1439                         next_to_age[nlist] = bh->b_next_free;
1440 
1441                         /* First, age the buffer. */
1442                         age_buffer(bh);
1443                         /* We may have stalled while waiting for I/O
1444                            to complete. */
1445                         if(bh->b_list != nlist) goto repeat1;
1446                         if (bh->b_count || buffer_protected(bh) ||
1447                             !bh->b_this_page)
1448                                  continue;
1449                         if(size && bh->b_size != size) continue;
1450                         if (buffer_locked(bh))
1451                                  if (priority)
1452                                           continue;
1453                                  else
1454                                           wait_on_buffer(bh);
1455                         if (buffer_dirty(bh)) {
1456                                 bh->b_count++;
1457                                 bh->b_flushtime = 0;
1458                                 ll_rw_block(WRITEA, 1, &bh);
1459                                 bh->b_count--;
1460                                 continue;
1461                         }
1462                         /* At priority 6, only consider really old
1463                            (age==0) buffers for reclaiming.  At
1464                            priority 0, consider any buffers. */
1465                         if ((age_of((unsigned long) bh->b_data) >>
1466                              (6-priority)) > 0)
1467                                 continue;                               
1468                         if (try_to_free_buffer(bh, &bh, 0))
1469                                  return 1;
1470                         if(!bh) break;
1471                 }
1472         }
1473         return 0;
1474 }
1475 
1476 
1477 /* ================== Debugging =================== */
1478 
1479 void show_buffers(void)
1480 {
1481         struct buffer_head * bh;
1482         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1483         int protected = 0;
1484         int shared;
1485         int nlist, isize;
1486 
1487         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1488         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1489         printk("Buffer blocks:   %6d\n",nr_buffers);
1490 
1491         for(nlist = 0; nlist < NR_LIST; nlist++) {
1492           shared = found = locked = dirty = used = lastused = protected = 0;
1493           bh = lru_list[nlist];
1494           if(!bh) continue;
1495           do {
1496                 found++;
1497                 if (buffer_locked(bh))
1498                         locked++;
1499                 if (buffer_protected(bh))
1500                         protected++;
1501                 if (buffer_dirty(bh))
1502                         dirty++;
1503                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1504                 if (bh->b_count)
1505                         used++, lastused = found;
1506                 bh = bh->b_next_free;
1507               } while (bh != lru_list[nlist]);
1508         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, "
1509                "%d protected, %d dirty %d shrd\n",
1510                 nlist, found, used, lastused, locked, protected, dirty, shared);
1511         };
1512         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1513         for(isize = 0; isize<NR_SIZES; isize++){
1514                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1515                        buffers_lav[isize], nr_free[isize]);
1516                 for(nlist = 0; nlist < NR_LIST; nlist++)
1517                          printk("%7d ", nr_buffers_st[isize][nlist]);
1518                 printk("\n");
1519         }
1520 }
1521 
1522 
1523 /* ====================== Cluster patches for ext2 ==================== */
1524 
1525 /*
1526  * try_to_reassign() checks if all the buffers on this particular page
1527  * are unused, and reassigns them to a new cluster if this is true.
1528  */
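/*
 * Rather than freeing the page and allocating a new one, the buffers
 * already on it are recycled in place: once the page is known to be
 * idle we step forward to the buffer whose data starts the page, then
 * walk the b_this_page ring giving each head the new device, a
 * consecutive block number and a clean (not uptodate) state before
 * rehashing it.
 */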
1529 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1530                            kdev_t dev, unsigned int starting_block)
1531 {
1532         unsigned long page;
1533         struct buffer_head * tmp, * p;
1534 
1535         *bhp = bh;
1536         page = (unsigned long) bh->b_data;
1537         page &= PAGE_MASK;
1538         if(mem_map[MAP_NR(page)].count != 1) return 0;
1539         tmp = bh;
1540         do {
1541                 if (!tmp)
1542                          return 0;
1543                 
1544                 if (tmp->b_count || buffer_protected(tmp) ||
1545                     buffer_dirty(tmp) || buffer_locked(tmp))
1546                          return 0;
1547                 tmp = tmp->b_this_page;
1548         } while (tmp != bh);
1549         tmp = bh;
1550         
1551         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1552                  tmp = tmp->b_this_page;
1553         
1554         /* This is the buffer at the head of the page */
1555         bh = tmp;
1556         do {
1557                 p = tmp;
1558                 tmp = tmp->b_this_page;
1559                 remove_from_queues(p);
1560                 p->b_dev = dev;
1561                 mark_buffer_uptodate(p, 0);
1562                 clear_bit(BH_Req, &p->b_state);
1563                 p->b_blocknr = starting_block++;
1564                 insert_into_queues(p);
1565         } while (tmp != bh);
1566         return 1;
1567 }
1568 
1569 /*
1570  * Try to find a free cluster by locating a page where
1571  * all of the buffers are unused.  We would like this function
1572  * to be atomic, so we do not call anything that might cause
1573  * the process to sleep.  The priority is somewhat similar to
1574  * the priority used in shrink_buffers.
1575  * 
1576  * My thinking is that the kernel should end up using whole
1577  * pages for the buffer cache as much of the time as possible.
1578  * This way the other buffers on a particular page are likely
1579  * to be very near each other on the free list, and we will not
1580  * be expiring data prematurely.  For now we only cannibalize buffers
1581  * of the same size to keep the code simpler.
1582  */
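/*
 * "Overfill" means topping the free list for this size up to at least
 * 32 buffer heads before scanning it, which improves the odds that one
 * of the pages behind those buffers is entirely unused.
 */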
1583 static int reassign_cluster(kdev_t dev, 
1584                      unsigned int starting_block, int size)
1585 {
1586         struct buffer_head *bh;
1587         int isize = BUFSIZE_INDEX(size);
1588         int i;
1589 
1590         /* We want to give ourselves a really good shot at generating
1591            a cluster, and since we only take buffers from the free
1592            list, we "overfill" it a little. */
1593 
1594         while(nr_free[isize] < 32) refill_freelist(size);
1595 
1596         bh = free_list[isize];
1597         if(bh)
1598                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1599                          if (!bh->b_this_page)  continue;
1600                          if (try_to_reassign(bh, &bh, dev, starting_block))
1601                                  return 4;
1602                  }
1603         return 0;
1604 }
1605 
1606 /* This function tries to generate a new cluster of buffers
1607  * from a new page in memory.  We should only do this if we have
1608  * not expanded the buffer cache to the maximum size that we allow.
1609  */
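/*
 * A freshly allocated page can only become a cluster if none of the
 * blocks it would cover already has a buffer in the cache, otherwise
 * the existing copy would be left stale.  The buffers are created
 * first, the hash is probed for every block in the range, and on any
 * hit the whole attempt is undone and the page given back.
 */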
1610 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1611 {
1612         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1613         int isize = BUFSIZE_INDEX(size);
1614         unsigned long offset;
1615         unsigned long page;
1616         int nblock;
1617 
1618         page = get_free_page(GFP_NOBUFFER);
1619         if(!page) return 0;
1620 
1621         bh = create_buffers(page, size);
1622         if (!bh) {
1623                 free_page(page);
1624                 return 0;
1625         };
1626         nblock = block;
1627         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1628                 if (find_buffer(dev, nblock++, size))
1629                          goto not_aligned;
1630         }
1631         tmp = bh;
1632         nblock = 0;
1633         while (1) {
1634                 arr[nblock++] = bh;
1635                 bh->b_count = 1;
1636                 bh->b_flushtime = 0;
1637                 bh->b_state = 0;
1638                 bh->b_dev = dev;
1639                 bh->b_list = BUF_CLEAN;
1640                 bh->b_blocknr = block++;
1641                 nr_buffers++;
1642                 nr_buffers_size[isize]++;
1643                 insert_into_queues(bh);
1644                 if (bh->b_this_page)
1645                         bh = bh->b_this_page;
1646                 else
1647                         break;
1648         }
1649         buffermem += PAGE_SIZE;
1650         buffer_pages[MAP_NR(page)] = bh;
1651         bh->b_this_page = tmp;
1652         while (nblock-- > 0)
1653                 brelse(arr[nblock]);
1654         return 4; /* ?? */
1655 not_aligned:
1656         while ((tmp = bh) != NULL) {
1657                 bh = bh->b_this_page;
1658                 put_unused_buffer_head(tmp);
1659         }
1660         free_page(page);
1661         return 0;
1662 }
1663 
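/*
 * generate_cluster() ties the pieces above together: given block numbers
 * b[] covering one page, it checks that they are consecutive and that
 * none of them is already cached, then either builds the cluster on a
 * fresh page (after possibly shrinking an over-represented buffer size)
 * or, when free memory is tight, recycles an idle page through
 * reassign_cluster().
 */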
1664 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1665 {
1666         int i, offset;
1667         
1668         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1669                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1670                 if(find_buffer(dev, b[i], size)) return 0;
1671         };
1672 
1673         /* OK, we have a candidate for a new cluster */
1674         
1675         /* See if one size of buffer is over-represented in the buffer cache,
1676            if so reduce the numbers of buffers */
1677         if(maybe_shrink_lav_buffers(size))
1678          {
1679                  int retval;
1680                  retval = try_to_generate_cluster(dev, b[0], size);
1681                  if(retval) return retval;
1682          };
1683         
1684         if (nr_free_pages > min_free_pages*2) 
1685                  return try_to_generate_cluster(dev, b[0], size);
1686         else
1687                  return reassign_cluster(dev, b[0], size);
1688 }
1689 
1690 
1691 /* ===================== Init ======================= */
1692 
1693 /*
1694  * This initializes the initial buffer free list.  nr_buffers_type is set
1695  * to one less than the actual number of buffers, as a sop to backwards
1696  * compatibility --- the old code did this (I think unintentionally,
1697  * but I'm not sure), and programs in the ps package expect it.
1698  *                                      - TYT 8/30/92
1699  */
1700 void buffer_init(void)
1701 {
1702         int i;
1703         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1704         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1705 
1706         if (memsize >= 4*1024*1024) {
1707                 if(memsize >= 16*1024*1024)
1708                          nr_hash = 16381;
1709                 else
1710                          nr_hash = 4093;
1711         } else {
1712                 nr_hash = 997;
1713         };
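        /*
         * 997, 4093 and 16381 are primes close to 1K, 4K and 16K hash
         * entries, chosen by physical memory size: below 4MB, 4MB up to
         * 16MB, and 16MB or more respectively.
         */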
1714         
1715         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1716                                                      sizeof(struct buffer_head *));
1717 
1718 
1719         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1720                                                      sizeof(struct buffer_head *));
1721         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1722                 buffer_pages[i] = NULL;
1723 
1724         for (i = 0 ; i < nr_hash ; i++)
1725                 hash_table[i] = NULL;
1726         lru_list[BUF_CLEAN] = 0;
1727         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1728         if (!free_list[isize])
1729                 panic("VFS: Unable to initialize buffer free list!");
1730         return;
1731 }
1732 
1733 
1734 /* ====================== bdflush support =================== */
1735 
1736 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1737  * response to dirty buffers.  Once this process is activated, we write back
1738  * a limited number of buffers to the disks and then go back to sleep again.
1739  */
1740 struct wait_queue * bdflush_wait = NULL;
1741 struct wait_queue * bdflush_done = NULL;
1742 
1743 static void wakeup_bdflush(int wait)
1744 {
1745         wake_up(&bdflush_wait);
1746         if(wait) sleep_on(&bdflush_done);
1747 }
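/*
 * Callers that need the flush to have completed before they go on pass
 * wait != 0; they are put to sleep on bdflush_done, which the daemon's
 * main loop wakes at the end of every pass.
 */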
1748 
1749 
1750 /* 
1751  * Here we attempt to write back old buffers.  We also try to flush inodes
1752  * and supers as well, since this function is essentially "update", and
1753  * otherwise there would be no way of ensuring that these quantities ever
1754  * get written back.  Ideally, we would have a timestamp on the inodes
1755  * and superblocks so that we could write back only the old ones as well.
1756  */
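/*
 * The pass below only walks the BUF_DIRTY lru list (all lists when DEBUG
 * is defined): clean buffers that have strayed onto it are refiled, and
 * dirty, unlocked buffers whose b_flushtime has already expired are
 * queued for writing with ll_rw_block(WRITE, ...).
 */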
1757 
1758 asmlinkage int sync_old_buffers(void)
1759 {
1760         int i, isize;
1761         int ndirty, nwritten;
1762         int nlist;
1763         int ncount;
1764         struct buffer_head * bh, *next;
1765 
1766         sync_supers(0);
1767         sync_inodes(0);
1768 
1769         ncount = 0;
1770 #ifdef DEBUG
1771         for(nlist = 0; nlist < NR_LIST; nlist++)
1772 #else
1773         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1774 #endif
1775         {
1776                 ndirty = 0;
1777                 nwritten = 0;
1778         repeat:
1779                 bh = lru_list[nlist];
1780                 if(bh) 
1781                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1782                                  /* We may have stalled while waiting for I/O to complete. */
1783                                  if(bh->b_list != nlist) goto repeat;
1784                                  next = bh->b_next_free;
1785                                  if(!lru_list[nlist]) {
1786                                          printk("Dirty list empty %d\n", i);
1787                                          break;
1788                                  }
1789                                  
1790                                  /* Clean buffer on dirty list?  Refile it */
1791                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1792                                   {
1793                                           refile_buffer(bh);
1794                                           continue;
1795                                   }
1796                                  
1797                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1798                                           continue;
1799                                  ndirty++;
1800                                  if(bh->b_flushtime > jiffies) continue;
1801                                  nwritten++;
1802                                  bh->b_count++;
1803                                  bh->b_flushtime = 0;
1804 #ifdef DEBUG
1805                                  if(nlist != BUF_DIRTY) ncount++;
1806 #endif
1807                                  ll_rw_block(WRITE, 1, &bh);
1808                                  bh->b_count--;
1809                          }
1810         }
1811 #ifdef DEBUG
1812         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1813         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1814 #endif
1815         
1816         /* We assume that we only come through here on a regular
1817            schedule, like every 5 seconds.  Now update load averages.  
1818            Shift usage counts to prevent overflow. */
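        /*
         * CALC_LOAD (from <linux/sched.h>) is the same fixed-point
         * exponential decay used for the system load average: roughly
         * lav = lav * e + usage * (1 - e), with e set by lav_const, so a
         * size whose getblk() traffic stops fades out gradually instead
         * of dropping to zero at once.
         */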
1819         for(isize = 0; isize<NR_SIZES; isize++){
1820                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1821                 buffer_usage[isize] = 0;
1822         };
1823         return 0;
1824 }
1825 
1826 
1827 /* This is the interface to bdflush.  As we get more sophisticated, we can
1828  * pass tuning parameters to this "process", to adjust how it behaves. 
1829  * We would want to verify each parameter, however, to make sure that it 
1830  * is reasonable. */
1831 
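/*
 * Reads (even func) copy the current value of bdf_prm.data[i] out to the
 * user pointer passed in 'data'; writes (odd func) take the new value
 * from 'data' itself and accept it only if it lies between bdflush_min[i]
 * and bdflush_max[i].  func 0 is a no-op kept for old update(8) binaries,
 * and func 1 forces a sync_old_buffers() pass.
 */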
1832 asmlinkage int sys_bdflush(int func, long data)
1833 {
1834         int i, error;
1835 
1836         if (!suser())
1837                 return -EPERM;
1838 
1839         if (func == 1)
1840                  return sync_old_buffers();
1841 
1842         /* For func >= 2: even values of func read a parameter, odd values write it (func 2/3 -> param 0, 4/5 -> param 1, ...) */
1843         if (func >= 2) {
1844                 i = (func-2) >> 1;
1845                 if (i < 0 || i >= N_PARAM)
1846                         return -EINVAL;
1847                 if((func & 1) == 0) {
1848                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1849                         if (error)
1850                                 return error;
1851                         put_user(bdf_prm.data[i], (int*)data);
1852                         return 0;
1853                 };
1854                 if (data < bdflush_min[i] || data > bdflush_max[i])
1855                         return -EINVAL;
1856                 bdf_prm.data[i] = data;
1857                 return 0;
1858         };
1859 
1860         /* Func 0 used to launch the actual bdflush and then never
1861         return (unless explicitly killed). We return zero here to 
1862         remain semi-compatible with present update(8) programs. */
1863 
1864         return 0;
1865 }
1866 
1867 /* This is the actual bdflush daemon itself. It used to be started from
1868  * the syscall above, but now we launch it ourselves internally with
1869  * kernel_thread(...)  directly after the first thread in init/main.c */
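/*
 * Each pass of the loop below writes back at most bdf_prm.b_un.ndirty
 * buffers from the BUF_DIRTY lru list and then wakes anyone sleeping on
 * bdflush_done.  If dirty buffers still make up more than nfract percent
 * of the non-shared buffer cache another pass starts immediately;
 * otherwise the daemon sleeps on bdflush_wait until wakeup_bdflush().
 */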
1870 
1871 int bdflush(void * unused) 
1872 {
1873         int i;
1874         int ndirty;
1875         int nlist;
1876         int ncount;
1877         struct buffer_head * bh, *next;
1878 
1879         /*
1880          *      We have a bare-bones task_struct, and really should fill
1881          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1882          *      display semi-sane things. Not real crucial though...  
1883          */
1884 
1885         current->session = 1;
1886         current->pgrp = 1;
1887         sprintf(current->comm, "kflushd");
1888 
1889         /*
1890          *      As a kernel thread we want to tamper with system buffers
1891          *      and other internals and thus be subject to the SMP locking
1892          *      rules. (On a uniprocessor box this does nothing).
1893          */
1894          
1895 #ifdef __SMP__
1896         lock_kernel();
1897         syscall_count++;
1898 #endif
1899                  
1900         for (;;) {
1901 #ifdef DEBUG
1902                 printk("bdflush() activated...");
1903 #endif
1904                 
1905                 ncount = 0;
1906 #ifdef DEBUG
1907                 for(nlist = 0; nlist < NR_LIST; nlist++)
1908 #else
1909                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1910 #endif
1911                  {
1912                          ndirty = 0;
1913                  repeat:
1914                          bh = lru_list[nlist];
1915                          if(bh) 
1916                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1917                                        bh = next) {
1918                                           /* We may have stalled while waiting for I/O to complete. */
1919                                           if(bh->b_list != nlist) goto repeat;
1920                                           next = bh->b_next_free;
1921                                           if(!lru_list[nlist]) {
1922                                                   printk("Dirty list empty %d\n", i);
1923                                                   break;
1924                                           }
1925                                           
1926                                           /* Clean buffer on dirty list?  Refile it */
1927                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1928                                            {
1929                                                    refile_buffer(bh);
1930                                                    continue;
1931                                            }
1932                                           
1933                                           if (buffer_locked(bh) || !buffer_dirty(bh))
1934                                                    continue;
1935                                           /* Should we write back buffers that are shared or not??
1936                                              currently dirty buffers are not shared, so it does not matter */
1937                                           bh->b_count++;
1938                                           ndirty++;
1939                                           bh->b_flushtime = 0;
1940                                           ll_rw_block(WRITE, 1, &bh);
1941 #ifdef DEBUG
1942                                           if(nlist != BUF_DIRTY) ncount++;
1943 #endif
1944                                           bh->b_count--;
1945                                   }
1946                  }
1947 #ifdef DEBUG
1948                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1949                 printk("sleeping again.\n");
1950 #endif
1951                 wake_up(&bdflush_done);
1952                 
1953                 /* If there are still a lot of dirty buffers around, skip the sleep
1954                    and flush some more */
1955                 
1956                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1957                    bdf_prm.b_un.nfract/100) {
1958                         current->signal = 0;
1959                         interruptible_sleep_on(&bdflush_wait);
1960                 }
1961         }
1962 }
1963 
1964 
1965 /*
1966  * Overrides for Emacs so that we follow Linus's tabbing style.
1967  * Emacs will notice this stuff at the end of the file and automatically
1968  * adjust the settings for this buffer only.  This must remain at the end
1969  * of the file.
1970  * ---------------------------------------------------------------------------
1971  * Local variables:
1972  * c-indent-level: 8
1973  * c-brace-imaginary-offset: 0
1974  * c-brace-offset: -8
1975  * c-argdecl-indent: 8
1976  * c-label-offset: -8
1977  * c-continued-statement-offset: 8
1978  * c-continued-brace-offset: 0
1979  * End:
1980  */
