root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. sys_fdatasync
  9. invalidate_buffers
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. __brelse
  25. __bforget
  26. bread
  27. breada
  28. put_unused_buffer_head
  29. get_more_buffer_heads
  30. get_unused_buffer_head
  31. create_buffers
  32. read_buffers
  33. bread_page
  34. generic_readpage
  35. bwrite_page
  36. grow_buffers
  37. try_to_free_buffer
  38. age_buffer
  39. maybe_shrink_lav_buffers
  40. shrink_specific_buffers
  41. show_buffers
  42. try_to_reassign
  43. reassign_cluster
  44. try_to_generate_cluster
  45. generate_cluster
  46. buffer_init
  47. wakeup_bdflush
  48. sync_old_buffers
  49. sys_bdflush
  50. bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18  
  19 /* Some bdflush() changes for the dynamic ramdisk - Paul Gortmaker, 12/94 */
  20 
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/major.h>
  24 #include <linux/string.h>
  25 #include <linux/locks.h>
  26 #include <linux/errno.h>
  27 #include <linux/malloc.h>
  28 #include <linux/pagemap.h>
  29 #include <linux/swap.h>
  30 #include <linux/swapctl.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 
  34 #include <asm/system.h>
  35 #include <asm/segment.h>
  36 #include <asm/io.h>
  37 
  38 #define NR_SIZES 4
  39 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  40 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  41 
  42 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
  43 #define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
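
/*
 * For illustration only (not part of the original source): the tables above
 * map the four legal buffer sizes to a small index and back again.  A
 * minimal sketch, with a made-up function name:
 */
#if 0
static int example_size_to_index(int size)
{
        /* same sanity check grow_buffers() applies before using the macro
           (it uses PAGE_SIZE as the upper bound) */
        if ((size & 511) || size > 4096)
                return -1;
        /* BUFSIZE_INDEX: 512 -> 0, 1024 -> 1, 2048 -> 2, 4096 -> 3,
           and -1 for unsupported multiples of 512 such as 1536 */
        return BUFSIZE_INDEX(size);
}
#endif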
  44 
  45 static int grow_buffers(int pri, int size);
  46 static int shrink_specific_buffers(unsigned int priority, int size);
  47 static int maybe_shrink_lav_buffers(int);
  48 
  49 static int nr_hash = 0;  /* Size of hash table */
  50 static struct buffer_head ** hash_table;
  51 struct buffer_head ** buffer_pages;
  52 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  53 /* next_to_age is an array of pointers into the lru lists, used to
  54    cycle through the buffers aging their contents when deciding which
  55    buffers to discard when more memory is needed */
  56 static struct buffer_head * next_to_age[NR_LIST] = {NULL, };
  57 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  58 static struct buffer_head * unused_list = NULL;
  59 static struct wait_queue * buffer_wait = NULL;
  60 
  61 int nr_buffers = 0;
  62 int nr_buffers_type[NR_LIST] = {0,};
  63 int nr_buffers_size[NR_SIZES] = {0,};
  64 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  65 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  66 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  67 int nr_free[NR_SIZES] = {0,};
  68 int buffermem = 0;
  69 int nr_buffer_heads = 0;
  70 extern int *blksize_size[];
  71 
  72 /* Here is the parameter block for the bdflush process. */
  73 static void wakeup_bdflush(int);
  74 
  75 #define N_PARAM 9
  76 #define LAV
  77 
  78 static union bdflush_param{
  79         struct {
  80                 int nfract;  /* Percentage of buffer cache dirty to 
  81                                 activate bdflush */
  82                 int ndirty;  /* Maximum number of dirty blocks to write out per
  83                                 wake-cycle */
  84                 int nrefill; /* Number of clean buffers to try and obtain
  85                                 each time we call refill */
  86                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  87                                   when trying to refill buffers. */
  88                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  89                                     search for free clusters */
  90                 int age_buffer;  /* Time for normal buffer to age before 
  91                                     we flush it */
  92                 int age_super;  /* Time for superblock to age before we 
  93                                    flush it */
  94                 int lav_const;  /* Constant used for load average (time
  95                                    constant) */
  96                 int lav_ratio;  /* Used to determine how low a lav for a
  97                                    particular size can go before we start to
  98                                    trim back the buffers */
  99         } b_un;
 100         unsigned int data[N_PARAM];
 101 } bdf_prm = {{25, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
 102 
 103 /* The lav constant is set for 1 minute, as long as the update process runs
 104    every 5 seconds.  If you change the frequency of update, the time
 105    constant will also change. */
 106 
 107 
 108 /* These are the min and max parameter values that we will allow to be assigned */
 109 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 110 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
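
/*
 * For illustration only (not part of the original source): the kind of range
 * check a tuning interface (cf. sys_bdflush() later in this file) performs
 * before storing a new value into bdf_prm.data[].  The function name is
 * invented for the example:
 */
#if 0
static int example_set_bdflush_param(int n, unsigned int value)
{
        if (n < 0 || n >= N_PARAM)
                return -EINVAL;
        if (value < bdflush_min[n] || value > bdflush_max[n])
                return -EINVAL;
        bdf_prm.data[n] = value;
        return 0;
}
#endif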
 111 
 112 /*
 113  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 114  * and getting rid of the cli-sti pairs. The wait-queue routines still
 115  * need cli-sti, but now it's just a couple of 386 instructions or so.
 116  *
 117  * Note that the real wait_on_buffer() is an inline function that checks
 118  * if 'b_wait' is set before calling this, so that the queues aren't set
 119  * up unnecessarily.
 120  */
 121 void __wait_on_buffer(struct buffer_head * bh)
 122 {
 123         struct wait_queue wait = { current, NULL };
 124 
 125         bh->b_count++;
 126         add_wait_queue(&bh->b_wait, &wait);
 127 repeat:
 128         current->state = TASK_UNINTERRUPTIBLE;
 129         if (buffer_locked(bh)) {
 130                 schedule();
 131                 goto repeat;
 132         }
 133         remove_wait_queue(&bh->b_wait, &wait);
 134         bh->b_count--;
 135         current->state = TASK_RUNNING;
 136 }
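
/*
 * For reference (not part of this file): a sketch of the inline wrapper
 * mentioned above.  In kernels of this vintage it lives in <linux/locks.h>
 * and only drops into the slow path when the buffer is actually locked:
 */
#if 0
extern inline void wait_on_buffer(struct buffer_head * bh)
{
        if (buffer_locked(bh))
                __wait_on_buffer(bh);
}
#endif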
 137 
 138 /* Call sync_buffers with wait!=0 to ensure that the call does not
 139    return until all buffer writes have completed.  Sync() may return
 140    before the writes have finished; fsync() may not. */
 141 
 142 
 143 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 144    spontaneously dirty themselves without ever brelse being called.
 145    We will ultimately want to put these in a separate list, but for
 146    now we search all of the lists for dirty buffers */
 147 
 148 static int sync_buffers(kdev_t dev, int wait)
 149 {
 150         int i, retry, pass = 0, err = 0;
 151         int nlist, ncount;
 152         struct buffer_head * bh, *next;
 153 
 154         /* One pass for no-wait, three for wait:
 155            0) write out all dirty, unlocked buffers;
 156            1) write out all dirty buffers, waiting if locked;
 157            2) wait for completion by waiting for all buffers to unlock. */
 158  repeat:
 159         retry = 0;
 160  repeat2:
 161         ncount = 0;
 162         /* We search all lists as a failsafe mechanism, not because we expect
 163            there to be dirty buffers on any of the other lists. */
 164         for(nlist = 0; nlist < NR_LIST; nlist++)
 165          {
 166          repeat1:
 167                  bh = lru_list[nlist];
 168                  if(!bh) continue;
 169                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 170                          if(bh->b_list != nlist) goto repeat1;
 171                          next = bh->b_next_free;
 172                          if(!lru_list[nlist]) break;
 173                          if (dev && bh->b_dev != dev)
 174                                   continue;
 175                          if (buffer_locked(bh))
 176                           {
 177                                   /* Buffer is locked; skip it unless wait is
 178                                      requested AND pass > 0. */
 179                                   if (!wait || !pass) {
 180                                           retry = 1;
 181                                           continue;
 182                                   }
 183                                   wait_on_buffer (bh);
 184                                   goto repeat2;
 185                           }
 186                          /* If an unlocked buffer is not uptodate, there has
 187                              been an IO error. Skip it. */
 188                          if (wait && buffer_req(bh) && !buffer_locked(bh) &&
 189                              !buffer_dirty(bh) && !buffer_uptodate(bh)) {
 190                                   err = 1;
 191                                   continue;
 192                           }
 193                          /* Don't write clean buffers.  Don't write ANY buffers
 194                             on the third pass. */
 195                          if (!buffer_dirty(bh) || pass>=2)
 196                                   continue;
 197                          /* don't bother about locked buffers */
 198                          if (buffer_locked(bh))
 199                                  continue;
 200                          bh->b_count++;
 201                          bh->b_flushtime = 0;
 202                          ll_rw_block(WRITE, 1, &bh);
 203 
 204                          if(nlist != BUF_DIRTY) { 
 205                                  printk("[%d %s %ld] ", nlist,
 206                                         kdevname(bh->b_dev), bh->b_blocknr);
 207                                  ncount++;
 208                          };
 209                          bh->b_count--;
 210                          retry = 1;
 211                  }
 212          }
 213         if (ncount)
 214           printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 215         
 216         /* If we are waiting for the sync to succeed, and if any dirty
 217            blocks were written, then repeat; on the second pass, only
 218            wait for buffers being written (do not pass to write any
 219            more buffers on the second pass). */
 220         if (wait && retry && ++pass<=2)
 221                  goto repeat;
 222         return err;
 223 }
 224 
 225 void sync_dev(kdev_t dev)
 226 {
 227         sync_buffers(dev, 0);
 228         sync_supers(dev);
 229         sync_inodes(dev);
 230         sync_buffers(dev, 0);
 231         sync_dquots(dev, -1);
 232 }
 233 
 234 int fsync_dev(kdev_t dev)
 235 {
 236         sync_buffers(dev, 0);
 237         sync_supers(dev);
 238         sync_inodes(dev);
 239         sync_dquots(dev, -1);
 240         return sync_buffers(dev, 1);
 241 }
 242 
 243 asmlinkage int sys_sync(void)
 244 {
 245         fsync_dev(0);
 246         return 0;
 247 }
 248 
 249 int file_fsync (struct inode *inode, struct file *filp)
 250 {
 251         return fsync_dev(inode->i_dev);
 252 }
 253 
 254 asmlinkage int sys_fsync(unsigned int fd)
 255 {
 256         struct file * file;
 257         struct inode * inode;
 258 
 259         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 260                 return -EBADF;
 261         if (!file->f_op || !file->f_op->fsync)
 262                 return -EINVAL;
 263         if (file->f_op->fsync(inode,file))
 264                 return -EIO;
 265         return 0;
 266 }
 267 
 268 asmlinkage int sys_fdatasync(unsigned int fd)
 269 {
 270         struct file * file;
 271         struct inode * inode;
 272 
 273         if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode))
 274                 return -EBADF;
 275         if (!file->f_op || !file->f_op->fsync)
 276                 return -EINVAL;
 277         /* this needs further work, at the moment it is identical to fsync() */
 278         if (file->f_op->fsync(inode,file))
 279                 return -EIO;
 280         return 0;
 281 }
 282 
 283 void invalidate_buffers(kdev_t dev)
 284 {
 285         int i;
 286         int nlist;
 287         struct buffer_head * bh;
 288 
 289         for(nlist = 0; nlist < NR_LIST; nlist++) {
 290                 bh = lru_list[nlist];
 291                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) {
 292                         if (bh->b_dev != dev)
 293                                 continue;
 294                         wait_on_buffer(bh);
 295                         if (bh->b_dev != dev)
 296                                 continue;
 297                         if (bh->b_count)
 298                                 continue;
 299                         bh->b_flushtime = 0;
 300                         clear_bit(BH_Protected, &bh->b_state);
 301                         clear_bit(BH_Uptodate, &bh->b_state);
 302                         clear_bit(BH_Dirty, &bh->b_state);
 303                         clear_bit(BH_Req, &bh->b_state);
 304                 }
 305         }
 306 }
 307 
 308 #define _hashfn(dev,block) (((unsigned)(HASHDEV(dev)^block))%nr_hash)
 309 #define hash(dev,block) hash_table[_hashfn(dev,block)]
 310 
 311 static inline void remove_from_hash_queue(struct buffer_head * bh)
 312 {
 313         if (bh->b_next)
 314                 bh->b_next->b_prev = bh->b_prev;
 315         if (bh->b_prev)
 316                 bh->b_prev->b_next = bh->b_next;
 317         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 318                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 319         bh->b_next = bh->b_prev = NULL;
 320 }
 321 
 322 static inline void remove_from_lru_list(struct buffer_head * bh)
 323 {
 324         if (!(bh->b_prev_free) || !(bh->b_next_free))
 325                 panic("VFS: LRU block list corrupted");
 326         if (bh->b_dev == B_FREE)
 327                 panic("LRU list corrupted");
 328         bh->b_prev_free->b_next_free = bh->b_next_free;
 329         bh->b_next_free->b_prev_free = bh->b_prev_free;
 330 
 331         if (lru_list[bh->b_list] == bh)
 332                  lru_list[bh->b_list] = bh->b_next_free;
 333         if (lru_list[bh->b_list] == bh)
 334                  lru_list[bh->b_list] = NULL;
 335         if (next_to_age[bh->b_list] == bh)
 336                 next_to_age[bh->b_list] = bh->b_next_free;
 337         if (next_to_age[bh->b_list] == bh)
 338                 next_to_age[bh->b_list] = NULL;
 339 
 340         bh->b_next_free = bh->b_prev_free = NULL;
 341 }
 342 
 343 static inline void remove_from_free_list(struct buffer_head * bh)
 344 {
 345         int isize = BUFSIZE_INDEX(bh->b_size);
 346         if (!(bh->b_prev_free) || !(bh->b_next_free))
 347                 panic("VFS: Free block list corrupted");
 348         if(bh->b_dev != B_FREE)
 349                 panic("Free list corrupted");
 350         if(!free_list[isize])
 351                 panic("Free list empty");
 352         nr_free[isize]--;
 353         if(bh->b_next_free == bh)
 354                  free_list[isize] = NULL;
 355         else {
 356                 bh->b_prev_free->b_next_free = bh->b_next_free;
 357                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 358                 if (free_list[isize] == bh)
 359                          free_list[isize] = bh->b_next_free;
 360         };
 361         bh->b_next_free = bh->b_prev_free = NULL;
 362 }
 363 
 364 static inline void remove_from_queues(struct buffer_head * bh)
 365 {
 366         if(bh->b_dev == B_FREE) {
 367                 remove_from_free_list(bh); /* Free list entries should not be
 368                                               in the hash queue */
 369                 return;
 370         };
 371         nr_buffers_type[bh->b_list]--;
 372         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 373         remove_from_hash_queue(bh);
 374         remove_from_lru_list(bh);
 375 }
 376 
 377 static inline void put_last_lru(struct buffer_head * bh)
 378 {
 379         if (!bh)
 380                 return;
 381         if (bh == lru_list[bh->b_list]) {
 382                 lru_list[bh->b_list] = bh->b_next_free;
 383                 if (next_to_age[bh->b_list] == bh)
 384                         next_to_age[bh->b_list] = bh->b_next_free;
 385                 return;
 386         }
 387         if(bh->b_dev == B_FREE)
 388                 panic("Wrong block for lru list");
 389         remove_from_lru_list(bh);
  390 /* add to back of lru list */
 391 
 392         if(!lru_list[bh->b_list]) {
 393                 lru_list[bh->b_list] = bh;
 394                 lru_list[bh->b_list]->b_prev_free = bh;
 395         };
 396         if (!next_to_age[bh->b_list])
 397                 next_to_age[bh->b_list] = bh;
 398 
 399         bh->b_next_free = lru_list[bh->b_list];
 400         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 401         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 402         lru_list[bh->b_list]->b_prev_free = bh;
 403 }
 404 
 405 static inline void put_last_free(struct buffer_head * bh)
 406 {
 407         int isize;
 408         if (!bh)
 409                 return;
 410 
 411         isize = BUFSIZE_INDEX(bh->b_size);      
 412         bh->b_dev = B_FREE;  /* So it is obvious we are on the free list */
 413         /* add to back of free list */
 414         if(!free_list[isize]) {
 415                 free_list[isize] = bh;
 416                 bh->b_prev_free = bh;
 417         };
 418 
 419         nr_free[isize]++;
 420         bh->b_next_free = free_list[isize];
 421         bh->b_prev_free = free_list[isize]->b_prev_free;
 422         free_list[isize]->b_prev_free->b_next_free = bh;
 423         free_list[isize]->b_prev_free = bh;
 424 }
 425 
 426 static inline void insert_into_queues(struct buffer_head * bh)
 427 {
 428         /* put at end of free list */
 429         if(bh->b_dev == B_FREE) {
 430                 put_last_free(bh);
 431                 return;
 432         }
 433         if(!lru_list[bh->b_list]) {
 434                 lru_list[bh->b_list] = bh;
 435                 bh->b_prev_free = bh;
 436         }
 437         if (!next_to_age[bh->b_list])
 438                 next_to_age[bh->b_list] = bh;
 439         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 440         bh->b_next_free = lru_list[bh->b_list];
 441         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 442         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 443         lru_list[bh->b_list]->b_prev_free = bh;
 444         nr_buffers_type[bh->b_list]++;
 445         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 446 /* put the buffer in new hash-queue if it has a device */
 447         bh->b_prev = NULL;
 448         bh->b_next = NULL;
 449         if (!(bh->b_dev))
 450                 return;
 451         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 452         hash(bh->b_dev,bh->b_blocknr) = bh;
 453         if (bh->b_next)
 454                 bh->b_next->b_prev = bh;
 455 }
 456 
 457 static inline struct buffer_head * find_buffer(kdev_t dev, int block, int size)
 458 {               
 459         struct buffer_head * tmp;
 460 
 461         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 462                 if (tmp->b_dev == dev && tmp->b_blocknr == block)
 463                         if (tmp->b_size == size)
 464                                 return tmp;
 465                         else {
 466                                 printk("VFS: Wrong blocksize on device %s\n",
 467                                         kdevname(dev));
 468                                 return NULL;
 469                         }
 470         return NULL;
 471 }
 472 
 473 /*
 474  * Why like this, I hear you say... The reason is race-conditions.
 475  * As we don't lock buffers (unless we are reading them, that is),
 476  * something might happen to it while we sleep (ie a read-error
 477  * will force it bad). This shouldn't really happen currently, but
 478  * the code is ready.
 479  */
 480 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 481 {
 482         struct buffer_head * bh;
 483 
 484         for (;;) {
 485                 if (!(bh=find_buffer(dev,block,size)))
 486                         return NULL;
 487                 bh->b_count++;
 488                 wait_on_buffer(bh);
 489                 if (bh->b_dev == dev && bh->b_blocknr == block
 490                                              && bh->b_size == size)
 491                         return bh;
 492                 bh->b_count--;
 493         }
 494 }
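
/*
 * For illustration only (not part of the original source): get_hash_table()
 * returns the buffer with its reference count already raised, so a caller
 * that only wants to peek must brelse() it again.  The helper name is
 * invented for the example:
 */
#if 0
static int example_block_is_cached(kdev_t dev, int block, int size)
{
        struct buffer_head * bh = get_hash_table(dev, block, size);

        if (!bh)
                return 0;       /* not currently in the buffer cache */
        brelse(bh);             /* drop the reference taken by get_hash_table() */
        return 1;
}
#endif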
 495 
 496 void set_blocksize(kdev_t dev, int size)
 497 {
 498         int i, nlist;
 499         struct buffer_head * bh, *bhnext;
 500 
 501         if (!blksize_size[MAJOR(dev)])
 502                 return;
 503 
 504         switch(size) {
 505                 default: panic("Invalid blocksize passed to set_blocksize");
 506                 case 512: case 1024: case 2048: case 4096:;
 507         }
 508 
 509         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 510                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 511                 return;
 512         }
 513         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 514                 return;
 515         sync_buffers(dev, 2);
 516         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 517 
 518   /* We need to be quite careful how we do this - we are moving entries
 519      around on the free list, and we can get in a loop if we are not careful.*/
 520 
 521         for(nlist = 0; nlist < NR_LIST; nlist++) {
 522                 bh = lru_list[nlist];
 523                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 524                         if(!bh) break;
 525                         bhnext = bh->b_next_free; 
 526                         if (bh->b_dev != dev)
 527                                  continue;
 528                         if (bh->b_size == size)
 529                                  continue;
 530                         
 531                         wait_on_buffer(bh);
 532                         if (bh->b_dev == dev && bh->b_size != size) {
 533                                 clear_bit(BH_Dirty, &bh->b_state);
 534                                 clear_bit(BH_Uptodate, &bh->b_state);
 535                                 clear_bit(BH_Req, &bh->b_state);
 536                                 bh->b_flushtime = 0;
 537                         }
 538                         remove_from_hash_queue(bh);
 539                 }
 540         }
 541 }
 542 
 543 #define BADNESS(bh) (buffer_dirty(bh) || buffer_locked(bh))
 544 
 545 void refill_freelist(int size)
 546 {
 547         struct buffer_head * bh, * tmp;
 548         struct buffer_head * candidate[NR_LIST];
 549         unsigned int best_time, winner;
 550         int isize = BUFSIZE_INDEX(size);
 551         int buffers[NR_LIST];
 552         int i;
 553         int needed;
 554 
 555         /* First see if we even need this.  Sometimes it is advantageous
  556          to request some blocks in a filesystem that we know we will
 557          be needing ahead of time. */
 558 
 559         if (nr_free[isize] > 100)
 560                 return;
 561 
 562         /* If there are too many dirty buffers, we wake up the update process
 563            now so as to ensure that there are still clean buffers available
 564            for user processes to use (and dirty) */
 565         
 566         /* We are going to try and locate this much memory */
  567         needed = bdf_prm.b_un.nrefill * size;
 568 
 569         while (nr_free_pages > min_free_pages*2 && needed > 0 &&
 570                grow_buffers(GFP_BUFFER, size)) {
 571                 needed -= PAGE_SIZE;
 572         }
 573 
 574         if(needed <= 0) return;
 575 
 576         /* See if there are too many buffers of a different size.
 577            If so, victimize them */
 578 
 579         while(maybe_shrink_lav_buffers(size))
 580          {
 581                  if(!grow_buffers(GFP_BUFFER, size)) break;
 582                  needed -= PAGE_SIZE;
 583                  if(needed <= 0) return;
 584          };
 585 
 586         /* OK, we cannot grow the buffer cache, now try and get some
 587            from the lru list */
 588 
 589         /* First set the candidate pointers to usable buffers.  This
 590            should be quick nearly all of the time. */
 591 
 592 repeat0:
 593         for(i=0; i<NR_LIST; i++){
 594                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 595                    nr_buffers_type[i] == 0) {
 596                         candidate[i] = NULL;
 597                         buffers[i] = 0;
 598                         continue;
 599                 }
 600                 buffers[i] = nr_buffers_type[i];
 601                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 602                  {
 603                          if(buffers[i] < 0) panic("Here is the problem");
 604                          tmp = bh->b_next_free;
 605                          if (!bh) break;
 606                          
 607                          if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 608                              buffer_dirty(bh)) {
 609                                  refile_buffer(bh);
 610                                  continue;
 611                          }
 612                          
 613                          if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 614                                   continue;
 615                          
 616                          /* Buffers are written in the order they are placed 
 617                             on the locked list. If we encounter a locked
 618                             buffer here, this means that the rest of them
 619                             are also locked */
 620                          if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 621                                  buffers[i] = 0;
 622                                  break;
 623                          }
 624                          
 625                          if (BADNESS(bh)) continue;
 626                          break;
 627                  };
 628                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 629                 else candidate[i] = bh;
 630                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 631         }
 632         
 633  repeat:
 634         if(needed <= 0) return;
 635         
 636         /* Now see which candidate wins the election */
 637         
 638         winner = best_time = UINT_MAX;  
 639         for(i=0; i<NR_LIST; i++){
 640                 if(!candidate[i]) continue;
 641                 if(candidate[i]->b_lru_time < best_time){
 642                         best_time = candidate[i]->b_lru_time;
 643                         winner = i;
 644                 }
 645         }
 646         
 647         /* If we have a winner, use it, and then get a new candidate from that list */
 648         if(winner != UINT_MAX) {
 649                 i = winner;
 650                 bh = candidate[i];
 651                 candidate[i] = bh->b_next_free;
 652                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 653                 if (bh->b_count || bh->b_size != size)
 654                          panic("Busy buffer in candidate list\n");
 655                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1)
 656                          panic("Shared buffer in candidate list\n");
 657                 if (buffer_protected(bh))
 658                         panic("Protected buffer in candidate list\n");
 659                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 660                 
 661                 if(bh->b_dev == B_FREE)
 662                         panic("Wrong list");
 663                 remove_from_queues(bh);
 664                 bh->b_dev = B_FREE;
 665                 put_last_free(bh);
 666                 needed -= bh->b_size;
 667                 buffers[i]--;
 668                 if(buffers[i] < 0) panic("Here is the problem");
 669                 
 670                 if(buffers[i] == 0) candidate[i] = NULL;
 671                 
 672                 /* Now all we need to do is advance the candidate pointer
 673                    from the winner list to the next usable buffer */
 674                 if(candidate[i] && buffers[i] > 0){
 675                         if(buffers[i] <= 0) panic("Here is another problem");
 676                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 677                                 if(buffers[i] < 0) panic("Here is the problem");
 678                                 tmp = bh->b_next_free;
 679                                 if (!bh) break;
 680                                 
 681                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
 682                                     buffer_dirty(bh)) {
 683                                         refile_buffer(bh);
 684                                         continue;
 685                                 };
 686                                 
 687                                 if (bh->b_count || buffer_protected(bh) || bh->b_size != size)
 688                                          continue;
 689                                 
 690                                 /* Buffers are written in the order they are
 691                                    placed on the locked list.  If we encounter
 692                                    a locked buffer here, this means that the
 693                                    rest of them are also locked */
 694                                 if (buffer_locked(bh) && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 695                                         buffers[i] = 0;
 696                                         break;
 697                                 }
 698               
 699                                 if (BADNESS(bh)) continue;
 700                                 break;
 701                         };
 702                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 703                         else candidate[i] = bh;
 704                         if(candidate[i] && candidate[i]->b_count) 
 705                                  panic("Here is the problem");
 706                 }
 707                 
 708                 goto repeat;
 709         }
 710         
 711         if(needed <= 0) return;
 712         
 713         /* Too bad, that was not enough. Try a little harder to grow some. */
 714         
 715         if (nr_free_pages > min_free_pages + 5) {
 716                 if (grow_buffers(GFP_BUFFER, size)) {
 717                         needed -= PAGE_SIZE;
 718                         goto repeat0;
 719                 };
 720         }
 721         
 722         /* and repeat until we find something good */
 723         if (!grow_buffers(GFP_ATOMIC, size))
 724                 wakeup_bdflush(1);
 725         needed -= PAGE_SIZE;
 726         goto repeat0;
 727 }
 728 
 729 /*
 730  * Ok, this is getblk, and it isn't very clear, again to hinder
 731  * race-conditions. Most of the code is seldom used, (ie repeating),
 732  * so it should be much more efficient than it looks.
 733  *
 734  * The algorithm is changed: hopefully better, and an elusive bug removed.
 735  *
 736  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 737  * when the filesystem starts to get full of dirty blocks (I hope).
 738  */
 739 struct buffer_head * getblk(kdev_t dev, int block, int size)
 740 {
 741         struct buffer_head * bh;
 742         int isize = BUFSIZE_INDEX(size);
 743 
 744         /* Update this for the buffer size lav. */
 745         buffer_usage[isize]++;
 746 
 747         /* If there are too many dirty buffers, we wake up the update process
 748            now so as to ensure that there are still clean buffers available
 749            for user processes to use (and dirty) */
 750 repeat:
 751         bh = get_hash_table(dev, block, size);
 752         if (bh) {
 753                 if (!buffer_dirty(bh)) {
 754                         if (buffer_uptodate(bh))
 755                                  put_last_lru(bh);
 756                         bh->b_flushtime = 0;
 757                 }
 758                 set_bit(BH_Touched, &bh->b_state);
 759                 return bh;
 760         }
 761 
 762         while(!free_list[isize]) refill_freelist(size);
 763         
 764         if (find_buffer(dev,block,size))
 765                  goto repeat;
 766 
 767         bh = free_list[isize];
 768         remove_from_free_list(bh);
 769 
 770 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 771 /* and that it's unused (b_count=0), unlocked (buffer_locked=0), and clean */
 772         bh->b_count=1;
 773         bh->b_flushtime=0;
 774         bh->b_state=(1<<BH_Touched);
 775         bh->b_dev=dev;
 776         bh->b_blocknr=block;
 777         insert_into_queues(bh);
 778         return bh;
 779 }
 780 
 781 void set_writetime(struct buffer_head * buf, int flag)
 782 {
 783         int newtime;
 784 
 785         if (buffer_dirty(buf)) {
 786                 /* Move buffer to dirty list if jiffies is clear */
 787                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 788                                      bdf_prm.b_un.age_buffer);
 789                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 790                          buf->b_flushtime = newtime;
 791         } else {
 792                 buf->b_flushtime = 0;
 793         }
 794 }
 795 
 796 
 797 void refile_buffer(struct buffer_head * buf)
 798 {
 799         int dispose;
 800 
 801         if(buf->b_dev == B_FREE) {
 802                 printk("Attempt to refile free buffer\n");
 803                 return;
 804         }
 805         if (buffer_dirty(buf))
 806                 dispose = BUF_DIRTY;
 807         else if ((mem_map[MAP_NR((unsigned long) buf->b_data)].count > 1) || buffer_protected(buf))
 808                 dispose = BUF_SHARED;
 809         else if (buffer_locked(buf))
 810                 dispose = BUF_LOCKED;
 811         else if (buf->b_list == BUF_SHARED)
 812                 dispose = BUF_UNSHARED;
 813         else
 814                 dispose = BUF_CLEAN;
 815         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 816         if(dispose != buf->b_list)  {
 817                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 818                          buf->b_lru_time = jiffies;
 819                 if(dispose == BUF_LOCKED && 
 820                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 821                          dispose = BUF_LOCKED1;
 822                 remove_from_queues(buf);
 823                 buf->b_list = dispose;
 824                 insert_into_queues(buf);
 825                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 826                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 827                    bdf_prm.b_un.nfract/100)
 828                          wakeup_bdflush(0);
 829         }
 830 }
 831 
 832 /*
 833  * Release a buffer head
 834  */
 835 void __brelse(struct buffer_head * buf)
 836 {
 837         wait_on_buffer(buf);
 838 
 839         /* If dirty, mark the time this buffer should be written back */
 840         set_writetime(buf, 0);
 841         refile_buffer(buf);
 842 
 843         if (buf->b_count) {
 844                 if (!--buf->b_count)
 845                         wake_up(&buffer_wait);
 846                 return;
 847         }
 848         printk("VFS: brelse: Trying to free free buffer\n");
 849 }
 850 
 851 /*
 852  * bforget() is like brelse(), except it removes the buffer
 853  * from the hash-queues (so that it won't be re-used if it's
 854  * shared).
 855  */
 856 void __bforget(struct buffer_head * buf)
 857 {
 858         wait_on_buffer(buf);
 859         mark_buffer_clean(buf);
 860         clear_bit(BH_Protected, &buf->b_state);
 861         buf->b_count--;
 862         remove_from_hash_queue(buf);
 863         buf->b_dev = NODEV;
 864         refile_buffer(buf);
 865         wake_up(&buffer_wait);
 866 }
 867 
 868 /*
 869  * bread() reads a specified block and returns the buffer that contains
 870  * it. It returns NULL if the block was unreadable.
 871  */
 872 struct buffer_head * bread(kdev_t dev, int block, int size)
 873 {
 874         struct buffer_head * bh;
 875 
 876         if (!(bh = getblk(dev, block, size))) {
 877                 printk("VFS: bread: READ error on device %s\n",
 878                         kdevname(dev));
 879                 return NULL;
 880         }
 881         if (buffer_uptodate(bh))
 882                 return bh;
 883         ll_rw_block(READ, 1, &bh);
 884         wait_on_buffer(bh);
 885         if (buffer_uptodate(bh))
 886                 return bh;
 887         brelse(bh);
 888         return NULL;
 889 }
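
/*
 * For illustration only (not part of the original source): the typical
 * calling pattern for bread().  On success the buffer is up to date and the
 * caller owns one reference, which must be dropped with brelse().  The
 * helper name is invented for the example:
 */
#if 0
static int example_read_block(kdev_t dev, int block, int size)
{
        struct buffer_head * bh = bread(dev, block, size);

        if (!bh)
                return -EIO;    /* getblk failed or the read did not complete */
        /* ... use bh->b_data here ... */
        brelse(bh);
        return 0;
}
#endif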
 890 
 891 /*
 892  * Ok, breada can be used as bread, but additionally to mark other
 893  * blocks for reading as well. End the argument list with a negative
 894  * number.
 895  */
 896 
 897 #define NBUF 16
 898 
 899 struct buffer_head * breada(kdev_t dev, int block, int bufsize,
 900         unsigned int pos, unsigned int filesize)
 901 {
 902         struct buffer_head * bhlist[NBUF];
 903         unsigned int blocks;
 904         struct buffer_head * bh;
 905         int index;
 906         int i, j;
 907 
 908         if (pos >= filesize)
 909                 return NULL;
 910 
 911         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 912                 return NULL;
 913 
 914         index = BUFSIZE_INDEX(bh->b_size);
 915 
 916         if (buffer_uptodate(bh))
 917                 return bh;
 918 
 919         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 920 
 921         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 922                 blocks = read_ahead[MAJOR(dev)] >> index;
 923         if (blocks > NBUF)
 924                 blocks = NBUF;
 925         
 926         bhlist[0] = bh;
 927         j = 1;
 928         for(i=1; i<blocks; i++) {
 929                 bh = getblk(dev,block+i,bufsize);
 930                 if (buffer_uptodate(bh)) {
 931                         brelse(bh);
 932                         break;
 933                 }
 934                 bhlist[j++] = bh;
 935         }
 936 
 937         /* Request the read for these buffers, and then release them */
 938         ll_rw_block(READ, j, bhlist);
 939 
 940         for(i=1; i<j; i++)
 941                 brelse(bhlist[i]);
 942 
 943         /* Wait for this buffer, and then continue on */
 944         bh = bhlist[0];
 945         wait_on_buffer(bh);
 946         if (buffer_uptodate(bh))
 947                 return bh;
 948         brelse(bh);
 949         return NULL;
 950 }
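
/*
 * For illustration only (not part of the original source): how a filesystem
 * read path might call breada() instead of bread() when it expects further
 * sequential access.  'pos' is the current file position; passing the file
 * size lets breada() stop reading ahead at end-of-file.  The helper name is
 * invented for the example:
 */
#if 0
static struct buffer_head * example_read_with_readahead(struct inode * inode,
                                                        int block, unsigned int pos)
{
        return breada(inode->i_dev, block, inode->i_sb->s_blocksize,
                      pos, inode->i_size);
}
#endif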
 951 
 952 /*
 953  * See fs/inode.c for the weird use of volatile..
 954  */
 955 static void put_unused_buffer_head(struct buffer_head * bh)
 956 {
 957         struct wait_queue * wait;
 958 
 959         wait = ((volatile struct buffer_head *) bh)->b_wait;
 960         memset(bh,0,sizeof(*bh));
 961         ((volatile struct buffer_head *) bh)->b_wait = wait;
 962         bh->b_next_free = unused_list;
 963         unused_list = bh;
 964 }
 965 
 966 static void get_more_buffer_heads(void)
 967 {
 968         int i;
 969         struct buffer_head * bh;
 970 
 971         if (unused_list)
 972                 return;
 973 
 974         if (!(bh = (struct buffer_head*) get_free_page(GFP_KERNEL)))
 975                 return;
 976 
 977         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 978                 bh->b_next_free = unused_list;  /* only make link */
 979                 unused_list = bh++;
 980         }
 981 }
 982 
 983 static struct buffer_head * get_unused_buffer_head(void)
 984 {
 985         struct buffer_head * bh;
 986 
 987         get_more_buffer_heads();
 988         if (!unused_list)
 989                 return NULL;
 990         bh = unused_list;
 991         unused_list = bh->b_next_free;
 992         bh->b_next_free = NULL;
 993         bh->b_data = NULL;
 994         bh->b_size = 0;
 995         bh->b_state = 0;
 996         return bh;
 997 }
 998 
 999 /*
1000  * Create the appropriate buffers when given a page for data area and
1001  * the size of each buffer.. Use the bh->b_this_page linked list to
1002  * follow the buffers created.  Return NULL if unable to create more
1003  * buffers.
1004  */
1005 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1006 {
1007         struct buffer_head *bh, *head;
1008         unsigned long offset;
1009 
1010         head = NULL;
1011         offset = PAGE_SIZE;
1012         while ((offset -= size) < PAGE_SIZE) {
1013                 bh = get_unused_buffer_head();
1014                 if (!bh)
1015                         goto no_grow;
1016                 bh->b_this_page = head;
1017                 head = bh;
1018                 bh->b_data = (char *) (page+offset);
1019                 bh->b_size = size;
1020                 bh->b_dev = B_FREE;  /* Flag as unused */
1021         }
1022         return head;
1023 /*
1024  * In case anything failed, we just free everything we got.
1025  */
1026 no_grow:
1027         bh = head;
1028         while (bh) {
1029                 head = bh;
1030                 bh = bh->b_this_page;
1031                 put_unused_buffer_head(head);
1032         }
1033         return NULL;
1034 }
1035 
1036 static void read_buffers(struct buffer_head * bh[], int nrbuf)
1037 {
1038         ll_rw_block(READ, nrbuf, bh);
1039         bh += nrbuf;
1040         do {
1041                 nrbuf--;
1042                 bh--;
1043                 wait_on_buffer(*bh);
1044         } while (nrbuf > 0);
1045 }
1046 
1047 static int bread_page(unsigned long address, kdev_t dev, int b[], int size)
1048 {
1049         struct buffer_head *bh, *next, *arr[MAX_BUF_PER_PAGE];
1050         int block, nr;
1051 
1052         bh = create_buffers(address, size);
1053         if (!bh)
1054                 return -ENOMEM;
1055         nr = 0;
1056         next = bh;
1057         do {
1058                 struct buffer_head * tmp;
1059                 block = *(b++);
1060                 if (!block) {
1061                         memset(next->b_data, 0, size);
1062                         continue;
1063                 }
1064                 tmp = get_hash_table(dev, block, size);
1065                 if (tmp) {
1066                         if (!buffer_uptodate(tmp)) {
1067                                 ll_rw_block(READ, 1, &tmp);
1068                                 wait_on_buffer(tmp);
1069                         }
1070                         memcpy(next->b_data, tmp->b_data, size);
1071                         brelse(tmp);
1072                         continue;
1073                 }
1074                 arr[nr++] = next;
1075                 next->b_dev = dev;
1076                 next->b_blocknr = block;
1077                 next->b_count = 1;
1078                 next->b_flushtime = 0;
1079                 clear_bit(BH_Dirty, &next->b_state);
1080                 clear_bit(BH_Uptodate, &next->b_state);
1081                 clear_bit(BH_Req, &next->b_state);
1082                 next->b_list = BUF_CLEAN;
1083         } while ((next = next->b_this_page) != NULL);
1084 
1085         if (nr)
1086                 read_buffers(arr,nr);
1087         ++current->maj_flt;
1088 
1089         while ((next = bh) != NULL) {
1090                 bh = bh->b_this_page;
1091                 put_unused_buffer_head(next);
1092         }
1093         return 0;
1094 }
1095 
1096 /*
1097  * Generic "readpage" function for block devices that have the
1098  * normal bmap functionality. This is most of the block device
1099  * filesystems.
1100  */
1101 int generic_readpage(struct inode * inode, struct page * page)
1102 {
1103         unsigned long block, address;
1104         int *p, nr[PAGE_SIZE/512];
1105         int i;
1106 
1107         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
1108         block = page->offset >> inode->i_sb->s_blocksize_bits;
1109         p = nr;
1110         do {
1111                 *p = inode->i_op->bmap(inode, block);
1112                 i--;
1113                 block++;
1114                 p++;
1115         } while (i > 0);
1116 
1117         /*
1118          * We should make this asynchronous, but this is good enough for now..
1119          */
1120 
1121         /* IO start */
1122         page->count++;
1123         address = page_address(page);
1124         bread_page(address, inode->i_dev, nr, inode->i_sb->s_blocksize);
1125 
1126         /* IO ready (this part should be in the "page ready callback" function) */
1127         page->uptodate = 1;
1128         wake_up(&page->wait);
1129         free_page(address);
1130 
1131         return 0;
1132 }
1133 
1134 #if 0
1135 /*
1136  * bwrite_page writes a page out to the buffer cache and/or the physical device.
1137  * It's used for mmap writes (the same way bread_page() is used for mmap reads).
1138  */
1139 void bwrite_page(unsigned long address, kdev_t dev, int b[], int size)
1140 {
1141         struct buffer_head * bh[MAX_BUF_PER_PAGE];
1142         int i, j;
1143 
1144         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1145                 bh[i] = NULL;
1146                 if (b[i])
1147                         bh[i] = getblk(dev, b[i], size);
1148         }
1149         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size, address += size) {
1150                 if (bh[i]) {
1151                         memcpy(bh[i]->b_data, (void *) address, size);
1152                         mark_buffer_uptodate(bh[i], 1);
1153                         mark_buffer_dirty(bh[i], 0);
1154                         brelse(bh[i]);
1155                 } else
1156                         memset((void *) address, 0, size); /* ???!?!! */
1157         }       
1158 }
1159 #endif
1160 
1161 /*
1162  * Try to increase the number of buffers available: the size argument
1163  * is used to determine what kind of buffers we want.
1164  */
1165 static int grow_buffers(int pri, int size)
1166 {
1167         unsigned long page;
1168         struct buffer_head *bh, *tmp;
1169         struct buffer_head * insert_point;
1170         int isize;
1171 
1172         if ((size & 511) || (size > PAGE_SIZE)) {
1173                 printk("VFS: grow_buffers: size = %d\n",size);
1174                 return 0;
1175         }
1176 
1177         isize = BUFSIZE_INDEX(size);
1178 
1179         if (!(page = __get_free_page(pri)))
1180                 return 0;
1181         bh = create_buffers(page, size);
1182         if (!bh) {
1183                 free_page(page);
1184                 return 0;
1185         }
1186 
1187         insert_point = free_list[isize];
1188 
1189         tmp = bh;
1190         while (1) {
1191                 nr_free[isize]++;
1192                 if (insert_point) {
1193                         tmp->b_next_free = insert_point->b_next_free;
1194                         tmp->b_prev_free = insert_point;
1195                         insert_point->b_next_free->b_prev_free = tmp;
1196                         insert_point->b_next_free = tmp;
1197                 } else {
1198                         tmp->b_prev_free = tmp;
1199                         tmp->b_next_free = tmp;
1200                 }
1201                 insert_point = tmp;
1202                 ++nr_buffers;
1203                 if (tmp->b_this_page)
1204                         tmp = tmp->b_this_page;
1205                 else
1206                         break;
1207         }
1208         free_list[isize] = bh;
1209         buffer_pages[MAP_NR(page)] = bh;
1210         tmp->b_this_page = bh;
1211         wake_up(&buffer_wait);
1212         buffermem += PAGE_SIZE;
1213         return 1;
1214 }
1215 
1216 
1217 /* =========== Reduce the buffer memory ============= */
1218 
1219 /*
1220  * try_to_free_buffer() checks if all the buffers on this particular page
1221  * are unused, and free's the page if so.
1222  */
1223 int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp,
1224                        int priority)
1225 {
1226         unsigned long page;
1227         struct buffer_head * tmp, * p;
1228         int isize = BUFSIZE_INDEX(bh->b_size);
1229 
1230         *bhp = bh;
1231         page = (unsigned long) bh->b_data;
1232         page &= PAGE_MASK;
1233         tmp = bh;
1234         do {
1235                 if (!tmp)
1236                         return 0;
1237                 if (tmp->b_count || buffer_protected(tmp) ||
1238                     buffer_dirty(tmp) || buffer_locked(tmp) || tmp->b_wait)
1239                         return 0;
1240                 if (priority && buffer_touched(tmp))
1241                         return 0;
1242                 tmp = tmp->b_this_page;
1243         } while (tmp != bh);
1244         tmp = bh;
1245         do {
1246                 p = tmp;
1247                 tmp = tmp->b_this_page;
1248                 nr_buffers--;
1249                 nr_buffers_size[isize]--;
1250                 if (p == *bhp)
1251                   {
1252                     *bhp = p->b_prev_free;
1253                     if (p == *bhp) /* Was this the last in the list? */
1254                       *bhp = NULL;
1255                   }
1256                 remove_from_queues(p);
1257                 put_unused_buffer_head(p);
1258         } while (tmp != bh);
1259         buffermem -= PAGE_SIZE;
1260         buffer_pages[MAP_NR(page)] = NULL;
1261         free_page(page);
1262         return !mem_map[MAP_NR(page)].count;
1263 }
1264 
1265 /* Age buffers on a given page, according to whether they have been
1266    visited recently or not. */
1267 static inline void age_buffer(struct buffer_head *bh)
1268 {
1269         struct buffer_head *tmp = bh;
1270         int touched = 0;
1271 
1272         /*
1273          * When we age a page, we mark all other buffers in the page
1274          * with the "has_aged" flag.  Then, when these aliased buffers
1275          * come up for aging, we skip them until next pass.  This
1276          * ensures that a page full of multiple buffers only gets aged
1277          * once per pass through the lru lists. 
1278          */
1279         if (clear_bit(BH_Has_aged, &bh->b_state))
1280                 return;
1281         
1282         do {
1283                 touched |= clear_bit(BH_Touched, &tmp->b_state);
1284                 tmp = tmp->b_this_page;
1285                 set_bit(BH_Has_aged, &tmp->b_state);
1286         } while (tmp != bh);
1287         clear_bit(BH_Has_aged, &bh->b_state);
1288 
1289         if (touched) 
1290                 touch_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1291         else
1292                 age_page(mem_map + MAP_NR((unsigned long) bh->b_data));
1293 }
1294 
1295 /*
1296  * Consult the load average for buffers and decide whether or not
1297  * we should shrink the buffers of one size or not.  If we decide yes,
1298  * do it and return 1.  Else return 0.  Do not attempt to shrink size
1299  * that is specified.
1300  *
1301  * I would prefer not to use a load average, but the way things are now it
1302  * seems unavoidable.  The way to get rid of it would be to force clustering
1303  * universally, so that when we reclaim buffers we always reclaim an entire
1304  * page.  Doing this would mean that we all need to move towards QMAGIC.
1305  */
1306 
1307 static int maybe_shrink_lav_buffers(int size)
1308 {          
1309         int nlist;
1310         int isize;
1311         int total_lav, total_n_buffers, n_sizes;
1312         
1313         /* Do not consider the shared buffers since they would not tend
1314            to have getblk called very often, and this would throw off
1315            the lav.  They are not easily reclaimable anyway (let the swapper
1316            make the first move). */
1317   
1318         total_lav = total_n_buffers = n_sizes = 0;
1319         for(nlist = 0; nlist < NR_SIZES; nlist++)
1320          {
1321                  total_lav += buffers_lav[nlist];
1322                  if(nr_buffers_size[nlist]) n_sizes++;
1323                  total_n_buffers += nr_buffers_size[nlist];
1324                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1325          }
1326         
1327         /* See if we have an excessive number of buffers of a particular
1328            size - if so, victimize that bunch. */
1329   
1330         isize = (size ? BUFSIZE_INDEX(size) : -1);
1331         
1332         if (n_sizes > 1)
1333                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1334                   {
1335                           if(nlist == isize) continue;
1336                           if(nr_buffers_size[nlist] &&
1337                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1338                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1339                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1340                                             return 1;
1341                   }
1342         return 0;
1343 }
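
/*
 * The inequality above victimizes a size class whose share of the buffer
 * load average, scaled by lav_const, falls below its share of the
 * (non-shared) buffer population.  Illustrative helper only (not part of
 * the original source); with made-up values lav_const = 4, lav = 2,
 * total_lav = 92, nbuf = 400 and total_n = 1000 it returns true,
 * since 4*2*1000 = 8000 < 92*400 = 36800.
 */
static int example_lav_shrink_test(long lav_const, long lav, long total_lav,
                                   long nbuf, long total_n)
{
        return lav_const * lav * total_n < total_lav * nbuf;
}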
1344 
1345 /*
1346  * Try to free up some pages by shrinking the buffer-cache
1347  *
1348  * Priority tells the routine how hard to try to shrink the
1349  * buffers: 6 means "don't bother too much", while a value
1350  * of 0 means "we'd better get some free pages now".
1351  *
1352  * "limit" is meant to limit the shrink-action only to pages
1353  * that are in the 0 - limit address range, for DMA re-allocations.
1354  * We ignore that right now.
1355  */
1356 
1357 static int shrink_specific_buffers(unsigned int priority, int size)
1358 {
1359         struct buffer_head *bh;
1360         int nlist;
1361         int i, isize, isize1;
1362 
1363 #ifdef DEBUG
1364         if(size) printk("Shrinking buffers of size %d\n", size);
1365 #endif
1366         /* First try the free lists, and see if we can get a complete page
1367            from here */
1368         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1369 
1370         for(isize = 0; isize<NR_SIZES; isize++){
1371                 if(isize1 != -1 && isize1 != isize) continue;
1372                 bh = free_list[isize];
1373                 if(!bh) continue;
1374                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1375                         if (bh->b_count || buffer_protected(bh) ||
1376                             !bh->b_this_page)
1377                                  continue;
1378                         if (!age_of((unsigned long) bh->b_data) &&
1379                             try_to_free_buffer(bh, &bh, 6))
1380                                  return 1;
1381                         if(!bh) break;
1382                         /* Some interrupt must have used it after we
1383                            freed the page.  No big deal - keep looking */
1384                 }
1385         }
1386         
1387         /* Not enough in the free lists, now try the lru list */
1388         
1389         for(nlist = 0; nlist < NR_LIST; nlist++) {
1390         repeat1:
1391                 if(priority > 2 && nlist == BUF_SHARED) continue;
1392                 i = nr_buffers_type[nlist];
1393                 i = ((BUFFEROUT_WEIGHT * i) >> 10) >> priority;
1394                 for ( ; i > 0; i-- ) {
1395                         bh = next_to_age[nlist];
1396                         if (!bh)
1397                                 break;
1398                         next_to_age[nlist] = bh->b_next_free;
1399 
1400                         /* First, age the buffer. */
1401                         age_buffer(bh);
1402                         /* We may have stalled while waiting for I/O
1403                            to complete. */
1404                         if(bh->b_list != nlist) goto repeat1;
1405                         if (bh->b_count || buffer_protected(bh) ||
1406                             !bh->b_this_page)
1407                                  continue;
1408                         if(size && bh->b_size != size) continue;
1409                         if (buffer_locked(bh))
1410                                  if (priority)
1411                                           continue;
1412                                  else
1413                                           wait_on_buffer(bh);
1414                         if (buffer_dirty(bh)) {
1415                                 bh->b_count++;
1416                                 bh->b_flushtime = 0;
1417                                 ll_rw_block(WRITEA, 1, &bh);
1418                                 bh->b_count--;
1419                                 continue;
1420                         }
1421                         /* At priority 6, only consider really old
1422                            (age==0) buffers for reclaiming.  At
1423                            priority 0, consider any buffers. */
1424                         if ((age_of((unsigned long) bh->b_data) >>
1425                              (6-priority)) > 0)
1426                                 continue;                               
1427                         if (try_to_free_buffer(bh, &bh, 0))
1428                                  return 1;
1429                         if(!bh) break;
1430                 }
1431         }
1432         return 0;
1433 }
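
/*
 * Illustration of the priority convention described above: a hypothetical
 * caller under memory pressure could retry from the gentlest priority (6)
 * down to the most aggressive (0) until one full page has been released.
 * This helper is not part of the original source.
 */
static int example_reclaim_buffer_page(int size)
{
        int priority;

        for (priority = 6; priority >= 0; priority--)
                if (shrink_specific_buffers(priority, size))
                        return 1;       /* a page was freed */
        return 0;
}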
1434 
1435 
1436 /* ================== Debugging =================== */
1437 
1438 void show_buffers(void)
1439 {
1440         struct buffer_head * bh;
1441         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1442         int protected = 0;
1443         int shared;
1444         int nlist, isize;
1445 
1446         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1447         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1448         printk("Buffer blocks:   %6d\n",nr_buffers);
1449 
1450         for(nlist = 0; nlist < NR_LIST; nlist++) {
1451           shared = found = locked = dirty = used = lastused = protected = 0;
1452           bh = lru_list[nlist];
1453           if(!bh) continue;
1454           do {
1455                 found++;
1456                 if (buffer_locked(bh))
1457                         locked++;
1458                 if (buffer_protected(bh))
1459                         protected++;
1460                 if (buffer_dirty(bh))
1461                         dirty++;
1462                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))].count !=1) shared++;
1463                 if (bh->b_count)
1464                         used++, lastused = found;
1465                 bh = bh->b_next_free;
1466               } while (bh != lru_list[nlist]);
1467         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, "
1468                "%d protected, %d dirty %d shrd\n",
1469                 nlist, found, used, lastused, locked, protected, dirty, shared);
1470         };
1471         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared \n");
1472         for(isize = 0; isize<NR_SIZES; isize++){
1473                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1474                        buffers_lav[isize], nr_free[isize]);
1475                 for(nlist = 0; nlist < NR_LIST; nlist++)
1476                          printk("%7d ", nr_buffers_st[isize][nlist]);
1477                 printk("\n");
1478         }
1479 }
1480 
1481 
1482 /* ====================== Cluster patches for ext2 ==================== */
1483 
1484 /*
1485  * try_to_reassign() checks if all the buffers on this particular page
1486  * are unused, and reassigns them to a new cluster if this is true.
1487  */
1488 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1489                            kdev_t dev, unsigned int starting_block)
1490 {
1491         unsigned long page;
1492         struct buffer_head * tmp, * p;
1493 
1494         *bhp = bh;
1495         page = (unsigned long) bh->b_data;
1496         page &= PAGE_MASK;
1497         if(mem_map[MAP_NR(page)].count != 1) return 0;
1498         tmp = bh;
1499         do {
1500                 if (!tmp)
1501                          return 0;
1502                 
1503                 if (tmp->b_count || buffer_protected(tmp) ||
1504                     buffer_dirty(tmp) || buffer_locked(tmp))
1505                          return 0;
1506                 tmp = tmp->b_this_page;
1507         } while (tmp != bh);
1508         tmp = bh;
1509         
1510         while((unsigned long) tmp->b_data & (PAGE_SIZE - 1)) 
1511                  tmp = tmp->b_this_page;
1512         
1513         /* This is the buffer at the head of the page */
1514         bh = tmp;
1515         do {
1516                 p = tmp;
1517                 tmp = tmp->b_this_page;
1518                 remove_from_queues(p);
1519                 p->b_dev = dev;
1520                 mark_buffer_uptodate(p, 0);
1521                 clear_bit(BH_Req, &p->b_state);
1522                 p->b_blocknr = starting_block++;
1523                 insert_into_queues(p);
1524         } while (tmp != bh);
1525         return 1;
1526 }
1527 
1528 /*
1529  * Try to find a free cluster by locating a page where
1530  * all of the buffers are unused.  We would like this function
1531  * to be atomic, so we do not call anything that might cause
1532  * the process to sleep.  The priority is somewhat similar to
1533  * the priority used in shrink_buffers.
1534  * 
1535  * My thinking is that the kernel should end up using whole
1536  * pages for the buffer cache as much of the time as possible.
1537  * This way the other buffers on a particular page are likely
1538  * to be very near each other on the free list, and we will not
1539  * be expiring data prematurely.  For now we only cannibalize buffers
1540  * of the same size to keep the code simpler.
1541  */
1542 static int reassign_cluster(kdev_t dev, 
1543                      unsigned int starting_block, int size)
1544 {
1545         struct buffer_head *bh;
1546         int isize = BUFSIZE_INDEX(size);
1547         int i;
1548 
1549         /* We want to give ourselves a really good shot at generating
1550            a cluster, and since we only take buffers from the free
1551            list, we "overfill" it a little. */
1552 
1553         while(nr_free[isize] < 32) refill_freelist(size);
1554 
1555         bh = free_list[isize];
1556         if(bh)
1557                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1558                          if (!bh->b_this_page)  continue;
1559                          if (try_to_reassign(bh, &bh, dev, starting_block))
1560                                  return 4;
1561                  }
1562         return 0;
1563 }
1564 
1565 /* This function tries to generate a new cluster of buffers
1566  * from a new page in memory.  We should only do this if we have
1567  * not expanded the buffer cache to the maximum size that we allow.
1568  */
1569 static unsigned long try_to_generate_cluster(kdev_t dev, int block, int size)
1570 {
1571         struct buffer_head * bh, * tmp, * arr[MAX_BUF_PER_PAGE];
1572         int isize = BUFSIZE_INDEX(size);
1573         unsigned long offset;
1574         unsigned long page;
1575         int nblock;
1576 
1577         page = get_free_page(GFP_NOBUFFER);
1578         if(!page) return 0;
1579 
1580         bh = create_buffers(page, size);
1581         if (!bh) {
1582                 free_page(page);
1583                 return 0;
1584         };
1585         nblock = block;
1586         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1587                 if (find_buffer(dev, nblock++, size))
1588                          goto not_aligned;
1589         }
1590         tmp = bh;
1591         nblock = 0;
1592         while (1) {
1593                 arr[nblock++] = bh;
1594                 bh->b_count = 1;
1595                 bh->b_flushtime = 0;
1596                 bh->b_state = 0;
1597                 bh->b_dev = dev;
1598                 bh->b_list = BUF_CLEAN;
1599                 bh->b_blocknr = block++;
1600                 nr_buffers++;
1601                 nr_buffers_size[isize]++;
1602                 insert_into_queues(bh);
1603                 if (bh->b_this_page)
1604                         bh = bh->b_this_page;
1605                 else
1606                         break;
1607         }
1608         buffermem += PAGE_SIZE;
1609         buffer_pages[MAP_NR(page)] = bh;
1610         bh->b_this_page = tmp;
1611         while (nblock-- > 0)
1612                 brelse(arr[nblock]);
1613         return 4; /* ?? */
1614 not_aligned:
1615         while ((tmp = bh) != NULL) {
1616                 bh = bh->b_this_page;
1617                 put_unused_buffer_head(tmp);
1618         }
1619         free_page(page);
1620         return 0;
1621 }
1622 
1623 unsigned long generate_cluster(kdev_t dev, int b[], int size)
1624 {
1625         int i, offset;
1626         
1627         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1628                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1629                 if(find_buffer(dev, b[i], size)) return 0;
1630         };
1631 
1632         /* OK, we have a candidate for a new cluster */
1633         
1634         /* See if one size of buffer is over-represented in the buffer cache,
1635            if so reduce the numbers of buffers */
1636         if(maybe_shrink_lav_buffers(size))
1637          {
1638                  int retval;
1639                  retval = try_to_generate_cluster(dev, b[0], size);
1640                  if(retval) return retval;
1641          };
1642         
1643         if (nr_free_pages > min_free_pages*2) 
1644                  return try_to_generate_cluster(dev, b[0], size);
1645         else
1646                  return reassign_cluster(dev, b[0], size);
1647 }
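
/*
 * Hypothetical sketch of a caller (e.g. a filesystem doing cluster reads):
 * build the array of consecutive block numbers covering one page and ask
 * for a cluster.  generate_cluster() rejects the request if the numbers
 * are not consecutive or if any of the blocks is already in the cache.
 * This helper is illustrative only and not part of the original source.
 */
static unsigned long example_make_cluster(kdev_t dev, int first_block, int size)
{
        int b[MAX_BUF_PER_PAGE];
        int i, nblocks = PAGE_SIZE / size;

        for (i = 0; i < nblocks; i++)
                b[i] = first_block + i;
        return generate_cluster(dev, b, size);
}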
1648 
1649 
1650 /* ===================== Init ======================= */
1651 
1652 /*
1653  * This initializes the initial buffer free list.  nr_buffers_type is set
1654  * to one less than the actual number of buffers, as a sop to backwards
1655  * compatibility --- the old code did this (I think unintentionally,
1656  * but I'm not sure), and programs in the ps package expect it.
1657  *                                      - TYT 8/30/92
1658  */
1659 void buffer_init(void)
1660 {
1661         int i;
1662         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1663         long memsize = MAP_NR(high_memory) << PAGE_SHIFT;
1664 
1665         if (memsize >= 4*1024*1024) {
1666                 if(memsize >= 16*1024*1024)
1667                          nr_hash = 16381;
1668                 else
1669                          nr_hash = 4093;
1670         } else {
1671                 nr_hash = 997;
1672         };
1673         
1674         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1675                                                      sizeof(struct buffer_head *));
1676 
1677 
1678         buffer_pages = (struct buffer_head **) vmalloc(MAP_NR(high_memory) * 
1679                                                      sizeof(struct buffer_head *));
1680         for (i = 0 ; i < MAP_NR(high_memory) ; i++)
1681                 buffer_pages[i] = NULL;
1682 
1683         for (i = 0 ; i < nr_hash ; i++)
1684                 hash_table[i] = NULL;
1685         lru_list[BUF_CLEAN] = 0;
1686         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1687         if (!free_list[isize])
1688                 panic("VFS: Unable to initialize buffer free list!");
1689         return;
1690 }
1691 
1692 
1693 /* ====================== bdflush support =================== */
1694 
1695 /* This is a simple kernel daemon, whose job it is to provide a dynamic
1696  * response to dirty buffers.  Once this process is activated, we write back
1697  * a limited number of buffers to the disks and then go back to sleep again.
1698  */
1699 struct wait_queue * bdflush_wait = NULL;
1700 struct wait_queue * bdflush_done = NULL;
1701 
1702 static void wakeup_bdflush(int wait)
1703 {
1704         wake_up(&bdflush_wait);
1705         if(wait) sleep_on(&bdflush_done);
1706 }
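
/*
 * Illustration of the wait convention: a caller that must have buffers
 * flushed before it can make progress passes wait != 0 and sleeps on
 * bdflush_done until the daemon finishes a pass; otherwise the daemon is
 * merely kicked and the caller continues.  Hypothetical helper, not part
 * of the original source.
 */
static void example_kick_bdflush(int need_it_done_now)
{
        if (need_it_done_now)
                wakeup_bdflush(1);      /* block until one flush pass completes */
        else
                wakeup_bdflush(0);      /* fire and forget */
}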
1707 
1708 
1709 /* 
1710  * Here we attempt to write back old buffers.  We also try to flush inodes
1711  * and superblocks, since this function is essentially "update", and
1712  * otherwise there would be no way of ensuring that these quantities ever
1713  * get written back.  Ideally, we would have a timestamp on the inodes
1714  * and superblocks so that we could write back only the old ones.
1715  */
1716 
1717 asmlinkage int sync_old_buffers(void)
1718 {
1719         int i, isize;
1720         int ndirty, nwritten;
1721         int nlist;
1722         int ncount;
1723         struct buffer_head * bh, *next;
1724 
1725         sync_supers(0);
1726         sync_inodes(0);
1727 
1728         ncount = 0;
1729 #ifdef DEBUG
1730         for(nlist = 0; nlist < NR_LIST; nlist++)
1731 #else
1732         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1733 #endif
1734         {
1735                 ndirty = 0;
1736                 nwritten = 0;
1737         repeat:
1738                 bh = lru_list[nlist];
1739                 if(bh) 
1740                          for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
1741                                  /* We may have stalled while waiting for I/O to complete. */
1742                                  if(bh->b_list != nlist) goto repeat;
1743                                  next = bh->b_next_free;
1744                                  if(!lru_list[nlist]) {
1745                                          printk("Dirty list empty %d\n", i);
1746                                          break;
1747                                  }
1748                                  
1749                                  /* Clean buffer on dirty list?  Refile it */
1750                                  if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1751                                   {
1752                                           refile_buffer(bh);
1753                                           continue;
1754                                   }
1755                                  
1756                                  if (buffer_locked(bh) || !buffer_dirty(bh))
1757                                           continue;
1758                                  ndirty++;
1759                                  if(bh->b_flushtime > jiffies) continue;
1760                                  nwritten++;
1761                                  bh->b_count++;
1762                                  bh->b_flushtime = 0;
1763 #ifdef DEBUG
1764                                  if(nlist != BUF_DIRTY) ncount++;
1765 #endif
1766                                  ll_rw_block(WRITE, 1, &bh);
1767                                  bh->b_count--;
1768                          }
1769         }
1770 #ifdef DEBUG
1771         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1772         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1773 #endif
1774         
1775         /* We assume that we only come through here on a regular
1776            schedule, like every 5 seconds.  Now update load averages.  
1777            Shift usage counts to prevent overflow. */
1778         for(isize = 0; isize<NR_SIZES; isize++){
1779                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1780                 buffer_usage[isize] = 0;
1781         };
1782         return 0;
1783 }
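
/*
 * The CALC_LOAD() step above is exponential smoothing in fixed point,
 * assuming the usual macro from <linux/sched.h>:
 *
 *      load = (load * exp + n * (FIXED_1 - exp)) >> FSHIFT;
 *
 * i.e. new_lav = old_lav * (exp/FIXED_1) + usage * (1 - exp/FIXED_1).
 * A floating-point sketch of the same idea (illustrative only; 0.92 is
 * just an example decay, roughly exp(-5/60), i.e. a one-minute time
 * constant updated every five seconds):
 */
static double example_calc_load(double old_lav, double usage)
{
        double decay = 0.92;    /* plays the role of exp / FIXED_1 */

        return old_lav * decay + usage * (1.0 - decay);
}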
1784 
1785 
1786 /* This is the interface to bdflush.  As we get more sophisticated, we can
1787  * pass tuning parameters to this "process", to adjust how it behaves. 
1788  * We would want to verify each parameter, however, to make sure that it 
1789  * is reasonable. */
1790 
1791 asmlinkage int sys_bdflush(int func, long data)
1792 {
1793         int i, error;
1794 
1795         if (!suser())
1796                 return -EPERM;
1797 
1798         if (func == 1)
1799                  return sync_old_buffers();
1800 
1801         /* Basically func 2 means read param 1, func 3 means write param 1, etc */
1802         if (func >= 2) {
1803                 i = (func-2) >> 1;
1804                 if (i < 0 || i >= N_PARAM)
1805                         return -EINVAL;
1806                 if((func & 1) == 0) {
1807                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1808                         if (error)
1809                                 return error;
1810                         put_user(bdf_prm.data[i], (int*)data);
1811                         return 0;
1812                 };
1813                 if (data < bdflush_min[i] || data > bdflush_max[i])
1814                         return -EINVAL;
1815                 bdf_prm.data[i] = data;
1816                 return 0;
1817         };
1818 
1819         /* Calling with func 0 used to launch the actual bdflush and then
1820         never return (unless explicitly killed). We return zero here to
1821         remain semi-compatible with present update(8) programs. */
1822 
1823         return 0;
1824 }
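
/*
 * Hypothetical update(8)-style use of the interface above, from user space,
 * assuming the architecture defines __NR_bdflush.  Func 1 flushes old
 * buffers; func 2 reads tuning parameter 0 (the first bdf_prm.data entry)
 * into the int that "data" points to.  Kept under "#if 0" because it is a
 * separate user-space program, shown here only to illustrate the calling
 * convention.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        int param0;

        syscall(__NR_bdflush, 1, 0);                    /* like "update" */
        if (syscall(__NR_bdflush, 2, (long) &param0) == 0)
                printf("bdflush parameter 0: %d\n", param0);
        return 0;
}
#endif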
1825 
1826 /* This is the actual bdflush daemon itself. It used to be started from
1827  * the syscall above, but now we launch it ourselves internally with
1828  * kernel_thread(...)  directly after the first thread in init/main.c */
1829 
1830 int bdflush(void * unused) 
1831 {
1832         int i;
1833         int ndirty;
1834         int nlist;
1835         int ncount;
1836         struct buffer_head * bh, *next;
1837 
1838         /*
1839          *      We have a bare-bones task_struct, and really should fill
1840          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
1841          *      display semi-sane things. Not real crucial though...  
1842          */
1843 
1844         current->session = 1;
1845         current->pgrp = 1;
1846         sprintf(current->comm, "kflushd");
1847 
1848         /*
1849          *      As a kernel thread we want to tamper with system buffers
1850          *      and other internals and thus be subject to the SMP locking
1851          *      rules. (On a uniprocessor box this does nothing).
1852          */
1853          
1854 #ifdef __SMP__
1855         lock_kernel();
1856         syscall_count++;
1857 #endif
1858                  
1859         for (;;) {
1860 #ifdef DEBUG
1861                 printk("bdflush() activated...");
1862 #endif
1863                 
1864                 ncount = 0;
1865 #ifdef DEBUG
1866                 for(nlist = 0; nlist < NR_LIST; nlist++)
1867 #else
1868                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1869 #endif
1870                  {
1871                          ndirty = 0;
1872                  repeat:
1873                          bh = lru_list[nlist];
1874                          if(bh) 
1875                                   for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; 
1876                                        bh = next) {
1877                                           /* We may have stalled while waiting for I/O to complete. */
1878                                           if(bh->b_list != nlist) goto repeat;
1879                                           next = bh->b_next_free;
1880                                           if(!lru_list[nlist]) {
1881                                                   printk("Dirty list empty %d\n", i);
1882                                                   break;
1883                                           }
1884                                           
1885                                           /* Clean buffer on dirty list?  Refile it */
1886                                           if (nlist == BUF_DIRTY && !buffer_dirty(bh) && !buffer_locked(bh))
1887                                            {
1888                                                    refile_buffer(bh);
1889                                                    continue;
1890                                            }
1891                                           
1892                                           if (buffer_locked(bh) || !buffer_dirty(bh))
1893                                                    continue;
1894                                          /* Should we write back buffers that are shared or not?
1895                                             Currently dirty buffers are not shared, so it does not matter. */
1896                                           bh->b_count++;
1897                                           ndirty++;
1898                                           bh->b_flushtime = 0;
1899                                           ll_rw_block(WRITE, 1, &bh);
1900 #ifdef DEBUG
1901                                           if(nlist != BUF_DIRTY) ncount++;
1902 #endif
1903                                           bh->b_count--;
1904                                   }
1905                  }
1906 #ifdef DEBUG
1907                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1908                 printk("sleeping again.\n");
1909 #endif
1910                 wake_up(&bdflush_done);
1911                 
1912                 /* If there are still a lot of dirty buffers around, skip the sleep
1913                    and flush some more */
1914                 
1915                 if(nr_buffers_type[BUF_DIRTY] <= (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1916                    bdf_prm.b_un.nfract/100) {
1917                         current->signal = 0;
1918                         interruptible_sleep_on(&bdflush_wait);
1919                 }
1920         }
1921 }
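
/*
 * Roughly how the daemon above is started at boot: the real call lives in
 * init/main.c, shortly after the first kernel thread is created, so the
 * helper below is only an illustrative sketch.
 */
static void example_start_kflushd(void)
{
        kernel_thread(bdflush, NULL, 0);        /* spawn kflushd; it never returns */
}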
1922 
1923 
1924 /*
1925  * Overrides for Emacs so that we follow Linus's tabbing style.
1926  * Emacs will notice this stuff at the end of the file and automatically
1927  * adjust the settings for this buffer only.  This must remain at the end
1928  * of the file.
1929  * ---------------------------------------------------------------------------
1930  * Local variables:
1931  * c-indent-level: 8
1932  * c-brace-imaginary-offset: 0
1933  * c-brace-offset: -8
1934  * c-argdecl-indent: 8
1935  * c-label-offset: -8
1936  * c-continued-statement-offset: 8
1937  * c-continued-brace-offset: 0
1938  * End:
1939  */
