root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. check_disk_change
  10. remove_from_hash_queue
  11. remove_from_lru_list
  12. remove_from_free_list
  13. remove_from_queues
  14. put_last_lru
  15. put_last_free
  16. insert_into_queues
  17. find_buffer
  18. get_hash_table
  19. set_blocksize
  20. refill_freelist
  21. getblk
  22. set_writetime
  23. refile_buffer
  24. brelse
  25. bread
  26. breada
  27. put_unused_buffer_head
  28. get_more_buffer_heads
  29. get_unused_buffer_head
  30. create_buffers
  31. read_buffers
  32. check_aligned
  33. try_to_load_aligned
  34. try_to_share_buffers
  35. bread_page
  36. grow_buffers
  37. try_to_free
  38. maybe_shrink_lav_buffers
  39. shrink_buffers
  40. shrink_specific_buffers
  41. show_buffers
  42. try_to_reassign
  43. reassign_cluster
  44. try_to_generate_cluster
  45. generate_cluster
  46. buffer_init
  47. wakeup_bdflush
  48. sync_old_buffers
  49. sys_bdflush

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18 
  19 #include <stdarg.h>
  20  
  21 #include <linux/config.h>
  22 #include <linux/errno.h>
  23 #include <linux/sched.h>
  24 #include <linux/kernel.h>
  25 #include <linux/major.h>
  26 #include <linux/string.h>
  27 #include <linux/locks.h>
  28 #include <linux/errno.h>
  29 #include <linux/malloc.h>
  30 
  31 #include <asm/system.h>
  32 #include <asm/segment.h>
  33 #include <asm/io.h>
  34 
  35 #ifdef CONFIG_SCSI
  36 #ifdef CONFIG_BLK_DEV_SR
  37 extern int check_cdrom_media_change(int, int);
  38 #endif
  39 #ifdef CONFIG_BLK_DEV_SD
  40 extern int check_scsidisk_media_change(int, int);
  41 extern int revalidate_scsidisk(int, int);
  42 #endif
  43 #endif
  44 #ifdef CONFIG_CDU31A
  45 extern int check_cdu31a_media_change(int, int);
  46 #endif
  47 #ifdef CONFIG_MCD
  48 extern int check_mcd_media_change(int, int);
  49 #endif
  50 
  51 #define NR_SIZES 4
  52 static char buffersize_index[9] = {-1,  0,  1, -1,  2, -1, -1, -1, 3};
  53 static short int bufferindex_size[NR_SIZES] = {512, 1024, 2048, 4096};
  54 
  55 #define BUFSIZE_INDEX(X) (buffersize_index[(X)>>9])
  56 
  57 static int grow_buffers(int pri, int size);
  58 static int shrink_specific_buffers(unsigned int priority, int size);
  59 static int maybe_shrink_lav_buffers(int);
  60 
  61 static int nr_hash = 0;  /* Size of hash table */
  62 static struct buffer_head ** hash_table;
  63 struct buffer_head ** buffer_pages;
  64 static struct buffer_head * lru_list[NR_LIST] = {NULL, };
  65 static struct buffer_head * free_list[NR_SIZES] = {NULL, };
  66 static struct buffer_head * unused_list = NULL;
  67 static struct wait_queue * buffer_wait = NULL;
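/* The cache is indexed several ways: hash_table[] locates a (dev,block)
   pair, lru_list[] keeps one LRU ring per buffer state (clean, dirty,
   locked, shared, ...), and free_list[] keeps one ring of unattached
   buffers (b_dev == 0xffff) per block size.  buffer_pages[] maps a
   physical page number to the first buffer_head living in that page,
   unused_list holds spare buffer_head structs with no data page, and
   buffer_wait is the queue woken when buffers become free (see brelse()
   and grow_buffers()). */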
  68 
  69 int nr_buffers = 0;
  70 int nr_buffers_type[NR_LIST] = {0,};
  71 int nr_buffers_size[NR_SIZES] = {0,};
  72 int nr_buffers_st[NR_SIZES][NR_LIST] = {{0,},};
  73 int buffer_usage[NR_SIZES] = {0,};  /* Usage counts used to determine load average */
  74 int buffers_lav[NR_SIZES] = {0,};  /* Load average of buffer usage */
  75 int nr_free[NR_SIZES] = {0,};
  76 int buffermem = 0;
  77 int nr_buffer_heads = 0;
  78 static int min_free_pages = 20; /* nr free pages needed before buffer grows */
  79 extern int *blksize_size[];
  80 
  81 /* Here is the parameter block for the bdflush process. */
  82 static void wakeup_bdflush(int);
  83 
  84 #define N_PARAM 9
  85 #define LAV
  86 
  87 static union bdflush_param{
  88         struct {
  89                 int nfract;  /* Percentage of buffer cache dirty to 
  90                                 activate bdflush */
  91                 int ndirty;  /* Maximum number of dirty blocks to write out per
  92                                 wake-cycle */
  93                 int nrefill; /* Number of clean buffers to try and obtain
  94                                 each time we call refill */
  95                 int nref_dirt; /* Dirty buffer threshold for activating bdflush
  96                                   when trying to refill buffers. */
  97                 int clu_nfract;  /* Percentage of buffer cache to scan to 
  98                                     search for free clusters */
  99                 int age_buffer;  /* Time for normal buffer to age before 
 100                                     we flush it */
 101                 int age_super;  /* Time for superblock to age before we 
 102                                    flush it */
 103                 int lav_const;  /* Constant used for load average (time
 104                                    constant */
 105                 int lav_ratio;  /* Used to determine how low a lav for a
 106                                    particular size can go before we start to
 107                                    trim back the buffers */
 108         } b_un;
 109         unsigned int data[N_PARAM];
 110 } bdf_prm = {{25, 500, 64, 256, 15, 3000, 500, 1884, 2}};
 111 
 112 /* The lav constant is set for 1 minute, as long as the update process runs
 113    every 5 seconds.  If you change the frequency of update, the time
 114    constant will also change. */
 115 
 116 
 117 /* These are the min and max parameter values that we will allow to be assigned */
 118 static int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   100,   100, 1, 1};
 119 static int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5};
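/* sys_bdflush (entry 49 in the index above, outside this excerpt) tunes
   bdf_prm from user space.  A minimal sketch of the range check that the
   two tables above imply -- set_bdflush_param() is a hypothetical name,
   used here only for illustration:

        static int set_bdflush_param(int n, int value)
        {
                if (n < 0 || n >= N_PARAM)
                        return -EINVAL;
                if (value < bdflush_min[n] || value > bdflush_max[n])
                        return -EINVAL;
                bdf_prm.data[n] = value;
                return 0;
        }
*/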
 120 
 121 /*
 122  * Rewrote the wait-routines to use the "new" wait-queue functionality,
 123  * and getting rid of the cli-sti pairs. The wait-queue routines still
 124  * need cli-sti, but now it's just a couple of 386 instructions or so.
 125  *
 126  * Note that the real wait_on_buffer() is an inline function that checks
 127  * if 'b_wait' is set before calling this, so that the queues aren't set
 128  * up unnecessarily.
 129  */
 130 void __wait_on_buffer(struct buffer_head * bh)
 131 {
 132         struct wait_queue wait = { current, NULL };
 133 
 134         bh->b_count++;
 135         add_wait_queue(&bh->b_wait, &wait);
 136 repeat:
 137         current->state = TASK_UNINTERRUPTIBLE;
 138         if (bh->b_lock) {
 139                 schedule();
 140                 goto repeat;
 141         }
 142         remove_wait_queue(&bh->b_wait, &wait);
 143         bh->b_count--;
 144         current->state = TASK_RUNNING;
 145 }
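/* The inline wrapper mentioned in the comment above lives in <linux/locks.h>,
   not in this file.  A sketch of its usual shape in kernels of this vintage --
   the b_lock guard is an assumption, based on the loop above, which only
   spins while b_lock is set:

        extern inline void wait_on_buffer(struct buffer_head * bh)
        {
                if (bh->b_lock)
                        __wait_on_buffer(bh);
        }
*/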
 146 
 147 /* Call sync_buffers with wait!=0 to ensure that the call does not
 148    return until all buffer writes have completed.  Sync() may return
 149    before the writes have finished; fsync() may not. */
 150 
 151 
 152 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
 153    spontaneously dirty themselves without ever brelse being called.
 154    We will ultimately want to put these in a separate list, but for
 155    now we search all of the lists for dirty buffers */
 156 
 157 static int sync_buffers(dev_t dev, int wait)
 158 {
 159         int i, retry, pass = 0, err = 0;
 160         int nlist, ncount;
 161         struct buffer_head * bh, *next;
 162 
 163         /* One pass for no-wait, three for wait:
 164            0) write out all dirty, unlocked buffers;
 165            1) write out all dirty buffers, waiting if locked;
 166            2) wait for completion by waiting for all buffers to unlock. */
 167  repeat:
 168         retry = 0;
 169         ncount = 0;
 170         /* We search all lists as a failsafe mechanism, not because we expect
 171            there to be dirty buffers on any of the other lists. */
 172         for(nlist = 0; nlist < NR_LIST; nlist++)
 173          {
 174          repeat1:
 175                  bh = lru_list[nlist];
 176                  if(!bh) continue;
 177                  for (i = nr_buffers_type[nlist]*2 ; i-- > 0 ; bh = next) {
 178                          if(bh->b_list != nlist) goto repeat1;
 179                          next = bh->b_next_free;
 180                          if(!lru_list[nlist]) break;
 181                          if (dev && bh->b_dev != dev)
 182                                   continue;
 183                          if (bh->b_lock)
 184                           {
 185                                   /* Buffer is locked; skip it unless wait is
 186                                      requested AND pass > 0. */
 187                                   if (!wait || !pass) {
 188                                           retry = 1;
 189                                           continue;
 190                                   }
 191                                   wait_on_buffer (bh);
 192                           }
 193                          /* If an unlocked buffer is not uptodate, there has
 194                              been an IO error. Skip it. */
 195                          if (wait && bh->b_req && !bh->b_lock &&
 196                              !bh->b_dirt && !bh->b_uptodate)
 197                           {
 198                                   err = 1;
  199                                   printk("Weird - unlocked, clean and not uptodate buffer on list %d\n", nlist);
 200                                   continue;
 201                           }
 202                          /* Don't write clean buffers.  Don't write ANY buffers
 203                             on the third pass. */
 204                          if (!bh->b_dirt || pass>=2)
 205                                   continue;
 206                          bh->b_count++;
 207                          bh->b_flushtime = 0;
 208                          ll_rw_block(WRITE, 1, &bh);
 209 
 210                          if(nlist != BUF_DIRTY) { 
 211                                  printk("[%d %x %d] ", nlist, bh->b_dev, bh->b_blocknr);
 212                                  ncount++;
 213                          };
 214                          bh->b_count--;
 215                          retry = 1;
 216                  }
 217          }
 218         if (ncount) printk("sys_sync: %d dirty buffers not on dirty list\n", ncount);
 219         
 220         /* If we are waiting for the sync to succeed, and if any dirty
 221            blocks were written, then repeat; on the second pass, only
 222            wait for buffers being written (do not pass to write any
 223            more buffers on the second pass). */
 224         if (wait && retry && ++pass<=2)
 225                  goto repeat;
 226         return err;
 227 }
 228 
 229 void sync_dev(dev_t dev)
 230 {
 231         sync_buffers(dev, 0);
 232         sync_supers(dev);
 233         sync_inodes(dev);
 234         sync_buffers(dev, 0);
 235 }
 236 
 237 int fsync_dev(dev_t dev)
 238 {
 239         sync_buffers(dev, 0);
 240         sync_supers(dev);
 241         sync_inodes(dev);
 242         return sync_buffers(dev, 1);
 243 }
 244 
 245 asmlinkage int sys_sync(void)
 246 {
 247         sync_dev(0);
 248         return 0;
 249 }
 250 
 251 int file_fsync (struct inode *inode, struct file *filp)
 252 {
 253         return fsync_dev(inode->i_dev);
 254 }
 255 
 256 asmlinkage int sys_fsync(unsigned int fd)
 257 {
 258         struct file * file;
 259         struct inode * inode;
 260 
 261         if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode))
 262                 return -EBADF;
 263         if (!file->f_op || !file->f_op->fsync)
 264                 return -EINVAL;
 265         if (file->f_op->fsync(inode,file))
 266                 return -EIO;
 267         return 0;
 268 }
 269 
 270 void invalidate_buffers(dev_t dev)
 271 {
 272         int i;
 273         int nlist;
 274         struct buffer_head * bh;
 275 
 276         for(nlist = 0; nlist < NR_LIST; nlist++) {
 277                 bh = lru_list[nlist];
 278                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; 
 279                      bh = bh->b_next_free) {
 280                         if (bh->b_dev != dev)
 281                                  continue;
 282                         wait_on_buffer(bh);
 283                         if (bh->b_dev == dev)
 284                                  bh->b_flushtime = bh->b_uptodate = 
 285                                           bh->b_dirt = bh->b_req = 0;
 286                 }
 287         }
 288 }
 289 
 290 /*
 291  * This routine checks whether a floppy has been changed, and
 292  * invalidates all buffer-cache-entries in that case. This
 293  * is a relatively slow routine, so we have to try to minimize using
 294  * it. Thus it is called only upon a 'mount' or 'open'. This
 295  * is the best way of combining speed and utility, I think.
 296  * People changing diskettes in the middle of an operation deserve
  297  * to lose :-)
 298  *
 299  * NOTE! Although currently this is only for floppies, the idea is
 300  * that any additional removable block-device will use this routine,
 301  * and that mount/open needn't know that floppies/whatever are
 302  * special.
 303  */
 304 void check_disk_change(dev_t dev)
 305 {
 306         int i;
 307         struct buffer_head * bh;
 308 
 309         switch(MAJOR(dev)){
 310         case FLOPPY_MAJOR:
 311                 if (!(bh = getblk(dev,0,1024)))
 312                         return;
 313                 i = floppy_change(bh);
 314                 brelse(bh);
 315                 break;
 316 
 317 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 318          case SCSI_DISK_MAJOR:
 319                 i = check_scsidisk_media_change(dev, 0);
 320                 break;
 321 #endif
 322 
 323 #if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
 324          case SCSI_CDROM_MAJOR:
 325                 i = check_cdrom_media_change(dev, 0);
 326                 break;
 327 #endif
 328 
 329 #if defined(CONFIG_CDU31A)
 330          case CDU31A_CDROM_MAJOR:
 331                 i = check_cdu31a_media_change(dev, 0);
 332                 break;
 333 #endif
 334 
 335 #if defined(CONFIG_MCD)
 336          case MITSUMI_CDROM_MAJOR:
 337                 i = check_mcd_media_change(dev, 0);
 338                 break;
 339 #endif
 340 
 341          default:
 342                 return;
 343         };
 344 
 345         if (!i) return;
 346 
 347         printk("VFS: Disk change detected on device %d/%d\n",
 348                                         MAJOR(dev), MINOR(dev));
 349         for (i=0 ; i<NR_SUPER ; i++)
 350                 if (super_blocks[i].s_dev == dev)
 351                         put_super(super_blocks[i].s_dev);
 352         invalidate_inodes(dev);
 353         invalidate_buffers(dev);
 354 
 355 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
  356 /* This is trickier for a removable hard disk, because we have to invalidate
 357    all of the partitions that lie on the disk. */
 358         if (MAJOR(dev) == SCSI_DISK_MAJOR)
 359                 revalidate_scsidisk(dev, 0);
 360 #endif
 361 }
 362 
 363 #define _hashfn(dev,block) (((unsigned)(dev^block))%nr_hash)
 364 #define hash(dev,block) hash_table[_hashfn(dev,block)]
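/* Each hash bucket is a chain of buffer_heads doubly linked through b_next
   and b_prev; nr_hash is sized when buffer_init() sets up hash_table.  Only
   buffers attached to a device are hashed (see insert_into_queues below). */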
 365 
 366 static inline void remove_from_hash_queue(struct buffer_head * bh)
 367 {
 368         if (bh->b_next)
 369                 bh->b_next->b_prev = bh->b_prev;
 370         if (bh->b_prev)
 371                 bh->b_prev->b_next = bh->b_next;
 372         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 373                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 374         bh->b_next = bh->b_prev = NULL;
 375 }
 376 
 377 static inline void remove_from_lru_list(struct buffer_head * bh)
 378 {
 379         if (!(bh->b_prev_free) || !(bh->b_next_free))
 380                 panic("VFS: LRU block list corrupted");
 381         if (bh->b_dev == 0xffff) panic("LRU list corrupted");
 382         bh->b_prev_free->b_next_free = bh->b_next_free;
 383         bh->b_next_free->b_prev_free = bh->b_prev_free;
 384 
 385         if (lru_list[bh->b_list] == bh)
 386                  lru_list[bh->b_list] = bh->b_next_free;
 387         if(lru_list[bh->b_list] == bh)
 388                  lru_list[bh->b_list] = NULL;
 389         bh->b_next_free = bh->b_prev_free = NULL;
 390 }
 391 
 392 static inline void remove_from_free_list(struct buffer_head * bh)
 393 {
 394         int isize = BUFSIZE_INDEX(bh->b_size);
 395         if (!(bh->b_prev_free) || !(bh->b_next_free))
 396                 panic("VFS: Free block list corrupted");
 397         if(bh->b_dev != 0xffff) panic("Free list corrupted");
 398         if(!free_list[isize])
 399                  panic("Free list empty");
 400         nr_free[isize]--;
 401         if(bh->b_next_free == bh)
 402                  free_list[isize] = NULL;
 403         else {
 404                 bh->b_prev_free->b_next_free = bh->b_next_free;
 405                 bh->b_next_free->b_prev_free = bh->b_prev_free;
 406                 if (free_list[isize] == bh)
 407                          free_list[isize] = bh->b_next_free;
 408         };
 409         bh->b_next_free = bh->b_prev_free = NULL;
 410 }
 411 
 412 static inline void remove_from_queues(struct buffer_head * bh)
 413 {
 414         if(bh->b_dev == 0xffff) {
 415                 remove_from_free_list(bh); /* Free list entries should not be
 416                                               in the hash queue */
 417                 return;
 418         };
 419         nr_buffers_type[bh->b_list]--;
 420         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]--;
 421         remove_from_hash_queue(bh);
 422         remove_from_lru_list(bh);
 423 }
 424 
 425 static inline void put_last_lru(struct buffer_head * bh)
 426 {
 427         if (!bh)
 428                 return;
 429         if (bh == lru_list[bh->b_list]) {
 430                 lru_list[bh->b_list] = bh->b_next_free;
 431                 return;
 432         }
 433         if(bh->b_dev == 0xffff) panic("Wrong block for lru list");
 434         remove_from_lru_list(bh);
  435 /* add to the back of the lru list */
 436 
 437         if(!lru_list[bh->b_list]) {
 438                 lru_list[bh->b_list] = bh;
 439                 lru_list[bh->b_list]->b_prev_free = bh;
 440         };
 441 
 442         bh->b_next_free = lru_list[bh->b_list];
 443         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 444         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 445         lru_list[bh->b_list]->b_prev_free = bh;
 446 }
 447 
 448 static inline void put_last_free(struct buffer_head * bh)
 449 {
 450         int isize;
 451         if (!bh)
 452                 return;
 453 
 454         isize = BUFSIZE_INDEX(bh->b_size);      
 455         bh->b_dev = 0xffff;  /* So it is obvious we are on the free list */
 456 /* add to back of free list */
 457 
 458         if(!free_list[isize]) {
 459                 free_list[isize] = bh;
 460                 bh->b_prev_free = bh;
 461         };
 462 
 463         nr_free[isize]++;
 464         bh->b_next_free = free_list[isize];
 465         bh->b_prev_free = free_list[isize]->b_prev_free;
 466         free_list[isize]->b_prev_free->b_next_free = bh;
 467         free_list[isize]->b_prev_free = bh;
 468 }
 469 
 470 static inline void insert_into_queues(struct buffer_head * bh)
 471 {
 472 /* put at end of free list */
 473 
 474         if(bh->b_dev == 0xffff) {
 475                 put_last_free(bh);
 476                 return;
 477         };
 478         if(!lru_list[bh->b_list]) {
 479                 lru_list[bh->b_list] = bh;
 480                 bh->b_prev_free = bh;
 481         };
 482         if (bh->b_next_free) panic("VFS: buffer LRU pointers corrupted");
 483         bh->b_next_free = lru_list[bh->b_list];
 484         bh->b_prev_free = lru_list[bh->b_list]->b_prev_free;
 485         lru_list[bh->b_list]->b_prev_free->b_next_free = bh;
 486         lru_list[bh->b_list]->b_prev_free = bh;
 487         nr_buffers_type[bh->b_list]++;
 488         nr_buffers_st[BUFSIZE_INDEX(bh->b_size)][bh->b_list]++;
 489 /* put the buffer in new hash-queue if it has a device */
 490         bh->b_prev = NULL;
 491         bh->b_next = NULL;
 492         if (!bh->b_dev)
 493                 return;
 494         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 495         hash(bh->b_dev,bh->b_blocknr) = bh;
 496         if (bh->b_next)
 497                 bh->b_next->b_prev = bh;
 498 }
 499 
 500 static struct buffer_head * find_buffer(dev_t dev, int block, int size)
 501 {               
 502         struct buffer_head * tmp;
 503 
 504         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 505                 if (tmp->b_dev==dev && tmp->b_blocknr==block)
 506                         if (tmp->b_size == size)
 507                                 return tmp;
 508                         else {
 509                                 printk("VFS: Wrong blocksize on device %d/%d\n",
 510                                                         MAJOR(dev), MINOR(dev));
 511                                 return NULL;
 512                         }
 513         return NULL;
 514 }
 515 
 516 /*
 517  * Why like this, I hear you say... The reason is race-conditions.
  518  * As we don't lock buffers (unless we are reading them, that is),
 519  * something might happen to it while we sleep (ie a read-error
 520  * will force it bad). This shouldn't really happen currently, but
 521  * the code is ready.
 522  */
 523 struct buffer_head * get_hash_table(dev_t dev, int block, int size)
 524 {
 525         struct buffer_head * bh;
 526 
 527         for (;;) {
 528                 if (!(bh=find_buffer(dev,block,size)))
 529                         return NULL;
 530                 bh->b_count++;
 531                 wait_on_buffer(bh);
 532                 if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
 533                         return bh;
 534                 bh->b_count--;
 535         }
 536 }
 537 
 538 void set_blocksize(dev_t dev, int size)
 539 {
 540         int i, nlist;
 541         struct buffer_head * bh, *bhnext;
 542 
 543         if (!blksize_size[MAJOR(dev)])
 544                 return;
 545 
 546         switch(size) {
 547                 default: panic("Invalid blocksize passed to set_blocksize");
 548                 case 512: case 1024: case 2048: case 4096:;
 549         }
 550 
 551         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 552                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 553                 return;
 554         }
 555         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 556                 return;
 557         sync_buffers(dev, 2);
 558         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 559 
 560   /* We need to be quite careful how we do this - we are moving entries
 561      around on the free list, and we can get in a loop if we are not careful.*/
 562 
 563         for(nlist = 0; nlist < NR_LIST; nlist++) {
 564                 bh = lru_list[nlist];
 565                 for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
 566                         if(!bh) break;
 567                         bhnext = bh->b_next_free; 
 568                         if (bh->b_dev != dev)
 569                                  continue;
 570                         if (bh->b_size == size)
 571                                  continue;
 572                         
 573                         wait_on_buffer(bh);
 574                         if (bh->b_dev == dev && bh->b_size != size) {
 575                                 bh->b_uptodate = bh->b_dirt = 
 576                                          bh->b_flushtime = 0;
 577                         };
 578                         remove_from_hash_queue(bh);
 579                 }
 580         }
 581 }
 582 
 583 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
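/* BADNESS() ranks how costly a buffer is to reclaim: 0 for a clean, unlocked
   buffer (the ideal victim), 1 if merely locked, 2 or 3 if dirty.
   refill_freelist() below only steals buffers whose BADNESS is zero. */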
 584 
 585 void refill_freelist(int size)
 586 {
 587         struct buffer_head * bh, * tmp;
 588         struct buffer_head * candidate[NR_LIST];
 589         unsigned int best_time, winner;
 590         int isize = BUFSIZE_INDEX(size);
 591         int buffers[NR_LIST];
 592         int i;
 593         int needed;
 594 
 595         /* First see if we even need this.  Sometimes it is advantageous
  596          to request some blocks in a filesystem that we know we will
 597          be needing ahead of time. */
 598 
  599         if(nr_free[isize] > 100) return;
 600 
 601         /* If there are too many dirty buffers, we wake up the update process
 602            now so as to ensure that there are still clean buffers available
 603            for user processes to use (and dirty) */
 604         
 605         /* We are going to try and locate this much memory */
  606         needed = bdf_prm.b_un.nrefill * size;
 607 
 608         while (nr_free_pages > min_free_pages && needed > 0 &&
 609                grow_buffers(GFP_BUFFER, size)) {
 610                 needed -= PAGE_SIZE;
 611         }
 612 
 613         if(needed <= 0) return;
 614 
 615         /* See if there are too many buffers of a different size.
 616            If so, victimize them */
 617 
 618         while(maybe_shrink_lav_buffers(size))
 619          {
 620                  if(!grow_buffers(GFP_BUFFER, size)) break;
 621                  needed -= PAGE_SIZE;
 622                  if(needed <= 0) return;
 623          };
 624 
 625         /* OK, we cannot grow the buffer cache, now try and get some
 626            from the lru list */
 627 
 628         /* First set the candidate pointers to usable buffers.  This
 629            should be quick nearly all of the time. */
 630 
 631 repeat0:
 632         for(i=0; i<NR_LIST; i++){
 633                 if(i == BUF_DIRTY || i == BUF_SHARED || 
 634                    nr_buffers_type[i] == 0) {
 635                         candidate[i] = NULL;
 636                         buffers[i] = 0;
 637                         continue;
 638                 }
 639                 buffers[i] = nr_buffers_type[i];
 640                 for (bh = lru_list[i]; buffers[i] > 0; bh = tmp, buffers[i]--)
 641                  {
 642                          if(buffers[i] < 0) panic("Here is the problem");
 643                          tmp = bh->b_next_free;
 644                          if (!bh) break;
 645                          
 646                          if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1 ||
 647                              bh->b_dirt) {
 648                                  refile_buffer(bh);
 649                                  continue;
 650                          };
 651                          
 652                          if (bh->b_count || bh->b_size != size)
 653                                   continue;
 654                          
 655                          /* Buffers are written in the order they are placed 
 656                             on the locked list. If we encounter a locked
 657                             buffer here, this means that the rest of them
 658                             are also locked */
 659                          if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 660                                  buffers[i] = 0;
 661                                  break;
 662                          }
 663                          
 664                          if (BADNESS(bh)) continue;
 665                          break;
 666                  };
 667                 if(!buffers[i]) candidate[i] = NULL; /* Nothing on this list */
 668                 else candidate[i] = bh;
 669                 if(candidate[i] && candidate[i]->b_count) panic("Here is the problem");
 670         }
 671         
 672  repeat:
 673         if(needed <= 0) return;
 674         
 675         /* Now see which candidate wins the election */
 676         
 677         winner = best_time = UINT_MAX;  
 678         for(i=0; i<NR_LIST; i++){
 679                 if(!candidate[i]) continue;
 680                 if(candidate[i]->b_lru_time < best_time){
 681                         best_time = candidate[i]->b_lru_time;
 682                         winner = i;
 683                 }
 684         }
 685         
 686         /* If we have a winner, use it, and then get a new candidate from that list */
 687         if(winner != UINT_MAX) {
 688                 i = winner;
 689                 bh = candidate[i];
 690                 candidate[i] = bh->b_next_free;
 691                 if(candidate[i] == bh) candidate[i] = NULL;  /* Got last one */
 692                 if (bh->b_count || bh->b_size != size)
 693                          panic("Busy buffer in candidate list\n");
 694                 if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1)
 695                          panic("Shared buffer in candidate list\n");
 696                 if (BADNESS(bh)) panic("Buffer in candidate list with BADNESS != 0\n");
 697                 
 698                 if(bh->b_dev == 0xffff) panic("Wrong list");
 699                 remove_from_queues(bh);
 700                 bh->b_dev = 0xffff;
 701                 put_last_free(bh);
 702                 needed -= bh->b_size;
 703                 buffers[i]--;
 704                 if(buffers[i] < 0) panic("Here is the problem");
 705                 
 706                 if(buffers[i] == 0) candidate[i] = NULL;
 707                 
 708                 /* Now all we need to do is advance the candidate pointer
 709                    from the winner list to the next usable buffer */
 710                 if(candidate[i] && buffers[i] > 0){
 711                         if(buffers[i] <= 0) panic("Here is another problem");
 712                         for (bh = candidate[i]; buffers[i] > 0; bh = tmp, buffers[i]--) {
 713                                 if(buffers[i] < 0) panic("Here is the problem");
 714                                 tmp = bh->b_next_free;
 715                                 if (!bh) break;
 716                                 
 717                                 if (mem_map[MAP_NR((unsigned long) bh->b_data)] != 1 ||
 718                                     bh->b_dirt) {
 719                                         refile_buffer(bh);
 720                                         continue;
 721                                 };
 722                                 
 723                                 if (bh->b_count || bh->b_size != size)
 724                                          continue;
 725                                 
 726                                 /* Buffers are written in the order they are
 727                                    placed on the locked list.  If we encounter
 728                                    a locked buffer here, this means that the
 729                                    rest of them are also locked */
 730                                 if(bh->b_lock && (i == BUF_LOCKED || i == BUF_LOCKED1)) {
 731                                         buffers[i] = 0;
 732                                         break;
 733                                 }
 734               
 735                                 if (BADNESS(bh)) continue;
 736                                 break;
 737                         };
 738                         if(!buffers[i]) candidate[i] = NULL; /* Nothing here */
 739                         else candidate[i] = bh;
 740                         if(candidate[i] && candidate[i]->b_count) 
 741                                  panic("Here is the problem");
 742                 }
 743                 
 744                 goto repeat;
 745         }
 746         
 747         if(needed <= 0) return;
 748         
 749         /* Too bad, that was not enough. Try a little harder to grow some. */
 750         
 751         if (nr_free_pages > 5) {
 752                 if (grow_buffers(GFP_BUFFER, size)) {
 753                         needed -= PAGE_SIZE;
 754                         goto repeat0;
 755                 };
 756         }
 757         
 758         /* and repeat until we find something good */
 759         if (!grow_buffers(GFP_ATOMIC, size))
 760                 wakeup_bdflush(1);
 761         needed -= PAGE_SIZE;
 762         goto repeat0;
 763 }
 764 
 765 /*
 766  * Ok, this is getblk, and it isn't very clear, again to hinder
 767  * race-conditions. Most of the code is seldom used, (ie repeating),
 768  * so it should be much more efficient than it looks.
 769  *
  770  * The algorithm is changed: hopefully better, and an elusive bug removed.
 771  *
 772  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 773  * when the filesystem starts to get full of dirty blocks (I hope).
 774  */
 775 struct buffer_head * getblk(dev_t dev, int block, int size)
 776 {
 777         struct buffer_head * bh;
 778         int isize = BUFSIZE_INDEX(size);
 779 
 780         /* Update this for the buffer size lav. */
 781         buffer_usage[isize]++;
 782 
 783         /* If there are too many dirty buffers, we wake up the update process
 784            now so as to ensure that there are still clean buffers available
 785            for user processes to use (and dirty) */
 786 repeat:
 787         bh = get_hash_table(dev, block, size);
 788         if (bh) {
 789                 if (bh->b_uptodate && !bh->b_dirt)
 790                          put_last_lru(bh);
 791                 if(!bh->b_dirt) bh->b_flushtime = 0;
 792                 return bh;
 793         }
 794 
 795         while(!free_list[isize]) refill_freelist(size);
 796         
 797         if (find_buffer(dev,block,size))
 798                  goto repeat;
 799 
 800         bh = free_list[isize];
 801         remove_from_free_list(bh);
 802 
  803 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 804 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 805         bh->b_count=1;
 806         bh->b_dirt=0;
 807         bh->b_lock=0;
 808         bh->b_uptodate=0;
 809         bh->b_flushtime = 0;
 810         bh->b_req=0;
 811         bh->b_dev=dev;
 812         bh->b_blocknr=block;
 813         insert_into_queues(bh);
 814         return bh;
 815 }
 816 
 817 void set_writetime(struct buffer_head * buf, int flag)
 818 {
 819         int newtime;
 820 
 821         if (buf->b_dirt){
  822                 /* Set the time this dirty buffer should be written back */
 823                 newtime = jiffies + (flag ? bdf_prm.b_un.age_super : 
 824                                      bdf_prm.b_un.age_buffer);
 825                 if(!buf->b_flushtime || buf->b_flushtime > newtime)
 826                          buf->b_flushtime = newtime;
 827         } else {
 828                 buf->b_flushtime = 0;
 829         }
 830 }
 831 
 832 
 833 static char buffer_disposition[] = {BUF_CLEAN, BUF_SHARED, BUF_LOCKED, BUF_SHARED, 
 834                                       BUF_DIRTY, BUF_DIRTY, BUF_DIRTY, BUF_DIRTY};
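/* Indexed by (shared ? 1 : 0) | (locked ? 2 : 0) | (dirty ? 4 : 0), as built
   up in refile_buffer() below: dirty overrides everything else, and a shared
   buffer stays BUF_SHARED even while it is locked. */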
 835 
 836 void refile_buffer(struct buffer_head * buf){
 837         int i, dispose;
 838         i = 0;
 839         if(buf->b_dev == 0xffff) panic("Attempt to refile free buffer\n");
 840         if(mem_map[MAP_NR((unsigned long) buf->b_data)] != 1) i = 1;
 841         if(buf->b_lock) i |= 2;
 842         if(buf->b_dirt) i |= 4;
 843         dispose = buffer_disposition[i];
 844         if(buf->b_list == BUF_SHARED && dispose == BUF_CLEAN)
 845                  dispose = BUF_UNSHARED;
 846         if(dispose == -1) panic("Bad buffer settings (%d)\n", i);
 847         if(dispose == BUF_CLEAN) buf->b_lru_time = jiffies;
 848         if(dispose != buf->b_list)  {
 849                 if(dispose == BUF_DIRTY || dispose == BUF_UNSHARED)
 850                          buf->b_lru_time = jiffies;
 851                 if(dispose == BUF_LOCKED && 
 852                    (buf->b_flushtime - buf->b_lru_time) <= bdf_prm.b_un.age_super)
 853                          dispose = BUF_LOCKED1;
 854                 remove_from_queues(buf);
 855                 buf->b_list = dispose;
 856                 insert_into_queues(buf);
 857                 if(dispose == BUF_DIRTY && nr_buffers_type[BUF_DIRTY] > 
 858                    (nr_buffers - nr_buffers_type[BUF_SHARED]) *
 859                    bdf_prm.b_un.nfract/100)
 860                          wakeup_bdflush(0);
 861         }
 862 }
 863 
 864 void brelse(struct buffer_head * buf)
 865 {
 866         if (!buf)
 867                 return;
 868         wait_on_buffer(buf);
 869 
 870         /* If dirty, mark the time this buffer should be written back */
 871         set_writetime(buf, 0);
 872         refile_buffer(buf);
 873 
 874         if (buf->b_count) {
 875                 if (--buf->b_count)
 876                         return;
 877                 wake_up(&buffer_wait);
 878                 return;
 879         }
 880         printk("VFS: brelse: Trying to free free buffer\n");
 881 }
 882 
 883 /*
 884  * bread() reads a specified block and returns the buffer that contains
 885  * it. It returns NULL if the block was unreadable.
 886  */
 887 struct buffer_head * bread(dev_t dev, int block, int size)
 888 {
 889         struct buffer_head * bh;
 890 
 891         if (!(bh = getblk(dev, block, size))) {
 892                 printk("VFS: bread: READ error on device %d/%d\n",
 893                                                 MAJOR(dev), MINOR(dev));
 894                 return NULL;
 895         }
 896         if (bh->b_uptodate)
 897                 return bh;
 898         ll_rw_block(READ, 1, &bh);
 899         wait_on_buffer(bh);
 900         if (bh->b_uptodate)
 901                 return bh;
 902         brelse(bh);
 903         return NULL;
 904 }
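/* A typical caller pattern, sketched for illustration only -- read_example()
   is a hypothetical helper, not part of this file, showing how callers are
   expected to pair bread() with brelse(): */

static int read_example(dev_t dev, int block, char * dest)
{
        struct buffer_head * bh;

        bh = bread(dev, block, BLOCK_SIZE);
        if (!bh)
                return -EIO;            /* the block was unreadable */
        memcpy(dest, bh->b_data, BLOCK_SIZE);
        brelse(bh);                     /* drop the reference bread() took */
        return 0;
}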
 905 
 906 /*
  907  * Ok, breada can be used as bread, but additionally starts read-ahead
  908  * on the blocks that follow, limited by the remaining file size and the
  909  * device's read_ahead setting.
 910  */
 911 
 912 #define NBUF 16
 913 
 914 struct buffer_head * breada(dev_t dev, int block, int bufsize,
 915         unsigned int pos, unsigned int filesize)
 916 {
 917         struct buffer_head * bhlist[NBUF];
 918         unsigned int blocks;
 919         struct buffer_head * bh;
 920         int index;
 921         int i, j;
 922 
 923         if (pos >= filesize)
 924                 return NULL;
 925 
 926         if (block < 0 || !(bh = getblk(dev,block,bufsize)))
 927                 return NULL;
 928 
 929         index = BUFSIZE_INDEX(bh->b_size);
 930 
 931         if (bh->b_uptodate)
 932                 return bh;
 933 
 934         blocks = ((filesize & (bufsize - 1)) - (pos & (bufsize - 1))) >> (9+index);
 935 
 936         if (blocks > (read_ahead[MAJOR(dev)] >> index))
 937                 blocks = read_ahead[MAJOR(dev)] >> index;
 938         if (blocks > NBUF)
 939                 blocks = NBUF;
 940         
 941         bhlist[0] = bh;
 942         j = 1;
 943         for(i=1; i<blocks; i++) {
 944                 bh = getblk(dev,block+i,bufsize);
 945                 if (bh->b_uptodate) {
 946                         brelse(bh);
 947                         break;
 948                 }
 949                 bhlist[j++] = bh;
 950         }
 951 
 952         /* Request the read for these buffers, and then release them */
 953         ll_rw_block(READ, j, bhlist);
 954 
 955         for(i=1; i<j; i++)
 956                 brelse(bhlist[i]);
 957 
 958         /* Wait for this buffer, and then continue on */
 959         bh = bhlist[0];
 960         wait_on_buffer(bh);
 961         if (bh->b_uptodate)
 962                 return bh;
 963         brelse(bh);
 964         return NULL;
 965 }
 966 
 967 /*
 968  * See fs/inode.c for the weird use of volatile..
 969  */
 970 static void put_unused_buffer_head(struct buffer_head * bh)
 971 {
 972         struct wait_queue * wait;
 973 
 974         wait = ((volatile struct buffer_head *) bh)->b_wait;
 975         memset((void *) bh,0,sizeof(*bh));
 976         ((volatile struct buffer_head *) bh)->b_wait = wait;
 977         bh->b_next_free = unused_list;
 978         unused_list = bh;
 979 }
 980 
 981 static void get_more_buffer_heads(void)
 982 {
 983         int i;
 984         struct buffer_head * bh;
 985 
 986         if (unused_list)
 987                 return;
 988 
 989         if (!(bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 990                 return;
 991 
 992         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 993                 bh->b_next_free = unused_list;  /* only make link */
 994                 unused_list = bh++;
 995         }
 996 }
 997 
 998 static struct buffer_head * get_unused_buffer_head(void)
 999 {
1000         struct buffer_head * bh;
1001 
1002         get_more_buffer_heads();
1003         if (!unused_list)
1004                 return NULL;
1005         bh = unused_list;
1006         unused_list = bh->b_next_free;
1007         bh->b_next_free = NULL;
1008         bh->b_data = NULL;
1009         bh->b_size = 0;
1010         bh->b_req = 0;
1011         return bh;
1012 }
1013 
1014 /*
1015  * Create the appropriate buffers when given a page for data area and
1016  * the size of each buffer.. Use the bh->b_this_page linked list to
1017  * follow the buffers created.  Return NULL if unable to create more
1018  * buffers.
1019  */
1020 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
1021 {
1022         struct buffer_head *bh, *head;
1023         unsigned long offset;
1024 
1025         head = NULL;
1026         offset = PAGE_SIZE;
1027         while ((offset -= size) < PAGE_SIZE) {
1028                 bh = get_unused_buffer_head();
1029                 if (!bh)
1030                         goto no_grow;
1031                 bh->b_this_page = head;
1032                 head = bh;
1033                 bh->b_data = (char *) (page+offset);
1034                 bh->b_size = size;
1035                 bh->b_dev = 0xffff;  /* Flag as unused */
1036         }
1037         return head;
1038 /*
1039  * In case anything failed, we just free everything we got.
1040  */
1041 no_grow:
1042         bh = head;
1043         while (bh) {
1044                 head = bh;
1045                 bh = bh->b_this_page;
1046                 put_unused_buffer_head(head);
1047         }
1048         return NULL;
1049 }
1050 
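/* Submit reads for every buffer in bh[0..nrbuf-1] that is not already
   uptodate, then wait for all of them to complete. */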
1051 static void read_buffers(struct buffer_head * bh[], int nrbuf)
1052 {
1053         int i;
1054         int bhnum = 0;
1055         struct buffer_head * bhr[8];
1056 
1057         for (i = 0 ; i < nrbuf ; i++) {
1058                 if (bh[i] && !bh[i]->b_uptodate)
1059                         bhr[bhnum++] = bh[i];
1060         }
1061         if (bhnum)
1062                 ll_rw_block(READ, bhnum, bhr);
1063         for (i = 0 ; i < nrbuf ; i++) {
1064                 if (bh[i]) {
1065                         wait_on_buffer(bh[i]);
1066                 }
1067         }
1068 }
1069 
1070 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
1071         dev_t dev, int *b, int size)
1072 {
1073         struct buffer_head * bh[8];
1074         unsigned long page;
1075         unsigned long offset;
1076         int block;
1077         int nrbuf;
1078 
1079         page = (unsigned long) first->b_data;
1080         if (page & ~PAGE_MASK) {
1081                 brelse(first);
1082                 return 0;
1083         }
1084         mem_map[MAP_NR(page)]++;
1085         bh[0] = first;
1086         nrbuf = 1;
1087         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
1088                 block = *++b;
1089                 if (!block)
1090                         goto no_go;
1091                 first = get_hash_table(dev, block, size);
1092                 if (!first)
1093                         goto no_go;
1094                 bh[nrbuf++] = first;
1095                 if (page+offset != (unsigned long) first->b_data)
1096                         goto no_go;
1097         }
1098         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
1099         while (nrbuf-- > 0)
1100                 brelse(bh[nrbuf]);
1101         free_page(address);
1102         ++current->min_flt;
1103         return page;
1104 no_go:
1105         while (nrbuf-- > 0)
1106                 brelse(bh[nrbuf]);
1107         free_page(page);
1108         return 0;
1109 }
1110 
1111 static unsigned long try_to_load_aligned(unsigned long address,
1112         dev_t dev, int b[], int size)
1113 {
1114         struct buffer_head * bh, * tmp, * arr[8];
1115         unsigned long offset;
1116         int isize = BUFSIZE_INDEX(size);
1117         int * p;
1118         int block;
1119 
1120         bh = create_buffers(address, size);
1121         if (!bh)
1122                 return 0;
1123         /* do any of the buffers already exist? punt if so.. */
1124         p = b;
1125         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1126                 block = *(p++);
1127                 if (!block)
1128                         goto not_aligned;
1129                 if (find_buffer(dev, block, size))
1130                         goto not_aligned;
1131         }
1132         tmp = bh;
1133         p = b;
1134         block = 0;
1135         while (1) {
1136                 arr[block++] = bh;
1137                 bh->b_count = 1;
1138                 bh->b_dirt = 0;
1139                 bh->b_flushtime = 0;
1140                 bh->b_uptodate = 0;
1141                 bh->b_req = 0;
1142                 bh->b_dev = dev;
1143                 bh->b_blocknr = *(p++);
1144                 bh->b_list = BUF_CLEAN;
1145                 nr_buffers++;
1146                 nr_buffers_size[isize]++;
1147                 insert_into_queues(bh);
1148                 if (bh->b_this_page)
1149                         bh = bh->b_this_page;
1150                 else
1151                         break;
1152         }
1153         buffermem += PAGE_SIZE;
1154         bh->b_this_page = tmp;
1155         mem_map[MAP_NR(address)]++;
1156         buffer_pages[address >> PAGE_SHIFT] = bh;
1157         read_buffers(arr,block);
1158         while (block-- > 0)
1159                 brelse(arr[block]);
1160         ++current->maj_flt;
1161         return address;
1162 not_aligned:
1163         while ((tmp = bh) != NULL) {
1164                 bh = bh->b_this_page;
1165                 put_unused_buffer_head(tmp);
1166         }
1167         return 0;
1168 }
1169 
1170 /*
1171  * Try-to-share-buffers tries to minimize memory use by trying to keep
1172  * both code pages and the buffer area in the same page. This is done by
1173  * (a) checking if the buffers are already aligned correctly in memory and
1174  * (b) if none of the buffer heads are in memory at all, trying to load
1175  * them into memory the way we want them.
1176  *
1177  * This doesn't guarantee that the memory is shared, but should under most
1178  * circumstances work very well indeed (ie >90% sharing of code pages on
1179  * demand-loadable executables).
1180  */
1181 static inline unsigned long try_to_share_buffers(unsigned long address,
1182         dev_t dev, int *b, int size)
1183 {
1184         struct buffer_head * bh;
1185         int block;
1186 
1187         block = b[0];
1188         if (!block)
1189                 return 0;
1190         bh = get_hash_table(dev, block, size);
1191         if (bh)
1192                 return check_aligned(bh, address, dev, b, size);
1193         return try_to_load_aligned(address, dev, b, size);
1194 }
1195 
1196 #define COPYBLK(size,from,to) \
1197 __asm__ __volatile__("rep ; movsl": \
1198         :"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \
1199         :"cx","di","si")
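/* COPYBLK is a rep/movsl word copy: it moves 'size' bytes (always a multiple
   of four here) from 'from' to 'to'.  A portable equivalent, with a
   hypothetical name, would simply be:

        static inline void copyblk(unsigned long size, unsigned long from, unsigned long to)
        {
                memcpy((void *) to, (void *) from, size);
        }
*/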
1200 
1201 /*
1202  * bread_page reads four buffers into memory at the desired address. It's
1203  * a function of its own, as there is some speed to be got by reading them
1204  * all at the same time, not waiting for one to be read, and then another
1205  * etc. This also allows us to optimize memory usage by sharing code pages
1206  * and filesystem buffers..
1207  */
1208 unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
1209 {
1210         struct buffer_head * bh[8];
1211         unsigned long where;
1212         int i, j;
1213 
1214         if (!(prot & PAGE_RW)) {
1215                 where = try_to_share_buffers(address,dev,b,size);
1216                 if (where)
1217                         return where;
1218         }
1219         ++current->maj_flt;
1220         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
1221                 bh[i] = NULL;
1222                 if (b[i])
1223                         bh[i] = getblk(dev, b[i], size);
1224         }
1225         read_buffers(bh,i);
1226         where = address;
1227         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size,address += size) {
1228                 if (bh[i]) {
1229                         if (bh[i]->b_uptodate)
1230                                 COPYBLK(size, (unsigned long) bh[i]->b_data,address);
1231                         brelse(bh[i]);
1232                 }
1233         }
1234         return where;
1235 }
1236 
1237 /*
1238  * Try to increase the number of buffers available: the size argument
1239  * is used to determine what kind of buffers we want.
1240  */
1241 static int grow_buffers(int pri, int size)
1242 {
1243         unsigned long page;
1244         struct buffer_head *bh, *tmp;
1245         struct buffer_head * insert_point;
1246         int isize;
1247 
1248         if ((size & 511) || (size > PAGE_SIZE)) {
1249                 printk("VFS: grow_buffers: size = %d\n",size);
1250                 return 0;
1251         }
1252 
1253         isize = BUFSIZE_INDEX(size);
1254 
1255         if (!(page = __get_free_page(pri)))
1256                 return 0;
1257         bh = create_buffers(page, size);
1258         if (!bh) {
1259                 free_page(page);
1260                 return 0;
1261         }
1262 
1263         insert_point = free_list[isize];
1264 
1265         tmp = bh;
1266         while (1) {
1267                 nr_free[isize]++;
1268                 if (insert_point) {
1269                         tmp->b_next_free = insert_point->b_next_free;
1270                         tmp->b_prev_free = insert_point;
1271                         insert_point->b_next_free->b_prev_free = tmp;
1272                         insert_point->b_next_free = tmp;
1273                 } else {
1274                         tmp->b_prev_free = tmp;
1275                         tmp->b_next_free = tmp;
1276                 }
1277                 insert_point = tmp;
1278                 ++nr_buffers;
1279                 if (tmp->b_this_page)
1280                         tmp = tmp->b_this_page;
1281                 else
1282                         break;
1283         }
1284         free_list[isize] = bh;
1285         buffer_pages[page >> PAGE_SHIFT] = bh;
1286         tmp->b_this_page = bh;
1287         wake_up(&buffer_wait);
1288         buffermem += PAGE_SIZE;
1289         return 1;
1290 }
1291 
1292 /*
1293  * try_to_free() checks if all the buffers on this particular page
 1294  * are unused, and frees the page if so.
1295  */
1296 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
1297 {
1298         unsigned long page;
1299         struct buffer_head * tmp, * p;
1300         int isize = BUFSIZE_INDEX(bh->b_size);
1301 
1302         *bhp = bh;
1303         page = (unsigned long) bh->b_data;
1304         page &= PAGE_MASK;
1305         tmp = bh;
1306         do {
1307                 if (!tmp)
1308                         return 0;
1309                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
1310                         return 0;
1311                 tmp = tmp->b_this_page;
1312         } while (tmp != bh);
1313         tmp = bh;
1314         do {
1315                 p = tmp;
1316                 tmp = tmp->b_this_page;
1317                 nr_buffers--;
1318                 nr_buffers_size[isize]--;
1319                 if (p == *bhp)
1320                   {
1321                     *bhp = p->b_prev_free;
1322                     if (p == *bhp) /* Was this the last in the list? */
1323                       *bhp = NULL;
1324                   }
1325                 remove_from_queues(p);
1326                 put_unused_buffer_head(p);
1327         } while (tmp != bh);
1328         buffermem -= PAGE_SIZE;
1329         buffer_pages[page >> PAGE_SHIFT] = NULL;
1330         free_page(page);
1331         return !mem_map[MAP_NR(page)];
1332 }
1333 
1334 
1335 /*
1336  * Consult the load average for buffers and decide whether or not
1337  * we should shrink the buffers of one size or not.  If we decide yes,
 1338  * do it and return 1.  Else return 0.  Do not attempt to shrink the size
1339  * that is specified.
1340  *
1341  * I would prefer not to use a load average, but the way things are now it
1342  * seems unavoidable.  The way to get rid of it would be to force clustering
1343  * universally, so that when we reclaim buffers we always reclaim an entire
1344  * page.  Doing this would mean that we all need to move towards QMAGIC.
1345  */
1346 
1347 static int maybe_shrink_lav_buffers(int size)
1348 {          
1349         int nlist;
1350         int isize;
1351         int total_lav, total_n_buffers, n_sizes;
1352         
1353         /* Do not consider the shared buffers since they would not tend
1354            to have getblk called very often, and this would throw off
1355            the lav.  They are not easily reclaimable anyway (let the swapper
1356            make the first move). */
1357   
1358         total_lav = total_n_buffers = n_sizes = 0;
1359         for(nlist = 0; nlist < NR_SIZES; nlist++)
1360          {
1361                  total_lav += buffers_lav[nlist];
1362                  if(nr_buffers_size[nlist]) n_sizes++;
1363                  total_n_buffers += nr_buffers_size[nlist];
1364                  total_n_buffers -= nr_buffers_st[nlist][BUF_SHARED]; 
1365          }
1366         
1367         /* See if we have an excessive number of buffers of a particular
1368            size - if so, victimize that bunch. */
1369   
1370         isize = (size ? BUFSIZE_INDEX(size) : -1);
1371         
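             /* A size qualifies when its share of the non-shared buffers
                exceeds its share of the total load average, scaled by
                lav_const. */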
1372         if (n_sizes > 1)
1373                  for(nlist = 0; nlist < NR_SIZES; nlist++)
1374                   {
1375                           if(nlist == isize) continue;
1376                           if(nr_buffers_size[nlist] &&
1377                              bdf_prm.b_un.lav_const * buffers_lav[nlist]*total_n_buffers < 
1378                              total_lav * (nr_buffers_size[nlist] - nr_buffers_st[nlist][BUF_SHARED]))
1379                                    if(shrink_specific_buffers(6, bufferindex_size[nlist])) 
1380                                             return 1;
1381                   }
1382         return 0;
1383 }
1384 /*
1385  * Try to free up some pages by shrinking the buffer-cache
1386  *
1387  * Priority tells the routine how hard to try to shrink the
1388  * buffers: 3 means "don't bother too much", while a value
1389  * of 0 means "we'd better get some free pages now".
1390  */
1391 int shrink_buffers(unsigned int priority)
1392 {
1393         if (priority < 2) {
1394                 sync_buffers(0,0);
1395         }
1396 
1397         if(priority == 2) wakeup_bdflush(1);
1398 
1399         if(maybe_shrink_lav_buffers(0)) return 1;
1400 
1401         /* No good candidate size - take any size we can find */
1402         return shrink_specific_buffers(priority, 0);
1403 }
1404 
1405 static int shrink_specific_buffers(unsigned int priority, int size)
1406 {
1407         struct buffer_head *bh;
1408         int nlist;
1409         int i, isize, isize1;
1410 
1411 #ifdef DEBUG
1412         if(size) printk("Shrinking buffers of size %d\n", size);
1413 #endif
1414         /* First try the free lists, and see if we can get a complete page
1415            from here */
1416         isize1 = (size ? BUFSIZE_INDEX(size) : -1);
1417 
1418         for(isize = 0; isize<NR_SIZES; isize++){
1419                 if(isize1 != -1 && isize1 != isize) continue;
1420                 bh = free_list[isize];
1421                 if(!bh) continue;
1422                 for (i=0 ; !i || bh != free_list[isize]; bh = bh->b_next_free, i++) {
1423                         if (bh->b_count || !bh->b_this_page)
1424                                  continue;
1425                         if (try_to_free(bh, &bh))
1426                                  return 1;
1427                         if(!bh) break; /* Some interrupt must have used it after we
1428                                           freed the page.  No big deal - keep looking */
1429                 }
1430         }
1431         
1432         /* Not enough in the free lists, now try the lru list */
1433         
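             /* The smaller the priority value, the larger the fraction of each
                lru list that is scanned (nr_buffers_type[nlist] >> priority). */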
1434         for(nlist = 0; nlist < NR_LIST; nlist++) {
1435         repeat1:
1436                 if(priority > 3 && nlist == BUF_SHARED) continue;
1437                 bh = lru_list[nlist];
1438                 if(!bh) continue;
1439                 i = nr_buffers_type[nlist] >> priority;
1440                 for ( ; i-- > 0 ; bh = bh->b_next_free) {
1441                         /* We may have stalled while waiting for I/O to complete. */
1442                         if(bh->b_list != nlist) goto repeat1;
1443                         if (bh->b_count || !bh->b_this_page)
1444                                  continue;
1445                         if(size && bh->b_size != size) continue;
1446                         if (bh->b_lock)
1447                                  if (priority)
1448                                           continue;
1449                                  else
1450                                           wait_on_buffer(bh);
1451                         if (bh->b_dirt) {
1452                                 bh->b_count++;
1453                                 bh->b_flushtime = 0;
1454                                 ll_rw_block(WRITEA, 1, &bh);
1455                                 bh->b_count--;
1456                                 continue;
1457                         }
1458                         if (try_to_free(bh, &bh))
1459                                  return 1;
1460                         if(!bh) break;
1461                 }
1462         }
1463         return 0;
1464 }
1465 
1466 
1467 void show_buffers(void)
1468 {
1469         struct buffer_head * bh;
1470         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
1471         int shared;
1472         int nlist, isize;
1473 
1474         printk("Buffer memory:   %6dkB\n",buffermem>>10);
1475         printk("Buffer heads:    %6d\n",nr_buffer_heads);
1476         printk("Buffer blocks:   %6d\n",nr_buffers);
1477 
1478         for(nlist = 0; nlist < NR_LIST; nlist++) {
1479           shared = found = locked = dirty = used = lastused = 0;
1480           bh = lru_list[nlist];
1481           if(!bh) continue;
1482           do {
1483                 found++;
1484                 if (bh->b_lock)
1485                         locked++;
1486                 if (bh->b_dirt)
1487                         dirty++;
1488                 if(mem_map[MAP_NR(((unsigned long) bh->b_data))] !=1) shared++;
1489                 if (bh->b_count)
1490                         used++, lastused = found;
1491                 bh = bh->b_next_free;
1492               } while (bh != lru_list[nlist]);
1493         printk("Buffer[%d] mem: %d buffers, %d used (last=%d), %d locked, %d dirty %d shrd\n",
1494                 nlist, found, used, lastused, locked, dirty, shared);
1495         };
1496         printk("Size    [LAV]     Free  Clean  Unshar     Lck    Lck1   Dirty  Shared\n");
1497         for(isize = 0; isize<NR_SIZES; isize++){
1498                 printk("%5d [%5d]: %7d ", bufferindex_size[isize],
1499                        buffers_lav[isize], nr_free[isize]);
1500                 for(nlist = 0; nlist < NR_LIST; nlist++)
1501                          printk("%7d ", nr_buffers_st[isize][nlist]);
1502                 printk("\n");
1503         }
1504 }
1505 
1506 /*
1507  * try_to_reassign() checks if all the buffers on this particular page
1508  * are unused, and reassigns them to a new cluster if so.
1509  */
1510 static inline int try_to_reassign(struct buffer_head * bh, struct buffer_head ** bhp,
1511                            dev_t dev, unsigned int starting_block)
1512 {
1513         unsigned long page;
1514         struct buffer_head * tmp, * p;
1515 
1516         *bhp = bh;
1517         page = (unsigned long) bh->b_data;
1518         page &= PAGE_MASK;
1519         if(mem_map[MAP_NR(page)] != 1) return 0;
1520         tmp = bh;
1521         do {
1522                 if (!tmp)
1523                          return 0;
1524                 
1525                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
1526                          return 0;
1527                 tmp = tmp->b_this_page;
1528         } while (tmp != bh);
1529         tmp = bh;
1530         
1531         while((unsigned int) tmp->b_data & (PAGE_SIZE - 1)) 
1532                  tmp = tmp->b_this_page;
1533         
1534         /* This is the buffer at the head of the page */
1535         bh = tmp;
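             /* Re-key every buffer on the page to the new device and to
                consecutive block numbers, and re-insert it into the queues. */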
1536         do {
1537                 p = tmp;
1538                 tmp = tmp->b_this_page;
1539                 remove_from_queues(p);
1540                 p->b_dev=dev;
1541                 p->b_uptodate = 0;
1542                 p->b_req = 0;
1543                 p->b_blocknr=starting_block++;
1544                 insert_into_queues(p);
1545         } while (tmp != bh);
1546         return 1;
1547 }
1548 
1549 /*
1550  * Try to find a free cluster by locating a page where
1551  * all of the buffers are unused.  We would like this function
1552  * to be atomic, so we do not call anything that might cause
1553  * the process to sleep.  The priority is somewhat similar to
1554  * the priority used in shrink_buffers.
1555  * 
1556  * My thinking is that the kernel should end up using whole
1557  * pages for the buffer cache as much of the time as possible.
1558  * This way the other buffers on a particular page are likely
1559  * to be very near each other on the free list, and we will not
1560  * be expiring data prematurely.  For now we only cannibalize buffers
1561  * of the same size to keep the code simpler.
1562  */
1563 static int reassign_cluster(dev_t dev, 
1564                      unsigned int starting_block, int size)
1565 {
1566         struct buffer_head *bh;
1567         int isize = BUFSIZE_INDEX(size);
1568         int i;
1569 
1570         /* We want to give ourselves a really good shot at generating
1571            a cluster, and since we only take buffers from the free
1572            list, we "overfill" it a little. */
1573 
1574         while(nr_free[isize] < 32) refill_freelist(size);
1575 
1576         bh = free_list[isize];
1577         if(bh)
1578                  for (i=0 ; !i || bh != free_list[isize] ; bh = bh->b_next_free, i++) {
1579                          if (!bh->b_this_page)  continue;
1580                          if (try_to_reassign(bh, &bh, dev, starting_block))
1581                                  return 4;
1582                  }
1583         return 0;
1584 }
1585 
1586 /* This function tries to generate a new cluster of buffers
1587  * from a new page in memory.  We should only do this if we have
1588  * not expanded the buffer cache to the maximum size that we allow.
1589  */
1590 static unsigned long try_to_generate_cluster(dev_t dev, int block, int size)
1591 {
1592         struct buffer_head * bh, * tmp, * arr[8];
1593         int isize = BUFSIZE_INDEX(size);
1594         unsigned long offset;
1595         unsigned long page;
1596         int nblock;
1597 
1598         page = get_free_page(GFP_NOBUFFER);
1599         if(!page) return 0;
1600 
1601         bh = create_buffers(page, size);
1602         if (!bh) {
1603                 free_page(page);
1604                 return 0;
1605         };
1606         nblock = block;
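             /* Refuse if any of the target blocks is already in the cache;
                the new page would otherwise alias an existing buffer. */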
1607         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
1608                 if (find_buffer(dev, nblock++, size))
1609                          goto not_aligned;
1610         }
1611         tmp = bh;
1612         nblock = 0;
1613         while (1) {
1614                 arr[nblock++] = bh;
1615                 bh->b_count = 1;
1616                 bh->b_dirt = 0;
1617                 bh->b_flushtime = 0;
1618                 bh->b_lock = 0;
1619                 bh->b_uptodate = 0;
1620                 bh->b_req = 0;
1621                 bh->b_dev = dev;
1622                 bh->b_list = BUF_CLEAN;
1623                 bh->b_blocknr = block++;
1624                 nr_buffers++;
1625                 nr_buffers_size[isize]++;
1626                 insert_into_queues(bh);
1627                 if (bh->b_this_page)
1628                         bh = bh->b_this_page;
1629                 else
1630                         break;
1631         }
1632         buffermem += PAGE_SIZE;
1633         buffer_pages[page >> PAGE_SHIFT] = bh;
1634         bh->b_this_page = tmp;
1635         while (nblock-- > 0)
1636                 brelse(arr[nblock]);
1637         return 4;
1638 not_aligned:
1639         while ((tmp = bh) != NULL) {
1640                 bh = bh->b_this_page;
1641                 put_unused_buffer_head(tmp);
1642         }
1643         free_page(page);
1644         return 0;
1645 }
1646 
1647 unsigned long generate_cluster(dev_t dev, int b[], int size)
1648 {
1649         int i, offset;
1650         
1651         for (i = 0, offset = 0 ; offset < PAGE_SIZE ; i++, offset += size) {
1652                 if(i && b[i]-1 != b[i-1]) return 0;  /* No need to cluster */
1653                 if(find_buffer(dev, b[i], size)) return 0;
1654         };
1655 
1656         /* OK, we have a candidate for a new cluster */
1657         
1658         /* See if one size of buffer is over-represented in the buffer cache,
1659            if so reduce the numbers of buffers */
1660         if(maybe_shrink_lav_buffers(size))
1661          {
1662                  int retval;
1663                  retval = try_to_generate_cluster(dev, b[0], size);
1664                  if(retval) return retval;
1665          };
1666         
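             /* With plenty of free pages, grow the cache by a fresh page;
                otherwise recycle an existing page of free buffers. */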
1667         if (nr_free_pages > min_free_pages) 
1668                  return try_to_generate_cluster(dev, b[0], size);
1669         else
1670                  return reassign_cluster(dev, b[0], size);
1671 }
1672 
1673 /*
1674  * This initializes the initial buffer free list.  nr_buffers_type is set
1675  * to one less than the actual number of buffers, as a sop to backwards
1676  * compatibility --- the old code did this (I think unintentionally,
1677  * but I'm not sure), and programs in the ps package expect it.
1678  *                                      - TYT 8/30/92
1679  */
1680 void buffer_init(void)
1681 {
1682         int i;
1683         int isize = BUFSIZE_INDEX(BLOCK_SIZE);
1684 
1685         if (high_memory >= 4*1024*1024) {
1686                 min_free_pages = 200;
1687                 if(high_memory >= 16*1024*1024)
1688                          nr_hash = 16381;
1689                 else
1690                          nr_hash = 4093;
1691         } else {
1692                 min_free_pages = 20;
1693                 nr_hash = 997;
1694         };
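             /* The hash-table sizes used above (997, 4093 and 16381) are all
                primes, presumably chosen to spread the block hash evenly. */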
1695         
1696         hash_table = (struct buffer_head **) vmalloc(nr_hash * 
1697                                                      sizeof(struct buffer_head *));
1698 
1699 
1700         buffer_pages = (struct buffer_head **) vmalloc((high_memory >>PAGE_SHIFT) * 
1701                                                      sizeof(struct buffer_head *));
1702         for (i = 0 ; i < high_memory >> PAGE_SHIFT ; i++)
1703                 buffer_pages[i] = NULL;
1704 
1705         for (i = 0 ; i < nr_hash ; i++)
1706                 hash_table[i] = NULL;
1707         lru_list[BUF_CLEAN] = 0;
1708         grow_buffers(GFP_KERNEL, BLOCK_SIZE);
1709         if (!free_list[isize])
1710                 panic("VFS: Unable to initialize buffer free list!");
1711         return;
1712 }
1713 
1714 /* This is a simple kernel daemon, whose job is to provide a dynamic
1715  * response to dirty buffers.  Once this process is activated, we write back
1716  * a limited number of buffers to the disks and then go back to sleep again.
1717  * In effect this is a process which never leaves kernel mode, and does not have
1718  * any user memory associated with it except for the stack.  There is also
1719  * a kernel stack page, which obviously must be separate from the user stack.
1720  */
1721 struct wait_queue * bdflush_wait = NULL;
1722 struct wait_queue * bdflush_done = NULL;
1723 
1724 static int bdflush_running = 0;
1725 
1726 static void wakeup_bdflush(int wait)
1727 {
1728         if(!bdflush_running){
1729                 printk("Warning - bdflush not running\n");
1730                 sync_buffers(0,0);
1731                 return;
1732         };
1733         wake_up(&bdflush_wait);
1734         if(wait) sleep_on(&bdflush_done);
1735 }
1736 
1737 
1738 
1739 /* 
1740  * Here we attempt to write back old buffers.  We also try to flush inodes 
1741  * and superblocks as well, since this function is essentially "update", and 
1742  * otherwise there would be no way of ensuring that these quantities ever 
1743  * get written back.  Ideally, we would have a timestamp on the inodes
1744  * and superblocks so that we could write back only the old ones as well.
1745  */
1746 
1747 asmlinkage int sync_old_buffers(void)
1748 {
1749         int i, isize;
1750         int ndirty, nwritten;
1751         int nlist;
1752         int ncount;
1753         struct buffer_head * bh, *next;
1754 
1755         sync_supers(0);
1756         sync_inodes(0);
1757 
1758         ncount = 0;
1759 #ifdef DEBUG
1760         for(nlist = 0; nlist < NR_LIST; nlist++)
1761 #else
1762         for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1763 #endif
1764         {
1765                 ndirty = 0;
1766                 nwritten = 0;
1767         repeat:
1768                 bh = lru_list[nlist];
1769                 if(bh) 
1770                          for (i = nr_buffers_type[nlist]; --i > 0; bh = next) {
1771                                  /* We may have stalled while waiting for I/O to complete. */
1772                                  if(bh->b_list != nlist) goto repeat;
1773                                  next = bh->b_next_free;
1774                                  if(!lru_list[nlist]) {
1775                                          printk("Dirty list empty %d\n", i);
1776                                          break;
1777                                  }
1778                                  
1779                                  /* Clean buffer on dirty list?  Refile it */
1780                                  if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1781                                   {
1782                                           refile_buffer(bh);
1783                                           continue;
1784                                   }
1785                                  
1786                                  if (bh->b_lock || !bh->b_dirt)
1787                                           continue;
1788                                  ndirty++;
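                                      /* Skip buffers whose flush deadline has not passed yet. */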
1789                                  if(bh->b_flushtime > jiffies) continue;
1790                                  nwritten++;
1791                                  bh->b_count++;
1792                                  bh->b_flushtime = 0;
1793 #ifdef DEBUG
1794                                  if(nlist != BUF_DIRTY) ncount++;
1795 #endif
1796                                  ll_rw_block(WRITE, 1, &bh);
1797                                  bh->b_count--;
1798                          }
1799         }
1800 #ifdef DEBUG
1801         if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount);
1802         printk("Wrote %d/%d buffers\n", nwritten, ndirty);
1803 #endif
1804         
1805         /* We assume that we only come through here on a regular
1806            schedule, like every 5 seconds.  Now update load averages.  
1807            Shift usage counts to prevent overflow. */
1808         for(isize = 0; isize<NR_SIZES; isize++){
1809                 CALC_LOAD(buffers_lav[isize], bdf_prm.b_un.lav_const, buffer_usage[isize]);
1810                 buffer_usage[isize] = 0;
1811         };
1812         return 0;
1813 }
1814 
1815 
1816 /* This is the interface to bdflush.  As we get more sophisticated, we can
1817  * pass tuning parameters to this "process" to adjust how it behaves.
1818  * Invoking it again after the daemon has been started simply modifies
1819  * the tuning parameters.  Each parameter is verified to make sure that
1820  * it is reasonable. */
1821 
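/*
 * Illustration only (not part of the original source): a user-space update
 * daemon could drive this interface roughly as follows.  The syscall number
 * __NR_bdflush, and the assumption that ndirty is tuning parameter 0, are
 * guesses here, not something this file guarantees:
 *
 *      syscall(__NR_bdflush, 3, 500);  func 3: set parameter 0 (checked
 *                                              against bdflush_min/max)
 *      syscall(__NR_bdflush, 1, 0);    func 1: flush old buffers once
 *      syscall(__NR_bdflush, 0, 0);    func 0: become the daemon (never returns)
 *
 * Only the superuser may call it, and only one daemon may run at a time.
 */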
1822 asmlinkage int sys_bdflush(int func, int data)
1823 {
1824         int i, error;
1825         int ndirty;
1826         int nlist;
1827         int ncount;
1828         struct buffer_head * bh, *next;
1829 
1830         if(!suser()) return -EPERM;
1831 
1832         if(func == 1)
1833                  return sync_old_buffers();
1834 
1835         /* func 0 starts the daemon, func 1 flushes old buffers; for func >= 2, an even func reads parameter (func-2)/2 and an odd func sets it */
1836         if(func >= 2){
1837                 i = (func-2) >> 1;
1838                 if (i < 0 || i >= N_PARAM) return -EINVAL;
1839                 if((func & 1) == 0) {
1840                         error = verify_area(VERIFY_WRITE, (void *) data, sizeof(int));
1841                         if(error) return error;
1842                         put_fs_long(bdf_prm.data[i], data);
1843                         return 0;
1844                 };
1845                 if(data < bdflush_min[i] || data > bdflush_max[i]) return -EINVAL;
1846                 bdf_prm.data[i] = data;
1847                 return 0;
1848         };
1849         
1850         if(bdflush_running++) return -EBUSY; /* Only one copy of this running at one time */
1851         
1852         /* OK, from here on is the daemon */
1853         
1854         while(1==1){
1855 #ifdef DEBUG
1856                 printk("bdflush() activated...");
1857 #endif
1858                 
1859                 ncount = 0;
1860 #ifdef DEBUG
1861                 for(nlist = 0; nlist < NR_LIST; nlist++)
1862 #else
1863                 for(nlist = BUF_DIRTY; nlist <= BUF_DIRTY; nlist++)
1864 #endif
1865                  {
1866                          ndirty = 0;
1867                  repeat:
1868                          bh = lru_list[nlist];
1869                          if(bh) 
1870                                   for (i = nr_buffers_type[nlist]; --i > 0 && ndirty < bdf_prm.b_un.ndirty; 
1871                                        bh = next) {
1872                                           /* We may have stalled while waiting for I/O to complete. */
1873                                           if(bh->b_list != nlist) goto repeat;
1874                                           next = bh->b_next_free;
1875                                           if(!lru_list[nlist]) {
1876                                                   printk("Dirty list empty %d\n", i);
1877                                                   break;
1878                                           }
1879                                           
1880                                           /* Clean buffer on dirty list?  Refile it */
1881                                           if (nlist == BUF_DIRTY && !bh->b_dirt && !bh->b_lock)
1882                                            {
1883                                                    refile_buffer(bh);
1884                                                    continue;
1885                                            }
1886                                           
1887                                           if (bh->b_lock || !bh->b_dirt)
1888                                                    continue;
1889                                           /* Should we write back buffers that are shared or not??
1890                                              currently dirty buffers are not shared, so it does not matter */
1891                                           bh->b_count++;
1892                                           ndirty++;
1893                                           bh->b_flushtime = 0;
1894                                           ll_rw_block(WRITE, 1, &bh);
1895 #ifdef DEBUG
1896                                           if(nlist != BUF_DIRTY) ncount++;
1897 #endif
1898                                           bh->b_count--;
1899                                   }
1900                  }
1901 #ifdef DEBUG
1902                 if (ncount) printk("sys_bdflush: %d dirty buffers not on dirty list\n", ncount);
1903                 printk("sleeping again.\n");
1904 #endif
1905                 wake_up(&bdflush_done);
1906                 
1907                 /* If there are still a lot of dirty buffers around, skip the sleep
1908                    and flush some more */
1909                 
1910                 if(nr_buffers_type[BUF_DIRTY] < (nr_buffers - nr_buffers_type[BUF_SHARED]) * 
1911                    bdf_prm.b_un.nfract/100) {
1912                         interruptible_sleep_on(&bdflush_wait);
1913                 }
1914         }
1915 }
1916 
1917 
1918 /*
1919  * Overrides for Emacs so that we follow Linus's tabbing style.
1920  * Emacs will notice this stuff at the end of the file and automatically
1921  * adjust the settings for this buffer only.  This must remain at the end
1922  * of the file.
1923  * ---------------------------------------------------------------------------
1924  * Local variables:
1925  * c-indent-level: 8
1926  * c-brace-imaginary-offset: 0
1927  * c-brace-offset: -8
1928  * c-argdecl-indent: 8
1929  * c-label-offset: -8
1930  * c-continued-statement-offset: 8
1931  * c-continued-brace-offset: 0
1932  * End:
1933  */
