root/fs/buffer.c

DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. check_disk_change
  10. remove_from_hash_queue
  11. remove_from_free_list
  12. remove_from_queues
  13. put_first_free
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. getblk
  20. brelse
  21. bread
  22. breada
  23. put_unused_buffer_head
  24. get_more_buffer_heads
  25. get_unused_buffer_head
  26. create_buffers
  27. read_buffers
  28. check_aligned
  29. try_to_load_aligned
  30. try_to_share_buffers
  31. bread_page
  32. grow_buffers
  33. try_to_free
  34. shrink_buffers
  35. show_buffers
  36. buffer_init

/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 *  'buffer.c' implements the buffer-cache functions. Race-conditions have
 * been avoided by NEVER letting an interrupt change a buffer (except for the
 * data, of course), but instead letting the caller do it.
 */

/*
 * NOTE! There is one discordant note here: checking floppies for
 * disk change. This is where it fits best, I think, as it should
 * invalidate changed floppy-disk-caches.
 */

#include <stdarg.h>

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/locks.h>

#include <asm/system.h>
#include <asm/io.h>

#ifdef CONFIG_SCSI
#ifdef CONFIG_BLK_DEV_SR
extern int check_cdrom_media_change(int, int);
#endif
#ifdef CONFIG_BLK_DEV_SD
extern int check_scsidisk_media_change(int, int);
extern int revalidate_scsidisk(int, int);
#endif
#endif
#ifdef CONFIG_CDU31A
extern int check_cdu31a_media_change(int, int);
#endif
#ifdef CONFIG_MCD
extern int check_mcd_media_change(int, int);
#endif

static int grow_buffers(int pri, int size);

static struct buffer_head * hash_table[NR_HASH];
static struct buffer_head * free_list = NULL;
static struct buffer_head * unused_list = NULL;
static struct wait_queue * buffer_wait = NULL;

int nr_buffers = 0;
int buffermem = 0;
int nr_buffer_heads = 0;
static int min_free_pages = 20; /* nr free pages needed before buffer grows */
extern int *blksize_size[];

/*
 * Rewrote the wait-routines to use the "new" wait-queue functionality,
 * getting rid of the cli-sti pairs. The wait-queue routines still
 * need cli-sti, but now it's just a couple of 386 instructions or so.
 *
 * Note that the real wait_on_buffer() is an inline function that checks
 * if 'b_lock' is set before calling this, so that the queues aren't set
 * up unnecessarily.
 */
void __wait_on_buffer(struct buffer_head * bh)
{
        struct wait_queue wait = { current, NULL };

        bh->b_count++;
        add_wait_queue(&bh->b_wait, &wait);
repeat:
        current->state = TASK_UNINTERRUPTIBLE;
        if (bh->b_lock) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&bh->b_wait, &wait);
        bh->b_count--;
        current->state = TASK_RUNNING;
}
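
/*
 * For reference: the inline wait_on_buffer() mentioned above is a
 * one-line wrapper; a sketch of what <linux/locks.h> presumably
 * provides:
 *
 *      extern inline void wait_on_buffer(struct buffer_head * bh)
 *      {
 *              if (bh->b_lock)
 *                      __wait_on_buffer(bh);
 *      }
 */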

/* Call sync_buffers with wait!=0 to ensure that the call does not
   return until all buffer writes have completed.  Sync() may return
   before the writes have finished; fsync() may not. */

static int sync_buffers(dev_t dev, int wait)
{
        int i, retry, pass = 0, err = 0;
        struct buffer_head * bh;

        /* One pass for no-wait, three for wait:
           0) write out all dirty, unlocked buffers;
           1) write out all dirty buffers, waiting if locked;
           2) wait for completion by waiting for all buffers to unlock.
         */
repeat:
        retry = 0;
        bh = free_list;
        for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) {
                if (dev && bh->b_dev != dev)
                        continue;
#if 0 /* Disable bad-block debugging code */
                if (bh->b_req && !bh->b_lock &&
                    !bh->b_dirt && !bh->b_uptodate)
                        printk ("Warning (IO error) - orphaned block %08x on %04x\n",
                                bh->b_blocknr, bh->b_dev);
#endif
                if (bh->b_lock)
                {
                        /* Buffer is locked; skip it unless wait is
                           requested AND pass > 0. */
                        if (!wait || !pass) {
                                retry = 1;
                                continue;
                        }
                        wait_on_buffer (bh);
                }
                /* If an unlocked buffer is not uptodate, there has been
                   an IO error. Skip it. */
                if (wait && bh->b_req && !bh->b_lock &&
                    !bh->b_dirt && !bh->b_uptodate)
                {
                        err = 1;
                        continue;
                }
                /* Don't write clean buffers.  Don't write ANY buffers
                   on the third pass. */
                if (!bh->b_dirt || pass >= 2)
                        continue;
                bh->b_count++;
                ll_rw_block(WRITE, 1, &bh);
                bh->b_count--;
                retry = 1;
        }
        /* If we are waiting for the sync to succeed, and if any dirty
           blocks were written, then repeat; on the second pass, only
           wait for buffers being written (do not pass to write any
           more buffers on the second pass). */
        if (wait && retry && ++pass <= 2)
                goto repeat;
        return err;
}

void sync_dev(dev_t dev)
{
        sync_buffers(dev, 0);
        sync_supers(dev);
        sync_inodes(dev);
        sync_buffers(dev, 0);
}

int fsync_dev(dev_t dev)
{
        sync_buffers(dev, 0);
        sync_supers(dev);
        sync_inodes(dev);
        return sync_buffers(dev, 1);
}

asmlinkage int sys_sync(void)
{
        sync_dev(0);
        return 0;
}

int file_fsync (struct inode *inode, struct file *filp)
{
        return fsync_dev(inode->i_dev);
}

asmlinkage int sys_fsync(unsigned int fd)
{
        struct file * file;
        struct inode * inode;

        if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode))
                return -EBADF;
        if (!file->f_op || !file->f_op->fsync)
                return -EINVAL;
        if (file->f_op->fsync(inode,file))
                return -EIO;
        return 0;
}

void invalidate_buffers(dev_t dev)
{
        int i;
        struct buffer_head * bh;

        bh = free_list;
        for (i = nr_buffers*2 ; --i > 0 ; bh = bh->b_next_free) {
                if (bh->b_dev != dev)
                        continue;
                wait_on_buffer(bh);
                if (bh->b_dev == dev)
                        bh->b_uptodate = bh->b_dirt = bh->b_req = 0;
        }
}

/*
 * This routine checks whether a floppy has been changed, and
 * invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 *
 * NOTE! Although currently this is only for floppies, the idea is
 * that any additional removable block-device will use this routine,
 * and that mount/open needn't know that floppies/whatever are
 * special.
 */
void check_disk_change(dev_t dev)
{
        int i;
        struct buffer_head * bh;

        switch(MAJOR(dev)){
        case FLOPPY_MAJOR:
                if (!(bh = getblk(dev,0,1024)))
                        return;
                i = floppy_change(bh);
                brelse(bh);
                break;

#if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
        case SCSI_DISK_MAJOR:
                i = check_scsidisk_media_change(dev, 0);
                break;
#endif

#if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
        case SCSI_CDROM_MAJOR:
                i = check_cdrom_media_change(dev, 0);
                break;
#endif

#if defined(CONFIG_CDU31A)
        case CDU31A_CDROM_MAJOR:
                i = check_cdu31a_media_change(dev, 0);
                break;
#endif

#if defined(CONFIG_MCD)
        case MITSUMI_CDROM_MAJOR:
                i = check_mcd_media_change(dev, 0);
                break;
#endif

        default:
                return;
        }

        if (!i)
                return;

        printk("VFS: Disk change detected on device %d/%d\n",
                                        MAJOR(dev), MINOR(dev));
        for (i=0 ; i<NR_SUPER ; i++)
                if (super_blocks[i].s_dev == dev)
                        put_super(super_blocks[i].s_dev);
        invalidate_inodes(dev);
        invalidate_buffers(dev);

#if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
/* This is trickier for a removable hard disk, because we have to invalidate
   all of the partitions that lie on the disk. */
        if (MAJOR(dev) == SCSI_DISK_MAJOR)
                revalidate_scsidisk(dev, 0);
#endif
}

#define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)
#define hash(dev,block) hash_table[_hashfn(dev,block)]
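
/*
 * Worked example: block 2 of device 0x0301 hashes to the chain head
 * hash_table[((unsigned)(0x0301 ^ 2)) % NR_HASH]; find_buffer() below
 * then walks that chain through the b_next pointers.
 */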

static inline void remove_from_hash_queue(struct buffer_head * bh)
{
        if (bh->b_next)
                bh->b_next->b_prev = bh->b_prev;
        if (bh->b_prev)
                bh->b_prev->b_next = bh->b_next;
        if (hash(bh->b_dev,bh->b_blocknr) == bh)
                hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
        bh->b_next = bh->b_prev = NULL;
}

static inline void remove_from_free_list(struct buffer_head * bh)
{
        if (!(bh->b_prev_free) || !(bh->b_next_free))
                panic("VFS: Free block list corrupted");
        bh->b_prev_free->b_next_free = bh->b_next_free;
        bh->b_next_free->b_prev_free = bh->b_prev_free;
        if (free_list == bh)
                free_list = bh->b_next_free;
        bh->b_next_free = bh->b_prev_free = NULL;
}

static inline void remove_from_queues(struct buffer_head * bh)
{
        remove_from_hash_queue(bh);
        remove_from_free_list(bh);
}

static inline void put_first_free(struct buffer_head * bh)
{
        if (!bh || (bh == free_list))
                return;
        remove_from_free_list(bh);
/* add to front of free list */
        bh->b_next_free = free_list;
        bh->b_prev_free = free_list->b_prev_free;
        free_list->b_prev_free->b_next_free = bh;
        free_list->b_prev_free = bh;
        free_list = bh;
}

static inline void put_last_free(struct buffer_head * bh)
{
        if (!bh)
                return;
        if (bh == free_list) {
                free_list = bh->b_next_free;
                return;
        }
        remove_from_free_list(bh);
/* add to back of free list */
        bh->b_next_free = free_list;
        bh->b_prev_free = free_list->b_prev_free;
        free_list->b_prev_free->b_next_free = bh;
        free_list->b_prev_free = bh;
}

static inline void insert_into_queues(struct buffer_head * bh)
{
/* put at end of free list */
        bh->b_next_free = free_list;
        bh->b_prev_free = free_list->b_prev_free;
        free_list->b_prev_free->b_next_free = bh;
        free_list->b_prev_free = bh;
/* put the buffer in new hash-queue if it has a device */
        bh->b_prev = NULL;
        bh->b_next = NULL;
        if (!bh->b_dev)
                return;
        bh->b_next = hash(bh->b_dev,bh->b_blocknr);
        hash(bh->b_dev,bh->b_blocknr) = bh;
        if (bh->b_next)
                bh->b_next->b_prev = bh;
}

static struct buffer_head * find_buffer(dev_t dev, int block, int size)
{
        struct buffer_head * tmp;

        for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
                if (tmp->b_dev==dev && tmp->b_blocknr==block)
                        if (tmp->b_size == size)
                                return tmp;
                        else {
                                printk("VFS: Wrong blocksize on device %d/%d\n",
                                                        MAJOR(dev), MINOR(dev));
                                return NULL;
                        }
        return NULL;
}

/*
 * Why like this, I hear you say... The reason is race-conditions.
 * As we don't lock buffers (unless we are reading them, that is),
 * something might happen to it while we sleep (ie a read-error
 * will force it bad). This shouldn't really happen currently, but
 * the code is ready.
 */
struct buffer_head * get_hash_table(dev_t dev, int block, int size)
{
        struct buffer_head * bh;

        for (;;) {
                if (!(bh=find_buffer(dev,block,size)))
                        return NULL;
                bh->b_count++;
                wait_on_buffer(bh);
                if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
                        return bh;
                bh->b_count--;
        }
}

void set_blocksize(dev_t dev, int size)
{
        int i;
        struct buffer_head * bh, *bhnext;

        if (!blksize_size[MAJOR(dev)])
                return;

        switch(size) {
                default: panic("Invalid blocksize passed to set_blocksize");
                case 512: case 1024: case 2048: case 4096:;
        }

        if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
                blksize_size[MAJOR(dev)][MINOR(dev)] = size;
                return;
        }
        if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
                return;
        sync_buffers(dev, 2);
        blksize_size[MAJOR(dev)][MINOR(dev)] = size;

        /* We need to be quite careful how we do this - we are moving entries
           around on the free list, and we can get in a loop if we are not careful. */

        bh = free_list;
        for (i = nr_buffers*2 ; --i > 0 ; bh = bhnext) {
                bhnext = bh->b_next_free;
                if (bh->b_dev != dev)
                        continue;
                if (bh->b_size == size)
                        continue;

                wait_on_buffer(bh);
                if (bh->b_dev == dev && bh->b_size != size)
                        bh->b_uptodate = bh->b_dirt = 0;
                remove_from_hash_queue(bh);
/*              put_first_free(bh); */
        }
}

/*
 * Ok, this is getblk, and it isn't very clear, again to hinder
 * race-conditions. Most of the code is seldom used, (ie repeating),
 * so it should be much more efficient than it looks.
 *
 * The algorithm is changed: hopefully better, and an elusive bug removed.
 *
 * 14.02.92: changed it to sync dirty buffers a bit: better performance
 * when the filesystem starts to get full of dirty blocks (I hope).
 */
#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
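/*
 * BADNESS ranks how costly it is to reuse a buffer: 0 = clean and
 * unlocked (ideal victim), 1 = clean but locked, 2 = dirty and
 * unlocked, 3 = dirty and locked (worst). getblk() scans the free
 * list for the candidate with the lowest badness.
 */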
struct buffer_head * getblk(dev_t dev, int block, int size)
{
        struct buffer_head * bh, * tmp;
        int buffers;
        static int grow_size = 0;

repeat:
        bh = get_hash_table(dev, block, size);
        if (bh) {
                if (bh->b_uptodate && !bh->b_dirt)
                        put_last_free(bh);
                return bh;
        }
        grow_size -= size;
        if (nr_free_pages > min_free_pages && grow_size <= 0) {
                if (grow_buffers(GFP_BUFFER, size))
                        grow_size = PAGE_SIZE;
        }
        buffers = nr_buffers;
        bh = NULL;

        for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
                if (tmp->b_count || tmp->b_size != size)
                        continue;
                if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1)
                        continue;
                if (!bh || BADNESS(tmp)<BADNESS(bh)) {
                        bh = tmp;
                        if (!BADNESS(tmp))
                                break;
                }
#if 0
                if (tmp->b_dirt) {
                        tmp->b_count++;
                        ll_rw_block(WRITEA, 1, &tmp);
                        tmp->b_count--;
                }
#endif
        }

        if (!bh) {
                if (nr_free_pages > 5)
                        if (grow_buffers(GFP_BUFFER, size))
                                goto repeat;
                if (!grow_buffers(GFP_ATOMIC, size))
                        sleep_on(&buffer_wait);
                goto repeat;
        }

        wait_on_buffer(bh);
        if (bh->b_count || bh->b_size != size)
                goto repeat;
        if (bh->b_dirt) {
                sync_buffers(0,0);
                goto repeat;
        }
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
        if (find_buffer(dev,block,size))
                goto repeat;
/* OK, FINALLY we know that this buffer is the only one of its kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
        bh->b_count=1;
        bh->b_dirt=0;
        bh->b_uptodate=0;
        bh->b_req=0;
        remove_from_queues(bh);
        bh->b_dev=dev;
        bh->b_blocknr=block;
        insert_into_queues(bh);
        return bh;
}

void brelse(struct buffer_head * buf)
{
        if (!buf)
                return;
        wait_on_buffer(buf);
        if (buf->b_count) {
                if (--buf->b_count)
                        return;
                wake_up(&buffer_wait);
                return;
        }
        printk("VFS: brelse: Trying to free free buffer\n");
}

/*
 * bread() reads a specified block and returns the buffer that contains
 * it. It returns NULL if the block was unreadable.
 */
struct buffer_head * bread(dev_t dev, int block, int size)
{
        struct buffer_head * bh;

        if (!(bh = getblk(dev, block, size))) {
                printk("VFS: bread: READ error on device %d/%d\n",
                                                MAJOR(dev), MINOR(dev));
                return NULL;
        }
        if (bh->b_uptodate)
                return bh;
        ll_rw_block(READ, 1, &bh);
        wait_on_buffer(bh);
        if (bh->b_uptodate)
                return bh;
        brelse(bh);
        return NULL;
}
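
/*
 * A typical caller pairs bread() with brelse() once it is done with
 * the data; a minimal sketch ('blocknr' being some hypothetical block
 * number on 'dev'):
 *
 *      struct buffer_head * bh = bread(dev, blocknr, 1024);
 *      if (bh) {
 *              ... examine or copy bh->b_data ...
 *              brelse(bh);
 *      }
 */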

/*
 * Ok, breada can be used like bread, but it additionally marks other
 * blocks for reading as well. End the argument list with a negative
 * number.
 */
struct buffer_head * breada(dev_t dev,int first, ...)
{
        va_list args;
        unsigned int blocksize;
        struct buffer_head * bh, *tmp;

        va_start(args,first);

        blocksize = BLOCK_SIZE;
        if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
                blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];

        if (!(bh = getblk(dev, first, blocksize))) {
                printk("VFS: breada: READ error on device %d/%d\n",
                                                MAJOR(dev), MINOR(dev));
                return NULL;
        }
        if (!bh->b_uptodate)
                ll_rw_block(READ, 1, &bh);
        while ((first=va_arg(args,int))>=0) {
                tmp = getblk(dev, first, blocksize);
                if (tmp) {
                        if (!tmp->b_uptodate)
                                ll_rw_block(READA, 1, &tmp);
                        tmp->b_count--;
                }
        }
        va_end(args);
        wait_on_buffer(bh);
        if (bh->b_uptodate)
                return bh;
        brelse(bh);
        return NULL;
}
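
/*
 * Usage sketch: read block 'blk' synchronously and hint read-ahead
 * for the two blocks after it, ending the list with a negative number:
 *
 *      bh = breada(dev, blk, blk+1, blk+2, -1);
 */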

/*
 * See fs/inode.c for the weird use of volatile..
 */
static void put_unused_buffer_head(struct buffer_head * bh)
{
        struct wait_queue * wait;

        wait = ((volatile struct buffer_head *) bh)->b_wait;
        memset((void *) bh,0,sizeof(*bh));
        ((volatile struct buffer_head *) bh)->b_wait = wait;
        bh->b_next_free = unused_list;
        unused_list = bh;
}

static void get_more_buffer_heads(void)
{
        int i;
        struct buffer_head * bh;

        if (unused_list)
                return;

        if (!(bh = (struct buffer_head *) get_free_page(GFP_BUFFER)))
                return;

        i = PAGE_SIZE / sizeof(*bh);
        nr_buffer_heads += i;
        for ( ; i > 0 ; i--) {
                bh->b_next_free = unused_list;  /* only make link */
                unused_list = bh++;
        }
}

static struct buffer_head * get_unused_buffer_head(void)
{
        struct buffer_head * bh;

        get_more_buffer_heads();
        if (!unused_list)
                return NULL;
        bh = unused_list;
        unused_list = bh->b_next_free;
        bh->b_next_free = NULL;
        bh->b_data = NULL;
        bh->b_size = 0;
        bh->b_req = 0;
        return bh;
}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.. Use the bh->b_this_page linked list to
 * follow the buffers created.  Return NULL if unable to create more
 * buffers.
 */
static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
{
        struct buffer_head *bh, *head;
        unsigned long offset;

        head = NULL;
        offset = PAGE_SIZE;
        /* walk the page from the top down; when the unsigned offset
           underflows past zero it wraps to a huge value >= PAGE_SIZE
           and the loop terminates */
        while ((offset -= size) < PAGE_SIZE) {
                bh = get_unused_buffer_head();
                if (!bh)
                        goto no_grow;
                bh->b_this_page = head;
                head = bh;
                bh->b_data = (char *) (page+offset);
                bh->b_size = size;
        }
        return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
        bh = head;
        while (bh) {
                head = bh;
                bh = bh->b_this_page;
                put_unused_buffer_head(head);
        }
        return NULL;
}
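
/*
 * The chain returned above is NULL-terminated, so a caller can walk
 * it with:
 *
 *      for (tmp = head ; tmp != NULL ; tmp = tmp->b_this_page)
 *              ...
 *
 * (grow_buffers() and try_to_load_aligned() later turn the chain into
 * a ring by pointing the last buffer's b_this_page back at the first.)
 */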

static void read_buffers(struct buffer_head * bh[], int nrbuf)
{
        int i;
        int bhnum = 0;
        struct buffer_head * bhr[8];

        for (i = 0 ; i < nrbuf ; i++) {
                if (bh[i] && !bh[i]->b_uptodate)
                        bhr[bhnum++] = bh[i];
        }
        if (bhnum)
                ll_rw_block(READ, bhnum, bhr);
        for (i = 0 ; i < nrbuf ; i++) {
                if (bh[i]) {
                        wait_on_buffer(bh[i]);
                }
        }
}

static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
        dev_t dev, int *b, int size)
{
        struct buffer_head * bh[8];
        unsigned long page;
        unsigned long offset;
        int block;
        int nrbuf;

        page = (unsigned long) first->b_data;
        if (page & ~PAGE_MASK) {
                brelse(first);
                return 0;
        }
        mem_map[MAP_NR(page)]++;
        bh[0] = first;
        nrbuf = 1;
        for (offset = size ; offset < PAGE_SIZE ; offset += size) {
                block = *++b;
                if (!block)
                        goto no_go;
                first = get_hash_table(dev, block, size);
                if (!first)
                        goto no_go;
                bh[nrbuf++] = first;
                if (page+offset != (unsigned long) first->b_data)
                        goto no_go;
        }
        read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
        while (nrbuf-- > 0)
                brelse(bh[nrbuf]);
        free_page(address);
        ++current->min_flt;
        return page;
no_go:
        while (nrbuf-- > 0)
                brelse(bh[nrbuf]);
        free_page(page);
        return 0;
}

static unsigned long try_to_load_aligned(unsigned long address,
        dev_t dev, int b[], int size)
{
        struct buffer_head * bh, * tmp, * arr[8];
        unsigned long offset;
        int * p;
        int block;

        bh = create_buffers(address, size);
        if (!bh)
                return 0;
        /* do any of the buffers already exist? punt if so.. */
        p = b;
        for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
                block = *(p++);
                if (!block)
                        goto not_aligned;
                if (find_buffer(dev, block, size))
                        goto not_aligned;
        }
        tmp = bh;
        p = b;
        block = 0;
        while (1) {
                arr[block++] = bh;
                bh->b_count = 1;
                bh->b_dirt = 0;
                bh->b_uptodate = 0;
                bh->b_dev = dev;
                bh->b_blocknr = *(p++);
                nr_buffers++;
                insert_into_queues(bh);
                if (bh->b_this_page)
                        bh = bh->b_this_page;
                else
                        break;
        }
        buffermem += PAGE_SIZE;
        bh->b_this_page = tmp;
        mem_map[MAP_NR(address)]++;
        read_buffers(arr,block);
        while (block-- > 0)
                brelse(arr[block]);
        ++current->maj_flt;
        return address;
not_aligned:
        while ((tmp = bh) != NULL) {
                bh = bh->b_this_page;
                put_unused_buffer_head(tmp);
        }
        return 0;
}

/*
 * Try-to-share-buffers tries to minimize memory use by trying to keep
 * both code pages and the buffer area in the same page. This is done by
 * (a) checking if the buffers are already aligned correctly in memory and
 * (b) if none of the buffer heads are in memory at all, trying to load
 * them into memory the way we want them.
 *
 * This doesn't guarantee that the memory is shared, but should under most
 * circumstances work very well indeed (ie >90% sharing of code pages on
 * demand-loadable executables).
 */
static inline unsigned long try_to_share_buffers(unsigned long address,
        dev_t dev, int *b, int size)
{
        struct buffer_head * bh;
        int block;

        block = b[0];
        if (!block)
                return 0;
        bh = get_hash_table(dev, block, size);
        if (bh)
                return check_aligned(bh, address, dev, b, size);
        return try_to_load_aligned(address, dev, b, size);
}

#define COPYBLK(size,from,to) \
__asm__ __volatile__("rep ; movsl": \
        :"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \
        :"cx","di","si")
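
/*
 * COPYBLK is an inline x86 "rep movsl" that copies 'size' bytes a
 * longword at a time; a portable equivalent would be roughly
 *
 *      memcpy((void *) to, (void *) from, size);
 *
 * which is safe here because size is always a multiple of 4.
 */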

/*
 * bread_page reads four buffers into memory at the desired address. It's
 * a function of its own, as there is some speed to be got by reading them
 * all at the same time, not waiting for one to be read, and then another
 * etc. This also allows us to optimize memory usage by sharing code pages
 * and filesystem buffers..
 */
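/*
 * The b[] array holds one block number per buffer in the page
 * (PAGE_SIZE/size entries); an entry of 0 marks a missing block, for
 * which no read is issued.
 */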
unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
{
        struct buffer_head * bh[8];
        unsigned long where;
        int i, j;

        if (!(prot & PAGE_RW)) {
                where = try_to_share_buffers(address,dev,b,size);
                if (where)
                        return where;
        }
        ++current->maj_flt;
        for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
                bh[i] = NULL;
                if (b[i])
                        bh[i] = getblk(dev, b[i], size);
        }
        read_buffers(bh,i);
        where = address;
        for (i=0, j=0; j<PAGE_SIZE ; i++, j += size,address += size) {
                if (bh[i]) {
                        if (bh[i]->b_uptodate)
                                COPYBLK(size, (unsigned long) bh[i]->b_data,address);
                        brelse(bh[i]);
                }
        }
        return where;
}

/*
 * Try to increase the number of buffers available: the size argument
 * is used to determine what kind of buffers we want.
 */
static int grow_buffers(int pri, int size)
{
        unsigned long page;
        struct buffer_head *bh, *tmp;

        if ((size & 511) || (size > PAGE_SIZE)) {
                printk("VFS: grow_buffers: size = %d\n",size);
                return 0;
        }
        if (!(page = __get_free_page(pri)))
                return 0;
        bh = create_buffers(page, size);
        if (!bh) {
                free_page(page);
                return 0;
        }
        tmp = bh;
        while (1) {
                if (free_list) {
                        tmp->b_next_free = free_list;
                        tmp->b_prev_free = free_list->b_prev_free;
                        free_list->b_prev_free->b_next_free = tmp;
                        free_list->b_prev_free = tmp;
                } else {
                        tmp->b_prev_free = tmp;
                        tmp->b_next_free = tmp;
                }
                free_list = tmp;
                ++nr_buffers;
                if (tmp->b_this_page)
                        tmp = tmp->b_this_page;
                else
                        break;
        }
        tmp->b_this_page = bh;
        buffermem += PAGE_SIZE;
        return 1;
}

/*
 * try_to_free() checks if all the buffers on this particular page
 * are unused, and frees the page if so.
 */
static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
{
        unsigned long page;
        struct buffer_head * tmp, * p;

        *bhp = bh;
        page = (unsigned long) bh->b_data;
        page &= PAGE_MASK;
        tmp = bh;
        do {
                if (!tmp)
                        return 0;
                if (tmp->b_count || tmp->b_dirt || tmp->b_lock || tmp->b_wait)
                        return 0;
                tmp = tmp->b_this_page;
        } while (tmp != bh);
        tmp = bh;
        do {
                p = tmp;
                tmp = tmp->b_this_page;
                nr_buffers--;
                if (p == *bhp)
                        *bhp = p->b_prev_free;
                remove_from_queues(p);
                put_unused_buffer_head(p);
        } while (tmp != bh);
        buffermem -= PAGE_SIZE;
        free_page(page);
        return !mem_map[MAP_NR(page)];
}

/*
 * Try to free up some pages by shrinking the buffer-cache
 *
 * Priority tells the routine how hard to try to shrink the
 * buffers: 3 means "don't bother too much", while a value
 * of 0 means "we'd better get some free pages now".
 */
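/*
 * For example, shrink_buffers(6) scans only nr_buffers>>6 buffers and
 * skips anything locked or shared, while shrink_buffers(0) syncs
 * first, scans the whole list, and waits on locked buffers.
 */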
int shrink_buffers(unsigned int priority)
{
        struct buffer_head *bh;
        int i;

        if (priority < 2)
                sync_buffers(0,0);
        bh = free_list;
        i = nr_buffers >> priority;
        for ( ; i-- > 0 ; bh = bh->b_next_free) {
                if (bh->b_count ||
                    (priority >= 5 &&
                     mem_map[MAP_NR((unsigned long) bh->b_data)] > 1)) {
                        put_last_free(bh);
                        continue;
                }
                if (!bh->b_this_page)
                        continue;
                if (bh->b_lock) {
                        if (priority)
                                continue;
                        else
                                wait_on_buffer(bh);
                }
                if (bh->b_dirt) {
                        bh->b_count++;
                        ll_rw_block(WRITEA, 1, &bh);
                        bh->b_count--;
                        continue;
                }
                if (try_to_free(bh, &bh))
                        return 1;
        }
        return 0;
}

void show_buffers(void)
{
        struct buffer_head * bh;
        int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;

        printk("Buffer memory:   %6dkB\n",buffermem>>10);
        printk("Buffer heads:    %6d\n",nr_buffer_heads);
        printk("Buffer blocks:   %6d\n",nr_buffers);
        bh = free_list;
        do {
                found++;
                if (bh->b_lock)
                        locked++;
                if (bh->b_dirt)
                        dirty++;
                if (bh->b_count)
                        used++, lastused = found;
                bh = bh->b_next_free;
        } while (bh != free_list);
        printk("Buffer mem: %d buffers, %d used (last=%d), %d locked, %d dirty\n",
                found, used, lastused, locked, dirty);
}

/*
 * This initializes the initial buffer free list.  nr_buffers is set
 * to one less than the actual number of buffers, as a sop to backwards
 * compatibility --- the old code did this (I think unintentionally,
 * but I'm not sure), and programs in the ps package expect it.
 *                                      - TYT 8/30/92
 */
void buffer_init(void)
{
        int i;

        if (high_memory >= 4*1024*1024)
                min_free_pages = 200;
        else
                min_free_pages = 20;
        for (i = 0 ; i < NR_HASH ; i++)
                hash_table[i] = NULL;
        free_list = 0;
        grow_buffers(GFP_KERNEL, BLOCK_SIZE);
        if (!free_list)
                panic("VFS: Unable to initialize buffer free list!");
}
