root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. check_disk_change
  10. remove_from_hash_queue
  11. remove_from_free_list
  12. remove_from_queues
  13. put_first_free
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. getblk
  20. brelse
  21. bread
  22. breada
  23. put_unused_buffer_head
  24. get_more_buffer_heads
  25. get_unused_buffer_head
  26. create_buffers
  27. read_buffers
  28. check_aligned
  29. try_to_load_aligned
  30. try_to_share_buffers
  31. bread_page
  32. grow_buffers
  33. try_to_free
  34. shrink_buffers
  35. buffer_init

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18 
  19 #include <stdarg.h>
  20  
  21 #include <linux/config.h>
  22 #include <linux/errno.h>
  23 #include <linux/sched.h>
  24 #include <linux/kernel.h>
  25 #include <linux/string.h>
  26 #include <linux/locks.h>
  27 #include <linux/errno.h>
  28 
  29 #include <asm/system.h>
  30 #include <asm/io.h>
  31 
  32 #ifdef CONFIG_SCSI
  33 #ifdef CONFIG_BLK_DEV_SR
  34 extern int check_cdrom_media_change(int, int);
  35 #endif
  36 #ifdef CONFIG_BLK_DEV_SD
  37 extern int check_scsidisk_media_change(int, int);
  38 extern int revalidate_scsidisk(int, int);
  39 #endif
  40 #endif
  41 
  42 static struct buffer_head * hash_table[NR_HASH];
  43 static struct buffer_head * free_list = NULL;
  44 static struct buffer_head * unused_list = NULL;
  45 static struct wait_queue * buffer_wait = NULL;
  46 
  47 int nr_buffers = 0;
  48 int buffermem = 0;
  49 int nr_buffer_heads = 0;
  50 static int min_free_pages = 20; /* nr free pages needed before buffer grows */
  51 extern int *blksize_size[];
  52 
  53 /*
  54  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  55  * and got rid of the cli-sti pairs. The wait-queue routines still
  56  * need cli-sti, but now it's just a couple of 386 instructions or so.
  57  *
  58  * Note that the real wait_on_buffer() is an inline function that checks
  59  * if 'b_lock' is set before calling this, so that the queues aren't set
  60  * up unnecessarily.
  61  */
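     /*
      * A minimal sketch of that inline wrapper (the real one is pulled in
      * via <linux/locks.h>, which this file includes); it only falls
      * through to __wait_on_buffer() when the buffer is actually locked:
      *
      *     extern inline void wait_on_buffer(struct buffer_head * bh)
      *     {
      *             if (bh->b_lock)
      *                     __wait_on_buffer(bh);
      *     }
      *
      * so the common unlocked case costs one test and no wait-queue setup.
      */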
  62 void __wait_on_buffer(struct buffer_head * bh)
  63 {
  64         struct wait_queue wait = { current, NULL };
  65 
  66         bh->b_count++;
  67         add_wait_queue(&bh->b_wait, &wait);
  68 repeat:
  69         current->state = TASK_UNINTERRUPTIBLE;
  70         if (bh->b_lock) {
  71                 schedule();
  72                 goto repeat;
  73         }
  74         remove_wait_queue(&bh->b_wait, &wait);
  75         bh->b_count--;
  76         current->state = TASK_RUNNING;
  77 }
  78 
  79 /* Call sync_buffers with wait!=0 to ensure that the call does not
  80    return until all buffer writes have completed.  Sync() may return
  81    before the writes have finished; fsync() may not. */
  82 
  83 static int sync_buffers(dev_t dev, int wait)
  84 {
  85         int i, retry, pass = 0, err = 0;
  86         struct buffer_head * bh;
  87 
  88         /* One pass for no-wait, three for wait:
  89            0) write out all dirty, unlocked buffers;
  90            1) write out all dirty buffers, waiting if locked;
  91            2) wait for completion by waiting for all buffers to unlock.
  92          */
  93 repeat:
  94         retry = 0;
  95         bh = free_list;
  96         for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) {
  97                 if (dev && bh->b_dev != dev)
  98                         continue;
  99 #if 0 /* Disable bad-block debugging code */
 100                 if (bh->b_req && !bh->b_lock &&
 101                     !bh->b_dirt && !bh->b_uptodate)
 102                         printk ("Warning (IO error) - orphaned block %08x on %04x\n",
 103                                 bh->b_blocknr, bh->b_dev);
 104 #endif
 105                 if (bh->b_lock)
 106                 {
 107                         /* Buffer is locked; skip it unless wait is
 108                            requested AND pass > 0. */
 109                         if (!wait || !pass) {
 110                                 retry = 1;
 111                                 continue;
 112                         }
 113                         wait_on_buffer (bh);
 114                 }
 115                 /* If an unlocked buffer is not uptodate, there has been 
 116                    an IO error. Skip it. */
 117                 if (wait && bh->b_req && !bh->b_lock &&
 118                     !bh->b_dirt && !bh->b_uptodate)
 119                 {
 120                         err = 1;
 121                         continue;
 122                 }
 123                 /* Don't write clean buffers.  Don't write ANY buffers
 124                    on the third pass. */
 125                 if (!bh->b_dirt || pass>=2)
 126                         continue;
 127                 bh->b_count++;
 128                 ll_rw_block(WRITE, 1, &bh);
 129                 bh->b_count--;
 130                 retry = 1;
 131         }
 132         /* If we are waiting for the sync to succeed, and if any dirty
 133            blocks were written, then repeat; on the final pass, only
 134            wait for buffers being written - do not start any new
 135            writes on that pass. */
 136         if (wait && retry && ++pass<=2)
 137                 goto repeat;
 138         return err;
 139 }
 140 
 141 void sync_dev(dev_t dev)
 142 {
 143         sync_buffers(dev, 0);
 144         sync_supers(dev);
 145         sync_inodes(dev);
 146         sync_buffers(dev, 0);
 147 }
 148 
 149 int fsync_dev(dev_t dev)
 150 {
 151         sync_buffers(dev, 0);
 152         sync_supers(dev);
 153         sync_inodes(dev);
 154         return sync_buffers(dev, 1);
 155 }
 156 
 157 extern "C" int sys_sync(void)
 158 {
 159         sync_dev(0);
 160         return 0;
 161 }
 162 
 163 int file_fsync (struct inode *inode, struct file *filp)
 164 {
 165         return fsync_dev(inode->i_dev);
 166 }
 167 
 168 extern "C" int sys_fsync(unsigned int fd)
 169 {
 170         struct file * file;
 171         struct inode * inode;
 172 
 173         if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode))
 174                 return -EBADF;
 175         if (!file->f_op || !file->f_op->fsync)
 176                 return -EINVAL;
 177         if (file->f_op->fsync(inode,file))
 178                 return -EIO;
 179         return 0;
 180 }
 181 
 182 void invalidate_buffers(dev_t dev)
 183 {
 184         int i;
 185         struct buffer_head * bh;
 186 
 187         bh = free_list;
 188         for (i = nr_buffers*2 ; --i > 0 ; bh = bh->b_next_free) {
 189                 if (bh->b_dev != dev)
 190                         continue;
 191                 wait_on_buffer(bh);
 192                 if (bh->b_dev == dev)
 193                         bh->b_uptodate = bh->b_dirt = bh->b_req = 0;
 194         }
 195 }
 196 
 197 /*
 198  * This routine checks whether a floppy has been changed, and
 199  * invalidates all buffer-cache-entries in that case. This
 200  * is a relatively slow routine, so we have to try to minimize using
 201  * it. Thus it is called only upon a 'mount' or 'open'. This
 202  * is the best way of combining speed and utility, I think.
 203  * People changing diskettes in the middle of an operation deserve
 204  * to lose :-)
 205  *
 206  * NOTE! Although currently this is only for floppies, the idea is
 207  * that any additional removable block-device will use this routine,
 208  * and that mount/open needn't know that floppies/whatever are
 209  * special.
 210  */
 211 void check_disk_change(dev_t dev)
 212 {
 213         int i;
 214         struct buffer_head * bh;
 215 
 216         switch(MAJOR(dev)){
 217         case 2: /* floppy disc */
 218                 if (!(bh = getblk(dev,0,1024)))
 219                         return;
 220                 i = floppy_change(bh);
 221                 brelse(bh);
 222                 break;
 223 
 224 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 225          case 8: /* Removable scsi disk */
 226                 i = check_scsidisk_media_change(dev, 0);
 227                 break;
 228 #endif
 229 
 230 #if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
 231          case 11: /* CDROM */
 232                 i = check_cdrom_media_change(dev, 0);
 233                 break;
 234 #endif
 235 
 236          default:
 237                 return;
 238         }
 239 
 240         if (!i) return;
 241 
 242         printk("VFS: Disk change detected on device %d/%d\n",
 243                                         MAJOR(dev), MINOR(dev));
 244         for (i=0 ; i<NR_SUPER ; i++)
 245                 if (super_blocks[i].s_dev == dev)
 246                         put_super(super_blocks[i].s_dev);
 247         invalidate_inodes(dev);
 248         invalidate_buffers(dev);
 249 
 250 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 251 /* This is trickier for a removable hard disk, because we have to invalidate
 252    all of the partitions that lie on the disk. */
 253         if (MAJOR(dev) == 8)
 254                 revalidate_scsidisk(dev, 0);
 255 #endif
 256 }
 257 
 258 #define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)
 259 #define hash(dev,block) hash_table[_hashfn(dev,block)]
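     /*
      * Worked example: for dev 0x0301 (first partition of the first hard
      * disk, in this era's device numbering) and block 1234, _hashfn
      * expands to ((unsigned)(0x0301 ^ 1234)) % NR_HASH, and
      * hash(dev,block) names the head of that chain in hash_table[],
      * which find_buffer() below walks via b_next.
      */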
 260 
 261 static inline void remove_from_hash_queue(struct buffer_head * bh)
 262 {
 263         if (bh->b_next)
 264                 bh->b_next->b_prev = bh->b_prev;
 265         if (bh->b_prev)
 266                 bh->b_prev->b_next = bh->b_next;
 267         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 268                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 269         bh->b_next = bh->b_prev = NULL;
 270 }
 271 
 272 static inline void remove_from_free_list(struct buffer_head * bh)
 273 {
 274         if (!(bh->b_prev_free) || !(bh->b_next_free))
 275                 panic("VFS: Free block list corrupted");
 276         bh->b_prev_free->b_next_free = bh->b_next_free;
 277         bh->b_next_free->b_prev_free = bh->b_prev_free;
 278         if (free_list == bh)
 279                 free_list = bh->b_next_free;
 280         bh->b_next_free = bh->b_prev_free = NULL;
 281 }
 282 
 283 static inline void remove_from_queues(struct buffer_head * bh)
 284 {
 285         remove_from_hash_queue(bh);
 286         remove_from_free_list(bh);
 287 }
 288 
 289 static inline void put_first_free(struct buffer_head * bh)
 290 {
 291         if (!bh || (bh == free_list))
 292                 return;
 293         remove_from_free_list(bh);
 294 /* add to front of free list */
 295         bh->b_next_free = free_list;
 296         bh->b_prev_free = free_list->b_prev_free;
 297         free_list->b_prev_free->b_next_free = bh;
 298         free_list->b_prev_free = bh;
 299         free_list = bh;
 300 }
 301 
 302 static inline void put_last_free(struct buffer_head * bh)
 303 {
 304         if (!bh)
 305                 return;
 306         if (bh == free_list) {
 307                 free_list = bh->b_next_free;
 308                 return;
 309         }
 310         remove_from_free_list(bh);
 311 /* add to back of free list */
 312         bh->b_next_free = free_list;
 313         bh->b_prev_free = free_list->b_prev_free;
 314         free_list->b_prev_free->b_next_free = bh;
 315         free_list->b_prev_free = bh;
 316 }
 317 
 318 static inline void insert_into_queues(struct buffer_head * bh)
 319 {
 320 /* put at end of free list */
 321         bh->b_next_free = free_list;
 322         bh->b_prev_free = free_list->b_prev_free;
 323         free_list->b_prev_free->b_next_free = bh;
 324         free_list->b_prev_free = bh;
 325 /* put the buffer in new hash-queue if it has a device */
 326         bh->b_prev = NULL;
 327         bh->b_next = NULL;
 328         if (!bh->b_dev)
 329                 return;
 330         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 331         hash(bh->b_dev,bh->b_blocknr) = bh;
 332         if (bh->b_next)
 333                 bh->b_next->b_prev = bh;
 334 }
 335 
 336 static struct buffer_head * find_buffer(dev_t dev, int block, int size)
 337 {               
 338         struct buffer_head * tmp;
 339 
 340         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 341                 if (tmp->b_dev==dev && tmp->b_blocknr==block)
 342                         if (tmp->b_size == size)
 343                                 return tmp;
 344                         else {
 345                                 printk("VFS: Wrong blocksize on device %d/%d\n",
 346                                                         MAJOR(dev), MINOR(dev));
 347                                 return NULL;
 348                         }
 349         return NULL;
 350 }
 351 
 352 /*
 353  * Why like this, I hear you say... The reason is race-conditions.
 354  * As we don't lock buffers (unless we are reading them, that is),
 355  * something might happen to it while we sleep (ie a read-error
 356  * will force it bad). This shouldn't really happen currently, but
 357  * the code is ready.
 358  */
 359 struct buffer_head * get_hash_table(dev_t dev, int block, int size)
 360 {
 361         struct buffer_head * bh;
 362 
 363         for (;;) {
 364                 if (!(bh=find_buffer(dev,block,size)))
 365                         return NULL;
 366                 bh->b_count++;
 367                 wait_on_buffer(bh);
 368                 if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
 369                         return bh;
 370                 bh->b_count--;
 371         }
 372 }
 373 
 374 void set_blocksize(dev_t dev, int size)
 375 {
 376         int i;
 377         struct buffer_head * bh, *bhnext;
 378 
 379         if (!blksize_size[MAJOR(dev)])
 380                 return;
 381 
 382         if (size != 512 && size != 1024 && size != 2048 &&  size != 4096) 
 383                 panic("Invalid blocksize passed to set_blocksize");
 384 
 385         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 386                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 387                 return;
 388         }
 389         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 390                 return;
 391         sync_buffers(dev, 2);
 392         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 393 
 394   /* We need to be quite careful how we do this - we are moving entries
 395      around on the free list, and we can get in a loop if we are not careful. */
 396 
 397         bh = free_list;
 398         for (i = nr_buffers*2 ; --i > 0 ; bh = bhnext) {
 399                 bhnext = bh->b_next_free; 
 400                 if (bh->b_dev != dev)
 401                         continue;
 402                 if (bh->b_size == size)
 403                         continue;
 404 
 405                 wait_on_buffer(bh);
 406                 if (bh->b_dev == dev && bh->b_size != size)
 407                         bh->b_uptodate = bh->b_dirt = 0;
 408                 remove_from_hash_queue(bh);
 409 /*    put_first_free(bh); */
 410         }
 411 }
 412 
 413 /*
 414  * Ok, this is getblk, and it isn't very clear, again to hinder
 415  * race-conditions. Most of the code is seldom used (ie repeating),
 416  * so it should be much more efficient than it looks.
 417  *
 418  * The algorithm is changed: hopefully better, and an elusive bug removed.
 419  *
 420  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 421  * when the filesystem starts to get full of dirty blocks (I hope).
 422  */
 423 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
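     /*
      * BADNESS() ranks a buffer for reuse: clean and unlocked = 0 (best),
      * clean but locked = 1, dirty and unlocked = 2, dirty and locked = 3
      * (worst). The scan below keeps the least-bad candidate seen so far
      * and stops early as soon as it finds a badness-0 buffer.
      */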
 424 struct buffer_head * getblk(dev_t dev, int block, int size)
 425 {
 426         struct buffer_head * bh, * tmp;
 427         int buffers;
 428         static int grow_size = 0;
 429 
 430 repeat:
 431         bh = get_hash_table(dev, block, size);
 432         if (bh) {
 433                 if (bh->b_uptodate && !bh->b_dirt)
 434                         put_last_free(bh);
 435                 return bh;
 436         }
 437         grow_size -= size;
 438         if (nr_free_pages > min_free_pages && grow_size <= 0) {
 439                 grow_buffers(size);
 440                 grow_size = 4096;
 441         }
 442         buffers = nr_buffers;
 443         bh = NULL;
 444 
 445         for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
 446                 if (tmp->b_count || tmp->b_size != size)
 447                         continue;
 448                 if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1)
 449                         continue;
 450                 if (!bh || BADNESS(tmp)<BADNESS(bh)) {
 451                         bh = tmp;
 452                         if (!BADNESS(tmp))
 453                                 break;
 454                 }
 455 #if 0
 456                 if (tmp->b_dirt) {
 457                         tmp->b_count++;
 458                         ll_rw_block(WRITEA, 1, &tmp);
 459                         tmp->b_count--;
 460                 }
 461 #endif
 462         }
 463 
 464         if (!bh && nr_free_pages > 5) {
 465                 grow_buffers(size);
 466                 goto repeat;
 467         }
 468         
 469 /* and repeat until we find something good */
 470         if (!bh) {
 471                 sleep_on(&buffer_wait);
 472                 goto repeat;
 473         }
 474         wait_on_buffer(bh);
 475         if (bh->b_count || bh->b_size != size)
 476                 goto repeat;
 477         if (bh->b_dirt) {
 478                 sync_buffers(0,0);
 479                 goto repeat;
 480         }
 481 /* NOTE!! While we slept waiting for this block, somebody else might */
 482 /* already have added "this" block to the cache. check it */
 483         if (find_buffer(dev,block,size))
 484                 goto repeat;
 485 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 486 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 487         bh->b_count=1;
 488         bh->b_dirt=0;
 489         bh->b_uptodate=0;
 490         bh->b_req=0;
 491         remove_from_queues(bh);
 492         bh->b_dev=dev;
 493         bh->b_blocknr=block;
 494         insert_into_queues(bh);
 495         return bh;
 496 }
 497 
 498 void brelse(struct buffer_head * buf)
 499 {
 500         if (!buf)
 501                 return;
 502         wait_on_buffer(buf);
 503         if (buf->b_count) {
 504                 if (--buf->b_count)
 505                         return;
 506                 wake_up(&buffer_wait);
 507                 return;
 508         }
 509         printk("VFS: brelse: Trying to free free buffer\n");
 510 }
 511 
 512 /*
 513  * bread() reads a specified block and returns the buffer that contains
 514  * it. It returns NULL if the block was unreadable.
 515  */
 516 struct buffer_head * bread(dev_t dev, int block, int size)
 517 {
 518         struct buffer_head * bh;
 519 
 520         if (!(bh = getblk(dev, block, size))) {
 521                 printk("VFS: bread: READ error on device %d/%d\n",
 522                                                 MAJOR(dev), MINOR(dev));
 523                 return NULL;
 524         }
 525         if (bh->b_uptodate)
 526                 return bh;
 527         ll_rw_block(READ, 1, &bh);
 528         wait_on_buffer(bh);
 529         if (bh->b_uptodate)
 530                 return bh;
 531         brelse(bh);
 532         return NULL;
 533 }
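     /*
      * A typical calling pattern, as a sketch ('blocknr' is just an
      * illustrative name):
      *
      *     struct buffer_head * bh;
      *
      *     if (!(bh = bread(dev, blocknr, BLOCK_SIZE)))
      *             return -EIO;
      *     ... use bh->b_data ...
      *     brelse(bh);
      *
      * Every successful bread() must be balanced by a brelse(), or
      * b_count never drops back to zero and the buffer can never be
      * reclaimed.
      */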
 534 
 535 /*
 536  * Ok, breada can be used like bread, but it additionally marks other
 537  * blocks for read-ahead as well. End the argument list with a negative
 538  * number.
 539  */
 540 struct buffer_head * breada(dev_t dev,int first, ...)
 541 {
 542         va_list args;
 543         unsigned int blocksize;
 544         struct buffer_head * bh, *tmp;
 545 
 546         va_start(args,first);
 547 
 548         blocksize = BLOCK_SIZE;
 549         if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
 550                 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
 551 
 552         if (!(bh = getblk(dev, first, blocksize))) {
 553                 printk("VFS: breada: READ error on device %d/%d\n",
 554                                                 MAJOR(dev), MINOR(dev));
 555                 return NULL;
 556         }
 557         if (!bh->b_uptodate)
 558                 ll_rw_block(READ, 1, &bh);
 559         while ((first=va_arg(args,int))>=0) {
 560                 tmp = getblk(dev, first, blocksize);
 561                 if (tmp) {
 562                         if (!tmp->b_uptodate)
 563                                 ll_rw_block(READA, 1, &tmp);
 564                         tmp->b_count--;
 565                 }
 566         }
 567         va_end(args);
 568         wait_on_buffer(bh);
 569         if (bh->b_uptodate)
 570                 return bh;
 571         brelse(bh);
 572         return NULL;
 573 }
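     /*
      * Example (sketch): read 'block' now, hint the next two blocks for
      * read-ahead, and terminate the list with a negative number:
      *
      *     bh = breada(dev, block, block+1, block+2, -1);
      */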
 574 
 575 /*
 576  * See fs/inode.c for the weird use of volatile..
 577  */
 578 static void put_unused_buffer_head(struct buffer_head * bh)
 579 {
 580         struct wait_queue * wait;
 581 
 582         wait = ((volatile struct buffer_head *) bh)->b_wait;
 583         memset((void *) bh,0,sizeof(*bh));
 584         ((volatile struct buffer_head *) bh)->b_wait = wait;
 585         bh->b_next_free = unused_list;
 586         unused_list = bh;
 587 }
 588 
 589 static void get_more_buffer_heads(void)
 590 {
 591         unsigned long page;
 592         struct buffer_head * bh;
 593 
 594         if (unused_list)
 595                 return;
 596         page = get_free_page(GFP_BUFFER);
 597         if (!page)
 598                 return;
 599         bh = (struct buffer_head *) page;
 600         while ((unsigned long) (bh+1) <= page+4096) {
 601                 put_unused_buffer_head(bh);
 602                 bh++;
 603                 nr_buffer_heads++;
 604         }
 605 }
 606 
 607 static struct buffer_head * get_unused_buffer_head(void)
 608 {
 609         struct buffer_head * bh;
 610 
 611         get_more_buffer_heads();
 612         if (!unused_list)
 613                 return NULL;
 614         bh = unused_list;
 615         unused_list = bh->b_next_free;
 616         bh->b_next_free = NULL;
 617         bh->b_data = NULL;
 618         bh->b_size = 0;
 619         bh->b_req = 0;
 620         return bh;
 621 }
 622 
 623 /*
 624  * Create the appropriate buffers when given a page for the data area and
 625  * the size of each buffer.. Use the bh->b_this_page linked list to
 626  * follow the buffers created.  Return NULL if unable to create more
 627  * buffers.
 628  */
 629 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 630 {
 631         struct buffer_head *bh, *head;
 632         unsigned long offset;
 633 
 634         head = NULL;
 635         offset = 4096;
 636         while ((offset -= size) < 4096) {
 637                 bh = get_unused_buffer_head();
 638                 if (!bh)
 639                         goto no_grow;
 640                 bh->b_this_page = head;
 641                 head = bh;
 642                 bh->b_data = (char *) (page+offset);
 643                 bh->b_size = size;
 644         }
 645         return head;
 646 /*
 647  * In case anything failed, we just free everything we got.
 648  */
 649 no_grow:
 650         bh = head;
 651         while (bh) {
 652                 head = bh;
 653                 bh = bh->b_this_page;
 654                 put_unused_buffer_head(head);
 655         }
 656         return NULL;
 657 }
 658 
 659 static void read_buffers(struct buffer_head * bh[], int nrbuf)
 660 {
 661         int i;
 662         int bhnum = 0;
 663         struct buffer_head * bhr[8];
 664 
 665         for (i = 0 ; i < nrbuf ; i++) {
 666                 if (bh[i] && !bh[i]->b_uptodate)
 667                         bhr[bhnum++] = bh[i];
 668         }
 669         if (bhnum)
 670                 ll_rw_block(READ, bhnum, bhr);
 671         for (i = 0 ; i < nrbuf ; i++) {
 672                 if (bh[i]) {
 673                         wait_on_buffer(bh[i]);
 674                 }
 675         }
 676 }
 677 
 678 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
 679         dev_t dev, int *b, int size)
 680 {
 681         struct buffer_head * bh[8];
 682         unsigned long page;
 683         unsigned long offset;
 684         int block;
 685         int nrbuf;
 686 
 687         page = (unsigned long) first->b_data;
 688         if (page & 0xfff) {
 689                 brelse(first);
 690                 return 0;
 691         }
 692         mem_map[MAP_NR(page)]++;
 693         bh[0] = first;
 694         nrbuf = 1;
 695         for (offset = size ; offset < 4096 ; offset += size) {
 696                 block = *++b;
 697                 if (!block)
 698                         goto no_go;
 699                 first = get_hash_table(dev, block, size);
 700                 if (!first)
 701                         goto no_go;
 702                 bh[nrbuf++] = first;
 703                 if (page+offset != (unsigned long) first->b_data)
 704                         goto no_go;
 705         }
 706         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
 707         while (nrbuf-- > 0)
 708                 brelse(bh[nrbuf]);
 709         free_page(address);
 710         ++current->min_flt;
 711         return page;
 712 no_go:
 713         while (nrbuf-- > 0)
 714                 brelse(bh[nrbuf]);
 715         free_page(page);
 716         return 0;
 717 }
 718 
 719 static unsigned long try_to_load_aligned(unsigned long address,
 720         dev_t dev, int b[], int size)
 721 {
 722         struct buffer_head * bh, * tmp, * arr[8];
 723         unsigned long offset;
 724         int * p;
 725         int block;
 726 
 727         bh = create_buffers(address, size);
 728         if (!bh)
 729                 return 0;
 730         p = b;
 731         for (offset = 0 ; offset < 4096 ; offset += size) {
 732                 block = *(p++);
 733                 if (!block)
 734                         goto not_aligned;
 735                 tmp = get_hash_table(dev, block, size);
 736                 if (tmp) {
 737                         brelse(tmp);
 738                         goto not_aligned;
 739                 }
 740         }
 741         tmp = bh;
 742         p = b;
 743         block = 0;
 744         while (1) {
 745                 arr[block++] = bh;
 746                 bh->b_count = 1;
 747                 bh->b_dirt = 0;
 748                 bh->b_uptodate = 0;
 749                 bh->b_dev = dev;
 750                 bh->b_blocknr = *(p++);
 751                 nr_buffers++;
 752                 insert_into_queues(bh);
 753                 if (bh->b_this_page)
 754                         bh = bh->b_this_page;
 755                 else
 756                         break;
 757         }
 758         buffermem += 4096;
 759         bh->b_this_page = tmp;
 760         mem_map[MAP_NR(address)]++;
 761         read_buffers(arr,block);
 762         while (block-- > 0)
 763                 brelse(arr[block]);
 764         ++current->maj_flt;
 765         return address;
 766 not_aligned:
 767         while ((tmp = bh) != NULL) {
 768                 bh = bh->b_this_page;
 769                 put_unused_buffer_head(tmp);
 770         }
 771         return 0;
 772 }
 773 
 774 /*
 775  * Try-to-share-buffers tries to minimize memory use by trying to keep
 776  * both code pages and the buffer area in the same page. This is done by
 777  * (a) checking if the buffers are already aligned correctly in memory and
 778  * (b) if none of the buffer heads are in memory at all, trying to load
 779  * them into memory the way we want them.
 780  *
 781  * This doesn't guarantee that the memory is shared, but should under most
 782  * circumstances work very well indeed (ie >90% sharing of code pages on
 783  * demand-loadable executables).
 784  */
 785 static inline unsigned long try_to_share_buffers(unsigned long address,
 786         dev_t dev, int *b, int size)
 787 {
 788         struct buffer_head * bh;
 789         int block;
 790 
 791         block = b[0];
 792         if (!block)
 793                 return 0;
 794         bh = get_hash_table(dev, block, size);
 795         if (bh)
 796                 return check_aligned(bh, address, dev, b, size);
 797         return try_to_load_aligned(address, dev, b, size);
 798 }
 799 
 800 #define COPYBLK(size,from,to) \
 801 __asm__ __volatile__("rep ; movsl": \
 802         :"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \
 803         :"cx","di","si")
 804 
 805 /*
 806  * bread_page reads a full page of buffers into memory at the desired address. It's
 807  * a function of its own, as there is some speed to be got by reading them
 808  * all at the same time, not waiting for one to be read, and then another
 809  * etc. This also allows us to optimize memory usage by sharing code pages
 810  * and filesystem buffers..
 811  */
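     /*
      * The b[] array holds one block number per size-byte piece of the
      * page (PAGE_SIZE/size entries); a zero entry means "no block here",
      * and that part of the page is simply left unread.
      */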
 812 unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
 813 {
 814         struct buffer_head * bh[8];
 815         unsigned long where;
 816         int i, j;
 817 
 818         if (!(prot & PAGE_RW)) {
 819                 where = try_to_share_buffers(address,dev,b,size);
 820                 if (where)
 821                         return where;
 822         }
 823         ++current->maj_flt;
 824         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
 825                 bh[i] = NULL;
 826                 if (b[i])
 827                         bh[i] = getblk(dev, b[i], size);
 828         }
 829         read_buffers(bh,i);
 830         where = address;
 831         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size,address += size) {
 832                 if (bh[i]) {
 833                         if (bh[i]->b_uptodate)
 834                                 COPYBLK(size, (unsigned long) bh[i]->b_data,address);
 835                         brelse(bh[i]);
 836                 }
 837         }
 838         return where;
 839 }
 840 
 841 /*
 842  * Try to increase the number of buffers available: the size argument
 843  * is used to determine what kind of buffers we want.
 844  */
 845 void grow_buffers(int size)
 846 {
 847         unsigned long page;
 848         struct buffer_head *bh, *tmp;
 849 
 850         if ((size & 511) || (size > 4096)) {
 851                 printk("VFS: grow_buffers: size = %d\n",size);
 852                 return;
 853         }
 854         page = get_free_page(GFP_BUFFER);
 855         if (!page)
 856                 return;
 857         bh = create_buffers(page, size);
 858         if (!bh) {
 859                 free_page(page);
 860                 return;
 861         }
 862         tmp = bh;
 863         while (1) {
 864                 if (free_list) {
 865                         tmp->b_next_free = free_list;
 866                         tmp->b_prev_free = free_list->b_prev_free;
 867                         free_list->b_prev_free->b_next_free = tmp;
 868                         free_list->b_prev_free = tmp;
 869                 } else {
 870                         tmp->b_prev_free = tmp;
 871                         tmp->b_next_free = tmp;
 872                 }
 873                 free_list = tmp;
 874                 ++nr_buffers;
 875                 if (tmp->b_this_page)
 876                         tmp = tmp->b_this_page;
 877                 else
 878                         break;
 879         }
 880         tmp->b_this_page = bh;
 881         buffermem += 4096;
 882         return;
 883 }
 884 
 885 /*
 886  * try_to_free() checks if all the buffers on this particular page
 887  * are unused, and frees the page if so.
 888  */
 889 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
 890 {
 891         unsigned long page;
 892         struct buffer_head * tmp, * p;
 893 
 894         *bhp = bh;
 895         page = (unsigned long) bh->b_data;
 896         page &= 0xfffff000;
 897         tmp = bh;
 898         do {
 899                 if (!tmp)
 900                         return 0;
 901                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
 902                         return 0;
 903                 tmp = tmp->b_this_page;
 904         } while (tmp != bh);
 905         tmp = bh;
 906         do {
 907                 p = tmp;
 908                 tmp = tmp->b_this_page;
 909                 nr_buffers--;
 910                 if (p == *bhp)
 911                         *bhp = p->b_prev_free;
 912                 remove_from_queues(p);
 913                 put_unused_buffer_head(p);
 914         } while (tmp != bh);
 915         buffermem -= 4096;
 916         free_page(page);
 917         return !mem_map[MAP_NR(page)];
 918 }
 919 
 920 /*
 921  * Try to free up some pages by shrinking the buffer-cache
 922  *
 923  * Priority tells the routine how hard to try to shrink the
 924  * buffers: 3 means "don't bother too much", while a value
 925  * of 0 means "we'd better get some free pages now".
 926  */
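     /*
      * Concretely, the loop below scans nr_buffers >> priority buffers:
      * priority 0 visits (up to) the whole free list, priority 3 only
      * about an eighth of it.
      */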
 927 int shrink_buffers(unsigned int priority)
 928 {
 929         struct buffer_head *bh;
 930         int i;
 931 
 932         if (priority < 2)
 933                 sync_buffers(0,0);
 934         bh = free_list;
 935         i = nr_buffers >> priority;
 936         for ( ; i-- > 0 ; bh = bh->b_next_free) {
 937                 if (bh->b_count || !bh->b_this_page)
 938                         continue;
 939                 if (bh->b_lock)
 940                         if (priority)
 941                                 continue;
 942                         else
 943                                 wait_on_buffer(bh);
 944                 if (bh->b_dirt) {
 945                         bh->b_count++;
 946                         ll_rw_block(WRITEA, 1, &bh);
 947                         bh->b_count--;
 948                         continue;
 949                 }
 950                 if (try_to_free(bh, &bh))
 951                         return 1;
 952         }
 953         return 0;
 954 }
 955 
 956 /*
 957  * This initializes the initial buffer free list.  nr_buffers is set
 958  * to one less the actual number of buffers, as a sop to backwards
 959  * compatibility --- the old code did this (I think unintentionally,
 960  * but I'm not sure), and programs in the ps package expect it.
 961  *                                      - TYT 8/30/92
 962  */
 963 void buffer_init(void)
 964 {
 965         int i;
 966 
 967         if (high_memory >= 4*1024*1024)
 968                 min_free_pages = 200;
 969         else
 970                 min_free_pages = 20;
 971         for (i = 0 ; i < NR_HASH ; i++)
 972                 hash_table[i] = NULL;
 973         free_list = 0;
 974         grow_buffers(BLOCK_SIZE);
 975         if (!free_list)
 976                 panic("VFS: Unable to initialize buffer free list!");
 977         return;
 978 }
