root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions:
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. check_disk_change
  10. remove_from_hash_queue
  11. remove_from_free_list
  12. remove_from_queues
  13. put_first_free
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. set_blocksize
  19. getblk
  20. brelse
  21. bread
  22. breada
  23. put_unused_buffer_head
  24. get_more_buffer_heads
  25. get_unused_buffer_head
  26. create_buffers
  27. read_buffers
  28. check_aligned
  29. try_to_load_aligned
  30. try_to_share_buffers
  31. bread_page
  32. grow_buffers
  33. try_to_free
  34. shrink_buffers
  35. buffer_init

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18 
  19 #include <stdarg.h>
  20  
  21 #include <linux/config.h>
  22 #include <linux/errno.h>
  23 #include <linux/sched.h>
  24 #include <linux/kernel.h>
  25 #include <linux/string.h>
  26 #include <linux/locks.h>
  27 #include <linux/errno.h>
  28 
  29 #include <asm/system.h>
  30 #include <asm/io.h>
  31 
  32 #ifdef CONFIG_SCSI
  33 #ifdef CONFIG_BLK_DEV_SR
  34 extern int check_cdrom_media_change(int, int);
  35 #endif
  36 #ifdef CONFIG_BLK_DEV_SD
  37 extern int check_scsidisk_media_change(int, int);
  38 extern int revalidate_scsidisk(int, int);
  39 #endif
  40 #endif
  41 #ifdef CONFIG_CDU31A
  42 extern int check_cdu31a_media_change(int, int);
  43 #endif
  44 #ifdef CONFIG_MCD
  45 extern int check_mcd_media_change(int, int);
  46 #endif
  47 
  48 static struct buffer_head * hash_table[NR_HASH];
  49 static struct buffer_head * free_list = NULL;
  50 static struct buffer_head * unused_list = NULL;
  51 static struct wait_queue * buffer_wait = NULL;
  52 
  53 int nr_buffers = 0;
  54 int buffermem = 0;
  55 int nr_buffer_heads = 0;
  56 static int min_free_pages = 20; /* nr free pages needed before buffer grows */
  57 extern int *blksize_size[];
  58 
  59 /*
  60  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  61  * and got rid of the cli-sti pairs. The wait-queue routines still
  62  * need cli-sti, but now it's just a couple of 386 instructions or so.
  63  *
  64  * Note that the real wait_on_buffer() is an inline function that checks
  65  * if 'b_wait' is set before calling this, so that the queues aren't set
  66  * up unnecessarily.
  67  */
  68 void __wait_on_buffer(struct buffer_head * bh)
  69 {
  70         struct wait_queue wait = { current, NULL };
  71 
  72         bh->b_count++;
  73         add_wait_queue(&bh->b_wait, &wait);
  74 repeat:
  75         current->state = TASK_UNINTERRUPTIBLE;
  76         if (bh->b_lock) {
  77                 schedule();
  78                 goto repeat;
  79         }
  80         remove_wait_queue(&bh->b_wait, &wait);
  81         bh->b_count--;
  82         current->state = TASK_RUNNING;
  83 }
  84 
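The wrapper mentioned in the comment above is defined outside this file; a minimal sketch of such an inline fast path (the real definition lives in <linux/locks.h>) looks like this:

/* Sketch only: the inline fast path around __wait_on_buffer().  The common
 * case (buffer not locked) costs a single test and never touches the
 * wait-queue machinery above. */
extern inline void wait_on_buffer(struct buffer_head * bh)
{
        if (bh->b_lock)
                __wait_on_buffer(bh);
}
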
  85 /* Call sync_buffers with wait!=0 to ensure that the call does not
  86    return until all buffer writes have completed.  Sync() may return
  87    before the writes have finished; fsync() may not. */
  88 
  89 static int sync_buffers(dev_t dev, int wait)
  90 {
  91         int i, retry, pass = 0, err = 0;
  92         struct buffer_head * bh;
  93 
  94         /* One pass for no-wait, three for wait:
  95            0) write out all dirty, unlocked buffers;
  96            1) write out all dirty buffers, waiting if locked;
  97            2) wait for completion by waiting for all buffers to unlock.
  98          */
  99 repeat:
 100         retry = 0;
 101         bh = free_list;
 102         for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) {
 103                 if (dev && bh->b_dev != dev)
 104                         continue;
 105 #if 0 /* Disable bad-block debugging code */
 106                 if (bh->b_req && !bh->b_lock &&
 107                     !bh->b_dirt && !bh->b_uptodate)
 108                         printk ("Warning (IO error) - orphaned block %08x on %04x\n",
 109                                 bh->b_blocknr, bh->b_dev);
 110 #endif
 111                 if (bh->b_lock)
 112                 {
 113                         /* Buffer is locked; skip it unless wait is
 114                            requested AND pass > 0. */
 115                         if (!wait || !pass) {
 116                                 retry = 1;
 117                                 continue;
 118                         }
 119                         wait_on_buffer (bh);
 120                 }
 121                 /* If an unlocked buffer is not uptodate, there has been 
 122                    an IO error. Skip it. */
 123                 if (wait && bh->b_req && !bh->b_lock &&
 124                     !bh->b_dirt && !bh->b_uptodate)
 125                 {
 126                         err = 1;
 127                         continue;
 128                 }
 129                 /* Don't write clean buffers.  Don't write ANY buffers
 130                    on the third pass. */
 131                 if (!bh->b_dirt || pass>=2)
 132                         continue;
 133                 bh->b_count++;
 134                 ll_rw_block(WRITE, 1, &bh);
 135                 bh->b_count--;
 136                 retry = 1;
 137         }
 138         /* If we are waiting for the sync to succeed, and if any dirty
 139            blocks were written, then repeat; on the second pass, only
 140            wait for buffers being written (do not pass to write any
 141            more buffers on the second pass). */
 142         if (wait && retry && ++pass<=2)
 143                 goto repeat;
 144         return err;
 145 }
 146 
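Concretely, fsync_dev() below finishes with sync_buffers(dev, 1): pass 0 queues writes for every dirty, unlocked buffer; pass 1 waits for the buffers that were locked the first time around and queues their writes too; pass 2 writes nothing and only waits, so that by the time the call returns every write has either completed or shown up as an I/O error.
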
 147 void sync_dev(dev_t dev)
 148 {
 149         sync_buffers(dev, 0);
 150         sync_supers(dev);
 151         sync_inodes(dev);
 152         sync_buffers(dev, 0);
 153 }
 154 
 155 int fsync_dev(dev_t dev)
 156 {
 157         sync_buffers(dev, 0);
 158         sync_supers(dev);
 159         sync_inodes(dev);
 160         return sync_buffers(dev, 1);
 161 }
 162 
 163 asmlinkage int sys_sync(void)
 164 {
 165         sync_dev(0);
 166         return 0;
 167 }
 168 
 169 int file_fsync (struct inode *inode, struct file *filp)
 170 {
 171         return fsync_dev(inode->i_dev);
 172 }
 173 
 174 asmlinkage int sys_fsync(unsigned int fd)
 175 {
 176         struct file * file;
 177         struct inode * inode;
 178 
 179         if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode))
 180                 return -EBADF;
 181         if (!file->f_op || !file->f_op->fsync)
 182                 return -EINVAL;
 183         if (file->f_op->fsync(inode,file))
 184                 return -EIO;
 185         return 0;
 186 }
 187 
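sys_fsync() only succeeds when the filesystem has filled in the fsync entry of its file_operations. A filesystem whose only dirty state lives in the buffer cache can simply point that entry at the generic file_fsync() above; a hypothetical example (the names are illustrative):

/* Hypothetical glue (illustration only): route fsync(2) to the generic
 * helper, which syncs the whole device that the inode lives on. */
static struct file_operations examplefs_file_ops;

static void examplefs_init_fops(void)
{
        examplefs_file_ops.fsync = file_fsync;
}
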
 188 void invalidate_buffers(dev_t dev)
 189 {
 190         int i;
 191         struct buffer_head * bh;
 192 
 193         bh = free_list;
 194         for (i = nr_buffers*2 ; --i > 0 ; bh = bh->b_next_free) {
 195                 if (bh->b_dev != dev)
 196                         continue;
 197                 wait_on_buffer(bh);
 198                 if (bh->b_dev == dev)
 199                         bh->b_uptodate = bh->b_dirt = bh->b_req = 0;
 200         }
 201 }
 202 
 203 /*
 204  * This routine checks whether a floppy has been changed, and
 205  * invalidates all buffer-cache-entries in that case. This
 206  * is a relatively slow routine, so we have to try to minimize using
 207  * it. Thus it is called only upon a 'mount' or 'open'. This
 208  * is the best way of combining speed and utility, I think.
 209  * People changing diskettes in the middle of an operation deserve
 210  * to lose :-)
 211  *
 212  * NOTE! Although currently this is only for floppies, the idea is
 213  * that any additional removable block-device will use this routine,
 214  * and that mount/open needn't know that floppies/whatever are
 215  * special.
 216  */
 217 void check_disk_change(dev_t dev)
 218 {
 219         int i;
 220         struct buffer_head * bh;
 221 
 222         switch(MAJOR(dev)){
 223         case 2: /* floppy disc */
 224                 if (!(bh = getblk(dev,0,1024)))
 225                         return;
 226                 i = floppy_change(bh);
 227                 brelse(bh);
 228                 break;
 229 
 230 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 231          case 8: /* Removable scsi disk */
 232                 i = check_scsidisk_media_change(dev, 0);
 233                 break;
 234 #endif
 235 
 236 #if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
 237          case 11: /* CDROM */
 238                 i = check_cdrom_media_change(dev, 0);
 239                 break;
 240 #endif
 241 
 242 #if defined(CONFIG_CDU31A)
 243          case 15: /* Sony CDROM */
 244                 i = check_cdu31a_media_change(dev, 0);
 245                 break;
 246 #endif
 247 
 248 #if defined(CONFIG_MCD)
 249          case 23: /* Mitsumi CDROM */
 250                 i = check_mcd_media_change(dev, 0);
 251                 break;
 252 #endif
 253 
 254          default:
 255                 return;
 256         };
 257 
 258         if (!i) return;
 259 
 260         printk("VFS: Disk change detected on device %d/%d\n",
 261                                         MAJOR(dev), MINOR(dev));
 262         for (i=0 ; i<NR_SUPER ; i++)
 263                 if (super_blocks[i].s_dev == dev)
 264                         put_super(super_blocks[i].s_dev);
 265         invalidate_inodes(dev);
 266         invalidate_buffers(dev);
 267 
 268 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 269 /* This is trickier for a removable hard disk, because we have to invalidate
 270    all of the partitions that lie on the disk. */
 271         if (MAJOR(dev) == 8)
 272                 revalidate_scsidisk(dev, 0);
 273 #endif
 274 }
 275 
 276 #define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)
 277 #define hash(dev,block) hash_table[_hashfn(dev,block)]
 278 
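The hash is deliberately cheap: device and block number are XORed and folded modulo the table size. A stand-alone, user-space illustration of the bucket computation (NR_HASH is an assumed value here; the kernel takes the real one from its own headers):

#include <stdio.h>

#define NR_HASH 631                     /* assumed table size, for the demo only */
#define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)

int main(void)
{
        unsigned dev = 0x0301;          /* e.g. first partition of the first IDE disk */
        int block;

        for (block = 0; block < 4; block++)
                printf("dev %#x, block %d -> bucket %u\n", dev, block, _hashfn(dev, block));
        return 0;
}
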
 279 static inline void remove_from_hash_queue(struct buffer_head * bh)
 280 {
 281         if (bh->b_next)
 282                 bh->b_next->b_prev = bh->b_prev;
 283         if (bh->b_prev)
 284                 bh->b_prev->b_next = bh->b_next;
 285         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 286                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 287         bh->b_next = bh->b_prev = NULL;
 288 }
 289 
 290 static inline void remove_from_free_list(struct buffer_head * bh)
 291 {
 292         if (!(bh->b_prev_free) || !(bh->b_next_free))
 293                 panic("VFS: Free block list corrupted");
 294         bh->b_prev_free->b_next_free = bh->b_next_free;
 295         bh->b_next_free->b_prev_free = bh->b_prev_free;
 296         if (free_list == bh)
 297                 free_list = bh->b_next_free;
 298         bh->b_next_free = bh->b_prev_free = NULL;
 299 }
 300 
 301 static inline void remove_from_queues(struct buffer_head * bh)
 302 {
 303         remove_from_hash_queue(bh);
 304         remove_from_free_list(bh);
 305 }
 306 
 307 static inline void put_first_free(struct buffer_head * bh)
 308 {
 309         if (!bh || (bh == free_list))
 310                 return;
 311         remove_from_free_list(bh);
 312 /* add to front of free list */
 313         bh->b_next_free = free_list;
 314         bh->b_prev_free = free_list->b_prev_free;
 315         free_list->b_prev_free->b_next_free = bh;
 316         free_list->b_prev_free = bh;
 317         free_list = bh;
 318 }
 319 
 320 static inline void put_last_free(struct buffer_head * bh)
 321 {
 322         if (!bh)
 323                 return;
 324         if (bh == free_list) {
 325                 free_list = bh->b_next_free;
 326                 return;
 327         }
 328         remove_from_free_list(bh);
 329 /* add to back of free list */
 330         bh->b_next_free = free_list;
 331         bh->b_prev_free = free_list->b_prev_free;
 332         free_list->b_prev_free->b_next_free = bh;
 333         free_list->b_prev_free = bh;
 334 }
 335 
 336 static inline void insert_into_queues(struct buffer_head * bh)
 337 {
 338 /* put at end of free list */
 339         bh->b_next_free = free_list;
 340         bh->b_prev_free = free_list->b_prev_free;
 341         free_list->b_prev_free->b_next_free = bh;
 342         free_list->b_prev_free = bh;
 343 /* put the buffer in new hash-queue if it has a device */
 344         bh->b_prev = NULL;
 345         bh->b_next = NULL;
 346         if (!bh->b_dev)
 347                 return;
 348         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 349         hash(bh->b_dev,bh->b_blocknr) = bh;
 350         if (bh->b_next)
 351                 bh->b_next->b_prev = bh;
 352 }
 353 
 354 static struct buffer_head * find_buffer(dev_t dev, int block, int size)
 355 {               
 356         struct buffer_head * tmp;
 357 
 358         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 359                 if (tmp->b_dev==dev && tmp->b_blocknr==block)
 360                         if (tmp->b_size == size)
 361                                 return tmp;
 362                         else {
 363                                 printk("VFS: Wrong blocksize on device %d/%d\n",
 364                                                         MAJOR(dev), MINOR(dev));
 365                                 return NULL;
 366                         }
 367         return NULL;
 368 }
 369 
 370 /*
 371  * Why like this, I hear you say... The reason is race-conditions.
  72  * As we don't lock buffers (unless we are reading them, that is),
 373  * something might happen to it while we sleep (ie a read-error
 374  * will force it bad). This shouldn't really happen currently, but
 375  * the code is ready.
 376  */
 377 struct buffer_head * get_hash_table(dev_t dev, int block, int size)
 378 {
 379         struct buffer_head * bh;
 380 
 381         for (;;) {
 382                 if (!(bh=find_buffer(dev,block,size)))
 383                         return NULL;
 384                 bh->b_count++;
 385                 wait_on_buffer(bh);
 386                 if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
 387                         return bh;
 388                 bh->b_count--;
 389         }
 390 }
 391 
 392 void set_blocksize(dev_t dev, int size)
 393 {
 394         int i;
 395         struct buffer_head * bh, *bhnext;
 396 
 397         if (!blksize_size[MAJOR(dev)])
 398                 return;
 399 
 400         switch(size) {
 401                 default: panic("Invalid blocksize passed to set_blocksize");
 402                 case 512: case 1024: case 2048: case 4096:;
 403         }
 404 
 405         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
 406                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 407                 return;
 408         }
 409         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
 410                 return;
 411         sync_buffers(dev, 2);
 412         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 413 
 414   /* We need to be quite careful how we do this - we are moving entries
 415      around on the free list, and we can get in a loop if we are not careful.*/
 416 
 417         bh = free_list;
 418         for (i = nr_buffers*2 ; --i > 0 ; bh = bhnext) {
 419                 bhnext = bh->b_next_free; 
 420                 if (bh->b_dev != dev)
 421                         continue;
 422                 if (bh->b_size == size)
 423                         continue;
 424 
 425                 wait_on_buffer(bh);
 426                 if (bh->b_dev == dev && bh->b_size != size)
 427                         bh->b_uptodate = bh->b_dirt = 0;
 428                 remove_from_hash_queue(bh);
 429 /*    put_first_free(bh); */
 430         }
 431 }
 432 
 433 /*
 434  * Ok, this is getblk, and it isn't very clear, again to hinder
 435  * race-conditions. Most of the code is seldom used, (ie repeating),
 436  * so it should be much more efficient than it looks.
 437  *
 438  * The algorithm has been changed: hopefully for the better, and an elusive bug removed.
 439  *
 440  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 441  * when the filesystem starts to get full of dirty blocks (I hope).
 442  */
 443 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
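BADNESS() ranks a candidate buffer for reuse; lower is better:

/*
 * Illustration of the four possible BADNESS() values:
 *
 *      b_dirt  b_lock   BADNESS(bh)
 *        0       0          0         clean and unlocked - the ideal victim
 *        0       1          1         clean but locked   - must be waited on
 *        1       0          2         dirty              - must be written out
 *        1       1          3         dirty and locked   - the worst choice
 *
 * The scan below keeps the lowest-badness buffer seen so far and stops as
 * soon as it finds one with badness 0.
 */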
 444 struct buffer_head * getblk(dev_t dev, int block, int size)
 445 {
 446         struct buffer_head * bh, * tmp;
 447         int buffers;
 448         static int grow_size = 0;
 449 
 450 repeat:
 451         bh = get_hash_table(dev, block, size);
 452         if (bh) {
 453                 if (bh->b_uptodate && !bh->b_dirt)
 454                         put_last_free(bh);
 455                 return bh;
 456         }
 457         grow_size -= size;
 458         if (nr_free_pages > min_free_pages && grow_size <= 0) {
 459                 grow_buffers(size);
 460                 grow_size = PAGE_SIZE;
 461         }
 462         buffers = nr_buffers;
 463         bh = NULL;
 464 
 465         for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
 466                 if (tmp->b_count || tmp->b_size != size)
 467                         continue;
 468                 if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1)
 469                         continue;
 470                 if (!bh || BADNESS(tmp)<BADNESS(bh)) {
 471                         bh = tmp;
 472                         if (!BADNESS(tmp))
 473                                 break;
 474                 }
 475 #if 0
 476                 if (tmp->b_dirt) {
 477                         tmp->b_count++;
 478                         ll_rw_block(WRITEA, 1, &tmp);
 479                         tmp->b_count--;
 480                 }
 481 #endif
 482         }
 483 
 484         if (!bh && nr_free_pages > 5) {
 485                 grow_buffers(size);
 486                 goto repeat;
 487         }
 488         
 489 /* and repeat until we find something good */
 490         if (!bh) {
 491                 sleep_on(&buffer_wait);
 492                 goto repeat;
 493         }
 494         wait_on_buffer(bh);
 495         if (bh->b_count || bh->b_size != size)
 496                 goto repeat;
 497         if (bh->b_dirt) {
 498                 sync_buffers(0,0);
 499                 goto repeat;
 500         }
 501 /* NOTE!! While we slept waiting for this block, somebody else might */
 502 /* already have added "this" block to the cache. check it */
 503         if (find_buffer(dev,block,size))
 504                 goto repeat;
 505 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 506 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 507         bh->b_count=1;
 508         bh->b_dirt=0;
 509         bh->b_uptodate=0;
 510         bh->b_req=0;
 511         remove_from_queues(bh);
 512         bh->b_dev=dev;
 513         bh->b_blocknr=block;
 514         insert_into_queues(bh);
 515         return bh;
 516 }
 517 
 518 void brelse(struct buffer_head * buf)
 519 {
 520         if (!buf)
 521                 return;
 522         wait_on_buffer(buf);
 523         if (buf->b_count) {
 524                 if (--buf->b_count)
 525                         return;
 526                 wake_up(&buffer_wait);
 527                 return;
 528         }
 529         printk("VFS: brelse: Trying to free free buffer\n");
 530 }
 531 
 532 /*
 533  * bread() reads a specified block and returns the buffer that contains
 534  * it. It returns NULL if the block was unreadable.
 535  */
 536 struct buffer_head * bread(dev_t dev, int block, int size)
 537 {
 538         struct buffer_head * bh;
 539 
 540         if (!(bh = getblk(dev, block, size))) {
 541                 printk("VFS: bread: READ error on device %d/%d\n",
 542                                                 MAJOR(dev), MINOR(dev));
 543                 return NULL;
 544         }
 545         if (bh->b_uptodate)
 546                 return bh;
 547         ll_rw_block(READ, 1, &bh);
 548         wait_on_buffer(bh);
 549         if (bh->b_uptodate)
 550                 return bh;
 551         brelse(bh);
 552         return NULL;
 553 }
 554 
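A typical read-modify-write cycle built on bread()/brelse(), written as a hypothetical caller rather than code from this file: the writer only marks the buffer dirty and leaves the actual write-back to sync_buffers().

/* Hypothetical caller (illustration only): patch one byte in a block.
 * Marking b_dirt is all a writer has to do; the buffer stays in the cache
 * after brelse() and is written out by the next sync. */
static int example_modify_block(dev_t dev, int block, int offset, char byte)
{
        struct buffer_head * bh = bread(dev, block, BLOCK_SIZE);

        if (!bh)
                return -EIO;
        bh->b_data[offset] = byte;
        bh->b_dirt = 1;
        brelse(bh);
        return 0;
}
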
 555 /*
 556  * Ok, breada can be used like bread, but it additionally marks other
 557  * blocks for read-ahead as well. End the argument list with a negative
 558  * number.
 559  */
 560 struct buffer_head * breada(dev_t dev,int first, ...)
 561 {
 562         va_list args;
 563         unsigned int blocksize;
 564         struct buffer_head * bh, *tmp;
 565 
 566         va_start(args,first);
 567 
 568         blocksize = BLOCK_SIZE;
 569         if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
 570                 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
 571 
 572         if (!(bh = getblk(dev, first, blocksize))) {
 573                 printk("VFS: breada: READ error on device %d/%d\n",
 574                                                 MAJOR(dev), MINOR(dev));
 575                 return NULL;
 576         }
 577         if (!bh->b_uptodate)
 578                 ll_rw_block(READ, 1, &bh);
 579         while ((first=va_arg(args,int))>=0) {
 580                 tmp = getblk(dev, first, blocksize);
 581                 if (tmp) {
 582                         if (!tmp->b_uptodate)
 583                                 ll_rw_block(READA, 1, &tmp);
 584                         tmp->b_count--;
 585                 }
 586         }
 587         va_end(args);
 588         wait_on_buffer(bh);
 589         if (bh->b_uptodate)
 590                 return bh;
 591         brelse(bh);
 592         return (NULL);
 593 }
 594 
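Since the variable argument list must end with a negative number, a hypothetical caller that reads one block and schedules read-ahead for the next three looks like this:

/* Hypothetical helper (illustration only): read 'block' synchronously and
 * hint that the following three blocks will be wanted soon.  The trailing
 * -1 terminates breada()'s argument list. */
static struct buffer_head * example_read_ahead(struct inode * inode, int block)
{
        return breada(inode->i_dev, block, block+1, block+2, block+3, -1);
}
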
 595 /*
 596  * See fs/inode.c for the weird use of volatile..
 597  */
 598 static void put_unused_buffer_head(struct buffer_head * bh)
 599 {
 600         struct wait_queue * wait;
 601 
 602         wait = ((volatile struct buffer_head *) bh)->b_wait;
 603         memset((void *) bh,0,sizeof(*bh));
 604         ((volatile struct buffer_head *) bh)->b_wait = wait;
 605         bh->b_next_free = unused_list;
 606         unused_list = bh;
 607 }
 608 
 609 static void get_more_buffer_heads(void)
 610 {
 611         int i;
 612         struct buffer_head * bh;
 613 
 614         if (unused_list)
 615                 return;
 616 
 617         if(! (bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
 618                 return;
 619 
 620         for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
 621                 bh->b_next_free = unused_list;  /* only make link */
 622                 unused_list = bh++;
 623         }
 624 }
 625 
 626 static struct buffer_head * get_unused_buffer_head(void)
 627 {
 628         struct buffer_head * bh;
 629 
 630         get_more_buffer_heads();
 631         if (!unused_list)
 632                 return NULL;
 633         bh = unused_list;
 634         unused_list = bh->b_next_free;
 635         bh->b_next_free = NULL;
 636         bh->b_data = NULL;
 637         bh->b_size = 0;
 638         bh->b_req = 0;
 639         return bh;
 640 }
 641 
 642 /*
 643  * Create the appropriate buffers when given a page for data area and
 644  * the size of each buffer.. Use the bh->b_this_page linked list to
 645  * follow the buffers created.  Return NULL if unable to create more
 646  * buffers.
 647  */
 648 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 649 {
 650         struct buffer_head *bh, *head;
 651         unsigned long offset;
 652 
 653         head = NULL;
 654         offset = PAGE_SIZE;
 655         while ((offset -= size) < PAGE_SIZE) {
 656                 bh = get_unused_buffer_head();
 657                 if (!bh)
 658                         goto no_grow;
 659                 bh->b_this_page = head;
 660                 head = bh;
 661                 bh->b_data = (char *) (page+offset);
 662                 bh->b_size = size;
 663         }
 664         return head;
 665 /*
 666  * In case anything failed, we just free everything we got.
 667  */
 668 no_grow:
 669         bh = head;
 670         while (bh) {
 671                 head = bh;
 672                 bh = bh->b_this_page;
 673                 put_unused_buffer_head(head);
 674         }
 675         return NULL;
 676 }
 677 
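The buffers belonging to one page are chained through b_this_page. As returned by create_buffers() the chain is NULL-terminated; grow_buffers() and try_to_load_aligned() later close it into a ring by pointing the last buffer back at the first. Walking a freshly created chain (illustration only):

/* Illustration only: dump the buffers that create_buffers() chained onto
 * one page, while the b_this_page list is still NULL-terminated. */
static void example_show_page_buffers(struct buffer_head * head)
{
        struct buffer_head * bh;

        for (bh = head ; bh != NULL ; bh = bh->b_this_page)
                printk("buffer at page offset %ld, size %ld\n",
                        (unsigned long) bh->b_data & ~PAGE_MASK,
                        (unsigned long) bh->b_size);
}
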
 678 static void read_buffers(struct buffer_head * bh[], int nrbuf)
 679 {
 680         int i;
 681         int bhnum = 0;
 682         struct buffer_head * bhr[8];
 683 
 684         for (i = 0 ; i < nrbuf ; i++) {
 685                 if (bh[i] && !bh[i]->b_uptodate)
 686                         bhr[bhnum++] = bh[i];
 687         }
 688         if (bhnum)
 689                 ll_rw_block(READ, bhnum, bhr);
 690         for (i = 0 ; i < nrbuf ; i++) {
 691                 if (bh[i]) {
 692                         wait_on_buffer(bh[i]);
 693                 }
 694         }
 695 }
 696 
 697 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
 698         dev_t dev, int *b, int size)
 699 {
 700         struct buffer_head * bh[8];
 701         unsigned long page;
 702         unsigned long offset;
 703         int block;
 704         int nrbuf;
 705 
 706         page = (unsigned long) first->b_data;
 707         if (page & ~PAGE_MASK) {
 708                 brelse(first);
 709                 return 0;
 710         }
 711         mem_map[MAP_NR(page)]++;
 712         bh[0] = first;
 713         nrbuf = 1;
 714         for (offset = size ; offset < PAGE_SIZE ; offset += size) {
 715                 block = *++b;
 716                 if (!block)
 717                         goto no_go;
 718                 first = get_hash_table(dev, block, size);
 719                 if (!first)
 720                         goto no_go;
 721                 bh[nrbuf++] = first;
 722                 if (page+offset != (unsigned long) first->b_data)
 723                         goto no_go;
 724         }
 725         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
 726         while (nrbuf-- > 0)
 727                 brelse(bh[nrbuf]);
 728         free_page(address);
 729         ++current->min_flt;
 730         return page;
 731 no_go:
 732         while (nrbuf-- > 0)
 733                 brelse(bh[nrbuf]);
 734         free_page(page);
 735         return 0;
 736 }
 737 
 738 static unsigned long try_to_load_aligned(unsigned long address,
 739         dev_t dev, int b[], int size)
 740 {
 741         struct buffer_head * bh, * tmp, * arr[8];
 742         unsigned long offset;
 743         int * p;
 744         int block;
 745 
 746         bh = create_buffers(address, size);
 747         if (!bh)
 748                 return 0;
 749         p = b;
 750         for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
 751                 block = *(p++);
 752                 if (!block)
 753                         goto not_aligned;
 754                 tmp = get_hash_table(dev, block, size);
 755                 if (tmp) {
 756                         brelse(tmp);
 757                         goto not_aligned;
 758                 }
 759         }
 760         tmp = bh;
 761         p = b;
 762         block = 0;
 763         while (1) {
 764                 arr[block++] = bh;
 765                 bh->b_count = 1;
 766                 bh->b_dirt = 0;
 767                 bh->b_uptodate = 0;
 768                 bh->b_dev = dev;
 769                 bh->b_blocknr = *(p++);
 770                 nr_buffers++;
 771                 insert_into_queues(bh);
 772                 if (bh->b_this_page)
 773                         bh = bh->b_this_page;
 774                 else
 775                         break;
 776         }
 777         buffermem += PAGE_SIZE;
 778         bh->b_this_page = tmp;
 779         mem_map[MAP_NR(address)]++;
 780         read_buffers(arr,block);
 781         while (block-- > 0)
 782                 brelse(arr[block]);
 783         ++current->maj_flt;
 784         return address;
 785 not_aligned:
 786         while ((tmp = bh) != NULL) {
 787                 bh = bh->b_this_page;
 788                 put_unused_buffer_head(tmp);
 789         }
 790         return 0;
 791 }
 792 
 793 /*
 794  * Try-to-share-buffers tries to minimize memory use by trying to keep
 795  * both code pages and the buffer area in the same page. This is done by
 796  * (a) checking if the buffers are already aligned correctly in memory and
 797  * (b) if none of the buffer heads are in memory at all, trying to load
 798  * them into memory the way we want them.
 799  *
 800  * This doesn't guarantee that the memory is shared, but should under most
 801  * circumstances work very well indeed (ie >90% sharing of code pages on
 802  * demand-loadable executables).
 803  */
 804 static inline unsigned long try_to_share_buffers(unsigned long address,
 805         dev_t dev, int *b, int size)
 806 {
 807         struct buffer_head * bh;
 808         int block;
 809 
 810         block = b[0];
 811         if (!block)
 812                 return 0;
 813         bh = get_hash_table(dev, block, size);
 814         if (bh)
 815                 return check_aligned(bh, address, dev, b, size);
 816         return try_to_load_aligned(address, dev, b, size);
 817 }
 818 
 819 #define COPYBLK(size,from,to) \
 820 __asm__ __volatile__("rep ; movsl": \
 821         :"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \
 822         :"cx","di","si")
 823 
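COPYBLK copies size/4 long words with a single rep ; movsl. A portable C version, for reference only (the inline assembly stays because this is a hot path on the 386):

/* Reference version of COPYBLK (illustration only, not used by the kernel):
 * copy 'size' bytes four at a time; 'size' is assumed to be a multiple of
 * four, which holds for every block size handled here. */
static inline void copyblk_c(unsigned long size, unsigned long from, unsigned long to)
{
        unsigned long * src = (unsigned long *) from;
        unsigned long * dst = (unsigned long *) to;
        unsigned long count = size >> 2;

        while (count--)
                *dst++ = *src++;
}
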
 824 /*
 825  * bread_page reads four buffers into memory at the desired address. It's
 826  * a function of its own, as there is some speed to be got by reading them
 827  * all at the same time, not waiting for one to be read, and then another
 828  * etc. This also allows us to optimize memory usage by sharing code pages
 829  * and filesystem buffers..
 830  */
 831 unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
 832 {
 833         struct buffer_head * bh[8];
 834         unsigned long where;
 835         int i, j;
 836 
 837         if (!(prot & PAGE_RW)) {
 838                 where = try_to_share_buffers(address,dev,b,size);
 839                 if (where)
 840                         return where;
 841         }
 842         ++current->maj_flt;
 843         for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
 844                 bh[i] = NULL;
 845                 if (b[i])
 846                         bh[i] = getblk(dev, b[i], size);
 847         }
 848         read_buffers(bh,i);     /* i = PAGE_SIZE/size buffers were set up above */
 849         where = address;
 850         for (i=0, j=0; j<PAGE_SIZE ; i++, j += size,address += size) {
 851                 if (bh[i]) {
 852                         if (bh[i]->b_uptodate)
 853                                 COPYBLK(size, (unsigned long) bh[i]->b_data,address);
 854                         brelse(bh[i]);
 855                 }
 856         }
 857         return where;
 858 }
 859 
 860 /*
 861  * Try to increase the number of buffers available: the size argument
 862  * is used to determine what kind of buffers we want.
 863  */
 864 void grow_buffers(int size)
 865 {
 866         unsigned long page;
 867         struct buffer_head *bh, *tmp;
 868 
 869         if ((size & 511) || (size > PAGE_SIZE)) {
 870                 printk("VFS: grow_buffers: size = %d\n",size);
 871                 return;
 872         }
 873         if(!(page = __get_free_page(GFP_BUFFER)))
 874                 return;
 875         bh = create_buffers(page, size);
 876         if (!bh) {
 877                 free_page(page);
 878                 return;
 879         }
 880         tmp = bh;
 881         while (1) {
 882                 if (free_list) {
 883                         tmp->b_next_free = free_list;
 884                         tmp->b_prev_free = free_list->b_prev_free;
 885                         free_list->b_prev_free->b_next_free = tmp;
 886                         free_list->b_prev_free = tmp;
 887                 } else {
 888                         tmp->b_prev_free = tmp;
 889                         tmp->b_next_free = tmp;
 890                 }
 891                 free_list = tmp;
 892                 ++nr_buffers;
 893                 if (tmp->b_this_page)
 894                         tmp = tmp->b_this_page;
 895                 else
 896                         break;
 897         }
 898         tmp->b_this_page = bh;
 899         buffermem += PAGE_SIZE;
 900         return;
 901 }
 902 
 903 /*
 904  * try_to_free() checks if all the buffers on this particular page
 905  * are unused, and frees the page if so.
 906  */
 907 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
 908 {
 909         unsigned long page;
 910         struct buffer_head * tmp, * p;
 911 
 912         *bhp = bh;
 913         page = (unsigned long) bh->b_data;
 914         page &= PAGE_MASK;
 915         tmp = bh;
 916         do {
 917                 if (!tmp)
 918                         return 0;
 919                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
 920                         return 0;
 921                 tmp = tmp->b_this_page;
 922         } while (tmp != bh);
 923         tmp = bh;
 924         do {
 925                 p = tmp;
 926                 tmp = tmp->b_this_page;
 927                 nr_buffers--;
 928                 if (p == *bhp)
 929                         *bhp = p->b_prev_free;
 930                 remove_from_queues(p);
 931                 put_unused_buffer_head(p);
 932         } while (tmp != bh);
 933         buffermem -= PAGE_SIZE;
 934         free_page(page);
 935         return !mem_map[MAP_NR(page)];
 936 }
 937 
 938 /*
 939  * Try to free up some pages by shrinking the buffer-cache
 940  *
 941  * Priority tells the routine how hard to try to shrink the
 942  * buffers: 3 means "don't bother too much", while a value
 943  * of 0 means "we'd better get some free pages now".
 944  */
 945 int shrink_buffers(unsigned int priority)
 946 {
 947         struct buffer_head *bh;
 948         int i;
 949 
 950         if (priority < 2)
 951                 sync_buffers(0,0);
 952         bh = free_list;
 953         i = nr_buffers >> priority;
 954         for ( ; i-- > 0 ; bh = bh->b_next_free) {
 955                 if (bh->b_count || !bh->b_this_page)
 956                         continue;
 957                 if (bh->b_lock)
 958                         if (priority)
 959                                 continue;
 960                         else
 961                                 wait_on_buffer(bh);
 962                 if (bh->b_dirt) {
 963                         bh->b_count++;
 964                         ll_rw_block(WRITEA, 1, &bh);
 965                         bh->b_count--;
 966                         continue;
 967                 }
 968                 if (try_to_free(bh, &bh))
 969                         return 1;
 970         }
 971         return 0;
 972 }
 973 
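To make the priority scaling concrete: with, say, 2048 buffers in the cache, a priority-3 call scans only 2048 >> 3 = 256 buffers from the front of the free list, while a priority-0 call first writes out all dirty buffers via sync_buffers(0,0) (any priority below 2 does) and is then prepared to scan all 2048.
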
 974 /*
 975  * This initializes the initial buffer free list.  nr_buffers is set
 976  * to one less than the actual number of buffers, as a sop to backwards
 977  * compatibility --- the old code did this (I think unintentionally,
 978  * but I'm not sure), and programs in the ps package expect it.
 979  *                                      - TYT 8/30/92
 980  */
 981 void buffer_init(void)
 982 {
 983         int i;
 984 
 985         if (high_memory >= 4*1024*1024)
 986                 min_free_pages = 200;
 987         else
 988                 min_free_pages = 20;
 989         for (i = 0 ; i < NR_HASH ; i++)
 990                 hash_table[i] = NULL;
 991         free_list = 0;
 992         grow_buffers(BLOCK_SIZE);
 993         if (!free_list)
 994                 panic("VFS: Unable to initialize buffer free list!");
 995         return;
 996 }
