root/fs/buffer.c


DEFINITIONS

This source file includes the following definitions.
  1. __wait_on_buffer
  2. sync_buffers
  3. sync_dev
  4. fsync_dev
  5. sys_sync
  6. file_fsync
  7. sys_fsync
  8. invalidate_buffers
  9. check_disk_change
  10. remove_from_hash_queue
  11. remove_from_free_list
  12. remove_from_queues
  13. put_first_free
  14. put_last_free
  15. insert_into_queues
  16. find_buffer
  17. get_hash_table
  18. getblk
  19. brelse
  20. bread
  21. breada
  22. put_unused_buffer_head
  23. get_more_buffer_heads
  24. get_unused_buffer_head
  25. create_buffers
  26. read_buffers
  27. check_aligned
  28. try_to_load_aligned
  29. try_to_share_buffers
  30. bread_page
  31. grow_buffers
  32. try_to_free
  33. shrink_buffers
  34. buffer_init

   1 /*
   2  *  linux/fs/buffer.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
   9  * been avoided by NEVER letting an interrupt change a buffer (except for the
  10  * data, of course), but instead letting the caller do it.
  11  */
  12 
  13 /*
  14  * NOTE! There is one discordant note here: checking floppies for
  15  * disk change. This is where it fits best, I think, as it should
  16  * invalidate changed floppy-disk-caches.
  17  */
  18 
  19 #include <stdarg.h>
  20  
  21 #include <linux/config.h>
  22 #include <linux/errno.h>
  23 #include <linux/sched.h>
  24 #include <linux/kernel.h>
  25 #include <linux/string.h>
  26 #include <linux/locks.h>
  27 #include <linux/errno.h>
  28 
  29 #include <asm/system.h>
  30 #include <asm/io.h>
  31 
  32 #ifdef CONFIG_SCSI
  33 #ifdef CONFIG_BLK_DEV_SR
  34 extern int check_cdrom_media_change(int, int);
  35 #endif
  36 #ifdef CONFIG_BLK_DEV_SD
  37 extern int check_scsidisk_media_change(int, int);
  38 extern int revalidate_scsidisk(int, int);
  39 #endif
  40 #endif
  41 
  42 static struct buffer_head * hash_table[NR_HASH];
  43 static struct buffer_head * free_list = NULL;
  44 static struct buffer_head * unused_list = NULL;
  45 static struct wait_queue * buffer_wait = NULL;
  46 
  47 int nr_buffers = 0;
  48 int buffermem = 0;
  49 int nr_buffer_heads = 0;
  50 static int min_free_pages = 20; /* nr free pages needed before buffer grows */
  51 
  52 /*
  53  * Rewrote the wait-routines to use the "new" wait-queue functionality,
  54  * and got rid of the cli-sti pairs. The wait-queue routines still
  55  * need cli-sti, but now it's just a couple of 386 instructions or so.
  56  *
  57  * Note that the real wait_on_buffer() is an inline function that checks
  58  * if 'b_wait' is set before calling this, so that the queues aren't set
  59  * up unnecessarily.
  60  */
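     /*
      * Note on the loop below: the buffer is pinned with b_count++ so that
      * it cannot be reclaimed while we sleep, and current->state is set to
      * TASK_UNINTERRUPTIBLE before b_lock is re-tested, so that a wakeup
      * arriving between the test and the schedule() is not lost.
      */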
  61 void __wait_on_buffer(struct buffer_head * bh)
  62 {
  63         struct wait_queue wait = { current, NULL };
  64 
  65         bh->b_count++;
  66         add_wait_queue(&bh->b_wait, &wait);
  67 repeat:
  68         current->state = TASK_UNINTERRUPTIBLE;
  69         if (bh->b_lock) {
  70                 schedule();
  71                 goto repeat;
  72         }
  73         remove_wait_queue(&bh->b_wait, &wait);
  74         bh->b_count--;
  75         current->state = TASK_RUNNING;
  76 }
  77 
  78 /* Call sync_buffers with wait!=0 to ensure that the call does not
  79    return until all buffer writes have completed.  Sync() may return
  80    before the writes have finished; fsync() may not. */
  81 
  82 static int sync_buffers(dev_t dev, int wait)
  83 {
  84         int i, retry, pass = 0, err = 0;
  85         struct buffer_head * bh;
  86 
  87         /* One pass for no-wait, three for wait:
  88            0) write out all dirty, unlocked buffers;
  89            1) write out all dirty buffers, waiting if locked;
  90            2) wait for completion by waiting for all buffers to unlock.
  91          */
  92 repeat:
  93         retry = 0;
  94         bh = free_list;
  95         for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) {
  96                 if (dev && bh->b_dev != dev)
  97                         continue;
  98 #if 0 /* Disable bad-block debugging code */
  99                 if (bh->b_req && !bh->b_lock &&
 100                     !bh->b_dirt && !bh->b_uptodate)
 101                         printk ("Warning (IO error) - orphaned block %08x on %04x\n",
 102                                 bh->b_blocknr, bh->b_dev);
 103 #endif
 104                 if (bh->b_lock)
 105                 {
 106                         /* Buffer is locked; skip it unless wait is
 107                            requested AND pass > 0. */
 108                         if (!wait || !pass) {
 109                                 retry = 1;
 110                                 continue;
 111                         }
 112                         wait_on_buffer (bh);
 113                 }
 114                 /* If an unlocked buffer is not uptodate, there has been 
 115                    an IO error. Skip it. */
 116                 if (wait && bh->b_req && !bh->b_lock &&
 117                     !bh->b_dirt && !bh->b_uptodate)
 118                 {
 119                         err = 1;
 120                         continue;
 121                 }
 122                 /* Don't write clean buffers.  Don't write ANY buffers
 123                    on the third pass. */
 124                 if (!bh->b_dirt || pass>=2)
 125                         continue;
 126                 bh->b_count++;
 127                 ll_rw_block(WRITE, 1, &bh);
 128                 bh->b_count--;
 129                 retry = 1;
 130         }
 131         /* If we are waiting for the sync to succeed, and if any dirty
 132            blocks were written, then repeat; on the second pass, only
 133            wait for buffers being written (do not pass to write any
 134            more buffers on the second pass). */
 135         if (wait && retry && ++pass<=2)
 136                 goto repeat;
 137         return err;
 138 }
 139 
 140 void sync_dev(dev_t dev)
 141 {
 142         sync_buffers(dev, 0);
 143         sync_supers(dev);
 144         sync_inodes(dev);
 145         sync_buffers(dev, 0);
 146 }
 147 
 148 int fsync_dev(dev_t dev)
 149 {
 150         sync_buffers(dev, 0);
 151         sync_supers(dev);
 152         sync_inodes(dev);
 153         return sync_buffers(dev, 1);
 154 }
 155 
 156 int sys_sync(void)
 157 {
 158         sync_dev(0);
 159         return 0;
 160 }
 161 
 162 int file_fsync (struct inode *inode, struct file *filp)
 163 {
 164         return fsync_dev(inode->i_dev);
 165 }
 166 
 167 int sys_fsync(unsigned int fd)
 168 {
 169         struct file * file;
 170         struct inode * inode;
 171 
 172         if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode))
 173                 return -EBADF;
 174         if (!file->f_op || !file->f_op->fsync)
 175                 return -EINVAL;
 176         if (file->f_op->fsync(inode,file))
 177                 return -EIO;
 178         return 0;
 179 }
 180 
 181 void invalidate_buffers(dev_t dev)
 182 {
 183         int i;
 184         struct buffer_head * bh;
 185 
 186         bh = free_list;
 187         for (i = nr_buffers*2 ; --i > 0 ; bh = bh->b_next_free) {
 188                 if (bh->b_dev != dev)
 189                         continue;
 190                 wait_on_buffer(bh);
 191                 if (bh->b_dev == dev)
 192                         bh->b_uptodate = bh->b_dirt = bh->b_req = 0;
 193         }
 194 }
 195 
 196 /*
 197  * This routine checks whether a floppy has been changed, and
 198  * invalidates all buffer-cache-entries in that case. This
 199  * is a relatively slow routine, so we have to try to minimize using
 200  * it. Thus it is called only upon a 'mount' or 'open'. This
 201  * is the best way of combining speed and utility, I think.
 202  * People changing diskettes in the middle of an operation deserve
 203  * to lose :-)
 204  *
 205  * NOTE! Although currently this is only for floppies, the idea is
 206  * that any additional removable block-device will use this routine,
 207  * and that mount/open needn't know that floppies/whatever are
 208  * special.
 209  */
 210 void check_disk_change(dev_t dev)
 211 {
 212         int i;
 213         struct buffer_head * bh;
 214 
 215         switch(MAJOR(dev)){
 216         case 2: /* floppy disc */
 217                 if (!(bh = getblk(dev,0,1024)))
 218                         return;
 219                 i = floppy_change(bh);
 220                 brelse(bh);
 221                 break;
 222 
 223 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 224          case 8: /* Removable scsi disk */
 225                 i = check_scsidisk_media_change(dev, 0);
 226                 break;
 227 #endif
 228 
 229 #if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
 230          case 11: /* CDROM */
 231                 i = check_cdrom_media_change(dev, 0);
 232                 break;
 233 #endif
 234 
 235          default:
 236                 return;
 237         }
 238 
 239         if (!i) return;
 240 
 241         printk("VFS: Disk change detected on device %d/%d\n",
 242                                         MAJOR(dev), MINOR(dev));
 243         for (i=0 ; i<NR_SUPER ; i++)
 244                 if (super_block[i].s_dev == dev)
 245                         put_super(super_block[i].s_dev);
 246         invalidate_inodes(dev);
 247         invalidate_buffers(dev);
 248 
 249 #if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
 250 /* This is trickier for a removable hard disk, because we have to invalidate
 251    all of the partitions that lie on the disk. */
 252         if (MAJOR(dev) == 8)
 253                 revalidate_scsidisk(dev, 0);
 254 #endif
 255 }
 256 
 257 #define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)
 258 #define hash(dev,block) hash_table[_hashfn(dev,block)]
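     /*
      * Example: the bucket for a (dev,block) pair is simply
      * (dev ^ block) % NR_HASH, so buffers for the same block number on
      * different devices will normally land in different buckets and
      * find_buffer() only has to walk one short b_next chain.
      */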
 259 
 260 static inline void remove_from_hash_queue(struct buffer_head * bh)
 261 {
 262         if (bh->b_next)
 263                 bh->b_next->b_prev = bh->b_prev;
 264         if (bh->b_prev)
 265                 bh->b_prev->b_next = bh->b_next;
 266         if (hash(bh->b_dev,bh->b_blocknr) == bh)
 267                 hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
 268         bh->b_next = bh->b_prev = NULL;
 269 }
 270 
 271 static inline void remove_from_free_list(struct buffer_head * bh)
 272 {
 273         if (!(bh->b_prev_free) || !(bh->b_next_free))
 274                 panic("VFS: Free block list corrupted");
 275         bh->b_prev_free->b_next_free = bh->b_next_free;
 276         bh->b_next_free->b_prev_free = bh->b_prev_free;
 277         if (free_list == bh)
 278                 free_list = bh->b_next_free;
 279         bh->b_next_free = bh->b_prev_free = NULL;
 280 }
 281 
 282 static inline void remove_from_queues(struct buffer_head * bh)
 283 {
 284         remove_from_hash_queue(bh);
 285         remove_from_free_list(bh);
 286 }
 287 
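     /*
      * The free list is a circular, doubly-linked list: free_list points at
      * the front (the first buffer examined when getblk() or shrink_buffers()
      * looks for a victim) and free_list->b_prev_free is the back.  The two
      * helpers below move a buffer to either end of that ring.
      */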
 288 static inline void put_first_free(struct buffer_head * bh)
 289 {
 290         if (!bh || (bh == free_list))
 291                 return;
 292         remove_from_free_list(bh);
 293 /* add to front of free list */
 294         bh->b_next_free = free_list;
 295         bh->b_prev_free = free_list->b_prev_free;
 296         free_list->b_prev_free->b_next_free = bh;
 297         free_list->b_prev_free = bh;
 298         free_list = bh;
 299 }
 300 
 301 static inline void put_last_free(struct buffer_head * bh)
 302 {
 303         if (!bh)
 304                 return;
 305         if (bh == free_list) {
 306                 free_list = bh->b_next_free;
 307                 return;
 308         }
 309         remove_from_free_list(bh);
 310 /* add to back of free list */
 311         bh->b_next_free = free_list;
 312         bh->b_prev_free = free_list->b_prev_free;
 313         free_list->b_prev_free->b_next_free = bh;
 314         free_list->b_prev_free = bh;
 315 }
 316 
 317 static inline void insert_into_queues(struct buffer_head * bh)
 318 {
 319 /* put at end of free list */
 320         bh->b_next_free = free_list;
 321         bh->b_prev_free = free_list->b_prev_free;
 322         free_list->b_prev_free->b_next_free = bh;
 323         free_list->b_prev_free = bh;
 324 /* put the buffer in new hash-queue if it has a device */
 325         bh->b_prev = NULL;
 326         bh->b_next = NULL;
 327         if (!bh->b_dev)
 328                 return;
 329         bh->b_next = hash(bh->b_dev,bh->b_blocknr);
 330         hash(bh->b_dev,bh->b_blocknr) = bh;
 331         if (bh->b_next)
 332                 bh->b_next->b_prev = bh;
 333 }
 334 
 335 static struct buffer_head * find_buffer(dev_t dev, int block, int size)
 336 {               
 337         struct buffer_head * tmp;
 338 
 339         for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
 340                 if (tmp->b_dev==dev && tmp->b_blocknr==block)
 341                         if (tmp->b_size == size)
 342                                 return tmp;
 343                         else {
 344                                 printk("VFS: Wrong blocksize on device %d/%d\n",
 345                                                         MAJOR(dev), MINOR(dev));
 346                                 return NULL;
 347                         }
 348         return NULL;
 349 }
 350 
 351 /*
 352  * Why like this, I hear you say... The reason is race-conditions.
 353  * As we don't lock buffers (unless we are reading them, that is),
 354  * something might happen to one while we sleep (ie a read-error
 355  * will force it bad). This shouldn't really happen currently, but
 356  * the code is ready.
 357  */
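     /*
      * In other words: pin the buffer (b_count++), wait for any pending I/O,
      * and then check that it still describes the same (dev, block, size)
      * before returning it; otherwise drop the reference and retry the
      * lookup.
      */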
 358 struct buffer_head * get_hash_table(dev_t dev, int block, int size)
 359 {
 360         struct buffer_head * bh;
 361 
 362         for (;;) {
 363                 if (!(bh=find_buffer(dev,block,size)))
 364                         return NULL;
 365                 bh->b_count++;
 366                 wait_on_buffer(bh);
 367                 if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
 368                         return bh;
 369                 bh->b_count--;
 370         }
 371 }
 372 
 373 /*
 374  * Ok, this is getblk, and it isn't very clear, again to hinder
 375  * race-conditions. Most of the code is seldom used (ie the repeat paths),
 376  * so it should be much more efficient than it looks.
 377  *
 378  * The algorithm has been changed: hopefully for the better, and an elusive bug removed.
 379  *
 380  * 14.02.92: changed it to sync dirty buffers a bit: better performance
 381  * when the filesystem starts to get full of dirty blocks (I hope).
 382  */
 383 #define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
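     /*
      * BADNESS ranks reclaim candidates: 0 = clean and unlocked (best),
      * 1 = clean but locked, 2 = dirty and unlocked, 3 = dirty and locked
      * (worst).  getblk() picks the buffer with the lowest badness and stops
      * searching as soon as it finds a perfect (badness 0) one.
      */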
 384 struct buffer_head * getblk(dev_t dev, int block, int size)
 385 {
 386         struct buffer_head * bh, * tmp;
 387         int buffers;
 388         static int grow_size = 0;
 389 
 390 repeat:
 391         bh = get_hash_table(dev, block, size);
 392         if (bh) {
 393                 if (bh->b_uptodate && !bh->b_dirt)
 394                         put_last_free(bh);
 395                 return bh;
 396         }
 397         grow_size -= size;
 398         if (nr_free_pages > min_free_pages && grow_size <= 0) {
 399                 grow_buffers(size);
 400                 grow_size = 4096;
 401         }
 402         buffers = nr_buffers;
 403         bh = NULL;
 404 
 405         for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
 406                 if (tmp->b_count || tmp->b_size != size)
 407                         continue;
 408                 if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1)
 409                         continue;
 410                 if (!bh || BADNESS(tmp)<BADNESS(bh)) {
 411                         bh = tmp;
 412                         if (!BADNESS(tmp))
 413                                 break;
 414                 }
 415 #if 0
 416                 if (tmp->b_dirt) {
 417                         tmp->b_count++;
 418                         ll_rw_block(WRITEA, 1, &tmp);
 419                         tmp->b_count--;
 420                 }
 421 #endif
 422         }
 423 
 424         if (!bh && nr_free_pages > 5) {
 425                 grow_buffers(size);
 426                 goto repeat;
 427         }
 428         
 429 /* and repeat until we find something good */
 430         if (!bh) {
 431                 sleep_on(&buffer_wait);
 432                 goto repeat;
 433         }
 434         wait_on_buffer(bh);
 435         if (bh->b_count || bh->b_size != size)
 436                 goto repeat;
 437         if (bh->b_dirt) {
 438                 sync_buffers(0,0);
 439                 goto repeat;
 440         }
 441 /* NOTE!! While we slept waiting for this block, somebody else might */
 442 /* already have added "this" block to the cache. check it */
 443         if (find_buffer(dev,block,size))
 444                 goto repeat;
 445 /* OK, FINALLY we know that this buffer is the only one of its kind, */
 446 /* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
 447         bh->b_count=1;
 448         bh->b_dirt=0;
 449         bh->b_uptodate=0;
 450         bh->b_req=0;
 451         remove_from_queues(bh);
 452         bh->b_dev=dev;
 453         bh->b_blocknr=block;
 454         insert_into_queues(bh);
 455         return bh;
 456 }
 457 
 458 void brelse(struct buffer_head * buf)
 459 {
 460         if (!buf)
 461                 return;
 462         wait_on_buffer(buf);
 463         if (buf->b_count) {
 464                 if (--buf->b_count)
 465                         return;
 466                 wake_up(&buffer_wait);
 467                 return;
 468         }
 469         printk("VFS: brelse: Trying to free free buffer\n");
 470 }
 471 
 472 /*
 473  * bread() reads a specified block and returns the buffer that contains
 474  * it. It returns NULL if the block was unreadable.
 475  */
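     /*
      * Typical use (sketch; 'dev' and 'block' stand for whatever the caller
      * has at hand):
      *
      *   struct buffer_head * bh = bread(dev, block, BLOCK_SIZE);
      *   if (bh) {
      *           ... look at bh->b_data ...
      *           brelse(bh);
      *   }
      */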
 476 struct buffer_head * bread(dev_t dev, int block, int size)
 477 {
 478         struct buffer_head * bh;
 479 
 480         if (!(bh = getblk(dev, block, size))) {
 481                 printk("VFS: bread: READ error on device %d/%d\n",
 482                                                 MAJOR(dev), MINOR(dev));
 483                 return NULL;
 484         }
 485         if (bh->b_uptodate)
 486                 return bh;
 487         ll_rw_block(READ, 1, &bh);
 488         wait_on_buffer(bh);
 489         if (bh->b_uptodate)
 490                 return bh;
 491         brelse(bh);
 492         return NULL;
 493 }
 494 
 495 /*
 496  * Ok, breada can be used as bread, but additionally to mark other
 497  * blocks for reading as well. End the argument list with a negative
 498  * number.
 499  */
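     /*
      * For example (sketch), to read block 'b' and start read-ahead on the
      * two blocks after it:
      *
      *   bh = breada(dev, b, b+1, b+2, -1);
      */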
 500 struct buffer_head * breada(dev_t dev,int first, ...)
 501 {
 502         va_list args;
 503         struct buffer_head * bh, *tmp;
 504 
 505         va_start(args,first);
 506         if (!(bh = getblk(dev, first, 1024))) {
 507                 printk("VFS: breada: READ error on device %d/%d\n",
 508                                                 MAJOR(dev), MINOR(dev));
 509                 return NULL;
 510         }
 511         if (!bh->b_uptodate)
 512                 ll_rw_block(READ, 1, &bh);
 513         while ((first=va_arg(args,int))>=0) {
 514                 tmp = getblk(dev, first, 1024);
 515                 if (tmp) {
 516                         if (!tmp->b_uptodate)
 517                                 ll_rw_block(READA, 1, &tmp);
 518                         tmp->b_count--;
 519                 }
 520         }
 521         va_end(args);
 522         wait_on_buffer(bh);
 523         if (bh->b_uptodate)
 524                 return bh;
 525         brelse(bh);
 526         return (NULL);
 527 }
 528 
 529 /*
 530  * See fs/inode.c for the weird use of volatile..
 531  */
 532 static void put_unused_buffer_head(struct buffer_head * bh)
 533 {
 534         struct wait_queue * wait;
 535 
 536         wait = ((volatile struct buffer_head *) bh)->b_wait;
 537         memset((void *) bh,0,sizeof(*bh));
 538         ((volatile struct buffer_head *) bh)->b_wait = wait;
 539         bh->b_next_free = unused_list;
 540         unused_list = bh;
 541 }
 542 
 543 static void get_more_buffer_heads(void)
 544 {
 545         unsigned long page;
 546         struct buffer_head * bh;
 547 
 548         if (unused_list)
 549                 return;
 550         page = get_free_page(GFP_KERNEL);
 551         if (!page)
 552                 return;
 553         bh = (struct buffer_head *) page;
 554         while ((unsigned long) (bh+1) <= page+4096) {
 555                 put_unused_buffer_head(bh);
 556                 bh++;
 557                 nr_buffer_heads++;
 558         }
 559 }
 560 
 561 static struct buffer_head * get_unused_buffer_head(void)
 562 {
 563         struct buffer_head * bh;
 564 
 565         get_more_buffer_heads();
 566         if (!unused_list)
 567                 return NULL;
 568         bh = unused_list;
 569         unused_list = bh->b_next_free;
 570         bh->b_next_free = NULL;
 571         bh->b_data = NULL;
 572         bh->b_size = 0;
 573         bh->b_req = 0;
 574         return bh;
 575 }
 576 
 577 /*
 578  * Create the appropriate buffers when given a page for the data area and
 579  * the size of each buffer. Use the bh->b_this_page linked list to
 580  * follow the buffers created.  Return NULL if unable to create more
 581  * buffers.
 582  */
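     /*
      * For a 4096-byte page and 1024-byte buffers, the returned head has its
      * b_data at offset 0 and b_this_page runs through the buffers at offsets
      * 1024, 2048 and 3072.  The list is NULL-terminated here; it is only
      * closed into a ring by the callers (grow_buffers, try_to_load_aligned).
      */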
 583 static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
 584 {
 585         struct buffer_head *bh, *head;
 586         unsigned long offset;
 587 
 588         head = NULL;
 589         offset = 4096;
 590         while ((offset -= size) < 4096) {
 591                 bh = get_unused_buffer_head();
 592                 if (!bh)
 593                         goto no_grow;
 594                 bh->b_this_page = head;
 595                 head = bh;
 596                 bh->b_data = (char *) (page+offset);
 597                 bh->b_size = size;
 598         }
 599         return head;
 600 /*
 601  * In case anything failed, we just free everything we got.
 602  */
 603 no_grow:
 604         bh = head;
 605         while (bh) {
 606                 head = bh;
 607                 bh = bh->b_this_page;
 608                 put_unused_buffer_head(head);
 609         }
 610         return NULL;
 611 }
 612 
 613 static void read_buffers(struct buffer_head * bh[], int nrbuf)
 614 {
 615         int i;
 616         int bhnum = 0;
 617         struct buffer_head * bhr[8];
 618 
 619         for (i = 0 ; i < nrbuf ; i++) {
 620                 if (bh[i] && !bh[i]->b_uptodate)
 621                         bhr[bhnum++] = bh[i];
 622         }
 623         if (bhnum)
 624                 ll_rw_block(READ, bhnum, bhr);
 625         for (i = 0 ; i < nrbuf ; i++) {
 626                 if (bh[i]) {
 627                         wait_on_buffer(bh[i]);
 628                 }
 629         }
 630 }
 631 
 632 static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
 633         dev_t dev, int *b, int size)
 634 {
 635         struct buffer_head * bh[8];
 636         unsigned long page;
 637         unsigned long offset;
 638         int block;
 639         int nrbuf;
 640 
 641         page = (unsigned long) first->b_data;
 642         if (page & 0xfff) {
 643                 brelse(first);
 644                 return 0;
 645         }
 646         mem_map[MAP_NR(page)]++;
 647         bh[0] = first;
 648         nrbuf = 1;
 649         for (offset = size ; offset < 4096 ; offset += size) {
 650                 block = *++b;
 651                 if (!block)
 652                         goto no_go;
 653                 first = get_hash_table(dev, block, size);
 654                 if (!first)
 655                         goto no_go;
 656                 bh[nrbuf++] = first;
 657                 if (page+offset != (unsigned long) first->b_data)
 658                         goto no_go;
 659         }
 660         read_buffers(bh,nrbuf);         /* make sure they are actually read correctly */
 661         while (nrbuf-- > 0)
 662                 brelse(bh[nrbuf]);
 663         free_page(address);
 664         ++current->min_flt;
 665         return page;
 666 no_go:
 667         while (nrbuf-- > 0)
 668                 brelse(bh[nrbuf]);
 669         free_page(page);
 670         return 0;
 671 }
 672 
 673 static unsigned long try_to_load_aligned(unsigned long address,
 674         dev_t dev, int b[], int size)
 675 {
 676         struct buffer_head * bh, * tmp, * arr[8];
 677         unsigned long offset;
 678         int * p;
 679         int block;
 680 
 681         bh = create_buffers(address, size);
 682         if (!bh)
 683                 return 0;
 684         p = b;
 685         for (offset = 0 ; offset < 4096 ; offset += size) {
 686                 block = *(p++);
 687                 if (!block)
 688                         goto not_aligned;
 689                 tmp = get_hash_table(dev, block, size);
 690                 if (tmp) {
 691                         brelse(tmp);
 692                         goto not_aligned;
 693                 }
 694         }
 695         tmp = bh;
 696         p = b;
 697         block = 0;
 698         while (1) {
 699                 arr[block++] = bh;
 700                 bh->b_count = 1;
 701                 bh->b_dirt = 0;
 702                 bh->b_uptodate = 0;
 703                 bh->b_dev = dev;
 704                 bh->b_blocknr = *(p++);
 705                 nr_buffers++;
 706                 insert_into_queues(bh);
 707                 if (bh->b_this_page)
 708                         bh = bh->b_this_page;
 709                 else
 710                         break;
 711         }
 712         buffermem += 4096;
 713         bh->b_this_page = tmp;
 714         mem_map[MAP_NR(address)]++;
 715         read_buffers(arr,block);
 716         while (block-- > 0)
 717                 brelse(arr[block]);
 718         ++current->maj_flt;
 719         return address;
 720 not_aligned:
 721         while ((tmp = bh) != NULL) {
 722                 bh = bh->b_this_page;
 723                 put_unused_buffer_head(tmp);
 724         }
 725         return 0;
 726 }
 727 
 728 /*
 729  * Try-to-share-buffers tries to minimize memory use by trying to keep
 730  * both code pages and the buffer area in the same page. This is done by
 731  * (a) checking if the buffers are already aligned correctly in memory and
 732  * (b) if none of the buffer heads are in memory at all, trying to load
 733  * them into memory the way we want them.
 734  *
 735  * This doesn't guarantee that the memory is shared, but should under most
 736  * circumstances work very well indeed (ie >90% sharing of code pages on
 737  * demand-loadable executables).
 738  */
 739 static inline unsigned long try_to_share_buffers(unsigned long address,
 740         dev_t dev, int *b, int size)
 741 {
 742         struct buffer_head * bh;
 743         int block;
 744 
 745         block = b[0];
 746         if (!block)
 747                 return 0;
 748         bh = get_hash_table(dev, block, size);
 749         if (bh)
 750                 return check_aligned(bh, address, dev, b, size);
 751         return try_to_load_aligned(address, dev, b, size);
 752 }
 753 
 754 #define COPYBLK(from,to) \
 755 __asm__ __volatile__("rep ; movsl" \
 756         ::"c" (BLOCK_SIZE/4),"S" (from),"D" (to) \
 757         :"cx","di","si")
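     /*
      * COPYBLK is effectively memcpy(to,from,BLOCK_SIZE): "rep ; movsl"
      * copies BLOCK_SIZE/4 longwords with SI/DI as source/destination, and
      * the clobber list tells gcc that cx, si and di get modified.
      */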
 758 
 759 /*
 760  * bread_page reads four buffers into memory at the desired address. It's
 761  * a function of its own, as there is some speed to be gained by reading them
 762  * all at the same time rather than waiting for one to be read, and then
 763  * another, etc. This also allows us to optimize memory usage by sharing code pages
 764  * and filesystem buffers.
 765  */
 766 unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
 767 {
 768         struct buffer_head * bh[8];
 769         unsigned long where;
 770         int i;
 771 
 772         if (!(prot & PAGE_RW)) {
 773                 where = try_to_share_buffers(address,dev,b,size);
 774                 if (where)
 775                         return where;
 776         }
 777         ++current->maj_flt;
 778         for (i=0 ; i<4 ; i++) {
 779                 bh[i] = NULL;
 780                 if (b[i])
 781                         bh[i] = getblk(dev, b[i], size);
 782         }
 783         read_buffers(bh,4);
 784         where = address;
 785         for (i=0 ; i<4 ; i++,address += BLOCK_SIZE) {
 786                 if (bh[i]) {
 787                         if (bh[i]->b_uptodate)
 788                                 COPYBLK((unsigned long) bh[i]->b_data,address);
 789                         brelse(bh[i]);
 790                 }
 791         }
 792         return where;
 793 }
 794 
 795 /*
 796  * Try to increase the number of buffers available: the size argument
 797  * is used to determine what kind of buffers we want. Currently only
 798  * 1024-byte buffers are supported by the rest of the system, but I
 799  * think this will change eventually.
 800  */
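     /*
      * Each call takes one page from the free page pool, carves it into
      * 4096/size buffers with create_buffers(), splices them into the free
      * list, closes the page's b_this_page chain into a ring and accounts
      * for the page in buffermem.
      */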
 801 void grow_buffers(int size)
 802 {
 803         unsigned long page;
 804         struct buffer_head *bh, *tmp;
 805 
 806         if ((size & 511) || (size > 4096)) {
 807                 printk("VFS: grow_buffers: size = %d\n",size);
 808                 return;
 809         }
 810         page = get_free_page(GFP_BUFFER);
 811         if (!page)
 812                 return;
 813         bh = create_buffers(page, size);
 814         if (!bh) {
 815                 free_page(page);
 816                 return;
 817         }
 818         tmp = bh;
 819         while (1) {
 820                 if (free_list) {
 821                         tmp->b_next_free = free_list;
 822                         tmp->b_prev_free = free_list->b_prev_free;
 823                         free_list->b_prev_free->b_next_free = tmp;
 824                         free_list->b_prev_free = tmp;
 825                 } else {
 826                         tmp->b_prev_free = tmp;
 827                         tmp->b_next_free = tmp;
 828                 }
 829                 free_list = tmp;
 830                 ++nr_buffers;
 831                 if (tmp->b_this_page)
 832                         tmp = tmp->b_this_page;
 833                 else
 834                         break;
 835         }
 836         tmp->b_this_page = bh;
 837         buffermem += 4096;
 838         return;
 839 }
 840 
 841 /*
 842  * try_to_free() checks if all the buffers on this particular page
 843  * are unused, and frees the page if so.
 844  */
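     /*
      * The first pass over the b_this_page ring checks that every buffer on
      * the page is idle (b_count, b_dirt and b_lock all zero); only then are
      * the buffer heads detached and the page itself released.
      */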
 845 static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
 846 {
 847         unsigned long page;
 848         struct buffer_head * tmp, * p;
 849 
 850         *bhp = bh;
 851         page = (unsigned long) bh->b_data;
 852         page &= 0xfffff000;
 853         tmp = bh;
 854         do {
 855                 if (!tmp)
 856                         return 0;
 857                 if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
 858                         return 0;
 859                 tmp = tmp->b_this_page;
 860         } while (tmp != bh);
 861         tmp = bh;
 862         do {
 863                 p = tmp;
 864                 tmp = tmp->b_this_page;
 865                 nr_buffers--;
 866                 if (p == *bhp)
 867                         *bhp = p->b_prev_free;
 868                 remove_from_queues(p);
 869                 put_unused_buffer_head(p);
 870         } while (tmp != bh);
 871         buffermem -= 4096;
 872         free_page(page);
 873         return !mem_map[MAP_NR(page)];
 874 }
 875 
 876 /*
 877  * Try to free up some pages by shrinking the buffer-cache
 878  *
 879  * Priority tells the routine how hard to try to shrink the
 880  * buffers: 3 means "don't bother too much", while a value
 881  * of 0 means "we'd better get some free pages now".
 882  */
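     /*
      * Concretely, nr_buffers >> priority buffers are examined per call, so
      * priority 0 scans the whole free list while priority 3 looks at only
      * an eighth of it; dirty buffers found along the way are scheduled for
      * write-ahead (WRITEA) rather than freed.
      */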
 883 int shrink_buffers(unsigned int priority)
 884 {
 885         struct buffer_head *bh;
 886         int i;
 887 
 888         if (priority < 2)
 889                 sync_buffers(0,0);
 890         bh = free_list;
 891         i = nr_buffers >> priority;
 892         for ( ; i-- > 0 ; bh = bh->b_next_free) {
 893                 if (bh->b_count || !bh->b_this_page)
 894                         continue;
 895                 if (bh->b_lock)
 896                         if (priority)
 897                                 continue;
 898                         else
 899                                 wait_on_buffer(bh);
 900                 if (bh->b_dirt) {
 901                         bh->b_count++;
 902                         ll_rw_block(WRITEA, 1, &bh);
 903                         bh->b_count--;
 904                         continue;
 905                 }
 906                 if (try_to_free(bh, &bh))
 907                         return 1;
 908         }
 909         return 0;
 910 }
 911 
 912 /*
 913  * This initializes the initial buffer free list.  nr_buffers is set
 914  * to one less than the actual number of buffers, as a sop to backwards
 915  * compatibility --- the old code did this (I think unintentionally,
 916  * but I'm not sure), and programs in the ps package expect it.
 917  *                                      - TYT 8/30/92
 918  */
 919 void buffer_init(void)
 920 {
 921         int i;
 922 
 923         if (high_memory >= 4*1024*1024)
 924                 min_free_pages = 200;
 925         else
 926                 min_free_pages = 20;
 927         for (i = 0 ; i < NR_HASH ; i++)
 928                 hash_table[i] = NULL;
 929         free_list = 0;
 930         grow_buffers(BLOCK_SIZE);
 931         if (!free_list)
 932                 panic("VFS: Unable to initialize buffer free list!");
 933         return;
 934 }
