root/mm/filemap.c


DEFINITIONS

This source file includes the following definitions.
  1. invalidate_inode_pages
  2. truncate_inode_pages
  3. shrink_mmap
  4. page_unuse
  5. update_vm_cache
  6. try_to_read_ahead
  7. __wait_on_page
  8. generic_file_read
  9. fill_page
  10. filemap_nopage
  11. do_write_page
  12. filemap_write_page
  13. filemap_swapout
  14. filemap_swapin
  15. filemap_sync_pte
  16. filemap_sync_pte_range
  17. filemap_sync_pmd_range
  18. filemap_sync
  19. filemap_unmap
  20. generic_file_mmap
  21. msync_interval
  22. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/swap.h>
  25 
  26 #include <asm/segment.h>
  27 #include <asm/system.h>
  28 #include <asm/pgtable.h>
  29 
  30 /*
  31  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  32  * though.
  33  *
  34  * Shared mappings now work. 15.8.1995  Bruno.
  35  */
  36 
  37 unsigned long page_cache_size = 0;
  38 struct page * page_hash_table[PAGE_HASH_SIZE];
  39 
  40 /*
  41  * Simple routines for both non-shared and shared mappings.
  42  */
  43 
  44 /*
  45  * Invalidate the pages of an inode, removing all pages that aren't
  46  * locked down (those are sure to be up-to-date anyway, so we shouldn't
  47  * invalidate them).
  48  */
  49 void invalidate_inode_pages(struct inode * inode)
  50 {
  51         struct page ** p;
  52         struct page * page;
  53 
  54         p = &inode->i_pages;
  55         while ((page = *p) != NULL) {
  56                 if (page->locked) {
  57                         p = &page->next;
  58                         continue;
  59                 }
  60                 inode->i_nrpages--;
  61                 if ((*p = page->next) != NULL)
  62                         (*p)->prev = page->prev;
  63                 page->dirty = 0;
  64                 page->next = NULL;
  65                 page->prev = NULL;
  66                 remove_page_from_hash_queue(page);
  67                 page->inode = NULL;
  68                 free_page(page_address(page));
  69                 continue;
  70         }
  71 }
  72 
  73 /*
  74  * Truncate the page cache at a set offset, removing the pages
  75  * that are beyond that offset (and zeroing out partial pages).
  76  */
  77 void truncate_inode_pages(struct inode * inode, unsigned long start)
  78 {
  79         struct page ** p;
  80         struct page * page;
  81 
  82 repeat:
  83         p = &inode->i_pages;
  84         while ((page = *p) != NULL) {
  85                 unsigned long offset = page->offset;
  86 
  87                 /* page wholly truncated - free it */
  88                 if (offset >= start) {
  89                         if (page->locked) {
  90                                 wait_on_page(page);
  91                                 goto repeat;
  92                         }
  93                         inode->i_nrpages--;
  94                         if ((*p = page->next) != NULL)
  95                                 (*p)->prev = page->prev;
  96                         page->dirty = 0;
  97                         page->next = NULL;
  98                         page->prev = NULL;
  99                         remove_page_from_hash_queue(page);
 100                         page->inode = NULL;
 101                         free_page(page_address(page));
 102                         continue;
 103                 }
 104                 p = &page->next;
 105                 offset = start - offset;
 106                 /* partial truncate, clear end of page */
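                      /* Example: with a 4096-byte PAGE_SIZE and start == 5000,
                         the page at file offset 4096 reaches this point with
                         offset == 904, so its first 904 bytes are kept and the
                         remaining 3192 bytes are cleared by the memset below. */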
 107                 if (offset < PAGE_SIZE)
 108                         memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
 109         }
 110 }
 111 
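/*
 * Try to free one page from the page/buffer cache.  The memory map is scanned
 * with a simple clock hand ("clock"); "priority" controls how many map entries
 * are examined per call, and if "dma" is set only DMA-capable pages are
 * considered.  Referenced or shared pages are spared (only their referenced
 * bit is updated); an old, unshared page-cache page or freeable buffer page is
 * released.  Returns 1 if a page was freed, 0 otherwise.
 */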
 112 int shrink_mmap(int priority, int dma)
 113 {
 114         static int clock = 0;
 115         struct page * page;
 116         unsigned long limit = MAP_NR(high_memory);
 117         struct buffer_head *tmp, *bh;
 118 
 119         priority = (limit<<2) >> priority;
 120         page = mem_map + clock;
 121         while (priority-- > 0) {
 122                 if (page->locked)
 123                         goto next;
 124                 if (dma && !page->dma)
 125                         goto next;
 126                 /* First of all, regenerate the page's referenced bit
 127                    from any buffers in the page */
 128                 bh = page->buffers;
 129                 if (bh) {
 130                         tmp = bh;
 131                         do {
 132                                 if (buffer_touched(tmp)) {
 133                                         clear_bit(BH_Touched, &tmp->b_state);
 134                                         page->referenced = 1;
 135                                 }
 136                                 tmp = tmp->b_this_page;
 137                         } while (tmp != bh);
 138                 }
 139 
 140                 /* We can't throw away shared pages, but we do mark
 141                    them as referenced.  This relies on the fact that
 142                    no page is currently in both the page cache and the
 143                    buffer cache; we'd have to modify the following
 144                    test to allow for that case. */
 145                 if (page->count > 1)
 146                         page->referenced = 1;
 147                 else if (page->referenced)
 148                         page->referenced = 0;
 149                 else if (page->count) {
 150                         /* The page is an old, unshared page --- try
 151                            to discard it. */
 152                         if (page->inode) {
 153                                 remove_page_from_hash_queue(page);
 154                                 remove_page_from_inode_queue(page);
 155                                 free_page(page_address(page));
 156                                 return 1;
 157                         }
 158                         if (bh && try_to_free_buffer(bh, &bh, 6))
 159                                 return 1;
 160                 }
 161 next:
 162                 page++;
 163                 clock++;
 164                 if (clock >= limit) {
 165                         clock = 0;
 166                         page = mem_map;
 167                 }
 168         }
 169         return 0;
 170 }
 171 
 172 /*
 173  * This is called from try_to_swap_out() when we try to get rid of some
 174  * pages..  If we're unmapping the last occurrence of this page, we also
 175  * free it from the page hash-queues etc, as we don't want to keep it
 176  * in-core unnecessarily.
 177  */
 178 unsigned long page_unuse(unsigned long page)
 179 {
 180         struct page * p = mem_map + MAP_NR(page);
 181         int count = p->count;
 182 
 183         if (count != 2)
 184                 return count;
 185         if (!p->inode)
 186                 return count;
 187         remove_page_from_hash_queue(p);
 188         remove_page_from_inode_queue(p);
 189         free_page(page);
 190         return 1;
 191 }
 192 
 193 /*
  194  * Update a page cache copy when we're doing a "write()" system call,
  195  * so that cached pages stay coherent with the newly written data.
 196  */
 197 void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
 198 {
 199         unsigned long offset, len;
 200 
 201         offset = (pos & ~PAGE_MASK);
 202         pos = pos & PAGE_MASK;
 203         len = PAGE_SIZE - offset;
 204         do {
 205                 struct page * page;
 206 
 207                 if (len > count)
 208                         len = count;
 209                 page = find_page(inode, pos);
 210                 if (page) {
 211                         unsigned long addr;
 212 
 213                         wait_on_page(page);
 214                         addr = page_address(page);
 215                         memcpy((void *) (offset + addr), buf, len);
 216                         free_page(addr);
 217                 }
 218                 count -= len;
 219                 buf += len;
 220                 len = PAGE_SIZE;
 221                 offset = 0;
 222                 pos += PAGE_SIZE;
 223         } while (count);
 224 }
 225 
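/*
 * Illustrative sketch of the intended caller: a filesystem's own write()
 * routine copies the new data into its buffers and then calls
 * update_vm_cache() so that mapped or cached copies of the same range stay
 * coherent.  The bh/offset/c variables below stand for the usual per-block
 * write loop and are assumptions, not definitions made here.
 */
#if 0
		memcpy_fromfs(bh->b_data + offset, buf, c);	/* data from user space */
		update_vm_cache(inode, pos, bh->b_data + offset, c);
		pos += c;
		buf += c;
#endif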
 226 /*
 227  * Try to read ahead in the file. "page_cache" is a potentially free page
 228  * that we could use for the cache (if it is 0 we can try to create one,
 229  * this is all overlapped with the IO on the previous page finishing anyway)
 230  */
 231 static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
 232 {
 233         struct page * page;
 234 
 235         offset &= PAGE_MASK;
 236         if (!page_cache) {
 237                 page_cache = __get_free_page(GFP_KERNEL);
 238                 if (!page_cache)
 239                         return 0;
 240         }
 241         if (offset >= inode->i_size)
 242                 return page_cache;
 243 #if 1
 244         page = find_page(inode, offset);
 245         if (page) {
 246                 page->count--;
 247                 return page_cache;
 248         }
 249         /*
 250          * Ok, add the new page to the hash-queues...
 251          */
 252         page = mem_map + MAP_NR(page_cache);
 253         page->count++;
 254         page->uptodate = 0;
 255         page->error = 0;
 256         page->offset = offset;
 257         add_page_to_inode_queue(inode, page);
 258         add_page_to_hash_queue(inode, page);
 259 
 260         inode->i_op->readpage(inode, page);
 261 
 262         free_page(page_cache);
 263         return 0;
 264 #else
 265         return page_cache;
 266 #endif
 267 }
 268 
 269 /* 
 270  * Wait for IO to complete on a locked page.
 271  */
 272 void __wait_on_page(struct page *page)
 273 {
 274         struct wait_queue wait = { current, NULL };
 275 
 276         page->count++;
 277         add_wait_queue(&page->wait, &wait);
 278 repeat:
 279         current->state = TASK_UNINTERRUPTIBLE;
 280         if (page->locked) {
 281                 schedule();
 282                 goto repeat;
 283         }
 284         remove_wait_queue(&page->wait, &wait);
 285         page->count--;
 286         current->state = TASK_RUNNING;
 287 }
 288 
 289 
 290 /*
 291  * This is a generic file read routine, and uses the
 292  * inode->i_op->readpage() function for the actual low-level
 293  * stuff.
 294  *
 295  * This is really ugly. But the goto's actually try to clarify some
 296  * of the logic when it comes to error handling etc.
 297  */
 298 #define MAX_READAHEAD (PAGE_SIZE*4)
 299 int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
 300 {
 301         int error, read;
 302         unsigned long pos, page_cache;
 303         
 304         if (count <= 0)
 305                 return 0;
 306         error = 0;
 307         read = 0;
 308         page_cache = 0;
 309 
 310         pos = filp->f_pos;
 311         for (;;) {
 312                 struct page *page;
 313                 unsigned long offset, addr, nr;
 314 
 315                 if (pos >= inode->i_size)
 316                         break;
 317                 offset = pos & ~PAGE_MASK;
 318                 nr = PAGE_SIZE - offset;
 319                 /*
 320                  * Try to find the data in the page cache..
 321                  */
 322                 page = find_page(inode, pos & PAGE_MASK);
 323                 if (page)
 324                         goto found_page;
 325 
 326                 /*
 327                  * Ok, it wasn't cached, so we need to create a new
 328                  * page..
 329                  */
 330                 if (page_cache)
 331                         goto new_page;
 332 
 333                 error = -ENOMEM;
 334                 page_cache = __get_free_page(GFP_KERNEL);
 335                 if (!page_cache)
 336                         break;
 337                 error = 0;
 338 
 339                 /*
 340                  * That could have slept, so we need to check again..
 341                  */
 342                 if (pos >= inode->i_size)
 343                         break;
 344                 page = find_page(inode, pos & PAGE_MASK);
 345                 if (!page)
 346                         goto new_page;
 347 
 348 found_page:
 349                 addr = page_address(page);
 350                 if (nr > count)
 351                         nr = count;
 352 
 353                 /*
 354                  * We may want to do read-ahead.. Do this only
 355                  * if we're waiting for the current page to be
  356                  * filled in, and either
  357                  *  - we're going to read more than this page, or
  358                  *  - "f_reada" is set
 359                  */
 360                 if (page->locked) {
 361                         unsigned long max_ahead, ahead;
 362 
 363                         max_ahead = count - nr;
 364                         if (filp->f_reada || max_ahead > MAX_READAHEAD)
 365                                 max_ahead = MAX_READAHEAD;
 366                         ahead = 0;
 367                         while (ahead < max_ahead) {
 368                                 ahead += PAGE_SIZE;
 369                                 page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
 370                         }
 371                         __wait_on_page(page);
 372                 }
 373                 if (!page->uptodate)
 374                         goto read_page;
 375                 if (nr > inode->i_size - pos)
 376                         nr = inode->i_size - pos;
 377                 memcpy_tofs(buf, (void *) (addr + offset), nr);
 378                 free_page(addr);
 379                 buf += nr;
 380                 pos += nr;
 381                 read += nr;
 382                 count -= nr;
 383                 if (count)
 384                         continue;
 385                 break;
 386         
 387 
 388 new_page:
 389                 /*
 390                  * Ok, add the new page to the hash-queues...
 391                  */
 392                 addr = page_cache;
 393                 page = mem_map + MAP_NR(page_cache);
 394                 page_cache = 0;
 395                 page->count++;
 396                 page->uptodate = 0;
 397                 page->error = 0;
 398                 page->offset = pos & PAGE_MASK;
 399                 add_page_to_inode_queue(inode, page);
 400                 add_page_to_hash_queue(inode, page);
 401 
 402                 /*
 403                  * Error handling is tricky. If we get a read error,
 404                  * the cached page stays in the cache (but uptodate=0),
 405                  * and the next process that accesses it will try to
 406                  * re-read it. This is needed for NFS etc, where the
 407                  * identity of the reader can decide if we can read the
 408                  * page or not..
 409                  */
 410 read_page:
 411                 error = inode->i_op->readpage(inode, page);
 412                 if (!error)
 413                         goto found_page;
 414                 free_page(addr);
 415                 break;
 416         }
 417 
 418         filp->f_pos = pos;
 419         filp->f_reada = 1;
 420         if (page_cache)
 421                 free_page(page_cache);
 422         if (!IS_RDONLY(inode)) {
 423                 inode->i_atime = CURRENT_TIME;
 424                 inode->i_dirt = 1;
 425         }
 426         if (!read)
 427                 read = error;
 428         return read;
 429 }
 430 
 431 /*
 432  * Find a cached page and wait for it to become up-to-date, return
 433  * the page address.  Increments the page count.
 434  */
 435 static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
 436 {
 437         struct page * page;
 438         unsigned long new_page;
 439 
 440         page = find_page(inode, offset);
 441         if (page)
 442                 goto found_page;
 443         new_page = __get_free_page(GFP_KERNEL);
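              /* __get_free_page() may have slept, so somebody else may have
                 read the page into the cache meanwhile -- look it up again
                 (same pattern as in generic_file_read() above). */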
 444         page = find_page(inode, offset);
 445         if (page) {
 446                 if (new_page)
 447                         free_page(new_page);
 448                 goto found_page;
 449         }
 450         if (!new_page)
 451                 return 0;
 452         page = mem_map + MAP_NR(new_page);
 453         new_page = 0;
 454         page->count++;
 455         page->uptodate = 0;
 456         page->error = 0;
 457         page->offset = offset;
 458         add_page_to_inode_queue(inode, page);
 459         add_page_to_hash_queue(inode, page);
 460         inode->i_op->readpage(inode, page);
 461 found_page:
 462         wait_on_page(page);
 463         return page_address(page);
 464 }
 465 
 466 /*
 467  * Semantics for shared and private memory areas are different past the end
 468  * of the file. A shared mapping past the last page of the file is an error
  469  * and results in a SIGBUS, while a private mapping just maps in a zero page.
 470  */
 471 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
 472 {
 473         unsigned long offset;
 474         struct inode * inode = area->vm_inode;
 475         unsigned long page;
 476 
 477         offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
 478         if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
 479                 return 0;
 480 
 481         page = fill_page(inode, offset);
 482         if (page && no_share) {
 483                 unsigned long new_page = __get_free_page(GFP_KERNEL);
 484                 if (new_page)
 485                         memcpy((void *) new_page, (void *) page, PAGE_SIZE);
 486                 free_page(page);
 487                 return new_page;
 488         }
 489         return page;
 490 }
 491 
 492 /*
 493  * Tries to write a shared mapped page to its backing store. May return -EIO
 494  * if the disk is full.
 495  */
 496 static inline int do_write_page(struct inode * inode, struct file * file,
 497         const char * page, unsigned long offset)
 498 {
 499         int old_fs, retval;
 500         unsigned long size;
 501 
 502         size = offset + PAGE_SIZE;
 503         /* refuse to extend file size.. */
 504         if (S_ISREG(inode->i_mode)) {
 505                 if (size > inode->i_size)
 506                         size = inode->i_size;
 507                 /* Ho humm.. We should have tested for this earlier */
 508                 if (size < offset)
 509                         return -EIO;
 510         }
 511         size -= offset;
 512         old_fs = get_fs();
 513         set_fs(KERNEL_DS);
 514         retval = -EIO;
 515         if (size == file->f_op->write(inode, file, (const char *) page, size))
 516                 retval = 0;
 517         set_fs(old_fs);
 518         return retval;
 519 }
 520 
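/*
 * Write a shared-mapping page back to the file it maps.  If the page still
 * has buffers attached, just mark them dirty and let the buffer cache do the
 * IO; otherwise build a temporary struct file and push the page out through
 * the inode's default write() operation via do_write_page() above.
 */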
 521 static int filemap_write_page(struct vm_area_struct * vma,
 522         unsigned long offset,
 523         unsigned long page)
 524 {
 525         int result;
 526         struct file file;
 527         struct inode * inode;
 528         struct buffer_head * bh;
 529 
 530         bh = mem_map[MAP_NR(page)].buffers;
 531         if (bh) {
 532                 /* whee.. just mark the buffer heads dirty */
 533                 struct buffer_head * tmp = bh;
 534                 do {
 535                         mark_buffer_dirty(tmp, 0);
 536                         tmp = tmp->b_this_page;
 537                 } while (tmp != bh);
 538                 return 0;
 539         }
 540 
 541         inode = vma->vm_inode;
 542         file.f_op = inode->i_op->default_file_ops;
 543         if (!file.f_op->write)
 544                 return -EIO;
 545         file.f_mode = 3;
 546         file.f_flags = 0;
 547         file.f_count = 1;
 548         file.f_inode = inode;
 549         file.f_pos = offset;
 550         file.f_reada = 0;
 551 
 552         down(&inode->i_sem);
 553         result = do_write_page(inode, &file, (const char *) page, offset);
 554         up(&inode->i_sem);
 555         return result;
 556 }
 557 
 558 
 559 /*
 560  * Swapping to a shared file: while we're busy writing out the page
 561  * (and the page still exists in memory), we save the page information
 562  * in the page table, so that "filemap_swapin()" can re-use the page
 563  * immediately if it is called while we're busy swapping it out..
 564  *
 565  * Once we've written it all out, we mark the page entry "empty", which
 566  * will result in a normal page-in (instead of a swap-in) from the now
 567  * up-to-date disk file.
 568  */
 569 int filemap_swapout(struct vm_area_struct * vma,
 570         unsigned long offset,
 571         pte_t *page_table)
 572 {
 573         int error;
 574         unsigned long page = pte_page(*page_table);
 575         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 576 
 577         set_pte(page_table, __pte(entry));
 578         /* Yuck, perhaps a slightly modified swapout parameter set? */
 579         invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
 580         error = filemap_write_page(vma, offset, page);
 581         if (pte_val(*page_table) == entry)
 582                 pte_clear(page_table);
 583         return error;
 584 }
 585 
 586 /*
 587  * filemap_swapin() is called only if we have something in the page
 588  * tables that is non-zero (but not present), which we know to be the
 589  * page index of a page that is busy being swapped out (see above).
 590  * So we just use it directly..
 591  */
 592 static pte_t filemap_swapin(struct vm_area_struct * vma,
 593         unsigned long offset,
 594         unsigned long entry)
 595 {
 596         unsigned long page = SWP_OFFSET(entry);
 597 
 598         mem_map[page].count++;
 599         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 600         return mk_pte(page,vma->vm_page_prot);
 601 }
 602 
 603 
 604 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 605         unsigned long address, unsigned int flags)
 606 {
 607         pte_t pte = *ptep;
 608         unsigned long page;
 609         int error;
 610 
 611         if (!(flags & MS_INVALIDATE)) {
 612                 if (!pte_present(pte))
 613                         return 0;
 614                 if (!pte_dirty(pte))
 615                         return 0;
 616                 set_pte(ptep, pte_mkclean(pte));
 617                 invalidate_page(vma, address);
 618                 page = pte_page(pte);
 619                 mem_map[MAP_NR(page)].count++;
 620         } else {
 621                 if (pte_none(pte))
 622                         return 0;
 623                 pte_clear(ptep);
 624                 invalidate_page(vma, address);
 625                 if (!pte_present(pte)) {
 626                         swap_free(pte_val(pte));
 627                         return 0;
 628                 }
 629                 page = pte_page(pte);
 630                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 631                         free_page(page);
 632                         return 0;
 633                 }
 634         }
 635         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 636         free_page(page);
 637         return error;
 638 }
 639 
 640 static inline int filemap_sync_pte_range(pmd_t * pmd,
 641         unsigned long address, unsigned long size, 
 642         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 643 {
 644         pte_t * pte;
 645         unsigned long end;
 646         int error;
 647 
 648         if (pmd_none(*pmd))
 649                 return 0;
 650         if (pmd_bad(*pmd)) {
 651                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 652                 pmd_clear(pmd);
 653                 return 0;
 654         }
 655         pte = pte_offset(pmd, address);
 656         offset += address & PMD_MASK;
 657         address &= ~PMD_MASK;
 658         end = address + size;
 659         if (end > PMD_SIZE)
 660                 end = PMD_SIZE;
 661         error = 0;
 662         do {
 663                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 664                 address += PAGE_SIZE;
 665                 pte++;
 666         } while (address < end);
 667         return error;
 668 }
 669 
 670 static inline int filemap_sync_pmd_range(pgd_t * pgd,
 671         unsigned long address, unsigned long size, 
 672         struct vm_area_struct *vma, unsigned int flags)
 673 {
 674         pmd_t * pmd;
 675         unsigned long offset, end;
 676         int error;
 677 
 678         if (pgd_none(*pgd))
 679                 return 0;
 680         if (pgd_bad(*pgd)) {
 681                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 682                 pgd_clear(pgd);
 683                 return 0;
 684         }
 685         pmd = pmd_offset(pgd, address);
 686         offset = address & PMD_MASK;
 687         address &= ~PMD_MASK;
 688         end = address + size;
 689         if (end > PGDIR_SIZE)
 690                 end = PGDIR_SIZE;
 691         error = 0;
 692         do {
 693                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 694                 address = (address + PMD_SIZE) & PMD_MASK;
 695                 pmd++;
 696         } while (address < end);
 697         return error;
 698 }
 699 
 700 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 701         size_t size, unsigned int flags)
 702 {
 703         pgd_t * dir;
 704         unsigned long end = address + size;
 705         int error = 0;
 706 
 707         dir = pgd_offset(current->mm, address);
 708         while (address < end) {
 709                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 710                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 711                 dir++;
 712         }
 713         invalidate_range(vma->vm_mm, end - size, end);
 714         return error;
 715 }
 716 
 717 /*
 718  * This handles (potentially partial) area unmaps..
 719  */
 720 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
 721 {
 722         filemap_sync(vma, start, len, MS_ASYNC);
 723 }
 724 
 725 /*
 726  * Shared mappings need to be able to do the right thing at
 727  * close/unmap/sync. They will also use the private file as
 728  * backing-store for swapping..
 729  */
 730 static struct vm_operations_struct file_shared_mmap = {
 731         NULL,                   /* no special open */
 732         NULL,                   /* no special close */
 733         filemap_unmap,          /* unmap - we need to sync the pages */
 734         NULL,                   /* no special protect */
 735         filemap_sync,           /* sync */
 736         NULL,                   /* advise */
 737         filemap_nopage,         /* nopage */
 738         NULL,                   /* wppage */
 739         filemap_swapout,        /* swapout */
 740         filemap_swapin,         /* swapin */
 741 };
 742 
 743 /*
 744  * Private mappings just need to be able to load in the map.
 745  *
 746  * (This is actually used for shared mappings as well, if we
 747  * know they can't ever get write permissions..)
 748  */
 749 static struct vm_operations_struct file_private_mmap = {
 750         NULL,                   /* open */
 751         NULL,                   /* close */
 752         NULL,                   /* unmap */
 753         NULL,                   /* protect */
 754         NULL,                   /* sync */
 755         NULL,                   /* advise */
 756         filemap_nopage,         /* nopage */
 757         NULL,                   /* wppage */
 758         NULL,                   /* swapout */
 759         NULL,                   /* swapin */
 760 };
 761 
 762 /* This is used for a general mmap of a disk file */
 763 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 764 {
 765         struct vm_operations_struct * ops;
 766 
 767         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 768                 ops = &file_shared_mmap;
 769                 /* share_page() can only guarantee proper page sharing if
 770                  * the offsets are all page aligned. */
 771                 if (vma->vm_offset & (PAGE_SIZE - 1))
 772                         return -EINVAL;
 773         } else {
 774                 ops = &file_private_mmap;
 775                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 776                         return -EINVAL;
 777         }
 778         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 779                 return -EACCES;
 780         if (!inode->i_op || !inode->i_op->readpage)
 781                 return -ENOEXEC;
 782         if (!IS_RDONLY(inode)) {
 783                 inode->i_atime = CURRENT_TIME;
 784                 inode->i_dirt = 1;
 785         }
 786         vma->vm_inode = inode;
 787         inode->i_count++;
 788         vma->vm_ops = ops;
 789         return 0;
 790 }
 791 
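/*
 * Illustrative sketch: a filesystem that wants these generic semantics simply
 * points the mmap slot of its file_operations at generic_file_mmap() and
 * provides an inode readpage() operation (generic_file_mmap() above returns
 * -ENOEXEC without one).  The "myfs_*" names are hypothetical and the slot
 * layout is only a sketch of the file_operations table of this kernel
 * generation:
 */
#if 0
static struct file_operations myfs_file_operations = {
	NULL,			/* lseek - default */
	generic_file_read,	/* read: the page-cache read loop above */
	myfs_file_write,	/* write (hypothetical) */
	NULL,			/* readdir - meaningless for regular files */
	NULL,			/* select - default */
	NULL,			/* ioctl */
	generic_file_mmap,	/* mmap: defined above */
	NULL,			/* open */
	NULL,			/* release */
	myfs_sync_file,		/* fsync (hypothetical) */
};
#endif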
 792 
 793 /*
 794  * The msync() system call.
 795  */
 796 
 797 static int msync_interval(struct vm_area_struct * vma,
 798         unsigned long start, unsigned long end, int flags)
 799 {
 800         if (!vma->vm_inode)
 801                 return 0;
 802         if (vma->vm_ops->sync) {
 803                 int error;
 804                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 805                 if (error)
 806                         return error;
 807                 if (flags & MS_SYNC)
 808                         return file_fsync(vma->vm_inode, NULL);
 809                 return 0;
 810         }
 811         return 0;
 812 }
 813 
 814 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
 815 {
 816         unsigned long end;
 817         struct vm_area_struct * vma;
 818         int unmapped_error, error;
 819 
 820         if (start & ~PAGE_MASK)
 821                 return -EINVAL;
 822         len = (len + ~PAGE_MASK) & PAGE_MASK;
 823         end = start + len;
 824         if (end < start)
 825                 return -EINVAL;
 826         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 827                 return -EINVAL;
 828         if (end == start)
 829                 return 0;
 830         /*
 831          * If the interval [start,end) covers some unmapped address ranges,
 832          * just ignore them, but return -EFAULT at the end.
 833          */
 834         vma = find_vma(current, start);
 835         unmapped_error = 0;
 836         for (;;) {
 837                 /* Still start < end. */
 838                 if (!vma)
 839                         return -EFAULT;
 840                 /* Here start < vma->vm_end. */
 841                 if (start < vma->vm_start) {
 842                         unmapped_error = -EFAULT;
 843                         start = vma->vm_start;
 844                 }
 845                 /* Here vma->vm_start <= start < vma->vm_end. */
 846                 if (end <= vma->vm_end) {
 847                         if (start < end) {
 848                                 error = msync_interval(vma, start, end, flags);
 849                                 if (error)
 850                                         return error;
 851                         }
 852                         return unmapped_error;
 853                 }
 854                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 855                 error = msync_interval(vma, start, vma->vm_end, flags);
 856                 if (error)
 857                         return error;
 858                 start = vma->vm_end;
 859                 vma = vma->vm_next;
 860         }
 861 }
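/*
 * User-space view (illustrative sketch, names assumed): after storing through
 * a MAP_SHARED file mapping, msync() pushes the dirty pages back to the file.
 * "fd" is an open writable descriptor and "len" a multiple of the page size;
 * note that sys_msync() above rejects a "start" that is not page aligned, and
 * MS_SYNC additionally waits for the file to be written out via file_fsync().
 *
 *	char *map = (char *) mmap(0, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	map[0] = 'x';
 *	msync(map, len, MS_SYNC);
 *	munmap(map, len);
 */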
