root/mm/filemap.c


DEFINITIONS

This source file includes the following definitions:
  1. invalidate_inode_pages
  2. truncate_inode_pages
  3. shrink_mmap
  4. page_unuse
  5. update_vm_cache
  6. try_to_read_ahead
  7. __wait_on_page
  8. generic_file_read
  9. fill_page
  10. filemap_nopage
  11. filemap_write_page
  12. filemap_swapout
  13. filemap_swapin
  14. filemap_sync_pte
  15. filemap_sync_pte_range
  16. filemap_sync_pmd_range
  17. filemap_sync
  18. filemap_unmap
  19. generic_file_mmap
  20. msync_interval
  21. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/swap.h>
  25 
  26 #include <asm/segment.h>
  27 #include <asm/system.h>
  28 #include <asm/pgtable.h>
  29 
  30 /*
  31  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  32  * though.
  33  *
  34  * Shared mappings now work. 15.8.1995  Bruno.
  35  */
  36 
  37 unsigned long page_cache_size = 0;
  38 struct page * page_hash_table[PAGE_HASH_SIZE];
  39 
  40 /*
  41  * Simple routines for both non-shared and shared mappings.
  42  */
  43 
  44 /*
  45  * Invalidate the pages of an inode, removing all pages that aren't
  46  * locked down (those are sure to be up-to-date anyway, so we shouldn't
  47  * invalidate them).
  48  */
  49 void invalidate_inode_pages(struct inode * inode)
  50 {
  51         struct page ** p;
  52         struct page * page;
  53 
  54         p = &inode->i_pages;
  55         while ((page = *p) != NULL) {
  56                 if (page->locked) {
  57                         p = &page->next;
  58                         continue;
  59                 }
  60                 inode->i_nrpages--;
  61                 if ((*p = page->next) != NULL)
  62                         (*p)->prev = page->prev;
  63                 page->dirty = 0;
  64                 page->next = NULL;
  65                 page->prev = NULL;
  66                 remove_page_from_hash_queue(page);
  67                 page->inode = NULL;
  68                 free_page(page_address(page));
  69                 continue;
  70         }
  71 }
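
/*
 * Illustrative sketch (editor's addition, not part of the listing above):
 * a network filesystem that notices a file changed on the server can use
 * invalidate_inode_pages() to drop its cached pages so the next read
 * refetches them.  "example_revalidate" is a hypothetical name.
 */
static void example_revalidate(struct inode * inode, int changed_on_server)
{
        if (changed_on_server)
                invalidate_inode_pages(inode);
}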
  72 
  73 /*
  74  * Truncate the page cache at a set offset, removing the pages
  75  * that are beyond that offset (and zeroing out partial pages).
  76  */
  77 void truncate_inode_pages(struct inode * inode, unsigned long start)
  78 {
  79         struct page ** p;
  80         struct page * page;
  81 
  82 repeat:
  83         p = &inode->i_pages;
  84         while ((page = *p) != NULL) {
  85                 unsigned long offset = page->offset;
  86 
  87                 /* page wholly truncated - free it */
  88                 if (offset >= start) {
  89                         if (page->locked) {
  90                                 wait_on_page(page);
  91                                 goto repeat;
  92                         }
  93                         inode->i_nrpages--;
  94                         if ((*p = page->next) != NULL)
  95                                 (*p)->prev = page->prev;
  96                         page->dirty = 0;
  97                         page->next = NULL;
  98                         page->prev = NULL;
  99                         remove_page_from_hash_queue(page);
 100                         page->inode = NULL;
 101                         free_page(page_address(page));
 102                         continue;
 103                 }
 104                 p = &page->next;
 105                 offset = start - offset;
 106                 /* partial truncate, clear end of page */
 107                 if (offset < PAGE_SIZE)
 108                         memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
 109         }
 110 }
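
/*
 * Illustrative sketch (editor's addition): a filesystem's truncate path
 * would typically discard the now-stale cached pages before releasing
 * the data blocks on disk.  "example_truncate" is a hypothetical name.
 */
static void example_truncate(struct inode * inode)
{
        /* drop whole pages past the new i_size, zero the partial tail */
        truncate_inode_pages(inode, inode->i_size);
        /* ...then free the on-disk blocks beyond i_size... */
}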
 111 
 112 int shrink_mmap(int priority, int dma)
 113 {
 114         static int clock = 0;
 115         struct page * page;
 116         unsigned long limit = MAP_NR(high_memory);
 117         struct buffer_head *tmp, *bh;
 118 
 119         priority = (limit<<2) >> priority;
 120         page = mem_map + clock;
 121         while (priority-- > 0) {
 122                 if (page->locked)
 123                         goto next;
 124                 if (dma && !page->dma)
 125                         goto next;
 126                 /* First of all, regenerate the page's referenced bit
 127                    from any buffers in the page */
 128                 bh = page->buffers;
 129                 if (bh) {
 130                         tmp = bh;
 131                         do {
 132                                 if (buffer_touched(tmp)) {
 133                                         clear_bit(BH_Touched, &tmp->b_state);
 134                                         page->referenced = 1;
 135                                 }
 136                                 tmp = tmp->b_this_page;
 137                         } while (tmp != bh);
 138                 }
 139 
 140                 /* We can't throw away shared pages, but we do mark
 141                    them as referenced.  This relies on the fact that
 142                    no page is currently in both the page cache and the
 143                    buffer cache; we'd have to modify the following
 144                    test to allow for that case. */
 145                 if (page->count > 1)
 146                         page->referenced = 1;
 147                 else if (page->referenced)
 148                         page->referenced = 0;
 149                 else if (page->count) {
 150                         /* The page is an old, unshared page --- try
 151                            to discard it. */
 152                         if (page->inode) {
 153                                 remove_page_from_hash_queue(page);
 154                                 remove_page_from_inode_queue(page);
 155                                 free_page(page_address(page));
 156                                 return 1;
 157                         }
 158                         if (bh && try_to_free_buffer(bh, &bh, 6))
 159                                 return 1;
 160                 }
 161 next:
 162                 page++;
 163                 clock++;
 164                 if (clock >= limit) {
 165                         clock = 0;
 166                         page = mem_map;
 167                 }
 168         }
 169         return 0;
 170 }
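
/*
 * Illustrative sketch (editor's addition): the page-reclaim code calls
 * shrink_mmap() with a priority that becomes more aggressive as memory
 * runs low -- a smaller priority value makes the clock scan cover more
 * of mem_map.  "example_try_to_free" is hypothetical; the real caller
 * lives in the swapping code.
 */
static int example_try_to_free(int dma)
{
        int priority;

        for (priority = 6; priority > 0; priority--) {
                if (shrink_mmap(priority, dma))
                        return 1;       /* freed one cached page */
        }
        return 0;
}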
 171 
 172 /*
 173  * This is called from try_to_swap_out() when we try to get rid of some
 174  * pages..  If we're unmapping the last occurrence of this page, we also
 175  * free it from the page hash-queues etc, as we don't want to keep it
 176  * in-core unnecessarily.
 177  */
 178 unsigned long page_unuse(unsigned long page)
 179 {
 180         struct page * p = mem_map + MAP_NR(page);
 181         int count = p->count;
 182 
 183         if (count != 2)
 184                 return count;
 185         if (!p->inode)
 186                 return count;
 187         remove_page_from_hash_queue(p);
 188         remove_page_from_inode_queue(p);
 189         free_page(page);
 190         return 1;
 191 }
 192 
 193 /*
 194  * Update a page cache copy, when we're doing a "write()" system call
 195  * See also "update_vm_cache()".
 196  */
 197 void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
 198 {
 199         unsigned long offset, len;
 200 
 201         offset = (pos & ~PAGE_MASK);
 202         pos = pos & PAGE_MASK;
 203         len = PAGE_SIZE - offset;
 204         do {
 205                 struct page * page;
 206 
 207                 if (len > count)
 208                         len = count;
 209                 page = find_page(inode, pos);
 210                 if (page) {
 211                         unsigned long addr;
 212 
 213                         wait_on_page(page);
 214                         addr = page_address(page);
 215                         memcpy((void *) (offset + addr), buf, len);
 216                         free_page(addr);
 217                 }
 218                 count -= len;
 219                 buf += len;
 220                 len = PAGE_SIZE;
 221                 offset = 0;
 222                 pos += PAGE_SIZE;
 223         } while (count);
 224 }
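
/*
 * Illustrative sketch (editor's addition): a filesystem's write path is
 * expected to call update_vm_cache() after it has written "count" bytes
 * at file position "pos", so that any page-cache copies of that range
 * stay coherent with the buffer cache.  "example_after_write" is a
 * hypothetical name.
 */
static void example_after_write(struct inode * inode, unsigned long pos,
        const char * buf, int count)
{
        /* keep cached pages covering [pos, pos+count) in sync */
        update_vm_cache(inode, pos, buf, count);
}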
 225 
 226 /*
 227  * Try to read ahead in the file. "page_cache" is a potentially free page
 228  * that we could use for the cache (if it is 0 we can try to create one,
 229  * this is all overlapped with the IO on the previous page finishing anyway)
 230  */
 231 static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
 232 {
 233         struct page * page;
 234 
 235         offset &= PAGE_MASK;
 236         if (!page_cache) {
 237                 page_cache = __get_free_page(GFP_KERNEL);
 238                 if (!page_cache)
 239                         return 0;
 240         }
 241         if (offset >= inode->i_size)
 242                 return page_cache;
 243 #if 1
 244         page = find_page(inode, offset);
 245         if (page) {
 246                 page->count--;
 247                 return page_cache;
 248         }
 249         /*
 250          * Ok, add the new page to the hash-queues...
 251          */
 252         page = mem_map + MAP_NR(page_cache);
 253         page->count++;
 254         page->uptodate = 0;
 255         page->error = 0;
 256         page->offset = offset;
 257         add_page_to_inode_queue(inode, page);
 258         add_page_to_hash_queue(inode, page);
 259 
 260         inode->i_op->readpage(inode, page);
 261 
 262         free_page(page_cache);
 263         return 0;
 264 #else
 265         return page_cache;
 266 #endif
 267 }
 268 
 269 /* 
 270  * Wait for IO to complete on a locked page.
 271  */
 272 void __wait_on_page(struct page *page)
 273 {
 274         struct wait_queue wait = { current, NULL };
 275 
 276         page->count++;
 277         add_wait_queue(&page->wait, &wait);
 278 repeat:
 279         current->state = TASK_UNINTERRUPTIBLE;
 280         if (page->locked) {
 281                 schedule();
 282                 goto repeat;
 283         }
 284         remove_wait_queue(&page->wait, &wait);
 285         page->count--;
 286         current->state = TASK_RUNNING;
 287 }
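
/*
 * Illustrative sketch (editor's addition): the wait_on_page() used in the
 * routines above is, roughly, an inline wrapper in <linux/pagemap.h> that
 * only takes the slow path when the page is actually locked -- shown here
 * under that assumption, with a hypothetical name.
 */
static inline void example_wait_on_page(struct page * page)
{
        if (page->locked)
                __wait_on_page(page);
}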
 288 
 289 
 290 /*
 291  * This is a generic file read routine, and uses the
 292  * inode->i_op->readpage() function for the actual low-level
 293  * stuff.
 294  *
 295  * This is really ugly. But the goto's actually try to clarify some
 296  * of the logic when it comes to error handling etc.
 297  */
 298 #define MAX_READAHEAD (PAGE_SIZE*4)
 299 int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
 300 {
 301         int error, read;
 302         unsigned long pos, page_cache;
 303         
 304         if (count <= 0)
 305                 return 0;
 306         error = 0;
 307         read = 0;
 308         page_cache = 0;
 309 
 310         pos = filp->f_pos;
 311         for (;;) {
 312                 struct page *page;
 313                 unsigned long offset, addr, nr;
 314 
 315                 if (pos >= inode->i_size)
 316                         break;
 317                 offset = pos & ~PAGE_MASK;
 318                 nr = PAGE_SIZE - offset;
 319                 /*
 320                  * Try to find the data in the page cache..
 321                  */
 322                 page = find_page(inode, pos & PAGE_MASK);
 323                 if (page)
 324                         goto found_page;
 325 
 326                 /*
 327                  * Ok, it wasn't cached, so we need to create a new
 328                  * page..
 329                  */
 330                 if (page_cache)
 331                         goto new_page;
 332 
 333                 error = -ENOMEM;
 334                 page_cache = __get_free_page(GFP_KERNEL);
 335                 if (!page_cache)
 336                         break;
 337                 error = 0;
 338 
 339                 /*
 340                  * That could have slept, so we need to check again..
 341                  */
 342                 if (pos >= inode->i_size)
 343                         break;
 344                 page = find_page(inode, pos & PAGE_MASK);
 345                 if (!page)
 346                         goto new_page;
 347 
 348 found_page:
 349                 addr = page_address(page);
 350                 if (nr > count)
 351                         nr = count;
 352 
 353                 /*
 354                  * We may want to do read-ahead.. Do this only
 355                  * if we're waiting for the current page to be
 356                  * filled in, and if
 357                  *  - we're going to read more than this page
 358                  *  - if "f_reada" is set
 359                  */
 360                 if (page->locked) {
 361                         unsigned long max_ahead, ahead;
 362 
 363                         max_ahead = count - nr;
 364                         if (filp->f_reada || max_ahead > MAX_READAHEAD)
 365                                 max_ahead = MAX_READAHEAD;
 366                         ahead = 0;
 367                         while (ahead < max_ahead) {
 368                                 ahead += PAGE_SIZE;
 369                                 page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
 370                         }
 371                         __wait_on_page(page);
 372                 }
 373                 if (!page->uptodate)
 374                         goto read_page;
 375                 if (nr > inode->i_size - pos)
 376                         nr = inode->i_size - pos;
 377                 memcpy_tofs(buf, (void *) (addr + offset), nr);
 378                 free_page(addr);
 379                 buf += nr;
 380                 pos += nr;
 381                 read += nr;
 382                 count -= nr;
 383                 if (count)
 384                         continue;
 385                 break;
 386         
 387 
 388 new_page:
 389                 /*
 390                  * Ok, add the new page to the hash-queues...
 391                  */
 392                 addr = page_cache;
 393                 page = mem_map + MAP_NR(page_cache);
 394                 page_cache = 0;
 395                 page->count++;
 396                 page->uptodate = 0;
 397                 page->error = 0;
 398                 page->offset = pos & PAGE_MASK;
 399                 add_page_to_inode_queue(inode, page);
 400                 add_page_to_hash_queue(inode, page);
 401 
 402                 /*
 403                  * Error handling is tricky. If we get a read error,
 404                  * the cached page stays in the cache (but uptodate=0),
 405                  * and the next process that accesses it will try to
 406                  * re-read it. This is needed for NFS etc, where the
 407                  * identity of the reader can decide if we can read the
 408                  * page or not..
 409                  */
 410 read_page:
 411                 error = inode->i_op->readpage(inode, page);
 412                 if (!error)
 413                         goto found_page;
 414                 free_page(addr);
 415                 break;
 416         }
 417 
 418         filp->f_pos = pos;
 419         filp->f_reada = 1;
 420         if (page_cache)
 421                 free_page(page_cache);
 422         if (!IS_RDONLY(inode)) {
 423                 inode->i_atime = CURRENT_TIME;
 424                 inode->i_dirt = 1;
 425         }
 426         if (!read)
 427                 read = error;
 428         return read;
 429 }
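
/*
 * Illustrative sketch (editor's addition): a filesystem that supplies an
 * inode->i_op->readpage() method can plug generic_file_read() straight
 * into its file_operations as the read method.  The positional
 * initializer assumes the fs.h layout of this era (lseek first, then
 * read); "example_file_ops" is a hypothetical name.
 */
static struct file_operations example_file_ops = {
        NULL,                   /* lseek - default */
        generic_file_read,      /* read */
};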
 430 
 431 /*
 432  * Find a cached page and wait for it to become up-to-date, return
 433  * the page address.  Increments the page count.
 434  */
 435 static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
 436 {
 437         struct page * page;
 438         unsigned long new_page;
 439 
 440         page = find_page(inode, offset);
 441         if (page)
 442                 goto found_page;
 443         new_page = __get_free_page(GFP_KERNEL);
 444         page = find_page(inode, offset);
 445         if (page) {
 446                 if (new_page)
 447                         free_page(new_page);
 448                 goto found_page;
 449         }
 450         if (!new_page)
 451                 return 0;
 452         page = mem_map + MAP_NR(new_page);
 453         new_page = 0;
 454         page->count++;
 455         page->uptodate = 0;
 456         page->error = 0;
 457         page->offset = offset;
 458         add_page_to_inode_queue(inode, page);
 459         add_page_to_hash_queue(inode, page);
 460         inode->i_op->readpage(inode, page);
 461 found_page:
 462         wait_on_page(page);
 463         return page_address(page);
 464 }
 465 
 466 /*
 467  * Semantics for shared and private memory areas are different past the end
 468  * of the file. A shared mapping past the last page of the file is an error
  469  * and results in a SIGBUS, while a private mapping just maps in a zero page.
 470  */
 471 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
 472 {
 473         unsigned long offset;
 474         struct inode * inode = area->vm_inode;
 475         unsigned long page;
 476 
 477         offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
 478         if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
 479                 return 0;
 480 
 481         page = fill_page(inode, offset);
 482         if (page && no_share) {
 483                 unsigned long new_page = __get_free_page(GFP_KERNEL);
 484                 if (new_page)
 485                         memcpy((void *) new_page, (void *) page, PAGE_SIZE);
 486                 free_page(page);
 487                 return new_page;
 488         }
 489         return page;
 490 }
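
/*
 * Illustrative sketch (editor's addition): roughly how the fault handler
 * is expected to invoke a nopage method.  A write fault on a non-shared
 * mapping asks for an unshared copy of the page; the real do_no_page()
 * caller in mm/memory.c differs in detail.
 */
static unsigned long example_fault(struct vm_area_struct * vma,
        unsigned long address, int write_access)
{
        int no_share = write_access && !(vma->vm_flags & VM_SHARED);

        return vma->vm_ops->nopage(vma, address, no_share);
}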
 491 
 492 /*
 493  * Tries to write a shared mapped page to its backing store. May return -EIO
 494  * if the disk is full.
 495  */
 496 static int filemap_write_page(struct vm_area_struct * vma,
 497         unsigned long offset,
 498         unsigned long page)
 499 {
 500         int old_fs;
 501         unsigned long size, result;
 502         struct file file;
 503         struct inode * inode;
 504         struct buffer_head * bh;
 505 
 506         bh = mem_map[MAP_NR(page)].buffers;
 507         if (bh) {
 508                 /* whee.. just mark the buffer heads dirty */
 509                 struct buffer_head * tmp = bh;
 510                 do {
 511                         mark_buffer_dirty(tmp, 0);
 512                         tmp = tmp->b_this_page;
 513                 } while (tmp != bh);
 514                 return 0;
 515         }
 516 
 517         inode = vma->vm_inode;
 518         file.f_op = inode->i_op->default_file_ops;
 519         if (!file.f_op->write)
 520                 return -EIO;
 521         size = offset + PAGE_SIZE;
 522         /* refuse to extend file size.. */
 523         if (S_ISREG(inode->i_mode)) {
 524                 if (size > inode->i_size)
 525                         size = inode->i_size;
 526                 /* Ho humm.. We should have tested for this earlier */
 527                 if (size < offset)
 528                         return -EIO;
 529         }
 530         size -= offset;
 531         file.f_mode = 3;
 532         file.f_flags = 0;
 533         file.f_count = 1;
 534         file.f_inode = inode;
 535         file.f_pos = offset;
 536         file.f_reada = 0;
 537         old_fs = get_fs();
 538         set_fs(KERNEL_DS);
 539         result = file.f_op->write(inode, &file, (const char *) page, size);
 540         set_fs(old_fs);
 541         if (result != size)
 542                 return -EIO;
 543         return 0;
 544 }
 545 
 546 
 547 /*
 548  * Swapping to a shared file: while we're busy writing out the page
 549  * (and the page still exists in memory), we save the page information
 550  * in the page table, so that "filemap_swapin()" can re-use the page
 551  * immediately if it is called while we're busy swapping it out..
 552  *
 553  * Once we've written it all out, we mark the page entry "empty", which
 554  * will result in a normal page-in (instead of a swap-in) from the now
 555  * up-to-date disk file.
 556  */
 557 int filemap_swapout(struct vm_area_struct * vma,
 558         unsigned long offset,
 559         pte_t *page_table)
 560 {
 561         int error;
 562         unsigned long page = pte_page(*page_table);
 563         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 564 
 565         set_pte(page_table, __pte(entry));
 566         /* Yuck, perhaps a slightly modified swapout parameter set? */
 567         invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
 568         error = filemap_write_page(vma, offset, page);
 569         if (pte_val(*page_table) == entry)
 570                 pte_clear(page_table);
 571         return error;
 572 }
 573 
 574 /*
 575  * filemap_swapin() is called only if we have something in the page
 576  * tables that is non-zero (but not present), which we know to be the
 577  * page index of a page that is busy being swapped out (see above).
 578  * So we just use it directly..
 579  */
 580 static pte_t filemap_swapin(struct vm_area_struct * vma,
 581         unsigned long offset,
 582         unsigned long entry)
 583 {
 584         unsigned long page = SWP_OFFSET(entry);
 585 
 586         mem_map[page].count++;
 587         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 588         return mk_pte(page,vma->vm_page_prot);
 589 }
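
/*
 * Illustrative sketch (editor's addition, hypothetical helper): the
 * swapout/swapin pair above round-trips the page's mem_map index through
 * a fake swap entry of type SHM_SWP_TYPE, so a fault that races with the
 * write-out finds the very same physical page again.
 */
static unsigned long example_entry_roundtrip(unsigned long page)
{
        unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

        /* SWP_OFFSET() recovers the mem_map index packed in above */
        return (SWP_OFFSET(entry) << PAGE_SHIFT) + PAGE_OFFSET; /* == page */
}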
 590 
 591 
 592 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 593         unsigned long address, unsigned int flags)
 594 {
 595         pte_t pte = *ptep;
 596         unsigned long page;
 597         int error;
 598 
 599         if (!(flags & MS_INVALIDATE)) {
 600                 if (!pte_present(pte))
 601                         return 0;
 602                 if (!pte_dirty(pte))
 603                         return 0;
 604                 set_pte(ptep, pte_mkclean(pte));
 605                 invalidate_page(vma, address);
 606                 page = pte_page(pte);
 607                 mem_map[MAP_NR(page)].count++;
 608         } else {
 609                 if (pte_none(pte))
 610                         return 0;
 611                 pte_clear(ptep);
 612                 invalidate_page(vma, address);
 613                 if (!pte_present(pte)) {
 614                         swap_free(pte_val(pte));
 615                         return 0;
 616                 }
 617                 page = pte_page(pte);
 618                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 619                         free_page(page);
 620                         return 0;
 621                 }
 622         }
 623         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 624         free_page(page);
 625         return error;
 626 }
 627 
 628 static inline int filemap_sync_pte_range(pmd_t * pmd,
 629         unsigned long address, unsigned long size, 
 630         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 631 {
 632         pte_t * pte;
 633         unsigned long end;
 634         int error;
 635 
 636         if (pmd_none(*pmd))
 637                 return 0;
 638         if (pmd_bad(*pmd)) {
 639                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 640                 pmd_clear(pmd);
 641                 return 0;
 642         }
 643         pte = pte_offset(pmd, address);
 644         offset += address & PMD_MASK;
 645         address &= ~PMD_MASK;
 646         end = address + size;
 647         if (end > PMD_SIZE)
 648                 end = PMD_SIZE;
 649         error = 0;
 650         do {
 651                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 652                 address += PAGE_SIZE;
 653                 pte++;
 654         } while (address < end);
 655         return error;
 656 }
 657 
 658 static inline int filemap_sync_pmd_range(pgd_t * pgd,
 659         unsigned long address, unsigned long size, 
 660         struct vm_area_struct *vma, unsigned int flags)
 661 {
 662         pmd_t * pmd;
 663         unsigned long offset, end;
 664         int error;
 665 
 666         if (pgd_none(*pgd))
 667                 return 0;
 668         if (pgd_bad(*pgd)) {
 669                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 670                 pgd_clear(pgd);
 671                 return 0;
 672         }
 673         pmd = pmd_offset(pgd, address);
 674         offset = address & PMD_MASK;
 675         address &= ~PMD_MASK;
 676         end = address + size;
 677         if (end > PGDIR_SIZE)
 678                 end = PGDIR_SIZE;
 679         error = 0;
 680         do {
 681                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 682                 address = (address + PMD_SIZE) & PMD_MASK;
 683                 pmd++;
 684         } while (address < end);
 685         return error;
 686 }
 687 
 688 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 689         size_t size, unsigned int flags)
 690 {
 691         pgd_t * dir;
 692         unsigned long end = address + size;
 693         int error = 0;
 694 
 695         dir = pgd_offset(current->mm, address);
 696         while (address < end) {
 697                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 698                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 699                 dir++;
 700         }
 701         invalidate_range(vma->vm_mm, end - size, end);
 702         return error;
 703 }
 704 
 705 /*
 706  * This handles (potentially partial) area unmaps..
 707  */
 708 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
 709 {
 710         filemap_sync(vma, start, len, MS_ASYNC);
 711 }
 712 
 713 /*
 714  * Shared mappings need to be able to do the right thing at
 715  * close/unmap/sync. They will also use the private file as
 716  * backing-store for swapping..
 717  */
 718 static struct vm_operations_struct file_shared_mmap = {
 719         NULL,                   /* no special open */
 720         NULL,                   /* no special close */
 721         filemap_unmap,          /* unmap - we need to sync the pages */
 722         NULL,                   /* no special protect */
 723         filemap_sync,           /* sync */
 724         NULL,                   /* advise */
 725         filemap_nopage,         /* nopage */
 726         NULL,                   /* wppage */
 727         filemap_swapout,        /* swapout */
 728         filemap_swapin,         /* swapin */
 729 };
 730 
 731 /*
 732  * Private mappings just need to be able to load in the map.
 733  *
 734  * (This is actually used for shared mappings as well, if we
 735  * know they can't ever get write permissions..)
 736  */
 737 static struct vm_operations_struct file_private_mmap = {
 738         NULL,                   /* open */
 739         NULL,                   /* close */
 740         NULL,                   /* unmap */
 741         NULL,                   /* protect */
 742         NULL,                   /* sync */
 743         NULL,                   /* advise */
 744         filemap_nopage,         /* nopage */
 745         NULL,                   /* wppage */
 746         NULL,                   /* swapout */
 747         NULL,                   /* swapin */
 748 };
 749 
 750 /* This is used for a general mmap of a disk file */
 751 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 752 {
 753         struct vm_operations_struct * ops;
 754 
 755         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 756                 ops = &file_shared_mmap;
 757                 /* share_page() can only guarantee proper page sharing if
 758                  * the offsets are all page aligned. */
 759                 if (vma->vm_offset & (PAGE_SIZE - 1))
 760                         return -EINVAL;
 761         } else {
 762                 ops = &file_private_mmap;
 763                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 764                         return -EINVAL;
 765         }
 766         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 767                 return -EACCES;
 768         if (!inode->i_op || !inode->i_op->readpage)
 769                 return -ENOEXEC;
 770         if (!IS_RDONLY(inode)) {
 771                 inode->i_atime = CURRENT_TIME;
 772                 inode->i_dirt = 1;
 773         }
 774         vma->vm_inode = inode;
 775         inode->i_count++;
 776         vma->vm_ops = ops;
 777         return 0;
 778 }
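
/*
 * Illustrative sketch (editor's addition): the mmap() system call path
 * hands the already-built vma to the filesystem's mmap method, which for
 * "normal" filesystems is generic_file_mmap() above.  "example_map_file"
 * is hypothetical; the real caller is do_mmap() in mm/mmap.c.
 */
static int example_map_file(struct inode * inode, struct file * file,
        struct vm_area_struct * vma)
{
        if (!file->f_op || !file->f_op->mmap)
                return -ENODEV;
        return file->f_op->mmap(inode, file, vma);
}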
 779 
 780 
 781 /*
 782  * The msync() system call.
 783  */
 784 
 785 static int msync_interval(struct vm_area_struct * vma,
 786         unsigned long start, unsigned long end, int flags)
 787 {
 788         if (!vma->vm_inode)
 789                 return 0;
 790         if (vma->vm_ops->sync) {
 791                 int error;
 792                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 793                 if (error)
 794                         return error;
 795                 if (flags & MS_SYNC)
 796                         return file_fsync(vma->vm_inode, NULL);
 797                 return 0;
 798         }
 799         return 0;
 800 }
 801 
 802 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
 803 {
 804         unsigned long end;
 805         struct vm_area_struct * vma;
 806         int unmapped_error, error;
 807 
 808         if (start & ~PAGE_MASK)
 809                 return -EINVAL;
 810         len = (len + ~PAGE_MASK) & PAGE_MASK;
 811         end = start + len;
 812         if (end < start)
 813                 return -EINVAL;
 814         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 815                 return -EINVAL;
 816         if (end == start)
 817                 return 0;
 818         /*
 819          * If the interval [start,end) covers some unmapped address ranges,
 820          * just ignore them, but return -EFAULT at the end.
 821          */
 822         vma = find_vma(current, start);
 823         unmapped_error = 0;
 824         for (;;) {
 825                 /* Still start < end. */
 826                 if (!vma)
 827                         return -EFAULT;
 828                 /* Here start < vma->vm_end. */
 829                 if (start < vma->vm_start) {
 830                         unmapped_error = -EFAULT;
 831                         start = vma->vm_start;
 832                 }
 833                 /* Here vma->vm_start <= start < vma->vm_end. */
 834                 if (end <= vma->vm_end) {
 835                         if (start < end) {
 836                                 error = msync_interval(vma, start, end, flags);
 837                                 if (error)
 838                                         return error;
 839                         }
 840                         return unmapped_error;
 841                 }
 842                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 843                 error = msync_interval(vma, start, vma->vm_end, flags);
 844                 if (error)
 845                         return error;
 846                 start = vma->vm_end;
 847                 vma = vma->vm_next;
 848         }
 849 }
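
/*
 * Illustrative userspace sketch (editor's addition): dirtying a shared
 * file mapping and flushing it back with msync(MS_SYNC), which enters
 * the kernel through sys_msync() above.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int flush_example(const char *path)
{
        struct stat st;
        char *map;
        int fd = open(path, O_RDWR);

        if (fd < 0 || fstat(fd, &st) < 0)
                return -1;
        map = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED)
                return -1;
        memcpy(map, "hello", 5);                /* dirty the first page */
        msync(map, st.st_size, MS_SYNC);        /* write dirty pages back */
        munmap(map, st.st_size);
        close(fd);
        return 0;
}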
