root/mm/filemap.c


DEFINITIONS

This source file includes the following definitions:
  1. invalidate_inode_pages
  2. truncate_inode_pages
  3. shrink_mmap
  4. page_unuse
  5. update_vm_cache
  6. try_to_read_ahead
  7. __wait_on_page
  8. generic_file_read
  9. fill_page
  10. filemap_nopage
  11. filemap_write_page
  12. filemap_swapout
  13. filemap_swapin
  14. filemap_sync_pte
  15. filemap_sync_pte_range
  16. filemap_sync_pmd_range
  17. filemap_sync
  18. filemap_unmap
  19. generic_file_mmap
  20. msync_interval
  21. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/swap.h>
  25 
  26 #include <asm/segment.h>
  27 #include <asm/system.h>
  28 #include <asm/pgtable.h>
  29 
  30 /*
  31  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  32  * though.
  33  *
  34  * Shared mappings now work. 15.8.1995  Bruno.
  35  */
  36 
  37 unsigned long page_cache_size = 0;
  38 struct page * page_hash_table[PAGE_HASH_SIZE];
  39 
  40 /*
  41  * Simple routines for both non-shared and shared mappings.
  42  */
  43 
  44 /*
  45  * Invalidate the pages of an inode, removing all pages that aren't
  46  * locked down (those are sure to be up-to-date anyway, so we shouldn't
  47  * invalidate them).
  48  */
  49 void invalidate_inode_pages(struct inode * inode)
  50 {
  51         struct page ** p;
  52         struct page * page;
  53 
  54         p = &inode->i_pages;
  55         while ((page = *p) != NULL) {
  56                 if (page->locked) {
  57                         p = &page->next;
  58                         continue;
  59                 }
  60                 inode->i_nrpages--;
  61                 if ((*p = page->next) != NULL)
  62                         (*p)->prev = page->prev;
  63                 page->dirty = 0;
  64                 page->next = NULL;
  65                 page->prev = NULL;
  66                 remove_page_from_hash_queue(page);
  67                 page->inode = NULL;
  68                 free_page(page_address(page));
  69                 continue;
  70         }
  71 }
  72 
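The loop in invalidate_inode_pages() above (and in truncate_inode_pages() below) walks inode->i_pages through a pointer to the link field rather than to the node itself, so a page can be unlinked without keeping a separate "previous" pointer. A minimal, self-contained sketch of the same idiom on an ordinary singly linked list (the struct and helper here are illustrative, not kernel types):

#include <stdlib.h>

struct node {
        int val;
        struct node *next;
};

/* remove every matching node, advancing a pointer to the link field */
static void remove_matching(struct node **head, int val)
{
        struct node **p = head;
        struct node *n;

        while ((n = *p) != NULL) {
                if (n->val == val) {
                        *p = n->next;   /* previous link now skips n */
                        free(n);
                        continue;       /* *p already names the next node */
                }
                p = &n->next;           /* step to the next link field */
        }
}
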
  73 /*
  74  * Truncate the page cache at a set offset, removing the pages
  75  * that are beyond that offset (and zeroing out partial pages).
  76  */
  77 void truncate_inode_pages(struct inode * inode, unsigned long start)
  78 {
  79         struct page ** p;
  80         struct page * page;
  81 
  82 repeat:
  83         p = &inode->i_pages;
  84         while ((page = *p) != NULL) {
  85                 unsigned long offset = page->offset;
  86 
  87                 /* page wholly truncated - free it */
  88                 if (offset >= start) {
  89                         if (page->locked) {
  90                                 wait_on_page(page);
  91                                 goto repeat;
  92                         }
  93                         inode->i_nrpages--;
  94                         if ((*p = page->next) != NULL)
  95                                 (*p)->prev = page->prev;
  96                         page->dirty = 0;
  97                         page->next = NULL;
  98                         page->prev = NULL;
  99                         remove_page_from_hash_queue(page);
 100                         page->inode = NULL;
 101                         free_page(page_address(page));
 102                         continue;
 103                 }
 104                 p = &page->next;
 105                 offset = start - offset;
 106                 /* partial truncate, clear end of page */
 107                 if (offset < PAGE_SIZE)
 108                         memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
 109         }
 110 }
 111 
 112 int shrink_mmap(int priority, unsigned long limit)
 113 {
 114         static int clock = 0;
 115         struct page * page;
 116         struct buffer_head *tmp, *bh;
 117 
 118         if (limit > high_memory)
 119                 limit = high_memory;
 120         limit = MAP_NR(limit);
 121         if (clock >= limit)
 122                 clock = 0;
 123         priority = (limit<<2) >> priority;
 124         page = mem_map + clock;
 125         while (priority-- > 0) {
 126                 if (page->locked)
 127                     goto next;
 128                 /* First of all, regenerate the page's referenced bit
 129                    from any buffers in the page */
 130                 bh = buffer_pages[MAP_NR(page_address(page))];
 131                 if (bh) {
 132                         tmp = bh;
 133                         do {
 134                                 if (buffer_touched(tmp)) {
 135                                         clear_bit(BH_Touched, &tmp->b_state);
 136                                         page->referenced = 1;
 137                                 }
 138                                 tmp = tmp->b_this_page;
 139                         } while (tmp != bh);
 140                 }
 141 
 142                 /* We can't throw away shared pages, but we do mark
 143                    them as referenced.  This relies on the fact that
 144                    no page is currently in both the page cache and the
 145                    buffer cache; we'd have to modify the following
 146                    test to allow for that case. */
 147                 if (page->count > 1)
 148                         page->referenced = 1;
 149                 else if (page->referenced)
 150                         page->referenced = 0;
 151                 else if (page->count) {
 152                         /* The page is an old, unshared page --- try
 153                            to discard it. */
 154                         if (page->inode) {
 155                                 remove_page_from_hash_queue(page);
 156                                 remove_page_from_inode_queue(page);
 157                                 free_page(page_address(page));
 158                                 return 1;
 159                         }
 160                         if (bh && try_to_free_buffer(bh, &bh, 6))
 161                                 return 1;
 162                 }
 163 next:
 164                 page++;
 165                 clock++;
 166                 if (clock >= limit) {
 167                         clock = 0;
 168                         page = mem_map;
 169                 }
 170         }
 171         return 0;
 172 }
 173 
 174 /*
 175  * This is called from try_to_swap_out() when we try to get rid of some
 176  * pages..  If we're unmapping the last occurrence of this page, we also
 177  * free it from the page hash-queues etc, as we don't want to keep it
 178  * in-core unnecessarily.
 179  */
 180 unsigned long page_unuse(unsigned long page)
 181 {
 182         struct page * p = mem_map + MAP_NR(page);
 183         int count = p->count;
 184 
 185         if (count != 2)
 186                 return count;
 187         if (!p->inode)
 188                 return count;
 189         remove_page_from_hash_queue(p);
 190         remove_page_from_inode_queue(p);
 191         free_page(page);
 192         return 1;
 193 }
 194 
 195 /*
 196  * Update a page cache copy, when we're doing a "write()" system call
 197  * See also "update_vm_cache()".
 198  */
 199 void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
 200 {
 201         unsigned long offset, len;
 202 
 203         offset = (pos & ~PAGE_MASK);
 204         pos = pos & PAGE_MASK;
 205         len = PAGE_SIZE - offset;
 206         do {
 207                 struct page * page;
 208 
 209                 if (len > count)
 210                         len = count;
 211                 page = find_page(inode, pos);
 212                 if (page) {
 213                         unsigned long addr;
 214 
 215                         wait_on_page(page);
 216                         addr = page_address(page);
 217                         memcpy((void *) (offset + addr), buf, len);
 218                         free_page(addr);
 219                 }
 220                 count -= len;
 221                 buf += len;
 222                 len = PAGE_SIZE;
 223                 offset = 0;
 224                 pos += PAGE_SIZE;
 225         } while (count);
 226 }
 227 
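update_vm_cache() above is what keeps data written with the write() system call visible through existing mappings of the same file: the new bytes are copied straight into any cached page covering that range. A minimal userspace sketch of the behaviour it provides (the file name is illustrative, and 4096 is assumed as the page size):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/cache-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
        char *map;

        write(fd, "old data", 8);
        map = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);

        /* the write() below updates the cached page in place,
           so the mapping observes the new contents */
        lseek(fd, 0, SEEK_SET);
        write(fd, "new data", 8);
        printf("%.8s\n", map);          /* prints "new data" */

        munmap(map, 4096);
        close(fd);
        return 0;
}
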
 228 /*
 229  * Try to read ahead in the file. "page_cache" is a potentially free page
 230  * that we could use for the cache (if it is 0 we can try to create one,
 231  * this is all overlapped with the IO on the previous page finishing anyway)
 232  */
 233 static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
 234 {
 235         struct page * page;
 236 
 237         offset &= PAGE_MASK;
 238         if (!page_cache) {
 239                 page_cache = __get_free_page(GFP_KERNEL);
 240                 if (!page_cache)
 241                         return 0;
 242         }
 243         if (offset >= inode->i_size)
 244                 return page_cache;
 245 #if 1
 246         page = find_page(inode, offset);
 247         if (page) {
 248                 page->count--;
 249                 return page_cache;
 250         }
 251         /*
 252          * Ok, add the new page to the hash-queues...
 253          */
 254         page = mem_map + MAP_NR(page_cache);
 255         page->count++;
 256         page->uptodate = 0;
 257         page->error = 0;
 258         page->offset = offset;
 259         add_page_to_inode_queue(inode, page);
 260         add_page_to_hash_queue(inode, page);
 261 
 262         inode->i_op->readpage(inode, page);
 263 
 264         free_page(page_cache);
 265         return 0;
 266 #else
 267         return page_cache;
 268 #endif
 269 }
 270 
 271 /* 
 272  * Wait for IO to complete on a locked page.
 273  */
 274 void __wait_on_page(struct page *page)
 275 {
 276         struct wait_queue wait = { current, NULL };
 277 
 278         page->count++;
 279         add_wait_queue(&page->wait, &wait);
 280 repeat:
 281         current->state = TASK_UNINTERRUPTIBLE;
 282         if (page->locked) {
 283                 schedule();
 284                 goto repeat;
 285         }
 286         remove_wait_queue(&page->wait, &wait);
 287         page->count--;
 288         current->state = TASK_RUNNING;
 289 }
 290 
 291 
 292 /*
 293  * This is a generic file read routine, and uses the
 294  * inode->i_op->readpage() function for the actual low-level
 295  * stuff.
 296  *
 297  * This is really ugly. But the goto's actually try to clarify some
 298  * of the logic when it comes to error handling etc.
 299  */
 300 #define MAX_READAHEAD (PAGE_SIZE*4)
 301 int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
 302 {
 303         int error, read;
 304         unsigned long pos, page_cache;
 305         
 306         if (count <= 0)
 307                 return 0;
 308         error = 0;
 309         read = 0;
 310         page_cache = 0;
 311 
 312         pos = filp->f_pos;
 313         for (;;) {
 314                 struct page *page;
 315                 unsigned long offset, addr, nr;
 316 
 317                 if (pos >= inode->i_size)
 318                         break;
 319                 offset = pos & ~PAGE_MASK;
 320                 nr = PAGE_SIZE - offset;
 321                 /*
 322                  * Try to find the data in the page cache..
 323                  */
 324                 page = find_page(inode, pos & PAGE_MASK);
 325                 if (page)
 326                         goto found_page;
 327 
 328                 /*
 329                  * Ok, it wasn't cached, so we need to create a new
 330                  * page..
 331                  */
 332                 if (page_cache)
 333                         goto new_page;
 334 
 335                 error = -ENOMEM;
 336                 page_cache = __get_free_page(GFP_KERNEL);
 337                 if (!page_cache)
 338                         break;
 339                 error = 0;
 340 
 341                 /*
 342                  * That could have slept, so we need to check again..
 343                  */
 344                 if (pos >= inode->i_size)
 345                         break;
 346                 page = find_page(inode, pos & PAGE_MASK);
 347                 if (!page)
 348                         goto new_page;
 349 
 350 found_page:
 351                 addr = page_address(page);
 352                 if (nr > count)
 353                         nr = count;
 354 
 355                 /*
 356                  * We may want to do read-ahead.. Do this only
 357                  * if we're waiting for the current page to be
 358                  * filled in, and if
 359                  *  - we're going to read more than this page
 360                  *  - if "f_reada" is set
 361                  */
 362                 if (page->locked) {
 363                         unsigned long max_ahead, ahead;
 364 
 365                         max_ahead = count - nr;
 366                         if (filp->f_reada || max_ahead > MAX_READAHEAD)
 367                                 max_ahead = MAX_READAHEAD;
 368                         ahead = 0;
 369                         while (ahead < max_ahead) {
 370                                 ahead += PAGE_SIZE;
 371                                 page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
 372                         }
 373                         __wait_on_page(page);
 374                 }
 375                 if (!page->uptodate)
 376                         goto read_page;
 377                 if (nr > inode->i_size - pos)
 378                         nr = inode->i_size - pos;
 379                 memcpy_tofs(buf, (void *) (addr + offset), nr);
 380                 free_page(addr);
 381                 buf += nr;
 382                 pos += nr;
 383                 read += nr;
 384                 count -= nr;
 385                 if (count)
 386                         continue;
 387                 break;
 388         
 389 
 390 new_page:
 391                 /*
 392                  * Ok, add the new page to the hash-queues...
 393                  */
 394                 addr = page_cache;
 395                 page = mem_map + MAP_NR(page_cache);
 396                 page_cache = 0;
 397                 page->count++;
 398                 page->uptodate = 0;
 399                 page->error = 0;
 400                 page->offset = pos & PAGE_MASK;
 401                 add_page_to_inode_queue(inode, page);
 402                 add_page_to_hash_queue(inode, page);
 403 
 404                 /*
 405                  * Error handling is tricky. If we get a read error,
 406                  * the cached page stays in the cache (but uptodate=0),
 407                  * and the next process that accesses it will try to
 408                  * re-read it. This is needed for NFS etc, where the
 409                  * identity of the reader can decide if we can read the
 410                  * page or not..
 411                  */
 412 read_page:
 413                 error = inode->i_op->readpage(inode, page);
 414                 if (!error)
 415                         goto found_page;
 416                 free_page(addr);
 417                 break;
 418         }
 419 
 420         filp->f_pos = pos;
 421         filp->f_reada = 1;
 422         if (page_cache)
 423                 free_page(page_cache);
 424         if (!IS_RDONLY(inode)) {
 425                 inode->i_atime = CURRENT_TIME;
 426                 inode->i_dirt = 1;
 427         }
 428         if (!read)
 429                 read = error;
 430         return read;
 431 }
 432 
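generic_file_read() above only issues read-ahead while the current page is still locked for I/O, and f_reada is set once a descriptor has been read from, so an ordinary sequential read loop picks up the overlap between disk I/O and copying automatically. A minimal userspace sketch (the buffer size is illustrative):

#include <fcntl.h>
#include <unistd.h>

/* read a file sequentially; after the first read() the following
   pages are read ahead while the current one is still coming in */
static long slurp(const char *path)
{
        char buf[4096];
        long total = 0;
        ssize_t n;
        int fd = open(path, O_RDONLY);

        if (fd < 0)
                return -1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                total += n;
        close(fd);
        return total;
}
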
 433 /*
 434  * Find a cached page and wait for it to become up-to-date, return
 435  * the page address.  Increments the page count.
 436  */
 437 static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
 438 {
 439         struct page * page;
 440         unsigned long new_page;
 441 
 442         page = find_page(inode, offset);
 443         if (page)
 444                 goto found_page;
 445         new_page = __get_free_page(GFP_KERNEL);
 446         page = find_page(inode, offset);
 447         if (page) {
 448                 if (new_page)
 449                         free_page(new_page);
 450                 goto found_page;
 451         }
 452         if (!new_page)
 453                 return 0;
 454         page = mem_map + MAP_NR(new_page);
 455         new_page = 0;
 456         page->count++;
 457         page->uptodate = 0;
 458         page->error = 0;
 459         page->offset = offset;
 460         add_page_to_inode_queue(inode, page);
 461         add_page_to_hash_queue(inode, page);
 462         inode->i_op->readpage(inode, page);
 463 found_page:
 464         wait_on_page(page);
 465         return page_address(page);
 466 }
 467 
 468 /*
 469  * Semantics for shared and private memory areas are different past the end
 470  * of the file. A shared mapping past the last page of the file is an error
  471  * and results in a SIGBUS, while a private mapping just maps in a zero page.
 472  */
 473 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
 474 {
 475         unsigned long offset;
 476         struct inode * inode = area->vm_inode;
 477         unsigned long page;
 478 
 479         offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
 480         if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
 481                 return 0;
 482 
 483         page = fill_page(inode, offset);
 484         if (page && no_share) {
 485                 unsigned long new_page = __get_free_page(GFP_KERNEL);
 486                 if (new_page)
 487                         memcpy((void *) new_page, (void *) page, PAGE_SIZE);
 488                 free_page(page);
 489                 return new_page;
 490         }
 491         return page;
 492 }
 493 
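The comment above filemap_nopage() spells out the user-visible difference: touching a MAP_SHARED mapping beyond the last page of the file raises SIGBUS, while the same access through a MAP_PRIVATE mapping just sees a zero-filled page. A minimal userspace sketch (the file name and the 4096-byte page size are illustrative assumptions):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/nopage-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
        char *priv, *shared;

        write(fd, "x", 1);      /* file is one byte, so only page 0 exists */

        /* map two pages: page 1 lies wholly past the end of the file */
        priv = mmap(NULL, 2 * 4096, PROT_READ, MAP_PRIVATE, fd, 0);
        printf("private past EOF: %d\n", priv[4096]);   /* zero page */

        shared = mmap(NULL, 2 * 4096, PROT_READ, MAP_SHARED, fd, 0);
        printf("shared past EOF: %d\n", shared[4096]);  /* raises SIGBUS */

        return 0;
}
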
 494 /*
 495  * Tries to write a shared mapped page to its backing store. May return -EIO
 496  * if the disk is full.
 497  */
 498 static int filemap_write_page(struct vm_area_struct * vma,
 499         unsigned long offset,
 500         unsigned long page)
 501 {
 502         int old_fs;
 503         unsigned long size, result;
 504         struct file file;
 505         struct inode * inode;
 506         struct buffer_head * bh;
 507 
 508         bh = buffer_pages[MAP_NR(page)];
 509         if (bh) {
 510                 /* whee.. just mark the buffer heads dirty */
 511                 struct buffer_head * tmp = bh;
 512                 do {
 513                         mark_buffer_dirty(tmp, 0);
 514                         tmp = tmp->b_this_page;
 515                 } while (tmp != bh);
 516                 return 0;
 517         }
 518 
 519         inode = vma->vm_inode;
 520         file.f_op = inode->i_op->default_file_ops;
 521         if (!file.f_op->write)
 522                 return -EIO;
 523         size = offset + PAGE_SIZE;
 524         /* refuse to extend file size.. */
 525         if (S_ISREG(inode->i_mode)) {
 526                 if (size > inode->i_size)
 527                         size = inode->i_size;
 528                 /* Ho humm.. We should have tested for this earlier */
 529                 if (size < offset)
 530                         return -EIO;
 531         }
 532         size -= offset;
 533         file.f_mode = 3;
 534         file.f_flags = 0;
 535         file.f_count = 1;
 536         file.f_inode = inode;
 537         file.f_pos = offset;
 538         file.f_reada = 0;
 539         old_fs = get_fs();
 540         set_fs(KERNEL_DS);
 541         result = file.f_op->write(inode, &file, (const char *) page, size);
 542         set_fs(old_fs);
 543         if (result != size)
 544                 return -EIO;
 545         return 0;
 546 }
 547 
 548 
 549 /*
 550  * Swapping to a shared file: while we're busy writing out the page
 551  * (and the page still exists in memory), we save the page information
 552  * in the page table, so that "filemap_swapin()" can re-use the page
 553  * immediately if it is called while we're busy swapping it out..
 554  *
 555  * Once we've written it all out, we mark the page entry "empty", which
 556  * will result in a normal page-in (instead of a swap-in) from the now
 557  * up-to-date disk file.
 558  */
 559 int filemap_swapout(struct vm_area_struct * vma,
 560         unsigned long offset,
 561         pte_t *page_table)
 562 {
 563         int error;
 564         unsigned long page = pte_page(*page_table);
 565         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 566 
 567         set_pte(page_table, __pte(entry));
 568         /* Yuck, perhaps a slightly modified swapout parameter set? */
 569         invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
 570         error = filemap_write_page(vma, offset, page);
 571         if (pte_val(*page_table) == entry)
 572                 pte_clear(page_table);
 573         return error;
 574 }
 575 
 576 /*
 577  * filemap_swapin() is called only if we have something in the page
 578  * tables that is non-zero (but not present), which we know to be the
 579  * page index of a page that is busy being swapped out (see above).
 580  * So we just use it directly..
 581  */
 582 static pte_t filemap_swapin(struct vm_area_struct * vma,
 583         unsigned long offset,
 584         unsigned long entry)
 585 {
 586         unsigned long page = SWP_OFFSET(entry);
 587 
 588         mem_map[page].count++;
 589         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 590         return mk_pte(page,vma->vm_page_prot);
 591 }
 592 
 593 
 594 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 595         unsigned long address, unsigned int flags)
 596 {
 597         pte_t pte = *ptep;
 598         unsigned long page;
 599         int error;
 600 
 601         if (!(flags & MS_INVALIDATE)) {
 602                 if (!pte_present(pte))
 603                         return 0;
 604                 if (!pte_dirty(pte))
 605                         return 0;
 606                 set_pte(ptep, pte_mkclean(pte));
 607                 invalidate_page(vma, address);
 608                 page = pte_page(pte);
 609                 mem_map[MAP_NR(page)].count++;
 610         } else {
 611                 if (pte_none(pte))
 612                         return 0;
 613                 pte_clear(ptep);
 614                 invalidate_page(vma, address);
 615                 if (!pte_present(pte)) {
 616                         swap_free(pte_val(pte));
 617                         return 0;
 618                 }
 619                 page = pte_page(pte);
 620                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 621                         free_page(page);
 622                         return 0;
 623                 }
 624         }
 625         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 626         free_page(page);
 627         return error;
 628 }
 629 
 630 static inline int filemap_sync_pte_range(pmd_t * pmd,
 631         unsigned long address, unsigned long size, 
 632         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 633 {
 634         pte_t * pte;
 635         unsigned long end;
 636         int error;
 637 
 638         if (pmd_none(*pmd))
 639                 return 0;
 640         if (pmd_bad(*pmd)) {
 641                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 642                 pmd_clear(pmd);
 643                 return 0;
 644         }
 645         pte = pte_offset(pmd, address);
 646         offset += address & PMD_MASK;
 647         address &= ~PMD_MASK;
 648         end = address + size;
 649         if (end > PMD_SIZE)
 650                 end = PMD_SIZE;
 651         error = 0;
 652         do {
 653                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 654                 address += PAGE_SIZE;
 655                 pte++;
 656         } while (address < end);
 657         return error;
 658 }
 659 
 660 static inline int filemap_sync_pmd_range(pgd_t * pgd,
 661         unsigned long address, unsigned long size, 
 662         struct vm_area_struct *vma, unsigned int flags)
 663 {
 664         pmd_t * pmd;
 665         unsigned long offset, end;
 666         int error;
 667 
 668         if (pgd_none(*pgd))
 669                 return 0;
 670         if (pgd_bad(*pgd)) {
 671                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 672                 pgd_clear(pgd);
 673                 return 0;
 674         }
 675         pmd = pmd_offset(pgd, address);
 676         offset = address & PMD_MASK;
 677         address &= ~PMD_MASK;
 678         end = address + size;
 679         if (end > PGDIR_SIZE)
 680                 end = PGDIR_SIZE;
 681         error = 0;
 682         do {
 683                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 684                 address = (address + PMD_SIZE) & PMD_MASK;
 685                 pmd++;
 686         } while (address < end);
 687         return error;
 688 }
 689 
 690 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 691         size_t size, unsigned int flags)
 692 {
 693         pgd_t * dir;
 694         unsigned long end = address + size;
 695         int error = 0;
 696 
 697         dir = pgd_offset(current->mm, address);
 698         while (address < end) {
 699                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 700                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 701                 dir++;
 702         }
 703         invalidate_range(vma->vm_mm, end - size, end);
 704         return error;
 705 }
 706 
 707 /*
 708  * This handles (potentially partial) area unmaps..
 709  */
 710 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
 711 {
 712         filemap_sync(vma, start, len, MS_ASYNC);
 713 }
 714 
 715 /*
 716  * Shared mappings need to be able to do the right thing at
 717  * close/unmap/sync. They will also use the private file as
 718  * backing-store for swapping..
 719  */
 720 static struct vm_operations_struct file_shared_mmap = {
 721         NULL,                   /* no special open */
 722         NULL,                   /* no special close */
 723         filemap_unmap,          /* unmap - we need to sync the pages */
 724         NULL,                   /* no special protect */
 725         filemap_sync,           /* sync */
 726         NULL,                   /* advise */
 727         filemap_nopage,         /* nopage */
 728         NULL,                   /* wppage */
 729         filemap_swapout,        /* swapout */
 730         filemap_swapin,         /* swapin */
 731 };
 732 
 733 /*
 734  * Private mappings just need to be able to load in the map.
 735  *
 736  * (This is actually used for shared mappings as well, if we
 737  * know they can't ever get write permissions..)
 738  */
 739 static struct vm_operations_struct file_private_mmap = {
 740         NULL,                   /* open */
 741         NULL,                   /* close */
 742         NULL,                   /* unmap */
 743         NULL,                   /* protect */
 744         NULL,                   /* sync */
 745         NULL,                   /* advise */
 746         filemap_nopage,         /* nopage */
 747         NULL,                   /* wppage */
 748         NULL,                   /* swapout */
 749         NULL,                   /* swapin */
 750 };
 751 
 752 /* This is used for a general mmap of a disk file */
 753 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 754 {
 755         struct vm_operations_struct * ops;
 756 
 757         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 758                 ops = &file_shared_mmap;
 759                 /* share_page() can only guarantee proper page sharing if
 760                  * the offsets are all page aligned. */
 761                 if (vma->vm_offset & (PAGE_SIZE - 1))
 762                         return -EINVAL;
 763         } else {
 764                 ops = &file_private_mmap;
 765                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 766                         return -EINVAL;
 767         }
 768         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 769                 return -EACCES;
 770         if (!inode->i_op || !inode->i_op->readpage)
 771                 return -ENOEXEC;
 772         if (!IS_RDONLY(inode)) {
 773                 inode->i_atime = CURRENT_TIME;
 774                 inode->i_dirt = 1;
 775         }
 776         vma->vm_inode = inode;
 777         inode->i_count++;
 778         vma->vm_ops = ops;
 779         return 0;
 780 }
 781 
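As checked in generic_file_mmap() above, a writable shared mapping is only accepted when the file offset is page aligned (a private mapping merely needs block alignment). A minimal userspace sketch of a mapping that passes the stricter test (the path is illustrative and the file is assumed to be at least two pages long):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long pagesize = sysconf(_SC_PAGESIZE);
        int fd = open("/tmp/mmap-demo", O_RDWR);
        char *map;

        /* offset of exactly one page: page aligned, so the
           shared + writable case above accepts it */
        map = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                   MAP_SHARED, fd, pagesize);
        if (map == MAP_FAILED)
                return 1;

        map[0] = 'y';           /* dirties the shared page */
        munmap(map, pagesize);
        close(fd);
        return 0;
}
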
 782 
 783 /*
 784  * The msync() system call.
 785  */
 786 
 787 static int msync_interval(struct vm_area_struct * vma,
 788         unsigned long start, unsigned long end, int flags)
 789 {
 790         if (!vma->vm_inode)
 791                 return 0;
 792         if (vma->vm_ops->sync) {
 793                 int error;
 794                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 795                 if (error)
 796                         return error;
 797                 if (flags & MS_SYNC)
 798                         return file_fsync(vma->vm_inode, NULL);
 799                 return 0;
 800         }
 801         return 0;
 802 }
 803 
 804 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
 805 {
 806         unsigned long end;
 807         struct vm_area_struct * vma;
 808         int unmapped_error, error;
 809 
 810         if (start & ~PAGE_MASK)
 811                 return -EINVAL;
 812         len = (len + ~PAGE_MASK) & PAGE_MASK;
 813         end = start + len;
 814         if (end < start)
 815                 return -EINVAL;
 816         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 817                 return -EINVAL;
 818         if (end == start)
 819                 return 0;
 820         /*
 821          * If the interval [start,end) covers some unmapped address ranges,
 822          * just ignore them, but return -EFAULT at the end.
 823          */
 824         vma = find_vma(current, start);
 825         unmapped_error = 0;
 826         for (;;) {
 827                 /* Still start < end. */
 828                 if (!vma)
 829                         return -EFAULT;
 830                 /* Here start < vma->vm_end. */
 831                 if (start < vma->vm_start) {
 832                         unmapped_error = -EFAULT;
 833                         start = vma->vm_start;
 834                 }
 835                 /* Here vma->vm_start <= start < vma->vm_end. */
 836                 if (end <= vma->vm_end) {
 837                         if (start < end) {
 838                                 error = msync_interval(vma, start, end, flags);
 839                                 if (error)
 840                                         return error;
 841                         }
 842                         return unmapped_error;
 843                 }
 844                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 845                 error = msync_interval(vma, start, vma->vm_end, flags);
 846                 if (error)
 847                         return error;
 848                 start = vma->vm_end;
 849                 vma = vma->vm_next;
 850         }
 851 }
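
sys_msync() above walks every vma that intersects [start, start+len), so the range may cross several mappings; with MS_SYNC the inode is additionally flushed through file_fsync() after the pages have been written. A minimal userspace sketch of the call (the file name is illustrative and 4096 is assumed as the page size):

#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/msync-demo", O_RDWR);
        char *map = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);

        memcpy(map, "hello", 5);        /* dirty the shared page */

        /* start must be page aligned; MS_ASYNC writes the pages back,
           MS_SYNC also forces the inode out via fsync */
        msync(map, 4096, MS_SYNC);

        munmap(map, 4096);
        close(fd);
        return 0;
}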
