root/mm/filemap.c


DEFINITIONS

This source file includes the following definitions.
  1. invalidate_inode_pages
  2. truncate_inode_pages
  3. shrink_mmap
  4. page_unuse
  5. update_vm_cache
  6. try_to_read_ahead
  7. __wait_on_page
  8. generic_file_read
  9. fill_page
  10. filemap_nopage
  11. do_write_page
  12. filemap_write_page
  13. filemap_swapout
  14. filemap_swapin
  15. filemap_sync_pte
  16. filemap_sync_pte_range
  17. filemap_sync_pmd_range
  18. filemap_sync
  19. filemap_unmap
  20. generic_file_mmap
  21. msync_interval
  22. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/swap.h>
  25 
  26 #include <asm/segment.h>
  27 #include <asm/system.h>
  28 #include <asm/pgtable.h>
  29 
  30 /*
  31  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  32  * though.
  33  *
  34  * Shared mappings now work. 15.8.1995  Bruno.
  35  */
  36 
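     /*
      * The page cache: the number of pages currently in it, and the
      * hash table used to look pages up by (inode, offset).
      */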
  37 unsigned long page_cache_size = 0;
  38 struct page * page_hash_table[PAGE_HASH_SIZE];
  39 
  40 /*
  41  * Simple routines for both non-shared and shared mappings.
  42  */
  43 
  44 /*
  45  * Invalidate the pages of an inode, removing all pages that aren't
  46  * locked down (those are sure to be up-to-date anyway, so we shouldn't
  47  * invalidate them).
  48  */
  49 void invalidate_inode_pages(struct inode * inode)
  50 {
  51         struct page ** p;
  52         struct page * page;
  53 
  54         p = &inode->i_pages;
  55         while ((page = *p) != NULL) {
  56                 if (page->locked) {
  57                         p = &page->next;
  58                         continue;
  59                 }
  60                 inode->i_nrpages--;
  61                 if ((*p = page->next) != NULL)
  62                         (*p)->prev = page->prev;
  63                 page->dirty = 0;
  64                 page->next = NULL;
  65                 page->prev = NULL;
  66                 remove_page_from_hash_queue(page);
  67                 page->inode = NULL;
  68                 free_page(page_address(page));
  69                 continue;
  70         }
  71 }
  72 
  73 /*
  74  * Truncate the page cache at a set offset, removing the pages
  75  * that are beyond that offset (and zeroing out partial pages).
  76  */
  77 void truncate_inode_pages(struct inode * inode, unsigned long start)
  78 {
  79         struct page ** p;
  80         struct page * page;
  81 
  82 repeat:
  83         p = &inode->i_pages;
  84         while ((page = *p) != NULL) {
  85                 unsigned long offset = page->offset;
  86 
  87                 /* page wholly truncated - free it */
  88                 if (offset >= start) {
  89                         if (page->locked) {
  90                                 wait_on_page(page);
  91                                 goto repeat;
  92                         }
  93                         inode->i_nrpages--;
  94                         if ((*p = page->next) != NULL)
  95                                 (*p)->prev = page->prev;
  96                         page->dirty = 0;
  97                         page->next = NULL;
  98                         page->prev = NULL;
  99                         remove_page_from_hash_queue(page);
 100                         page->inode = NULL;
 101                         free_page(page_address(page));
 102                         continue;
 103                 }
 104                 p = &page->next;
 105                 offset = start - offset;
 106                 /* partial truncate, clear end of page */
 107                 if (offset < PAGE_SIZE)
 108                         memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
 109         }
 110 }
 111 
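     /*
      * Scan the mem_map with a simple clock algorithm and try to reclaim
      * one page.  Unreferenced, unshared pages are dropped from the page
      * cache (or have their buffers freed); shared or recently referenced
      * pages are skipped.  "priority" limits how many pages are examined,
      * and "dma" restricts the scan to DMA-capable pages.  Returns 1 if
      * a page was freed, 0 otherwise.
      */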
 112 int shrink_mmap(int priority, int dma)
 113 {
 114         static int clock = 0;
 115         struct page * page;
 116         unsigned long limit = MAP_NR(high_memory);
 117         struct buffer_head *tmp, *bh;
 118 
 119         priority = (limit<<2) >> priority;
 120         page = mem_map + clock;
 121         while (priority-- > 0) {
 122                 if (page->locked)
 123                         goto next;
 124                 if (dma && !page->dma)
 125                         goto next;
 126                 /* First of all, regenerate the page's referenced bit
 127                    from any buffers in the page */
 128                 bh = page->buffers;
 129                 if (bh) {
 130                         tmp = bh;
 131                         do {
 132                                 if (buffer_touched(tmp)) {
 133                                         clear_bit(BH_Touched, &tmp->b_state);
 134                                         page->referenced = 1;
 135                                 }
 136                                 tmp = tmp->b_this_page;
 137                         } while (tmp != bh);
 138                 }
 139 
 140                 /* We can't throw away shared pages, but we do mark
 141                    them as referenced.  This relies on the fact that
 142                    no page is currently in both the page cache and the
 143                    buffer cache; we'd have to modify the following
 144                    test to allow for that case. */
 145                 if (page->count > 1)
 146                         page->referenced = 1;
 147                 else if (page->referenced)
 148                         page->referenced = 0;
 149                 else if (page->count) {
 150                         /* The page is an old, unshared page --- try
 151                            to discard it. */
 152                         if (page->inode) {
 153                                 remove_page_from_hash_queue(page);
 154                                 remove_page_from_inode_queue(page);
 155                                 free_page(page_address(page));
 156                                 return 1;
 157                         }
 158                         if (bh && try_to_free_buffer(bh, &bh, 6))
 159                                 return 1;
 160                 }
 161 next:
 162                 page++;
 163                 clock++;
 164                 if (clock >= limit) {
 165                         clock = 0;
 166                         page = mem_map;
 167                 }
 168         }
 169         return 0;
 170 }
 171 
 172 /*
 173  * This is called from try_to_swap_out() when we try to get rid of some
 174  * pages..  If we're unmapping the last occurrence of this page, we also
 175  * free it from the page hash-queues etc, as we don't want to keep it
 176  * in-core unnecessarily.
 177  */
 178 unsigned long page_unuse(unsigned long page)
 179 {
 180         struct page * p = mem_map + MAP_NR(page);
 181         int count = p->count;
 182 
 183         if (count != 2)
 184                 return count;
 185         if (!p->inode)
 186                 return count;
 187         remove_page_from_hash_queue(p);
 188         remove_page_from_inode_queue(p);
 189         free_page(page);
 190         return 1;
 191 }
 192 
 193 /*
 194  * Update a page cache copy when we're doing a "write()" system call,
 195  * so that data written to the file also shows up in mmap'ed pages.
 196  */
 197 void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
 198 {
 199         unsigned long offset, len;
 200 
 201         offset = (pos & ~PAGE_MASK);
 202         pos = pos & PAGE_MASK;
 203         len = PAGE_SIZE - offset;
 204         do {
 205                 struct page * page;
 206 
 207                 if (len > count)
 208                         len = count;
 209                 page = find_page(inode, pos);
 210                 if (page) {
 211                         unsigned long addr;
 212 
 213                         wait_on_page(page);
 214                         addr = page_address(page);
 215                         memcpy((void *) (offset + addr), buf, len);
 216                         free_page(addr);
 217                 }
 218                 count -= len;
 219                 buf += len;
 220                 len = PAGE_SIZE;
 221                 offset = 0;
 222                 pos += PAGE_SIZE;
 223         } while (count);
 224 }
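
     /*
      * Illustrative sketch, not part of this file: a filesystem's own
      * write() routine would typically call update_vm_cache() after it
      * has copied new data into the buffer cache, so that mmap'ed pages
      * of the file see the write.  Note that update_vm_cache() does a
      * plain memcpy(), so it wants the kernel-side copy of the data.
      * "myfs_file_write" and its body are hypothetical placeholders.
      */
     #if 0
     static int myfs_file_write(struct inode * inode, struct file * filp,
             const char * buf, int count)
     {
             char * kaddr;

             /* ... copy "count" bytes from user space into the file's
                buffers, remembering where they landed (kaddr) ... */
             update_vm_cache(inode, filp->f_pos, kaddr, count);
             filp->f_pos += count;
             return count;
     }
     #endif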
 225 
 226 /*
 227  * Try to read ahead in the file. "page_cache" is a potentially free page
 228  * that we could use for the cache (if it is 0 we can try to create one,
 229  * this is all overlapped with the IO on the previous page finishing anyway)
 230  */
 231 static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
 232 {
 233         struct page * page;
 234 
 235         offset &= PAGE_MASK;
 236         if (!page_cache) {
 237                 page_cache = __get_free_page(GFP_KERNEL);
 238                 if (!page_cache)
 239                         return 0;
 240         }
 241         if (offset >= inode->i_size)
 242                 return page_cache;
 243 #if 1
 244         page = find_page(inode, offset);
 245         if (page) {
 246                 page->count--;
 247                 return page_cache;
 248         }
 249         /*
 250          * Ok, add the new page to the hash-queues...
 251          */
 252         page = mem_map + MAP_NR(page_cache);
 253         page->count++;
 254         page->uptodate = 0;
 255         page->error = 0;
 256         page->offset = offset;
 257         add_page_to_inode_queue(inode, page);
 258         add_page_to_hash_queue(inode, page);
 259 
 260         inode->i_op->readpage(inode, page);
 261 
 262         free_page(page_cache);
 263         return 0;
 264 #else
 265         return page_cache;
 266 #endif
 267 }
 268 
 269 /* 
 270  * Wait for IO to complete on a locked page.
 271  */
 272 void __wait_on_page(struct page *page)
 273 {
 274         struct wait_queue wait = { current, NULL };
 275 
 276         page->count++;
 277         add_wait_queue(&page->wait, &wait);
 278 repeat:
 279         current->state = TASK_UNINTERRUPTIBLE;
 280         if (page->locked) {
 281                 schedule();
 282                 goto repeat;
 283         }
 284         remove_wait_queue(&page->wait, &wait);
 285         page->count--;
 286         current->state = TASK_RUNNING;
 287 }
 288 
 289 
 290 /*
 291  * This is a generic file read routine, and uses the
 292  * inode->i_op->readpage() function for the actual low-level
 293  * stuff.
 294  *
 295  * This is really ugly. But the goto's actually try to clarify some
 296  * of the logic when it comes to error handling etc.
 297  */
 298 #define MAX_READAHEAD (PAGE_SIZE*8)
 299 int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
 300 {
 301         int error, read;
 302         unsigned long pos, page_cache;
 303         
 304         if (count <= 0)
 305                 return 0;
 306         error = 0;
 307         read = 0;
 308         page_cache = 0;
 309 
 310         pos = filp->f_pos;
 311         for (;;) {
 312                 struct page *page;
 313                 unsigned long offset, addr, nr;
 314 
 315                 if (pos >= inode->i_size)
 316                         break;
 317                 offset = pos & ~PAGE_MASK;
 318                 nr = PAGE_SIZE - offset;
 319                 /*
 320                  * Try to find the data in the page cache..
 321                  */
 322                 page = find_page(inode, pos & PAGE_MASK);
 323                 if (page)
 324                         goto found_page;
 325 
 326                 /*
 327                  * Ok, it wasn't cached, so we need to create a new
 328                  * page..
 329                  */
 330                 if (page_cache)
 331                         goto new_page;
 332 
 333                 error = -ENOMEM;
 334                 page_cache = __get_free_page(GFP_KERNEL);
 335                 if (!page_cache)
 336                         break;
 337                 error = 0;
 338 
 339                 /*
 340                  * That could have slept, so we need to check again..
 341                  */
 342                 if (pos >= inode->i_size)
 343                         break;
 344                 page = find_page(inode, pos & PAGE_MASK);
 345                 if (!page)
 346                         goto new_page;
 347 
 348 found_page:
 349                 addr = page_address(page);
 350                 if (nr > count)
 351                         nr = count;
 352 
 353                 /*
 354                  * We may want to do read-ahead.. Do this only
 355                  * if we're waiting for the current page to be
  356                  * filled in, and if either
  357                  *  - we're going to read more than this page, or
  358                  *  - "f_reada" is set
 359                  */
 360                 if (page->locked) {
 361                         unsigned long max_ahead, ahead;
 362 
 363                         max_ahead = count - nr;
 364                         if (filp->f_reada || max_ahead > MAX_READAHEAD)
 365                                 max_ahead = MAX_READAHEAD;
 366                         ahead = 0;
 367                         while (ahead < max_ahead) {
 368                                 ahead += PAGE_SIZE;
 369                                 page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
 370                                 if (!page->locked)
 371                                         goto unlocked_page;
 372                         }
 373                         __wait_on_page(page);
 374                 }
 375 unlocked_page:
 376                 if (!page->uptodate)
 377                         goto read_page;
 378                 if (nr > inode->i_size - pos)
 379                         nr = inode->i_size - pos;
 380                 memcpy_tofs(buf, (void *) (addr + offset), nr);
 381                 free_page(addr);
 382                 buf += nr;
 383                 pos += nr;
 384                 read += nr;
 385                 count -= nr;
 386                 if (count)
 387                         continue;
 388                 break;
 389         
 390 
 391 new_page:
 392                 /*
 393                  * Ok, add the new page to the hash-queues...
 394                  */
 395                 addr = page_cache;
 396                 page = mem_map + MAP_NR(page_cache);
 397                 page_cache = 0;
 398                 page->count++;
 399                 page->uptodate = 0;
 400                 page->error = 0;
 401                 page->offset = pos & PAGE_MASK;
 402                 add_page_to_inode_queue(inode, page);
 403                 add_page_to_hash_queue(inode, page);
 404 
 405                 /*
 406                  * Error handling is tricky. If we get a read error,
 407                  * the cached page stays in the cache (but uptodate=0),
 408                  * and the next process that accesses it will try to
 409                  * re-read it. This is needed for NFS etc, where the
 410                  * identity of the reader can decide if we can read the
 411                  * page or not..
 412                  */
 413 read_page:
 414                 error = inode->i_op->readpage(inode, page);
 415                 if (!error)
 416                         goto found_page;
 417                 free_page(addr);
 418                 break;
 419         }
 420 
 421         filp->f_pos = pos;
 422         filp->f_reada = 1;
 423         if (page_cache)
 424                 free_page(page_cache);
 425         if (!IS_RDONLY(inode)) {
 426                 inode->i_atime = CURRENT_TIME;
 427                 inode->i_dirt = 1;
 428         }
 429         if (!read)
 430                 read = error;
 431         return read;
 432 }
 433 
 434 /*
 435  * Find a cached page and wait for it to become up-to-date, returning
 436  * the page address.  Increments the page count.
 437  */
 438 static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
 439 {
 440         struct page * page;
 441         unsigned long new_page;
 442 
 443         page = find_page(inode, offset);
 444         if (page)
 445                 goto found_page_dont_free;
 446         new_page = __get_free_page(GFP_KERNEL);
 447         page = find_page(inode, offset);
 448         if (page)
 449                 goto found_page;
 450         if (!new_page)
 451                 return 0;
 452         page = mem_map + MAP_NR(new_page);
 453         new_page = 0;
 454         page->count++;
 455         page->uptodate = 0;
 456         page->error = 0;
 457         page->offset = offset;
 458         add_page_to_inode_queue(inode, page);
 459         add_page_to_hash_queue(inode, page);
 460         inode->i_op->readpage(inode, page);
 461         if (page->locked)
 462                 new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
 463 found_page:
 464         if (new_page)
 465                 free_page(new_page);
 466 found_page_dont_free:
 467         wait_on_page(page);
 468         return page_address(page);
 469 }
 470 
 471 /*
 472  * Semantics for shared and private memory areas are different past the end
 473  * of the file. A shared mapping past the last page of the file is an error
 474  * and results in a SIGBUS, while a private mapping just maps in a zero page.
 475  */
 476 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
 477 {
 478         unsigned long offset;
 479         struct inode * inode = area->vm_inode;
 480         unsigned long page;
 481 
 482         offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
 483         if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
 484                 return 0;
 485 
 486         page = fill_page(inode, offset);
 487         if (page && no_share) {
 488                 unsigned long new_page = __get_free_page(GFP_KERNEL);
 489                 if (new_page)
 490                         memcpy((void *) new_page, (void *) page, PAGE_SIZE);
 491                 free_page(page);
 492                 return new_page;
 493         }
 494         return page;
 495 }
 496 
 497 /*
 498  * Tries to write a shared mapped page to its backing store. May return -EIO
 499  * if the disk is full.
 500  */
 501 static inline int do_write_page(struct inode * inode, struct file * file,
 502         const char * page, unsigned long offset)
 503 {
 504         int old_fs, retval;
 505         unsigned long size;
 506 
 507         size = offset + PAGE_SIZE;
 508         /* refuse to extend file size.. */
 509         if (S_ISREG(inode->i_mode)) {
 510                 if (size > inode->i_size)
 511                         size = inode->i_size;
 512                 /* Ho humm.. We should have tested for this earlier */
 513                 if (size < offset)
 514                         return -EIO;
 515         }
 516         size -= offset;
 517         old_fs = get_fs();
 518         set_fs(KERNEL_DS);
 519         retval = -EIO;
 520         if (size == file->f_op->write(inode, file, (const char *) page, size))
 521                 retval = 0;
 522         set_fs(old_fs);
 523         return retval;
 524 }
 525 
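     /*
      * Write one page of a shared mapping back to the file.  If the page
      * still has buffers attached we just mark them dirty and let the
      * buffer cache do the I/O; otherwise we build a temporary "struct
      * file" and push the page out through the filesystem's write routine
      * while holding the inode semaphore.
      */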
 526 static int filemap_write_page(struct vm_area_struct * vma,
 527         unsigned long offset,
 528         unsigned long page)
 529 {
 530         int result;
 531         struct file file;
 532         struct inode * inode;
 533         struct buffer_head * bh;
 534 
 535         bh = mem_map[MAP_NR(page)].buffers;
 536         if (bh) {
 537                 /* whee.. just mark the buffer heads dirty */
 538                 struct buffer_head * tmp = bh;
 539                 do {
 540                         mark_buffer_dirty(tmp, 0);
 541                         tmp = tmp->b_this_page;
 542                 } while (tmp != bh);
 543                 return 0;
 544         }
 545 
 546         inode = vma->vm_inode;
 547         file.f_op = inode->i_op->default_file_ops;
 548         if (!file.f_op->write)
 549                 return -EIO;
 550         file.f_mode = 3;
 551         file.f_flags = 0;
 552         file.f_count = 1;
 553         file.f_inode = inode;
 554         file.f_pos = offset;
 555         file.f_reada = 0;
 556 
 557         down(&inode->i_sem);
 558         result = do_write_page(inode, &file, (const char *) page, offset);
 559         up(&inode->i_sem);
 560         return result;
 561 }
 562 
 563 
 564 /*
 565  * Swapping to a shared file: while we're busy writing out the page
 566  * (and the page still exists in memory), we save the page information
 567  * in the page table, so that "filemap_swapin()" can re-use the page
 568  * immediately if it is called while we're busy swapping it out..
 569  *
 570  * Once we've written it all out, we mark the page entry "empty", which
 571  * will result in a normal page-in (instead of a swap-in) from the now
 572  * up-to-date disk file.
 573  */
 574 int filemap_swapout(struct vm_area_struct * vma,
 575         unsigned long offset,
 576         pte_t *page_table)
 577 {
 578         int error;
 579         unsigned long page = pte_page(*page_table);
 580         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 581 
 582         flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
 583         set_pte(page_table, __pte(entry));
 584         flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
 585         error = filemap_write_page(vma, offset, page);
 586         if (pte_val(*page_table) == entry)
 587                 pte_clear(page_table);
 588         return error;
 589 }
 590 
 591 /*
 592  * filemap_swapin() is called only if we have something in the page
 593  * tables that is non-zero (but not present), which we know to be the
 594  * page index of a page that is busy being swapped out (see above).
 595  * So we just use it directly..
 596  */
 597 static pte_t filemap_swapin(struct vm_area_struct * vma,
 598         unsigned long offset,
 599         unsigned long entry)
 600 {
 601         unsigned long page = SWP_OFFSET(entry);
 602 
 603         mem_map[page].count++;
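             /* turn the mem_map index back into a kernel virtual address */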
 604         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 605         return mk_pte(page,vma->vm_page_prot);
 606 }
 607 
 608 
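     /*
      * Sync one page-table entry of a shared mapping.  Dirty, present
      * pages are written back via filemap_write_page().  With
      * MS_INVALIDATE the entry is also cleared, swap entries are freed,
      * and clean pages (or a pure invalidate) are discarded without
      * being written.
      */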
 609 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 610         unsigned long address, unsigned int flags)
 611 {
 612         pte_t pte = *ptep;
 613         unsigned long page;
 614         int error;
 615 
 616         if (!(flags & MS_INVALIDATE)) {
 617                 if (!pte_present(pte))
 618                         return 0;
 619                 if (!pte_dirty(pte))
 620                         return 0;
 621                 flush_cache_page(vma, address);
 622                 set_pte(ptep, pte_mkclean(pte));
 623                 flush_tlb_page(vma, address);
 624                 page = pte_page(pte);
 625                 mem_map[MAP_NR(page)].count++;
 626         } else {
 627                 if (pte_none(pte))
 628                         return 0;
 629                 flush_cache_page(vma, address);
 630                 pte_clear(ptep);
 631                 flush_tlb_page(vma, address);
 632                 if (!pte_present(pte)) {
 633                         swap_free(pte_val(pte));
 634                         return 0;
 635                 }
 636                 page = pte_page(pte);
 637                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 638                         free_page(page);
 639                         return 0;
 640                 }
 641         }
 642         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 643         free_page(page);
 644         return error;
 645 }
 646 
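     /*
      * Sync all page-table entries covered by one pmd entry, over the
      * given address range, using filemap_sync_pte().
      */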
 647 static inline int filemap_sync_pte_range(pmd_t * pmd,
 648         unsigned long address, unsigned long size, 
 649         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 650 {
 651         pte_t * pte;
 652         unsigned long end;
 653         int error;
 654 
 655         if (pmd_none(*pmd))
 656                 return 0;
 657         if (pmd_bad(*pmd)) {
 658                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 659                 pmd_clear(pmd);
 660                 return 0;
 661         }
 662         pte = pte_offset(pmd, address);
 663         offset += address & PMD_MASK;
 664         address &= ~PMD_MASK;
 665         end = address + size;
 666         if (end > PMD_SIZE)
 667                 end = PMD_SIZE;
 668         error = 0;
 669         do {
 670                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 671                 address += PAGE_SIZE;
 672                 pte++;
 673         } while (address < end);
 674         return error;
 675 }
 676 
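     /*
      * Sync all pmd entries covered by one page-directory entry, over
      * the given address range.
      */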
 677 static inline int filemap_sync_pmd_range(pgd_t * pgd,
 678         unsigned long address, unsigned long size, 
 679         struct vm_area_struct *vma, unsigned int flags)
 680 {
 681         pmd_t * pmd;
 682         unsigned long offset, end;
 683         int error;
 684 
 685         if (pgd_none(*pgd))
 686                 return 0;
 687         if (pgd_bad(*pgd)) {
 688                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 689                 pgd_clear(pgd);
 690                 return 0;
 691         }
 692         pmd = pmd_offset(pgd, address);
 693         offset = address & PMD_MASK;
 694         address &= ~PMD_MASK;
 695         end = address + size;
 696         if (end > PGDIR_SIZE)
 697                 end = PGDIR_SIZE;
 698         error = 0;
 699         do {
 700                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 701                 address = (address + PMD_SIZE) & PMD_MASK;
 702                 pmd++;
 703         } while (address < end);
 704         return error;
 705 }
 706 
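     /*
      * Sync (or invalidate) a range of a shared file mapping by walking
      * the page directory, flushing the caches before and the TLB after.
      */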
 707 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 708         size_t size, unsigned int flags)
 709 {
 710         pgd_t * dir;
 711         unsigned long end = address + size;
 712         int error = 0;
 713 
 714         dir = pgd_offset(current->mm, address);
 715         flush_cache_range(vma->vm_mm, end - size, end);
 716         while (address < end) {
 717                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 718                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 719                 dir++;
 720         }
 721         flush_tlb_range(vma->vm_mm, end - size, end);
 722         return error;
 723 }
 724 
 725 /*
 726  * This handles (potentially partial) area unmaps..
 727  */
 728 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
 729 {
 730         filemap_sync(vma, start, len, MS_ASYNC);
 731 }
 732 
 733 /*
 734  * Shared mappings need to be able to do the right thing at
 735  * close/unmap/sync. They will also use the private file as
 736  * backing-store for swapping..
 737  */
 738 static struct vm_operations_struct file_shared_mmap = {
 739         NULL,                   /* no special open */
 740         NULL,                   /* no special close */
 741         filemap_unmap,          /* unmap - we need to sync the pages */
 742         NULL,                   /* no special protect */
 743         filemap_sync,           /* sync */
 744         NULL,                   /* advise */
 745         filemap_nopage,         /* nopage */
 746         NULL,                   /* wppage */
 747         filemap_swapout,        /* swapout */
 748         filemap_swapin,         /* swapin */
 749 };
 750 
 751 /*
 752  * Private mappings just need to be able to load in the map.
 753  *
 754  * (This is actually used for shared mappings as well, if we
 755  * know they can't ever get write permissions..)
 756  */
 757 static struct vm_operations_struct file_private_mmap = {
 758         NULL,                   /* open */
 759         NULL,                   /* close */
 760         NULL,                   /* unmap */
 761         NULL,                   /* protect */
 762         NULL,                   /* sync */
 763         NULL,                   /* advise */
 764         filemap_nopage,         /* nopage */
 765         NULL,                   /* wppage */
 766         NULL,                   /* swapout */
 767         NULL,                   /* swapin */
 768 };
 769 
 770 /* This is used for a general mmap of a disk file */
 771 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 772 {
 773         struct vm_operations_struct * ops;
 774 
 775         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 776                 ops = &file_shared_mmap;
 777                 /* share_page() can only guarantee proper page sharing if
 778                  * the offsets are all page aligned. */
 779                 if (vma->vm_offset & (PAGE_SIZE - 1))
 780                         return -EINVAL;
 781         } else {
 782                 ops = &file_private_mmap;
 783                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 784                         return -EINVAL;
 785         }
 786         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 787                 return -EACCES;
 788         if (!inode->i_op || !inode->i_op->readpage)
 789                 return -ENOEXEC;
 790         if (!IS_RDONLY(inode)) {
 791                 inode->i_atime = CURRENT_TIME;
 792                 inode->i_dirt = 1;
 793         }
 794         vma->vm_inode = inode;
 795         inode->i_count++;
 796         vma->vm_ops = ops;
 797         return 0;
 798 }
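
     /*
      * Illustrative sketch, not part of this file: a filesystem that
      * wants this generic behaviour points its file_operations at
      * generic_file_read() and generic_file_mmap(), and provides a
      * readpage() routine in its inode_operations.  The "myfs_" names
      * are hypothetical placeholders; ext2, for example, is wired up in
      * essentially this way.
      */
     #if 0
     static struct file_operations myfs_file_operations = {
             NULL,                   /* lseek - default */
             generic_file_read,      /* read */
             myfs_file_write,        /* write */
             NULL,                   /* readdir */
             NULL,                   /* select - default */
             NULL,                   /* ioctl */
             generic_file_mmap,      /* mmap */
             NULL,                   /* no special open */
             NULL,                   /* release */
             myfs_sync_file,         /* fsync */
     };
     #endif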
 799 
 800 
 801 /*
 802  * The msync() system call.
 803  */
 804 
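     /*
      * Sync one vma over [start, end): call the vma's sync operation
      * and, for MS_SYNC, also fsync the backing inode.
      */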
 805 static int msync_interval(struct vm_area_struct * vma,
 806         unsigned long start, unsigned long end, int flags)
 807 {
 808         if (!vma->vm_inode)
 809                 return 0;
 810         if (vma->vm_ops->sync) {
 811                 int error;
 812                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 813                 if (error)
 814                         return error;
 815                 if (flags & MS_SYNC)
 816                         return file_fsync(vma->vm_inode, NULL);
 817                 return 0;
 818         }
 819         return 0;
 820 }
 821 
 822 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
 823 {
 824         unsigned long end;
 825         struct vm_area_struct * vma;
 826         int unmapped_error, error;
 827 
 828         if (start & ~PAGE_MASK)
 829                 return -EINVAL;
 830         len = (len + ~PAGE_MASK) & PAGE_MASK;
 831         end = start + len;
 832         if (end < start)
 833                 return -EINVAL;
 834         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 835                 return -EINVAL;
 836         if (end == start)
 837                 return 0;
 838         /*
 839          * If the interval [start,end) covers some unmapped address ranges,
 840          * just ignore them, but return -EFAULT at the end.
 841          */
 842         vma = find_vma(current, start);
 843         unmapped_error = 0;
 844         for (;;) {
 845                 /* Still start < end. */
 846                 if (!vma)
 847                         return -EFAULT;
 848                 /* Here start < vma->vm_end. */
 849                 if (start < vma->vm_start) {
 850                         unmapped_error = -EFAULT;
 851                         start = vma->vm_start;
 852                 }
 853                 /* Here vma->vm_start <= start < vma->vm_end. */
 854                 if (end <= vma->vm_end) {
 855                         if (start < end) {
 856                                 error = msync_interval(vma, start, end, flags);
 857                                 if (error)
 858                                         return error;
 859                         }
 860                         return unmapped_error;
 861                 }
 862                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 863                 error = msync_interval(vma, start, vma->vm_end, flags);
 864                 if (error)
 865                         return error;
 866                 start = vma->vm_end;
 867                 vma = vma->vm_next;
 868         }
 869 }
