This source file includes the following definitions:
- invalidate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994, 1995  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem does this differently, for example)
 */
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>
/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 */
unsigned long page_cache_size = 0;
struct page * page_hash_table[PAGE_HASH_SIZE];
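/*
 * Invalidate the inode's cached pages from "start" onwards: pages
 * that lie entirely beyond "start" are dropped from the cache and
 * freed, and the part of a straddling page beyond "start" is
 * cleared.  Locked pages are waited for, and the scan is restarted.
 */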
void invalidate_inode_pages(struct inode * inode, unsigned long start)
{
        struct page ** p;
        struct page * page;

repeat:
        p = &inode->i_pages;
        while ((page = *p) != NULL) {
                unsigned long offset = page->offset;

                /* page wholly truncated - free it */
                if (offset >= start) {
                        if (page->locked) {
                                wait_on_page(page);
                                goto repeat;
                        }
                        inode->i_nrpages--;
                        if ((*p = page->next) != NULL)
                                (*p)->prev = page->prev;
                        page->dirty = 0;
                        page->next = NULL;
                        page->prev = NULL;
                        remove_page_from_hash_queue(page);
                        page->inode = NULL;
                        free_page(page_address(page));
                        continue;
                }
                p = &page->next;
                offset = start - offset;
                /* partial truncate, clear end of page */
                if (offset < PAGE_SIZE)
                        memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
        }
}
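/*
 * Scan the page map with a simple clock algorithm and try to free
 * one page.  Locked pages are skipped, and shared or recently
 * referenced pages only get their referenced bit adjusted; an old,
 * unshared page-cache page or buffer page is reclaimed.  Returns 1
 * if a page was freed, 0 otherwise.
 */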
int shrink_mmap(int priority, unsigned long limit)
{
        static int clock = 0;
        struct page * page;
        struct buffer_head *tmp, *bh;

        if (limit > high_memory)
                limit = high_memory;
        limit = MAP_NR(limit);
        if (clock >= limit)
                clock = 0;
        priority = (limit<<2) >> priority;
        page = mem_map + clock;
        while (priority-- > 0) {
                if (page->locked)
                        goto next;
                /* First of all, regenerate the page's referenced bit
                   from any buffers in the page */
                bh = buffer_pages[MAP_NR(page_address(page))];
                if (bh) {
                        tmp = bh;
                        do {
                                if (buffer_touched(tmp)) {
                                        clear_bit(BH_Touched, &tmp->b_state);
                                        page->referenced = 1;
                                }
                                tmp = tmp->b_this_page;
                        } while (tmp != bh);
                }

                /* We can't throw away shared pages, but we do mark
                   them as referenced.  This relies on the fact that
                   no page is currently in both the page cache and the
                   buffer cache; we'd have to modify the following
                   test to allow for that case. */
                if (page->count > 1)
                        page->referenced = 1;
                else if (page->referenced)
                        page->referenced = 0;
                else if (page->count) {
                        /* The page is an old, unshared page --- try
                           to discard it. */
                        if (page->inode) {
                                remove_page_from_hash_queue(page);
                                remove_page_from_inode_queue(page);
                                free_page(page_address(page));
                                return 1;
                        }
                        if (bh && try_to_free_buffer(bh, &bh, 6))
                                return 1;
                }
next:
                page++;
                clock++;
                if (clock >= limit) {
                        clock = 0;
                        page = mem_map;
                }
        }
        return 0;
}
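/*
 * Try to drop a page that is only held by the page cache: a use
 * count of exactly 2 means one reference from the cache and one
 * from our caller.  Returns the resulting use count, so 1 means
 * the page was freed.
 */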
unsigned long page_unuse(unsigned long page)
{
        struct page * p = mem_map + MAP_NR(page);
        int count = p->count;

        if (count != 2)
                return count;
        if (!p->inode)
                return count;
        remove_page_from_hash_queue(p);
        remove_page_from_inode_queue(p);
        free_page(page);
        return 1;
}
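/*
 * Update the page cache copy of data that was just written to the
 * file through the normal write path, so that cached pages stay
 * coherent with the new file contents.
 */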
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
        unsigned long offset, len;

        offset = (pos & ~PAGE_MASK);
        pos = pos & PAGE_MASK;
        len = PAGE_SIZE - offset;
        do {
                struct page * page;

                if (len > count)
                        len = count;
                page = find_page(inode, pos);
                if (page) {
                        unsigned long addr;

                        wait_on_page(page);
                        addr = page_address(page);
                        memcpy((void *) (offset + addr), buf, len);
                        free_page(addr);
                }
                count -= len;
                buf += len;
                len = PAGE_SIZE;
                offset = 0;
                pos += PAGE_SIZE;
        } while (count);
}
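/*
 * Try to start a read of the page at "offset" ahead of time.  A
 * pre-allocated page may be passed in as "page_cache", and one is
 * allocated here otherwise.  Returns the page if it is still
 * available for re-use (data already cached, or offset past end of
 * file), or 0 if the page was consumed by the read.
 */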
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
        struct page * page;

        offset &= PAGE_MASK;
        if (!page_cache) {
                page_cache = __get_free_page(GFP_KERNEL);
                if (!page_cache)
                        return 0;
        }
        if (offset >= inode->i_size)
                return page_cache;
#if 1
        page = find_page(inode, offset);
        if (page) {
                page->count--;
                return page_cache;
        }
        /*
         * Not cached yet: add the page to the cache and start the read.
         */
        page = mem_map + MAP_NR(page_cache);
        page->count++;
        page->uptodate = 0;
        page->error = 0;
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        add_page_to_hash_queue(inode, page);

        inode->i_op->readpage(inode, page);

        free_page(page_cache);
        return 0;
#else
        return page_cache;
#endif
}
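/*
 * Wait for a page to become unlocked: the slow path behind
 * wait_on_page().  The page's use count is raised across the sleep
 * so that the page cannot be freed while we wait.
 */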
void __wait_on_page(struct page *page)
{
        struct wait_queue wait = { current, NULL };

        page->count++;
        add_wait_queue(&page->wait, &wait);
repeat:
        current->state = TASK_UNINTERRUPTIBLE;
        if (page->locked) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&page->wait, &wait);
        page->count--;
        current->state = TASK_RUNNING;
}
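/*
 * Read a file through the page cache: each page is looked up in the
 * cache, and read in if it is missing.  Sequential reads trigger a
 * read-ahead of up to READAHEAD_PAGES pages, and big reads of new
 * data are done in chunks of up to MAX_IO_PAGES pages.  This is the
 * read() implementation used by most filesystems.
 */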
#define READAHEAD_PAGES 3
#define MAX_IO_PAGES 4

int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
        int read = 0, newpage = 0;
        unsigned long pos;
        unsigned long page_cache = 0;
        int pre_read = 0;

        if (count <= 0)
                return 0;

        pos = filp->f_pos;
        do {
                struct page *page;
                unsigned long offset, addr, nr;
                int i;
                off_t p;

                if (pos >= inode->i_size)
                        break;
                offset = pos & ~PAGE_MASK;
                nr = PAGE_SIZE - offset;
                /*
                 * Try to find the data in the page cache..
                 */
                page = find_page(inode, pos & PAGE_MASK);
                if (page)
                        goto found_page;

                /*
                 * Not cached: we need a page to read the data into.
                 * __get_free_page() may sleep, so everything has to
                 * be re-checked once we have the page.
                 */
                if (!page_cache) {
                        page_cache = __get_free_page(GFP_KERNEL);
                        if (!page_cache) {
                                if (!read)
                                        read = -ENOMEM;
                                break;
                        }
                }

                /*
                 * While we slept, the file may have been truncated
                 * or the page may have been read in by somebody else.
                 */
                if (pos >= inode->i_size)
                        break;
                page = find_page(inode, pos & PAGE_MASK);
                if (page)
                        goto found_page;

                /*
                 * Still not cached: add our private page to the
                 * cache and start the actual read.
                 */
                page = mem_map + MAP_NR(page_cache);
                page_cache = 0;
                page->count++;
                page->uptodate = 0;
                page->error = 0;
                page->offset = pos & PAGE_MASK;
                add_page_to_inode_queue(inode, page);
                add_page_to_hash_queue(inode, page);

                inode->i_op->readpage(inode, page);

                /*
                 * The read is now in flight; remember that this was
                 * a new page, so that the read-ahead heuristics
                 * below can keep the disk busy.
                 */
                newpage = 1;

found_page:
                addr = page_address(page);
                if (nr > count)
                        nr = count;
                /*
                 * A big read of a fresh page: read ahead in chunks
                 * of up to MAX_IO_PAGES pages.
                 */
                if (newpage && nr < count) {
                        if (pre_read > 0)
                                pre_read -= PAGE_SIZE;
                        else {
                                pre_read = (MAX_IO_PAGES-1) * PAGE_SIZE;
                                if (pre_read > (count - nr))
                                        pre_read = count - nr;
                                for (i=0, p=pos; i<pre_read; i+=PAGE_SIZE) {
                                        p += PAGE_SIZE;
                                        page_cache = try_to_read_ahead(inode, p, page_cache);
                                }
                        }
                } else
                /*
                 * A sequential read that ends on a page boundary:
                 * read the next few pages ahead as well.
                 */
                if (newpage && nr == count && filp->f_reada
                    && !((pos + nr) & ~PAGE_MASK)) {
                        for (i=0, p=pos; i<READAHEAD_PAGES; i++) {
                                p += PAGE_SIZE;
                                page_cache = try_to_read_ahead(inode, p, page_cache);
                        }
                }
                wait_on_page(page);
                if (nr > inode->i_size - pos)
                        nr = inode->i_size - pos;
                memcpy_tofs(buf, (void *) (addr + offset), nr);
                free_page(addr);
                buf += nr;
                pos += nr;
                read += nr;
                count -= nr;
        } while (count);

        filp->f_pos = pos;
        filp->f_reada = 1;
        if (page_cache)
                free_page(page_cache);
        if (!IS_RDONLY(inode)) {
                inode->i_atime = CURRENT_TIME;
                inode->i_dirt = 1;
        }
        return read;
}
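/*
 * Find a page in the page cache, or allocate a fresh one, insert it
 * into the cache and start a read of it.  The page address is
 * returned with the use count raised, after any pending I/O on the
 * page has completed; 0 means we are out of memory.
 */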
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
        struct page * page;
        unsigned long new_page;

        page = find_page(inode, offset);
        if (page)
                goto found_page;
        new_page = __get_free_page(GFP_KERNEL);
        page = find_page(inode, offset);
        if (page) {
                if (new_page)
                        free_page(new_page);
                goto found_page;
        }
        if (!new_page)
                return 0;
        page = mem_map + MAP_NR(new_page);
        new_page = 0;
        page->count++;
        page->uptodate = 0;
        page->error = 0;
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        add_page_to_hash_queue(inode, page);
        inode->i_op->readpage(inode, page);
found_page:
        wait_on_page(page);
        return page_address(page);
}
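/*
 * The nopage handler for file mappings: translate the faulting
 * address into a file offset and pull the page in through
 * fill_page().  If the caller must not share the cached page
 * (no_share), a private copy is returned instead.
 */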
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
        unsigned long offset;
        struct inode * inode = area->vm_inode;
        unsigned long page;

        offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
        if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
                return 0;

        page = fill_page(inode, offset);
        if (page && no_share) {
                unsigned long new_page = __get_free_page(GFP_KERNEL);
                if (new_page)
                        memcpy((void *) new_page, (void *) page, PAGE_SIZE);
                free_page(page);
                return new_page;
        }
        return page;
}
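/*
 * Write a page of a shared mapping back to the file.  If the page
 * is still around in the buffer cache, marking the buffers dirty is
 * enough; otherwise the page is written out synchronously through
 * the file's default write op, with get_fs/set_fs juggling because
 * the data source is in kernel space.
 */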
static int filemap_write_page(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long page)
{
        int old_fs;
        unsigned long size, result;
        struct file file;
        struct inode * inode;
        struct buffer_head * bh;

        bh = buffer_pages[MAP_NR(page)];
        if (bh) {
                /* whee.. just mark the buffer heads dirty */
                struct buffer_head * tmp = bh;
                do {
                        mark_buffer_dirty(tmp, 0);
                        tmp = tmp->b_this_page;
                } while (tmp != bh);
                return 0;
        }

        inode = vma->vm_inode;
        file.f_op = inode->i_op->default_file_ops;
        if (!file.f_op->write)
                return -EIO;
        size = offset + PAGE_SIZE;
        /* refuse to extend file size.. */
        if (S_ISREG(inode->i_mode)) {
                if (size > inode->i_size)
                        size = inode->i_size;
                /* Ho humm.. We should have tested for this earlier */
                if (size < offset)
                        return -EIO;
        }
        size -= offset;
        file.f_mode = 3;
        file.f_flags = 0;
        file.f_count = 1;
        file.f_inode = inode;
        file.f_pos = offset;
        file.f_reada = 0;
        old_fs = get_fs();
        set_fs(KERNEL_DS);
        result = file.f_op->write(inode, &file, (const char *) page, size);
        set_fs(old_fs);
        if (result != size)
                return -EIO;
        return 0;
}
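/*
 * Swap out a page of a shared file mapping.  The page table entry
 * is temporarily replaced by a SHM_SWP_TYPE swap entry that encodes
 * the page's map number while the page is being written back, so
 * that filemap_swapin() can find the page again in the meantime.
 * If the entry is still untouched after the write, it is cleared.
 */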
int filemap_swapout(struct vm_area_struct * vma,
        unsigned long offset,
        pte_t *page_table)
{
        int error;
        unsigned long page = pte_page(*page_table);
        unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

        set_pte(page_table, __pte(entry));

        invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
        error = filemap_write_page(vma, offset, page);
        if (pte_val(*page_table) == entry)
                pte_clear(page_table);
        return error;
}
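/*
 * Swap a page of a shared file mapping back in.  The "swap entry"
 * left behind by filemap_swapout() directly encodes the map number
 * of the page, so the pte can be rebuilt from it directly.
 */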
static pte_t filemap_swapin(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long entry)
{
        unsigned long page = SWP_OFFSET(entry);

        mem_map[page].count++;
        page = (page << PAGE_SHIFT) + PAGE_OFFSET;
        return mk_pte(page,vma->vm_page_prot);
}
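/*
 * The msync() helpers below walk the page tables of the region:
 * filemap_sync_pte() handles a single pte, writing dirty pages back
 * to the file and, for MS_INVALIDATE, tearing the mapping down.
 */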
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
        unsigned long address, unsigned int flags)
{
        pte_t pte = *ptep;
        unsigned long page;
        int error;

        if (!(flags & MS_INVALIDATE)) {
                if (!pte_present(pte))
                        return 0;
                if (!pte_dirty(pte))
                        return 0;
                set_pte(ptep, pte_mkclean(pte));
                invalidate_page(vma, address);
                page = pte_page(pte);
                mem_map[MAP_NR(page)].count++;
        } else {
                if (pte_none(pte))
                        return 0;
                pte_clear(ptep);
                invalidate_page(vma, address);
                if (!pte_present(pte)) {
                        swap_free(pte_val(pte));
                        return 0;
                }
                page = pte_page(pte);
                if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
                        free_page(page);
                        return 0;
                }
        }
        error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
        free_page(page);
        return error;
}
static inline int filemap_sync_pte_range(pmd_t * pmd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
        pte_t * pte;
        unsigned long end;
        int error;

        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
                printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
                pmd_clear(pmd);
                return 0;
        }
        pte = pte_offset(pmd, address);
        offset += address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte(pte, vma, address + offset, flags);
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return error;
}
static inline int filemap_sync_pmd_range(pgd_t * pgd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned int flags)
{
        pmd_t * pmd;
        unsigned long offset, end;
        int error;

        if (pgd_none(*pgd))
                return 0;
        if (pgd_bad(*pgd)) {
                printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
                pgd_clear(pgd);
                return 0;
        }
        pmd = pmd_offset(pgd, address);
        offset = address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return error;
}
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
        size_t size, unsigned int flags)
{
        pgd_t * dir;
        unsigned long end = address + size;
        int error = 0;

        dir = pgd_offset(current->mm, address);
        while (address < end) {
                error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        invalidate_range(vma->vm_mm, end - size, end);
        return error;
}
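/*
 * Shared mappings need to be written back to the file on unmap, at
 * least asynchronously.
 */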
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
        filemap_sync(vma, start, len, MS_ASYNC);
}
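/*
 * Shared mappings need to be able to do the right thing at
 * close/unmap/sync.  They will also use the private file as
 * backing-store for swapping..
 */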
static struct vm_operations_struct file_shared_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        filemap_unmap,          /* unmap */
        NULL,                   /* protect */
        filemap_sync,           /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        filemap_swapout,        /* swapout */
        filemap_swapin,         /* swapin */
};
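/*
 * Private mappings just need to be able to load in the map.
 *
 * (this is actually used for shared mappings as well, if we
 * know they can't ever get write permissions..)
 */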
static struct vm_operations_struct file_private_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        NULL,                   /* unmap */
        NULL,                   /* protect */
        NULL,                   /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        NULL,                   /* swapout */
        NULL,                   /* swapin */
};
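/*
 * Generic mmap() entry point for filesystems that use the page
 * cache: check that the inode is mappable, pick the shared or
 * private vm_ops, and enforce the alignment each one needs.
 */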
int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
{
        struct vm_operations_struct * ops;

        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
                ops = &file_shared_mmap;
                /* share_page() can only guarantee proper page sharing if
                 * the offsets are all page aligned. */
                if (vma->vm_offset & (PAGE_SIZE - 1))
                        return -EINVAL;
        } else {
                ops = &file_private_mmap;
                if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
                        return -EINVAL;
        }
        if (!inode->i_sb || !S_ISREG(inode->i_mode))
                return -EACCES;
        if (!inode->i_op || !inode->i_op->readpage)
                return -ENOEXEC;
        if (!IS_RDONLY(inode)) {
                inode->i_atime = CURRENT_TIME;
                inode->i_dirt = 1;
        }
        vma->vm_inode = inode;
        inode->i_count++;
        vma->vm_ops = ops;
        return 0;
}
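/*
 * Sync one region [start,end) of a vma to its backing file via the
 * vma's sync op; for MS_SYNC, the file's dirty buffers are also
 * flushed to disk before returning.
 */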
static int msync_interval(struct vm_area_struct * vma,
        unsigned long start, unsigned long end, int flags)
{
        if (!vma->vm_inode)
                return 0;
        if (vma->vm_ops->sync) {
                int error;
                error = vma->vm_ops->sync(vma, start, end-start, flags);
                if (error)
                        return error;
                if (flags & MS_SYNC)
                        return file_fsync(vma->vm_inode, NULL);
                return 0;
        }
        return 0;
}
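/*
 * The msync() system call: sync every vma that covers part of the
 * requested region.  Unmapped holes in the region are remembered
 * and reported as -EFAULT, but only after the mapped parts have
 * been synced.
 */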
asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct * vma;
        int unmapped_error, error;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                return -EINVAL;
        if (end == start)
                return 0;

        vma = find_vma(current, start);
        unmapped_error = 0;
        for (;;) {
                /* Still start < end. */
                if (!vma)
                        return -EFAULT;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -EFAULT;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags);
                                if (error)
                                        return error;
                        }
                        return unmapped_error;
                }
                /* Here vma->vm_start <= start < vma->vm_end < end. */
                error = msync_interval(vma, start, vma->vm_end, flags);
                if (error)
                        return error;
                start = vma->vm_end;
                vma = vma->vm_next;
        }
}