root/mm/filemap.c


DEFINITIONS

This source file includes the following definitions:
  1. invalidate_inode_pages
  2. shrink_mmap
  3. page_unuse
  4. readpage
  5. filemap_nopage
  6. filemap_write_page
  7. filemap_swapout
  8. filemap_swapin
  9. filemap_sync_pte
  10. filemap_sync_pte_range
  11. filemap_sync_pmd_range
  12. filemap_sync
  13. filemap_unmap
  14. generic_mmap
  15. msync_interval
  16. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 #include <linux/pagemap.h>
  24 
  25 #include <asm/segment.h>
  26 #include <asm/system.h>
  27 #include <asm/pgtable.h>
  28 
  29 /*
  30  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  31  * though.
  32  *
  33  * Shared mappings now work. 15.8.1995  Bruno.
  34  */
  35 
  36 unsigned long page_cache_size = 0;
  37 struct page * page_hash_table[PAGE_HASH_SIZE];
  38 
  39 /*
  40  * Simple routines for both non-shared and shared mappings.
  41  */
  42 
  43 void invalidate_inode_pages(struct inode * inode, unsigned long start)
  44 {
  45         struct page ** p = &inode->i_pages;
  46         struct page * page;
  47 
  48         while ((page = *p) != NULL) {
  49                 unsigned long offset = page->offset;
  50 
  51                 /* page wholly truncated - free it */
  52                 if (offset >= start) {
  53                         inode->i_nrpages--;
  54                         if ((*p = page->next) != NULL)
  55                                 (*p)->prev = page->prev;
  56                         page->dirty = 0;
  57                         page->next = NULL;
  58                         page->prev = NULL;
  59                         remove_page_from_hash_queue(page);
  60                         page->inode = NULL;
  61                         free_page(page_address(page));
  62                         continue;
  63                 }
  64                 p = &page->next;
  65                 offset = start - offset;
  66                 /* partial truncate, clear end of page */
  67                 if (offset < PAGE_SIZE)
  68                         memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
  69         }
  70 }
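
/*
 * Illustrative caller (not part of this file): a filesystem's truncate
 * path could use invalidate_inode_pages() roughly like this, dropping
 * cached pages wholly past the new size and zeroing the tail of the last
 * partial page.  "myfs_truncate" is a hypothetical name.
 */
static void myfs_truncate(struct inode * inode, unsigned long new_size)
{
        inode->i_size = new_size;
        invalidate_inode_pages(inode, new_size);        /* flush stale page-cache data */
        inode->i_dirt = 1;
}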
  71 
  72 int shrink_mmap(int priority, unsigned long limit)
  73 {
  74         static int clock = 0;
  75         struct page * page;
  76 
  77         if (limit > high_memory)
  78                 limit = high_memory;
  79         limit = MAP_NR(limit);
  80         if (clock >= limit)
  81                 clock = 0;
  82         priority = (limit<<2) >> priority;
  83         page = mem_map + clock;
  84         while (priority-- > 0) {
  85                 if (page->inode) {
  86                         unsigned age = page->age;
  87                         /* if the page is shared, we rejuvenate it slightly */
  88                         if (page->count != 1)
  89                                 age |= PAGE_AGE_VALUE;
  90                         page->age = age >> 1;
  91                         if (age <= PAGE_AGE_VALUE/2) {
  92                                 remove_page_from_hash_queue(page);
  93                                 remove_page_from_inode_queue(page);
  94                                 free_page(page_address(page));
  95                                 return 1;
  96                         }
  97                 }
  98                 page++;
  99                 clock++;
 100                 if (clock >= limit) {
 101                         clock = 0;
 102                         page = mem_map;
 103                 }
 104         }
 105         return 0;
 106 }
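
/*
 * Illustrative caller (hypothetical, not part of this file): the swapping
 * code can sweep the clock with increasing aggressiveness until one page
 * is reclaimed.  With limit = high_memory (N page frames in all),
 * priority 6 examines (N<<2)>>6 = N/16 frames per call, while priority 0
 * examines 4*N frames, i.e. the whole clock several times over.
 */
static int try_reclaim_one_page(void)
{
        int priority;

        for (priority = 6; priority >= 0; priority--)
                if (shrink_mmap(priority, high_memory))
                        return 1;       /* one page-cache page was freed */
        return 0;
}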
 107 
 108 /*
 109  * This is called from try_to_swap_out() when we try to get rid of some
 110  * pages..  If we're unmapping the last occurrence of this page, we also
 111  * free it from the page hash-queues etc, as we don't want to keep it
 112  * in-core unnecessarily.
 113  */
 114 unsigned long page_unuse(unsigned long page)
 115 {
 116         struct page * p = mem_map + MAP_NR(page);
 117         int count = p->count;
 118 
 119         if (count != 2)
 120                 return count;
 121         if (!p->inode)
 122                 return count;
 123         remove_page_from_hash_queue(p);
 124         remove_page_from_inode_queue(p);
 125         free_page(page);
 126         return 1;
 127 }
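
/*
 * Note on the count check above: a count of 2 means one reference from
 * the mapping that is being torn down plus the page-cache reference held
 * through the inode and hash queues.  Detaching the page from those
 * queues and dropping the cache's reference here ensures that once the
 * caller releases its own reference the page really becomes free; any
 * other count means the page is still shared and is left in the cache.
 */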
 128 
 129 /*
 130  * This should be a low-level fs-specific function (ie
 131  * inode->i_op->readpage).
 132  */
 133 static int readpage(struct inode * inode, unsigned long offset, char * page)
 134 {
 135         int *p, nr[PAGE_SIZE/512];
 136         int i;
 137 
 138         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
 139         offset >>= inode->i_sb->s_blocksize_bits;
 140         p = nr;
 141         do {
 142                 *p = inode->i_op->bmap(inode, offset);
 143                 i--;
 144                 offset++;
 145                 p++;
 146         } while (i > 0);
 147         bread_page((unsigned long) page, inode->i_dev, nr, inode->i_sb->s_blocksize);
 148         return 0;
 149 }
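
/*
 * Worked example (assuming a 1kB filesystem block size and 4kB pages):
 * filling the page at file offset 8192 makes four bmap() calls for
 * logical blocks 8, 9, 10 and 11, and hands the four resulting device
 * block numbers to bread_page(), which reads them into the page in one go.
 *
 *      PAGE_SIZE >> s_blocksize_bits  =  4096 >> 10  =  4 blocks per page
 *      offset    >> s_blocksize_bits  =  8192 >> 10  =  logical block 8
 */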
 150 
 151 /*
 152  * Semantics for shared and private memory areas are different past the end
 153  * of the file. A shared mapping past the last page of the file is an error
 154  * and results in a SIGBUS, while a private mapping just maps in a zero page.
 155  */
 156 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address,
 157         unsigned long page, int no_share)
 158 {
 159         struct inode * inode = area->vm_inode;
 160         unsigned long new_page, old_page;
 161         struct page *p;
 162 
 163         address = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
 164         if (address >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
 165                 send_sig(SIGBUS, current, 1);
 166         p = find_page(inode, address);
 167         if (p)
 168                 goto old_page_exists;
 169         new_page = 0;
 170         if (no_share) {
 171                 new_page = __get_free_page(GFP_USER);
 172                 if (!new_page) {
 173                         oom(current);
 174                         return page;
 175                 }
 176         }
 177         /* inode->i_op-> */ readpage(inode, address, (char *) page);
 178         p = find_page(inode, address);
 179         if (p)
 180                 goto old_and_new_page_exists;
 181         p = mem_map + MAP_NR(page);
 182         p->offset = address;
 183         add_page_to_inode_queue(inode, p);
 184         add_page_to_hash_queue(inode, p);
 185         if (new_page) {
 186                 memcpy((void *) new_page, (void *) page, PAGE_SIZE);
 187                 return new_page;
 188         }
 189         p->count++;
 190         return page;
 191 
 192 old_and_new_page_exists:
 193         if (new_page)
 194                 free_page(new_page);
 195 old_page_exists:
 196         old_page = page_address(p);
 197         if (no_share) {
 198                 memcpy((void *) page, (void *) old_page, PAGE_SIZE);
 199                 return page;
 200         }
 201         p->count++;
 202         free_page(page);
 203         return old_page;
 204 }
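
/*
 * Hedged sketch of the expected caller (the real one lives in mm/memory.c
 * and is not reproduced here): the fault handler pre-allocates a free page
 * and lets nopage() either fill it, or exchange it for a page that is
 * already in the cache.  no_share is set for a write fault on a non-shared
 * mapping, so the faulting process gets its own private copy.
 * "fault_in_file_page" is an illustrative name only.
 */
static unsigned long fault_in_file_page(struct vm_area_struct * vma,
        unsigned long address, int write_access)
{
        unsigned long page = __get_free_page(GFP_KERNEL);

        if (!page)
                return 0;
        return vma->vm_ops->nopage(vma, address, page,
                write_access && !(vma->vm_flags & VM_SHARED));
}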
 205 
 206 /*
 207  * Tries to write a shared mapped page to its backing store. May return -EIO
 208  * if the disk is full.
 209  */
 210 static int filemap_write_page(struct vm_area_struct * vma,
 211         unsigned long offset,
 212         unsigned long page)
 213 {
 214         int old_fs;
 215         unsigned long size, result;
 216         struct file file;
 217         struct inode * inode;
 218         struct buffer_head * bh;
 219 
 220         bh = buffer_pages[MAP_NR(page)];
 221         if (bh) {
 222                 /* whee.. just mark the buffer heads dirty */
 223                 struct buffer_head * tmp = bh;
 224                 do {
 225                         mark_buffer_dirty(tmp, 0);
 226                         tmp = tmp->b_this_page;
 227                 } while (tmp != bh);
 228                 return 0;
 229         }
 230 
 231         inode = vma->vm_inode;
 232         file.f_op = inode->i_op->default_file_ops;
 233         if (!file.f_op->write)
 234                 return -EIO;
 235         size = offset + PAGE_SIZE;
 236         /* refuse to extend file size.. */
 237         if (S_ISREG(inode->i_mode)) {
 238                 if (size > inode->i_size)
 239                         size = inode->i_size;
 240                 /* Ho humm.. We should have tested for this earlier */
 241                 if (size < offset)
 242                         return -EIO;
 243         }
 244         size -= offset;
 245         file.f_mode = 3;
 246         file.f_flags = 0;
 247         file.f_count = 1;
 248         file.f_inode = inode;
 249         file.f_pos = offset;
 250         file.f_reada = 0;
 251         old_fs = get_fs();
 252         set_fs(KERNEL_DS);
 253         result = file.f_op->write(inode, &file, (const char *) page, size);
 254         set_fs(old_fs);
 255         if (result != size)
 256                 return -EIO;
 257         return 0;
 258 }
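
/*
 * Note on the routine above: if the page still has buffer heads attached,
 * write-out is left to the buffer cache by simply marking them dirty.
 * Otherwise a throw-away struct file is built on the stack so that the
 * filesystem's ordinary write() can be reused; set_fs(KERNEL_DS)
 * temporarily lifts the user-space address check so write() accepts the
 * kernel address of the page, and f_mode = 3 marks the dummy file both
 * readable and writable.
 */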
 259 
 260 
 261 /*
 262  * Swapping to a shared file: while we're busy writing out the page
 263  * (and the page still exists in memory), we save the page information
 264  * in the page table, so that "filemap_swapin()" can re-use the page
 265  * immediately if it is called while we're busy swapping it out..
 266  *
 267  * Once we've written it all out, we mark the page entry "empty", which
 268  * will result in a normal page-in (instead of a swap-in) from the now
 269  * up-to-date disk file.
 270  */
 271 int filemap_swapout(struct vm_area_struct * vma,
 272         unsigned long offset,
 273         pte_t *page_table)
 274 {
 275         int error;
 276         unsigned long page = pte_page(*page_table);
 277         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 278 
 279         set_pte(page_table, __pte(entry));
 280         /* Yuck, perhaps a slightly modified swapout parameter set? */
 281         invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
 282         error = filemap_write_page(vma, offset, page);
 283         if (pte_val(*page_table) == entry)
 284                 pte_clear(page_table);
 285         return error;
 286 }
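
/*
 * Worked example of the pte trick above: if the page lives in physical
 * frame 0x123, the pte is temporarily replaced by the pseudo swap entry
 * SWP_ENTRY(SHM_SWP_TYPE, 0x123).  No swap device slot is allocated; the
 * frame number is merely parked in the (now non-present) pte so that
 * filemap_swapin() can rebuild the mapping if the page is touched while
 * the write-out is still in progress.
 */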
 287 
 288 /*
 289  * filemap_swapin() is called only if we have something in the page
 290  * tables that is non-zero (but not present), which we know to be the
 291  * page index of a page that is busy being swapped out (see above).
 292  * So we just use it directly..
 293  */
 294 static pte_t filemap_swapin(struct vm_area_struct * vma,
 295         unsigned long offset,
 296         unsigned long entry)
 297 {
 298         unsigned long page = SWP_OFFSET(entry);
 299 
 300         mem_map[page].count++;
 301         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 302         return mk_pte(page,vma->vm_page_prot);
 303 }
 304 
 305 
 306 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 307         unsigned long address, unsigned int flags)
 308 {
 309         pte_t pte = *ptep;
 310         unsigned long page;
 311         int error;
 312 
 313         if (!(flags & MS_INVALIDATE)) {
 314                 if (!pte_present(pte))
 315                         return 0;
 316                 if (!pte_dirty(pte))
 317                         return 0;
 318                 set_pte(ptep, pte_mkclean(pte));
 319                 invalidate_page(vma, address);
 320                 page = pte_page(pte);
 321                 mem_map[MAP_NR(page)].count++;
 322         } else {
 323                 if (pte_none(pte))
 324                         return 0;
 325                 pte_clear(ptep);
 326                 invalidate_page(vma, address);
 327                 if (!pte_present(pte)) {
 328                         swap_free(pte_val(pte));
 329                         return 0;
 330                 }
 331                 page = pte_page(pte);
 332                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 333                         free_page(page);
 334                         return 0;
 335                 }
 336         }
 337         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 338         free_page(page);
 339         return error;
 340 }
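
/*
 * Summary of the two cases above: for a plain msync() (no MS_INVALIDATE)
 * only present, dirty ptes matter - they are marked clean, an extra
 * reference is taken, and the page is written back below.  With
 * MS_INVALIDATE the pte is removed outright: a swapped-out entry just has
 * its swap slot freed, a clean page (or a pure-invalidate call with no
 * sync flags) is simply dropped, and only a dirty page combined with
 * MS_ASYNC/MS_SYNC is still written back before being freed.
 */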
 341 
 342 static inline int filemap_sync_pte_range(pmd_t * pmd,
 343         unsigned long address, unsigned long size, 
 344         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 345 {
 346         pte_t * pte;
 347         unsigned long end;
 348         int error;
 349 
 350         if (pmd_none(*pmd))
 351                 return 0;
 352         if (pmd_bad(*pmd)) {
 353                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 354                 pmd_clear(pmd);
 355                 return 0;
 356         }
 357         pte = pte_offset(pmd, address);
 358         offset += address & PMD_MASK;
 359         address &= ~PMD_MASK;
 360         end = address + size;
 361         if (end > PMD_SIZE)
 362                 end = PMD_SIZE;
 363         error = 0;
 364         do {
 365                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 366                 address += PAGE_SIZE;
 367                 pte++;
 368         } while (address < end);
 369         return error;
 370 }
 371 
 372 static inline int filemap_sync_pmd_range(pgd_t * pgd,
 373         unsigned long address, unsigned long size, 
 374         struct vm_area_struct *vma, unsigned int flags)
 375 {
 376         pmd_t * pmd;
 377         unsigned long offset, end;
 378         int error;
 379 
 380         if (pgd_none(*pgd))
 381                 return 0;
 382         if (pgd_bad(*pgd)) {
 383                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 384                 pgd_clear(pgd);
 385                 return 0;
 386         }
 387         pmd = pmd_offset(pgd, address);
 388         offset = address & PMD_MASK;
 389         address &= ~PMD_MASK;
 390         end = address + size;
 391         if (end > PGDIR_SIZE)
 392                 end = PGDIR_SIZE;
 393         error = 0;
 394         do {
 395                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 396                 address = (address + PMD_SIZE) & PMD_MASK;
 397                 pmd++;
 398         } while (address < end);
 399         return error;
 400 }
 401 
 402 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 403         size_t size, unsigned int flags)
 404 {
 405         pgd_t * dir;
 406         unsigned long end = address + size;
 407         int error = 0;
 408 
 409         dir = pgd_offset(current->mm, address);
 410         while (address < end) {
 411                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 412                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 413                 dir++;
 414         }
 415         invalidate_range(vma->vm_mm, end - size, end);
 416         return error;
 417 }
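
/*
 * The three routines above form the usual three-level page-table walk:
 * filemap_sync() advances one pgd entry (PGDIR_SIZE) at a time,
 * filemap_sync_pmd_range() one pmd entry (PMD_SIZE) at a time, and
 * filemap_sync_pte_range() one pte (one page) at a time, each inner level
 * clipping its end address to the span covered by a single entry of the
 * level above.
 */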
 418 
 419 /*
 420  * This handles (potentially partial) area unmaps..
 421  */
 422 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
 423 {
 424         filemap_sync(vma, start, len, MS_ASYNC);
 425 }
 426 
 427 /*
 428  * Shared mappings need to be able to do the right thing at
 429  * close/unmap/sync. They will also use the private file as
 430  * backing-store for swapping..
 431  */
 432 static struct vm_operations_struct file_shared_mmap = {
 433         NULL,                   /* no special open */
 434         NULL,                   /* no special close */
 435         filemap_unmap,          /* unmap - we need to sync the pages */
 436         NULL,                   /* no special protect */
 437         filemap_sync,           /* sync */
 438         NULL,                   /* advise */
 439         filemap_nopage,         /* nopage */
 440         NULL,                   /* wppage */
 441         filemap_swapout,        /* swapout */
 442         filemap_swapin,         /* swapin */
 443 };
 444 
 445 /*
 446  * Private mappings just need to be able to load in the map.
 447  *
 448  * (This is actually used for shared mappings as well, if we
 449  * know they can't ever get write permissions..)
 450  */
 451 static struct vm_operations_struct file_private_mmap = {
 452         NULL,                   /* open */
 453         NULL,                   /* close */
 454         NULL,                   /* unmap */
 455         NULL,                   /* protect */
 456         NULL,                   /* sync */
 457         NULL,                   /* advise */
 458         filemap_nopage,         /* nopage */
 459         NULL,                   /* wppage */
 460         NULL,                   /* swapout */
 461         NULL,                   /* swapin */
 462 };
 463 
 464 /* This is used for a general mmap of a disk file */
 465 int generic_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 466 {
 467         struct vm_operations_struct * ops;
 468 
 469         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 470                 ops = &file_shared_mmap;
 471                 /* share_page() can only guarantee proper page sharing if
 472                  * the offsets are all page aligned. */
 473                 if (vma->vm_offset & (PAGE_SIZE - 1))
 474                         return -EINVAL;
 475         } else {
 476                 ops = &file_private_mmap;
 477                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 478                         return -EINVAL;
 479         }
 480         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 481                 return -EACCES;
 482         if (!inode->i_op || !inode->i_op->bmap)
 483                 return -ENOEXEC;
 484         if (!IS_RDONLY(inode)) {
 485                 inode->i_atime = CURRENT_TIME;
 486                 inode->i_dirt = 1;
 487         }
 488         vma->vm_inode = inode;
 489         inode->i_count++;
 490         vma->vm_ops = ops;
 491         return 0;
 492 }
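
/*
 * Note: generic_mmap() is intended to be plugged into the mmap slot of a
 * bmap()-capable filesystem's file_operations.  Writable MAP_SHARED
 * mappings get file_shared_mmap above (and must use page-aligned file
 * offsets); read-only or private mappings get the lighter
 * file_private_mmap, which only needs the nopage() handler and may use
 * merely block-aligned offsets.
 */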
 493 
 494 
 495 /*
 496  * The msync() system call.
 497  */
 498 
 499 static int msync_interval(struct vm_area_struct * vma,
 500         unsigned long start, unsigned long end, int flags)
 501 {
 502         if (!vma->vm_inode)
 503                 return 0;
 504         if (vma->vm_ops->sync) {
 505                 int error;
 506                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 507                 if (error)
 508                         return error;
 509                 if (flags & MS_SYNC)
 510                         return file_fsync(vma->vm_inode, NULL);
 511                 return 0;
 512         }
 513         return 0;
 514 }
 515 
 516 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
 517 {
 518         unsigned long end;
 519         struct vm_area_struct * vma;
 520         int unmapped_error, error;
 521 
 522         if (start & ~PAGE_MASK)
 523                 return -EINVAL;
 524         len = (len + ~PAGE_MASK) & PAGE_MASK;
 525         end = start + len;
 526         if (end < start)
 527                 return -EINVAL;
 528         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 529                 return -EINVAL;
 530         if (end == start)
 531                 return 0;
 532         /*
 533          * If the interval [start,end) covers some unmapped address ranges,
 534          * just ignore them, but return -EFAULT at the end.
 535          */
 536         vma = find_vma(current, start);
 537         unmapped_error = 0;
 538         for (;;) {
 539                 /* Still start < end. */
 540                 if (!vma)
 541                         return -EFAULT;
 542                 /* Here start < vma->vm_end. */
 543                 if (start < vma->vm_start) {
 544                         unmapped_error = -EFAULT;
 545                         start = vma->vm_start;
 546                 }
 547                 /* Here vma->vm_start <= start < vma->vm_end. */
 548                 if (end <= vma->vm_end) {
 549                         if (start < end) {
 550                                 error = msync_interval(vma, start, end, flags);
 551                                 if (error)
 552                                         return error;
 553                         }
 554                         return unmapped_error;
 555                 }
 556                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 557                 error = msync_interval(vma, start, vma->vm_end, flags);
 558                 if (error)
 559                         return error;
 560                 start = vma->vm_end;
 561                 vma = vma->vm_next;
 562         }
 563 }
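
/*
 * User-space view (illustrative sketch only, assuming a 4kB page size):
 * a MAP_SHARED mapping of a regular file, a store into it, and an msync()
 * that ends up in filemap_sync() and filemap_write_page() above.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int update_first_page(const char *path)
{
        int fd = open(path, O_RDWR);
        char *map;

        if (fd < 0)
                return -1;
        map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED) {
                close(fd);
                return -1;
        }
        memcpy(map, "hello", 5);        /* dirties the shared page */
        msync(map, 4096, MS_SYNC);      /* write it back through the file */
        munmap(map, 4096);
        close(fd);
        return 0;
}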
