root/mm/filemap.c


DEFINITIONS

This source file includes the following definitions.
  1. invalidate_inode_pages
  2. shrink_mmap
  3. page_unuse
  4. readpage
  5. filemap_nopage
  6. filemap_write_page
  7. filemap_swapout
  8. filemap_swapin
  9. filemap_sync_pte
  10. filemap_sync_pte_range
  11. filemap_sync_pmd_range
  12. filemap_sync
  13. filemap_unmap
  14. generic_mmap
  15. msync_interval
  16. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 #include <linux/pagemap.h>
  24 
  25 #include <asm/segment.h>
  26 #include <asm/system.h>
  27 #include <asm/pgtable.h>
  28 
  29 /*
  30  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  31  * though.
  32  *
  33  * Shared mappings now work. 15.8.1995  Bruno.
  34  */
  35 
  36 struct page * page_hash_table[PAGE_HASH_SIZE];
  37 
  38 /*
  39  * Simple routines for both non-shared and shared mappings.
  40  */
  41 
  42 void invalidate_inode_pages(struct inode * inode, unsigned long start)
  43 {
  44         struct page ** p = &inode->i_pages;
  45         struct page * page;
  46 
  47         while ((page = *p) != NULL) {
  48                 unsigned long offset = page->offset;
  49 
  50                 /* page wholly truncated - free it */
  51                 if (offset >= start) {
  52                         inode->i_nrpages--;
  53                         if ((*p = page->next) != NULL)
  54                                 (*p)->prev = page->prev;
  55                         page->dirty = 0;
  56                         page->next = NULL;
  57                         page->prev = NULL;
  58                         remove_page_from_hash_queue(page);
  59                         page->inode = NULL;
  60                         free_page(page_address(page));
  61                         continue;
  62                 }
  63                 p = &page->next;
  64                 offset = start - offset;
  65                 /* partial truncate, clear end of page */
  66                 if (offset < PAGE_SIZE)
  67                         memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
  68         }
  69 }
  70 
  71 int shrink_mmap(int priority, unsigned long limit)
  72 {
  73         static int clock = 0;
  74         struct page * page;
  75 
  76         if (limit > high_memory)
  77                 limit = high_memory;
  78         limit = MAP_NR(limit);
  79         if (clock >= limit)
  80                 clock = 0;
  81         priority = limit >> priority;
  82         page = mem_map + clock;
  83         while (priority-- > 0) {
  84                 if (page->inode && page->count == 1) {
  85                         remove_page_from_hash_queue(page);
  86                         remove_page_from_inode_queue(page);
  87                         free_page(page_address(page));
  88                         return 1;
  89                 }
  90                 page++;
  91                 clock++;
  92                 if (clock >= limit) {
  93                         clock = 0;
  94                         page = mem_map;
  95                 }
  96         }
  97         return 0;
  98 }
  99 
 100 /*
  101  * This is called from try_to_swap_out() when we try to get rid of some
 102  * pages..  If we're unmapping the last occurrence of this page, we also
 103  * free it from the page hash-queues etc, as we don't want to keep it
 104  * in-core unnecessarily.
 105  */
 106 unsigned long page_unuse(unsigned long page)
 107 {
 108         struct page * p = mem_map + MAP_NR(page);
 109         int count = p->count;
 110 
 111         if (count != 2)
 112                 return count;
 113         if (!p->inode)
 114                 return count;
 115         remove_page_from_hash_queue(p);
 116         remove_page_from_inode_queue(p);
 117         free_page(page);
 118         return 1;
 119 }
 120 
 121 /*
 122  * This should be a low-level fs-specific function (ie
 123  * inode->i_op->readpage).
 124  */
 125 static int readpage(struct inode * inode, unsigned long offset, char * page)
 126 {
 127         int *p, nr[PAGE_SIZE/512];
 128         int i;
 129 
 130         i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
 131         offset >>= inode->i_sb->s_blocksize_bits;
 132         p = nr;
 133         do {
 134                 *p = inode->i_op->bmap(inode, offset);
 135                 i--;
 136                 offset++;
 137                 p++;
 138         } while (i > 0);
 139         bread_page((unsigned long) page, inode->i_dev, nr, inode->i_sb->s_blocksize);
 140         return 0;
 141 }
 142 
 143 /*
 144  * Semantics for shared and private memory areas are different past the end
 145  * of the file. A shared mapping past the last page of the file is an error
  146  * and results in a SIGBUS, while a private mapping just maps in a zero page.
 147  */
 148 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address,
 149         unsigned long page, int no_share)
 150 {
 151         struct inode * inode = area->vm_inode;
 152         unsigned long new_page, old_page;
 153         struct page *p;
 154 
 155         address = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
 156         if (address >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
 157                 send_sig(SIGBUS, current, 1);
 158         p = find_page(inode, address);
 159         if (p)
 160                 goto old_page_exists;
 161         new_page = 0;
 162         if (no_share) {
 163                 new_page = __get_free_page(GFP_USER);
 164                 if (!new_page) {
 165                         oom(current);
 166                         return page;
 167                 }
 168         }
 169         /* inode->i_op-> */ readpage(inode, address, (char *) page);
 170         p = find_page(inode, address);
 171         if (p)
 172                 goto old_and_new_page_exists;
 173         p = mem_map + MAP_NR(page);
 174         p->offset = address;
 175         add_page_to_inode_queue(inode, p);
 176         add_page_to_hash_queue(inode, p);
 177         if (new_page) {
 178                 memcpy((void *) new_page, (void *) page, PAGE_SIZE);
 179                 return new_page;
 180         }
 181         p->count++;
 182         return page;
 183 
 184 old_and_new_page_exists:
 185         if (new_page)
 186                 free_page(new_page);
 187 old_page_exists:
 188         old_page = page_address(p);
 189         if (no_share) {
 190                 memcpy((void *) page, (void *) old_page, PAGE_SIZE);
 191                 return page;
 192         }
 193         p->count++;
 194         free_page(page);
 195         return old_page;
 196 }
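
The SIGBUS behaviour described above can be observed from user space. The following is an illustrative sketch only, not part of filemap.c; the file name "datafile" and its assumed one-page size are made up for the example. It maps a regular file MAP_SHARED over a range that extends a whole page past end-of-file and touches that page; under this kernel a MAP_PRIVATE mapping of the same range would instead be satisfied with a zero page, as the comment before filemap_nopage() notes.

/*
 * Hypothetical userspace sketch (not part of filemap.c): touching a
 * MAP_SHARED page lying wholly beyond the end of the file raises SIGBUS.
 * "datafile" is assumed to be an existing file of exactly one page.
 */
#include <stdio.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

static void bus_handler(int sig)
{
        write(2, "SIGBUS: access past end of file\n", 32);
        _exit(1);
}

int main(void)
{
        long pg = sysconf(_SC_PAGESIZE);
        int fd = open("datafile", O_RDONLY);
        char *map;

        if (fd < 0)
                return 1;
        signal(SIGBUS, bus_handler);

        /* Map two pages even though the file only covers one. */
        map = mmap(NULL, 2 * pg, PROT_READ, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED)
                return 1;

        printf("within file: %d\n", map[0]);    /* demand-paged in normally */
        printf("past EOF:    %d\n", map[pg]);   /* whole page past EOF: SIGBUS */
        return 0;
}
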
 197 
 198 /*
 199  * Tries to write a shared mapped page to its backing store. May return -EIO
 200  * if the disk is full.
 201  */
 202 static int filemap_write_page(struct vm_area_struct * vma,
 203         unsigned long offset,
 204         unsigned long page)
 205 {
 206         int old_fs;
 207         unsigned long size, result;
 208         struct file file;
 209         struct inode * inode;
 210         struct buffer_head * bh;
 211 
 212         bh = buffer_pages[MAP_NR(page)];
 213         if (bh) {
 214                 /* whee.. just mark the buffer heads dirty */
 215                 struct buffer_head * tmp = bh;
 216                 do {
 217                         mark_buffer_dirty(tmp, 0);
 218                         tmp = tmp->b_this_page;
 219                 } while (tmp != bh);
 220                 return 0;
 221         }
 222 
 223         inode = vma->vm_inode;
 224         file.f_op = inode->i_op->default_file_ops;
 225         if (!file.f_op->write)
 226                 return -EIO;
 227         size = offset + PAGE_SIZE;
 228         /* refuse to extend file size.. */
 229         if (S_ISREG(inode->i_mode)) {
 230                 if (size > inode->i_size)
 231                         size = inode->i_size;
 232                 /* Ho humm.. We should have tested for this earlier */
 233                 if (size < offset)
 234                         return -EIO;
 235         }
 236         size -= offset;
 237         file.f_mode = 3;
 238         file.f_flags = 0;
 239         file.f_count = 1;
 240         file.f_inode = inode;
 241         file.f_pos = offset;
 242         file.f_reada = 0;
 243         old_fs = get_fs();
 244         set_fs(KERNEL_DS);
 245         result = file.f_op->write(inode, &file, (const char *) page, size);
 246         set_fs(old_fs);
 247         if (result != size)
 248                 return -EIO;
 249         return 0;
 250 }
 251 
 252 
 253 /*
 254  * Swapping to a shared file: while we're busy writing out the page
 255  * (and the page still exists in memory), we save the page information
 256  * in the page table, so that "filemap_swapin()" can re-use the page
 257  * immediately if it is called while we're busy swapping it out..
 258  *
 259  * Once we've written it all out, we mark the page entry "empty", which
 260  * will result in a normal page-in (instead of a swap-in) from the now
 261  * up-to-date disk file.
 262  */
 263 int filemap_swapout(struct vm_area_struct * vma,
 264         unsigned long offset,
 265         pte_t *page_table)
 266 {
 267         int error;
 268         unsigned long page = pte_page(*page_table);
 269         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 270 
 271         set_pte(page_table, __pte(entry));
 272         /* Yuck, perhaps a slightly modified swapout parameter set? */
 273         invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
 274         error = filemap_write_page(vma, offset, page);
 275         if (pte_val(*page_table) == entry)
 276                 pte_clear(page_table);
 277         return error;
 278 }
 279 
 280 /*
 281  * filemap_swapin() is called only if we have something in the page
 282  * tables that is non-zero (but not present), which we know to be the
 283  * page index of a page that is busy being swapped out (see above).
 284  * So we just use it directly..
 285  */
 286 static pte_t filemap_swapin(struct vm_area_struct * vma,
 287         unsigned long offset,
 288         unsigned long entry)
 289 {
 290         unsigned long page = SWP_OFFSET(entry);
 291 
 292         mem_map[page].count++;
 293         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 294         return mk_pte(page,vma->vm_page_prot);
 295 }
 296 
 297 
 298 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
 299         unsigned long address, unsigned int flags)
 300 {
 301         pte_t pte = *ptep;
 302         unsigned long page;
 303         int error;
 304 
 305         if (!(flags & MS_INVALIDATE)) {
 306                 if (!pte_present(pte))
 307                         return 0;
 308                 if (!pte_dirty(pte))
 309                         return 0;
 310                 set_pte(ptep, pte_mkclean(pte));
 311                 invalidate_page(vma, address);
 312                 page = pte_page(pte);
 313                 mem_map[MAP_NR(page)].count++;
 314         } else {
 315                 if (pte_none(pte))
 316                         return 0;
 317                 pte_clear(ptep);
 318                 invalidate_page(vma, address);
 319                 if (!pte_present(pte)) {
 320                         swap_free(pte_val(pte));
 321                         return 0;
 322                 }
 323                 page = pte_page(pte);
 324                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 325                         free_page(page);
 326                         return 0;
 327                 }
 328         }
 329         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 330         free_page(page);
 331         return error;
 332 }
 333 
 334 static inline int filemap_sync_pte_range(pmd_t * pmd,
 335         unsigned long address, unsigned long size, 
 336         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 337 {
 338         pte_t * pte;
 339         unsigned long end;
 340         int error;
 341 
 342         if (pmd_none(*pmd))
 343                 return 0;
 344         if (pmd_bad(*pmd)) {
 345                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 346                 pmd_clear(pmd);
 347                 return 0;
 348         }
 349         pte = pte_offset(pmd, address);
 350         offset += address & PMD_MASK;
 351         address &= ~PMD_MASK;
 352         end = address + size;
 353         if (end > PMD_SIZE)
 354                 end = PMD_SIZE;
 355         error = 0;
 356         do {
 357                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 358                 address += PAGE_SIZE;
 359                 pte++;
 360         } while (address < end);
 361         return error;
 362 }
 363 
 364 static inline int filemap_sync_pmd_range(pgd_t * pgd,
 365         unsigned long address, unsigned long size, 
 366         struct vm_area_struct *vma, unsigned int flags)
 367 {
 368         pmd_t * pmd;
 369         unsigned long offset, end;
 370         int error;
 371 
 372         if (pgd_none(*pgd))
 373                 return 0;
 374         if (pgd_bad(*pgd)) {
 375                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 376                 pgd_clear(pgd);
 377                 return 0;
 378         }
 379         pmd = pmd_offset(pgd, address);
 380         offset = address & PMD_MASK;
 381         address &= ~PMD_MASK;
 382         end = address + size;
 383         if (end > PGDIR_SIZE)
 384                 end = PGDIR_SIZE;
 385         error = 0;
 386         do {
 387                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 388                 address = (address + PMD_SIZE) & PMD_MASK;
 389                 pmd++;
 390         } while (address < end);
 391         return error;
 392 }
 393 
 394 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 395         size_t size, unsigned int flags)
 396 {
 397         pgd_t * dir;
 398         unsigned long end = address + size;
 399         int error = 0;
 400 
 401         dir = pgd_offset(current->mm, address);
 402         while (address < end) {
 403                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 404                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 405                 dir++;
 406         }
 407         invalidate_range(vma->vm_mm, end - size, end);
 408         return error;
 409 }
 410 
 411 /*
 412  * This handles (potentially partial) area unmaps..
 413  */
 414 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
 415 {
 416         filemap_sync(vma, start, len, MS_ASYNC);
 417 }
 418 
 419 /*
 420  * Shared mappings need to be able to do the right thing at
 421  * close/unmap/sync. They will also use the private file as
 422  * backing-store for swapping..
 423  */
 424 static struct vm_operations_struct file_shared_mmap = {
 425         NULL,                   /* no special open */
 426         NULL,                   /* no special close */
 427         filemap_unmap,          /* unmap - we need to sync the pages */
 428         NULL,                   /* no special protect */
 429         filemap_sync,           /* sync */
 430         NULL,                   /* advise */
 431         filemap_nopage,         /* nopage */
 432         NULL,                   /* wppage */
 433         filemap_swapout,        /* swapout */
 434         filemap_swapin,         /* swapin */
 435 };
 436 
 437 /*
 438  * Private mappings just need to be able to load in the map.
 439  *
 440  * (This is actually used for shared mappings as well, if we
 441  * know they can't ever get write permissions..)
 442  */
 443 static struct vm_operations_struct file_private_mmap = {
 444         NULL,                   /* open */
 445         NULL,                   /* close */
 446         NULL,                   /* unmap */
 447         NULL,                   /* protect */
 448         NULL,                   /* sync */
 449         NULL,                   /* advise */
 450         filemap_nopage,         /* nopage */
 451         NULL,                   /* wppage */
 452         NULL,                   /* swapout */
 453         NULL,                   /* swapin */
 454 };
 455 
 456 /* This is used for a general mmap of a disk file */
 457 int generic_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 458 {
 459         struct vm_operations_struct * ops;
 460 
 461         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 462                 ops = &file_shared_mmap;
 463                 /* share_page() can only guarantee proper page sharing if
 464                  * the offsets are all page aligned. */
 465                 if (vma->vm_offset & (PAGE_SIZE - 1))
 466                         return -EINVAL;
 467         } else {
 468                 ops = &file_private_mmap;
 469                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 470                         return -EINVAL;
 471         }
 472         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 473                 return -EACCES;
 474         if (!inode->i_op || !inode->i_op->bmap)
 475                 return -ENOEXEC;
 476         if (!IS_RDONLY(inode)) {
 477                 inode->i_atime = CURRENT_TIME;
 478                 inode->i_dirt = 1;
 479         }
 480         vma->vm_inode = inode;
 481         inode->i_count++;
 482         vma->vm_ops = ops;
 483         return 0;
 484 }
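
As a hedged illustration of the userspace view of this path (again not part of the kernel source): mmap() of a regular file only sets up the vm_area and installs the vm_ops chosen above; the file data is pulled in page by page through filemap_nopage()/readpage() when the mapping is first touched. The sketch below simply maps a file read-only and walks it.

/*
 * Hypothetical userspace sketch: a private, read-only mapping of a regular
 * file.  mmap() itself only sets up the mapping; each page is read in by
 * filemap_nopage()/readpage() when it is first touched.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>

int main(int argc, char *argv[])
{
        struct stat st;
        int fd;
        char *map;
        unsigned long i, sum = 0;

        if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;
        if (fstat(fd, &st) < 0 || st.st_size == 0)
                return 1;

        map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (map == MAP_FAILED)
                return 1;

        /* Each page of the file is faulted in on first access here. */
        for (i = 0; i < (unsigned long) st.st_size; i++)
                sum += (unsigned char) map[i];
        printf("%lu bytes, byte sum %lu\n", (unsigned long) st.st_size, sum);

        munmap(map, st.st_size);
        close(fd);
        return 0;
}
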
 485 
 486 
 487 /*
 488  * The msync() system call.
 489  */
 490 
 491 static int msync_interval(struct vm_area_struct * vma,
 492         unsigned long start, unsigned long end, int flags)
 493 {
 494         if (!vma->vm_inode)
 495                 return 0;
 496         if (vma->vm_ops->sync) {
 497                 int error;
 498                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 499                 if (error)
 500                         return error;
 501                 if (flags & MS_SYNC)
 502                         return file_fsync(vma->vm_inode, NULL);
 503                 return 0;
 504         }
 505         return 0;
 506 }
 507 
 508 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
 509 {
 510         unsigned long end;
 511         struct vm_area_struct * vma;
 512         int unmapped_error, error;
 513 
 514         if (start & ~PAGE_MASK)
 515                 return -EINVAL;
 516         len = (len + ~PAGE_MASK) & PAGE_MASK;
 517         end = start + len;
 518         if (end < start)
 519                 return -EINVAL;
 520         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 521                 return -EINVAL;
 522         if (end == start)
 523                 return 0;
 524         /*
 525          * If the interval [start,end) covers some unmapped address ranges,
 526          * just ignore them, but return -EFAULT at the end.
 527          */
 528         vma = find_vma(current, start);
 529         unmapped_error = 0;
 530         for (;;) {
 531                 /* Still start < end. */
 532                 if (!vma)
 533                         return -EFAULT;
 534                 /* Here start < vma->vm_end. */
 535                 if (start < vma->vm_start) {
 536                         unmapped_error = -EFAULT;
 537                         start = vma->vm_start;
 538                 }
 539                 /* Here vma->vm_start <= start < vma->vm_end. */
 540                 if (end <= vma->vm_end) {
 541                         if (start < end) {
 542                                 error = msync_interval(vma, start, end, flags);
 543                                 if (error)
 544                                         return error;
 545                         }
 546                         return unmapped_error;
 547                 }
 548                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 549                 error = msync_interval(vma, start, vma->vm_end, flags);
 550                 if (error)
 551                         return error;
 552                 start = vma->vm_end;
 553                 vma = vma->vm_next;
 554         }
 555 }
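
A minimal userspace sketch of the call (illustrative only, with "datafile" an assumed name): the start address must be page aligned, as checked at the top of sys_msync(); the length is rounded up to a whole page; MS_SYNC waits for the write-back via file_fsync() while MS_ASYNC only initiates it; and any unmapped hole inside the range makes the call return -EFAULT after the mapped parts have been synced.

/*
 * Hypothetical userspace sketch: modify a file through a shared mapping
 * and push the dirty page back with msync().  MS_SYNC waits for the
 * write-back; MS_ASYNC merely starts it.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
        long pg = sysconf(_SC_PAGESIZE);
        int fd = open("datafile", O_RDWR);       /* assumed at least one page long */
        char *map;

        if (fd < 0)
                return 1;
        map = mmap(NULL, pg, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED)
                return 1;

        memcpy(map, "hello", 5);                  /* dirties the page */

        /* start is page aligned (required); len is rounded up to a page */
        if (msync(map, 5, MS_SYNC) < 0)
                perror("msync");

        munmap(map, pg);
        close(fd);
        return 0;
}

Passing MS_INVALIDATE additionally tears down the process's cached view, so later accesses fault the pages back in from the file, as filemap_sync_pte() above shows.
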
