root/mm/filemap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. multi_bmap
  2. filemap_nopage
  3. filemap_write_page
  4. filemap_swapout
  5. filemap_swapin
  6. filemap_sync_pte
  7. filemap_sync_pte_range
  8. filemap_sync_pmd_range
  9. filemap_sync
  10. filemap_unmap
  11. generic_mmap
  12. msync_interval
  13. sys_msync

   1 /*
   2  *      linux/mm/filemap.c
   3  *
   4  * Copyright (C) 1994, 1995  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file handles the generic file mmap semantics used by
   9  * most "normal" filesystems (but you don't /have/ to use this:
  10  * the NFS filesystem does this differently, for example)
  11  */
  12 #include <linux/stat.h>
  13 #include <linux/sched.h>
  14 #include <linux/kernel.h>
  15 #include <linux/mm.h>
  16 #include <linux/shm.h>
  17 #include <linux/errno.h>
  18 #include <linux/mman.h>
  19 #include <linux/string.h>
  20 #include <linux/malloc.h>
  21 #include <linux/fs.h>
  22 #include <linux/locks.h>
  23 
  24 #include <asm/segment.h>
  25 #include <asm/system.h>
  26 #include <asm/pgtable.h>
  27 
  28 /*
  29  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  30  * though.
  31  *
  32  * Shared mappings now work. 15.8.1995  Bruno.
  33  */
  34 
  35 /*
  36  * Simple routines for both non-shared and shared mappings.
  37  */
  38 
  39 static inline void multi_bmap(struct inode * inode, unsigned long block, unsigned int * nr, int shift)
     /* [previous][next][first][last][top][bottom][index][help] */
  40 {
  41         int i = PAGE_SIZE >> shift;
  42         block >>= shift;
  43         do {
  44                 *nr = bmap(inode, block);
  45                 i--;
  46                 block++;
  47                 nr++;
  48         } while (i > 0);
  49 }
  50 
  51 /*
  52  * Semantics for shared and private memory areas are different past the end
  53  * of the file. A shared mapping past the last page of the file is an error
  54  * and results in a SIBGUS, while a private mapping just maps in a zero page.
  55  */
  56 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
  57         unsigned long page, int no_share)
  58 {
  59         struct inode * inode = area->vm_inode;
  60         int nr[PAGE_SIZE/512];
  61 
  62         address = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
  63         if (address >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
  64                 send_sig(SIGBUS, current, 1);
  65         multi_bmap(inode, address, nr, inode->i_sb->s_blocksize_bits);
  66         return bread_page(page, inode->i_dev, nr, inode->i_sb->s_blocksize, no_share);
  67 }
  68 
  69 
  70 /*
  71  * Tries to write a shared mapped page to its backing store. May return -EIO
  72  * if the disk is full.
  73  */
  74 static int filemap_write_page(struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
  75         unsigned long offset,
  76         unsigned long page)
  77 {
  78         int old_fs;
  79         unsigned long size, result;
  80         struct file file;
  81         struct inode * inode;
  82         struct buffer_head * bh;
  83 
  84         bh = buffer_pages[MAP_NR(page)];
  85         if (bh) {
  86                 /* whee.. just mark the buffer heads dirty */
  87                 struct buffer_head * tmp = bh;
  88                 do {
  89                         mark_buffer_dirty(tmp, 0);
  90                         tmp = tmp->b_this_page;
  91                 } while (tmp != bh);
  92                 return 0;
  93         }
  94 
  95         inode = vma->vm_inode;
  96         file.f_op = inode->i_op->default_file_ops;
  97         if (!file.f_op->write)
  98                 return -EIO;
  99         size = offset + PAGE_SIZE;
 100         /* refuse to extend file size.. */
 101         if (S_ISREG(inode->i_mode)) {
 102                 if (size > inode->i_size)
 103                         size = inode->i_size;
 104                 /* Ho humm.. We should have tested for this earlier */
 105                 if (size < offset)
 106                         return -EIO;
 107         }
 108         size -= offset;
 109         file.f_mode = 3;
 110         file.f_flags = 0;
 111         file.f_count = 1;
 112         file.f_inode = inode;
 113         file.f_pos = offset;
 114         file.f_reada = 0;
 115         old_fs = get_fs();
 116         set_fs(KERNEL_DS);
 117         result = file.f_op->write(inode, &file, (const char *) page, size);
 118         set_fs(old_fs);
 119         if (result != size)
 120                 return -EIO;
 121         return 0;
 122 }
 123 
 124 
 125 /*
 126  * Swapping to a shared file: while we're busy writing out the page
 127  * (and the page still exists in memory), we save the page information
 128  * in the page table, so that "filemap_swapin()" can re-use the page
 129  * immediately if it is called while we're busy swapping it out..
 130  *
 131  * Once we've written it all out, we mark the page entry "empty", which
 132  * will result in a normal page-in (instead of a swap-in) from the now
 133  * up-to-date disk file.
 134  */
 135 int filemap_swapout(struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 136         unsigned long offset,
 137         pte_t *page_table)
 138 {
 139         int error;
 140         unsigned long page = pte_page(*page_table);
 141         unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
 142 
 143         set_pte(page_table, __pte(entry));
 144         /* Yuck, perhaps a slightly modified swapout parameter set? */
 145         invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
 146         error = filemap_write_page(vma, offset, page);
 147         if (pte_val(*page_table) == entry)
 148                 pte_clear(page_table);
 149         return error;
 150 }
 151 
 152 /*
 153  * filemap_swapin() is called only if we have something in the page
 154  * tables that is non-zero (but not present), which we know to be the
 155  * page index of a page that is busy being swapped out (see above).
 156  * So we just use it directly..
 157  */
 158 static pte_t filemap_swapin(struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 159         unsigned long offset,
 160         unsigned long entry)
 161 {
 162         unsigned long page = SWP_OFFSET(entry);
 163 
 164         mem_map[page].count++;
 165         page = (page << PAGE_SHIFT) + PAGE_OFFSET;
 166         return mk_pte(page,vma->vm_page_prot);
 167 }
 168 
 169 
 170 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 171         unsigned long address, unsigned int flags)
 172 {
 173         pte_t pte = *ptep;
 174         unsigned long page;
 175         int error;
 176 
 177         if (!(flags & MS_INVALIDATE)) {
 178                 if (!pte_present(pte))
 179                         return 0;
 180                 if (!pte_dirty(pte))
 181                         return 0;
 182                 set_pte(ptep, pte_mkclean(pte));
 183                 invalidate_page(vma, address);
 184                 page = pte_page(pte);
 185                 mem_map[MAP_NR(page)].count++;
 186         } else {
 187                 if (pte_none(pte))
 188                         return 0;
 189                 pte_clear(ptep);
 190                 invalidate_page(vma, address);
 191                 if (!pte_present(pte)) {
 192                         swap_free(pte_val(pte));
 193                         return 0;
 194                 }
 195                 page = pte_page(pte);
 196                 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
 197                         free_page(page);
 198                         return 0;
 199                 }
 200         }
 201         error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
 202         free_page(page);
 203         return error;
 204 }
 205 
 206 static inline int filemap_sync_pte_range(pmd_t * pmd,
     /* [previous][next][first][last][top][bottom][index][help] */
 207         unsigned long address, unsigned long size, 
 208         struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
 209 {
 210         pte_t * pte;
 211         unsigned long end;
 212         int error;
 213 
 214         if (pmd_none(*pmd))
 215                 return 0;
 216         if (pmd_bad(*pmd)) {
 217                 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
 218                 pmd_clear(pmd);
 219                 return 0;
 220         }
 221         pte = pte_offset(pmd, address);
 222         offset += address & PMD_MASK;
 223         address &= ~PMD_MASK;
 224         end = address + size;
 225         if (end > PMD_SIZE)
 226                 end = PMD_SIZE;
 227         error = 0;
 228         do {
 229                 error |= filemap_sync_pte(pte, vma, address + offset, flags);
 230                 address += PAGE_SIZE;
 231                 pte++;
 232         } while (address < end);
 233         return error;
 234 }
 235 
 236 static inline int filemap_sync_pmd_range(pgd_t * pgd,
     /* [previous][next][first][last][top][bottom][index][help] */
 237         unsigned long address, unsigned long size, 
 238         struct vm_area_struct *vma, unsigned int flags)
 239 {
 240         pmd_t * pmd;
 241         unsigned long offset, end;
 242         int error;
 243 
 244         if (pgd_none(*pgd))
 245                 return 0;
 246         if (pgd_bad(*pgd)) {
 247                 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
 248                 pgd_clear(pgd);
 249                 return 0;
 250         }
 251         pmd = pmd_offset(pgd, address);
 252         offset = address & PMD_MASK;
 253         address &= ~PMD_MASK;
 254         end = address + size;
 255         if (end > PGDIR_SIZE)
 256                 end = PGDIR_SIZE;
 257         error = 0;
 258         do {
 259                 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
 260                 address = (address + PMD_SIZE) & PMD_MASK;
 261                 pmd++;
 262         } while (address < end);
 263         return error;
 264 }
 265 
 266 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 267         size_t size, unsigned int flags)
 268 {
 269         pgd_t * dir;
 270         unsigned long end = address + size;
 271         int error = 0;
 272 
 273         dir = pgd_offset(current->mm, address);
 274         while (address < end) {
 275                 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
 276                 address = (address + PGDIR_SIZE) & PGDIR_MASK;
 277                 dir++;
 278         }
 279         invalidate_range(vma->vm_mm, end - size, end);
 280         return error;
 281 }
 282 
 283 /*
 284  * This handles (potentially partial) area unmaps..
 285  */
 286 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
     /* [previous][next][first][last][top][bottom][index][help] */
 287 {
 288         filemap_sync(vma, start, len, MS_ASYNC);
 289 }
 290 
 291 /*
 292  * Shared mappings need to be able to do the right thing at
 293  * close/unmap/sync. They will also use the private file as
 294  * backing-store for swapping..
 295  */
 296 static struct vm_operations_struct file_shared_mmap = {
 297         NULL,                   /* no special open */
 298         NULL,                   /* no special close */
 299         filemap_unmap,          /* unmap - we need to sync the pages */
 300         NULL,                   /* no special protect */
 301         filemap_sync,           /* sync */
 302         NULL,                   /* advise */
 303         filemap_nopage,         /* nopage */
 304         NULL,                   /* wppage */
 305         filemap_swapout,        /* swapout */
 306         filemap_swapin,         /* swapin */
 307 };
 308 
 309 /*
 310  * Private mappings just need to be able to load in the map.
 311  *
 312  * (This is actually used for shared mappings as well, if we
 313  * know they can't ever get write permissions..)
 314  */
 315 static struct vm_operations_struct file_private_mmap = {
 316         NULL,                   /* open */
 317         NULL,                   /* close */
 318         NULL,                   /* unmap */
 319         NULL,                   /* protect */
 320         NULL,                   /* sync */
 321         NULL,                   /* advise */
 322         filemap_nopage,         /* nopage */
 323         NULL,                   /* wppage */
 324         NULL,                   /* swapout */
 325         NULL,                   /* swapin */
 326 };
 327 
 328 /* This is used for a general mmap of a disk file */
 329 int generic_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
     /* [previous][next][first][last][top][bottom][index][help] */
 330 {
 331         struct vm_operations_struct * ops;
 332 
 333         if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 334                 ops = &file_shared_mmap;
 335                 /* share_page() can only guarantee proper page sharing if
 336                  * the offsets are all page aligned. */
 337                 if (vma->vm_offset & (PAGE_SIZE - 1))
 338                         return -EINVAL;
 339         } else {
 340                 ops = &file_private_mmap;
 341                 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
 342                         return -EINVAL;
 343         }
 344         if (!inode->i_sb || !S_ISREG(inode->i_mode))
 345                 return -EACCES;
 346         if (!inode->i_op || !inode->i_op->bmap)
 347                 return -ENOEXEC;
 348         if (!IS_RDONLY(inode)) {
 349                 inode->i_atime = CURRENT_TIME;
 350                 inode->i_dirt = 1;
 351         }
 352         vma->vm_inode = inode;
 353         inode->i_count++;
 354         vma->vm_ops = ops;
 355         return 0;
 356 }
 357 
 358 
 359 /*
 360  * The msync() system call.
 361  */
 362 
 363 static int msync_interval(struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 364         unsigned long start, unsigned long end, int flags)
 365 {
 366         if (!vma->vm_inode)
 367                 return 0;
 368         if (vma->vm_ops->sync) {
 369                 int error;
 370                 error = vma->vm_ops->sync(vma, start, end-start, flags);
 371                 if (error)
 372                         return error;
 373                 if (flags & MS_SYNC)
 374                         return file_fsync(vma->vm_inode, NULL);
 375                 return 0;
 376         }
 377         return 0;
 378 }
 379 
 380 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 381 {
 382         unsigned long end;
 383         struct vm_area_struct * vma;
 384         int unmapped_error, error;
 385 
 386         if (start & ~PAGE_MASK)
 387                 return -EINVAL;
 388         len = (len + ~PAGE_MASK) & PAGE_MASK;
 389         end = start + len;
 390         if (end < start)
 391                 return -EINVAL;
 392         if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
 393                 return -EINVAL;
 394         if (end == start)
 395                 return 0;
 396         /*
 397          * If the interval [start,end) covers some unmapped address ranges,
 398          * just ignore them, but return -EFAULT at the end.
 399          */
 400         vma = find_vma(current, start);
 401         unmapped_error = 0;
 402         for (;;) {
 403                 /* Still start < end. */
 404                 if (!vma)
 405                         return -EFAULT;
 406                 /* Here start < vma->vm_end. */
 407                 if (start < vma->vm_start) {
 408                         unmapped_error = -EFAULT;
 409                         start = vma->vm_start;
 410                 }
 411                 /* Here vma->vm_start <= start < vma->vm_end. */
 412                 if (end <= vma->vm_end) {
 413                         if (start < end) {
 414                                 error = msync_interval(vma, start, end, flags);
 415                                 if (error)
 416                                         return error;
 417                         }
 418                         return unmapped_error;
 419                 }
 420                 /* Here vma->vm_start <= start < vma->vm_end < end. */
 421                 error = msync_interval(vma, start, vma->vm_end, flags);
 422                 if (error)
 423                         return error;
 424                 start = vma->vm_end;
 425                 vma = vma->vm_next;
 426         }
 427 }

/* [previous][next][first][last][top][bottom][index][help] */