root/mm/mmap.c


DEFINITIONS

This source file includes the following definitions.
  1. do_mmap
  2. sys_mmap
  3. unmap_fixup
  4. sys_munmap
  5. do_munmap
  6. generic_mmap
  7. insert_vm_struct
  8. merge_segments
  9. anon_map

   1 /*
   2  *      linux/mm/mmap.c
   3  *
   4  * Written by obz.
   5  */
   6 #include <linux/stat.h>
   7 #include <linux/sched.h>
   8 #include <linux/kernel.h>
   9 #include <linux/mm.h>
  10 #include <linux/shm.h>
  11 #include <linux/errno.h>
  12 #include <linux/mman.h>
  13 #include <linux/string.h>
  14 #include <linux/malloc.h>
  15 
  16 #include <asm/segment.h>
  17 #include <asm/system.h>
  18 
  19 static int anon_map(struct inode *, struct file *, struct vm_area_struct *);
  20 
  21 /*
  22  * description of effects of mapping type and prot in current implementation.
  23  * this is due to the limited x86 page protection hardware.  The expected
  24  * behavior is in parens:
  25  *
  26  * map_type     prot
  27  *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
  28  * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  29  *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
  30  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
  31  *              
  32  * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  33  *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
  34  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
  35  *
  36  */
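
/*
 * Illustrative example (not part of the original file): the user-visible
 * effect of the table above, assuming the C library's mmap() wrapper and
 * the usual <sys/mman.h> constants; file setup and error handling are
 * omitted.  A PROT_WRITE mapping is written through to the file when
 * MAP_SHARED, but only copied on write when MAP_PRIVATE.
 */
#if 0   /* example only, never compiled */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

static void mmap_write_semantics(void)
{
        int fd = open("/tmp/example", O_RDWR);
        char *s = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,  fd, 0);
        char *p = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);

        s[0] = 'S';     /* store is visible through the file (w: yes) */
        p[0] = 'P';     /* store hits a private copy only (w: copy)   */

        munmap(s, 4096);
        munmap(p, 4096);
        close(fd);
}
#endif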
  37 
  38 int do_mmap(struct file * file, unsigned long addr, unsigned long len,
  39         unsigned long prot, unsigned long flags, unsigned long off)
  40 {
  41         int mask, error;
  42         struct vm_area_struct * vma;
  43 
  44         if ((len = PAGE_ALIGN(len)) == 0)
  45                 return addr;
  46 
  47         if (addr > TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE-len)
  48                 return -EINVAL;
  49 
  50         /* offset overflow? */
  51         if (off + len < off)
  52                 return -EINVAL;
  53 
  54         /*
  55          * do simple checking here so the lower-level routines won't have
  56          * to. we assume access permissions have been handled by the open
  57          * of the memory object, so we don't do any here.
  58          */
  59 
  60         if (file != NULL) {
  61                 switch (flags & MAP_TYPE) {
  62                 case MAP_SHARED:
  63                         if ((prot & PROT_WRITE) && !(file->f_mode & 2))
  64                                 return -EACCES;
  65                         /* fall through */
  66                 case MAP_PRIVATE:
  67                         if (!(file->f_mode & 1))
  68                                 return -EACCES;
  69                         break;
  70 
  71                 default:
  72                         return -EINVAL;
  73                 }
  74         } else if ((flags & MAP_TYPE) == MAP_SHARED)
  75                 return -EINVAL;
  76 
  77         /*
  78          * obtain the address to map to. we verify (or select) it and ensure
  79          * that it represents a valid section of the address space.
  80          */
  81 
  82         if (flags & MAP_FIXED) {
  83                 if (addr & ~PAGE_MASK)
  84                         return -EINVAL;
  85                 if (len > TASK_SIZE || addr > TASK_SIZE - len)
  86                         return -EINVAL;
  87         } else {
  88                 struct vm_area_struct * vmm;
  89 
  90                 /* Maybe this works.. Ugly it is. */
  91                 addr = SHM_RANGE_START;
  92                 while (addr+len < SHM_RANGE_END) {
  93                         for (vmm = current->mm->mmap ; vmm ; vmm = vmm->vm_next) {
  94                                 if (addr >= vmm->vm_end)
  95                                         continue;
  96                                 if (addr + len <= vmm->vm_start)
  97                                         continue;
  98                                 addr = PAGE_ALIGN(vmm->vm_end);
  99                                 break;
 100                         }
 101                         if (!vmm)
 102                                 break;
 103                 }
 104                 if (addr+len >= SHM_RANGE_END)
 105                         return -ENOMEM;
 106         }
 107 
 108         /*
 109          * determine the object being mapped and call the appropriate
  110  * specific mapper. the address has already been validated but not
  111  * unmapped; the overlapping maps are removed from the list by do_munmap() below.
 112          */
 113         if (file && (!file->f_op || !file->f_op->mmap))
 114                 return -ENODEV;
 115         mask = PAGE_PRESENT;
 116         if (prot & (PROT_READ | PROT_EXEC))
 117                 mask |= PAGE_READONLY;
 118         if (prot & PROT_WRITE)
 119                 if ((flags & MAP_TYPE) == MAP_PRIVATE)
 120                         mask |= PAGE_COPY;
 121                 else
 122                         mask |= PAGE_SHARED;
 123 
 124         vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
 125         if (!vma)
 126                 return -ENOMEM;
 127 
 128         vma->vm_task = current;
 129         vma->vm_start = addr;
 130         vma->vm_end = addr + len;
 131         vma->vm_page_prot = mask;
 132         vma->vm_flags = prot & (VM_READ | VM_WRITE | VM_EXEC);
 133         vma->vm_flags |= flags & (VM_GROWSDOWN | VM_DENYWRITE);
 134 
 135         if (file) {
 136                 if (file->f_mode & 1)
 137                         vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 138                 if (flags & MAP_SHARED) {
 139                         vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
 140                         if (!(file->f_mode & 2))
 141                                 vma->vm_flags &= ~VM_MAYWRITE;
 142                 }
 143         } else
 144                 vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 145         vma->vm_ops = NULL;
 146         vma->vm_offset = off;
 147         vma->vm_inode = NULL;
 148         vma->vm_pte = 0;
 149 
 150         do_munmap(addr, len);   /* Clear old maps */
 151 
 152         if (file)
 153                 error = file->f_op->mmap(file->f_inode, file, vma);
 154         else
 155                 error = anon_map(NULL, NULL, vma);
 156         
 157         if (!error)
 158                 return addr;
 159 
 160         kfree(vma);
 161         if (!current->errno)
 162                 current->errno = -error;
 163         return -1;
 164 }
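
/*
 * Illustrative sketch (not part of the original file): an in-kernel caller,
 * such as a binary-format loader, might use do_mmap() roughly like this to
 * map a read-only, executable file region at a fixed page-aligned address.
 * The function name and its parameters are assumptions for the example.
 * Note the error convention above: validation failures return a negative
 * errno directly, while a failure in the low-level mapper returns -1 and
 * records the error in current->errno.
 */
#if 0   /* example only, never compiled */
static int example_map_text(struct file * file, unsigned long text_start,
                            unsigned long text_len)
{
        int addr;

        addr = do_mmap(file, text_start, text_len,
                       PROT_READ | PROT_EXEC,
                       MAP_FIXED | MAP_PRIVATE, 0);
        return addr < 0 ? addr : 0;
}
#endif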
 165 
 166 asmlinkage int sys_mmap(unsigned long *buffer)
 167 {
 168         int error;
 169         unsigned long flags;
 170         struct file * file = NULL;
 171 
 172         error = verify_area(VERIFY_READ, buffer, 6*4);
 173         if (error)
 174                 return error;
 175         flags = get_fs_long(buffer+3);
 176         if (!(flags & MAP_ANONYMOUS)) {
 177                 unsigned long fd = get_fs_long(buffer+4);
 178                 if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
 179                         return -EBADF;
 180         }
 181         return do_mmap(file, get_fs_long(buffer), get_fs_long(buffer+1),
 182                 get_fs_long(buffer+2), flags, get_fs_long(buffer+5));
 183 }
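
/*
 * Illustrative sketch (not part of the original file): user space reaches
 * sys_mmap() through a single pointer to six longs, read above in the order
 * addr, len, prot, flags, fd, offset.  A library wrapper might pack them
 * roughly as follows; the syscall() helper and the SYS_mmap number are
 * assumptions for the example.
 */
#if 0   /* example only, never compiled */
#include <unistd.h>
#include <sys/syscall.h>

static long old_mmap(unsigned long addr, unsigned long len, unsigned long prot,
                     unsigned long flags, unsigned long fd, unsigned long offset)
{
        unsigned long args[6];

        args[0] = addr;                 /* hint or fixed address */
        args[1] = len;
        args[2] = prot;
        args[3] = flags;
        args[4] = fd;                   /* ignored for MAP_ANONYMOUS */
        args[5] = offset;
        return syscall(SYS_mmap, args);
}
#endif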
 184 
 185 /*
 186  * Normal function to fix up a mapping
 187  * This function is the default for when an area has no specific
 188  * function.  This may be used as part of a more specific routine.
 189  * This function works out what part of an area is affected and
 190  * adjusts the mapping information.  Since the actual page
 191  * manipulation is done in do_mmap(), none need be done here,
 192  * though it would probably be more appropriate.
 193  *
 194  * By the time this function is called, the area struct has been
 195  * removed from the process mapping list, so it needs to be
 196  * reinserted if necessary.
 197  *
 198  * The 4 main cases are:
 199  *    Unmapping the whole area
 200  *    Unmapping from the start of the segment to a point in it
 201  *    Unmapping from an intermediate point to the end
  202  *    Unmapping between two intermediate points, making a hole.
 203  *
 204  * Case 4 involves the creation of 2 new areas, for each side of
 205  * the hole.
 206  */
 207 void unmap_fixup(struct vm_area_struct *area,
 208                  unsigned long addr, size_t len)
 209 {
 210         struct vm_area_struct *mpnt;
 211         unsigned long end = addr + len;
 212 
 213         if (addr < area->vm_start || addr >= area->vm_end ||
 214             end <= area->vm_start || end > area->vm_end ||
 215             end < addr)
 216         {
 217                 printk("unmap_fixup: area=%lx-%lx, unmap %lx-%lx!!\n",
 218                        area->vm_start, area->vm_end, addr, end);
 219                 return;
 220         }
 221 
 222         /* Unmapping the whole area */
 223         if (addr == area->vm_start && end == area->vm_end) {
 224                 if (area->vm_ops && area->vm_ops->close)
 225                         area->vm_ops->close(area);
 226                 if (area->vm_inode)
 227                         iput(area->vm_inode);
 228                 return;
 229         }
 230 
 231         /* Work out to one of the ends */
 232         if (addr >= area->vm_start && end == area->vm_end)
 233                 area->vm_end = addr;
 234         if (addr == area->vm_start && end <= area->vm_end) {
 235                 area->vm_offset += (end - area->vm_start);
 236                 area->vm_start = end;
 237         }
 238 
 239         /* Unmapping a hole */
 240         if (addr > area->vm_start && end < area->vm_end)
 241         {
 242                 /* Add end mapping -- leave beginning for below */
 243                 mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
 244 
 245                 *mpnt = *area;
 246                 mpnt->vm_offset += (end - area->vm_start);
 247                 mpnt->vm_start = end;
 248                 if (mpnt->vm_inode)
 249                         mpnt->vm_inode->i_count++;
 250                 area->vm_end = addr;    /* Truncate area */
 251                 insert_vm_struct(current, mpnt);
 252         }
 253 
 254         /* construct whatever mapping is needed */
 255         mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
 256         *mpnt = *area;
 257         insert_vm_struct(current, mpnt);
 258 }
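
/*
 * Worked example (not part of the original file) of case 4 above, punching
 * a hole in the middle of an area; the addresses are made up for
 * illustration.  Given an area covering 0x10000-0x16000 with vm_offset 0,
 *
 *      unmap_fixup(area, 0x12000, 0x2000);
 *
 * truncates the area to 0x10000-0x12000 and reinserts it with its offset
 * unchanged, and inserts a second vm_area_struct for 0x14000-0x16000 whose
 * vm_offset has been advanced by 0x4000 so the file offsets still line up.
 */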
 259 
 260 asmlinkage int sys_munmap(unsigned long addr, size_t len)
 261 {
 262         return do_munmap(addr, len);
 263 }
 264 
 265 /*
 266  * Munmap is split into 2 main parts -- this part which finds
 267  * what needs doing, and the areas themselves, which do the
 268  * work.  This now handles partial unmappings.
  270  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 270  */
 271 int do_munmap(unsigned long addr, size_t len)
 272 {
 273         struct vm_area_struct *mpnt, **npp, *free;
 274 
 275         if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
 276                 return -EINVAL;
 277 
 278         if ((len = PAGE_ALIGN(len)) == 0)
 279                 return 0;
 280 
 281         /*
 282          * Check if this memory area is ok - put it on the temporary
 283          * list if so..  The checks here are pretty simple --
 284          * every area affected in some way (by any overlap) is put
 285          * on the list.  If nothing is put on, nothing is affected.
 286          */
 287         npp = &current->mm->mmap;
 288         free = NULL;
 289         for (mpnt = *npp; mpnt != NULL; mpnt = *npp) {
 290                 unsigned long end = addr+len;
 291 
 292                 if ((addr < mpnt->vm_start && end <= mpnt->vm_start) ||
 293                     (addr >= mpnt->vm_end && end > mpnt->vm_end))
 294                 {
 295                         npp = &mpnt->vm_next;
 296                         continue;
 297                 }
 298 
 299                 *npp = mpnt->vm_next;
 300                 mpnt->vm_next = free;
 301                 free = mpnt;
 302         }
 303 
 304         if (free == NULL)
 305                 return 0;
 306 
 307         /*
 308          * Ok - we have the memory areas we should free on the 'free' list,
 309          * so release them, and unmap the page range..
  310  * If one of the segments is only being partially unmapped,
 311          * it will put new vm_area_struct(s) into the address space.
 312          */
 313         while (free) {
 314                 unsigned long st, end;
 315 
 316                 mpnt = free;
 317                 free = free->vm_next;
 318 
 319                 st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
 320                 end = addr+len;
 321                 end = end > mpnt->vm_end ? mpnt->vm_end : end;
 322 
 323                 if (mpnt->vm_ops && mpnt->vm_ops->unmap)
 324                         mpnt->vm_ops->unmap(mpnt, st, end-st);
 325                 else
 326                         unmap_fixup(mpnt, st, end-st);
 327 
 328                 kfree(mpnt);
 329         }
 330 
 331         unmap_page_range(addr, len);
 332         return 0;
 333 }
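
/*
 * Worked example (not part of the original file): a partial unmap that
 * spans two adjacent areas, say 0x10000-0x14000 and 0x14000-0x18000.
 *
 *      do_munmap(0x12000, 0x4000);
 *
 * puts both areas on the temporary list; unmap_fixup() then truncates the
 * first to 0x10000-0x12000 and moves the start of the second to 0x16000
 * (advancing its vm_offset by 0x2000), and unmap_page_range() finally
 * clears the page tables for 0x12000-0x16000.
 */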
 334 
 335 /* This is used for a general mmap of a disk file */
 336 int generic_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
 337 {
 338         extern struct vm_operations_struct file_mmap;
 339         struct buffer_head * bh;
 340 
 341         if (vma->vm_page_prot & PAGE_RW)        /* only PAGE_COW or read-only supported right now */
 342                 return -EINVAL;
  343         if (!inode->i_sb || !S_ISREG(inode->i_mode))
  344                 return -EACCES;
  345         if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
  346                 return -EINVAL;
 347         if (!inode->i_op || !inode->i_op->bmap)
 348                 return -ENOEXEC;
 349         if (!(bh = bread(inode->i_dev,bmap(inode,0),inode->i_sb->s_blocksize)))
 350                 return -EACCES;
 351         if (!IS_RDONLY(inode)) {
 352                 inode->i_atime = CURRENT_TIME;
 353                 inode->i_dirt = 1;
 354         }
 355         brelse(bh);
 356 
 357         unmap_page_range(vma->vm_start, vma->vm_end - vma->vm_start);
 358         vma->vm_inode = inode;
 359         inode->i_count++;
 360         vma->vm_ops = &file_mmap;
 361         insert_vm_struct(current, vma);
 362         merge_segments(current->mm->mmap);
 363         
 364         return 0;
 365 }
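
/*
 * Illustrative note (not part of the original file): a filesystem gets this
 * default behaviour by pointing the mmap entry of its file_operations at
 * generic_mmap(), e.g.
 *
 *      example_fops.mmap = generic_mmap;
 *
 * (example_fops is a made-up name); the file->f_op->mmap() call in
 * do_mmap() then lands here for files on that filesystem.
 */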
 366 
 367 /*
 368  * Insert vm structure into process list
  369  * This makes sure the list is sorted by start address, and
  370  * does some simple overlap checking.
 371  * JSGF
 372  */
 373 void insert_vm_struct(struct task_struct *t, struct vm_area_struct *vmp)
 374 {
 375         struct vm_area_struct **nxtpp, *mpnt;
 376 
 377         nxtpp = &t->mm->mmap;
 378         
 379         for(mpnt = t->mm->mmap; mpnt != NULL; mpnt = mpnt->vm_next)
 380         {
 381                 if (mpnt->vm_start > vmp->vm_start)
 382                         break;
 383                 nxtpp = &mpnt->vm_next;
 384 
 385                 if ((vmp->vm_start >= mpnt->vm_start &&
 386                      vmp->vm_start < mpnt->vm_end) ||
 387                     (vmp->vm_end >= mpnt->vm_start &&
 388                      vmp->vm_end < mpnt->vm_end))
 389                         printk("insert_vm_struct: ins area %lx-%lx in area %lx-%lx\n",
 390                                vmp->vm_start, vmp->vm_end,
  391                                mpnt->vm_start, mpnt->vm_end);
 392         }
 393         
 394         vmp->vm_next = mpnt;
 395 
 396         *nxtpp = vmp;
 397 }
 398 
 399 /*
 400  * Merge a list of memory segments if possible.
 401  * Redundant vm_area_structs are freed.
 402  * This assumes that the list is ordered by address.
 403  */
 404 void merge_segments(struct vm_area_struct *mpnt)
 405 {
 406         struct vm_area_struct *prev, *next;
 407 
 408         if (mpnt == NULL)
 409                 return;
 410         
 411         for(prev = mpnt, mpnt = mpnt->vm_next;
 412             mpnt != NULL;
 413             prev = mpnt, mpnt = next)
 414         {
 415                 next = mpnt->vm_next;
 416 
 417                 /*
 418                  * To share, we must have the same inode, operations.. 
 419                  */
 420                 if (mpnt->vm_inode != prev->vm_inode)
 421                         continue;
 422                 if (mpnt->vm_pte != prev->vm_pte)
 423                         continue;
 424                 if (mpnt->vm_ops != prev->vm_ops)
 425                         continue;
 426                 if (mpnt->vm_page_prot != prev->vm_page_prot ||
 427                     mpnt->vm_flags != prev->vm_flags)
 428                         continue;
 429                 if (prev->vm_end != mpnt->vm_start)
 430                         continue;
 431                 /*
 432                  * and if we have an inode, the offsets must be contiguous..
 433                  */
 434                 if (mpnt->vm_inode != NULL) {
 435                         if (prev->vm_offset + prev->vm_end - prev->vm_start != mpnt->vm_offset)
 436                                 continue;
 437                 }
 438 
 439                 /*
 440                  * merge prev with mpnt and set up pointers so the new
 441                  * big segment can possibly merge with the next one.
 442                  * The old unused mpnt is freed.
 443                  */
 444                 prev->vm_end = mpnt->vm_end;
 445                 prev->vm_next = mpnt->vm_next;
 446                 kfree_s(mpnt, sizeof(*mpnt));
 447                 mpnt = prev;
 448         }
 449 }
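
/*
 * Worked example (not part of the original file): two anonymous areas
 * created back to back, say 0x40000000-0x40002000 and 0x40002000-0x40004000
 * with identical vm_page_prot, vm_flags and a NULL vm_inode, pass every
 * test above, so merge_segments() collapses them into a single area
 * 0x40000000-0x40004000 and frees the second vm_area_struct.
 */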
 450 
 451 /*
 452  * Map memory not associated with any file into a process
  453  * address space.  Adjacent memory is merged.
 454  */
 455 static int anon_map(struct inode *ino, struct file * file, struct vm_area_struct * vma)
 456 {
 457         if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
 458                 return -ENOMEM;
 459 
 460         insert_vm_struct(current, vma);
 461         merge_segments(current->mm->mmap);
 462         return 0;
 463 }
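
/*
 * Illustrative example (not part of the original file): an anonymous
 * mapping from user space such as
 *
 *      mmap(0, 8192, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)
 *
 * takes this path: do_mmap() picks a free address in the SHM_RANGE_START..
 * SHM_RANGE_END window, anon_map() maps the range as zero-filled pages, and
 * merge_segments() folds it into a neighbouring anonymous area when the
 * areas are contiguous and have identical attributes.
 */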
