root/mm/mmap.c


DEFINITIONS

This source file includes the following definitions.
  1. do_mmap
  2. get_unmapped_area
  3. find_vma
  4. find_vma_intersection
  5. avl_neighbours
  6. avl_rebalance
  7. avl_insert
  8. avl_insert_neighbours
  9. avl_remove
  10. printk_list
  11. printk_avl
  12. avl_checkheights
  13. avl_checkleft
  14. avl_checkright
  15. avl_checkorder
  16. avl_check
  17. unmap_fixup
  18. sys_munmap
  19. do_munmap
  20. build_mmap_avl
  21. exit_mmap
  22. insert_vm_struct
  23. remove_shared_vm_struct
  24. merge_segments
  25. anon_map

   1 /*
   2  *      linux/mm/mmap.c
   3  *
   4  * Written by obz.
   5  */
   6 #include <linux/stat.h>
   7 #include <linux/sched.h>
   8 #include <linux/kernel.h>
   9 #include <linux/mm.h>
  10 #include <linux/shm.h>
  11 #include <linux/errno.h>
  12 #include <linux/mman.h>
  13 #include <linux/string.h>
  14 #include <linux/malloc.h>
  15 
  16 #include <asm/segment.h>
  17 #include <asm/system.h>
  18 #include <asm/pgtable.h>
  19 
  20 static int anon_map(struct inode *, struct file *, struct vm_area_struct *);
  21 
  22 /*
  23  * description of effects of mapping type and prot in current implementation.
  24  * this is due to the limited x86 page protection hardware.  The expected
  25  * behavior is in parens:
  26  *
  27  * map_type     prot
  28  *              PROT_NONE       PROT_READ       PROT_WRITE      PROT_EXEC
  29  * MAP_SHARED   r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  30  *              w: (no) no      w: (no) no      w: (yes) yes    w: (no) no
  31  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
  32  *              
  33  * MAP_PRIVATE  r: (no) no      r: (yes) yes    r: (no) yes     r: (no) yes
  34  *              w: (no) no      w: (no) no      w: (copy) copy  w: (no) no
  35  *              x: (no) no      x: (no) yes     x: (no) yes     x: (yes) yes
  36  *
  37  */
  38 
  39 pgprot_t protection_map[16] = {
  40         __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
  41         __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
  42 };
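
do_mmap() below indexes this table with the low four vm_flags bits, so entries
0-7 supply the __P (private) protections and entries 8-15 the __S (shared)
ones. A minimal user-space sketch of the index computation; the VM_* values
are assumed to mirror <linux/mm.h>:

#include <stdio.h>

#define VM_READ   0x0001        /* assumed to match <linux/mm.h> */
#define VM_WRITE  0x0002
#define VM_EXEC   0x0004
#define VM_SHARED 0x0008

int main(void)
{
        /* a shared, readable and writable mapping ... */
        unsigned long vm_flags = VM_READ | VM_WRITE | VM_SHARED;

        /* ... selects protection_map[11], i.e. __S011 in the table above */
        printf("protection_map index = %lu\n", vm_flags & 0x0f);
        return 0;
}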
  43 
  44 unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
  45         unsigned long prot, unsigned long flags, unsigned long off)
  46 {
  47         int error;
  48         struct vm_area_struct * vma;
  49 
  50         if ((len = PAGE_ALIGN(len)) == 0)
  51                 return addr;
  52 
  53         if (addr > TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE-len)
  54                 return -EINVAL;
  55 
  56         /* offset overflow? */
  57         if (off + len < off)
  58                 return -EINVAL;
  59 
  60         /*
  61          * do simple checking here so the lower-level routines won't have
  62          * to. we assume access permissions have been handled by the open
  63          * of the memory object, so we don't do any here.
  64          */
  65 
  66         if (file != NULL) {
  67                 switch (flags & MAP_TYPE) {
  68                 case MAP_SHARED:
  69                         if ((prot & PROT_WRITE) && !(file->f_mode & 2))
  70                                 return -EACCES;
  71                         /* fall through */
  72                 case MAP_PRIVATE:
  73                         if (!(file->f_mode & 1))
  74                                 return -EACCES;
  75                         break;
  76 
  77                 default:
  78                         return -EINVAL;
  79                 }
  80                 if (flags & MAP_DENYWRITE) {
  81                         if (file->f_inode->i_wcount > 0)
  82                                 return -ETXTBSY;
  83                 }
  84         } else if ((flags & MAP_TYPE) != MAP_PRIVATE)
  85                 return -EINVAL;
  86 
  87         /*
  88          * obtain the address to map to. we verify (or select) it and ensure
  89          * that it represents a valid section of the address space.
  90          */
  91 
  92         if (flags & MAP_FIXED) {
  93                 if (addr & ~PAGE_MASK)
  94                         return -EINVAL;
  95                 if (len > TASK_SIZE || addr > TASK_SIZE - len)
  96                         return -EINVAL;
  97         } else {
  98                 addr = get_unmapped_area(addr, len);
  99                 if (!addr)
 100                         return -ENOMEM;
 101         }
 102 
 103         /*
 104          * determine the object being mapped and call the appropriate
  105          * specific mapper. the address has already been validated but
  106          * not unmapped; do_munmap() below clears any old mappings.
 107          */
 108         if (file && (!file->f_op || !file->f_op->mmap))
 109                 return -ENODEV;
 110 
 111         vma = (struct vm_area_struct *)kmalloc(sizeof(struct vm_area_struct),
 112                 GFP_KERNEL);
 113         if (!vma)
 114                 return -ENOMEM;
 115 
 116         vma->vm_mm = current->mm;
 117         vma->vm_start = addr;
 118         vma->vm_end = addr + len;
 119         vma->vm_flags = prot & (VM_READ | VM_WRITE | VM_EXEC);
 120         vma->vm_flags |= flags & (VM_GROWSDOWN | VM_DENYWRITE | VM_EXECUTABLE);
 121 
 122         if (file) {
 123                 if (file->f_mode & 1)
 124                         vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 125                 if (flags & MAP_SHARED) {
 126                         vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
 127                         /*
 128                          * This looks strange, but when we don't have the file open
 129                          * for writing, we can demote the shared mapping to a simpler
 130                          * private mapping. That also takes care of a security hole
 131                          * with ptrace() writing to a shared mapping without write
 132                          * permissions.
 133                          *
 134                          * We leave the VM_MAYSHARE bit on, just to get correct output
 135                          * from /proc/xxx/maps..
 136                          */
 137                         if (!(file->f_mode & 2))
 138                                 vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
 139                 }
 140         } else
 141                 vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 142         vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
 143         vma->vm_ops = NULL;
 144         vma->vm_offset = off;
 145         vma->vm_inode = NULL;
 146         vma->vm_pte = 0;
 147 
 148         do_munmap(addr, len);   /* Clear old maps */
 149 
 150         if (file)
 151                 error = file->f_op->mmap(file->f_inode, file, vma);
 152         else
 153                 error = anon_map(NULL, NULL, vma);
 154         
 155         if (error) {
 156                 kfree(vma);
 157                 return error;
 158         }
 159         insert_vm_struct(current, vma);
 160         merge_segments(current, vma->vm_start, vma->vm_end);
 161         return addr;
 162 }
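
For reference, a user-space call that exercises the checks above: a writable
MAP_SHARED mapping requires a descriptor open for writing, or do_mmap()
returns -EACCES. A sketch only; the path name is made up:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        char *p;
        int fd = open("/tmp/example", O_RDWR | O_CREAT, 0600);

        if (fd < 0 || ftruncate(fd, 4096) < 0)
                return 1;
        /* PROT_WRITE + MAP_SHARED needs an O_RDWR descriptor;
         * with O_RDONLY, do_mmap() would refuse with -EACCES. */
        p = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return 1;
        p[0] = 'x';             /* the write reaches the file */
        munmap(p, 4096);
        close(fd);
        return 0;
}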
 163 
 164 /*
 165  * Get an address range which is currently unmapped.
 166  * For mmap() without MAP_FIXED and shmat() with addr=0.
 167  * Return value 0 means ENOMEM.
 168  */
 169 unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
 170 {
 171         struct vm_area_struct * vmm;
 172 
 173         if (len > TASK_SIZE)
 174                 return 0;
 175         if (!addr)
 176                 addr = TASK_SIZE / 3;
 177         addr = PAGE_ALIGN(addr);
 178 
 179         for (vmm = current->mm->mmap; ; vmm = vmm->vm_next) {
 180                 if (TASK_SIZE - len < addr)
 181                         return 0;
 182                 if (!vmm)
 183                         return addr;
 184                 if (addr > vmm->vm_end)
 185                         continue;
 186                 if (addr + len > vmm->vm_start) {
 187                         addr = vmm->vm_end;
 188                         continue;
 189                 }
 190                 return addr;
 191         }
 192 }
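
The loop above is a first-fit scan over the address-sorted VMA list. A
self-contained model of the same scan, with a hypothetical struct area
standing in for vm_area_struct:

#include <stdio.h>

struct area { unsigned long start, end; struct area *next; };

int main(void)
{
        /* two mapped areas with a one-page hole at 0x2000-0x3000 */
        struct area b = { 0x3000, 0x5000, 0 };
        struct area a = { 0x1000, 0x2000, &b };
        unsigned long addr = 0x1000, len = 0x1000;
        unsigned long task_size = 0xc0000000;
        struct area *vmm;

        for (vmm = &a; ; vmm = vmm->next) {
                if (task_size - len < addr) {
                        addr = 0;               /* no room: ENOMEM */
                        break;
                }
                if (!vmm)
                        break;                  /* past the last area */
                if (addr > vmm->end)
                        continue;
                if (addr + len > vmm->start) {
                        addr = vmm->end;        /* overlap: retry after it */
                        continue;
                }
                break;                          /* found a hole */
        }
        printf("unmapped area at 0x%lx\n", addr);       /* prints 0x2000 */
        return 0;
}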
 193 
 194 /*
 195  * Searching a VMA in the linear list task->mm->mmap is horribly slow.
 196  * Use an AVL (Adelson-Velskii and Landis) tree to speed up this search
 197  * from O(n) to O(log n), where n is the number of VMAs of the task
 198  * (typically around 6, but may reach 3000 in some cases).
 199  * Written by Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de>.
 200  */
 201 
 202 /* We keep the list and tree sorted by address. */
 203 #define vm_avl_key      vm_end
 204 #define vm_avl_key_t    unsigned long   /* typeof(vma->avl_key) */
 205 
 206 /*
 207  * task->mm->mmap_avl is the AVL tree corresponding to task->mm->mmap
 208  * or, more exactly, its root.
 209  * A vm_area_struct has the following fields:
 210  *   vm_avl_left     left son of a tree node
 211  *   vm_avl_right    right son of a tree node
 212  *   vm_avl_height   1+max(heightof(left),heightof(right))
 213  * The empty tree is represented as NULL.
 214  */
 215 #define avl_empty       (struct vm_area_struct *) NULL
 216 
 217 /* Since the trees are balanced, their height will never be large. */
 218 #define avl_maxheight   41      /* why this? a small exercise */
 219 #define heightof(tree)  ((tree) == avl_empty ? 0 : (tree)->vm_avl_height)
 220 /*
 221  * Consistency and balancing rules:
 222  * 1. tree->vm_avl_height == 1+max(heightof(tree->vm_avl_left),heightof(tree->vm_avl_right))
 223  * 2. abs( heightof(tree->vm_avl_left) - heightof(tree->vm_avl_right) ) <= 1
 224  * 3. foreach node in tree->vm_avl_left: node->vm_avl_key <= tree->vm_avl_key,
 225  *    foreach node in tree->vm_avl_right: node->vm_avl_key >= tree->vm_avl_key.
 226  */
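
A sketch of the "small exercise" behind avl_maxheight: the sparsest AVL tree
of height h has N(h) = N(h-1) + N(h-2) + 1 nodes (N(1) = 1, N(2) = 2), which
gives N(h) = Fib(h+2) - 1. A 32-bit address space holds fewer than 2^29
vm_area_structs (each takes well over 8 bytes of kernel memory), and
N(42) = Fib(44) - 1 = 701408732 > 2^29, so height 42 is unreachable and 41 is
a safe bound for the rebalancing stacks below.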
 227 
 228 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 229 struct vm_area_struct * find_vma (struct task_struct * task, unsigned long addr)
 230 {
 231 #if 0 /* equivalent, but slow */
 232         struct vm_area_struct * vma;
 233 
 234         if (!task->mm)
 235                 return NULL;
 236         for (vma = task->mm->mmap ; ; vma = vma->vm_next) {
 237                 if (!vma)
 238                         return NULL;
 239                 if (vma->vm_end > addr)
 240                         return vma;
 241         }
 242 #else
 243         struct vm_area_struct * result = NULL;
 244         struct vm_area_struct * tree;
 245 
 246         if (!task->mm)
 247                 return NULL;
 248         for (tree = task->mm->mmap_avl ; ; ) {
 249                 if (tree == avl_empty)
 250                         return result;
 251                 if (tree->vm_end > addr) {
 252                         if (tree->vm_start <= addr)
 253                                 return tree;
 254                         result = tree;
 255                         tree = tree->vm_avl_left;
 256                 } else
 257                         tree = tree->vm_avl_right;
 258         }
 259 #endif
 260 }
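
The tree walk above returns the lowest-ending VMA with vm_end > addr, which
need not contain addr; callers check vm_start themselves. A user-space model
of the same query over a sorted array, using a hypothetical struct vma:

#include <stdio.h>

struct vma { unsigned long start, end; };

/* leftmost interval with end > addr, as in the AVL walk above */
struct vma *model_find_vma(struct vma *v, int n, unsigned long addr)
{
        int lo = 0, hi = n;

        while (lo < hi) {
                int mid = (lo + hi) / 2;
                if (v[mid].end > addr)
                        hi = mid;
                else
                        lo = mid + 1;
        }
        return lo < n ? &v[lo] : 0;
}

int main(void)
{
        struct vma map[2] = { { 0x1000, 0x3000 }, { 0x5000, 0x6000 } };
        struct vma *v = model_find_vma(map, 2, 0x4000);

        /* prints 5000-6000: the hit need not contain addr, so a caller
         * additionally tests v->start <= addr */
        if (v)
                printf("%lx-%lx\n", v->start, v->end);
        return 0;
}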
 261 
 262 /* Look up the first VMA which intersects the interval start_addr..end_addr-1,
 263    NULL if none.  Assume start_addr < end_addr. */
 264 struct vm_area_struct * find_vma_intersection (struct task_struct * task, unsigned long start_addr, unsigned long end_addr)
 265 {
 266         struct vm_area_struct * vma;
 267 
 268 #if 0 /* equivalent, but slow */
 269         for (vma = task->mm->mmap; vma; vma = vma->vm_next) {
 270                 if (end_addr <= vma->vm_start)
 271                         break;
 272                 if (start_addr < vma->vm_end)
 273                         return vma;
 274         }
 275         return NULL;
 276 #else
 277         vma = find_vma(task,start_addr);
 278         if (!vma || end_addr <= vma->vm_start)
 279                 return NULL;
 280         return vma;
 281 #endif
 282 }
 283 
 284 /* Look up the nodes at the left and at the right of a given node. */
 285 static void avl_neighbours (struct vm_area_struct * node, struct vm_area_struct * tree, struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
 286 {
 287         vm_avl_key_t key = node->vm_avl_key;
 288 
 289         *to_the_left = *to_the_right = NULL;
 290         for (;;) {
 291                 if (tree == avl_empty) {
 292                         printk("avl_neighbours: node not found in the tree\n");
 293                         return;
 294                 }
 295                 if (key == tree->vm_avl_key)
 296                         break;
 297                 if (key < tree->vm_avl_key) {
 298                         *to_the_right = tree;
 299                         tree = tree->vm_avl_left;
 300                 } else {
 301                         *to_the_left = tree;
 302                         tree = tree->vm_avl_right;
 303                 }
 304         }
 305         if (tree != node) {
 306                 printk("avl_neighbours: node not exactly found in the tree\n");
 307                 return;
 308         }
 309         if (tree->vm_avl_left != avl_empty) {
 310                 struct vm_area_struct * node;
 311                 for (node = tree->vm_avl_left; node->vm_avl_right != avl_empty; node = node->vm_avl_right)
 312                         continue;
 313                 *to_the_left = node;
 314         }
 315         if (tree->vm_avl_right != avl_empty) {
 316                 struct vm_area_struct * node;
 317                 for (node = tree->vm_avl_right; node->vm_avl_left != avl_empty; node = node->vm_avl_left)
 318                         continue;
 319                 *to_the_right = node;
 320         }
 321         if ((*to_the_left && ((*to_the_left)->vm_next != node)) || (node->vm_next != *to_the_right))
 322                 printk("avl_neighbours: tree inconsistent with list\n");
 323 }
 324 
 325 /*
 326  * Rebalance a tree.
 327  * After inserting or deleting a node of a tree we have a sequence of subtrees
 328  * nodes[0]..nodes[k-1] such that
 329  * nodes[0] is the root and nodes[i+1] = nodes[i]->{vm_avl_left|vm_avl_right}.
 330  */
 331 static void avl_rebalance (struct vm_area_struct *** nodeplaces_ptr, int count)
 332 {
 333         for ( ; count > 0 ; count--) {
 334                 struct vm_area_struct ** nodeplace = *--nodeplaces_ptr;
 335                 struct vm_area_struct * node = *nodeplace;
 336                 struct vm_area_struct * nodeleft = node->vm_avl_left;
 337                 struct vm_area_struct * noderight = node->vm_avl_right;
 338                 int heightleft = heightof(nodeleft);
 339                 int heightright = heightof(noderight);
 340                 if (heightright + 1 < heightleft) {
 341                         /*                                                      */
 342                         /*                            *                         */
 343                         /*                          /   \                       */
 344                         /*                       n+2      n                     */
 345                         /*                                                      */
 346                         struct vm_area_struct * nodeleftleft = nodeleft->vm_avl_left;
 347                         struct vm_area_struct * nodeleftright = nodeleft->vm_avl_right;
 348                         int heightleftright = heightof(nodeleftright);
 349                         if (heightof(nodeleftleft) >= heightleftright) {
 350                                 /*                                                        */
 351                                 /*                *                    n+2|n+3            */
 352                                 /*              /   \                  /    \             */
 353                                 /*           n+2      n      -->      /   n+1|n+2         */
 354                                 /*           / \                      |    /    \         */
 355                                 /*         n+1 n|n+1                 n+1  n|n+1  n        */
 356                                 /*                                                        */
 357                                 node->vm_avl_left = nodeleftright; nodeleft->vm_avl_right = node;
 358                                 nodeleft->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightleftright);
 359                                 *nodeplace = nodeleft;
 360                         } else {
 361                                 /*                                                        */
 362                                 /*                *                     n+2               */
 363                                 /*              /   \                 /     \             */
 364                                 /*           n+2      n      -->    n+1     n+1           */
 365                                 /*           / \                    / \     / \           */
 366                                 /*          n  n+1                 n   L   R   n          */
 367                                 /*             / \                                        */
 368                                 /*            L   R                                       */
 369                                 /*                                                        */
 370                                 nodeleft->vm_avl_right = nodeleftright->vm_avl_left;
 371                                 node->vm_avl_left = nodeleftright->vm_avl_right;
 372                                 nodeleftright->vm_avl_left = nodeleft;
 373                                 nodeleftright->vm_avl_right = node;
 374                                 nodeleft->vm_avl_height = node->vm_avl_height = heightleftright;
 375                                 nodeleftright->vm_avl_height = heightleft;
 376                                 *nodeplace = nodeleftright;
 377                         }
 378                 }
 379                 else if (heightleft + 1 < heightright) {
 380                         /* similar to the above, just interchange 'left' <--> 'right' */
 381                         struct vm_area_struct * noderightright = noderight->vm_avl_right;
 382                         struct vm_area_struct * noderightleft = noderight->vm_avl_left;
 383                         int heightrightleft = heightof(noderightleft);
 384                         if (heightof(noderightright) >= heightrightleft) {
 385                                 node->vm_avl_right = noderightleft; noderight->vm_avl_left = node;
 386                                 noderight->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightrightleft);
 387                                 *nodeplace = noderight;
 388                         } else {
 389                                 noderight->vm_avl_left = noderightleft->vm_avl_right;
 390                                 node->vm_avl_right = noderightleft->vm_avl_left;
 391                                 noderightleft->vm_avl_right = noderight;
 392                                 noderightleft->vm_avl_left = node;
 393                                 noderight->vm_avl_height = node->vm_avl_height = heightrightleft;
 394                                 noderightleft->vm_avl_height = heightright;
 395                                 *nodeplace = noderightleft;
 396                         }
 397                 }
 398                 else {
 399                         int height = (heightleft<heightright ? heightright : heightleft) + 1;
 400                         if (height == node->vm_avl_height)
 401                                 break;
 402                         node->vm_avl_height = height;
 403                 }
 404         }
 405 }
 406 
 407 /* Insert a node into a tree. */
 408 static void avl_insert (struct vm_area_struct * new_node, struct vm_area_struct ** ptree)
 409 {
 410         vm_avl_key_t key = new_node->vm_avl_key;
 411         struct vm_area_struct ** nodeplace = ptree;
 412         struct vm_area_struct ** stack[avl_maxheight];
 413         int stack_count = 0;
 414         struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
 415         for (;;) {
 416                 struct vm_area_struct * node = *nodeplace;
 417                 if (node == avl_empty)
 418                         break;
 419                 *stack_ptr++ = nodeplace; stack_count++;
 420                 if (key < node->vm_avl_key)
 421                         nodeplace = &node->vm_avl_left;
 422                 else
 423                         nodeplace = &node->vm_avl_right;
 424         }
 425         new_node->vm_avl_left = avl_empty;
 426         new_node->vm_avl_right = avl_empty;
 427         new_node->vm_avl_height = 1;
 428         *nodeplace = new_node;
 429         avl_rebalance(stack_ptr,stack_count);
 430 }
 431 
 432 /* Insert a node into a tree, and
 433  * return the node to the left of it and the node to the right of it.
 434  */
 435 static void avl_insert_neighbours (struct vm_area_struct * new_node, struct vm_area_struct ** ptree,
 436         struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
 437 {
 438         vm_avl_key_t key = new_node->vm_avl_key;
 439         struct vm_area_struct ** nodeplace = ptree;
 440         struct vm_area_struct ** stack[avl_maxheight];
 441         int stack_count = 0;
 442         struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
 443         *to_the_left = *to_the_right = NULL;
 444         for (;;) {
 445                 struct vm_area_struct * node = *nodeplace;
 446                 if (node == avl_empty)
 447                         break;
 448                 *stack_ptr++ = nodeplace; stack_count++;
 449                 if (key < node->vm_avl_key) {
 450                         *to_the_right = node;
 451                         nodeplace = &node->vm_avl_left;
 452                 } else {
 453                         *to_the_left = node;
 454                         nodeplace = &node->vm_avl_right;
 455                 }
 456         }
 457         new_node->vm_avl_left = avl_empty;
 458         new_node->vm_avl_right = avl_empty;
 459         new_node->vm_avl_height = 1;
 460         *nodeplace = new_node;
 461         avl_rebalance(stack_ptr,stack_count);
 462 }
 463 
  464 /* Remove a node from a tree. */
 465 static void avl_remove (struct vm_area_struct * node_to_delete, struct vm_area_struct ** ptree)
 466 {
 467         vm_avl_key_t key = node_to_delete->vm_avl_key;
 468         struct vm_area_struct ** nodeplace = ptree;
 469         struct vm_area_struct ** stack[avl_maxheight];
 470         int stack_count = 0;
 471         struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
 472         struct vm_area_struct ** nodeplace_to_delete;
 473         for (;;) {
 474                 struct vm_area_struct * node = *nodeplace;
 475                 if (node == avl_empty) {
 476                         /* what? node_to_delete not found in tree? */
 477                         printk("avl_remove: node to delete not found in tree\n");
 478                         return;
 479                 }
 480                 *stack_ptr++ = nodeplace; stack_count++;
 481                 if (key == node->vm_avl_key)
 482                         break;
 483                 if (key < node->vm_avl_key)
 484                         nodeplace = &node->vm_avl_left;
 485                 else
 486                         nodeplace = &node->vm_avl_right;
 487         }
 488         nodeplace_to_delete = nodeplace;
 489         /* Have to remove node_to_delete = *nodeplace_to_delete. */
 490         if (node_to_delete->vm_avl_left == avl_empty) {
 491                 *nodeplace_to_delete = node_to_delete->vm_avl_right;
 492                 stack_ptr--; stack_count--;
 493         } else {
 494                 struct vm_area_struct *** stack_ptr_to_delete = stack_ptr;
 495                 struct vm_area_struct ** nodeplace = &node_to_delete->vm_avl_left;
 496                 struct vm_area_struct * node;
 497                 for (;;) {
 498                         node = *nodeplace;
 499                         if (node->vm_avl_right == avl_empty)
 500                                 break;
 501                         *stack_ptr++ = nodeplace; stack_count++;
 502                         nodeplace = &node->vm_avl_right;
 503                 }
 504                 *nodeplace = node->vm_avl_left;
 505                 /* node replaces node_to_delete */
 506                 node->vm_avl_left = node_to_delete->vm_avl_left;
 507                 node->vm_avl_right = node_to_delete->vm_avl_right;
 508                 node->vm_avl_height = node_to_delete->vm_avl_height;
 509                 *nodeplace_to_delete = node; /* replace node_to_delete */
 510                 *stack_ptr_to_delete = &node->vm_avl_left; /* replace &node_to_delete->vm_avl_left */
 511         }
 512         avl_rebalance(stack_ptr,stack_count);
 513 }
 514 
 515 #ifdef DEBUG_AVL
 516 
 517 /* print a list */
 518 static void printk_list (struct vm_area_struct * vma)
 519 {
 520         printk("[");
 521         while (vma) {
 522                 printk("%08lX-%08lX", vma->vm_start, vma->vm_end);
 523                 vma = vma->vm_next;
 524                 if (!vma)
 525                         break;
 526                 printk(" ");
 527         }
 528         printk("]");
 529 }
 530 
 531 /* print a tree */
 532 static void printk_avl (struct vm_area_struct * tree)
 533 {
 534         if (tree != avl_empty) {
 535                 printk("(");
 536                 if (tree->vm_avl_left != avl_empty) {
 537                         printk_avl(tree->vm_avl_left);
 538                         printk("<");
 539                 }
 540                 printk("%08lX-%08lX", tree->vm_start, tree->vm_end);
 541                 if (tree->vm_avl_right != avl_empty) {
 542                         printk(">");
 543                         printk_avl(tree->vm_avl_right);
 544                 }
 545                 printk(")");
 546         }
 547 }
 548 
 549 static char *avl_check_point = "somewhere";
 550 
 551 /* check a tree's consistency and balancing */
 552 static void avl_checkheights (struct vm_area_struct * tree)
 553 {
 554         int h, hl, hr;
 555 
 556         if (tree == avl_empty)
 557                 return;
 558         avl_checkheights(tree->vm_avl_left);
 559         avl_checkheights(tree->vm_avl_right);
 560         h = tree->vm_avl_height;
 561         hl = heightof(tree->vm_avl_left);
 562         hr = heightof(tree->vm_avl_right);
 563         if ((h == hl+1) && (hr <= hl) && (hl <= hr+1))
 564                 return;
 565         if ((h == hr+1) && (hl <= hr) && (hr <= hl+1))
 566                 return;
 567         printk("%s: avl_checkheights: heights inconsistent\n",avl_check_point);
 568 }
 569 
 570 /* check that all values stored in a tree are < key */
 571 static void avl_checkleft (struct vm_area_struct * tree, vm_avl_key_t key)
 572 {
 573         if (tree == avl_empty)
 574                 return;
 575         avl_checkleft(tree->vm_avl_left,key);
 576         avl_checkleft(tree->vm_avl_right,key);
 577         if (tree->vm_avl_key < key)
 578                 return;
 579         printk("%s: avl_checkleft: left key %lu >= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
 580 }
 581 
 582 /* check that all values stored in a tree are > key */
 583 static void avl_checkright (struct vm_area_struct * tree, vm_avl_key_t key)
 584 {
 585         if (tree == avl_empty)
 586                 return;
 587         avl_checkright(tree->vm_avl_left,key);
 588         avl_checkright(tree->vm_avl_right,key);
 589         if (tree->vm_avl_key > key)
 590                 return;
 591         printk("%s: avl_checkright: right key %lu <= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
 592 }
 593 
 594 /* check that all values are properly increasing */
 595 static void avl_checkorder (struct vm_area_struct * tree)
 596 {
 597         if (tree == avl_empty)
 598                 return;
 599         avl_checkorder(tree->vm_avl_left);
 600         avl_checkorder(tree->vm_avl_right);
 601         avl_checkleft(tree->vm_avl_left,tree->vm_avl_key);
 602         avl_checkright(tree->vm_avl_right,tree->vm_avl_key);
 603 }
 604 
 605 /* all checks */
 606 static void avl_check (struct task_struct * task, char *caller)
 607 {
 608         avl_check_point = caller;
 609 /*      printk("task \"%s\", %s\n",task->comm,caller); */
 610 /*      printk("task \"%s\" list: ",task->comm); printk_list(task->mm->mmap); printk("\n"); */
 611 /*      printk("task \"%s\" tree: ",task->comm); printk_avl(task->mm->mmap_avl); printk("\n"); */
 612         avl_checkheights(task->mm->mmap_avl);
 613         avl_checkorder(task->mm->mmap_avl);
 614 }
 615 
 616 #endif
 617 
 618 
 619 /*
 620  * Normal function to fix up a mapping
 621  * This function is the default for when an area has no specific
 622  * function.  This may be used as part of a more specific routine.
 623  * This function works out what part of an area is affected and
 624  * adjusts the mapping information.  Since the actual page
 625  * manipulation is done in do_mmap(), none need be done here,
 626  * though it would probably be more appropriate.
 627  *
 628  * By the time this function is called, the area struct has been
 629  * removed from the process mapping list, so it needs to be
 630  * reinserted if necessary.
 631  *
 632  * The 4 main cases are:
 633  *    Unmapping the whole area
 634  *    Unmapping from the start of the segment to a point in it
 635  *    Unmapping from an intermediate point to the end
  636  *    Unmapping between two intermediate points, making a hole.
  637  *
  638  * Case 4 involves the creation of 2 new areas, one for each side
  639  * of the hole.
 640  */
 641 void unmap_fixup(struct vm_area_struct *area,
 642                  unsigned long addr, size_t len)
 643 {
 644         struct vm_area_struct *mpnt;
 645         unsigned long end = addr + len;
 646 
 647         if (addr < area->vm_start || addr >= area->vm_end ||
 648             end <= area->vm_start || end > area->vm_end ||
 649             end < addr)
 650         {
 651                 printk("unmap_fixup: area=%lx-%lx, unmap %lx-%lx!!\n",
 652                        area->vm_start, area->vm_end, addr, end);
 653                 return;
 654         }
 655 
 656         /* Unmapping the whole area */
 657         if (addr == area->vm_start && end == area->vm_end) {
 658                 if (area->vm_ops && area->vm_ops->close)
 659                         area->vm_ops->close(area);
 660                 if (area->vm_inode)
 661                         iput(area->vm_inode);
 662                 return;
 663         }
 664 
 665         /* Work out to one of the ends */
 666         if (end == area->vm_end)
 667                 area->vm_end = addr;
 668         else
 669         if (addr == area->vm_start) {
 670                 area->vm_offset += (end - area->vm_start);
 671                 area->vm_start = end;
 672         }
 673         else {
 674         /* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
 675                 /* Add end mapping -- leave beginning for below */
 676                 mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
 677 
 678                 if (!mpnt)
 679                         return;
 680                 *mpnt = *area;
 681                 mpnt->vm_offset += (end - area->vm_start);
 682                 mpnt->vm_start = end;
 683                 if (mpnt->vm_inode)
 684                         mpnt->vm_inode->i_count++;
 685                 if (mpnt->vm_ops && mpnt->vm_ops->open)
 686                         mpnt->vm_ops->open(mpnt);
 687                 area->vm_end = addr;    /* Truncate area */
 688                 insert_vm_struct(current, mpnt);
 689         }
 690 
 691         /* construct whatever mapping is needed */
 692         mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
 693         if (!mpnt)
 694                 return;
 695         *mpnt = *area;
 696         if (mpnt->vm_ops && mpnt->vm_ops->open)
 697                 mpnt->vm_ops->open(mpnt);
 698         if (area->vm_ops && area->vm_ops->close) {
 699                 area->vm_end = area->vm_start;
 700                 area->vm_ops->close(area);
 701         }
 702         insert_vm_struct(current, mpnt);
 703 }
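
Case 4 can be driven from user space by unmapping the middle of a mapping;
unmap_fixup() then truncates the original area and inserts a second one
beyond the hole. A sketch, assuming MAP_ANONYMOUS and a 4096-byte page:

#include <sys/mman.h>

int main(void)
{
        /* three pages, then punch out the middle one: the single
         * area becomes two, one on each side of the hole */
        char *p = mmap(0, 3 * 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        return munmap(p + 4096, 4096);
}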
 704 
 705 asmlinkage int sys_munmap(unsigned long addr, size_t len)
 706 {
 707         return do_munmap(addr, len);
 708 }
 709 
 710 /*
 711  * Munmap is split into 2 main parts -- this part which finds
 712  * what needs doing, and the areas themselves, which do the
 713  * work.  This now handles partial unmappings.
  714  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 715  */
 716 int do_munmap(unsigned long addr, size_t len)
 717 {
 718         struct vm_area_struct *mpnt, *prev, *next, **npp, *free;
 719 
 720         if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
 721                 return -EINVAL;
 722 
 723         if ((len = PAGE_ALIGN(len)) == 0)
 724                 return 0;
 725 
 726         /*
 727          * Check if this memory area is ok - put it on the temporary
 728          * list if so..  The checks here are pretty simple --
 729          * every area affected in some way (by any overlap) is put
 730          * on the list.  If nothing is put on, nothing is affected.
 731          */
 732         mpnt = find_vma(current, addr);
 733         if (!mpnt)
 734                 return 0;
 735         avl_neighbours(mpnt, current->mm->mmap_avl, &prev, &next);
  736         /* we have  prev->vm_next == mpnt && mpnt->vm_next == next */
 737         /* and  addr < mpnt->vm_end  */
 738 
 739         npp = (prev ? &prev->vm_next : &current->mm->mmap);
 740         free = NULL;
 741         for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
 742                 *npp = mpnt->vm_next;
 743                 mpnt->vm_next = free;
 744                 free = mpnt;
 745                 avl_remove(mpnt, &current->mm->mmap_avl);
 746         }
 747 
 748         if (free == NULL)
 749                 return 0;
 750 
 751         /*
 752          * Ok - we have the memory areas we should free on the 'free' list,
 753          * so release them, and unmap the page range..
  754          * If one of the segments is only being partially unmapped,
 755          * it will put new vm_area_struct(s) into the address space.
 756          */
 757         while (free) {
 758                 unsigned long st, end;
 759 
 760                 mpnt = free;
 761                 free = free->vm_next;
 762 
 763                 remove_shared_vm_struct(mpnt);
 764 
 765                 st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
 766                 end = addr+len;
 767                 end = end > mpnt->vm_end ? mpnt->vm_end : end;
 768 
 769                 if (mpnt->vm_ops && mpnt->vm_ops->unmap)
 770                         mpnt->vm_ops->unmap(mpnt, st, end-st);
 771 
 772                 unmap_fixup(mpnt, st, end-st);
 773                 kfree(mpnt);
 774         }
 775 
 776         unmap_page_range(addr, len);
 777         return 0;
 778 }
 779 
 780 /* Build the AVL tree corresponding to the VMA list. */
 781 void build_mmap_avl(struct mm_struct * mm)
 782 {
 783         struct vm_area_struct * vma;
 784 
 785         mm->mmap_avl = NULL;
 786         for (vma = mm->mmap; vma; vma = vma->vm_next)
 787                 avl_insert(vma, &mm->mmap_avl);
 788 }
 789 
 790 /* Release all mmaps. */
 791 void exit_mmap(struct mm_struct * mm)
 792 {
 793         struct vm_area_struct * mpnt;
 794 
 795         mpnt = mm->mmap;
 796         mm->mmap = NULL;
 797         mm->mmap_avl = NULL;
 798         while (mpnt) {
 799                 struct vm_area_struct * next = mpnt->vm_next;
 800                 if (mpnt->vm_ops && mpnt->vm_ops->close)
 801                         mpnt->vm_ops->close(mpnt);
 802                 remove_shared_vm_struct(mpnt);
 803                 if (mpnt->vm_inode)
 804                         iput(mpnt->vm_inode);
 805                 zap_page_range(mm, mpnt->vm_start, mpnt->vm_end-mpnt->vm_start);
 806                 kfree(mpnt);
 807                 mpnt = next;
 808         }
 809 }
 810 
 811 /*
 812  * Insert vm structure into process list sorted by address
 813  * and into the inode's i_mmap ring.
 814  */
 815 void insert_vm_struct(struct task_struct *t, struct vm_area_struct *vmp)
 816 {
 817         struct vm_area_struct *share;
 818         struct inode * inode;
 819 
 820 #if 0 /* equivalent, but slow */
 821         struct vm_area_struct **p, *mpnt;
 822 
 823         p = &t->mm->mmap;
 824         while ((mpnt = *p) != NULL) {
 825                 if (mpnt->vm_start > vmp->vm_start)
 826                         break;
 827                 if (mpnt->vm_end > vmp->vm_start)
 828                         printk("insert_vm_struct: overlapping memory areas\n");
 829                 p = &mpnt->vm_next;
 830         }
 831         vmp->vm_next = mpnt;
 832         *p = vmp;
 833 #else
 834         struct vm_area_struct * prev, * next;
 835 
 836         avl_insert_neighbours(vmp, &t->mm->mmap_avl, &prev, &next);
 837         if ((prev ? prev->vm_next : t->mm->mmap) != next)
 838                 printk("insert_vm_struct: tree inconsistent with list\n");
 839         if (prev)
 840                 prev->vm_next = vmp;
 841         else
 842                 t->mm->mmap = vmp;
 843         vmp->vm_next = next;
 844 #endif
 845 
 846         inode = vmp->vm_inode;
 847         if (!inode)
 848                 return;
 849 
 850         /* insert vmp into inode's circular share list */
 851         if ((share = inode->i_mmap)) {
 852                 vmp->vm_next_share = share->vm_next_share;
 853                 vmp->vm_next_share->vm_prev_share = vmp;
 854                 share->vm_next_share = vmp;
 855                 vmp->vm_prev_share = share;
 856         } else
 857                 inode->i_mmap = vmp->vm_next_share = vmp->vm_prev_share = vmp;
 858 }
 859 
 860 /*
 861  * Remove one vm structure from the inode's i_mmap ring.
 862  */
 863 void remove_shared_vm_struct(struct vm_area_struct *mpnt)
 864 {
 865         struct inode * inode = mpnt->vm_inode;
 866 
 867         if (!inode)
 868                 return;
 869 
 870         if (mpnt->vm_next_share == mpnt) {
 871                 if (inode->i_mmap != mpnt)
 872                         printk("Inode i_mmap ring corrupted\n");
 873                 inode->i_mmap = NULL;
 874                 return;
 875         }
 876 
 877         if (inode->i_mmap == mpnt)
 878                 inode->i_mmap = mpnt->vm_next_share;
 879 
 880         mpnt->vm_prev_share->vm_next_share = mpnt->vm_next_share;
 881         mpnt->vm_next_share->vm_prev_share = mpnt->vm_prev_share;
 882 }
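
insert_vm_struct() and remove_shared_vm_struct() together maintain the
inode's i_mmap ring: a circular, doubly-linked list in which the inode
points at an arbitrary member. A self-contained model of the two ring
operations, with a hypothetical struct node:

#include <stddef.h>

struct node { struct node *next, *prev; };

void ring_insert(struct node **head, struct node *n)
{
        struct node *h = *head;

        if (h) {                        /* splice n in after the head */
                n->next = h->next;
                n->next->prev = n;
                h->next = n;
                n->prev = h;
        } else                          /* first member: a ring of one */
                *head = n->next = n->prev = n;
}

void ring_remove(struct node **head, struct node *n)
{
        if (n->next == n) {             /* last member: ring goes empty */
                *head = NULL;
                return;
        }
        if (*head == n)                 /* never leave head dangling */
                *head = n->next;
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

int main(void)
{
        struct node a, b, *ring = NULL;

        ring_insert(&ring, &a);
        ring_insert(&ring, &b);
        ring_remove(&ring, &a);
        ring_remove(&ring, &b);
        return ring != NULL;            /* 0: the ring is empty again */
}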
 883 
 884 /*
 885  * Merge the list of memory segments if possible.
 886  * Redundant vm_area_structs are freed.
 887  * This assumes that the list is ordered by address.
 888  * We don't need to traverse the entire list, only those segments
 889  * which intersect or are adjacent to a given interval.
 890  */
 891 void merge_segments (struct task_struct * task, unsigned long start_addr, unsigned long end_addr)
 892 {
 893         struct vm_area_struct *prev, *mpnt, *next;
 894 
 895         mpnt = find_vma(task, start_addr);
 896         if (!mpnt)
 897                 return;
 898         avl_neighbours(mpnt, task->mm->mmap_avl, &prev, &next);
  899         /* we have  prev->vm_next == mpnt && mpnt->vm_next == next */
 900 
 901         if (!prev) {
 902                 prev = mpnt;
 903                 mpnt = next;
 904         }
 905 
 906         /* prev and mpnt cycle through the list, as long as
 907          * start_addr < mpnt->vm_end && prev->vm_start < end_addr
 908          */
 909         for ( ; mpnt && prev->vm_start < end_addr ; prev = mpnt, mpnt = next) {
 910 #if 0
 911                 printk("looping in merge_segments, mpnt=0x%lX\n", (unsigned long) mpnt);
 912 #endif
 913 
 914                 next = mpnt->vm_next;
 915 
 916                 /*
 917                  * To share, we must have the same inode, operations.. 
 918                  */
 919                 if (mpnt->vm_inode != prev->vm_inode)
 920                         continue;
 921                 if (mpnt->vm_pte != prev->vm_pte)
 922                         continue;
 923                 if (mpnt->vm_ops != prev->vm_ops)
 924                         continue;
 925                 if (mpnt->vm_flags != prev->vm_flags)
 926                         continue;
 927                 if (prev->vm_end != mpnt->vm_start)
 928                         continue;
 929                 /*
 930                  * and if we have an inode, the offsets must be contiguous..
 931                  */
 932                 if ((mpnt->vm_inode != NULL) || (mpnt->vm_flags & VM_SHM)) {
 933                         if (prev->vm_offset + prev->vm_end - prev->vm_start != mpnt->vm_offset)
 934                                 continue;
 935                 }
 936 
 937                 /*
 938                  * merge prev with mpnt and set up pointers so the new
 939                  * big segment can possibly merge with the next one.
 940                  * The old unused mpnt is freed.
 941                  */
 942                 avl_remove(mpnt, &task->mm->mmap_avl);
 943                 prev->vm_end = mpnt->vm_end;
 944                 prev->vm_next = mpnt->vm_next;
 945                 if (mpnt->vm_ops && mpnt->vm_ops->close) {
 946                         mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
 947                         mpnt->vm_start = mpnt->vm_end;
 948                         mpnt->vm_ops->close(mpnt);
 949                 }
 950                 remove_shared_vm_struct(mpnt);
 951                 if (mpnt->vm_inode)
 952                         mpnt->vm_inode->i_count--;
 953                 kfree_s(mpnt, sizeof(*mpnt));
 954                 mpnt = prev;
 955         }
 956 }
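
The chain of continue tests above boils down to one mergeability predicate:
identical attributes, exact adjacency, and contiguous file offsets where an
inode (or SHM segment) is involved. A condensed sketch, using a hypothetical
struct seg with only the fields that matter:

struct seg {
        unsigned long start, end, offset;
        unsigned long flags;
        void *inode, *ops, *pte;
};

/* can b be folded into a, as merge_segments() does?
 * (the kernel applies the offset test for VM_SHM areas as well) */
int can_merge(const struct seg *a, const struct seg *b)
{
        if (a->inode != b->inode || a->pte != b->pte ||
            a->ops != b->ops || a->flags != b->flags)
                return 0;
        if (a->end != b->start)         /* must be exactly adjacent */
                return 0;
        if (b->inode)                   /* file offsets must line up */
                return a->offset + (a->end - a->start) == b->offset;
        return 1;
}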
 957 
 958 /*
 959  * Map memory not associated with any file into a process
 960  * address space.  Adjacent memory is merged.
 961  */
 962 static int anon_map(struct inode *ino, struct file * file, struct vm_area_struct * vma)
 963 {
 964         if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
 965                 return -ENOMEM;
 966         return 0;
 967 }
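
zeromap_page_range() points every page of the new area at the kernel's shared
zero page, write-protected, so a fresh anonymous mapping reads back as zeroes
and writes fault in private copies. A quick user-space check of that behavior:

#include <sys/mman.h>

int main(void)
{
        unsigned char *p = mmap(0, 4096, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        return p[0] | p[4095];          /* 0: the page reads as zeroes */
}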
