root/mm/memory.c


DEFINITIONS

This source file includes the following definitions.
  1. oom
  2. free_one_table
  3. clear_page_tables
  4. free_page_tables
  5. clone_page_tables
  6. copy_page_tables
  7. unmap_page_range
  8. zeromap_page_range
  9. remap_page_range
  10. put_page
  11. put_dirty_page
  12. do_wp_page
  13. verify_area
  14. get_empty_page
  15. try_to_share
  16. share_page
  17. get_empty_pgtable
  18. do_swap_page
  19. do_no_page

   1 /*
   2  *  linux/mm/memory.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * demand-loading started 01.12.91 - seems it is high on the list of
   9  * things wanted, and it should be easy to implement. - Linus
  10  */
  11 
  12 /*
  13  * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
  14  * pages started 02.12.91, seems to work. - Linus.
  15  *
  16  * Tested sharing by executing about 30 /bin/sh: under the old kernel it
  17  * would have taken more than the 6M I have free, but it worked well as
  18  * far as I could see.
  19  *
  20  * Also corrected some "invalidate()"s - I wasn't doing enough of them.
  21  */
  22 
  23 /*
  24  * Real VM (paging to/from disk) started 18.12.91. Much more work and
  25  * thought has to go into this. Oh, well..
  26  * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
  27  *              Found it. Everything seems to work now.
  28  * 20.12.91  -  Ok, making the swap-device changeable like the root.
  29  */
  30 
  31 /*
  32  * 05.04.94  -  Multi-page memory management added for v1.1.
  33  *              Idea by Alex Bligh (alex@cconcepts.co.uk)
  34  */
  35 
  36 #include <linux/config.h>
  37 #include <linux/signal.h>
  38 #include <linux/sched.h>
  39 #include <linux/head.h>
  40 #include <linux/kernel.h>
  41 #include <linux/errno.h>
  42 #include <linux/string.h>
  43 #include <linux/types.h>
  44 #include <linux/ptrace.h>
  45 #include <linux/mman.h>
  46 
  47 #include <asm/system.h>
  48 #include <asm/segment.h>
  49 
  50 unsigned long high_memory = 0;
  51 
  52 /*
  53  * The free_area_list arrays point to the queue heads of the free areas
  54  * of different sizes
  55  */
  56 int nr_swap_pages = 0;
  57 int nr_free_pages = 0;
  58 struct mem_list free_area_list[NR_MEM_LISTS];
  59 unsigned char * free_area_map[NR_MEM_LISTS];
  60 
  61 #define copy_page(from,to) memcpy((void *) to, (void *) from, PAGE_SIZE)
  62 
  63 unsigned short * mem_map = NULL;
  64 
  65 #define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
  66 
  67 /*
  68  * oom() prints a message (so that the user knows why the process died),
  69  * and gives the process an untrappable SIGKILL.
  70  */
  71 void oom(struct task_struct * task)
  72 {
  73         printk("\nOut of memory for %s.\n", current->comm);
  74         task->sigaction[SIGKILL-1].sa_handler = NULL;
  75         task->blocked &= ~(1<<(SIGKILL-1));
  76         send_sig(SIGKILL,task,1);
  77 }
  78 
  79 static void free_one_table(unsigned long * page_dir)
  80 {
  81         int j;
  82         unsigned long pg_table = *page_dir;
  83         unsigned long * page_table;
  84 
  85         if (!pg_table)
  86                 return;
  87         *page_dir = 0;
  88         if (pg_table >= high_memory || !(pg_table & PAGE_PRESENT)) {
  89                 printk("Bad page table: [%p]=%08lx\n",page_dir,pg_table);
  90                 return;
  91         }
  92         if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
  93                 return;
  94         page_table = (unsigned long *) (pg_table & PAGE_MASK);
  95         for (j = 0 ; j < PTRS_PER_PAGE ; j++,page_table++) {
  96                 unsigned long pg = *page_table;
  97                 
  98                 if (!pg)
  99                         continue;
 100                 *page_table = 0;
 101                 if (pg & PAGE_PRESENT)
 102                         free_page(PAGE_MASK & pg);
 103                 else
 104                         swap_free(pg);
 105         }
 106         free_page(PAGE_MASK & pg_table);
 107 }
 108 
 109 /*
 110  * This function clears all user-level page tables of a process - this
 111  * is needed by execve(), so that old pages aren't in the way. Note that
 112  * unlike 'free_page_tables()', this function still leaves a valid
 113  * page-table-tree in memory: it just removes the user pages. The two
 114  * functions are similar, but there is a fundamental difference.
 115  */
 116 void clear_page_tables(struct task_struct * tsk)
 117 {
 118         int i;
 119         unsigned long * page_dir;
 120 
 121         if (!tsk)
 122                 return;
 123         if (tsk == task[0])
 124                 panic("task[0] (swapper) doesn't support exec()\n");
 125         page_dir = PAGE_DIR_OFFSET(tsk, 0);
 126         if (!page_dir || page_dir == swapper_pg_dir) {
 127                 printk("Trying to clear kernel page-directory: not good\n");
 128                 return;
 129         }
 130         if (mem_map[MAP_NR((unsigned long) page_dir)] > 1) {
 131                 unsigned long * new_pg;
 132 
 133                 if (!(new_pg = (unsigned long*) get_free_page(GFP_KERNEL))) {
 134                         oom(tsk);
 135                         return;
 136                 }
 137                 for (i = 768 ; i < 1024 ; i++)
 138                         new_pg[i] = page_dir[i];
 139                 free_page((unsigned long) page_dir);
 140                 SET_PAGE_DIR(tsk, new_pg);
 141                 return;
 142         }
 143         for (i = 0 ; i < 768 ; i++,page_dir++)
 144                 free_one_table(page_dir);
 145         invalidate();
 146         return;
 147 }
 148 
 149 /*
 150  * This function frees up all page tables of a process when it exits.
 151  */
 152 void free_page_tables(struct task_struct * tsk)
 153 {
 154         int i;
 155         unsigned long * page_dir;
 156 
 157         if (!tsk)
 158                 return;
 159         if (tsk == task[0]) {
 160                 printk("task[0] (swapper) killed: unable to recover\n");
 161                 panic("Trying to free up swapper memory space");
 162         }
 163         page_dir = PAGE_DIR_OFFSET(tsk, 0);
 164         if (!page_dir || page_dir == swapper_pg_dir) {
 165                 printk("Trying to free kernel page-directory: not good\n");
 166                 return;
 167         }
 168         SET_PAGE_DIR(tsk, swapper_pg_dir);
 169         if (mem_map[MAP_NR((unsigned long) page_dir)] > 1) {
 170                 free_page((unsigned long) page_dir);
 171                 return;
 172         }
 173         for (i = 0 ; i < PTRS_PER_PAGE ; i++)
 174                 free_one_table(page_dir + i);
 175         free_page((unsigned long) page_dir);
 176         invalidate();
 177 }
 178 
 179 /*
 180  * clone_page_tables() clones the page table for a process - both
 181  * processes will have the exact same pages in memory. There are
 182  * probably races in the memory management with cloning, but we'll
 183  * see..
 184  */
 185 int clone_page_tables(struct task_struct * tsk)
 186 {
 187         unsigned long pg_dir;
 188 
 189         pg_dir = (unsigned long) PAGE_DIR_OFFSET(current, 0);
 190         mem_map[MAP_NR(pg_dir)]++;
 191         SET_PAGE_DIR(tsk, pg_dir);
 192         return 0;
 193 }
 194 
 195 /*
 196  * copy_page_tables() just copies the whole process memory range:
 197  * note the special handling of RESERVED (ie kernel) pages, which
 198  * means that they are always shared by all processes.
 199  */
 200 int copy_page_tables(struct task_struct * tsk)
 201 {
 202         int i;
 203         unsigned long *old_page_dir;
 204         unsigned long *new_page_dir;
 205 
 206         new_page_dir = (unsigned long *) get_free_page(GFP_KERNEL);
 207         if (!new_page_dir)
 208                 return -ENOMEM;
 209         old_page_dir = PAGE_DIR_OFFSET(current, 0);
 210         SET_PAGE_DIR(tsk, new_page_dir);
 211         for (i = 0 ; i < PTRS_PER_PAGE ; i++,old_page_dir++,new_page_dir++) {
 212                 int j;
 213                 unsigned long old_pg_table, *old_page_table;
 214                 unsigned long new_pg_table, *new_page_table;
 215 
 216                 old_pg_table = *old_page_dir;
 217                 if (!old_pg_table)
 218                         continue;
 219                 if (old_pg_table >= high_memory || !(old_pg_table & PAGE_PRESENT)) {
 220                         printk("copy_page_tables: bad page table: "
 221                                 "probable memory corruption\n");
 222                         *old_page_dir = 0;
 223                         continue;
 224                 }
 225                 if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
 226                         *new_page_dir = old_pg_table;
 227                         continue;
 228                 }
 229                 if (!(new_pg_table = get_free_page(GFP_KERNEL))) {
 230                         free_page_tables(tsk);
 231                         return -ENOMEM;
 232                 }
 233                 old_page_table = (unsigned long *) (PAGE_MASK & old_pg_table);
 234                 new_page_table = (unsigned long *) (PAGE_MASK & new_pg_table);
 235                 for (j = 0 ; j < PTRS_PER_PAGE ; j++,old_page_table++,new_page_table++) {
 236                         unsigned long pg;
 237                         pg = *old_page_table;
 238                         if (!pg)
 239                                 continue;
 240                         if (!(pg & PAGE_PRESENT)) {
 241                                 *new_page_table = swap_duplicate(pg);
 242                                 continue;
 243                         }
 244                         if (pg > high_memory || (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)) {
 245                                 *new_page_table = pg;
 246                                 continue;
 247                         }
 248                         if (pg & PAGE_COW)
 249                                 pg &= ~PAGE_RW;
 250                         if (delete_from_swap_cache(pg))
 251                                 pg |= PAGE_DIRTY;
 252                         *new_page_table = pg;
 253                         *old_page_table = pg;
 254                         mem_map[MAP_NR(pg)]++;
 255                 }
 256                 *new_page_dir = new_pg_table | PAGE_TABLE;
 257         }
 258         invalidate();
 259         return 0;
 260 }
 261 
 262 /*
 263  * a more complete version of free_page_tables which performs with page
 264  * granularity.
 265  */
 266 int unmap_page_range(unsigned long from, unsigned long size)
 267 {
 268         unsigned long page, page_dir;
 269         unsigned long *page_table, *dir;
 270         unsigned long poff, pcnt, pc;
 271 
 272         if (from & ~PAGE_MASK) {
 273                 printk("unmap_page_range called with wrong alignment\n");
 274                 return -EINVAL;
 275         }
 276         size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
 277         dir = PAGE_DIR_OFFSET(current,from);
 278         poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
 279         if ((pcnt = PTRS_PER_PAGE - poff) > size)
 280                 pcnt = size;
 281 
 282         for ( ; size > 0; ++dir, size -= pcnt,
 283              pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size)) {
 284                 if (!(page_dir = *dir)) {
 285                         poff = 0;
 286                         continue;
 287                 }
 288                 if (!(page_dir & PAGE_PRESENT)) {
 289                         printk("unmap_page_range: bad page directory.");
 290                         continue;
 291                 }
 292                 page_table = (unsigned long *)(PAGE_MASK & page_dir);
 293                 if (poff) {
 294                         page_table += poff;
 295                         poff = 0;
 296                 }
 297                 for (pc = pcnt; pc--; page_table++) {
 298                         if ((page = *page_table) != 0) {
 299                                 *page_table = 0;
 300                                 if (PAGE_PRESENT & page) {
 301                                         if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
 302                                                 if (current->mm->rss > 0)
 303                                                         --current->mm->rss;
 304                                         free_page(PAGE_MASK & page);
 305                                 } else
 306                                         swap_free(page);
 307                         }
 308                 }
 309                 if (pcnt == PTRS_PER_PAGE) {
 310                         *dir = 0;
 311                         free_page(PAGE_MASK & page_dir);
 312                 }
 313         }
 314         invalidate();
 315         return 0;
 316 }
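
The expression "size = (size + ~PAGE_MASK) >> PAGE_SHIFT" above (also used by zeromap_page_range and remap_page_range below) rounds the byte count up to a whole number of pages. A minimal worked sketch, assuming the usual i386 values (PAGE_SHIFT == 12, PAGE_SIZE == 4096, so ~PAGE_MASK == 0xfff):

        /*
         * Hypothetical standalone illustration, not part of this file:
         * round a byte length up to a page count the way the loops above do.
         */
        unsigned long bytes = 8193;     /* just over two pages */
        unsigned long pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT;
        /* (8193 + 4095) >> 12  ==  12288 >> 12  ==  3 pages */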
 317 
 318 int zeromap_page_range(unsigned long from, unsigned long size, int mask)
 319 {
 320         unsigned long *page_table, *dir;
 321         unsigned long poff, pcnt;
 322         unsigned long page;
 323 
 324         if (mask) {
 325                 if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
 326                         printk("zeromap_page_range: mask = %08x\n",mask);
 327                         return -EINVAL;
 328                 }
 329                 mask |= ZERO_PAGE;
 330         }
 331         if (from & ~PAGE_MASK) {
 332                 printk("zeromap_page_range: from = %08lx\n",from);
 333                 return -EINVAL;
 334         }
 335         dir = PAGE_DIR_OFFSET(current,from);
 336         size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
 337         poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
 338         if ((pcnt = PTRS_PER_PAGE - poff) > size)
 339                 pcnt = size;
 340 
 341         while (size > 0) {
 342                 if (!(PAGE_PRESENT & *dir)) {
 343                                 /* clear page needed here?  SRB. */
 344                         if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
 345                                 invalidate();
 346                                 return -ENOMEM;
 347                         }
 348                         if (PAGE_PRESENT & *dir) {
 349                                 free_page((unsigned long) page_table);
 350                                 page_table = (unsigned long *)(PAGE_MASK & *dir++);
 351                         } else
 352                                 *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
 353                 } else
 354                         page_table = (unsigned long *)(PAGE_MASK & *dir++);
 355                 page_table += poff;
 356                 poff = 0;
 357                 for (size -= pcnt; pcnt-- ;) {
 358                         if ((page = *page_table) != 0) {
 359                                 *page_table = 0;
 360                                 if (page & PAGE_PRESENT) {
 361                                         if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
 362                                                 if (current->mm->rss > 0)
 363                                                         --current->mm->rss;
 364                                         free_page(PAGE_MASK & page);
 365                                 } else
 366                                         swap_free(page);
 367                         }
 368                         *page_table++ = mask;
 369                 }
 370                 pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
 371         }
 372         invalidate();
 373         return 0;
 374 }
 375 
 376 /*
 377  * maps a range of physical memory into the requested pages. the old
 378  * mappings are removed. any references to nonexistent pages result
 379  * in null mappings (currently treated as "copy-on-access")
 380  */
 381 int remap_page_range(unsigned long from, unsigned long to, unsigned long size, int mask)
 382 {
 383         unsigned long *page_table, *dir;
 384         unsigned long poff, pcnt;
 385         unsigned long page;
 386 
 387         if (mask) {
 388                 if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
 389                         printk("remap_page_range: mask = %08x\n",mask);
 390                         return -EINVAL;
 391                 }
 392         }
 393         if ((from & ~PAGE_MASK) || (to & ~PAGE_MASK)) {
 394                 printk("remap_page_range: from = %08lx, to=%08lx\n",from,to);
 395                 return -EINVAL;
 396         }
 397         dir = PAGE_DIR_OFFSET(current,from);
 398         size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
 399         poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
 400         if ((pcnt = PTRS_PER_PAGE - poff) > size)
 401                 pcnt = size;
 402 
 403         while (size > 0) {
 404                 if (!(PAGE_PRESENT & *dir)) {
 405                         /* clearing page here, needed?  SRB. */
 406                         if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
 407                                 invalidate();
 408                                 return -1;
 409                         }
 410                         *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
 411                 }
 412                 else
 413                         page_table = (unsigned long *)(PAGE_MASK & *dir++);
 414                 if (poff) {
 415                         page_table += poff;
 416                         poff = 0;
 417                 }
 418 
 419                 for (size -= pcnt; pcnt-- ;) {
 420                         if ((page = *page_table) != 0) {
 421                                 *page_table = 0;
 422                                 if (PAGE_PRESENT & page) {
 423                                         if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
 424                                                 if (current->mm->rss > 0)
 425                                                         --current->mm->rss;
 426                                         free_page(PAGE_MASK & page);
 427                                 } else
 428                                         swap_free(page);
 429                         }
 430 
 431                         /*
 432                          * the first condition should return an invalid access
 433                          * when the page is referenced. current assumptions
 434                          * cause it to be treated as demand allocation in some
 435                          * cases.
 436                          */
 437                         if (!mask)
 438                                 *page_table++ = 0;      /* not present */
 439                         else if (to >= high_memory)
 440                                 *page_table++ = (to | mask);
 441                         else if (!mem_map[MAP_NR(to)])
 442                                 *page_table++ = 0;      /* not present */
 443                         else {
 444                                 *page_table++ = (to | mask);
 445                                 if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED)) {
 446                                         ++current->mm->rss;
 447                                         mem_map[MAP_NR(to)]++;
 448                                 }
 449                         }
 450                         to += PAGE_SIZE;
 451                 }
 452                 pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
 453         }
 454         invalidate();
 455         return 0;
 456 }
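
remap_page_range() is the building block a driver would use to map a physical range (a frame buffer, for instance) into a process' address space. A hedged, hypothetical caller sketch; the surrounding handler, the 'phys' and 'vma' names and the PAGE_SHARED choice are illustrative assumptions, not this kernel's exact driver interface:

        /*
         * Hypothetical usage sketch, not from this file: map the physical
         * range starting at 'phys' into the user region described by 'vma'.
         */
        if (remap_page_range(vma->vm_start, phys,
                             vma->vm_end - vma->vm_start, PAGE_SHARED))
                return -EAGAIN;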
 457 
 458 /*
 459  * This function puts a page in memory at the wanted address.
 460  * It returns the physical address of the page gotten, 0 if
 461  * out of memory (either when trying to access page-table or
 462  * page.)
 463  */
 464 unsigned long put_page(struct task_struct * tsk,unsigned long page,
 465         unsigned long address,int prot)
 466 {
 467         unsigned long *page_table;
 468 
 469         if ((prot & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT)
 470                 printk("put_page: prot = %08x\n",prot);
 471         if (page >= high_memory) {
 472                 printk("put_page: trying to put page %08lx at %08lx\n",page,address);
 473                 return 0;
 474         }
 475         page_table = PAGE_DIR_OFFSET(tsk,address);
 476         if ((*page_table) & PAGE_PRESENT)
 477                 page_table = (unsigned long *) (PAGE_MASK & *page_table);
 478         else {
 479                 printk("put_page: bad page directory entry\n");
 480                 oom(tsk);
 481                 *page_table = BAD_PAGETABLE | PAGE_TABLE;
 482                 return 0;
 483         }
 484         page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
 485         if (*page_table) {
 486                 printk("put_page: page already exists\n");
 487                 *page_table = 0;
 488                 invalidate();
 489         }
 490         *page_table = page | prot;
 491 /* no need for invalidate */
 492         return page;
 493 }
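
put_page() (like several routines above) walks the two-level i386 page tables by hand: the top 10 address bits select the page-directory entry, the next 10 select the page-table entry, and the low 12 are the offset within the page. A small worked sketch of that index arithmetic, assuming 4 KB pages and PTRS_PER_PAGE == 1024:

        /*
         * Hypothetical illustration, not part of this file: split a linear
         * address into directory index, table index and page offset.
         */
        unsigned long address = 0x08049123;
        unsigned long dir_idx = address >> 22;                               /* 0x020 */
        unsigned long tbl_idx = (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1); /* 0x049 */
        unsigned long offset  = address & ~PAGE_MASK;                        /* 0x123 */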
 494 
 495 /*
 496  * The previous function doesn't work very well if you also want to mark
 497  * the page dirty: exec.c wants this, as it has earlier changed the page,
 498  * and we want the dirty-status to be correct (for VM). Thus the same
 499  * routine, but this time we mark it dirty too.
 500  */
 501 unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
 502 {
 503         unsigned long tmp, *page_table;
 504 
 505         if (page >= high_memory)
 506                 printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
 507         if (mem_map[MAP_NR(page)] != 1)
 508                 printk("mem_map disagrees with %08lx at %08lx\n",page,address);
 509         page_table = PAGE_DIR_OFFSET(tsk,address);
 510         if (PAGE_PRESENT & *page_table)
 511                 page_table = (unsigned long *) (PAGE_MASK & *page_table);
 512         else {
 513                 if (!(tmp = get_free_page(GFP_KERNEL)))
 514                         return 0;
 515                 if (PAGE_PRESENT & *page_table) {
 516                         free_page(tmp);
 517                         page_table = (unsigned long *) (PAGE_MASK & *page_table);
 518                 } else {
 519                         *page_table = tmp | PAGE_TABLE;
 520                         page_table = (unsigned long *) tmp;
 521                 }
 522         }
 523         page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
 524         if (*page_table) {
 525                 printk("put_dirty_page: page already exists\n");
 526                 *page_table = 0;
 527                 invalidate();
 528         }
 529         *page_table = page | (PAGE_DIRTY | PAGE_PRIVATE);
 530 /* no need for invalidate */
 531         return page;
 532 }
 533 
 534 /*
 535  * This routine handles present pages, when users try to write
 536  * to a shared page. It is done by copying the page to a new address
 537  * and decrementing the shared-page counter for the old page.
 538  *
 539  * Goto-purists beware: the only reason for goto's here is that it results
 540  * in better assembly code.. The "default" path will see no jumps at all.
 541  */
 542 void do_wp_page(struct vm_area_struct * vma, unsigned long address,
 543         unsigned long error_code)
 544 {
 545         unsigned long *pde, pte, old_page, prot;
 546         unsigned long new_page;
 547 
 548         new_page = __get_free_page(GFP_KERNEL);
 549         pde = PAGE_DIR_OFFSET(vma->vm_task,address);
 550         pte = *pde;
 551         if (!(pte & PAGE_PRESENT))
 552                 goto end_wp_page;
 553         if ((pte & PAGE_TABLE) != PAGE_TABLE || pte >= high_memory)
 554                 goto bad_wp_pagetable;
 555         pte &= PAGE_MASK;
 556         pte += PAGE_PTR(address);
 557         old_page = *(unsigned long *) pte;
 558         if (!(old_page & PAGE_PRESENT))
 559                 goto end_wp_page;
 560         if (old_page >= high_memory)
 561                 goto bad_wp_page;
 562         if (old_page & PAGE_RW)
 563                 goto end_wp_page;
 564         vma->vm_task->mm->min_flt++;
 565         prot = (old_page & ~PAGE_MASK) | PAGE_RW | PAGE_DIRTY;
 566         old_page &= PAGE_MASK;
 567         if (mem_map[MAP_NR(old_page)] != 1) {
 568                 if (new_page) {
 569                         if (mem_map[MAP_NR(old_page)] & MAP_PAGE_RESERVED)
 570                                 ++vma->vm_task->mm->rss;
 571                         copy_page(old_page,new_page);
 572                         *(unsigned long *) pte = new_page | prot;
 573                         free_page(old_page);
 574                         invalidate();
 575                         return;
 576                 }
 577                 free_page(old_page);
 578                 oom(vma->vm_task);
 579                 *(unsigned long *) pte = BAD_PAGE | prot;
 580                 invalidate();
 581                 return;
 582         }
 583         *(unsigned long *) pte |= PAGE_RW | PAGE_DIRTY;
 584         invalidate();
 585         if (new_page)
 586                 free_page(new_page);
 587         return;
 588 bad_wp_page:
 589         printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
 590         *(unsigned long *) pte = BAD_PAGE | PAGE_SHARED;
 591         send_sig(SIGKILL, vma->vm_task, 1);
 592         goto end_wp_page;
 593 bad_wp_pagetable:
 594         printk("do_wp_page: bogus page-table at address %08lx (%08lx)\n",address,pte);
 595         *pde = BAD_PAGETABLE | PAGE_TABLE;
 596         send_sig(SIGKILL, vma->vm_task, 1);
 597 end_wp_page:
 598         if (new_page)
 599                 free_page(new_page);
 600         return;
 601 }
 602 
 603 /*
 604  * Ugly, ugly, but the goto's result in better assembly..
 605  */
 606 int verify_area(int type, const void * addr, unsigned long size)
 607 {
 608         struct vm_area_struct * vma;
 609         unsigned long start = (unsigned long) addr;
 610 
 611         /* If the current user space is mapped to kernel space (for the
 612          * case where we use a fake user buffer with get_fs/set_fs()) we
 613          * don't expect to find the address in the user vm map.
 614          */
 615         if (get_fs() == get_ds())
 616                 return 0;
 617 
 618         for (vma = current->mm->mmap ; ; vma = vma->vm_next) {
 619                 if (!vma)
 620                         goto bad_area;
 621                 if (vma->vm_end > start)
 622                         break;
 623         }
 624         if (vma->vm_start <= start)
 625                 goto good_area;
 626         if (!(vma->vm_flags & VM_GROWSDOWN))
 627                 goto bad_area;
 628         if (vma->vm_end - start > current->rlim[RLIMIT_STACK].rlim_cur)
 629                 goto bad_area;
 630 
 631 good_area:
 632         if (!wp_works_ok && type == VERIFY_WRITE)
 633                 goto check_wp_fault_by_hand;
 634         for (;;) {
 635                 struct vm_area_struct * next;
 636                 if (!(vma->vm_page_prot & PAGE_USER))
 637                         goto bad_area;
 638                 if (type != VERIFY_READ && !(vma->vm_page_prot & (PAGE_COW | PAGE_RW)))
 639                         goto bad_area;
 640                 if (vma->vm_end - start >= size)
 641                         return 0;
 642                 next = vma->vm_next;
 643                 if (!next || vma->vm_end != next->vm_start)
 644                         goto bad_area;
 645                 vma = next;
 646         }
 647 
 648 check_wp_fault_by_hand:
 649         size--;
 650         size += start & ~PAGE_MASK;
 651         size >>= PAGE_SHIFT;
 652         start &= PAGE_MASK;
 653 
 654         for (;;) {
 655                 if (!(vma->vm_page_prot & (PAGE_COW | PAGE_RW)))
 656                         goto bad_area;
 657                 do_wp_page(vma, start, PAGE_PRESENT);
 658                 if (!size)
 659                         return 0;
 660                 size--;
 661                 start += PAGE_SIZE;
 662                 if (start < vma->vm_end)
 663                         continue;
 664                 vma = vma->vm_next;
 665                 if (!vma || vma->vm_start != start)
 666                         break;
 667         }
 668 
 669 bad_area:
 670         return -EFAULT;
 671 }
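
verify_area() is what system-call code calls before touching a user-space buffer with the get_fs/put_fs segment helpers. A hedged, hypothetical caller sketch; 'buf' and 'result' are illustrative names, not from this file:

        /*
         * Hypothetical usage sketch: validate a user pointer from a system
         * call before writing a long through it.
         */
        int error = verify_area(VERIFY_WRITE, buf, sizeof(long));
        if (error)
                return error;
        put_fs_long(result, (unsigned long *) buf);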
 672 
 673 static inline void get_empty_page(struct task_struct * tsk, unsigned long address)
 674 {
 675         unsigned long tmp;
 676 
 677         if (!(tmp = get_free_page(GFP_KERNEL))) {
 678                 oom(tsk);
 679                 tmp = BAD_PAGE;
 680         }
 681         if (!put_page(tsk,tmp,address,PAGE_PRIVATE))
 682                 free_page(tmp);
 683 }
 684 
 685 /*
 686  * try_to_share() checks the page at address "address" in the task "p",
 687  * to see if it exists, and if it is clean. If so, share it with the current
 688  * task.
 689  *
 690  * NOTE! This assumes we have checked that p != current, and that they
 691  * share the same inode and can generally otherwise be shared.
 692  */
 693 static int try_to_share(unsigned long to_address, struct vm_area_struct * to_area,
 694         unsigned long from_address, struct vm_area_struct * from_area,
 695         unsigned long newpage)
 696 {
 697         unsigned long from;
 698         unsigned long to;
 699         unsigned long from_page;
 700         unsigned long to_page;
 701 
 702         from_page = (unsigned long)PAGE_DIR_OFFSET(from_area->vm_task,from_address);
 703         to_page = (unsigned long)PAGE_DIR_OFFSET(to_area->vm_task,to_address);
 704 /* is there a page-directory at from? */
 705         from = *(unsigned long *) from_page;
 706         if (!(from & PAGE_PRESENT))
 707                 return 0;
 708         from &= PAGE_MASK;
 709         from_page = from + PAGE_PTR(from_address);
 710         from = *(unsigned long *) from_page;
 711 /* is the page present? */
 712         if (!(from & PAGE_PRESENT))
 713                 return 0;
 714 /* if it is private, it must be clean to be shared */
 715         if (from & PAGE_DIRTY) {
 716                 if (from_area->vm_page_prot & PAGE_COW)
 717                         return 0;
 718                 if (!(from_area->vm_page_prot & PAGE_RW))
 719                         return 0;
 720         }               
 721 /* is the page reasonable at all? */
 722         if (from >= high_memory)
 723                 return 0;
 724         if (mem_map[MAP_NR(from)] & MAP_PAGE_RESERVED)
 725                 return 0;
 726 /* is the destination ok? */
 727         to = *(unsigned long *) to_page;
 728         if (!(to & PAGE_PRESENT))
 729                 return 0;
 730         to &= PAGE_MASK;
 731         to_page = to + PAGE_PTR(to_address);
 732         if (*(unsigned long *) to_page)
 733                 return 0;
 734 /* do we copy? */
 735         if (newpage) {
 736                 if (in_swap_cache(from)) { /* implies PAGE_DIRTY */
 737                         if (from_area->vm_page_prot & PAGE_COW)
 738                                 return 0;
 739                         if (!(from_area->vm_page_prot & PAGE_RW))
 740                                 return 0;
 741                 }
 742                 copy_page((from & PAGE_MASK), newpage);
 743                 *(unsigned long *) to_page = newpage | to_area->vm_page_prot;
 744                 return 1;
 745         }
 746 /* do a final swap-cache test before sharing them.. */
 747         if (in_swap_cache(from)) {
 748                 if (from_area->vm_page_prot & PAGE_COW)
 749                         return 0;
 750                 if (!(from_area->vm_page_prot & PAGE_RW))
 751                         return 0;
 752                 from |= PAGE_DIRTY;
 753                 *(unsigned long *) from_page = from;
 754                 delete_from_swap_cache(from);
 755                 invalidate();
 756         }
 757         mem_map[MAP_NR(from)]++;
 758 /* fill in the 'to' field, checking for COW-stuff */
 759         to = (from & (PAGE_MASK | PAGE_DIRTY)) | to_area->vm_page_prot;
 760         if (to & PAGE_COW)
 761                 to &= ~PAGE_RW;
 762         *(unsigned long *) to_page = to;
 763 /* Check if we need to do anything at all to the 'from' field */
 764         if (!(from & PAGE_RW))
 765                 return 1;
 766         if (!(from_area->vm_page_prot & PAGE_COW))
 767                 return 1;
 768 /* ok, need to mark it read-only, so invalidate any possible old TB entry */
 769         from &= ~PAGE_RW;
 770         *(unsigned long *) from_page = from;
 771         invalidate();
 772         return 1;
 773 }
 774 
 775 /*
 776  * share_page() tries to find a process that could share a page with
 777  * the current one.
 778  *
 779  * We first check if it is at all feasible by checking inode->i_count.
 780  * It should be >1 if there are other tasks sharing this inode.
 781  */
 782 static int share_page(struct vm_area_struct * area, unsigned long address,
 783         unsigned long error_code, unsigned long newpage)
 784 {
 785         struct inode * inode;
 786         unsigned long offset;
 787         unsigned long from_address;
 788         unsigned long give_page;
 789         struct vm_area_struct * mpnt;
 790 
 791         if (!area || !(inode = area->vm_inode) || inode->i_count < 2)
 792                 return 0;
 793         /* do we need to copy or can we just share? */
 794         give_page = 0;
 795         if ((area->vm_page_prot & PAGE_COW) && (error_code & PAGE_RW)) {
 796                 if (!newpage)
 797                         return 0;
 798                 give_page = newpage;
 799         }
 800         offset = address - area->vm_start + area->vm_offset;
 801         /* See if there is something in the VM we can share pages with. */
 802         /* Traverse the entire circular i_mmap list, except `area' itself. */
 803         for (mpnt = area->vm_next_share; mpnt != area; mpnt = mpnt->vm_next_share) {
 804                 /* must be same inode */
 805                 if (mpnt->vm_inode != inode) {
 806                         printk("Aiee! Corrupt vm_area_struct i_mmap ring\n");
 807                         break;  
 808                 }
 809                 /* offsets must be mutually page-aligned */
 810                 if ((mpnt->vm_offset ^ area->vm_offset) & ~PAGE_MASK)
 811                         continue;
 812                 /* the other area must actually cover the wanted page.. */
 813                 from_address = offset + mpnt->vm_start - mpnt->vm_offset;
 814                 if (from_address < mpnt->vm_start || from_address >= mpnt->vm_end)
 815                         continue;
 816                 /* .. NOW we can actually try to use the same physical page */
 817                 if (!try_to_share(address, area, from_address, mpnt, give_page))
 818                         continue;
 819                 /* free newpage if we never used it.. */
 820                 if (give_page || !newpage)
 821                         return 1;
 822                 free_page(newpage);
 823                 return 1;
 824         }
 825         return 0;
 826 }
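
The "(mpnt->vm_offset ^ area->vm_offset) & ~PAGE_MASK" test above encodes the rule that two mappings can share a physical page only if their file offsets differ by a whole number of pages. A small worked sketch, assuming 4 KB pages:

        /*
         * Hypothetical illustration, not part of this file: offsets 0x1000
         * and 0x5000 are page-congruent, 0x1000 and 0x1800 are not.
         */
        unsigned long off_a = 0x1000, off_b = 0x5000, off_c = 0x1800;
        /* (off_a ^ off_b) & ~PAGE_MASK == 0x000  -> the page may be shared    */
        /* (off_a ^ off_c) & ~PAGE_MASK == 0x800  -> the page cannot be shared */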
 827 
 828 /*
 829  * fill in an empty page-table if none exists.
 830  */
 831 static inline unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
 832 {
 833         unsigned long page;
 834         unsigned long *p;
 835 
 836         p = PAGE_DIR_OFFSET(tsk,address);
 837         if (PAGE_PRESENT & *p)
 838                 return *p;
 839         if (*p) {
 840                 printk("get_empty_pgtable: bad page-directory entry \n");
 841                 *p = 0;
 842         }
 843         page = get_free_page(GFP_KERNEL);
 844         p = PAGE_DIR_OFFSET(tsk,address);
 845         if (PAGE_PRESENT & *p) {
 846                 free_page(page);
 847                 return *p;
 848         }
 849         if (*p) {
 850                 printk("get_empty_pgtable: bad page-directory entry \n");
 851                 *p = 0;
 852         }
 853         if (page) {
 854                 *p = page | PAGE_TABLE;
 855                 return *p;
 856         }
 857         oom(current);
 858         *p = BAD_PAGETABLE | PAGE_TABLE;
 859         return 0;
 860 }
 861 
 862 static inline void do_swap_page(struct vm_area_struct * vma,
 863         unsigned long address, unsigned long * pge, unsigned long entry)
 864 {
 865         unsigned long page;
 866 
 867         if (vma->vm_ops && vma->vm_ops->swapin)
 868                 page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, entry);
 869         else
 870                 page = swap_in(entry);
 871         if (*pge != entry) {
 872                 free_page(page);
 873                 return;
 874         }
 875         page = page | vma->vm_page_prot;
 876         if (mem_map[MAP_NR(page)] > 1 && (page & PAGE_COW))
 877                 page &= ~PAGE_RW;
 878         ++vma->vm_task->mm->rss;
 879         ++vma->vm_task->mm->maj_flt;
 880         *pge = page;
 881         return;
 882 }
 883 
 884 void do_no_page(struct vm_area_struct * vma, unsigned long address,
 885         unsigned long error_code)
 886 {
 887         unsigned long page, entry, prot;
 888 
 889         page = get_empty_pgtable(vma->vm_task,address);
 890         if (!page)
 891                 return;
 892         page &= PAGE_MASK;
 893         page += PAGE_PTR(address);
 894         entry = *(unsigned long *) page;
 895         if (entry & PAGE_PRESENT)
 896                 return;
 897         if (entry) {
 898                 do_swap_page(vma, address, (unsigned long *) page, entry);
 899                 return;
 900         }
 901         address &= PAGE_MASK;
 902 
 903         if (!vma->vm_ops || !vma->vm_ops->nopage) {
 904                 ++vma->vm_task->mm->rss;
 905                 ++vma->vm_task->mm->min_flt;
 906                 get_empty_page(vma->vm_task,address);
 907                 return;
 908         }
 909         page = get_free_page(GFP_KERNEL);
 910         if (share_page(vma, address, error_code, page)) {
 911                 ++vma->vm_task->mm->min_flt;
 912                 ++vma->vm_task->mm->rss;
 913                 return;
 914         }
 915         if (!page) {
 916                 oom(current);
 917                 put_page(vma->vm_task, BAD_PAGE, address, PAGE_PRIVATE);
 918                 return;
 919         }
 920         ++vma->vm_task->mm->maj_flt;
 921         ++vma->vm_task->mm->rss;
 922         prot = vma->vm_page_prot;
 923         /*
 924          * The fourth argument is "no_share", which tells the low-level code
 925          * to copy, not share the page even if sharing is possible.  It's
 926          * essentially an early COW detection ("moo at 5 AM").
 927          */
 928         page = vma->vm_ops->nopage(vma, address, page, (error_code & PAGE_RW) && (prot & PAGE_COW));
 929         if (share_page(vma, address, error_code, 0)) {
 930                 free_page(page);
 931                 return;
 932         }
 933         /*
 934          * This silly early PAGE_DIRTY setting removes a race
 935          * due to the bad i386 page protection.
 936          */
 937         if (error_code & PAGE_RW) {
 938                 prot |= PAGE_DIRTY;     /* can't be COW-shared: see "no_share" above */
 939         } else if ((prot & PAGE_COW) && mem_map[MAP_NR(page)] > 1)
 940                 prot &= ~PAGE_RW;
 941         if (put_page(vma->vm_task, page, address, prot))
 942                 return;
 943         free_page(page);
 944         oom(current);
 945 }
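
do_no_page() and do_wp_page() are the two entry points driven by the architecture's page-fault handler, which lives outside this file. On the i386 the low bit of the fault error code says whether the faulting page was present, and that is what selects between them. A hedged dispatch sketch; the real handler differs in detail (it also looks up the vm_area_struct and checks permissions first):

        /*
         * Hypothetical dispatch sketch, not this kernel's actual fault
         * handler: error-code bit 0 set means a protection fault on a
         * present page (typically a write to a read-only, possibly
         * copy-on-write page); clear means the page was not present.
         */
        if (error_code & PAGE_PRESENT)
                do_wp_page(vma, address, error_code);
        else
                do_no_page(vma, address, error_code);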
