root/mm/memory.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. oom
  2. free_one_table
  3. clear_page_tables
  4. free_page_tables
  5. clone_page_tables
  6. copy_page_tables
  7. unmap_page_range
  8. zeromap_page_range
  9. remap_page_range
  10. put_page
  11. put_dirty_page
  12. __do_wp_page
  13. do_wp_page
  14. verify_area
  15. get_empty_page
  16. try_to_share
  17. share_page
  18. get_empty_pgtable
  19. do_no_page
  20. do_page_fault
  21. __bad_pagetable
  22. __bad_page
  23. __zero_page
  24. show_mem
  25. paging_init
  26. mem_init
  27. si_meminfo

   1 /*
   2  *  linux/mm/memory.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * demand-loading started 01.12.91 - seems it is high on the list of
   9  * things wanted, and it should be easy to implement. - Linus
  10  */
  11 
  12 /*
  13  * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
  14  * pages started 02.12.91, seems to work. - Linus.
  15  *
  16  * Tested sharing by executing about 30 /bin/sh: under the old kernel it
  17  * would have taken more than the 6M I have free, but it worked well as
  18  * far as I could see.
  19  *
  20  * Also corrected some "invalidate()"s - I wasn't doing enough of them.
  21  */
  22 
  23 /*
  24  * Real VM (paging to/from disk) started 18.12.91. Much more work and
  25  * thought has to go into this. Oh, well..
  26  * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
  27  *              Found it. Everything seems to work now.
  28  * 20.12.91  -  Ok, making the swap-device changeable like the root.
  29  */
  30 
  31 #include <asm/system.h>
  32 
  33 #include <linux/signal.h>
  34 #include <linux/sched.h>
  35 #include <linux/head.h>
  36 #include <linux/kernel.h>
  37 #include <linux/errno.h>
  38 #include <linux/string.h>
  39 #include <linux/types.h>
  40 
  41 unsigned long high_memory = 0;
  42 
  43 extern void sound_mem_init(void);
  44 
  45 int nr_free_pages = 0;
  46 unsigned long free_page_list = 0;
  47 /*
  48  * The secondary free_page_list is used for malloc() etc things that
  49  * may need pages during interrupts etc. Normal get_free_page() operations
  50  * don't touch it, so it stays as a kind of "panic-list", that can be
  51  * accessed when all other mm tricks have failed.
  52  */
  53 int nr_secondary_pages = 0;
  54 unsigned long secondary_page_list = 0;
  55 
  56 #define copy_page(from,to) \
  57 __asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024):"cx","di","si")
  58 
  59 unsigned short * mem_map = NULL;
  60 
  61 #define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
  62 
  63 /*
  64  * oom() prints a message (so that the user knows why the process died),
  65  * and gives the process an untrappable SIGSEGV.
  66  */
  67 void oom(struct task_struct * task)
     /* [previous][next][first][last][top][bottom][index][help] */
  68 {
  69         printk("\nout of memory\n");
  70         task->sigaction[SIGKILL-1].sa_handler = NULL;
  71         task->blocked &= ~(1<<(SIGKILL-1));
  72         send_sig(SIGKILL,task,1);
  73 }
  74 
  75 static void free_one_table(unsigned long * page_dir)
     /* [previous][next][first][last][top][bottom][index][help] */
  76 {
  77         int j;
  78         unsigned long pg_table = *page_dir;
  79         unsigned long * page_table;
  80 
  81         if (!pg_table)
  82                 return;
  83         *page_dir = 0;
  84         if (pg_table >= high_memory || !(pg_table & PAGE_PRESENT)) {
  85                 printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
  86                 return;
  87         }
  88         if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
  89                 return;
  90         page_table = (unsigned long *) (pg_table & 0xfffff000);
  91         for (j = 0 ; j < 1024 ; j++,page_table++) {
  92                 unsigned long pg = *page_table;
  93                 
  94                 if (!pg)
  95                         continue;
  96                 *page_table = 0;
  97                 if (pg & PAGE_PRESENT)
  98                         free_page(0xfffff000 & pg);
  99                 else
 100                         swap_free(pg);
 101         }
 102         free_page(0xfffff000 & pg_table);
 103 }
 104 
 105 /*
 106  * This function clears all user-level page tables of a process - this
 107  * is needed by execve(), so that old pages aren't in the way. Note that
 108  * unlike 'free_page_tables()', this function still leaves a valid
 109  * page-table-tree in memory: it just removes the user pages. The two
 110  * functions are similar, but there is a fundamental difference.
 111  */
 112 void clear_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114         int i;
 115         unsigned long * page_dir;
 116 
 117         if (!tsk)
 118                 return;
 119         if (tsk == task[0])
 120                 panic("task[0] (swapper) doesn't support exec()\n");
 121         page_dir = (unsigned long *) tsk->tss.cr3;
 122         if (!page_dir || page_dir == swapper_pg_dir) {
 123                 printk("Trying to clear kernel page-directory: not good\n");
 124                 return;
 125         }
 126         for (i = 0 ; i < 768 ; i++,page_dir++)
 127                 free_one_table(page_dir);
 128         invalidate();
 129         return;
 130 }
 131 
 132 /*
 133  * This function frees up all page tables of a process when it exits.
 134  */
 135 void free_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 136 {
 137         int i;
 138         unsigned long pg_dir;
 139         unsigned long * page_dir;
 140 
 141         if (!tsk)
 142                 return;
 143         if (tsk == task[0]) {
 144                 printk("task[0] (swapper) killed: unable to recover\n");
 145                 panic("Trying to free up swapper memory space");
 146         }
 147         pg_dir = tsk->tss.cr3;
 148         if (!pg_dir || pg_dir == (unsigned long) swapper_pg_dir) {
 149                 printk("Trying to free kernel page-directory: not good\n");
 150                 return;
 151         }
 152         tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
 153         if (tsk == current)
 154                 __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
 155         if (mem_map[MAP_NR(pg_dir)] > 1) {
 156                 free_page(pg_dir);
 157                 return;
 158         }
 159         page_dir = (unsigned long *) pg_dir;
 160         for (i = 0 ; i < 1024 ; i++,page_dir++)
 161                 free_one_table(page_dir);
 162         free_page(pg_dir);
 163         invalidate();
 164 }
 165 
 166 /*
 167  * clone_page_tables() clones the page table for a process - both
 168  * processes will have the exact same pages in memory. There are
 169  * probably races in the memory management with cloning, but we'll
 170  * see..
 171  */
 172 int clone_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 173 {
 174         unsigned long pg_dir;
 175 
 176         pg_dir = current->tss.cr3;
 177         mem_map[MAP_NR(pg_dir)]++;
 178         tsk->tss.cr3 = pg_dir;
 179         return 0;
 180 }
 181 
 182 /*
 183  * copy_page_tables() just copies the whole process memory range:
 184  * note the special handling of RESERVED (ie kernel) pages, which
 185  * means that they are always shared by all processes.
 186  */
 187 int copy_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 188 {
 189         int i;
 190         unsigned long old_pg_dir, *old_page_dir;
 191         unsigned long new_pg_dir, *new_page_dir;
 192 
 193         new_pg_dir = get_free_page(GFP_KERNEL);
 194         if (!new_pg_dir)
 195                 return -ENOMEM;
 196         old_pg_dir = current->tss.cr3;
 197         tsk->tss.cr3 = new_pg_dir;
 198         old_page_dir = (unsigned long *) old_pg_dir;
 199         new_page_dir = (unsigned long *) new_pg_dir;
 200         for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) {
 201                 int j;
 202                 unsigned long old_pg_table, *old_page_table;
 203                 unsigned long new_pg_table, *new_page_table;
 204 
 205                 old_pg_table = *old_page_dir;
 206                 if (!old_pg_table)
 207                         continue;
 208                 if (old_pg_table >= high_memory || !(old_pg_table & PAGE_PRESENT)) {
 209                         printk("copy_page_tables: bad page table: "
 210                                 "probable memory corruption");
 211                         *old_page_dir = 0;
 212                         continue;
 213                 }
 214                 if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
 215                         *new_page_dir = old_pg_table;
 216                         continue;
 217                 }
 218                 new_pg_table = get_free_page(GFP_KERNEL);
 219                 if (!new_pg_table) {
 220                         free_page_tables(tsk);
 221                         return -ENOMEM;
 222                 }
 223                 old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
 224                 new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
 225                 for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
 226                         unsigned long pg;
 227                         pg = *old_page_table;
 228                         if (!pg)
 229                                 continue;
 230                         if (!(pg & PAGE_PRESENT)) {
 231                                 *new_page_table = swap_duplicate(pg);
 232                                 continue;
 233                         }
 234                         if ((pg & (PAGE_RW | PAGE_COW)) == (PAGE_RW | PAGE_COW))
 235                                 pg &= ~PAGE_RW;
 236                         *new_page_table = pg;
 237                         if (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)
 238                                 continue;
 239                         *old_page_table = pg;
 240                         mem_map[MAP_NR(pg)]++;
 241                 }
 242                 *new_page_dir = new_pg_table | PAGE_TABLE;
 243         }
 244         invalidate();
 245         return 0;
 246 }
 247 
 248 /*
 249  * a more complete version of free_page_tables which performs with page
 250  * granularity.
 251  */
 252 int unmap_page_range(unsigned long from, unsigned long size)
     /* [previous][next][first][last][top][bottom][index][help] */
 253 {
 254         unsigned long page, page_dir;
 255         unsigned long *page_table, *dir;
 256         unsigned long poff, pcnt, pc;
 257 
 258         if (from & 0xfff) {
 259                 printk("unmap_page_range called with wrong alignment\n");
 260                 return -EINVAL;
 261         }
 262         size = (size + 0xfff) >> PAGE_SHIFT;
 263         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 264         poff = (from >> PAGE_SHIFT) & 0x3ff;
 265         if ((pcnt = 1024 - poff) > size)
 266                 pcnt = size;
 267 
 268         for ( ; size > 0; ++dir, size -= pcnt,
 269              pcnt = (size > 1024 ? 1024 : size)) {
 270                 if (!(page_dir = *dir)) {
 271                         poff = 0;
 272                         continue;
 273                 }
 274                 if (!(page_dir & PAGE_PRESENT)) {
 275                         printk("unmap_page_range: bad page directory.");
 276                         continue;
 277                 }
 278                 page_table = (unsigned long *)(0xfffff000 & page_dir);
 279                 if (poff) {
 280                         page_table += poff;
 281                         poff = 0;
 282                 }
 283                 for (pc = pcnt; pc--; page_table++) {
 284                         if ((page = *page_table) != 0) {
 285                                 *page_table = 0;
 286                                 if (1 & page) {
 287                                         --current->rss;
 288                                         free_page(0xfffff000 & page);
 289                                 } else
 290                                         swap_free(page);
 291                         }
 292                 }
 293                 if (pcnt == 1024) {
 294                         free_page(0xfffff000 & page_dir);
 295                         *dir = 0;
 296                 }
 297         }
 298         invalidate();
 299         return 0;
 300 }
 301 
 302 int zeromap_page_range(unsigned long from, unsigned long size, int mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 303 {
 304         unsigned long *page_table, *dir;
 305         unsigned long poff, pcnt;
 306         unsigned long page;
 307 
 308         if (mask) {
 309                 if ((mask & 0xfffff001) != PAGE_PRESENT) {
 310                         printk("zeromap_page_range: mask = %08x\n",mask);
 311                         return -EINVAL;
 312                 }
 313                 mask |= ZERO_PAGE;
 314         }
 315         if (from & 0xfff) {
 316                 printk("zeromap_page_range: from = %08x\n",from);
 317                 return -EINVAL;
 318         }
 319         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 320         size = (size + 0xfff) >> PAGE_SHIFT;
 321         poff = (from >> PAGE_SHIFT) & 0x3ff;
 322         if ((pcnt = 1024 - poff) > size)
 323                 pcnt = size;
 324 
 325         while (size > 0) {
 326                 if (!(PAGE_PRESENT & *dir)) {
 327                         if (!(page_table = (unsigned long *)get_free_page(GFP_KERNEL))) {
 328                                 invalidate();
 329                                 return -ENOMEM;
 330                         }
 331                         if (PAGE_PRESENT & *dir) {
 332                                 free_page((unsigned long) page_table);
 333                                 page_table = (unsigned long *)(0xfffff000 & *dir++);
 334                         } else
 335                                 *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
 336                 } else
 337                         page_table = (unsigned long *)(0xfffff000 & *dir++);
 338                 page_table += poff;
 339                 poff = 0;
 340                 for (size -= pcnt; pcnt-- ;) {
 341                         if ((page = *page_table) != 0) {
 342                                 *page_table = 0;
 343                                 if (page & PAGE_PRESENT) {
 344                                         --current->rss;
 345                                         free_page(0xfffff000 & page);
 346                                 } else
 347                                         swap_free(page);
 348                         }
 349                         if (mask)
 350                                 ++current->rss;
 351                         *page_table++ = mask;
 352                 }
 353                 pcnt = (size > 1024 ? 1024 : size);
 354         }
 355         invalidate();
 356         return 0;
 357 }
 358 
 359 /*
 360  * maps a range of physical memory into the requested pages. the old
 361  * mappings are removed. any references to nonexistent pages results
 362  * in null mappings (currently treated as "copy-on-access")
 363  */
 364 int remap_page_range(unsigned long from, unsigned long to, unsigned long size, int mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366         unsigned long *page_table, *dir;
 367         unsigned long poff, pcnt;
 368         unsigned long page;
 369 
 370         if (mask) {
 371                 if ((mask & 0xfffff001) != PAGE_PRESENT) {
 372                         printk("remap_page_range: mask = %08x\n",mask);
 373                         return -EINVAL;
 374                 }
 375         }
 376         if ((from & 0xfff) || (to & 0xfff)) {
 377                 printk("remap_page_range: from = %08x, to=%08x\n",from,to);
 378                 return -EINVAL;
 379         }
 380         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 381         size = (size + 0xfff) >> PAGE_SHIFT;
 382         poff = (from >> PAGE_SHIFT) & 0x3ff;
 383         if ((pcnt = 1024 - poff) > size)
 384                 pcnt = size;
 385 
 386         while (size > 0) {
 387                 if (!(PAGE_PRESENT & *dir)) {
 388                         if (!(page_table = (unsigned long *)get_free_page(GFP_KERNEL))) {
 389                                 invalidate();
 390                                 return -1;
 391                         }
 392                         *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
 393                 }
 394                 else
 395                         page_table = (unsigned long *)(0xfffff000 & *dir++);
 396                 if (poff) {
 397                         page_table += poff;
 398                         poff = 0;
 399                 }
 400 
 401                 for (size -= pcnt; pcnt-- ;) {
 402                         if ((page = *page_table) != 0) {
 403                                 *page_table = 0;
 404                                 if (PAGE_PRESENT & page) {
 405                                         --current->rss;
 406                                         free_page(0xfffff000 & page);
 407                                 } else
 408                                         swap_free(page);
 409                         }
 410 
 411                         /*
 412                          * i'm not sure of the second cond here. should we
 413                          * report failure?
 414                          * the first condition should return an invalid access
 415                          * when the page is referenced. current assumptions
 416                          * cause it to be treated as demand allocation.
 417                          */
 418                         if (!mask || to >= high_memory || !mem_map[MAP_NR(to)])
 419                                 *page_table++ = 0;      /* not present */
 420                         else {
 421                                 ++current->rss;
 422                                 *page_table++ = (to | mask);
 423                                 if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED))
 424                                         mem_map[MAP_NR(to)]++;
 425                         }
 426                         to += PAGE_SIZE;
 427                 }
 428                 pcnt = (size > 1024 ? 1024 : size);
 429         }
 430         invalidate();
 431         return 0;
 432 }
 433 
 434 /*
 435  * This function puts a page in memory at the wanted address.
 436  * It returns the physical address of the page gotten, 0 if
 437  * out of memory (either when trying to access page-table or
 438  * page.)
 439  */
 440 static unsigned long put_page(struct task_struct * tsk,unsigned long page,
     /* [previous][next][first][last][top][bottom][index][help] */
 441         unsigned long address,int prot)
 442 {
 443         unsigned long tmp, *page_table;
 444 
 445         if ((prot & 0xfffff001) != PAGE_PRESENT)
 446                 printk("put_page: prot = %08x\n",prot);
 447         if (page >= high_memory) {
 448                 printk("put_page: trying to put page %p at %p\n",page,address);
 449                 return 0;
 450         }
 451         tmp = mem_map[MAP_NR(page)];
 452         if (!(tmp & MAP_PAGE_RESERVED) && (tmp != 1)) {
 453                 printk("put_page: mem_map disagrees with %p at %p\n",page,address);
 454                 return 0;
 455         }
 456         page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 457         if ((*page_table) & PAGE_PRESENT)
 458                 page_table = (unsigned long *) (0xfffff000 & *page_table);
 459         else {
 460                 printk("put_page: bad page directory entry\n");
 461                 oom(tsk);
 462                 *page_table = BAD_PAGETABLE | PAGE_TABLE;
 463                 return 0;
 464         }
 465         page_table += ((address & 0x003ff000) >> PAGE_SHIFT);
 466         if (*page_table) {
 467                 printk("put_page: page already exists\n");
 468                 *page_table = 0;
 469                 invalidate();
 470         }
 471         *page_table = page | prot;
 472 /* no need for invalidate */
 473         return page;
 474 }
 475 
 476 /*
 477  * The previous function doesn't work very well if you also want to mark
 478  * the page dirty: exec.c wants this, as it has earlier changed the page,
 479  * and we want the dirty-status to be correct (for VM). Thus the same
 480  * routine, but this time we mark it dirty too.
 481  */
 482 unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
     /* [previous][next][first][last][top][bottom][index][help] */
 483 {
 484         unsigned long tmp, *page_table;
 485 
 486         if (page >= high_memory)
 487                 printk("put_dirty_page: trying to put page %p at %p\n",page,address);
 488         if (mem_map[MAP_NR(page)] != 1)
 489                 printk("mem_map disagrees with %p at %p\n",page,address);
 490         page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 491         if (PAGE_PRESENT & *page_table)
 492                 page_table = (unsigned long *) (0xfffff000 & *page_table);
 493         else {
 494                 if (!(tmp=get_free_page(GFP_KERNEL)))
 495                         return 0;
 496                 if (PAGE_PRESENT & *page_table) {
 497                         free_page(tmp);
 498                         page_table = (unsigned long *) (0xfffff000 & *page_table);
 499                 } else {
 500                         *page_table = tmp | PAGE_TABLE;
 501                         page_table = (unsigned long *) tmp;
 502                 }
 503         }
 504         page_table += (address >> PAGE_SHIFT) & 0x3ff;
 505         if (*page_table) {
 506                 printk("put_dirty_page: page already exists\n");
 507                 *page_table = 0;
 508                 invalidate();
 509         }
 510         *page_table = page | (PAGE_DIRTY | PAGE_PRIVATE);
 511 /* no need for invalidate */
 512         return page;
 513 }
 514 
 515 /*
 516  * This routine handles present pages, when users try to write
 517  * to a shared page. It is done by copying the page to a new address
 518  * and decrementing the shared-page counter for the old page.
 519  *
 520  * Note that we do many checks twice (look at do_wp_page()), as
 521  * we have to be careful about race-conditions.
 522  *
 523  * Goto-purists beware: the only reason for goto's here is that it results
 524  * in better assembly code.. The "default" path will see no jumps at all.
 525  */
 526 static void __do_wp_page(unsigned long error_code, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 527         struct task_struct * tsk, unsigned long user_esp)
 528 {
 529         unsigned long pde, pte, old_page, prot;
 530         unsigned long new_page;
 531 
 532         new_page = __get_free_page(GFP_KERNEL);
 533         pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
 534         pte = *(unsigned long *) pde;
 535         if (!(pte & PAGE_PRESENT))
 536                 goto end_wp_page;
 537         if ((pte & PAGE_TABLE) != PAGE_TABLE || pte >= high_memory)
 538                 goto bad_wp_pagetable;
 539         pte &= 0xfffff000;
 540         pte += (address>>10) & 0xffc;
 541         old_page = *(unsigned long *) pte;
 542         if (!(old_page & PAGE_PRESENT))
 543                 goto end_wp_page;
 544         if (old_page >= high_memory)
 545                 goto bad_wp_page;
 546         if (old_page & PAGE_RW)
 547                 goto end_wp_page;
 548         tsk->min_flt++;
 549         prot = (old_page & 0x00000fff) | PAGE_RW;
 550         old_page &= 0xfffff000;
 551         if (mem_map[MAP_NR(old_page)] != 1) {
 552                 if (new_page) {
 553                         copy_page(old_page,new_page);
 554                         *(unsigned long *) pte = new_page | prot;
 555                         free_page(old_page);
 556                         invalidate();
 557                         return;
 558                 }
 559                 free_page(old_page);
 560                 oom(tsk);
 561                 *(unsigned long *) pte = BAD_PAGE | prot;
 562                 invalidate();
 563                 return;
 564         }
 565         *(unsigned long *) pte |= PAGE_RW;
 566         invalidate();
 567         if (new_page)
 568                 free_page(new_page);
 569         return;
 570 bad_wp_page:
 571         printk("do_wp_page: bogus page at address %08x (%08x)\n",address,old_page);
 572         *(unsigned long *) pte = BAD_PAGE | PAGE_SHARED;
 573         send_sig(SIGKILL, tsk, 1);
 574         goto end_wp_page;
 575 bad_wp_pagetable:
 576         printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
 577         *(unsigned long *) pde = BAD_PAGETABLE | PAGE_TABLE;
 578         send_sig(SIGKILL, tsk, 1);
 579 end_wp_page:
 580         if (new_page)
 581                 free_page(new_page);
 582         return;
 583 }
 584 
 585 /*
 586  * check that a page table change is actually needed, and call
 587  * the low-level function only in that case..
 588  */
 589 void do_wp_page(unsigned long error_code, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 590         struct task_struct * tsk, unsigned long user_esp)
 591 {
 592         unsigned long page;
 593         unsigned long * pg_table;
 594 
 595         pg_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 596         page = *pg_table;
 597         if (!page)
 598                 return;
 599         if ((page & PAGE_PRESENT) && page < high_memory) {
 600                 pg_table = (unsigned long *) ((page & 0xfffff000) + ((address>>10) & 0xffc));
 601                 page = *pg_table;
 602                 if (!(page & PAGE_PRESENT))
 603                         return;
 604                 if (page & PAGE_RW)
 605                         return;
 606                 if (!(page & PAGE_COW))
 607                         if (user_esp && tsk == current)
 608                                 send_sig(SIGSEGV, tsk, 1);
 609                 if (mem_map[MAP_NR(page)] == 1) {
 610                         *pg_table |= PAGE_RW;
 611                         invalidate();
 612                         return;
 613                 }
 614                 __do_wp_page(error_code, address, tsk, user_esp);
 615                 return;
 616         }
 617         printk("bad page directory entry %08x\n",page);
 618         *pg_table = 0;
 619 }
 620 
 621 int verify_area(int type, void * addr, unsigned long size)
     /* [previous][next][first][last][top][bottom][index][help] */
 622 {
 623         unsigned long start;
 624 
 625         start = (unsigned long) addr;
 626         if (start >= TASK_SIZE)
 627                 return -EFAULT;
 628         if (size > TASK_SIZE - start)
 629                 return -EFAULT;
 630         if (type == VERIFY_READ || !size)
 631                 return 0;
 632         if (!size)
 633                 return 0;
 634         size--;
 635         size += start & 0xfff;
 636         size >>= 12;
 637         start &= 0xfffff000;
 638         do {
 639                 do_wp_page(1,start,current,0);
 640                 start += 4096;
 641         } while (size--);
 642         return 0;
 643 }
 644 
 645 static void get_empty_page(struct task_struct * tsk, unsigned long address)
     /* [previous][next][first][last][top][bottom][index][help] */
 646 {
 647         unsigned long tmp;
 648 
 649         tmp = get_free_page(GFP_KERNEL);
 650         if (!tmp) {
 651                 oom(tsk);
 652                 tmp = BAD_PAGE;
 653         }
 654         if (!put_page(tsk,tmp,address,PAGE_PRIVATE))
 655                 free_page(tmp);
 656 }
 657 
 658 /*
 659  * try_to_share() checks the page at address "address" in the task "p",
 660  * to see if it exists, and if it is clean. If so, share it with the current
 661  * task.
 662  *
 663  * NOTE! This assumes we have checked that p != current, and that they
 664  * share the same executable or library.
 665  */
 666 static int try_to_share(unsigned long address, struct task_struct * tsk,
     /* [previous][next][first][last][top][bottom][index][help] */
 667         struct task_struct * p, unsigned long error_code, unsigned long newpage)
 668 {
 669         unsigned long from;
 670         unsigned long to;
 671         unsigned long from_page;
 672         unsigned long to_page;
 673 
 674         from_page = p->tss.cr3 + ((address>>20) & 0xffc);
 675         to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
 676 /* is there a page-directory at from? */
 677         from = *(unsigned long *) from_page;
 678         if (!(from & PAGE_PRESENT))
 679                 return 0;
 680         from &= 0xfffff000;
 681         from_page = from + ((address>>10) & 0xffc);
 682         from = *(unsigned long *) from_page;
 683 /* is the page clean and present? */
 684         if ((from & (PAGE_PRESENT | PAGE_DIRTY)) != PAGE_PRESENT)
 685                 return 0;
 686         if (from >= high_memory)
 687                 return 0;
 688         if (mem_map[MAP_NR(from)] & MAP_PAGE_RESERVED)
 689                 return 0;
 690 /* is the destination ok? */
 691         to = *(unsigned long *) to_page;
 692         if (!(to & PAGE_PRESENT))
 693                 return 0;
 694         to &= 0xfffff000;
 695         to_page = to + ((address>>10) & 0xffc);
 696         if (*(unsigned long *) to_page)
 697                 return 0;
 698 /* share them if read - do COW immediately otherwise */
 699         if (error_code & PAGE_RW) {
 700                 copy_page((from & 0xfffff000),newpage);
 701                 to = newpage | PAGE_PRIVATE;
 702         } else {
 703                 mem_map[MAP_NR(from)]++;
 704                 from &= ~PAGE_RW;
 705                 to = from;
 706                 free_page(newpage);
 707         }
 708         *(unsigned long *) from_page = from;
 709         *(unsigned long *) to_page = to;
 710         invalidate();
 711         return 1;
 712 }
 713 
 714 /*
 715  * share_page() tries to find a process that could share a page with
 716  * the current one. Address is the address of the wanted page relative
 717  * to the current data space.
 718  *
 719  * We first check if it is at all feasible by checking executable->i_count.
 720  * It should be >1 if there are other tasks sharing this inode.
 721  */
 722 static int share_page(struct task_struct * tsk, struct inode * inode,
     /* [previous][next][first][last][top][bottom][index][help] */
 723         unsigned long address, unsigned long error_code, unsigned long newpage)
 724 {
 725         struct task_struct ** p;
 726         int i;
 727 
 728         if (!inode || inode->i_count < 2)
 729                 return 0;
 730         for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
 731                 if (!*p)
 732                         continue;
 733                 if (tsk == *p)
 734                         continue;
 735                 if (inode != (*p)->executable) {
 736                         for (i=0; i < (*p)->numlibraries; i++)
 737                                 if (inode == (*p)->libraries[i].library)
 738                                         break;
 739                         if (i >= (*p)->numlibraries)
 740                                 continue;
 741                 }
 742                 if (try_to_share(address,tsk,*p,error_code,newpage))
 743                         return 1;
 744         }
 745         return 0;
 746 }
 747 
 748 /*
 749  * fill in an empty page-table if none exists.
 750  */
 751 static inline unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
     /* [previous][next][first][last][top][bottom][index][help] */
 752 {
 753         unsigned long page;
 754         unsigned long *p;
 755 
 756         p = (unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc));
 757         if (PAGE_PRESENT & *p)
 758                 return *p;
 759         if (*p) {
 760                 printk("get_empty_pgtable: bad page-directory entry \n");
 761                 *p = 0;
 762         }
 763         page = get_free_page(GFP_KERNEL);
 764         p = (unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc));
 765         if (PAGE_PRESENT & *p) {
 766                 free_page(page);
 767                 return *p;
 768         }
 769         if (*p) {
 770                 printk("get_empty_pgtable: bad page-directory entry \n");
 771                 *p = 0;
 772         }
 773         if (page) {
 774                 *p = page | PAGE_TABLE;
 775                 return *p;
 776         }
 777         oom(current);
 778         *p = BAD_PAGETABLE | PAGE_TABLE;
 779         return 0;
 780 }
 781 
/*
 * do_no_page() handles a fault on a not-present page: swap the page
 * back in, demand-load it from the executable or a shared library,
 * or hand out a fresh anonymous page.  'error_code' is the i386
 * page-fault error code, 'user_esp' the user stack pointer at the
 * time of the fault (0 if the fault came from kernel mode).
 */
void do_no_page(unsigned long error_code, unsigned long address,
        struct task_struct *tsk, unsigned long user_esp)
{
        int nr[8], prot;
        unsigned long tmp;
        unsigned long page;
        unsigned int block,i;
        struct inode * inode;

        page = get_empty_pgtable(tsk,address);
        if (!page)
                return;
        /* point 'page' at the page-table entry for the faulting address */
        page &= 0xfffff000;
        page += (address >> 10) & 0xffc;
        tmp = *(unsigned long *) page;
        if (tmp & PAGE_PRESENT)
                return;
        ++tsk->rss;
        /* a non-zero but not-present entry means the page is on swap */
        if (tmp) {
                ++tsk->maj_flt;
                swap_in((unsigned long *) page);
                return;
        }
        address &= 0xfffff000;
        inode = NULL;
        block = 0;
        /* determine the backing store: the executable below end_data,
           otherwise search the shared libraries */
        if (address < tsk->end_data) {
                inode = tsk->executable;
                block = 1 + address / BLOCK_SIZE;
        } else {
                i = tsk->numlibraries;
                while (i-- > 0) {
                        if (address < tsk->libraries[i].start)
                                continue;
                        block = address - tsk->libraries[i].start;
                        if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
                                continue;
                        inode = tsk->libraries[i].library;
                        if (block < tsk->libraries[i].length)
                                block = 1 + block / BLOCK_SIZE;
                        else
                                block = 0;      /* in the bss: zero-fill, no block */
                        break;
                }
        }
        if (!inode) {
                /* no backing store: anonymous page (bss/stack) */
                ++tsk->min_flt;
                get_empty_page(tsk,address);
                if (tsk != current)
                        return;
                if (address < tsk->brk)
                        return;
                /* accept anything within 8kB below the user stack
                   pointer as legitimate stack growth */
                if (address+8192 >= (user_esp & 0xfffff000))
                        return;
                send_sig(SIGSEGV,tsk,1);
                return;
        }
        page = get_free_page(GFP_KERNEL);
        if (share_page(tsk,inode,address,error_code,page)) {
                ++tsk->min_flt;
                return;
        }
        ++tsk->maj_flt;
        if (!page) {
                oom(current);
                put_page(tsk,BAD_PAGE,address,PAGE_PRIVATE);
                return;
        }
        prot = PAGE_PRIVATE;
        if (CODE_SPACE(address, tsk))
                prot = PAGE_READONLY;
        if (block) {
                /* one page is four 1kB blocks: map and read them all */
                for (i=0 ; i<4 ; block++,i++)
                        nr[i] = bmap(inode,block);
                page = bread_page(page,inode->i_dev,nr,1024,prot);
        }
        /* the read may have slept: for read faults, try sharing again */
        if (!(error_code & PAGE_RW) && share_page(tsk,inode,address, error_code,page))
                return;
        /* zero out the tail of the page beyond end_data */
        i = address + PAGE_SIZE - tsk->end_data;
        if (i > PAGE_SIZE-1)
                i = 0;
        tmp = page + PAGE_SIZE;
        while (i--) {
                tmp--;
                *(char *)tmp = 0;
        }
        if (put_page(tsk,page,address,prot))
                return;
        free_page(page);
        oom(current);
}
 873 
 874 /*
 875  * This routine handles page faults.  It determines the address,
 876  * and the problem, and then passes it off to one of the appropriate
 877  * routines.
 878  */
 879 void do_page_fault(unsigned long *esp, unsigned long error_code)
     /* [previous][next][first][last][top][bottom][index][help] */
 880 {
 881         unsigned long address;
 882         unsigned long user_esp = 0;
 883         unsigned long stack_limit;
 884         unsigned int bit;
 885         extern void die_if_kernel(char *,long,long);
 886 
 887         /* get the address */
 888         __asm__("movl %%cr2,%0":"=r" (address));
 889         if (address < TASK_SIZE) {
 890                 if (error_code & 4) {   /* user mode access? */
 891                         if (esp[2] & VM_MASK) {
 892                                 bit = (address - 0xA0000) >> PAGE_SHIFT;
 893                                 if (bit < 32)
 894                                         current->screen_bitmap |= 1 << bit;
 895                         } else 
 896                                 user_esp = esp[3];
 897                 }
 898                 if (error_code & 1)
 899                         do_wp_page(error_code, address, current, user_esp);
 900                 else
 901                         do_no_page(error_code, address, current, user_esp);
 902                 if (!user_esp)
 903                         return;
 904                 stack_limit = current->rlim[RLIMIT_STACK].rlim_cur;
 905                 if (stack_limit >= RLIM_INFINITY)
 906                         return;
 907                 if (stack_limit >= current->start_stack)
 908                         return;
 909                 stack_limit = current->start_stack - stack_limit;
 910                 if (user_esp < stack_limit)
 911                         send_sig(SIGSEGV, current, 1);
 912                 return;
 913         }
 914         printk("Unable to handle kernel paging request at address %08x\n",address);
 915         die_if_kernel("Oops",(long)esp,error_code);
 916         do_exit(SIGKILL);
 917 }
 918 
 919 /*
 920  * BAD_PAGE is the page that is used for page faults when linux
 921  * is out-of-memory. Older versions of linux just did a
 922  * do_exit(), but using this instead means there is less risk
 923  * for a process dying in kernel mode, possibly leaving a inode
 924  * unused etc..
 925  *
 926  * BAD_PAGETABLE is the accompanying page-table: it is initialized
 927  * to point to BAD_PAGE entries.
 928  *
 929  * ZERO_PAGE is a special page that is used for zero-initialized
 930  * data and COW.
 931  */
 932 unsigned long __bad_pagetable(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 933 {
 934         extern char empty_bad_page_table[PAGE_SIZE];
 935 
 936         __asm__ __volatile__("cld ; rep ; stosl"
 937                 ::"a" (BAD_PAGE + PAGE_TABLE),
 938                   "D" ((long) empty_bad_page_table),
 939                   "c" (1024)
 940                 :"di","cx");
 941         return (unsigned long) empty_bad_page_table;
 942 }
 943 
/* return the (cleared) out-of-memory fallback page */
unsigned long __bad_page(void)
{
        extern char empty_bad_page[PAGE_SIZE];

        /* rep stosl: zero the page, 1024 longwords */
        __asm__ __volatile__("cld ; rep ; stosl"
                ::"a" (0),
                  "D" ((long) empty_bad_page),
                  "c" (1024)
                :"di","cx");
        return (unsigned long) empty_bad_page;
}
 955 
/* return the (cleared) shared zero page */
unsigned long __zero_page(void)
{
        extern char empty_zero_page[PAGE_SIZE];

        /* rep stosl: zero the page, 1024 longwords */
        __asm__ __volatile__("cld ; rep ; stosl"
                ::"a" (0),
                  "D" ((long) empty_zero_page),
                  "c" (1024)
                :"di","cx");
        return (unsigned long) empty_zero_page;
}
 967 
 968 void show_mem(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 969 {
 970         int i,free = 0,total = 0,reserved = 0;
 971         int shared = 0;
 972 
 973         printk("Mem-info:\n");
 974         printk("Free pages:      %6d\n",nr_free_pages);
 975         printk("Secondary pages: %6d\n",nr_secondary_pages);
 976         printk("Buffer heads:    %6d\n",nr_buffer_heads);
 977         printk("Buffer blocks:   %6d\n",nr_buffers);
 978         i = high_memory >> PAGE_SHIFT;
 979         while (i-- > 0) {
 980                 total++;
 981                 if (mem_map[i] & MAP_PAGE_RESERVED)
 982                         reserved++;
 983                 else if (!mem_map[i])
 984                         free++;
 985                 else
 986                         shared += mem_map[i]-1;
 987         }
 988         printk("%d pages of RAM\n",total);
 989         printk("%d free pages\n",free);
 990         printk("%d reserved pages\n",reserved);
 991         printk("%d pages shared\n",shared);
 992 }
 993 
 994 /*
 995  * paging_init() sets up the page tables - note that the first 4MB are
 996  * already mapped by head.S.
 997  *
 998  * This routines also unmaps the page at virtual kernel address 0, so
 999  * that we can trap those pesky NULL-reference errors in the kernel.
1000  */
1001 unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
     /* [previous][next][first][last][top][bottom][index][help] */
1002 {
1003         unsigned long * pg_dir;
1004         unsigned long * pg_table;
1005         unsigned long tmp;
1006         unsigned long address;
1007 
1008 /*
1009  * Physical page 0 is special: it's a "zero-page", and is guaranteed to
1010  * stay that way - it's write-protected and when there is a c-o-w, the
1011  * mm handler treats it specially.
1012  */
1013         memset((void *) 0, 0, 4096);
1014         start_mem += 4095;
1015         start_mem &= 0xfffff000;
1016         address = 0;
1017         pg_dir = swapper_pg_dir + 768;          /* at virtual addr 0xC0000000 */
1018         while (address < end_mem) {
1019                 tmp = *pg_dir;
1020                 if (!tmp) {
1021                         tmp = start_mem;
1022                         *pg_dir = tmp | PAGE_TABLE;
1023                         start_mem += 4096;
1024                 }
1025                 pg_dir++;
1026                 pg_table = (unsigned long *) (tmp & 0xfffff000);
1027                 for (tmp = 0 ; tmp < 1024 ; tmp++,pg_table++) {
1028                         if (address && address < end_mem)
1029                                 *pg_table = address | PAGE_SHARED;
1030                         else
1031                                 *pg_table = 0;
1032                         address += 4096;
1033                 }
1034         }
1035         invalidate();
1036         return start_mem;
1037 }
1038 
/*
 * mem_init() builds the mem_map[] usage array and the free-page list,
 * marking the kernel image and the 640k-1M hole as reserved.
 * 'start_low_mem' is the first usable address below 640k,
 * 'start_mem'..'end_mem' the usable range above the kernel.
 */
void mem_init(unsigned long start_low_mem,
              unsigned long start_mem, unsigned long end_mem)
{
        int codepages = 0;
        int reservedpages = 0;
        int datapages = 0;
        unsigned long tmp;
        unsigned short * p;
        extern int etext;

        cli();
        end_mem &= 0xfffff000;
        high_memory = end_mem;
        start_mem += 0x0000000f;
        start_mem &= 0xfffffff0;
        /* place mem_map[] itself at the start of free memory, and mark
           every page reserved to begin with */
        tmp = MAP_NR(end_mem);
        mem_map = (unsigned short *) start_mem;
        p = mem_map + tmp;
        start_mem = (unsigned long) p;
        while (p > mem_map)
                *--p = MAP_PAGE_RESERVED;
        start_low_mem += 0x00000fff;
        start_low_mem &= 0xfffff000;
        start_mem += 0x00000fff;
        start_mem &= 0xfffff000;
        /* un-reserve usable low memory below the 640k video/BIOS area */
        while (start_low_mem < 0xA0000) {
                mem_map[MAP_NR(start_low_mem)] = 0;
                start_low_mem += 4096;
        }
        /* un-reserve everything above the kernel (and mem_map itself)
           up to the end of memory */
        while (start_mem < end_mem) {
                mem_map[MAP_NR(start_mem)] = 0;
                start_mem += 4096;
        }
        sound_mem_init();
        /* thread all unreserved pages onto the free-page list: the
           first longword of each free page links to the next */
        free_page_list = 0;
        nr_free_pages = 0;
        for (tmp = 0 ; tmp < end_mem ; tmp += 4096) {
                if (mem_map[MAP_NR(tmp)]) {
                        if (tmp >= 0xA0000 && tmp < 0x100000)
                                reservedpages++;
                        else if (tmp < (unsigned long) &etext)
                                codepages++;
                        else
                                datapages++;
                        continue;
                }
                *(unsigned long *) tmp = free_page_list;
                free_page_list = tmp;
                nr_free_pages++;
        }
        tmp = nr_free_pages << PAGE_SHIFT;
        printk("Memory: %dk/%dk available (%dk kernel code, %dk reserved, %dk data)\n",
                tmp >> 10,
                end_mem >> 10,
                codepages << 2,
                reservedpages << 2,
                datapages << 2);
        return;
}
1098 
1099 void si_meminfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
1100 {
1101         int i;
1102 
1103         i = high_memory >> PAGE_SHIFT;
1104         val->totalram = 0;
1105         val->freeram = 0;
1106         val->sharedram = 0;
1107         val->bufferram = buffermem;
1108         while (i-- > 0)  {
1109                 if (mem_map[i] & MAP_PAGE_RESERVED)
1110                         continue;
1111                 val->totalram++;
1112                 if (!mem_map[i]) {
1113                         val->freeram++;
1114                         continue;
1115                 }
1116                 val->sharedram += mem_map[i]-1;
1117         }
1118         val->totalram <<= PAGE_SHIFT;
1119         val->freeram <<= PAGE_SHIFT;
1120         val->sharedram <<= PAGE_SHIFT;
1121         return;
1122 }

/* [previous][next][first][last][top][bottom][index][help] */