root/mm/memory.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. oom
  2. free_one_table
  3. clear_page_tables
  4. free_page_tables
  5. copy_page_tables
  6. unmap_page_range
  7. zeromap_page_range
  8. remap_page_range
  9. put_page
  10. put_dirty_page
  11. do_wp_page
  12. verify_area
  13. get_empty_page
  14. try_to_share
  15. share_page
  16. get_empty_pgtable
  17. do_no_page
  18. do_page_fault
  19. __bad_pagetable
  20. __bad_page
  21. __zero_page
  22. show_mem
  23. paging_init
  24. mem_init
  25. si_meminfo

   1 /*
   2  *  linux/mm/memory.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * demand-loading started 01.12.91 - seems it is high on the list of
   9  * things wanted, and it should be easy to implement. - Linus
  10  */
  11 
  12 /*
  13  * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
  14  * pages started 02.12.91, seems to work. - Linus.
  15  *
  16  * Tested sharing by executing about 30 /bin/sh: under the old kernel it
  17  * would have taken more than the 6M I have free, but it worked well as
  18  * far as I could see.
  19  *
  20  * Also corrected some "invalidate()"s - I wasn't doing enough of them.
  21  */
  22 
  23 /*
  24  * Real VM (paging to/from disk) started 18.12.91. Much more work and
  25  * thought has to go into this. Oh, well..
  26  * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
  27  *              Found it. Everything seems to work now.
  28  * 20.12.91  -  Ok, making the swap-device changeable like the root.
  29  */
  30 
  31 #include <asm/system.h>
  32 
  33 #include <linux/signal.h>
  34 #include <linux/sched.h>
  35 #include <linux/head.h>
  36 #include <linux/kernel.h>
  37 #include <linux/errno.h>
  38 #include <linux/string.h>
  39 #include <linux/types.h>
  40 
  41 unsigned long high_memory = 0;
  42 
  43 extern void sound_mem_init(void);
  44 
  45 int nr_free_pages = 0;
  46 unsigned long free_page_list = 0;
  47 /*
  48  * The secondary free_page_list is used for malloc() etc things that
  49  * may need pages during interrupts etc. Normal get_free_page() operations
  50  * don't touch it, so it stays as a kind of "panic-list", that can be
  51  * accessed when all other mm tricks have failed.
  52  */
  53 int nr_secondary_pages = 0;
  54 unsigned long secondary_page_list = 0;
  55 
  56 #define copy_page(from,to) \
  57 __asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024):"cx","di","si")
  58 
  59 unsigned short * mem_map = NULL;
  60 
  61 #define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
  62 
  63 /*
  64  * oom() prints a message (so that the user knows why the process died),
  65  * and gives the process an untrappable SIGSEGV.
  66  */
  67 void oom(struct task_struct * task)
     /* [previous][next][first][last][top][bottom][index][help] */
  68 {
  69         printk("\nout of memory\n");
  70         task->sigaction[SIGKILL-1].sa_handler = NULL;
  71         task->blocked &= ~(1<<(SIGKILL-1));
  72         send_sig(SIGKILL,task,1);
  73 }
  74 
  75 static void free_one_table(unsigned long * page_dir)
     /* [previous][next][first][last][top][bottom][index][help] */
  76 {
  77         int j;
  78         unsigned long pg_table = *page_dir;
  79         unsigned long * page_table;
  80 
  81         if (!pg_table)
  82                 return;
  83         *page_dir = 0;
  84         if (pg_table >= high_memory || !(pg_table & PAGE_PRESENT)) {
  85                 printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
  86                 return;
  87         }
  88         if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
  89                 return;
  90         page_table = (unsigned long *) (pg_table & 0xfffff000);
  91         for (j = 0 ; j < 1024 ; j++,page_table++) {
  92                 unsigned long pg = *page_table;
  93                 
  94                 if (!pg)
  95                         continue;
  96                 *page_table = 0;
  97                 if (pg & PAGE_PRESENT)
  98                         free_page(0xfffff000 & pg);
  99                 else
 100                         swap_free(pg);
 101         }
 102         free_page(0xfffff000 & pg_table);
 103 }
 104 
 105 /*
 106  * This function clears all user-level page tables of a process - this
 107  * is needed by execve(), so that old pages aren't in the way. Note that
 108  * unlike 'free_page_tables()', this function still leaves a valid
 109  * page-table-tree in memory: it just removes the user pages. The two
 110  * functions are similar, but there is a fundamental difference.
 111  */
 112 void clear_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114         int i;
 115         unsigned long * page_dir;
 116 
 117         if (!tsk)
 118                 return;
 119         if (tsk == task[0])
 120                 panic("task[0] (swapper) doesn't support exec()\n");
 121         page_dir = (unsigned long *) tsk->tss.cr3;
 122         if (!page_dir || page_dir == swapper_pg_dir) {
 123                 printk("Trying to clear kernel page-directory: not good\n");
 124                 return;
 125         }
 126         for (i = 0 ; i < 768 ; i++,page_dir++)
 127                 free_one_table(page_dir);
 128         invalidate();
 129         return;
 130 }
 131 
 132 /*
 133  * This function frees up all page tables of a process when it exits.
 134  */
 135 void free_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 136 {
 137         int i;
 138         unsigned long pg_dir;
 139         unsigned long * page_dir;
 140 
 141         if (!tsk)
 142                 return;
 143         if (tsk == task[0]) {
 144                 printk("task[0] (swapper) killed: unable to recover\n");
 145                 panic("Trying to free up swapper memory space");
 146         }
 147         pg_dir = tsk->tss.cr3;
 148         if (!pg_dir || pg_dir == (unsigned long) swapper_pg_dir) {
 149                 printk("Trying to free kernel page-directory: not good\n");
 150                 return;
 151         }
 152         tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
 153         if (tsk == current)
 154                 __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
 155         page_dir = (unsigned long *) pg_dir;
 156         for (i = 0 ; i < 1024 ; i++,page_dir++)
 157                 free_one_table(page_dir);
 158         free_page(pg_dir);
 159         invalidate();
 160 }
 161 
 162 /*
 163  * copy_page_tables() just copies the whole process memory range:
 164  * note the special handling of RESERVED (ie kernel) pages, which
 165  * means that they are always shared by all processes.
 166  */
 167 int copy_page_tables(struct task_struct * tsk)
     /* [previous][next][first][last][top][bottom][index][help] */
 168 {
 169         int i;
 170         unsigned long old_pg_dir, *old_page_dir;
 171         unsigned long new_pg_dir, *new_page_dir;
 172 
 173         old_pg_dir = current->tss.cr3;
 174         new_pg_dir = get_free_page(GFP_KERNEL);
 175         if (!new_pg_dir)
 176                 return -ENOMEM;
 177         tsk->tss.cr3 = new_pg_dir;
 178         old_page_dir = (unsigned long *) old_pg_dir;
 179         new_page_dir = (unsigned long *) new_pg_dir;
 180         for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) {
 181                 int j;
 182                 unsigned long old_pg_table, *old_page_table;
 183                 unsigned long new_pg_table, *new_page_table;
 184 
 185                 old_pg_table = *old_page_dir;
 186                 if (!old_pg_table)
 187                         continue;
 188                 if (old_pg_table >= high_memory || !(old_pg_table & PAGE_PRESENT)) {
 189                         printk("copy_page_tables: bad page table: "
 190                                 "probable memory corruption");
 191                         *old_page_dir = 0;
 192                         continue;
 193                 }
 194                 if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
 195                         *new_page_dir = old_pg_table;
 196                         continue;
 197                 }
 198                 new_pg_table = get_free_page(GFP_KERNEL);
 199                 if (!new_pg_table) {
 200                         free_page_tables(tsk);
 201                         return -ENOMEM;
 202                 }
 203                 *new_page_dir = new_pg_table | PAGE_TABLE;
 204                 old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
 205                 new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
 206                 for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
 207                         unsigned long pg;
 208                         pg = *old_page_table;
 209                         if (!pg)
 210                                 continue;
 211                         if (!(pg & PAGE_PRESENT)) {
 212                                 *new_page_table = swap_duplicate(pg);
 213                                 continue;
 214                         }
 215                         if ((pg & (PAGE_RW | PAGE_COW)) == (PAGE_RW | PAGE_COW))
 216                                 pg &= ~PAGE_RW;
 217                         *new_page_table = pg;
 218                         if (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)
 219                                 continue;
 220                         *old_page_table = pg;
 221                         mem_map[MAP_NR(pg)]++;
 222                 }
 223         }
 224         invalidate();
 225         return 0;
 226 }
 227 
 228 /*
 229  * a more complete version of free_page_tables which performs with page
 230  * granularity.
 231  */
 232 int unmap_page_range(unsigned long from, unsigned long size)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234         unsigned long page, page_dir;
 235         unsigned long *page_table, *dir;
 236         unsigned long poff, pcnt, pc;
 237 
 238         if (from & 0xfff) {
 239                 printk("unmap_page_range called with wrong alignment\n");
 240                 return -EINVAL;
 241         }
 242         size = (size + 0xfff) >> PAGE_SHIFT;
 243         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 244         poff = (from >> PAGE_SHIFT) & 0x3ff;
 245         if ((pcnt = 1024 - poff) > size)
 246                 pcnt = size;
 247 
 248         for ( ; size > 0; ++dir, size -= pcnt,
 249              pcnt = (size > 1024 ? 1024 : size)) {
 250                 if (!(page_dir = *dir)) {
 251                         poff = 0;
 252                         continue;
 253                 }
 254                 if (!(page_dir & PAGE_PRESENT)) {
 255                         printk("unmap_page_range: bad page directory.");
 256                         continue;
 257                 }
 258                 page_table = (unsigned long *)(0xfffff000 & page_dir);
 259                 if (poff) {
 260                         page_table += poff;
 261                         poff = 0;
 262                 }
 263                 for (pc = pcnt; pc--; page_table++) {
 264                         if ((page = *page_table) != 0) {
 265                                 *page_table = 0;
 266                                 if (1 & page) {
 267                                         --current->rss;
 268                                         free_page(0xfffff000 & page);
 269                                 } else
 270                                         swap_free(page);
 271                         }
 272                 }
 273                 if (pcnt == 1024) {
 274                         free_page(0xfffff000 & page_dir);
 275                         *dir = 0;
 276                 }
 277         }
 278         invalidate();
 279         return 0;
 280 }
 281 
 282 int zeromap_page_range(unsigned long from, unsigned long size, int mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284         unsigned long *page_table, *dir;
 285         unsigned long poff, pcnt;
 286         unsigned long page;
 287 
 288         if (mask) {
 289                 if ((mask & 0xfffff001) != PAGE_PRESENT) {
 290                         printk("zeromap_page_range: mask = %08x\n",mask);
 291                         return -EINVAL;
 292                 }
 293                 mask |= ZERO_PAGE;
 294         }
 295         if (from & 0xfff) {
 296                 printk("zeromap_page_range: from = %08x\n",from);
 297                 return -EINVAL;
 298         }
 299         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 300         size = (size + 0xfff) >> PAGE_SHIFT;
 301         poff = (from >> PAGE_SHIFT) & 0x3ff;
 302         if ((pcnt = 1024 - poff) > size)
 303                 pcnt = size;
 304 
 305         while (size > 0) {
 306                 if (!(PAGE_PRESENT & *dir)) {
 307                         if (!(page_table = (unsigned long *)get_free_page(GFP_KERNEL))) {
 308                                 invalidate();
 309                                 return -ENOMEM;
 310                         }
 311                         if (PAGE_PRESENT & *dir) {
 312                                 free_page((unsigned long) page_table);
 313                                 page_table = (unsigned long *)(0xfffff000 & *dir++);
 314                         } else
 315                                 *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
 316                 } else
 317                         page_table = (unsigned long *)(0xfffff000 & *dir++);
 318                 page_table += poff;
 319                 poff = 0;
 320                 for (size -= pcnt; pcnt-- ;) {
 321                         if ((page = *page_table) != 0) {
 322                                 *page_table = 0;
 323                                 if (page & PAGE_PRESENT) {
 324                                         --current->rss;
 325                                         free_page(0xfffff000 & page);
 326                                 } else
 327                                         swap_free(page);
 328                         }
 329                         if (mask)
 330                                 ++current->rss;
 331                         *page_table++ = mask;
 332                 }
 333                 pcnt = (size > 1024 ? 1024 : size);
 334         }
 335         invalidate();
 336         return 0;
 337 }
 338 
 339 /*
 340  * maps a range of physical memory into the requested pages. the old
 341  * mappings are removed. any references to nonexistent pages results
 342  * in null mappings (currently treated as "copy-on-access")
 343  */
 344 int remap_page_range(unsigned long from, unsigned long to, unsigned long size, int mask)
     /* [previous][next][first][last][top][bottom][index][help] */
 345 {
 346         unsigned long *page_table, *dir;
 347         unsigned long poff, pcnt;
 348         unsigned long page;
 349 
 350         if (mask) {
 351                 if ((mask & 0xfffff001) != PAGE_PRESENT) {
 352                         printk("remap_page_range: mask = %08x\n",mask);
 353                         return -EINVAL;
 354                 }
 355         }
 356         if ((from & 0xfff) || (to & 0xfff)) {
 357                 printk("remap_page_range: from = %08x, to=%08x\n",from,to);
 358                 return -EINVAL;
 359         }
 360         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 361         size = (size + 0xfff) >> PAGE_SHIFT;
 362         poff = (from >> PAGE_SHIFT) & 0x3ff;
 363         if ((pcnt = 1024 - poff) > size)
 364                 pcnt = size;
 365 
 366         while (size > 0) {
 367                 if (!(PAGE_PRESENT & *dir)) {
 368                         if (!(page_table = (unsigned long *)get_free_page(GFP_KERNEL))) {
 369                                 invalidate();
 370                                 return -1;
 371                         }
 372                         *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
 373                 }
 374                 else
 375                         page_table = (unsigned long *)(0xfffff000 & *dir++);
 376                 if (poff) {
 377                         page_table += poff;
 378                         poff = 0;
 379                 }
 380 
 381                 for (size -= pcnt; pcnt-- ;) {
 382                         if ((page = *page_table) != 0) {
 383                                 *page_table = 0;
 384                                 if (PAGE_PRESENT & page) {
 385                                         --current->rss;
 386                                         free_page(0xfffff000 & page);
 387                                 } else
 388                                         swap_free(page);
 389                         }
 390 
 391                         /*
 392                          * i'm not sure of the second cond here. should we
 393                          * report failure?
 394                          * the first condition should return an invalid access
 395                          * when the page is referenced. current assumptions
 396                          * cause it to be treated as demand allocation.
 397                          */
 398                         if (!mask || to >= high_memory || !mem_map[MAP_NR(to)])
 399                                 *page_table++ = 0;      /* not present */
 400                         else {
 401                                 ++current->rss;
 402                                 *page_table++ = (to | mask);
 403                                 if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED))
 404                                         mem_map[MAP_NR(to)]++;
 405                         }
 406                         to += PAGE_SIZE;
 407                 }
 408                 pcnt = (size > 1024 ? 1024 : size);
 409         }
 410         invalidate();
 411         return 0;
 412 }
 413 
 414 /*
 415  * This function puts a page in memory at the wanted address.
 416  * It returns the physical address of the page gotten, 0 if
 417  * out of memory (either when trying to access page-table or
 418  * page.)
 419  * if wp = 1 the page will be write protected
 420  */
 421 static unsigned long put_page(struct task_struct * tsk,unsigned long page,
     /* [previous][next][first][last][top][bottom][index][help] */
 422         unsigned long address,int prot)
 423 {
 424         unsigned long tmp, *page_table;
 425 
 426 /* NOTE !!! This uses the fact that _pg_dir=0 */
 427 
 428         if ((prot & 0xfffff001) != PAGE_PRESENT)
 429                 printk("put_page: prot = %08x\n",prot);
 430         if (page >= high_memory) {
 431                 printk("put_page: trying to put page %p at %p\n",page,address);
 432                 return 0;
 433         }
 434         tmp = mem_map[MAP_NR(page)];
 435         if (!(tmp & MAP_PAGE_RESERVED) && (tmp != 1)) {
 436                 printk("put_page: mem_map disagrees with %p at %p\n",page,address);
 437                 return 0;
 438         }
 439         page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 440         if ((*page_table) & PAGE_PRESENT)
 441                 page_table = (unsigned long *) (0xfffff000 & *page_table);
 442         else {
 443                 printk("put_page: bad page directory entry\n");
 444                 oom(tsk);
 445                 *page_table = BAD_PAGETABLE | PAGE_TABLE;
 446                 return 0;
 447         }
 448         page_table += (address >> PAGE_SHIFT) & 0x3ff;
 449         if (*page_table) {
 450                 printk("put_page: page already exists\n");
 451                 *page_table = 0;
 452                 invalidate();
 453         }
 454         *page_table = page | prot;
 455 /* no need for invalidate */
 456         return page;
 457 }
 458 
 459 /*
 460  * The previous function doesn't work very well if you also want to mark
 461  * the page dirty: exec.c wants this, as it has earlier changed the page,
 462  * and we want the dirty-status to be correct (for VM). Thus the same
 463  * routine, but this time we mark it dirty too.
 464  */
 465 unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
     /* [previous][next][first][last][top][bottom][index][help] */
 466 {
 467         unsigned long tmp, *page_table;
 468 
 469         if (page >= high_memory)
 470                 printk("put_dirty_page: trying to put page %p at %p\n",page,address);
 471         if (mem_map[MAP_NR(page)] != 1)
 472                 printk("mem_map disagrees with %p at %p\n",page,address);
 473         page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 474         if ((*page_table)&PAGE_PRESENT)
 475                 page_table = (unsigned long *) (0xfffff000 & *page_table);
 476         else {
 477                 if (!(tmp=get_free_page(GFP_KERNEL)))
 478                         return 0;
 479                 *page_table = tmp | PAGE_TABLE;
 480                 page_table = (unsigned long *) tmp;
 481         }
 482         page_table += (address >> PAGE_SHIFT) & 0x3ff;
 483         if (*page_table) {
 484                 printk("put_dirty_page: page already exists\n");
 485                 *page_table = 0;
 486                 invalidate();
 487         }
 488         *page_table = page | (PAGE_DIRTY | PAGE_PRIVATE);
 489 /* no need for invalidate */
 490         return page;
 491 }
 492 
 493 /*
 494  * This routine handles present pages, when users try to write
 495  * to a shared page. It is done by copying the page to a new address
 496  * and decrementing the shared-page counter for the old page.
 497  */
 498 void do_wp_page(unsigned long error_code, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 499         struct task_struct * tsk, unsigned long user_esp)
 500 {
 501         unsigned long pde, pte, old_page, prot;
 502         unsigned long new_page;
 503 
 504         new_page = __get_free_page(GFP_KERNEL);
 505         pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
 506         pte = *(unsigned long *) pde;
 507         if (!(pte & PAGE_PRESENT)) {
 508                 if (new_page)
 509                         free_page(new_page);
 510                 return;
 511         }
 512         if ((pte & PAGE_TABLE) != PAGE_TABLE || pte >= high_memory) {
 513                 printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
 514                 *(unsigned long *) pde = BAD_PAGETABLE | PAGE_TABLE;
 515                 send_sig(SIGKILL, tsk, 1);
 516                 if (new_page)
 517                         free_page(new_page);
 518                 return;
 519         }
 520         pte &= 0xfffff000;
 521         pte += (address>>10) & 0xffc;
 522         old_page = *(unsigned long *) pte;
 523         if (!(old_page & PAGE_PRESENT)) {
 524                 if (new_page)
 525                         free_page(new_page);
 526                 return;
 527         }
 528         if (old_page >= high_memory) {
 529                 printk("do_wp_page: bogus page at address %08x (%08x)\n",address,old_page);
 530                 *(unsigned long *) pte = BAD_PAGE | PAGE_SHARED;
 531                 send_sig(SIGKILL, tsk, 1);
 532                 if (new_page)
 533                         free_page(new_page);
 534                 return;
 535         }
 536         if (old_page & PAGE_RW) {
 537                 if (new_page)
 538                         free_page(new_page);
 539                 return;
 540         }
 541         if (!(old_page & PAGE_COW)) {
 542                 if (user_esp && tsk == current)
 543                         send_sig(SIGSEGV, tsk, 1);
 544         }
 545         tsk->min_flt++;
 546         prot = (old_page & 0x00000fff) | PAGE_RW;
 547         old_page &= 0xfffff000;
 548         if (mem_map[MAP_NR(old_page)]==1) {
 549                 *(unsigned long *) pte |= 2;
 550                 invalidate();
 551                 if (new_page)
 552                         free_page(new_page);
 553                 return;
 554         }
 555         if (new_page)
 556                 copy_page(old_page,new_page);
 557         else {
 558                 new_page = BAD_PAGE;
 559                 oom(tsk);
 560         }
 561         *(unsigned long *) pte = new_page | prot;
 562         free_page(old_page);
 563         invalidate();
 564 }
 565 
 566 int verify_area(int type, void * addr, unsigned long size)
     /* [previous][next][first][last][top][bottom][index][help] */
 567 {
 568         unsigned long start;
 569 
 570         start = (unsigned long) addr;
 571         if (start >= TASK_SIZE)
 572                 return -EFAULT;
 573         if (size > TASK_SIZE - start)
 574                 return -EFAULT;
 575         if (type == VERIFY_READ || !size)
 576                 return 0;
 577         if (!size)
 578                 return 0;
 579         size--;
 580         size += start & 0xfff;
 581         size >>= 12;
 582         start &= 0xfffff000;
 583         do {
 584                 do_wp_page(1,start,current,0);
 585                 start += 4096;
 586         } while (size--);
 587         return 0;
 588 }
 589 
 590 static void get_empty_page(struct task_struct * tsk, unsigned long address)
     /* [previous][next][first][last][top][bottom][index][help] */
 591 {
 592         unsigned long tmp;
 593 
 594         tmp = get_free_page(GFP_KERNEL);
 595         if (!tmp) {
 596                 oom(tsk);
 597                 tmp = BAD_PAGE;
 598         }
 599         if (!put_page(tsk,tmp,address,PAGE_PRIVATE))
 600                 free_page(tmp);
 601 }
 602 
 603 /*
 604  * try_to_share() checks the page at address "address" in the task "p",
 605  * to see if it exists, and if it is clean. If so, share it with the current
 606  * task.
 607  *
 608  * NOTE! This assumes we have checked that p != current, and that they
 609  * share the same executable or library.
 610  */
 611 static int try_to_share(unsigned long address, struct task_struct * tsk,
     /* [previous][next][first][last][top][bottom][index][help] */
 612         struct task_struct * p, unsigned long error_code, unsigned long newpage)
 613 {
 614         unsigned long from;
 615         unsigned long to;
 616         unsigned long from_page;
 617         unsigned long to_page;
 618 
 619         from_page = p->tss.cr3 + ((address>>20) & 0xffc);
 620         to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
 621 /* is there a page-directory at from? */
 622         from = *(unsigned long *) from_page;
 623         if (!(from & PAGE_PRESENT))
 624                 return 0;
 625         from &= 0xfffff000;
 626         from_page = from + ((address>>10) & 0xffc);
 627         from = *(unsigned long *) from_page;
 628 /* is the page clean and present? */
 629         if ((from & (PAGE_PRESENT | PAGE_DIRTY)) != PAGE_PRESENT)
 630                 return 0;
 631         if (from >= high_memory)
 632                 return 0;
 633         if (mem_map[MAP_NR(from)] & MAP_PAGE_RESERVED)
 634                 return 0;
 635 /* is the destination ok? */
 636         to = *(unsigned long *) to_page;
 637         if (!(to & PAGE_PRESENT))
 638                 return 0;
 639         to &= 0xfffff000;
 640         to_page = to + ((address>>10) & 0xffc);
 641         if (*(unsigned long *) to_page)
 642                 return 0;
 643 /* share them if read - do COW immediately otherwise */
 644         if (error_code & PAGE_RW) {
 645                 copy_page((from & 0xfffff000),newpage);
 646                 to = newpage | PAGE_PRIVATE;
 647         } else {
 648                 mem_map[MAP_NR(from)]++;
 649                 from &= ~PAGE_RW;
 650                 to = from;
 651                 free_page(newpage);
 652         }
 653         *(unsigned long *) from_page = from;
 654         *(unsigned long *) to_page = to;
 655         invalidate();
 656         return 1;
 657 }
 658 
 659 /*
 660  * share_page() tries to find a process that could share a page with
 661  * the current one. Address is the address of the wanted page relative
 662  * to the current data space.
 663  *
 664  * We first check if it is at all feasible by checking executable->i_count.
 665  * It should be >1 if there are other tasks sharing this inode.
 666  */
 667 static int share_page(struct task_struct * tsk, struct inode * inode,
     /* [previous][next][first][last][top][bottom][index][help] */
 668         unsigned long address, unsigned long error_code, unsigned long newpage)
 669 {
 670         struct task_struct ** p;
 671         int i;
 672 
 673         if (!inode || inode->i_count < 2)
 674                 return 0;
 675         for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
 676                 if (!*p)
 677                         continue;
 678                 if (tsk == *p)
 679                         continue;
 680                 if (inode != (*p)->executable) {
 681                         for (i=0; i < (*p)->numlibraries; i++)
 682                                 if (inode == (*p)->libraries[i].library)
 683                                         break;
 684                         if (i >= (*p)->numlibraries)
 685                                 continue;
 686                 }
 687                 if (try_to_share(address,tsk,*p,error_code,newpage))
 688                         return 1;
 689         }
 690         return 0;
 691 }
 692 
 693 /*
 694  * fill in an empty page-table if none exists
 695  */
 696 static unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
     /* [previous][next][first][last][top][bottom][index][help] */
 697 {
 698         unsigned long page = 0;
 699         unsigned long *p;
 700 repeat:
 701         p = (unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc));
 702         if (PAGE_PRESENT & *p) {
 703                 free_page(page);
 704                 return *p;
 705         }
 706         if (*p) {
 707                 printk("get_empty_pgtable: bad page-directory entry \n");
 708                 *p = 0;
 709         }
 710         if (page) {
 711                 *p = page | PAGE_TABLE;
 712                 return *p;
 713         }
 714         if ((page = get_free_page(GFP_KERNEL)) != 0)
 715                 goto repeat;
 716         oom(current);
 717         *p = BAD_PAGETABLE | PAGE_TABLE;
 718         return 0;
 719 }
 720 
/*
 * do_no_page() handles a fault on a not-present page: it either swaps
 * the page back in, demand-loads it from the executable or a shared
 * library, or hands out a fresh zeroed page for anonymous memory.
 */
void do_no_page(unsigned long error_code, unsigned long address,
        struct task_struct *tsk, unsigned long user_esp)
{
        int nr[8], prot;
        unsigned long tmp;
        unsigned long page;
        unsigned int block,i;
        struct inode * inode;

        /* make sure a page table exists and get the pde for 'address' */
        page = get_empty_pgtable(tsk,address);
        if (!page)
                return;
        /* turn the pde into a pointer to the page-table entry itself */
        page &= 0xfffff000;
        page += (address >> 10) & 0xffc;
        tmp = *(unsigned long *) page;
        if (tmp & PAGE_PRESENT)
                return;         /* raced with another fault: already mapped */
        ++tsk->rss;
        if (tmp) {
                /* pte non-zero but not present: page is on the swap device */
                ++tsk->maj_flt;
                swap_in((unsigned long *) page);
                return;
        }
        address &= 0xfffff000;
        inode = NULL;
        block = 0;
        /* figure out what backs this page: the executable image ... */
        if (address < tsk->end_data) {
                inode = tsk->executable;
                block = 1 + address / BLOCK_SIZE;
        } else {
                /* ... or one of the mapped shared libraries */
                i = tsk->numlibraries;
                while (i-- > 0) {
                        if (address < tsk->libraries[i].start)
                                continue;
                        block = address - tsk->libraries[i].start;
                        if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
                                continue;
                        inode = tsk->libraries[i].library;
                        if (block < tsk->libraries[i].length)
                                block = 1 + block / BLOCK_SIZE;
                        else
                                block = 0;      /* library bss: not file-backed */
                        break;
                }
        }
        if (!inode) {
                /* anonymous memory (brk/stack): hand out a zero page */
                ++tsk->min_flt;
                get_empty_page(tsk,address);
                if (tsk != current)
                        return;
                if (address < tsk->brk)
                        return;
                /* allow the stack to grow within 8kB below the user esp */
                if (address+8192 >= (user_esp & 0xfffff000))
                        return;
                send_sig(SIGSEGV,tsk,1);
                return;
        }
        page = get_free_page(GFP_KERNEL);
        if (share_page(tsk,inode,address,error_code,page)) {
                ++tsk->min_flt;
                return;
        }
        ++tsk->maj_flt;
        if (!page) {
                /* no memory: map the bad page so the process can at least
                   take a signal instead of dying inside the kernel */
                oom(current);
                put_page(tsk,BAD_PAGE,address,PAGE_PRIVATE);
                return;
        }
        prot = PAGE_PRIVATE;
        if (CODE_SPACE(address, tsk))
                prot = PAGE_READONLY;
        if (block) {
                /* read the four 1kB blocks making up this 4kB page */
                for (i=0 ; i<4 ; block++,i++)
                        nr[i] = bmap(inode,block);
                page = bread_page(page,inode->i_dev,nr,1024,prot);
        }
        /* retry sharing now that the page has been read in */
        if (!(error_code & PAGE_RW) && share_page(tsk,inode,address, error_code,page))
                return;
        /* zero out the part of the page beyond end_data */
        i = address + PAGE_SIZE - tsk->end_data;
        if (i > PAGE_SIZE-1)
                i = 0;
        tmp = page + PAGE_SIZE;
        while (i--) {
                tmp--;
                *(char *)tmp = 0;
        }
        if (put_page(tsk,page,address,prot))
                return;
        free_page(page);
        oom(current);
}
 812 
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code bits (386 hardware): 1 = protection violation (page was
 * present), 2 = write access, 4 = fault happened in user mode.
 */
void do_page_fault(unsigned long *esp, unsigned long error_code)
{
        unsigned long address;
        unsigned long user_esp = 0;
        unsigned long stack_limit;
        unsigned int bit;
        extern void die_if_kernel(char *,long,long);

        /* get the address */
        __asm__("movl %%cr2,%0":"=r" (address));
        if (address < TASK_SIZE) {
                if (error_code & 4) {   /* user mode access? */
                        if (esp[2] & VM_MASK) {
                                /* vm86 mode: note faults in the 0xA0000 video
                                   range in the task's screen bitmap */
                                bit = (address - 0xA0000) >> PAGE_SHIFT;
                                if (bit < 32)
                                        current->screen_bitmap |= 1 << bit;
                        } else 
                                user_esp = esp[3];
                }
                if (error_code & 1)
                        do_wp_page(error_code, address, current, user_esp);
                else
                        do_no_page(error_code, address, current, user_esp);
                /* for user-mode faults, enforce the stack rlimit afterwards */
                if (!user_esp)
                        return;
                stack_limit = current->rlim[RLIMIT_STACK].rlim_cur;
                if (stack_limit >= RLIM_INFINITY)
                        return;
                if (stack_limit >= current->start_stack)
                        return;
                stack_limit = current->start_stack - stack_limit;
                if (user_esp < stack_limit)
                        send_sig(SIGSEGV, current, 1);
                return;
        }
        printk("Unable to handle kernel paging request at address %08x\n",address);
        die_if_kernel("Oops",(long)esp,error_code);
        do_exit(SIGKILL);
}
 857 
 858 /*
 859  * BAD_PAGE is the page that is used for page faults when linux
 860  * is out-of-memory. Older versions of linux just did a
 861  * do_exit(), but using this instead means there is less risk
 862  * for a process dying in kernel mode, possibly leaving a inode
 863  * unused etc..
 864  *
 865  * BAD_PAGETABLE is the accompanying page-table: it is initialized
 866  * to point to BAD_PAGE entries.
 867  *
 868  * ZERO_PAGE is a special page that is used for zero-initialized
 869  * data and COW.
 870  */
 871 unsigned long __bad_pagetable(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 872 {
 873         extern char empty_bad_page_table[PAGE_SIZE];
 874 
 875         __asm__ __volatile__("cld ; rep ; stosl"
 876                 ::"a" (BAD_PAGE + PAGE_TABLE),
 877                   "D" ((long) empty_bad_page_table),
 878                   "c" (1024)
 879                 :"di","cx");
 880         return (unsigned long) empty_bad_page_table;
 881 }
 882 
 883 unsigned long __bad_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 884 {
 885         extern char empty_bad_page[PAGE_SIZE];
 886 
 887         __asm__ __volatile__("cld ; rep ; stosl"
 888                 ::"a" (0),
 889                   "D" ((long) empty_bad_page),
 890                   "c" (1024)
 891                 :"di","cx");
 892         return (unsigned long) empty_bad_page;
 893 }
 894 
 895 unsigned long __zero_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 896 {
 897         extern char empty_zero_page[PAGE_SIZE];
 898 
 899         __asm__ __volatile__("cld ; rep ; stosl"
 900                 ::"a" (0),
 901                   "D" ((long) empty_zero_page),
 902                   "c" (1024)
 903                 :"di","cx");
 904         return (unsigned long) empty_zero_page;
 905 }
 906 
 907 void show_mem(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 908 {
 909         int i,free = 0,total = 0,reserved = 0;
 910         int shared = 0;
 911 
 912         printk("Mem-info:\n");
 913         printk("Free pages:      %6d\n",nr_free_pages);
 914         printk("Secondary pages: %6d\n",nr_secondary_pages);
 915         printk("Buffer heads:    %6d\n",nr_buffer_heads);
 916         printk("Buffer blocks:   %6d\n",nr_buffers);
 917         i = high_memory >> PAGE_SHIFT;
 918         while (i-- > 0) {
 919                 total++;
 920                 if (mem_map[i] & MAP_PAGE_RESERVED)
 921                         reserved++;
 922                 else if (!mem_map[i])
 923                         free++;
 924                 else
 925                         shared += mem_map[i]-1;
 926         }
 927         printk("%d pages of RAM\n",total);
 928         printk("%d free pages\n",free);
 929         printk("%d reserved pages\n",reserved);
 930         printk("%d pages shared\n",shared);
 931 }
 932 
/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routines also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 *
 * Page tables are allocated linearly from 'start_mem'; the updated
 * start_mem is returned to the caller.
 */
unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
{
        unsigned long * pg_dir;
        unsigned long * pg_table;
        unsigned long tmp;
        unsigned long address;

/*
 * Physical page 0 is special: it's a "zero-page", and is guaranteed to
 * stay that way - it's write-protected and when there is a c-o-w, the
 * mm handler treats it specially.
 */
        memset((void *) 0, 0, 4096);
        /* page-align start_mem: page tables are carved out of it below */
        start_mem += 4095;
        start_mem &= 0xfffff000;
        address = 0;
        pg_dir = swapper_pg_dir + 768;          /* at virtual addr 0xC0000000 */
        while (address < end_mem) {
                tmp = *pg_dir;
                if (!tmp) {
                        /* no page table here yet: allocate one from start_mem */
                        tmp = start_mem;
                        *pg_dir = tmp | PAGE_TABLE;
                        start_mem += 4096;
                }
                pg_dir++;
                pg_table = (unsigned long *) (tmp & 0xfffff000);
                for (tmp = 0 ; tmp < 1024 ; tmp++,pg_table++) {
                        /* map all physical memory, but leave page 0 unmapped
                           (the NULL trap) and clear entries past end_mem */
                        if (address && address < end_mem)
                                *pg_table = address | PAGE_SHARED;
                        else
                                *pg_table = 0;
                        address += 4096;
                }
        }
        invalidate();
        return start_mem;
}
 977 
/*
 * mem_init() builds the mem_map[] use-count array (one unsigned short
 * per physical page) and threads all non-reserved pages onto the
 * free-page list.  Pages for the kernel image, mem_map itself, and the
 * 640kB-1MB video/BIOS hole stay marked MAP_PAGE_RESERVED.
 */
void mem_init(unsigned long start_low_mem,
              unsigned long start_mem, unsigned long end_mem)
{
        int codepages = 0;
        int reservedpages = 0;
        int datapages = 0;
        unsigned long tmp;
        unsigned short * p;
        extern int etext;

        cli();
        end_mem &= 0xfffff000;
        high_memory = end_mem;
        /* 16-byte align start_mem and carve the mem_map array out of it */
        start_mem += 0x0000000f;
        start_mem &= 0xfffffff0;
        tmp = MAP_NR(end_mem);
        mem_map = (unsigned short *) start_mem;
        p = mem_map + tmp;
        start_mem = (unsigned long) p;
        /* mark every page reserved to begin with ... */
        while (p > mem_map)
                *--p = MAP_PAGE_RESERVED;
        start_low_mem += 0x00000fff;
        start_low_mem &= 0xfffff000;
        start_mem += 0x00000fff;
        start_mem &= 0xfffff000;
        /* ... then release low memory up to the 640kB video/BIOS area ... */
        while (start_low_mem < 0xA0000) {
                mem_map[MAP_NR(start_low_mem)] = 0;
                start_low_mem += 4096;
        }
        /* ... and everything above the kernel image plus mem_map */
        while (start_mem < end_mem) {
                mem_map[MAP_NR(start_mem)] = 0;
                start_mem += 4096;
        }
        sound_mem_init();
        /* build the free-page list out of all unreserved pages */
        free_page_list = 0;
        nr_free_pages = 0;
        for (tmp = 0 ; tmp < end_mem ; tmp += 4096) {
                if (mem_map[MAP_NR(tmp)]) {
                        /* reserved: classify it for the boot-time summary */
                        if (tmp >= 0xA0000 && tmp < 0x100000)
                                reservedpages++;
                        else if (tmp < (unsigned long) &etext)
                                codepages++;
                        else
                                datapages++;
                        continue;
                }
                /* each free page stores the address of the next free page */
                *(unsigned long *) tmp = free_page_list;
                free_page_list = tmp;
                nr_free_pages++;
        }
        tmp = nr_free_pages << PAGE_SHIFT;
        printk("Memory: %dk/%dk available (%dk kernel code, %dk reserved, %dk data)\n",
                tmp >> 10,
                end_mem >> 10,
                codepages << 2,
                reservedpages << 2,
                datapages << 2);
        return;
}
1037 
1038 void si_meminfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
1039 {
1040         int i;
1041 
1042         i = high_memory >> PAGE_SHIFT;
1043         val->totalram = 0;
1044         val->freeram = 0;
1045         val->sharedram = 0;
1046         val->bufferram = buffermem;
1047         while (i-- > 0)  {
1048                 if (mem_map[i] & MAP_PAGE_RESERVED)
1049                         continue;
1050                 val->totalram++;
1051                 if (!mem_map[i]) {
1052                         val->freeram++;
1053                         continue;
1054                 }
1055                 val->sharedram += mem_map[i]-1;
1056         }
1057         val->totalram <<= PAGE_SHIFT;
1058         val->freeram <<= PAGE_SHIFT;
1059         val->sharedram <<= PAGE_SHIFT;
1060         return;
1061 }

/* [previous][next][first][last][top][bottom][index][help] */