root/mm/memory.c

DEFINITIONS

This source file includes the following definitions.
  1. oom
  2. free_one_table
  3. clear_page_tables
  4. free_page_tables
  5. copy_page_tables
  6. unmap_page_range
  7. remap_page_range
  8. put_page
  9. put_dirty_page
  10. do_wp_page
  11. write_verify
  12. get_empty_page
  13. try_to_share
  14. share_page
  15. get_empty_pgtable
  16. do_no_page
  17. do_page_fault
  18. __bad_pagetable
  19. __bad_page
  20. show_mem
  21. paging_init
  22. mem_init
  23. si_meminfo

   1 /*
   2  *  linux/mm/memory.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * demand-loading started 01.12.91 - seems it is high on the list of
   9  * things wanted, and it should be easy to implement. - Linus
  10  */
  11 
  12 /*
  13  * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
  14  * pages started 02.12.91, seems to work. - Linus.
  15  *
  16  * Tested sharing by executing about 30 /bin/sh: under the old kernel it
  17  * would have taken more than the 6M I have free, but it worked well as
  18  * far as I could see.
  19  *
  20  * Also corrected some "invalidate()"s - I wasn't doing enough of them.
  21  */
  22 
  23 /*
  24  * Real VM (paging to/from disk) started 18.12.91. Much more work and
  25  * thought has to go into this. Oh, well..
  26  * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
  27  *              Found it. Everything seems to work now.
  28  * 20.12.91  -  Ok, making the swap-device changeable like the root.
  29  */
  30 
  31 #include <asm/system.h>
  32 
  33 #include <linux/signal.h>
  34 #include <linux/sched.h>
  35 #include <linux/head.h>
  36 #include <linux/kernel.h>
  37 #include <linux/errno.h>
  38 #include <linux/string.h>
  39 #include <linux/types.h>
  40 
  41 unsigned long high_memory = 0;
  42 
  43 int nr_free_pages = 0;
  44 unsigned long free_page_list = 0;
  45 /*
  46  * The secondary free_page_list is used for malloc() etc things that
  47  * may need pages during interrupts etc. Normal get_free_page() operations
  48  * don't touch it, so it stays as a kind of "panic-list", that can be
  49  * accessed when all other mm tricks have failed.
  50  */
  51 int nr_secondary_pages = 0;
  52 unsigned long secondary_page_list = 0;
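
/*
 * Annotation (not part of the original source): a miniature sketch of
 * the primary/secondary list discipline described above. The names are
 * invented for illustration. Free pages are linked through their own
 * first word, exactly as mem_init() sets them up below.
 */
static unsigned long pop_page(unsigned long *list, int *count)
{
        unsigned long page = *list;

        if (page) {
                *list = *(unsigned long *) page;  /* next link lives in the page itself */
                --*count;
        }
        return page;
}

/*
 * Ordinary callers would draw from the primary list only; the
 * secondary list is touched just when the primary one is empty.
 */
static unsigned long demo_get_page(void)
{
        unsigned long page = pop_page(&free_page_list, &nr_free_pages);

        if (!page)
                page = pop_page(&secondary_page_list, &nr_secondary_pages);
        return page;
}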
  53 
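/* copies one 4 kB page: "rep movsl" moves 1024 longs from 'from' to 'to' */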
  54 #define copy_page(from,to) \
  55 __asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024):"cx","di","si")
  56 
  57 unsigned short * mem_map = NULL;
  58 
  59 #define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
  60 
  61 /*
  62  * oom() prints a message (so that the user knows why the process died),
  63  * and gives the process an untrappable SIGSEGV.
  64  */
  65 void oom(struct task_struct * task)
  66 {
  67         printk("\nout of memory\n");
  68         task->sigaction[SIGKILL-1].sa_handler = NULL;
  69         task->blocked &= ~(1<<(SIGKILL-1));
  70         send_sig(SIGKILL,task,1);
  71 }
  72 
  73 static void free_one_table(unsigned long * page_dir)
  74 {
  75         int j;
  76         unsigned long pg_table = *page_dir;
  77         unsigned long * page_table;
  78 
  79         if (!pg_table)
  80                 return;
  81         if (pg_table >= high_memory || !(pg_table & 1)) {
  82                 printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
  83                 *page_dir = 0;
  84                 return;
  85         }
  86         *page_dir = 0;
  87         if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
  88                 return;
  89         page_table = (unsigned long *) (pg_table & 0xfffff000);
  90         for (j = 0 ; j < 1024 ; j++,page_table++) {
  91                 unsigned long pg = *page_table;
  92                 
  93                 if (!pg)
  94                         continue;
  95                 *page_table = 0;
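                /* bit 0 set: page is present in core; clear: a swap entry, stored shifted left by one */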
  96                 if (1 & pg)
  97                         free_page(0xfffff000 & pg);
  98                 else
  99                         swap_free(pg >> 1);
 100         }
 101         free_page(0xfffff000 & pg_table);
 102 }
 103 
 104 /*
 105  * This function clears all user-level page tables of a process - this
 106  * is needed by execve(), so that old pages aren't in the way. Note that
 107  * unlike 'free_page_tables()', this function still leaves a valid
 108  * page-table-tree in memory: it just removes the user pages. The two
 109  * functions are similar, but there is a fundamental difference.
 110  */
 111 void clear_page_tables(struct task_struct * tsk)
 112 {
 113         int i;
 114         unsigned long * page_dir;
 115 
 116         if (!tsk)
 117                 return;
 118         if (tsk == task[0])
 119                 panic("task[0] (swapper) doesn't support exec() yet\n");
 120         page_dir = (unsigned long *) tsk->tss.cr3;
 121         if (!page_dir) {
 122                 printk("Trying to clear kernel page-directory: not good\n");
 123                 return;
 124         }
 125         for (i = 0 ; i < 768 ; i++,page_dir++)
 126                 free_one_table(page_dir);
 127         invalidate();
 128         return;
 129 }
 130 
 131 /*
 132  * This function frees up all page tables of a process when it exits.
 133  */
 134 void free_page_tables(struct task_struct * tsk)
 135 {
 136         int i;
 137         unsigned long pg_dir;
 138         unsigned long * page_dir;
 139 
 140         if (!tsk)
 141                 return;
 142         if (tsk == task[0]) {
 143                 printk("task[0] (swapper) killed: unable to recover\n");
 144                 panic("Trying to free up swapper memory space");
 145         }
 146         pg_dir = tsk->tss.cr3;
 147         if (!pg_dir) {
 148                 printk("Trying to free kernel page-directory: not good\n");
 149                 return;
 150         }
 151         tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
 152         if (tsk == current)
 153                 __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
 154         page_dir = (unsigned long *) pg_dir;
 155         for (i = 0 ; i < 1024 ; i++,page_dir++)
 156                 free_one_table(page_dir);
 157         free_page(pg_dir);
 158         invalidate();
 159 }
 160 
 161 /*
 162  * copy_page_tables() just copies the whole process memory range:
 163  * note the special handling of RESERVED (ie kernel) pages, which
 164  * means that they are always shared by all processes.
 165  */
 166 int copy_page_tables(struct task_struct * tsk)
 167 {
 168         int i;
 169         unsigned long old_pg_dir, *old_page_dir;
 170         unsigned long new_pg_dir, *new_page_dir;
 171 
 172         old_pg_dir = current->tss.cr3;
 173         new_pg_dir = get_free_page(GFP_KERNEL);
 174         if (!new_pg_dir)
 175                 return -ENOMEM;
 176         tsk->tss.cr3 = new_pg_dir;
 177         old_page_dir = (unsigned long *) old_pg_dir;
 178         new_page_dir = (unsigned long *) new_pg_dir;
 179         for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) {
 180                 int j;
 181                 unsigned long old_pg_table, *old_page_table;
 182                 unsigned long new_pg_table, *new_page_table;
 183 
 184                 old_pg_table = *old_page_dir;
 185                 if (!old_pg_table)
 186                         continue;
 187                 if (old_pg_table >= high_memory || !(1 & old_pg_table)) {
 188                         printk("copy_page_tables: bad page table: "
 189                                 "probable memory corruption");
 190                         *old_page_dir = 0;
 191                         continue;
 192                 }
 193                 if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
 194                         *new_page_dir = old_pg_table;
 195                         continue;
 196                 }
 197                 new_pg_table = get_free_page(GFP_KERNEL);
 198                 if (!new_pg_table) {
 199                         free_page_tables(tsk);
 200                         return -ENOMEM;
 201                 }
 202                 *new_page_dir = new_pg_table | PAGE_ACCESSED | 7;
 203                 old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
 204                 new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
 205                 for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
 206                         unsigned long pg;
 207                         pg = *old_page_table;
 208                         if (!pg)
 209                                 continue;
 210                         if (!(pg & PAGE_PRESENT)) {
 211                                 swap_duplicate(pg>>1);
 212                                 *new_page_table = pg;
 213                                 continue;
 214                         }
 215                         pg &= ~2;
 216                         *new_page_table = pg;
 217                         if (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)
 218                                 continue;
 219                         *old_page_table = pg;
 220                         mem_map[MAP_NR(pg)]++;
 221                 }
 222         }
 223         invalidate();
 224         return 0;
 225 }
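
/*
 * Annotation (not part of the original source): for ordinary (non-
 * RESERVED) pages, "pg &= ~2" above leaves both the parent's and the
 * child's entry read-only, so whichever process writes first faults
 * into do_wp_page() and gets a private copy. mem_map[] counts how many
 * page tables reference each frame.
 */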
 226 
 227 /*
 228  * a more complete version of free_page_tables which performs with page
 229  * granularity.
 230  */
 231 int unmap_page_range(unsigned long from, unsigned long size)
 232 {
 233         unsigned long page, page_dir;
 234         unsigned long *page_table, *dir;
 235         unsigned long poff, pcnt, pc;
 236 
 237         if (from & 0xfff)
 238                 panic("unmap_page_range called with wrong alignment");
 239         if (!from)
 240                 panic("unmap_page_range trying to free swapper memory space");
 241         size = (size + 0xfff) >> PAGE_SHIFT;
 242         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 243         poff = (from >> PAGE_SHIFT) & 0x3ff;
 244         if ((pcnt = 1024 - poff) > size)
 245                 pcnt = size;
 246 
 247         for ( ; size > 0; ++dir, size -= pcnt,
 248              pcnt = (size > 1024 ? 1024 : size)) {
 249                 if (!(page_dir = *dir)) {
 250                         poff = 0;
 251                         continue;
 252                 }
 253                 if (!(page_dir & 1)) {
 254                         printk("unmap_page_range: bad page directory.");
 255                         continue;
 256                 }
 257                 page_table = (unsigned long *)(0xfffff000 & page_dir);
 258                 if (poff) {
 259                         page_table += poff;
 260                         poff = 0;
 261                 }
 262                 for (pc = pcnt; pc--; page_table++) {
 263                         if ((page = *page_table) != 0) {
 264                                 --current->rss;
 265                                 *page_table = 0;
 266                                 if (1 & page)
 267                                         free_page(0xfffff000 & page);
 268                                 else
 269                                         swap_free(page >> 1);
 270                         }
 271                 }
 272                 if (pcnt == 1024) {
 273                         free_page(0xfffff000 & page_dir);
 274                         *dir = 0;
 275                 }
 276         }
 277         invalidate();
 278         return 0;
 279 }
 280 
 281 /*
 282  * maps a range of physical memory into the requested pages. the old
  283  * mappings are removed. any references to nonexistent pages result
 284  * in null mappings (currently treated as "copy-on-access")
 285  *
 286  * permiss is encoded as cxwr (copy,exec,write,read) where copy modifies
 287  * the behavior of write to be copy-on-write.
 288  *
 289  * due to current limitations, we actually have the following
 290  *              on              off
 291  * read:        yes             yes
 292  * write/copy:  yes/copy        copy/copy
 293  * exec:        yes             yes
 294  */
 295 int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
 296                  int permiss)
 297 {
 298         unsigned long *page_table, *dir;
 299         unsigned long poff, pcnt;
 300         unsigned long page;
 301 
 302         if ((from & 0xfff) || (to & 0xfff))
 303                 panic("remap_page_range called with wrong alignment");
 304         dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
 305         size = (size + 0xfff) >> PAGE_SHIFT;
 306         poff = (from >> PAGE_SHIFT) & 0x3ff;
 307         if ((pcnt = 1024 - poff) > size)
 308                 pcnt = size;
 309 
 310         while (size > 0) {
 311                 if (!(1 & *dir)) {
 312                         if (!(page_table = (unsigned long *)get_free_page(GFP_KERNEL))) {
 313                                 invalidate();
 314                                 return -1;
 315                         }
 316                         *dir++ = ((unsigned long) page_table) | PAGE_ACCESSED | 7;
 317                 }
 318                 else
 319                         page_table = (unsigned long *)(0xfffff000 & *dir++);
 320                 if (poff) {
 321                         page_table += poff;
 322                         poff = 0;
 323                 }
 324 
 325                 for (size -= pcnt; pcnt-- ;) {
 326                         int mask;
 327 
 328                         mask = 4;
 329                         if (permiss & 1)
 330                                 mask |= 1;
 331                         if (permiss & 2) {
 332                                 if (permiss & 8)
 333                                         mask |= 1;
 334                                 else
 335                                         mask |= 3;
 336                         }
 337                         if (permiss & 4)
 338                                 mask |= 1;
 339 
 340                         if ((page = *page_table) != 0) {
 341                                 *page_table = 0;
 342                                 --current->rss;
 343                                 if (1 & page)
 344                                         free_page(0xfffff000 & page);
 345                                 else
 346                                         swap_free(page >> 1);
 347                         }
 348 
 349                         /*
 350                          * i'm not sure of the second cond here. should we
 351                          * report failure?
 352                          * the first condition should return an invalid access
 353                          * when the page is referenced. current assumptions
 354                          * cause it to be treated as demand allocation.
 355                          */
 356                         if (mask == 4 || to >= high_memory || !mem_map[MAP_NR(to)])
 357                                 *page_table++ = 0;      /* not present */
 358                         else {
 359                                 ++current->rss;
 360                                 *page_table++ = (to | mask);
 361                                 if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED))
 362                                         mem_map[MAP_NR(to)]++;
 363                         }
 364                         to += PAGE_SIZE;
 365                 }
 366                 pcnt = (size > 1024 ? 1024 : size);
 367         }
 368         invalidate();
 369         return 0;
 370 }
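
/*
 * Annotation (not part of the original source): the mask computation
 * above, restated as a standalone helper (pte_mask is an invented
 * name). Tabulated, it gives the "on/off" behaviour listed in the
 * comment before remap_page_range():
 *
 *      r (1)       -> 5  (present | user, read-only)
 *      w (2)       -> 7  (present | user | writable)
 *      w|c (2|8)   -> 5  (read-only: the first write faults and is copied)
 *      x (4)       -> 5  (on the 386, readable means executable)
 *      none        -> 4  (entry left not present: demand allocation)
 */
static int pte_mask(int permiss)
{
        int mask = 4;                   /* user-accessible */

        if (permiss & 1)                /* read */
                mask |= 1;              /* present */
        if (permiss & 2) {              /* write */
                if (permiss & 8)        /* copy: keep read-only, COW on fault */
                        mask |= 1;
                else
                        mask |= 3;      /* present | writable */
        }
        if (permiss & 4)                /* exec */
                mask |= 1;
        return mask;
}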
 371 
 372 /*
 373  * This function puts a page in memory at the wanted address.
 374  * It returns the physical address of the page gotten, 0 if
 375  * out of memory (either when trying to access page-table or
 376  * page.)
 377  * if wp = 1 the page will be write protected
 378  */
 379 static unsigned long put_page(struct task_struct * tsk,unsigned long page,unsigned long address,int wp)
 380 {
 381         unsigned long tmp, *page_table;
 382 
 383 /* NOTE !!! This uses the fact that _pg_dir=0 */
 384 
 385         if (page >= high_memory) {
 386                 printk("put_page: trying to put page %p at %p\n",page,address);
 387                 return 0;
 388         }
 389         tmp = mem_map[MAP_NR(page)];
 390         if (!(tmp & MAP_PAGE_RESERVED) && (tmp != 1)) {
 391                 printk("put_page: mem_map disagrees with %p at %p\n",page,address);
 392                 return 0;
 393         }
 394         page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 395         if ((*page_table)&1)
 396                 page_table = (unsigned long *) (0xfffff000 & *page_table);
 397         else {
 398                 tmp = get_free_page(GFP_KERNEL);
 399                 if (!tmp) {
 400                         oom(tsk);
 401                         tmp = BAD_PAGETABLE;
 402                 }
 403                 *page_table = tmp | PAGE_ACCESSED | 7;
 404                 return 0;
 405         }
 406         page_table += (address >> PAGE_SHIFT) & 0x3ff;
 407         if (*page_table) {
 408                 printk("put_page: page already exists\n");
 409                 *page_table = 0;
 410                 invalidate();
 411         }
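        /* present (1) + user (4), plus R/W (2) only when wp == 0 */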
 412         *page_table = page | PAGE_ACCESSED | 5 | (!wp << 1);
 413 /* no need for invalidate */
 414         return page;
 415 }
 416 
 417 /*
 418  * The previous function doesn't work very well if you also want to mark
 419  * the page dirty: exec.c wants this, as it has earlier changed the page,
 420  * and we want the dirty-status to be correct (for VM). Thus the same
 421  * routine, but this time we mark it dirty too.
 422  */
 423 unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
 424 {
 425         unsigned long tmp, *page_table;
 426 
 427 /* NOTE !!! This uses the fact that _pg_dir=0 */
 428 
 429         if (page >= high_memory)
 430                 printk("put_dirty_page: trying to put page %p at %p\n",page,address);
 431         if (mem_map[MAP_NR(page)] != 1)
 432                 printk("mem_map disagrees with %p at %p\n",page,address);
 433         page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
 434         if ((*page_table)&1)
 435                 page_table = (unsigned long *) (0xfffff000 & *page_table);
 436         else {
 437                 if (!(tmp=get_free_page(GFP_KERNEL)))
 438                         return 0;
 439                 *page_table = tmp|7;
 440                 page_table = (unsigned long *) tmp;
 441         }
 442         page_table += (address >> PAGE_SHIFT) & 0x3ff;
 443         if (*page_table) {
 444                 printk("put_dirty_page: page already exists\n");
 445                 *page_table = 0;
 446                 invalidate();
 447         }
 448         *page_table = page | (PAGE_DIRTY | PAGE_ACCESSED | 7);
 449 /* no need for invalidate */
 450         return page;
 451 }
 452 
 453 /*
 454  * This routine handles present pages, when users try to write
 455  * to a shared page. It is done by copying the page to a new address
 456  * and decrementing the shared-page counter for the old page.
 457  *
 458  * Fixed the routine to repeat a bit more: this is slightly slower,
 459  * but there were race-conditions in the old code..
 460  */
 461 void do_wp_page(unsigned long error_code, unsigned long address,
 462         struct task_struct * tsk, unsigned long user_esp)
 463 {
 464         unsigned long pde, pte, old_page, dirty;
 465         unsigned long new_page = 0;
 466 
 467         /* check code space write */
 468         if (tsk == current && tsk->executable && CODE_SPACE(address, current)) {
 469                 /* don't send SIGSEGV when in kernel or v86 mode */
 470                 if (user_esp)
 471                         send_sig(SIGSEGV, tsk, 1);
 472                 /* Note that we still do the copy-on-write: if the process catches
 473                  * SIGSEGV we want things to work..
 474                  */
 475         }
 476 repeat:
 477         pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
 478         pte = *(unsigned long *) pde;
 479         if (!(pte & PAGE_PRESENT)) {
 480                 if (new_page)
 481                         free_page(new_page);
 482                 return;
 483         }
 484         if ((pte & 7) != 7 || pte >= high_memory) {
 485                 printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
 486                 *(unsigned long *) pde = BAD_PAGETABLE | 7;
 487                 send_sig(SIGKILL, tsk, 1);
 488                 if (new_page)
 489                         free_page(new_page);
 490                 return;
 491         }
 492         pte &= 0xfffff000;
 493         pte += (address>>10) & 0xffc;
 494         old_page = *(unsigned long *) pte;
 495         if (!(old_page & PAGE_PRESENT)) {
 496                 if (new_page)
 497                         free_page(new_page);
 498                 return;
 499         }
 500         if (old_page >= high_memory) {
 501                 printk("do_wp_page: bogus page at address %08x (%08x)\n",address,old_page);
 502                 *(unsigned long *) pte = BAD_PAGE | 7;
 503                 send_sig(SIGKILL, tsk, 1);
 504                 if (new_page)
 505                         free_page(new_page);
 506                 return;
 507         }
 508         if (old_page & PAGE_RW) {
 509                 if (new_page)
 510                         free_page(new_page);
 511                 return;
 512         }
 513         tsk->min_flt++;
 514         dirty = old_page & PAGE_DIRTY;
 515         old_page &= 0xfffff000;
 516         if (mem_map[MAP_NR(old_page)]==1) {
 517                 *(unsigned long *) pte |= 2;
 518                 invalidate();
 519                 if (new_page)
 520                         free_page(new_page);
 521                 return;
 522         }
 523         if (!new_page && (new_page=get_free_page(GFP_KERNEL)))
 524                 goto repeat;
 525         if (new_page)
 526                 copy_page(old_page,new_page);
 527         else {
 528                 new_page = BAD_PAGE;
 529                 oom(tsk);
 530         }
 531         *(unsigned long *) pte = new_page | dirty | PAGE_ACCESSED | 7;
 532         free_page(old_page);
 533         invalidate();
 534 }
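
/*
 * Annotation (not part of the original source): three outcomes above.
 * A page that is already writable means the fault was spurious; a
 * mem_map[] count of 1 means the frame is no longer shared, so the R/W
 * bit is simply turned back on; otherwise the page is copied and the
 * old frame's count drops by one through free_page().
 */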
 535 
 536 void write_verify(unsigned long address)
 537 {
 538         if (address < TASK_SIZE)
 539                 do_wp_page(1,address,current,0);
 540 }
 541 
 542 static void get_empty_page(struct task_struct * tsk, unsigned long address)
 543 {
 544         unsigned long tmp;
 545 
 546         tmp = get_free_page(GFP_KERNEL);
 547         if (!tmp) {
 548                 oom(tsk);
 549                 tmp = BAD_PAGE;
 550         }
 551         if (!put_page(tsk,tmp,address,0))
 552                 free_page(tmp);
 553 }
 554 
 555 /*
 556  * try_to_share() checks the page at address "address" in the task "p",
 557  * to see if it exists, and if it is clean. If so, share it with the current
 558  * task.
 559  *
 560  * NOTE! This assumes we have checked that p != current, and that they
 561  * share the same executable or library.
 562  */
 563 static int try_to_share(unsigned long address, struct task_struct * tsk,
 564         struct task_struct * p)
 565 {
 566         unsigned long from;
 567         unsigned long to;
 568         unsigned long from_page;
 569         unsigned long to_page;
 570         unsigned long phys_addr;
 571 
 572         from_page = p->tss.cr3 + ((address>>20) & 0xffc);
 573         to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
 574 /* is there a page-directory at from? */
 575         from = *(unsigned long *) from_page;
 576         if (!(from & 1))
 577                 return 0;
 578         from &= 0xfffff000;
 579         from_page = from + ((address>>10) & 0xffc);
 580         phys_addr = *(unsigned long *) from_page;
 581 /* is the page clean and present? */
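/* 0x41 = dirty (0x40) | present (0x01): present must be set, dirty clear */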
 582         if ((phys_addr & 0x41) != 0x01)
 583                 return 0;
 584         phys_addr &= 0xfffff000;
 585         if (phys_addr >= high_memory)
 586                 return 0;
 587         if (mem_map[MAP_NR(phys_addr)] & MAP_PAGE_RESERVED)
 588                 return 0;
 589 /* share them: write-protect */
 590         *(unsigned long *) from_page &= ~2;
 591         invalidate();
 592         phys_addr >>= PAGE_SHIFT;
 593         mem_map[phys_addr]++;
 594         to = *(unsigned long *) to_page;
 595         if (!(to & 1)) {
 596                 to = get_free_page(GFP_KERNEL);
 597                 if (!to) {
 598                         mem_map[phys_addr]--;
 599                         return 0;
 600                 }
 601                 *(unsigned long *) to_page = to | PAGE_ACCESSED | 7;
 602         }
 603         to &= 0xfffff000;
 604         to_page = to + ((address>>10) & 0xffc);
 605         if (1 & *(unsigned long *) to_page)
 606                 panic("try_to_share: to_page already exists");
 607         *(unsigned long *) to_page = *(unsigned long *) from_page;
 608         return 1;
 609 }
 610 
 611 /*
 612  * share_page() tries to find a process that could share a page with
 613  * the current one. Address is the address of the wanted page relative
 614  * to the current data space.
 615  *
 616  * We first check if it is at all feasible by checking executable->i_count.
 617  * It should be >1 if there are other tasks sharing this inode.
 618  */
 619 static int share_page(struct task_struct * tsk, struct inode * inode, unsigned long address)
 620 {
 621         struct task_struct ** p;
 622         int i;
 623 
 624         if (!inode || inode->i_count < 2)
 625                 return 0;
 626         for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
 627                 if (!*p)
 628                         continue;
 629                 if (tsk == *p)
 630                         continue;
 631                 if (inode != (*p)->executable) {
 632                         for (i=0; i < (*p)->numlibraries; i++)
 633                                 if (inode == (*p)->libraries[i].library)
 634                                         break;
 635                         if (i >= (*p)->numlibraries)
 636                                 continue;
 637                 }
 638                 if (try_to_share(address,tsk,*p))
 639                         return 1;
 640         }
 641         return 0;
 642 }
 643 
 644 /*
 645  * fill in an empty page-table if none exists
 646  */
 647 static unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
 648 {
 649         unsigned long page = 0;
 650         unsigned long *p;
 651 repeat:
 652         p = (unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc));
 653         if (1 & *p) {
 654                 free_page(page);
 655                 return *p;
 656         }
 657         if (*p) {
  658                 printk("get_empty_pgtable: bad page-directory entry\n");
 659                 *p = 0;
 660         }
 661         if (page) {
 662                 *p = page | PAGE_ACCESSED | 7;
 663                 return *p;
 664         }
 665         if ((page = get_free_page(GFP_KERNEL)) != 0)
 666                 goto repeat;
 667         oom(current);
 668         *p = BAD_PAGETABLE | 7;
 669         return 0;
 670 }
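
/*
 * Annotation (not part of the original source): the allocate-then-recheck
 * loop above exists because get_free_page(GFP_KERNEL) may sleep; another
 * fault can install a page table meanwhile, in which case the fresh page
 * is freed and the entry found on the recheck is used instead.
 */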
 671 
 672 void do_no_page(unsigned long error_code, unsigned long address,
 673         struct task_struct *tsk, unsigned long user_esp)
 674 {
 675         int nr[4];
 676         unsigned long tmp;
 677         unsigned long page;
 678         unsigned int block,i;
 679         struct inode * inode;
 680 
 681         page = get_empty_pgtable(tsk,address);
 682         if (!page)
 683                 return;
 684         page &= 0xfffff000;
 685         page += (address >> 10) & 0xffc;
 686         tmp = *(unsigned long *) page;
 687         if (tmp & 1)
 688                 return;
 689         ++tsk->rss;
 690         if (tmp) {
 691                 ++tsk->maj_flt;
 692                 swap_in((unsigned long *) page);
 693                 return;
 694         }
 695         address &= 0xfffff000;
 696         inode = NULL;
 697         block = 0;
 698         if (address < tsk->end_data) {
 699                 inode = tsk->executable;
 700                 block = 1 + address / BLOCK_SIZE;
 701         } else {
 702                 i = tsk->numlibraries;
 703                 while (i-- > 0) {
 704                         if (address < tsk->libraries[i].start)
 705                                 continue;
 706                         block = address - tsk->libraries[i].start;
 707                         if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
 708                                 continue;
 709                         inode = tsk->libraries[i].library;
 710                         if (block < tsk->libraries[i].length)
 711                                 block = 1 + block / BLOCK_SIZE;
 712                         else
 713                                 block = 0;
 714                         break;
 715                 }
 716         }
 717         if (!inode) {
 718                 ++tsk->min_flt;
 719                 get_empty_page(tsk,address);
 720                 if (tsk != current)
 721                         return;
 722                 if (address < tsk->brk)
 723                         return;
 724                 if (address+8192 >= (user_esp & 0xfffff000))
 725                         return;
 726                 send_sig(SIGSEGV,tsk,1);
 727                 return;
 728         }
 729         if (share_page(tsk,inode,address)) {
 730                 ++tsk->min_flt;
 731                 return;
 732         }
 733         ++tsk->maj_flt;
 734         page = get_free_page(GFP_KERNEL);
 735         if (!page) {
 736                 oom(current);
 737                 put_page(tsk,BAD_PAGE,address,0);
 738                 return;
 739         }
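        /* a 4 kB page spans four BLOCK_SIZE (1 kB) fs blocks, hence nr[4]:
           map each block, then read all four in one request */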
 740         if (block) {
 741                 for (i=0 ; i<4 ; block++,i++)
 742                         nr[i] = bmap(inode,block);
 743                 bread_page(page,inode->i_dev,nr);
 744         }
 745         if (share_page(tsk,inode,address)) {
 746                 free_page(page);
 747                 return;
 748         }
 749         i = address + PAGE_SIZE - tsk->end_data;
 750         if (i > PAGE_SIZE-1)
 751                 i = 0;
 752         tmp = page + PAGE_SIZE;
 753         while (i--) {
 754                 tmp--;
 755                 *(char *)tmp = 0;
 756         }
 757         if (put_page(tsk,page,address,CODE_SPACE(address, tsk)))
 758                 return;
 759         free_page(page);
 760         oom(current);
 761 }
 762 
 763 /*
 764  * This routine handles page faults.  It determines the address,
 765  * and the problem, and then passes it off to one of the appropriate
 766  * routines.
 767  */
 768 void do_page_fault(unsigned long *esp, unsigned long error_code)
 769 {
 770         unsigned long address;
 771         unsigned long user_esp = 0;
 772         unsigned int bit;
 773         extern void die_if_kernel();
 774 
 775         /* get the address */
 776         __asm__("movl %%cr2,%0":"=r" (address));
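        /* 386 page-fault error code: bit 0 = protection violation (page was
           present), bit 1 = write access, bit 2 = fault taken in user mode */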
 777         if (address < TASK_SIZE) {
 778                 if (error_code & 4) {   /* user mode access? */
 779                         if (esp[2] & VM_MASK) {
 780                                 bit = (address - 0xA0000) >> PAGE_SHIFT;
 781                                 if (bit < 32)
 782                                         current->screen_bitmap |= 1 << bit;
 783                         } else 
 784                                 user_esp = esp[3];
 785                 }
 786                 if (error_code & 1)
 787                         do_wp_page(error_code, address, current, user_esp);
 788                 else
 789                         do_no_page(error_code, address, current, user_esp);
 790                 return;
 791         }
 792         printk("Unable to handle kernel paging request at address %08x\n",address);
 793         die_if_kernel("Oops",esp,error_code);
 794         do_exit(SIGKILL);
 795 }
 796 
 797 /*
 798  * BAD_PAGE is the page that is used for page faults when linux
 799  * is out-of-memory. Older versions of linux just did a
 800  * do_exit(), but using this instead means there is less risk
  801  * of a process dying in kernel mode, possibly leaving an inode
 802  * unused etc..
 803  *
 804  * BAD_PAGETABLE is the accompanying page-table: it is initialized
 805  * to point to BAD_PAGE entries.
 806  */
 807 unsigned long __bad_pagetable(void)
 808 {
 809         extern char empty_bad_page_table[PAGE_SIZE];
 810 
 811         __asm__ __volatile__("cld ; rep ; stosl"
 812                 ::"a" (7+BAD_PAGE),
 813                   "D" ((long) empty_bad_page_table),
 814                   "c" (1024)
 815                 :"di","cx");
 816         return (unsigned long) empty_bad_page_table;
 817 }
 818 
 819 unsigned long __bad_page(void)
 820 {
 821         extern char empty_bad_page[PAGE_SIZE];
 822 
 823         __asm__ __volatile__("cld ; rep ; stosl"
 824                 ::"a" (0),
 825                   "D" ((long) empty_bad_page),
 826                   "c" (1024)
 827                 :"di","cx");
 828         return (unsigned long) empty_bad_page;
 829 }
 830 
 831 void show_mem(void)
 832 {
 833         int i,free = 0,total = 0,reserved = 0;
 834         int shared = 0;
 835 
 836         printk("Mem-info:\n");
 837         printk("Free pages:      %6d\n",nr_free_pages);
 838         printk("Secondary pages: %6d\n",nr_secondary_pages);
 839         printk("Buffer heads:    %6d\n",nr_buffer_heads);
 840         printk("Buffer blocks:   %6d\n",nr_buffers);
 841         i = high_memory >> PAGE_SHIFT;
 842         while (i-- > 0) {
 843                 total++;
 844                 if (mem_map[i] & MAP_PAGE_RESERVED)
 845                         reserved++;
 846                 else if (!mem_map[i])
 847                         free++;
 848                 else
 849                         shared += mem_map[i]-1;
 850         }
 851         printk("%d pages of RAM\n",total);
 852         printk("%d free pages\n",free);
 853         printk("%d reserved pages\n",reserved);
 854         printk("%d pages shared\n",shared);
 855 }
 856 
 857 /*
 858  * paging_init() sets up the page tables - note that the first 4MB are
 859  * already mapped by head.S.
 860  *
  861  * This routine also unmaps the page at virtual kernel address 0, so
 862  * that we can trap those pesky NULL-reference errors in the kernel.
 863  */
 864 unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
 865 {
 866         unsigned long * pg_dir;
 867         unsigned long * pg_table;
 868         unsigned long tmp;
 869         unsigned long address;
 870 
 871 /*
 872  * Physical page 0 is special: it's a "zero-page", and is guaranteed to
 873  * stay that way - it's write-protected and when there is a c-o-w, the
 874  * mm handler treats it specially.
 875  */
 876         memset((void *) 0, 0, 4096);
 877         start_mem += 4095;
 878         start_mem &= 0xfffff000;
 879         address = 0;
 880         pg_dir = swapper_pg_dir + 768;          /* at virtual addr 0xC0000000 */
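        /* (768 because 0xC0000000 >> 22 == 768; each directory slot maps 4 MB) */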
 881         while (address < end_mem) {
 882                 tmp = *pg_dir;
 883                 if (!tmp) {
 884                         tmp = start_mem;
 885                         *pg_dir = tmp | 7;
 886                         start_mem += 4096;
 887                 }
 888                 pg_dir++;
 889                 pg_table = (unsigned long *) (tmp & 0xfffff000);
 890                 for (tmp = 0 ; tmp < 1024 ; tmp++,pg_table++) {
 891                         if (address && address < end_mem)
 892                                 *pg_table = 7 + address;
 893                         else
 894                                 *pg_table = 0;
 895                         address += 4096;
 896                 }
 897         }
 898         invalidate();
 899         return start_mem;
 900 }
 901 
 902 void mem_init(unsigned long start_low_mem,
 903               unsigned long start_mem, unsigned long end_mem)
 904 {
 905         int codepages = 0;
 906         int reservedpages = 0;
 907         int datapages = 0;
 908         unsigned long tmp;
 909         unsigned short * p;
 910 
 911         cli();
 912         end_mem &= 0xfffff000;
 913         high_memory = end_mem;
 914         start_mem += 0x0000000f;
 915         start_mem &= 0xfffffff0;
 916         tmp = MAP_NR(end_mem);
 917         mem_map = (unsigned short *) start_mem;
 918         p = mem_map + tmp;
 919         start_mem = (unsigned long) p;
 920         while (p > mem_map)
 921                 *--p = MAP_PAGE_RESERVED;
 922         start_low_mem += 0x00000fff;
 923         start_low_mem &= 0xfffff000;
 924         start_mem += 0x00000fff;
 925         start_mem &= 0xfffff000;
 926         while (start_low_mem < 0xA0000) {
 927                 mem_map[MAP_NR(start_low_mem)] = 0;
 928                 start_low_mem += 4096;
 929         }
 930         while (start_mem < end_mem) {
 931                 mem_map[MAP_NR(start_mem)] = 0;
 932                 start_mem += 4096;
 933         }
 934         free_page_list = 0;
 935         nr_free_pages = 0;
 936         for (tmp = 0 ; tmp < end_mem ; tmp += 4096) {
 937                 if (mem_map[MAP_NR(tmp)]) {
 938                         if (tmp < 0xA0000)
 939                                 codepages++;
 940                         else if (tmp < 0x100000)
 941                                 reservedpages++;
 942                         else
 943                                 datapages++;
 944                         continue;
 945                 }
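                /* the free list is threaded through the pages themselves:
                   word 0 of each free page holds the address of the next */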
 946                 *(unsigned long *) tmp = free_page_list;
 947                 free_page_list = tmp;
 948                 nr_free_pages++;
 949         }
 950         tmp = nr_free_pages << PAGE_SHIFT;
 951         printk("Memory: %dk/%dk available (%dk kernel, %dk reserved, %dk data)\n",
 952                 tmp >> 10,
 953                 end_mem >> 10,
 954                 codepages << 2,
 955                 reservedpages << 2,
 956                 datapages << 2);
 957         return;
 958 }
 959 
 960 void si_meminfo(struct sysinfo *val)
 961 {
 962         int i;
 963 
 964         i = high_memory >> PAGE_SHIFT;
 965         val->totalram = 0;
 966         val->freeram = 0;
 967         val->sharedram = 0;
 968         val->bufferram = buffermem;
 969         while (i-- > 0)  {
 970                 if (mem_map[i] & MAP_PAGE_RESERVED)
 971                         continue;
 972                 val->totalram++;
 973                 if (!mem_map[i]) {
 974                         val->freeram++;
 975                         continue;
 976                 }
 977                 val->sharedram += mem_map[i]-1;
 978         }
 979         val->totalram <<= PAGE_SHIFT;
 980         val->freeram <<= PAGE_SHIFT;
 981         val->sharedram <<= PAGE_SHIFT;
 982         return;
 983 }
