root/mm/memory.c


DEFINITIONS

This source file includes the following definitions.
  1. oom
  2. free_one_table
  3. clear_page_tables
  4. free_page_tables
  5. copy_page_tables
  6. unmap_page_range
  7. remap_page_range
  8. put_page
  9. put_dirty_page
  10. do_wp_page
  11. write_verify
  12. get_empty_page
  13. try_to_share
  14. share_page
  15. get_empty_pgtable
  16. do_no_page
  17. do_page_fault
  18. __bad_pagetable
  19. __bad_page
  20. show_mem
  21. paging_init
  22. mem_init
  23. si_meminfo

/*
 *  linux/mm/memory.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * demand-loading started 01.12.91 - seems it is high on the list of
 * things wanted, and it should be easy to implement. - Linus
 */

/*
 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
 * pages started 02.12.91, seems to work. - Linus.
 *
 * Tested sharing by executing about 30 /bin/sh: under the old kernel it
 * would have taken more than the 6M I have free, but it worked well as
 * far as I could see.
 *
 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
 */

/*
 * Real VM (paging to/from disk) started 18.12.91. Much more work and
 * thought has to go into this. Oh, well..
 * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
 *              Found it. Everything seems to work now.
 * 20.12.91  -  Ok, making the swap-device changeable like the root.
 */

#include <asm/system.h>

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

unsigned long high_memory = 0;

extern void sound_mem_init(void);

int nr_free_pages = 0;
unsigned long free_page_list = 0;
/*
 * The secondary free_page_list is used for malloc() etc things that
 * may need pages during interrupts etc. Normal get_free_page() operations
 * don't touch it, so it stays as a kind of "panic-list", that can be
 * accessed when all other mm tricks have failed.
 */
int nr_secondary_pages = 0;
unsigned long secondary_page_list = 0;

#define copy_page(from,to) \
__asm__("cld ; rep ; movsl"::"S" (from),"D" (to),"c" (1024):"cx","di","si")
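/*
 * copy_page copies one 4kB page: "rep movsl" moves %ecx = 1024 longwords
 * (4096 bytes) from the source in %esi to the destination in %edi.
 */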

unsigned short * mem_map = NULL;

#define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
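
/*
 * Page table entries in this file are mostly built from raw i386 bits
 * rather than symbolic flags: bit 0 is PRESENT, bit 1 is R/W, bit 2 is
 * USER, bit 5 (0x20) is ACCESSED and bit 6 (0x40) is DIRTY. So "| 7"
 * makes an entry present, writable and user-accessible, "& ~2" write-
 * protects it, and a check like "(phys_addr & 0x41) != 0x01" tests for
 * "present and clean" in one go.
 */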

/*
 * oom() prints a message (so that the user knows why the process died),
 * and gives the process an untrappable SIGKILL.
 */
void oom(struct task_struct * task)
{
        printk("\nout of memory\n");
        task->sigaction[SIGKILL-1].sa_handler = NULL;
        task->blocked &= ~(1<<(SIGKILL-1));
        send_sig(SIGKILL,task,1);
}

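/*
 * Throughout this file a non-zero page-table entry with the PRESENT bit
 * clear holds a swap entry in its upper 31 bits, which is why non-present
 * pages are released with swap_free(pg >> 1) and duplicated on fork with
 * swap_duplicate(pg >> 1).
 */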
static void free_one_table(unsigned long * page_dir)
{
        int j;
        unsigned long pg_table = *page_dir;
        unsigned long * page_table;

        if (!pg_table)
                return;
        if (pg_table >= high_memory || !(pg_table & 1)) {
                printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
                *page_dir = 0;
                return;
        }
        *page_dir = 0;
        if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
                return;
        page_table = (unsigned long *) (pg_table & 0xfffff000);
        for (j = 0 ; j < 1024 ; j++,page_table++) {
                unsigned long pg = *page_table;

                if (!pg)
                        continue;
                *page_table = 0;
                if (1 & pg)
                        free_page(0xfffff000 & pg);
                else
                        swap_free(pg >> 1);
        }
        free_page(0xfffff000 & pg_table);
}

/*
 * This function clears all user-level page tables of a process - this
 * is needed by execve(), so that old pages aren't in the way. Note that
 * unlike 'free_page_tables()', this function still leaves a valid
 * page-table-tree in memory: it just removes the user pages. The two
 * functions are similar, but there is a fundamental difference.
 */
void clear_page_tables(struct task_struct * tsk)
{
        int i;
        unsigned long * page_dir;

        if (!tsk)
                return;
        if (tsk == task[0])
                panic("task[0] (swapper) doesn't support exec() yet\n");
        page_dir = (unsigned long *) tsk->tss.cr3;
        if (!page_dir) {
                printk("Trying to clear kernel page-directory: not good\n");
                return;
        }
        for (i = 0 ; i < 768 ; i++,page_dir++)
                free_one_table(page_dir);
        invalidate();
        return;
}

/*
 * This function frees up all page tables of a process when it exits.
 */
void free_page_tables(struct task_struct * tsk)
{
        int i;
        unsigned long pg_dir;
        unsigned long * page_dir;

        if (!tsk)
                return;
        if (tsk == task[0]) {
                printk("task[0] (swapper) killed: unable to recover\n");
                panic("Trying to free up swapper memory space");
        }
        pg_dir = tsk->tss.cr3;
        if (!pg_dir) {
                printk("Trying to free kernel page-directory: not good\n");
                return;
        }
        tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
        if (tsk == current)
                __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
        page_dir = (unsigned long *) pg_dir;
        for (i = 0 ; i < 1024 ; i++,page_dir++)
                free_one_table(page_dir);
        free_page(pg_dir);
        invalidate();
}

/*
 * copy_page_tables() just copies the whole process memory range:
 * note the special handling of RESERVED (ie kernel) pages, which
 * means that they are always shared by all processes.
 */
int copy_page_tables(struct task_struct * tsk)
{
        int i;
        unsigned long old_pg_dir, *old_page_dir;
        unsigned long new_pg_dir, *new_page_dir;

        old_pg_dir = current->tss.cr3;
        new_pg_dir = get_free_page(GFP_KERNEL);
        if (!new_pg_dir)
                return -ENOMEM;
        tsk->tss.cr3 = new_pg_dir;
        old_page_dir = (unsigned long *) old_pg_dir;
        new_page_dir = (unsigned long *) new_pg_dir;
        for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) {
                int j;
                unsigned long old_pg_table, *old_page_table;
                unsigned long new_pg_table, *new_page_table;

                old_pg_table = *old_page_dir;
                if (!old_pg_table)
                        continue;
                if (old_pg_table >= high_memory || !(1 & old_pg_table)) {
                        printk("copy_page_tables: bad page table: "
                                "probable memory corruption");
                        *old_page_dir = 0;
                        continue;
                }
                if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
                        *new_page_dir = old_pg_table;
                        continue;
                }
                new_pg_table = get_free_page(GFP_KERNEL);
                if (!new_pg_table) {
                        free_page_tables(tsk);
                        return -ENOMEM;
                }
                *new_page_dir = new_pg_table | PAGE_ACCESSED | 7;
                old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
                new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
                for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
                        unsigned long pg;
                        pg = *old_page_table;
                        if (!pg)
                                continue;
                        if (!(pg & PAGE_PRESENT)) {
                                swap_duplicate(pg>>1);
                                *new_page_table = pg;
                                continue;
                        }
                        pg &= ~2;
                        *new_page_table = pg;
                        if (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)
                                continue;
                        *old_page_table = pg;
                        mem_map[MAP_NR(pg)]++;
                }
        }
        invalidate();
        return 0;
}
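
/*
 * Note how fork's copy-on-write is set up above: present pages are
 * copied into the child with the R/W bit cleared ("pg &= ~2") and the
 * parent's entry is write-protected the same way, while mem_map[] counts
 * the extra reference. The first write by either process then faults
 * into do_wp_page(), which gives the writer a private copy.
 */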

/*
 * a more complete version of free_page_tables which works at page
 * granularity.
 */
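/*
 * The index arithmetic used here and below: the top 10 bits of a linear
 * address select the page-directory entry, so "(addr >> 20) & 0xffc" is
 * that entry's byte offset (index * 4), and "(addr >> 10) & 0xffc" is
 * likewise the byte offset of the page-table entry within its table.
 */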
int unmap_page_range(unsigned long from, unsigned long size)
{
        unsigned long page, page_dir;
        unsigned long *page_table, *dir;
        unsigned long poff, pcnt, pc;

        if (from & 0xfff)
                panic("unmap_page_range called with wrong alignment");
        if (!from)
                panic("unmap_page_range trying to free swapper memory space");
        size = (size + 0xfff) >> PAGE_SHIFT;
        dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
        poff = (from >> PAGE_SHIFT) & 0x3ff;
        if ((pcnt = 1024 - poff) > size)
                pcnt = size;

        for ( ; size > 0; ++dir, size -= pcnt,
             pcnt = (size > 1024 ? 1024 : size)) {
                if (!(page_dir = *dir)) {
                        poff = 0;
                        continue;
                }
                if (!(page_dir & 1)) {
                        printk("unmap_page_range: bad page directory.");
                        continue;
                }
                page_table = (unsigned long *)(0xfffff000 & page_dir);
                if (poff) {
                        page_table += poff;
                        poff = 0;
                }
                for (pc = pcnt; pc--; page_table++) {
                        if ((page = *page_table) != 0) {
                                *page_table = 0;
                                if (1 & page) {
                                        --current->rss;
                                        free_page(0xfffff000 & page);
                                } else
                                        swap_free(page >> 1);
                        }
                }
                if (pcnt == 1024) {
                        free_page(0xfffff000 & page_dir);
                        *dir = 0;
                }
        }
        invalidate();
        return 0;
}

/*
 * maps a range of physical memory into the requested pages. the old
 * mappings are removed. any references to nonexistent pages result
 * in null mappings (currently treated as "copy-on-access")
 *
 * permiss is encoded as cxwr (copy,exec,write,read) where copy modifies
 * the behavior of write to be copy-on-write.
 *
 * due to current limitations, we actually have the following
 *              on              off
 * read:        yes             yes
 * write/copy:  yes/copy        copy/copy
 * exec:        yes             yes
 */
int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
                 int permiss)
{
        unsigned long *page_table, *dir;
        unsigned long poff, pcnt;
        unsigned long page;

        if ((from & 0xfff) || (to & 0xfff))
                panic("remap_page_range called with wrong alignment");
        dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
        size = (size + 0xfff) >> PAGE_SHIFT;
        poff = (from >> PAGE_SHIFT) & 0x3ff;
        if ((pcnt = 1024 - poff) > size)
                pcnt = size;

        while (size > 0) {
                if (!(1 & *dir)) {
                        if (!(page_table = (unsigned long *)get_free_page(GFP_KERNEL))) {
                                invalidate();
                                return -1;
                        }
                        *dir++ = ((unsigned long) page_table) | PAGE_ACCESSED | 7;
                }
                else
                        page_table = (unsigned long *)(0xfffff000 & *dir++);
                if (poff) {
                        page_table += poff;
                        poff = 0;
                }

                for (size -= pcnt; pcnt-- ;) {
                        int mask;

                        mask = 4;
                        if (permiss & 1)
                                mask |= 1;
                        if (permiss & 2) {
                                if (permiss & 8)
                                        mask |= 1;
                                else
                                        mask |= 3;
                        }
                        if (permiss & 4)
                                mask |= 1;

                        if ((page = *page_table) != 0) {
                                *page_table = 0;
                                --current->rss;
                                if (1 & page)
                                        free_page(0xfffff000 & page);
                                else
                                        swap_free(page >> 1);
                        }

                        /*
                         * i'm not sure of the second cond here. should we
                         * report failure?
                         * the first condition should return an invalid access
                         * when the page is referenced. current assumptions
                         * cause it to be treated as demand allocation.
                         */
                        if (mask == 4 || to >= high_memory || !mem_map[MAP_NR(to)])
                                *page_table++ = 0;      /* not present */
                        else {
                                ++current->rss;
                                *page_table++ = (to | mask);
                                if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED))
                                        mem_map[MAP_NR(to)]++;
                        }
                        to += PAGE_SIZE;
                }
                pcnt = (size > 1024 ? 1024 : size);
        }
        invalidate();
        return 0;
}
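
/*
 * For illustration only (this call does not appear in this file): under
 * the cxwr encoding above, a driver wanting to map 64kB of VGA memory
 * read/write at a page-aligned user address "vaddr" would ask for
 * read|write = 3:
 *
 *      remap_page_range(vaddr, 0xA0000, 0x10000, 3);
 */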

/*
 * This function puts a page in memory at the wanted address.
 * It returns the physical address of the page gotten, 0 if
 * out of memory (either when trying to access the page-table or
 * the page).
 * If wp = 1 the page will be write-protected.
 */
static unsigned long put_page(struct task_struct * tsk,unsigned long page,unsigned long address,int wp)
{
        unsigned long tmp, *page_table;

/* NOTE !!! This uses the fact that _pg_dir=0 */

        if (page >= high_memory) {
                printk("put_page: trying to put page %p at %p\n",page,address);
                return 0;
        }
        tmp = mem_map[MAP_NR(page)];
        if (!(tmp & MAP_PAGE_RESERVED) && (tmp != 1)) {
                printk("put_page: mem_map disagrees with %p at %p\n",page,address);
                return 0;
        }
        page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
        if ((*page_table) & PAGE_PRESENT)
                page_table = (unsigned long *) (0xfffff000 & *page_table);
        else {
                printk("put_page: bad page directory entry\n");
                oom(tsk);
                *page_table = BAD_PAGETABLE | PAGE_ACCESSED | 7;
                return 0;
        }
        page_table += (address >> PAGE_SHIFT) & 0x3ff;
        if (*page_table) {
                printk("put_page: page already exists\n");
                *page_table = 0;
                invalidate();
        }
        *page_table = page | PAGE_ACCESSED | 5 | (!wp << 1);
/* no need for invalidate */
        return page;
}

/*
 * The previous function doesn't work very well if you also want to mark
 * the page dirty: exec.c wants this, as it has earlier changed the page,
 * and we want the dirty-status to be correct (for VM). Thus the same
 * routine, but this time we mark it dirty too.
 */
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
        unsigned long tmp, *page_table;

/* NOTE !!! This uses the fact that _pg_dir=0 */

        if (page >= high_memory)
                printk("put_dirty_page: trying to put page %p at %p\n",page,address);
        if (mem_map[MAP_NR(page)] != 1)
                printk("mem_map disagrees with %p at %p\n",page,address);
        page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
        if ((*page_table)&1)
                page_table = (unsigned long *) (0xfffff000 & *page_table);
        else {
                if (!(tmp=get_free_page(GFP_KERNEL)))
                        return 0;
                *page_table = tmp|7;
                page_table = (unsigned long *) tmp;
        }
        page_table += (address >> PAGE_SHIFT) & 0x3ff;
        if (*page_table) {
                printk("put_dirty_page: page already exists\n");
                *page_table = 0;
                invalidate();
        }
        *page_table = page | (PAGE_DIRTY | PAGE_ACCESSED | 7);
/* no need for invalidate */
        return page;
}

/*
 * This routine handles present pages, when users try to write
 * to a shared page. It is done by copying the page to a new address
 * and decrementing the shared-page counter for the old page.
 *
 * Fixed the routine to repeat a bit more: this is slightly slower,
 * but there were race-conditions in the old code..
 */
void do_wp_page(unsigned long error_code, unsigned long address,
        struct task_struct * tsk, unsigned long user_esp)
{
        unsigned long pde, pte, old_page, dirty;
        unsigned long new_page = 0;

        /* check code space write */
        if (tsk == current && tsk->executable && CODE_SPACE(address, current)) {
                /* don't send SIGSEGV when in kernel or v86 mode */
                if (user_esp)
                        send_sig(SIGSEGV, tsk, 1);
                /* Note that we still do the copy-on-write: if the process catches
                 * SIGSEGV we want things to work..
                 */
        }
repeat:
        pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
        pte = *(unsigned long *) pde;
        if (!(pte & PAGE_PRESENT)) {
                if (new_page)
                        free_page(new_page);
                return;
        }
        if ((pte & 7) != 7 || pte >= high_memory) {
                printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
                *(unsigned long *) pde = BAD_PAGETABLE | 7;
                send_sig(SIGKILL, tsk, 1);
                if (new_page)
                        free_page(new_page);
                return;
        }
        pte &= 0xfffff000;
        pte += (address>>10) & 0xffc;
        old_page = *(unsigned long *) pte;
        if (!(old_page & PAGE_PRESENT)) {
                if (new_page)
                        free_page(new_page);
                return;
        }
        if (old_page >= high_memory) {
                printk("do_wp_page: bogus page at address %08x (%08x)\n",address,old_page);
                *(unsigned long *) pte = BAD_PAGE | 7;
                send_sig(SIGKILL, tsk, 1);
                if (new_page)
                        free_page(new_page);
                return;
        }
        if (old_page & PAGE_RW) {
                if (new_page)
                        free_page(new_page);
                return;
        }
        tsk->min_flt++;
        dirty = old_page & PAGE_DIRTY;
        old_page &= 0xfffff000;
        if (mem_map[MAP_NR(old_page)]==1) {
                *(unsigned long *) pte |= 2;
                invalidate();
                if (new_page)
                        free_page(new_page);
                return;
        }
        if (!new_page && (new_page=get_free_page(GFP_KERNEL)))
                goto repeat;
        if (new_page)
                copy_page(old_page,new_page);
        else {
                new_page = BAD_PAGE;
                oom(tsk);
        }
        *(unsigned long *) pte = new_page | dirty | PAGE_ACCESSED | 7;
        free_page(old_page);
        invalidate();
}
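
/*
 * Two details of do_wp_page() are worth spelling out. If mem_map[] shows
 * only one user left, the page is no longer shared and it is enough to
 * set the R/W bit again instead of copying. And because get_free_page()
 * may sleep, the "goto repeat" re-walks the page tables after the
 * allocation in case another process changed them in the meantime.
 */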

void write_verify(unsigned long address)
{
        if (address < TASK_SIZE)
                do_wp_page(1,address,current,0);
}

static void get_empty_page(struct task_struct * tsk, unsigned long address)
{
        unsigned long tmp;

        tmp = get_free_page(GFP_KERNEL);
        if (!tmp) {
                oom(tsk);
                tmp = BAD_PAGE;
        }
        if (!put_page(tsk,tmp,address,0))
                free_page(tmp);
}

/*
 * try_to_share() checks the page at address "address" in the task "p",
 * to see if it exists, and if it is clean. If so, share it with the current
 * task.
 *
 * NOTE! This assumes we have checked that p != current, and that they
 * share the same executable or library.
 */
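/*
 * "Clean" here means the dirty bit (0x40) is clear: the test
 * "(phys_addr & 0x41) != 0x01" below rejects pages that are either
 * absent or have been written to, since a dirty page no longer matches
 * the executable on disk and must not be shared.
 */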
static int try_to_share(unsigned long address, struct task_struct * tsk,
        struct task_struct * p)
{
        unsigned long from;
        unsigned long to;
        unsigned long from_page;
        unsigned long to_page;
        unsigned long phys_addr;

        from_page = p->tss.cr3 + ((address>>20) & 0xffc);
        to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
/* is there a page-directory at from? */
        from = *(unsigned long *) from_page;
        if (!(from & 1))
                return 0;
        from &= 0xfffff000;
        from_page = from + ((address>>10) & 0xffc);
        phys_addr = *(unsigned long *) from_page;
/* is the page clean and present? */
        if ((phys_addr & 0x41) != 0x01)
                return 0;
        phys_addr &= 0xfffff000;
        if (phys_addr >= high_memory)
                return 0;
        if (mem_map[MAP_NR(phys_addr)] & MAP_PAGE_RESERVED)
                return 0;
/* share them: write-protect */
        *(unsigned long *) from_page &= ~2;
        invalidate();
        phys_addr >>= PAGE_SHIFT;
        mem_map[phys_addr]++;
        to = *(unsigned long *) to_page;
        if (!(to & 1)) {
                to = get_free_page(GFP_KERNEL);
                if (!to) {
                        mem_map[phys_addr]--;
                        return 0;
                }
                *(unsigned long *) to_page = to | PAGE_ACCESSED | 7;
        }
        to &= 0xfffff000;
        to_page = to + ((address>>10) & 0xffc);
        if (1 & *(unsigned long *) to_page)
                panic("try_to_share: to_page already exists");
        *(unsigned long *) to_page = *(unsigned long *) from_page;
        return 1;
}

/*
 * share_page() tries to find a process that could share a page with
 * the current one. Address is the address of the wanted page relative
 * to the current data space.
 *
 * We first check if it is at all feasible by checking executable->i_count.
 * It should be >1 if there are other tasks sharing this inode.
 */
static int share_page(struct task_struct * tsk, struct inode * inode, unsigned long address)
{
        struct task_struct ** p;
        int i;

        if (!inode || inode->i_count < 2)
                return 0;
        for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
                if (!*p)
                        continue;
                if (tsk == *p)
                        continue;
                if (inode != (*p)->executable) {
                        for (i=0; i < (*p)->numlibraries; i++)
                                if (inode == (*p)->libraries[i].library)
                                        break;
                        if (i >= (*p)->numlibraries)
                                continue;
                }
                if (try_to_share(address,tsk,*p))
                        return 1;
        }
        return 0;
}

/*
 * fill in an empty page-table if none exists
 */
static unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
{
        unsigned long page = 0;
        unsigned long *p;
repeat:
        p = (unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc));
        if (1 & *p) {
                free_page(page);
                return *p;
        }
        if (*p) {
                printk("get_empty_pgtable: bad page-directory entry\n");
                *p = 0;
        }
        if (page) {
                *p = page | PAGE_ACCESSED | 7;
                return *p;
        }
        if ((page = get_free_page(GFP_KERNEL)) != 0)
                goto repeat;
        oom(current);
        *p = BAD_PAGETABLE | 7;
        return 0;
}
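
/*
 * The same repeat pattern as in do_wp_page(): get_free_page() may sleep,
 * so after allocating we go back and re-check the directory entry, and
 * free our page again if someone else installed a table first.
 */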

void do_no_page(unsigned long error_code, unsigned long address,
        struct task_struct *tsk, unsigned long user_esp)
{
        int nr[4];
        unsigned long tmp;
        unsigned long page;
        unsigned int block,i;
        struct inode * inode;

        page = get_empty_pgtable(tsk,address);
        if (!page)
                return;
        page &= 0xfffff000;
        page += (address >> 10) & 0xffc;
        tmp = *(unsigned long *) page;
        if (tmp & 1)
                return;
        ++tsk->rss;
        if (tmp) {
                ++tsk->maj_flt;
                swap_in((unsigned long *) page);
                return;
        }
        address &= 0xfffff000;
        inode = NULL;
        block = 0;
        if (address < tsk->end_data) {
                inode = tsk->executable;
                block = 1 + address / BLOCK_SIZE;
        } else {
                i = tsk->numlibraries;
                while (i-- > 0) {
                        if (address < tsk->libraries[i].start)
                                continue;
                        block = address - tsk->libraries[i].start;
                        if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
                                continue;
                        inode = tsk->libraries[i].library;
                        if (block < tsk->libraries[i].length)
                                block = 1 + block / BLOCK_SIZE;
                        else
                                block = 0;
                        break;
                }
        }
        if (!inode) {
                ++tsk->min_flt;
                get_empty_page(tsk,address);
                if (tsk != current)
                        return;
                if (address < tsk->brk)
                        return;
                if (address+8192 >= (user_esp & 0xfffff000))
                        return;
                send_sig(SIGSEGV,tsk,1);
                return;
        }
        if (share_page(tsk,inode,address)) {
                ++tsk->min_flt;
                return;
        }
        ++tsk->maj_flt;
        page = get_free_page(GFP_KERNEL);
        if (!page) {
                oom(current);
                put_page(tsk,BAD_PAGE,address,0);
                return;
        }
        if (block) {
                for (i=0 ; i<4 ; block++,i++)
                        nr[i] = bmap(inode,block);
                bread_page(page,inode->i_dev,nr);
        }
        if (share_page(tsk,inode,address)) {
                free_page(page);
                return;
        }
        i = address + PAGE_SIZE - tsk->end_data;
        if (i > PAGE_SIZE-1)
                i = 0;
        tmp = page + PAGE_SIZE;
        while (i--) {
                tmp--;
                *(char *)tmp = 0;
        }
        if (put_page(tsk,page,address,CODE_SPACE(address, tsk)))
                return;
        free_page(page);
        oom(current);
}
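
/*
 * A few points in do_no_page() deserve comment. A non-zero but
 * non-present entry is a swap entry, handled by swap_in(). An anonymous
 * fault (no backing inode) is treated as legal stack growth only when it
 * lands within 8kB below the current user stack pointer; anything
 * further away draws a SIGSEGV. And with four blocks to a page, bmap()
 * is asked for four block numbers before bread_page() fills the page;
 * the tail beyond end_data in the last partial page is then zeroed by
 * hand.
 */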

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
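/*
 * The i386 hands us the faulting linear address in %cr2 and an error
 * code on the stack: bit 0 set means a protection violation (the page
 * was present), bit 0 clear means the page was not present, and bit 2
 * set means the access came from user mode.
 */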
void do_page_fault(unsigned long *esp, unsigned long error_code)
{
        unsigned long address;
        unsigned long user_esp = 0;
        unsigned long stack_limit;
        unsigned int bit;
        extern void die_if_kernel();

        /* get the address */
        __asm__("movl %%cr2,%0":"=r" (address));
        if (address < TASK_SIZE) {
                if (error_code & 4) {   /* user mode access? */
                        if (esp[2] & VM_MASK) {
                                bit = (address - 0xA0000) >> PAGE_SHIFT;
                                if (bit < 32)
                                        current->screen_bitmap |= 1 << bit;
                        } else
                                user_esp = esp[3];
                }
                if (error_code & 1)
                        do_wp_page(error_code, address, current, user_esp);
                else
                        do_no_page(error_code, address, current, user_esp);
                if (!user_esp)
                        return;
                stack_limit = current->rlim[RLIMIT_STACK].rlim_cur;
                if (stack_limit >= RLIM_INFINITY)
                        return;
                if (stack_limit >= current->start_stack)
                        return;
                stack_limit = current->start_stack - stack_limit;
                if (user_esp < stack_limit)
                        send_sig(SIGSEGV, current, 1);
                return;
        }
        printk("Unable to handle kernel paging request at address %08x\n",address);
        die_if_kernel("Oops",esp,error_code);
        do_exit(SIGKILL);
}

/*
 * BAD_PAGE is the page that is used for page faults when linux
 * is out-of-memory. Older versions of linux just did a
 * do_exit(), but using this instead means there is less risk
 * for a process dying in kernel mode, possibly leaving an inode
 * unused etc..
 *
 * BAD_PAGETABLE is the accompanying page-table: it is initialized
 * to point to BAD_PAGE entries.
 */
unsigned long __bad_pagetable(void)
{
        extern char empty_bad_page_table[PAGE_SIZE];

        __asm__ __volatile__("cld ; rep ; stosl"
                ::"a" (7+BAD_PAGE),
                  "D" ((long) empty_bad_page_table),
                  "c" (1024)
                :"di","cx");
        return (unsigned long) empty_bad_page_table;
}

unsigned long __bad_page(void)
{
        extern char empty_bad_page[PAGE_SIZE];

        __asm__ __volatile__("cld ; rep ; stosl"
                ::"a" (0),
                  "D" ((long) empty_bad_page),
                  "c" (1024)
                :"di","cx");
        return (unsigned long) empty_bad_page;
}

void show_mem(void)
{
        int i,free = 0,total = 0,reserved = 0;
        int shared = 0;

        printk("Mem-info:\n");
        printk("Free pages:      %6d\n",nr_free_pages);
        printk("Secondary pages: %6d\n",nr_secondary_pages);
        printk("Buffer heads:    %6d\n",nr_buffer_heads);
        printk("Buffer blocks:   %6d\n",nr_buffers);
        i = high_memory >> PAGE_SHIFT;
        while (i-- > 0) {
                total++;
                if (mem_map[i] & MAP_PAGE_RESERVED)
                        reserved++;
                else if (!mem_map[i])
                        free++;
                else
                        shared += mem_map[i]-1;
        }
        printk("%d pages of RAM\n",total);
        printk("%d free pages\n",free);
        printk("%d reserved pages\n",reserved);
        printk("%d pages shared\n",shared);
}

/*
 * paging_init() sets up the page tables - note that the first 4MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
{
        unsigned long * pg_dir;
        unsigned long * pg_table;
        unsigned long tmp;
        unsigned long address;

/*
 * Physical page 0 is special: it's a "zero-page", and is guaranteed to
 * stay that way - it's write-protected and when there is a c-o-w, the
 * mm handler treats it specially.
 */
        memset((void *) 0, 0, 4096);
        start_mem += 4095;
        start_mem &= 0xfffff000;
        address = 0;
        pg_dir = swapper_pg_dir + 768;          /* at virtual addr 0xC0000000 */
        while (address < end_mem) {
                tmp = *pg_dir;
                if (!tmp) {
                        tmp = start_mem;
                        *pg_dir = tmp | 7;
                        start_mem += 4096;
                }
                pg_dir++;
                pg_table = (unsigned long *) (tmp & 0xfffff000);
                for (tmp = 0 ; tmp < 1024 ; tmp++,pg_table++) {
                        if (address && address < end_mem)
                                *pg_table = 7 + address;
                        else
                                *pg_table = 0;
                        address += 4096;
                }
        }
        invalidate();
        return start_mem;
}
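
/*
 * Directory slot 768 covers virtual 0xC0000000, since each of the 1024
 * slots maps 4MB (768 * 4MB = 3GB). The page tables themselves are
 * carved out of start_mem, and the "if (address && ...)" test skips the
 * very first page, giving the NULL-trapping behaviour described above.
 */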

void mem_init(unsigned long start_low_mem,
              unsigned long start_mem, unsigned long end_mem)
{
        int codepages = 0;
        int reservedpages = 0;
        int datapages = 0;
        unsigned long tmp;
        unsigned short * p;
        extern int etext;

        cli();
        end_mem &= 0xfffff000;
        high_memory = end_mem;
        start_mem += 0x0000000f;
        start_mem &= 0xfffffff0;
        tmp = MAP_NR(end_mem);
        mem_map = (unsigned short *) start_mem;
        p = mem_map + tmp;
        start_mem = (unsigned long) p;
        while (p > mem_map)
                *--p = MAP_PAGE_RESERVED;
        start_low_mem += 0x00000fff;
        start_low_mem &= 0xfffff000;
        start_mem += 0x00000fff;
        start_mem &= 0xfffff000;
        while (start_low_mem < 0xA0000) {
                mem_map[MAP_NR(start_low_mem)] = 0;
                start_low_mem += 4096;
        }
        while (start_mem < end_mem) {
                mem_map[MAP_NR(start_mem)] = 0;
                start_mem += 4096;
        }
        sound_mem_init();
        free_page_list = 0;
        nr_free_pages = 0;
        for (tmp = 0 ; tmp < end_mem ; tmp += 4096) {
                if (mem_map[MAP_NR(tmp)]) {
                        if (tmp >= 0xA0000 && tmp < 0x100000)
                                reservedpages++;
                        else if (tmp < (unsigned long) &etext)
                                codepages++;
                        else
                                datapages++;
                        continue;
                }
                *(unsigned long *) tmp = free_page_list;
                free_page_list = tmp;
                nr_free_pages++;
        }
        tmp = nr_free_pages << PAGE_SHIFT;
        printk("Memory: %dk/%dk available (%dk kernel code, %dk reserved, %dk data)\n",
                tmp >> 10,
                end_mem >> 10,
                codepages << 2,
                reservedpages << 2,
                datapages << 2);
        return;
}
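
/*
 * The free list built above is threaded through the free pages
 * themselves: the first longword of each free page holds the address of
 * the next one, with free_page_list pointing at the head. mem_map[]
 * starts out all MAP_PAGE_RESERVED and only the usable ranges (below
 * 0xA0000 and above the kernel) are cleared back to 0.
 */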

void si_meminfo(struct sysinfo *val)
{
        int i;

        i = high_memory >> PAGE_SHIFT;
        val->totalram = 0;
        val->freeram = 0;
        val->sharedram = 0;
        val->bufferram = buffermem;
        while (i-- > 0)  {
                if (mem_map[i] & MAP_PAGE_RESERVED)
                        continue;
                val->totalram++;
                if (!mem_map[i]) {
                        val->freeram++;
                        continue;
                }
                val->sharedram += mem_map[i]-1;
        }
        val->totalram <<= PAGE_SHIFT;
        val->freeram <<= PAGE_SHIFT;
        val->sharedram <<= PAGE_SHIFT;
        return;
}
