This source file includes the following definitions:
- oom
- free_one_pmd
- free_one_pgd
- clear_page_tables
- free_page_tables
- new_page_tables
- copy_one_pte
- copy_pte_range
- copy_pmd_range
- copy_page_range
- forget_pte
- zap_pte_range
- zap_pmd_range
- zap_page_range
- zeromap_pte_range
- zeromap_pmd_range
- zeromap_page_range
- remap_pte_range
- remap_pmd_range
- remap_page_range
- put_page
- put_dirty_page
- do_wp_page
- verify_area
- get_empty_page
- try_to_share
- share_page
- unshare
- vmtruncate
- get_empty_pgtable
- do_swap_page
- do_no_page
- handle_pte_fault
- handle_mm_fault
1 /*
2  *  linux/mm/memory.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6
7 /*
8  * demand-loading started 01.12.91 - seems it is high on the list of
9  * things wanted, and it should be easy to implement. - Linus
10  */
11
12 /*
13  * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
14  * pages started 02.12.91, seems to work. - Linus.
15  *
16  * Tested sharing by executing about 30 /bin/sh: under the old kernel it
17  * would have taken more than the 6M I have free, but it worked well as
18  * far as I could see.
19  *
20  * Also corrected some "invalidate()"s - I wasn't doing enough of them.
21  */
22
23 /*
24  * Real VM (paging to/from disk) started 18.12.91. Much more work and
25  * thought has to go into this. Oh, well..
26  * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
27  *		Found it. Everything seems to work now.
28  * 20.12.91  -  Ok, making the swap-device changeable like the root.
29  */
30
31 /*
32  * 05.04.94  -  Multi-page memory management added for v1.1.
33  * 		Idea by Alex Bligh (alex@cconcepts.co.uk)
34  */
35
36 #include <linux/signal.h>
37 #include <linux/sched.h>
38 #include <linux/head.h>
39 #include <linux/kernel.h>
40 #include <linux/errno.h>
41 #include <linux/string.h>
42 #include <linux/types.h>
43 #include <linux/ptrace.h>
44 #include <linux/mman.h>
45 #include <linux/mm.h>
46
47 #include <asm/system.h>
48 #include <asm/segment.h>
49 #include <asm/pgtable.h>
50
51 unsigned long high_memory = 0;
52
53 /*
54  * The free_area_list arrays point to the queue heads of the free areas
55  * of different sizes
56  */
57 int nr_swap_pages = 0;
58 int nr_free_pages = 0;
59 struct mem_list free_area_list[NR_MEM_LISTS];
60 unsigned char * free_area_map[NR_MEM_LISTS];
61
62 #define copy_page(from,to) memcpy((void *) to, (void *) from, PAGE_SIZE)
63
64 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
65
66 mem_map_t * mem_map = NULL;
67
68 /*
69  * oom() prints a message (so that the user knows why the process
70  * died), and gives the process an untrappable SIGKILL.
71  */
72 void oom(struct task_struct * task)
73 {
74 printk("\nOut of memory for %s.\n", task->comm);
75 task->sig->action[SIGKILL-1].sa_handler = NULL;
76 task->blocked &= ~(1<<(SIGKILL-1));
77 send_sig(SIGKILL,task,1);
78 }
79
80 /*
81  * Note: this doesn't free the actual pages themselves. That
82  * has been handled earlier when unmapping all the memory regions.
83  */
84 static inline void free_one_pmd(pmd_t * dir)
85 {
86 pte_t * pte;
87
88 if (pmd_none(*dir))
89 return;
90 if (pmd_bad(*dir)) {
91 printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
92 pmd_clear(dir);
93 return;
94 }
95 pte = pte_offset(dir, 0);
96 pmd_clear(dir);
97 pte_free(pte);
98 }
99
100 static inline void free_one_pgd(pgd_t * dir)
101 {
102 pmd_t * pmd;
103
104 if (pgd_none(*dir))
105 return;
106 if (pgd_bad(*dir)) {
107 printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
108 pgd_clear(dir);
109 return;
110 }
111 pmd = pmd_offset(dir, 0);
112 pgd_clear(dir);
113 if (!pmd_inuse(pmd)) {
114 int j;
115 for (j = 0; j < PTRS_PER_PMD ; j++)
116 free_one_pmd(pmd+j);
117 }
118 pmd_free(pmd);
119 }
120
121 /*
122  * This function clears all user-level page tables of a process - this
123  * is needed by execve(), so that old pages aren't in the way.
124  */
125 void clear_page_tables(struct task_struct * tsk)
126 {
127 int i;
128 pgd_t * page_dir;
129
130 page_dir = tsk->mm->pgd;
131 if (!page_dir || page_dir == swapper_pg_dir) {
132 printk("%s trying to clear kernel page-directory: not good\n", tsk->comm);
133 return;
134 }
135 for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
136 free_one_pgd(page_dir + i);
137 invalidate();
138 }
139
140 /*
141  * This function frees up all page tables of a process when it exits. It
142  * is the same as "clear_page_tables()", except it also changes the
143  * process' page table directory to the kernel page tables, and then
144  * frees the old page directory.
145  */
146 void free_page_tables(struct task_struct * tsk)
147 {
148 int i;
149 pgd_t * page_dir;
150
151 page_dir = tsk->mm->pgd;
152 if (!page_dir || page_dir == swapper_pg_dir) {
153 printk("%s trying to free kernel page-directory: not good\n", tsk->comm);
154 return;
155 }
156 SET_PAGE_DIR(tsk, swapper_pg_dir);
157 tsk->mm->pgd = swapper_pg_dir;
158 for (i = 0 ; i < PTRS_PER_PGD ; i++)
159 free_one_pgd(page_dir + i);
160 pgd_free(page_dir);
161 invalidate();
162 }
163
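/*
 * new_page_tables() gives a task a fresh page directory: the user-space
 * entries are left empty, and the kernel-space entries are copied from
 * the master page directory of init_mm.
 */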
164 int new_page_tables(struct task_struct * tsk)
165 {
166 pgd_t * page_dir, * new_pg;
167 int i;
168
169 if (!(new_pg = pgd_alloc()))
170 return -ENOMEM;
171 page_dir = pgd_offset(&init_mm, 0);
172 for (i = USER_PTRS_PER_PGD ; i < PTRS_PER_PGD ; i++)
173 new_pg[i] = page_dir[i];
174 SET_PAGE_DIR(tsk, new_pg);
175 tsk->mm->pgd = new_pg;
176 return 0;
177 }
178
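/*
 * copy_one_pte() duplicates a single pte at fork time. Swap entries get
 * their swap count raised, reserved and out-of-range pages are shared
 * as-is, and a copy-on-write page is write-protected in BOTH address
 * spaces so that the first write from either side faults and makes a
 * private copy. The underlying page's reference count is incremented.
 */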
179 static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte)
180 {
181 pte_t pte = *old_pte;
182
183 if (pte_none(pte))
184 return;
185 if (!pte_present(pte)) {
186 swap_duplicate(pte_val(pte));
187 set_pte(new_pte, pte);
188 return;
189 }
190 if (pte_page(pte) > high_memory || mem_map[MAP_NR(pte_page(pte))].reserved) {
191 set_pte(new_pte, pte);
192 return;
193 }
194 if (pte_cow(pte))
195 pte = pte_wrprotect(pte);
196 if (delete_from_swap_cache(pte_page(pte)))
197 pte = pte_mkdirty(pte);
198 set_pte(new_pte, pte_mkold(pte));
199 set_pte(old_pte, pte);
200 mem_map[MAP_NR(pte_page(pte))].count++;
201 }
202
203 static inline int copy_pte_range(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long address, unsigned long size)
204 {
205 pte_t * src_pte, * dst_pte;
206 unsigned long end;
207
208 if (pmd_none(*src_pmd))
209 return 0;
210 if (pmd_bad(*src_pmd)) {
211 printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd));
212 pmd_clear(src_pmd);
213 return 0;
214 }
215 src_pte = pte_offset(src_pmd, address);
216 if (pmd_none(*dst_pmd)) {
217 if (!pte_alloc(dst_pmd, 0))
218 return -ENOMEM;
219 }
220 dst_pte = pte_offset(dst_pmd, address);
221 address &= ~PMD_MASK;
222 end = address + size;
223 if (end >= PMD_SIZE)
224 end = PMD_SIZE;
225 do {
226 /*
227  * copy the ptes one at a time: copy_one_pte() does the COW sharing
228  */
229 copy_one_pte(src_pte++, dst_pte++);
230 address += PAGE_SIZE;
231 } while (address < end);
232 return 0;
233 }
234
235 static inline int copy_pmd_range(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long address, unsigned long size)
236 {
237 pmd_t * src_pmd, * dst_pmd;
238 unsigned long end;
239 int error = 0;
240
241 if (pgd_none(*src_pgd))
242 return 0;
243 if (pgd_bad(*src_pgd)) {
244 printk("copy_pmd_range: bad pgd (%08lx)\n", pgd_val(*src_pgd));
245 pgd_clear(src_pgd);
246 return 0;
247 }
248 src_pmd = pmd_offset(src_pgd, address);
249 if (pgd_none(*dst_pgd)) {
250 if (!pmd_alloc(dst_pgd, 0))
251 return -ENOMEM;
252 }
253 dst_pmd = pmd_offset(dst_pgd, address);
254 address &= ~PGDIR_MASK;
255 end = address + size;
256 if (end > PGDIR_SIZE)
257 end = PGDIR_SIZE;
258 do {
259 error = copy_pte_range(dst_pmd++, src_pmd++, address, end - address);
260 if (error)
261 break;
262 address = (address + PMD_SIZE) & PMD_MASK;
263 } while (address < end);
264 return error;
265 }
266
267 /*
268  * copy one vm_area from one task to the other. Assumes the page tables
269  * already present in the new task to be cleared in the whole range
270  * covered by this vma.
271  */
272 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
273 struct vm_area_struct *vma)
274 {
275 pgd_t * src_pgd, * dst_pgd;
276 unsigned long address = vma->vm_start;
277 unsigned long end = vma->vm_end;
278 int error = 0;
279
280 src_pgd = pgd_offset(src, address);
281 dst_pgd = pgd_offset(dst, address);
282 while (address < end) {
283 error = copy_pmd_range(dst_pgd++, src_pgd++, address, end - address);
284 if (error)
285 break;
286 address = (address + PGDIR_SIZE) & PGDIR_MASK;
287 }
288 invalidate();
289 return error;
290 }
291
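/*
 * forget_pte() releases one reference to whatever the pte pointed at:
 * a present page is freed (the rss count is only dropped for pages
 * that are not reserved), a swapped-out page has its swap entry freed.
 */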
292 static inline void forget_pte(pte_t page)
293 {
294 if (pte_none(page))
295 return;
296 if (pte_present(page)) {
297 free_page(pte_page(page));
298 if (mem_map[MAP_NR(pte_page(page))].reserved)
299 return;
300 if (current->mm->rss <= 0)
301 return;
302 current->mm->rss--;
303 return;
304 }
305 swap_free(pte_val(page));
306 }
307
308 static inline void zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
309 {
310 pte_t * pte;
311 unsigned long end;
312
313 if (pmd_none(*pmd))
314 return;
315 if (pmd_bad(*pmd)) {
316 printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
317 pmd_clear(pmd);
318 return;
319 }
320 pte = pte_offset(pmd, address);
321 address &= ~PMD_MASK;
322 end = address + size;
323 if (end >= PMD_SIZE)
324 end = PMD_SIZE;
325 do {
326 pte_t page = *pte;
327 pte_clear(pte);
328 forget_pte(page);
329 address += PAGE_SIZE;
330 pte++;
331 } while (address < end);
332 }
333
334 static inline void zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
335 {
336 pmd_t * pmd;
337 unsigned long end;
338
339 if (pgd_none(*dir))
340 return;
341 if (pgd_bad(*dir)) {
342 printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
343 pgd_clear(dir);
344 return;
345 }
346 pmd = pmd_offset(dir, address);
347 address &= ~PGDIR_MASK;
348 end = address + size;
349 if (end > PGDIR_SIZE)
350 end = PGDIR_SIZE;
351 do {
352 zap_pte_range(pmd, address, end - address);
353 address = (address + PMD_SIZE) & PMD_MASK;
354 pmd++;
355 } while (address < end);
356 }
357
358 /*
359  * remove user pages in a given range.
360  */
361 int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
362 {
363 pgd_t * dir;
364 unsigned long end = address + size;
365
366 dir = pgd_offset(mm, address);
367 while (address < end) {
368 zap_pmd_range(dir, address, end - address);
369 address = (address + PGDIR_SIZE) & PGDIR_MASK;
370 dir++;
371 }
372 invalidate();
373 return 0;
374 }
375
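/*
 * The zeromap functions map the single shared ZERO_PAGE, write-protected,
 * over a whole range: reads see zeroes, and the first write to any page
 * takes a copy-on-write fault that supplies a private zeroed page.
 */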
376 static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pte_t zero_pte)
377 {
378 unsigned long end;
379
380 address &= ~PMD_MASK;
381 end = address + size;
382 if (end > PMD_SIZE)
383 end = PMD_SIZE;
384 do {
385 pte_t oldpage = *pte;
386 set_pte(pte, zero_pte);
387 forget_pte(oldpage);
388 address += PAGE_SIZE;
389 pte++;
390 } while (address < end);
391 }
392
393 static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, pte_t zero_pte)
394 {
395 unsigned long end;
396
397 address &= ~PGDIR_MASK;
398 end = address + size;
399 if (end > PGDIR_SIZE)
400 end = PGDIR_SIZE;
401 do {
402 pte_t * pte = pte_alloc(pmd, address);
403 if (!pte)
404 return -ENOMEM;
405 zeromap_pte_range(pte, address, end - address, zero_pte);
406 address = (address + PMD_SIZE) & PMD_MASK;
407 pmd++;
408 } while (address < end);
409 return 0;
410 }
411
412 int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
413 {
414 int error = 0;
415 pgd_t * dir;
416 unsigned long end = address + size;
417 pte_t zero_pte;
418
419 zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
420 dir = pgd_offset(current->mm, address);
421 while (address < end) {
422 pmd_t *pmd = pmd_alloc(dir, address);
423 error = -ENOMEM;
424 if (!pmd)
425 break;
426 error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
427 if (error)
428 break;
429 address = (address + PGDIR_SIZE) & PGDIR_MASK;
430 dir++;
431 }
432 invalidate();
433 return error;
434 }
435
436 /*
437  * maps a range of physical memory into the requested pages. the old
438  * mappings are removed. any references to nonexistent pages results
439  * in null mappings (currently treated as "copy-on-access")
440  */
441 static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
442 unsigned long offset, pgprot_t prot)
443 {
444 unsigned long end;
445
446 address &= ~PMD_MASK;
447 end = address + size;
448 if (end > PMD_SIZE)
449 end = PMD_SIZE;
450 do {
451 pte_t oldpage = *pte;
452 pte_clear(pte);
453 if (offset >= high_memory || mem_map[MAP_NR(offset)].reserved)
454 set_pte(pte, mk_pte(offset, prot));
455 forget_pte(oldpage);
456 address += PAGE_SIZE;
457 offset += PAGE_SIZE;
458 pte++;
459 } while (address < end);
460 }
461
462 static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
463 unsigned long offset, pgprot_t prot)
464 {
465 unsigned long end;
466
467 address &= ~PGDIR_MASK;
468 end = address + size;
469 if (end > PGDIR_SIZE)
470 end = PGDIR_SIZE;
471 offset -= address;
472 do {
473 pte_t * pte = pte_alloc(pmd, address);
474 if (!pte)
475 return -ENOMEM;
476 remap_pte_range(pte, address, end - address, address + offset, prot);
477 address = (address + PMD_SIZE) & PMD_MASK;
478 pmd++;
479 } while (address < end);
480 return 0;
481 }
482
483 int remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot)
484 {
485 int error = 0;
486 pgd_t * dir;
487 unsigned long end = from + size;
488
489 offset -= from;
490 dir = pgd_offset(current->mm, from);
491 while (from < end) {
492 pmd_t *pmd = pmd_alloc(dir, from);
493 error = -ENOMEM;
494 if (!pmd)
495 break;
496 error = remap_pmd_range(pmd, from, end - from, offset + from, prot);
497 if (error)
498 break;
499 from = (from + PGDIR_SIZE) & PGDIR_MASK;
500 dir++;
501 }
502 invalidate();
503 return error;
504 }
505
506 /*
507  * put_page() installs a pte, complaining if a mapping is already there.
508  */
509 static void put_page(pte_t * page_table, pte_t pte)
510 {
511 if (!pte_none(*page_table)) {
512 printk("put_page: page already exists %08lx\n", pte_val(*page_table));
513 free_page(pte_page(pte));
514 return;
515 }
516 /* no need for invalidate */
517 *page_table = pte;
518 }
519
520 /*
521  * This routine is used to map in a page into an address space: needed by
522  * execve() for the initial stack and environment pages.
523  */
524 unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
525 {
526 pgd_t * pgd;
527 pmd_t * pmd;
528 pte_t * pte;
529
530 if (page >= high_memory)
531 printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
532 if (mem_map[MAP_NR(page)].count != 1)
533 printk("mem_map disagrees with %08lx at %08lx\n",page,address);
534 pgd = pgd_offset(tsk->mm,address);
535 pmd = pmd_alloc(pgd, address);
536 if (!pmd) {
537 free_page(page);
538 oom(tsk);
539 return 0;
540 }
541 pte = pte_alloc(pmd, address);
542 if (!pte) {
543 free_page(page);
544 oom(tsk);
545 return 0;
546 }
547 if (!pte_none(*pte)) {
548 printk("put_dirty_page: page already exists\n");
549 pte_clear(pte);
550 invalidate();
551 }
552 set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
553
554 return page;
555 }
556
557 /*
558  * This routine handles present pages, when users try to write
559  * to a shared page. It is done by copying the page to a new address
560  * and decrementing the shared-page counter for the old page.
561  *
562  * Goto-purists beware: the only reason for goto's here is that it results
563  * in better assembly code.. The "default" path will see no jumps at all.
564  *
565  * Note that this routine assumes that the protection checks have been
566  * done by the caller (the low-level page fault routine in most cases).
567  * Thus we can safely just mark it writable once we've done any necessary
568  * COW.
569  *
570  * We also mark the page dirty at this point even though the page will
571  * change only once the write actually happens. This avoids a few races,
572  * and potentially makes it more efficient.
573  */
574 void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
575 unsigned long address, int write_access)
576 {
577 pgd_t *page_dir;
578 pmd_t *page_middle;
579 pte_t *page_table, pte;
580 unsigned long old_page, new_page;
581
582 new_page = __get_free_page(GFP_KERNEL);
583 page_dir = pgd_offset(vma->vm_mm, address);
584 if (pgd_none(*page_dir))
585 goto end_wp_page;
586 if (pgd_bad(*page_dir))
587 goto bad_wp_pagedir;
588 page_middle = pmd_offset(page_dir, address);
589 if (pmd_none(*page_middle))
590 goto end_wp_page;
591 if (pmd_bad(*page_middle))
592 goto bad_wp_pagemiddle;
593 page_table = pte_offset(page_middle, address);
594 pte = *page_table;
595 if (!pte_present(pte))
596 goto end_wp_page;
597 if (pte_write(pte))
598 goto end_wp_page;
599 old_page = pte_page(pte);
600 if (old_page >= high_memory)
601 goto bad_wp_page;
602 tsk->min_flt++;
603 /*
604  * Do we need to copy?
605  */
606 if (mem_map[MAP_NR(old_page)].count != 1) {
607 if (new_page) {
608 if (mem_map[MAP_NR(old_page)].reserved)
609 ++vma->vm_mm->rss;
610 copy_page(old_page,new_page);
611 set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
612 free_page(old_page);
613 invalidate();
614 return;
615 }
616 set_pte(page_table, BAD_PAGE);
617 free_page(old_page);
618 oom(tsk);
619 invalidate();
620 return;
621 }
622 set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
623 invalidate();
624 if (new_page)
625 free_page(new_page);
626 return;
627 bad_wp_page:
628 printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
629 send_sig(SIGKILL, tsk, 1);
630 goto end_wp_page;
631 bad_wp_pagemiddle:
632 printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
633 send_sig(SIGKILL, tsk, 1);
634 goto end_wp_page;
635 bad_wp_pagedir:
636 printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
637 send_sig(SIGKILL, tsk, 1);
638 end_wp_page:
639 if (new_page)
640 free_page(new_page);
641 return;
642 }
643
644 /*
645  * Check that a user-space memory range may be read or written.
646  */
647 int verify_area(int type, const void * addr, unsigned long size)
648 {
649 struct vm_area_struct * vma;
650 unsigned long start = (unsigned long) addr;
651
652 /*
653  * If the fs segment is the kernel data segment (set_fs(KERNEL_DS)),
654  * we are addressing kernel memory: there are no vmas to check.
655  */
656 if (get_fs() == get_ds())
657 return 0;
658
659 vma = find_vma(current, start);
660 if (!vma)
661 goto bad_area;
662 if (vma->vm_start <= start)
663 goto good_area;
664 if (!(vma->vm_flags & VM_GROWSDOWN))
665 goto bad_area;
666 if (vma->vm_end - start > current->rlim[RLIMIT_STACK].rlim_cur)
667 goto bad_area;
668
669 good_area:
670 if (type == VERIFY_WRITE)
671 goto check_write;
672 for (;;) {
673 struct vm_area_struct * next;
674 if (!(vma->vm_flags & VM_READ))
675 goto bad_area;
676 if (vma->vm_end - start >= size)
677 return 0;
678 next = vma->vm_next;
679 if (!next || vma->vm_end != next->vm_start)
680 goto bad_area;
681 vma = next;
682 }
683
684 check_write:
685 if (!(vma->vm_flags & VM_WRITE))
686 goto bad_area;
687 if (!wp_works_ok)
688 goto check_wp_fault_by_hand;
689 for (;;) {
690 if (vma->vm_end - start >= size)
691 break;
692 if (!vma->vm_next || vma->vm_end != vma->vm_next->vm_start)
693 goto bad_area;
694 vma = vma->vm_next;
695 if (!(vma->vm_flags & VM_WRITE))
696 goto bad_area;
697 }
698 return 0;
699
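/*
 * On CPUs where supervisor-mode writes ignore the page-table WP bit
 * (early i386), the kernel cannot rely on a hardware fault: walk the
 * range a page at a time and force the copy-on-write via do_wp_page().
 */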
700 check_wp_fault_by_hand:
701 size--;
702 size += start & ~PAGE_MASK;
703 size >>= PAGE_SHIFT;
704 start &= PAGE_MASK;
705
706 for (;;) {
707 do_wp_page(current, vma, start, 1);
708 if (!size)
709 break;
710 size--;
711 start += PAGE_SIZE;
712 if (start < vma->vm_end)
713 continue;
714 vma = vma->vm_next;
715 if (!vma || vma->vm_start != start)
716 goto bad_area;
717 if (!(vma->vm_flags & VM_WRITE))
718 goto bad_area;
719 }
720 return 0;
721
722 bad_area:
723 return -EFAULT;
724 }
725
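/*
 * get_empty_page() maps a freshly allocated zero page at the given pte,
 * or installs BAD_PAGE and kills the task if no memory is available.
 */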
726 static inline void get_empty_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t * page_table)
727 {
728 unsigned long tmp;
729
730 if (!(tmp = get_free_page(GFP_KERNEL))) {
731 oom(tsk);
732 put_page(page_table, BAD_PAGE);
733 return;
734 }
735 put_page(page_table, pte_mkwrite(mk_pte(tmp, vma->vm_page_prot)));
736 }
737
738 /*
739  * try_to_share() checks the page at "from_address" in "from_area" to see
740  * if it exists, and if it is clean. If so, it is shared at "to_address"
741  * in "to_area" (or copied into "newpage" when a private copy is needed).
742  *
743  * NOTE! This assumes the two areas map the same inode at page-compatible
744  * offsets: the caller (share_page) has already verified that.
745  */
746 static int try_to_share(unsigned long to_address, struct vm_area_struct * to_area,
747 unsigned long from_address, struct vm_area_struct * from_area,
748 unsigned long newpage)
749 {
750 pgd_t * from_dir, * to_dir;
751 pmd_t * from_middle, * to_middle;
752 pte_t * from_table, * to_table;
753 pte_t from, to;
754
755 from_dir = pgd_offset(from_area->vm_mm,from_address);
756
757 if (pgd_none(*from_dir))
758 return 0;
759 if (pgd_bad(*from_dir)) {
760 printk("try_to_share: bad page directory %08lx\n", pgd_val(*from_dir));
761 pgd_clear(from_dir);
762 return 0;
763 }
764 from_middle = pmd_offset(from_dir, from_address);
765
766 if (pmd_none(*from_middle))
767 return 0;
768 if (pmd_bad(*from_middle)) {
769 printk("try_to_share: bad mid directory %08lx\n", pmd_val(*from_middle));
770 pmd_clear(from_middle);
771 return 0;
772 }
773 from_table = pte_offset(from_middle, from_address);
774 from = *from_table;
775
776 if (!pte_present(from))
777 return 0;
778
779 if (pte_dirty(from)) {
780 if (!(from_area->vm_flags & VM_SHARED))
781 return 0;
782 }
783
784 if (pte_page(from) >= high_memory)
785 return 0;
786 if (mem_map[MAP_NR(pte_page(from))].reserved)
787 return 0;
788
789 to_dir = pgd_offset(to_area->vm_mm,to_address);
790
791 if (pgd_none(*to_dir))
792 return 0;
793 if (pgd_bad(*to_dir)) {
794 printk("try_to_share: bad page directory %08lx\n", pgd_val(*to_dir));
795 return 0;
796 }
797 to_middle = pmd_offset(to_dir, to_address);
798
799 if (pmd_none(*to_middle))
800 return 0;
801 if (pmd_bad(*to_middle)) {
802 printk("try_to_share: bad mid directory %08lx\n", pmd_val(*to_middle));
803 return 0;
804 }
805 to_table = pte_offset(to_middle, to_address);
806 to = *to_table;
807 if (!pte_none(to))
808 return 0;
809
810 if (newpage) {
811 /* a page in the swap cache is dirty by implication, so it may */
812 /* only be used as a source if the mapping is a shared one..   */
813 if (in_swap_cache(pte_page(from))) {
814 if (!(from_area->vm_flags & VM_SHARED))
815 return 0;
816 }
817 copy_page(pte_page(from), newpage);
818 set_pte(to_table, mk_pte(newpage, to_area->vm_page_prot));
819 return 1;
820 }
821
822 /*
823  * Ok, we can share the pages outright. But if "from" is in the swap
824  * cache it is only "clean" for its one current user: as soon as the
825  * page gets a second user it must be taken out of the swap cache
826  * and the pte marked dirty, since the copy on the swap device no
827  * longer matches every user of the page. This, again, only works
828  * for VM_SHARED mappings.
829  */
830 if (in_swap_cache(pte_page(from))) {
831 if (!(from_area->vm_flags & VM_SHARED))
832 return 0;
833 set_pte(from_table, pte_mkdirty(from));
834 delete_from_swap_cache(pte_page(from));
835 }
836 mem_map[MAP_NR(pte_page(from))].count++;
837 set_pte(to_table, mk_pte(pte_page(from), to_area->vm_page_prot));
838
839 if (!pte_write(from))
840 return 1;
841 if (from_area->vm_flags & VM_SHARED)
842 return 1;
843
844 set_pte(from_table, pte_wrprotect(from));
845 invalidate();
846 return 1;
847 }
848
849 /*
850  * share_page() tries to find a process that could share a page with
851  * the current one.
852  *
853  * We first check if it is at all feasible by checking inode->i_count.
854  * It should be >1 if there are other tasks sharing this inode.
855  */
856 static int share_page(struct vm_area_struct * area, unsigned long address,
857 int write_access, unsigned long newpage)
858 {
859 struct inode * inode;
860 unsigned long offset;
861 unsigned long from_address;
862 unsigned long give_page;
863 struct vm_area_struct * mpnt;
864
865 if (!area || !(inode = area->vm_inode) || inode->i_count < 2)
866 return 0;
867
868 give_page = 0;
869 if (write_access && !(area->vm_flags & VM_SHARED)) {
870 if (!newpage)
871 return 0;
872 give_page = newpage;
873 }
874 offset = address - area->vm_start + area->vm_offset;
875 /* walk the circular i_mmap ring: start at our next neighbour */
876 /* and stop when we get back around to "area" itself..        */
877 for (mpnt = area->vm_next_share; mpnt != area; mpnt = mpnt->vm_next_share) {
878
879 if (mpnt->vm_inode != inode) {
880 printk("Aiee! Corrupt vm_area_struct i_mmap ring\n");
881 break;
882 }
883
884 if ((mpnt->vm_offset ^ area->vm_offset) & ~PAGE_MASK)
885 continue;
886
887 from_address = offset + mpnt->vm_start - mpnt->vm_offset;
888 if (from_address < mpnt->vm_start || from_address >= mpnt->vm_end)
889 continue;
890
891 if (!try_to_share(address, area, from_address, mpnt, give_page))
892 continue;
893
894 if (give_page || !newpage)
895 return 1;
896 free_page(newpage);
897 return 1;
898 }
899 return 0;
900 }
901
902 /*
903  * unshare() zeroes the tail of a partially truncated page, and if
904  * that page is also in use by the buffer cache it moves the buffers
905  * over to a private copy, so that the mapping and the buffer cache
906  * no longer share the page.
907  */
908 static int unshare(struct vm_area_struct *vma, unsigned long address, unsigned long new_page)
909 {
910 pgd_t *page_dir;
911 pmd_t *page_middle;
912 pte_t *page_table, pte;
913 unsigned long old_page;
914 struct buffer_head * bh, * tmp;
915
916 page_dir = pgd_offset(vma->vm_mm, address);
917 if (pgd_none(*page_dir))
918 return 0;
919 if (pgd_bad(*page_dir)) {
920 printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
921 pgd_clear(page_dir);
922 return 0;
923 }
924 page_middle = pmd_offset(page_dir, address);
925 if (pmd_none(*page_middle))
926 return 0;
927 if (pmd_bad(*page_middle)) {
928 printk("bad page middle entry %p:[%lx]\n", page_middle, pmd_val(*page_middle));
929 pmd_clear(page_middle);
930 return 0;
931 }
932 page_table = pte_offset(page_middle, address);
933 pte = *page_table;
934 if (!pte_present(pte))
935 return 0;
936 old_page = pte_page(pte);
937 if (MAP_NR(old_page) > MAP_NR(high_memory))
938 return 0;
939 address &= ~PAGE_MASK;
940 memset((void *) (old_page + address), 0, PAGE_SIZE - address);
941 bh = buffer_pages[MAP_NR(old_page)];
942 if (!bh)
943 return 0;
944 if (!new_page) {
945 printk("Aieee... unshare(): no page available\n");
946 return 0;
947 }
948 buffer_pages[MAP_NR(old_page)] = NULL;
949 copy_page(old_page, new_page);
950 free_page(old_page);
951 old_page -= new_page;
952 buffer_pages[MAP_NR(new_page)] = bh;
953 tmp = bh;
954 do {
955 tmp->b_data -= old_page;
956 tmp = tmp->b_this_page;
957 } while (tmp != bh);
958 return 1;
959 }
960
961 /*
962  * Handle all mappings that got truncated by a "truncate()"
963  * system call.
964  *
965  * NOTE! We have to be ready to update the memory sharing
966  * between the file and the memory map for a potential last
967  * incomplete page. Ugly, but necessary.
968  */
969 void vmtruncate(struct inode * inode, unsigned long offset)
970 {
971 unsigned long page;
972 struct vm_area_struct * mpnt;
973
974 if (!inode->i_mmap)
975 return;
976 page = __get_free_page(GFP_KERNEL);
977 mpnt = inode->i_mmap;
978 if (!mpnt) {
979 free_page(page);
980 return;
981 }
982 do {
983 unsigned long start = mpnt->vm_start;
984 unsigned long len = mpnt->vm_end - start;
985 unsigned long diff;
986
987 /* mapping wholly truncated? */
988 if (mpnt->vm_offset >= offset) {
989 zap_page_range(mpnt->vm_mm, start, len);
990 continue;
991 }
992 /* mapping wholly unaffected? */
993 diff = offset - mpnt->vm_offset;
994 if (diff >= len)
995 continue;
996
997 start += diff;
998 len = (len - diff) & PAGE_MASK;
999 /* Ok, partially affected.. */
1000 if (start & ~PAGE_MASK) {
1001 if (unshare(mpnt, start, page))
1002 page = 0;
1003 start = (start + ~PAGE_MASK) & PAGE_MASK;
1004 }
1005 zap_page_range(mpnt->vm_mm, start, len);
1006 } while ((mpnt = mpnt->vm_next_share) != inode->i_mmap);
1007 free_page(page);
1008 }
1009
1010 /*
1011  * Allocate the pmd and pte for an address, calling oom() on failure.
1012  */
1013 static inline pte_t * get_empty_pgtable(struct task_struct * tsk,unsigned long address)
1014 {
1015 pgd_t *pgd;
1016 pmd_t *pmd;
1017 pte_t *pte;
1018
1019 pgd = pgd_offset(tsk->mm, address);
1020 pmd = pmd_alloc(pgd, address);
1021 if (!pmd) {
1022 oom(tsk);
1023 return NULL;
1024 }
1025 pte = pte_alloc(pmd, address);
1026 if (!pte) {
1027 oom(tsk);
1028 return NULL;
1029 }
1030 return pte;
1031 }
1032
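/*
 * do_swap_page() brings a swapped-out page back in, either through the
 * default swap_in() or through the vma's own swapin() operation; in the
 * latter case the new page is write-protected when it is shared by
 * several users of a private mapping.
 */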
1033 static inline void do_swap_page(struct task_struct * tsk,
1034 struct vm_area_struct * vma, unsigned long address,
1035 pte_t * page_table, pte_t entry, int write_access)
1036 {
1037 pte_t page;
1038
1039 if (!vma->vm_ops || !vma->vm_ops->swapin) {
1040 swap_in(tsk, vma, page_table, pte_val(entry), write_access);
1041 return;
1042 }
1043 page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
1044 if (pte_val(*page_table) != pte_val(entry)) {
1045 free_page(pte_page(page));
1046 return;
1047 }
1048 if (mem_map[MAP_NR(pte_page(page))].count > 1 && !(vma->vm_flags & VM_SHARED))
1049 page = pte_wrprotect(page);
1050 ++vma->vm_mm->rss;
1051 ++tsk->maj_flt;
1052 set_pte(page_table, page);
1053 return;
1054 }
1055
1056 /*
1057  * do_no_page() tries to create a new page mapping. It aggressively
1058  * tries to share with existing pages, but makes a separate copy if
1059  * the "write_access" parameter is true in order to avoid the next
1060  * page fault.
1061  */
1062 void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
1063 unsigned long address, int write_access)
1064 {
1065 pte_t * page_table;
1066 pte_t entry;
1067 unsigned long page;
1068
1069 page_table = get_empty_pgtable(tsk, address);
1070 if (!page_table)
1071 return;
1072 entry = *page_table;
1073 if (pte_present(entry))
1074 return;
1075 if (!pte_none(entry)) {
1076 do_swap_page(tsk, vma, address, page_table, entry, write_access);
1077 return;
1078 }
1079 address &= PAGE_MASK;
1080 if (!vma->vm_ops || !vma->vm_ops->nopage) {
1081 ++vma->vm_mm->rss;
1082 ++tsk->min_flt;
1083 get_empty_page(tsk, vma, page_table);
1084 return;
1085 }
1086 page = __get_free_page(GFP_KERNEL);
1087 if (share_page(vma, address, write_access, page)) {
1088 ++vma->vm_mm->rss;
1089 ++tsk->min_flt;
1090 return;
1091 }
1092 if (!page) {
1093 oom(tsk);
1094 put_page(page_table, BAD_PAGE);
1095 return;
1096 }
1097 ++tsk->maj_flt;
1098 ++vma->vm_mm->rss;
1099 /*
1100  * The last argument to ->nopage is "no_share", which tells the
1101  * low-level code to copy, not share the page even if sharing is
1102  * possible: it is essentially an early COW detection.
1103  */
1104 page = vma->vm_ops->nopage(vma, address, page,
1105 write_access && !(vma->vm_flags & VM_SHARED));
1106 if (share_page(vma, address, write_access, 0)) {
1107 free_page(page);
1108 return;
1109 }
1110 /*
1111  * This silly early PAGE_DIRTY setting removes a race
1112  * due to the bad i386 page protection. But it's valid
1113  * for other architectures too.
1114  *
1115  * Note that if write_access is true, we either now have
1116  * an exclusive copy of the page, or this is a shared mapping,
1117  * so we can make it writable and dirty to avoid having to
1118  * handle that later.
1119  */
1120 entry = mk_pte(page, vma->vm_page_prot);
1121 if (write_access) {
1122 entry = pte_mkwrite(pte_mkdirty(entry));
1123 } else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED))
1124 entry = pte_wrprotect(entry);
1125 put_page(page_table, entry);
1126 }
1127
1128 /*
1129  * The above separate functions for the no-page and wp-page
1130  * cases will go away (they mostly do the same thing anyway),
1131  * and we'll instead use only a general "handle_page_fault()".
1132  *
1133  * These routines also need to handle stuff like marking pages dirty
1134  * and/or accessed for architectures that don't do it in hardware (most
1135  * RISC architectures). The early dirtying is also good on the i386.
1136  *
1137  * There is also a hook called "update_mmu_cache()" that architectures
1138  * with external mmu caches can use to update those (ie the Sparc or
1139  * PowerPC hashed page tables that act as extended TLBs).
1140  */
1141 static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
1142 int write_access, pte_t * pte)
1143 {
1144 if (!pte_present(*pte)) {
1145 do_no_page(current, vma, address, write_access);
1146 return;
1147 }
1148 set_pte(pte, pte_mkyoung(*pte));
1149 if (!write_access)
1150 return;
1151 if (pte_write(*pte)) {
1152 set_pte(pte, pte_mkdirty(*pte));
1153 return;
1154 }
1155 do_wp_page(current, vma, address, write_access);
1156 }
1157
1158 void handle_mm_fault(struct vm_area_struct * vma, unsigned long address,
1159 int write_access)
1160 {
1161 pgd_t *pgd;
1162 pmd_t *pmd;
1163 pte_t *pte;
1164
1165 pgd = pgd_offset(vma->vm_mm, address);
1166 pmd = pmd_alloc(pgd, address);
1167 if (!pmd)
1168 goto no_memory;
1169 pte = pte_alloc(pmd, address);
1170 if (!pte)
1171 goto no_memory;
1172 handle_pte_fault(vma, address, write_access, pte);
1173 update_mmu_cache(vma, address, *pte);
1174 return;
1175 no_memory:
1176 oom(current);
1177 }