This source file includes the following definitions:
- copy_page
- oom
- free_one_pmd
- free_one_pgd
- clear_page_tables
- free_page_tables
- new_page_tables
- copy_one_pte
- copy_pte_range
- copy_pmd_range
- copy_page_range
- forget_pte
- zap_pte_range
- zap_pmd_range
- zap_page_range
- zeromap_pte_range
- zeromap_pmd_range
- zeromap_page_range
- remap_pte_range
- remap_pmd_range
- remap_page_range
- put_page
- put_dirty_page
- do_wp_page
- verify_area
- get_empty_page
- try_to_share
- share_page
- unshare
- vmtruncate
- get_empty_pgtable
- do_swap_page
- do_no_page
- handle_pte_fault
- handle_mm_fault
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/pgtable.h>

unsigned long high_memory = 0;

int nr_swap_pages = 0;
int nr_free_pages = 0;
struct mem_list free_area_list[NR_MEM_LISTS];
unsigned char * free_area_map[NR_MEM_LISTS];
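/*
 * Copy one page of data from 'from' to 'to'.  The shared zero page is
 * special-cased: copying from ZERO_PAGE is just a memset of the
 * destination.
 */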
static inline void copy_page(unsigned long from, unsigned long to)
{
	if (from == ZERO_PAGE) {
		memset((void *) to, 0, PAGE_SIZE);
		return;
	}
	memcpy((void *) to, (void *) from, PAGE_SIZE);
}

#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)

mem_map_t * mem_map = NULL;
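/*
 * oom() reports an out-of-memory condition and then kills the given
 * task with an unblockable, uncatchable SIGKILL.
 */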
void oom(struct task_struct * task)
{
	printk("\nOut of memory for %s.\n", current->comm);
	task->sig->action[SIGKILL-1].sa_handler = NULL;
	task->blocked &= ~(1<<(SIGKILL-1));
	send_sig(SIGKILL,task,1);
}
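/*
 * Free a single pmd entry (and the page table it points to), or a
 * whole pgd entry including every pmd hanging off it.  Bad entries
 * are reported and simply cleared.
 */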
static inline void free_one_pmd(pmd_t * dir)
{
	pte_t * pte;

	if (pmd_none(*dir))
		return;
	if (pmd_bad(*dir)) {
		printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
		pmd_clear(dir);
		return;
	}
	pte = pte_offset(dir, 0);
	pmd_clear(dir);
	pte_free(pte);
}

static inline void free_one_pgd(pgd_t * dir)
{
	pmd_t * pmd;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
		pgd_clear(dir);
		return;
	}
	pmd = pmd_offset(dir, 0);
	pgd_clear(dir);
	if (!pmd_inuse(pmd)) {
		int j;
		for (j = 0; j < PTRS_PER_PMD ; j++)
			free_one_pmd(pmd+j);
	}
	pmd_free(pmd);
}
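/*
 * clear_page_tables() frees all of the task's user page tables but
 * keeps the page directory itself, leaving an empty user address
 * space behind.
 */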
void clear_page_tables(struct task_struct * tsk)
{
	int i;
	pgd_t * page_dir;

	page_dir = tsk->mm->pgd;
	if (!page_dir || page_dir == swapper_pg_dir) {
		printk("%s trying to clear kernel page-directory: not good\n", tsk->comm);
		return;
	}
	for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
		free_one_pgd(page_dir + i);
	invalidate_mm(tsk->mm);
}
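/*
 * free_page_tables() releases the task's entire page directory and
 * every page table under it, switching the task over to the kernel's
 * swapper_pg_dir first so it never runs on freed tables.
 */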
void free_page_tables(struct task_struct * tsk)
{
	int i;
	pgd_t * page_dir;

	page_dir = tsk->mm->pgd;
	if (!page_dir || page_dir == swapper_pg_dir) {
		printk("%s trying to free kernel page-directory: not good\n", tsk->comm);
		return;
	}
	invalidate_mm(tsk->mm);
	SET_PAGE_DIR(tsk, swapper_pg_dir);
	tsk->mm->pgd = swapper_pg_dir;
	for (i = 0 ; i < PTRS_PER_PGD ; i++)
		free_one_pgd(page_dir + i);
	pgd_free(page_dir);
}
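/*
 * new_page_tables() allocates a fresh page directory for the task and
 * copies in the kernel entries from init_mm; the user part starts out
 * empty.
 */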
int new_page_tables(struct task_struct * tsk)
{
	pgd_t * page_dir, * new_pg;
	int i;

	if (!(new_pg = pgd_alloc()))
		return -ENOMEM;
	page_dir = pgd_offset(&init_mm, 0);
	for (i = USER_PTRS_PER_PGD ; i < PTRS_PER_PGD ; i++)
		new_pg[i] = page_dir[i];
	invalidate_mm(tsk->mm);
	SET_PAGE_DIR(tsk, new_pg);
	tsk->mm->pgd = new_pg;
	return 0;
}
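/*
 * The copy_* helpers below duplicate page tables for a new address
 * space.  Present pages are shared rather than copied: for a
 * copy-on-write page the pte is write-protected in both the old and
 * the new mapping, and the page's reference count is bumped.  Swap
 * entries just get their swap count duplicated.
 */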
static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte)
{
	pte_t pte = *old_pte;

	if (pte_none(pte))
		return;
	if (!pte_present(pte)) {
		swap_duplicate(pte_val(pte));
		set_pte(new_pte, pte);
		return;
	}
	if (pte_page(pte) > high_memory || mem_map[MAP_NR(pte_page(pte))].reserved) {
		set_pte(new_pte, pte);
		return;
	}
	if (pte_cow(pte))
		pte = pte_wrprotect(pte);
	if (delete_from_swap_cache(pte_page(pte)))
		pte = pte_mkdirty(pte);
	set_pte(new_pte, pte_mkold(pte));
	set_pte(old_pte, pte);
	mem_map[MAP_NR(pte_page(pte))].count++;
}

static inline int copy_pte_range(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long address, unsigned long size)
{
	pte_t * src_pte, * dst_pte;
	unsigned long end;

	if (pmd_none(*src_pmd))
		return 0;
	if (pmd_bad(*src_pmd)) {
		printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd));
		pmd_clear(src_pmd);
		return 0;
	}
	src_pte = pte_offset(src_pmd, address);
	if (pmd_none(*dst_pmd)) {
		if (!pte_alloc(dst_pmd, 0))
			return -ENOMEM;
	}
	dst_pte = pte_offset(dst_pmd, address);
	address &= ~PMD_MASK;
	end = address + size;
	if (end >= PMD_SIZE)
		end = PMD_SIZE;
	do {
		copy_one_pte(src_pte++, dst_pte++);
		address += PAGE_SIZE;
	} while (address < end);
	return 0;
}

static inline int copy_pmd_range(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long address, unsigned long size)
{
	pmd_t * src_pmd, * dst_pmd;
	unsigned long end;
	int error = 0;

	if (pgd_none(*src_pgd))
		return 0;
	if (pgd_bad(*src_pgd)) {
		printk("copy_pmd_range: bad pgd (%08lx)\n", pgd_val(*src_pgd));
		pgd_clear(src_pgd);
		return 0;
	}
	src_pmd = pmd_offset(src_pgd, address);
	if (pgd_none(*dst_pgd)) {
		if (!pmd_alloc(dst_pgd, 0))
			return -ENOMEM;
	}
	dst_pmd = pmd_offset(dst_pgd, address);
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		error = copy_pte_range(dst_pmd++, src_pmd++, address, end - address);
		if (error)
			break;
		address = (address + PMD_SIZE) & PMD_MASK;
	} while (address < end);
	return error;
}
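/*
 * copy_page_range() copies one whole vma of the source mm into the
 * destination mm, walking pgd -> pmd -> pte and sharing the
 * underlying pages.  Both address spaces are flushed afterwards,
 * since write-protect bits may have changed in the source as well.
 */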
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
	struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	int error = 0;

	src_pgd = pgd_offset(src, address);
	dst_pgd = pgd_offset(dst, address);
	while (address < end) {
		error = copy_pmd_range(dst_pgd++, src_pgd++, address, end - address);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
	}
	invalidate_range(src, vma->vm_start, vma->vm_end);
	invalidate_range(dst, vma->vm_start, vma->vm_end);
	return error;
}
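/*
 * The zap_* helpers below tear down a range of mappings.
 * forget_pte() drops whatever a cleared pte used to point at: a
 * present page is freed (and the rss count adjusted), a swap entry is
 * released.
 */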
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		free_page(pte_page(page));
		if (mem_map[MAP_NR(pte_page(page))].reserved)
			return;
		if (current->mm->rss <= 0)
			return;
		current->mm->rss--;
		return;
	}
	swap_free(pte_val(page));
}

static inline void zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
{
	pte_t * pte;
	unsigned long end;

	if (pmd_none(*pmd))
		return;
	if (pmd_bad(*pmd)) {
		printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
		pmd_clear(pmd);
		return;
	}
	pte = pte_offset(pmd, address);
	address &= ~PMD_MASK;
	end = address + size;
	if (end >= PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t page = *pte;
		pte_clear(pte);
		forget_pte(page);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
}

static inline void zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
{
	pmd_t * pmd;
	unsigned long end;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
		pgd_clear(dir);
		return;
	}
	pmd = pmd_offset(dir, address);
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		zap_pte_range(pmd, address, end - address);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
}
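/*
 * zap_page_range() removes all mappings in the given address range
 * and flushes the TLB for it.
 */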
int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
{
	pgd_t * dir;
	unsigned long end = address + size;

	dir = pgd_offset(mm, address);
	while (address < end) {
		zap_pmd_range(dir, address, end - address);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate_range(mm, end - size, end);
	return 0;
}
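/*
 * The zeromap_* helpers map a range of the current process's address
 * space to the shared zero page, write-protected, dropping whatever
 * was mapped there before.
 */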
static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pte_t zero_pte)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t oldpage = *pte;
		set_pte(pte, zero_pte);
		forget_pte(oldpage);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
}

static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, pte_t zero_pte)
{
	unsigned long end;

	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		pte_t * pte = pte_alloc(pmd, address);
		if (!pte)
			return -ENOMEM;
		zeromap_pte_range(pte, address, end - address, zero_pte);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}

int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long end = address + size;
	pte_t zero_pte;

	zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
	dir = pgd_offset(current->mm, address);
	while (address < end) {
		pmd_t *pmd = pmd_alloc(dir, address);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate_range(current->mm, end - size, end);
	return error;
}
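/*
 * The remap_* helpers map the physical range starting at 'offset'
 * into the current process's virtual range starting at 'from'.  Note
 * that only addresses above high_memory or reserved pages are
 * actually entered into the page tables.
 */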
static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
	unsigned long offset, pgprot_t prot)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t oldpage = *pte;
		pte_clear(pte);
		if (offset >= high_memory || mem_map[MAP_NR(offset)].reserved)
			set_pte(pte, mk_pte(offset, prot));
		forget_pte(oldpage);
		address += PAGE_SIZE;
		offset += PAGE_SIZE;
		pte++;
	} while (address < end);
}

static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
	unsigned long offset, pgprot_t prot)
{
	unsigned long end;

	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	offset -= address;
	do {
		pte_t * pte = pte_alloc(pmd, address);
		if (!pte)
			return -ENOMEM;
		remap_pte_range(pte, address, end - address, address + offset, prot);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}

int remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long end = from + size;

	offset -= from;
	dir = pgd_offset(current->mm, from);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = remap_pmd_range(pmd, from, end - from, offset + from, prot);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate_range(current->mm, from - size, from);
	return error;
}
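/*
 * put_page() installs a pte, complaining (and dropping the new page)
 * if a mapping is already present at that slot.
 */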
static void put_page(pte_t * page_table, pte_t pte)
{
	if (!pte_none(*page_table)) {
		printk("put_page: page already exists %08lx\n", pte_val(*page_table));
		free_page(pte_page(pte));
		return;
	}
	set_pte(page_table, pte);
}
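/*
 * put_dirty_page() maps the given page writable and dirty at
 * 'address' in the task's address space; it is used for pages that
 * have already been filled in by hand, e.g. argument pages set up
 * during exec.
 */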
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	if (page >= high_memory)
		printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
	if (mem_map[MAP_NR(page)].count != 1)
		printk("mem_map disagrees with %08lx at %08lx\n",page,address);
	pgd = pgd_offset(tsk->mm,address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	pte = pte_alloc(pmd, address);
	if (!pte) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	if (!pte_none(*pte)) {
		printk("put_dirty_page: page already exists\n");
		free_page(page);
		return 0;
	}
	set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
	return page;
}
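/*
 * do_wp_page() handles a write fault on a present but write-protected
 * page, i.e. copy-on-write.  If the page has only one user it is
 * simply made writable and dirty again; otherwise a private copy is
 * made for this process and the old page's reference count is
 * dropped.
 */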
void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	new_page = __get_free_page(GFP_KERNEL);
	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		goto end_wp_page;
	if (pgd_bad(*page_dir))
		goto bad_wp_pagedir;
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		goto end_wp_page;
	if (pmd_bad(*page_middle))
		goto bad_wp_pagemiddle;
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= high_memory)
		goto bad_wp_page;
	tsk->min_flt++;
	if (mem_map[MAP_NR(old_page)].count != 1) {
		if (new_page) {
			if (mem_map[MAP_NR(old_page)].reserved)
				++vma->vm_mm->rss;
			copy_page(old_page,new_page);
			set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
			free_page(old_page);
			invalidate_page(vma, address);
			return;
		}
		set_pte(page_table, BAD_PAGE);
		free_page(old_page);
		oom(tsk);
		invalidate_page(vma, address);
		return;
	}
	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
	invalidate_page(vma, address);
	if (new_page)
		free_page(new_page);
	return;
bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagemiddle:
	printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagedir:
	printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
	send_sig(SIGKILL, tsk, 1);
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}
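/*
 * verify_area() checks that a user-space range may be read or
 * written.  On CPUs where write protection is not honoured in kernel
 * mode (the original i386), writes are checked by simulating the
 * copy-on-write faults by hand.
 */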
int verify_area(int type, const void * addr, unsigned long size)
{
	struct vm_area_struct * vma;
	unsigned long start = (unsigned long) addr;

	if (get_fs() == get_ds())
		return 0;

	vma = find_vma(current, start);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= start)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, start))
		goto bad_area;

good_area:
	if (type == VERIFY_WRITE)
		goto check_write;
	for (;;) {
		struct vm_area_struct * next;
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		if (vma->vm_end - start >= size)
			return 0;
		next = vma->vm_next;
		if (!next || vma->vm_end != next->vm_start)
			goto bad_area;
		vma = next;
	}

check_write:
	if (!(vma->vm_flags & VM_WRITE))
		goto bad_area;
	if (!wp_works_ok)
		goto check_wp_fault_by_hand;
	for (;;) {
		if (vma->vm_end - start >= size)
			break;
		if (!vma->vm_next || vma->vm_end != vma->vm_next->vm_start)
			goto bad_area;
		vma = vma->vm_next;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}
	return 0;

check_wp_fault_by_hand:
	size--;
	size += start & ~PAGE_MASK;
	size >>= PAGE_SHIFT;
	start &= PAGE_MASK;

	for (;;) {
		do_wp_page(current, vma, start, 1);
		if (!size)
			break;
		size--;
		start += PAGE_SIZE;
		if (start < vma->vm_end)
			continue;
		vma = vma->vm_next;
		if (!vma || vma->vm_start != start)
			goto bad_area;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}
	return 0;

bad_area:
	return -EFAULT;
}
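/*
 * get_empty_page() maps a fresh zeroed page, writable, at the
 * faulting address; if no memory is available the task is killed and
 * BAD_PAGE is mapped instead.
 */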
static inline void get_empty_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t * page_table)
{
	unsigned long tmp;

	if (!(tmp = get_free_page(GFP_KERNEL))) {
		oom(tsk);
		put_page(page_table, BAD_PAGE);
		return;
	}
	put_page(page_table, pte_mkwrite(mk_pte(tmp, vma->vm_page_prot)));
}
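/*
 * try_to_share() attempts to satisfy a fault by borrowing the page
 * already mapped at 'from_address' in another mapping of the same
 * inode.  If 'newpage' is given, a private copy is made into it;
 * otherwise the page itself is shared and, when necessary,
 * write-protected in the original mapping.
 */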
static int try_to_share(unsigned long to_address, struct vm_area_struct * to_area,
	unsigned long from_address, struct vm_area_struct * from_area,
	unsigned long newpage)
{
	pgd_t * from_dir, * to_dir;
	pmd_t * from_middle, * to_middle;
	pte_t * from_table, * to_table;
	pte_t from, to;

	from_dir = pgd_offset(from_area->vm_mm,from_address);
	if (pgd_none(*from_dir))
		return 0;
	if (pgd_bad(*from_dir)) {
		printk("try_to_share: bad page directory %08lx\n", pgd_val(*from_dir));
		pgd_clear(from_dir);
		return 0;
	}
	from_middle = pmd_offset(from_dir, from_address);
	if (pmd_none(*from_middle))
		return 0;
	if (pmd_bad(*from_middle)) {
		printk("try_to_share: bad mid directory %08lx\n", pmd_val(*from_middle));
		pmd_clear(from_middle);
		return 0;
	}
	from_table = pte_offset(from_middle, from_address);
	from = *from_table;
	if (!pte_present(from))
		return 0;
	if (pte_dirty(from)) {
		if (!(from_area->vm_flags & VM_SHARED))
			return 0;
	}
	if (pte_page(from) >= high_memory)
		return 0;
	if (mem_map[MAP_NR(pte_page(from))].reserved)
		return 0;

	to_dir = pgd_offset(to_area->vm_mm,to_address);
	if (pgd_none(*to_dir))
		return 0;
	if (pgd_bad(*to_dir)) {
		printk("try_to_share: bad page directory %08lx\n", pgd_val(*to_dir));
		return 0;
	}
	to_middle = pmd_offset(to_dir, to_address);
	if (pmd_none(*to_middle))
		return 0;
	if (pmd_bad(*to_middle)) {
		printk("try_to_share: bad mid directory %08lx\n", pmd_val(*to_middle));
		return 0;
	}
	to_table = pte_offset(to_middle, to_address);
	to = *to_table;
	if (!pte_none(to))
		return 0;

	if (newpage) {
		if (in_swap_cache(pte_page(from))) {
			if (!(from_area->vm_flags & VM_SHARED))
				return 0;
		}
		copy_page(pte_page(from), newpage);
		set_pte(to_table, mk_pte(newpage, to_area->vm_page_prot));
		return 1;
	}

	if (in_swap_cache(pte_page(from))) {
		if (!(from_area->vm_flags & VM_SHARED))
			return 0;
		set_pte(from_table, pte_mkdirty(from));
		delete_from_swap_cache(pte_page(from));
	}
	mem_map[MAP_NR(pte_page(from))].count++;
	set_pte(to_table, mk_pte(pte_page(from), to_area->vm_page_prot));

	if (!pte_write(from))
		return 1;
	if (from_area->vm_flags & VM_SHARED)
		return 1;

	set_pte(from_table, pte_wrprotect(from));
	invalidate_page(from_area, from_address);
	return 1;
}
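/*
 * share_page() walks the inode's mapping ring looking for another vma
 * that already has the wanted page in core, and uses try_to_share()
 * to map it here.  Returns 1 if the fault was handled this way.
 */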
static int share_page(struct vm_area_struct * area, unsigned long address,
	int write_access, unsigned long newpage)
{
	struct inode * inode;
	unsigned long offset;
	unsigned long from_address;
	unsigned long give_page;
	struct vm_area_struct * mpnt;

	if (!area || !(inode = area->vm_inode) || inode->i_count < 2)
		return 0;

	give_page = 0;
	if (write_access && !(area->vm_flags & VM_SHARED)) {
		if (!newpage)
			return 0;
		give_page = newpage;
	}
	offset = address - area->vm_start + area->vm_offset;

	for (mpnt = area->vm_next_share; mpnt != area; mpnt = mpnt->vm_next_share) {
		if (mpnt->vm_inode != inode) {
			printk("Aiee! Corrupt vm_area_struct i_mmap ring\n");
			break;
		}
		if ((mpnt->vm_offset ^ area->vm_offset) & ~PAGE_MASK)
			continue;
		from_address = offset + mpnt->vm_start - mpnt->vm_offset;
		if (from_address < mpnt->vm_start || from_address >= mpnt->vm_end)
			continue;
		if (!try_to_share(address, area, from_address, mpnt, give_page))
			continue;
		if (give_page || !newpage)
			return 1;
		free_page(newpage);
		return 1;
	}
	return 0;
}
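/*
 * unshare() is a helper for vmtruncate(): it zeroes the tail of a
 * partially truncated page and, if that page is also in use by the
 * buffer cache, moves the buffers onto a fresh copy of the page so
 * the mapping and the buffer cache stop sharing it.
 */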
static int unshare(struct vm_area_struct *vma, unsigned long address, unsigned long new_page)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page;
	struct buffer_head * bh, * tmp;

	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		return 0;
	if (pgd_bad(*page_dir)) {
		printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
		pgd_clear(page_dir);
		return 0;
	}
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		return 0;
	if (pmd_bad(*page_middle)) {
		printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
		pmd_clear(page_middle);
		return 0;
	}
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		return 0;
	old_page = pte_page(pte);
	if (MAP_NR(old_page) > MAP_NR(high_memory))
		return 0;
	address &= ~PAGE_MASK;
	memset((void *) (old_page + address), 0, PAGE_SIZE - address);
	bh = buffer_pages[MAP_NR(old_page)];
	if (!bh)
		return 0;
	if (!new_page) {
		printk("Aieee... unshare(): no page available\n");
		return 0;
	}
	buffer_pages[MAP_NR(old_page)] = NULL;
	copy_page(old_page, new_page);
	free_page(old_page);
	old_page -= new_page;
	buffer_pages[MAP_NR(new_page)] = bh;
	tmp = bh;
	do {
		tmp->b_data -= old_page;
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	return 1;
}
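/*
 * vmtruncate() is called when a file is shrunk: every mapping of the
 * inode is walked and the page-table entries beyond the new size are
 * zapped, with unshare() taking care of the page that straddles the
 * new end of the file.
 */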
void vmtruncate(struct inode * inode, unsigned long offset)
{
	unsigned long page;
	struct vm_area_struct * mpnt;

	if (!inode->i_mmap)
		return;
	page = __get_free_page(GFP_KERNEL);
	mpnt = inode->i_mmap;
	if (!mpnt) {
		free_page(page);
		return;
	}
	do {
		unsigned long start = mpnt->vm_start;
		unsigned long len = mpnt->vm_end - start;
		unsigned long diff;

		if (mpnt->vm_offset >= offset) {
			zap_page_range(mpnt->vm_mm, start, len);
			continue;
		}
		diff = offset - mpnt->vm_offset;
		if (diff >= len)
			continue;
		start += diff;
		len = (len - diff) & PAGE_MASK;
		if (start & ~PAGE_MASK) {
			if (unshare(mpnt, start, page))
				page = 0;
			start = (start + ~PAGE_MASK) & PAGE_MASK;
		}
		zap_page_range(mpnt->vm_mm, start, len);
	} while ((mpnt = mpnt->vm_next_share) != inode->i_mmap);
	free_page(page);
}
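/*
 * get_empty_pgtable() makes sure a page table exists for 'address' in
 * the task's address space and returns a pointer to the pte slot, or
 * NULL (after killing the task via oom) if the tables cannot be
 * allocated.
 */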
static inline pte_t * get_empty_pgtable(struct task_struct * tsk,unsigned long address)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(tsk->mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd) {
		oom(tsk);
		return NULL;
	}
	pte = pte_alloc(pmd, address);
	if (!pte) {
		oom(tsk);
		return NULL;
	}
	return pte;
}
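/*
 * do_swap_page() brings a swapped-out page back in, either through
 * the generic swap_in() or through the vma's own swapin operation.  A
 * page shared with other users is mapped write-protected unless the
 * mapping is VM_SHARED.
 */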
static inline void do_swap_page(struct task_struct * tsk,
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, pte_t entry, int write_access)
{
	pte_t page;

	if (!vma->vm_ops || !vma->vm_ops->swapin) {
		swap_in(tsk, vma, page_table, pte_val(entry), write_access);
		return;
	}
	page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
	if (pte_val(*page_table) != pte_val(entry)) {
		free_page(pte_page(page));
		return;
	}
	if (mem_map[MAP_NR(pte_page(page))].count > 1 && !(vma->vm_flags & VM_SHARED))
		page = pte_wrprotect(page);
	++vma->vm_mm->rss;
	++tsk->maj_flt;
	set_pte(page_table, page);
	return;
}
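/*
 * do_no_page() handles a fault on a pte that is not present.  Swap
 * entries go to do_swap_page(); mappings with no nopage operation get
 * a fresh zeroed page; file mappings first try to share a page that
 * is already in core (share_page) and otherwise read it in through
 * the vma's nopage operation, write-protecting it when it stays
 * shared.
 */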
void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pte_t * page_table;
	pte_t entry;
	unsigned long page;

	page_table = get_empty_pgtable(tsk, address);
	if (!page_table)
		return;
	entry = *page_table;
	if (pte_present(entry))
		return;
	if (!pte_none(entry)) {
		do_swap_page(tsk, vma, address, page_table, entry, write_access);
		return;
	}
	address &= PAGE_MASK;
	if (!vma->vm_ops || !vma->vm_ops->nopage) {
		++vma->vm_mm->rss;
		++tsk->min_flt;
		get_empty_page(tsk, vma, page_table);
		return;
	}
	page = __get_free_page(GFP_KERNEL);
	if (share_page(vma, address, write_access, page)) {
		++vma->vm_mm->rss;
		++tsk->min_flt;
		return;
	}
	if (!page) {
		oom(tsk);
		put_page(page_table, BAD_PAGE);
		return;
	}
	++tsk->maj_flt;
	++vma->vm_mm->rss;
	page = vma->vm_ops->nopage(vma, address, page,
		write_access && !(vma->vm_flags & VM_SHARED));
	if (share_page(vma, address, write_access, 0)) {
		free_page(page);
		return;
	}
	entry = mk_pte(page, vma->vm_page_prot);
	if (write_access) {
		entry = pte_mkwrite(pte_mkdirty(entry));
	} else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED))
		entry = pte_wrprotect(entry);
	put_page(page_table, entry);
}
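/*
 * handle_pte_fault() sorts out a fault on a single pte: a not-present
 * entry goes to do_no_page(), a write to a write-protected present
 * page goes to do_wp_page().  For a present page the accessed bit is
 * refreshed, and a write to a writable page just sets the dirty bit.
 */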
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	set_pte(pte, pte_mkyoung(*pte));
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		set_pte(pte, pte_mkdirty(*pte));
		return;
	}
	do_wp_page(current, vma, address, write_access);
}
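/*
 * handle_mm_fault() is the architecture-independent entry point for a
 * page fault: it allocates any missing pmd/pte levels (killing the
 * task via oom() on failure), lets handle_pte_fault() sort out the
 * pte, and then updates the MMU cache for the new mapping.
 */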
void handle_mm_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(vma->vm_mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd)
		goto no_memory;
	pte = pte_alloc(pmd, address);
	if (!pte)
		goto no_memory;
	handle_pte_fault(vma, address, write_access, pte);
	update_mmu_cache(vma, address, *pte);
	return;
no_memory:
	oom(current);
}