This source file includes the following definitions:
- oom
- free_one_pmd
- free_one_pgd
- new_page_tables
- clear_page_tables
- free_page_tables
- copy_one_pte
- copy_pte_range
- copy_pmd_range
- copy_page_range
- forget_pte
- unmap_pte_range
- unmap_pmd_range
- zap_page_range
- unmap_page_range
- zeromap_pte_range
- zeromap_pmd_range
- zeromap_page_range
- remap_pte_range
- remap_pmd_range
- remap_page_range
- put_page
- put_dirty_page
- do_wp_page
- verify_area
- get_empty_page
- try_to_share
- share_page
- unshare
- vmtruncate
- get_empty_pgtable
- do_swap_page
- do_no_page
- handle_pte_fault
- handle_mm_fault
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/pgtable.h>

unsigned long high_memory = 0;

int nr_swap_pages = 0;
int nr_free_pages = 0;
struct mem_list free_area_list[NR_MEM_LISTS];
unsigned char * free_area_map[NR_MEM_LISTS];

#define copy_page(from,to) memcpy((void *) to, (void *) from, PAGE_SIZE)

#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)

mem_map_t * mem_map = NULL;

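/*
 * oom() is called when a task has run out of memory: print a message
 * and give it an uncatchable SIGKILL (the handler is cleared and the
 * signal unblocked, so it cannot be caught or ignored).
 */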
void oom(struct task_struct * task)
{
	printk("\nOut of memory for %s.\n", current->comm);
	task->sig->action[SIGKILL-1].sa_handler = NULL;
	task->blocked &= ~(1<<(SIGKILL-1));
	send_sig(SIGKILL,task,1);
}

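/*
 * free_one_pmd() and free_one_pgd() tear down one level of a page-table
 * tree: they detach the lower-level table from the directory entry and
 * release it, complaining (and clearing the entry) if it looks corrupt.
 */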
static inline void free_one_pmd(pmd_t * dir)
{
	pte_t * pte;

	if (pmd_none(*dir))
		return;
	if (pmd_bad(*dir)) {
		printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
		pmd_clear(dir);
		return;
	}
	pte = pte_offset(dir, 0);
	pmd_clear(dir);
	pte_free(pte);
}

static inline void free_one_pgd(pgd_t * dir)
{
	pmd_t * pmd;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
		pgd_clear(dir);
		return;
	}
	pmd = pmd_offset(dir, 0);
	pgd_clear(dir);
	if (!pmd_inuse(pmd)) {
		int j;
		for (j = 0; j < PTRS_PER_PMD ; j++)
			free_one_pmd(pmd+j);
	}
	pmd_free(pmd);
}

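/*
 * new_page_tables() allocates a fresh page directory for the task,
 * copies the kernel entries (everything above USER_PTRS_PER_PGD) from
 * the init_mm directory, and installs the new directory.
 */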
int new_page_tables(struct task_struct * tsk)
{
	pgd_t * page_dir, * new_pg;
	int i;

	if (!(new_pg = pgd_alloc()))
		return -ENOMEM;
	page_dir = pgd_offset(&init_mm, 0);
	for (i = USER_PTRS_PER_PGD ; i < PTRS_PER_PGD ; i++)
		new_pg[i] = page_dir[i];
	SET_PAGE_DIR(tsk, new_pg);
	tsk->mm->pgd = new_pg;
	return 0;
}

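/*
 * clear_page_tables() clears all user-level page tables of a process -
 * this is needed by execve(), so that old pages aren't in the way. If
 * the page directory is shared with other processes, the task gets
 * fresh page tables of its own instead.
 */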
void clear_page_tables(struct task_struct * tsk)
{
	int i;
	pgd_t * page_dir;

	if (!tsk)
		return;
	if (tsk == task[0])
		panic("task[0] (swapper) doesn't support exec()\n");
	page_dir = pgd_offset(tsk->mm, 0);
	if (!page_dir) {
		printk("%s trying to clear NULL page-directory: not good\n", tsk->comm);
		return;
	}
	if (pgd_inuse(page_dir)) {
		if (new_page_tables(tsk))
			oom(tsk);
		pgd_free(page_dir);
		return;
	}
	if (page_dir == swapper_pg_dir) {
		printk("%s trying to clear kernel page-directory: not good\n", tsk->comm);
		return;
	}
	for (i = 0 ; i < USER_PTRS_PER_PGD ; i++)
		free_one_pgd(page_dir + i);
	invalidate();
	return;
}

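/*
 * free_page_tables() frees a process' complete page directory: the task
 * is switched over to the kernel (swapper) page tables first, so the
 * directory being torn down is no longer in use.
 */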
void free_page_tables(struct task_struct * tsk)
{
	int i;
	pgd_t * page_dir;

	page_dir = tsk->mm->pgd;
	if (!page_dir || page_dir == swapper_pg_dir) {
		printk("%s trying to free kernel page-directory: not good\n", tsk->comm);
		return;
	}
	SET_PAGE_DIR(tsk, swapper_pg_dir);
	if (pgd_inuse(page_dir)) {
		pgd_free(page_dir);
		return;
	}
	tsk->mm->pgd = swapper_pg_dir;
	for (i = 0 ; i < PTRS_PER_PGD ; i++)
		free_one_pgd(page_dir + i);
	pgd_free(page_dir);
	invalidate();
}

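/*
 * copy_one_pte() duplicates a single pte for fork(): swap entries get
 * their use count bumped, out-of-range and reserved pages are copied
 * verbatim, and ordinary pages are shared (write-protected in both
 * parent and child if COW) with their reference count incremented.
 */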
static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte)
{
	pte_t pte = *old_pte;

	if (pte_none(pte))
		return;
	if (!pte_present(pte)) {
		swap_duplicate(pte_val(pte));
		set_pte(new_pte, pte);
		return;
	}
	if (pte_page(pte) > high_memory || mem_map[MAP_NR(pte_page(pte))].reserved) {
		set_pte(new_pte, pte);
		return;
	}
	if (pte_cow(pte))
		pte = pte_wrprotect(pte);
	if (delete_from_swap_cache(pte_page(pte)))
		pte = pte_mkdirty(pte);
	set_pte(new_pte, pte_mkold(pte));
	set_pte(old_pte, pte);
	mem_map[MAP_NR(pte_page(pte))].count++;
}

static inline int copy_pte_range(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long address, unsigned long size)
{
	pte_t * src_pte, * dst_pte;
	unsigned long end;

	if (pmd_none(*src_pmd))
		return 0;
	if (pmd_bad(*src_pmd)) {
		printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd));
		pmd_clear(src_pmd);
		return 0;
	}
	src_pte = pte_offset(src_pmd, address);
	if (pmd_none(*dst_pmd)) {
		if (!pte_alloc(dst_pmd, 0))
			return -ENOMEM;
	}
	dst_pte = pte_offset(dst_pmd, address);
	address &= ~PMD_MASK;
	end = address + size;
	if (end >= PMD_SIZE)
		end = PMD_SIZE;
	do {
		copy_one_pte(src_pte++, dst_pte++);
		address += PAGE_SIZE;
	} while (address < end);
	return 0;
}

static inline int copy_pmd_range(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long address, unsigned long size)
{
	pmd_t * src_pmd, * dst_pmd;
	unsigned long end;
	int error = 0;

	if (pgd_none(*src_pgd))
		return 0;
	if (pgd_bad(*src_pgd)) {
		printk("copy_pmd_range: bad pgd (%08lx)\n", pgd_val(*src_pgd));
		pgd_clear(src_pgd);
		return 0;
	}
	src_pmd = pmd_offset(src_pgd, address);
	if (pgd_none(*dst_pgd)) {
		if (!pmd_alloc(dst_pgd, 0))
			return -ENOMEM;
	}
	dst_pmd = pmd_offset(dst_pgd, address);
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		error = copy_pte_range(dst_pmd++, src_pmd++, address, end - address);
		if (error)
			break;
		address = (address + PMD_SIZE) & PMD_MASK;
	} while (address < end);
	return error;
}

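/*
 * copy_page_range() copies the page tables for one vm_area from the
 * parent to the child at fork() time. The actual work is done by the
 * copy_pmd_range/copy_pte_range helpers above.
 */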
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
			struct vm_area_struct *vma)
{
	pgd_t * src_pgd, * dst_pgd;
	unsigned long address = vma->vm_start;
	unsigned long end = vma->vm_end;
	int error = 0;

	src_pgd = pgd_offset(src, address);
	dst_pgd = pgd_offset(dst, address);
	while (address < end) {
		error = copy_pmd_range(dst_pgd++, src_pgd++, address, end - address);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
	}
	invalidate();
	return error;
}

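/*
 * forget_pte() releases whatever a pte pointed to: a present page is
 * freed (and counted out of the rss, unless it was a reserved page), a
 * non-present entry is handed back to the swap code.
 */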
static inline void forget_pte(pte_t page)
{
	if (pte_none(page))
		return;
	if (pte_present(page)) {
		free_page(pte_page(page));
		if (mem_map[MAP_NR(pte_page(page))].reserved)
			return;
		if (current->mm->rss <= 0)
			return;
		current->mm->rss--;
		return;
	}
	swap_free(pte_val(page));
}

static inline void unmap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size)
{
	pte_t * pte;
	unsigned long end;

	if (pmd_none(*pmd))
		return;
	if (pmd_bad(*pmd)) {
		printk("unmap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
		pmd_clear(pmd);
		return;
	}
	pte = pte_offset(pmd, address);
	address &= ~PMD_MASK;
	end = address + size;
	if (end >= PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t page = *pte;
		pte_clear(pte);
		forget_pte(page);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
}

static inline void unmap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size)
{
	pmd_t * pmd;
	unsigned long end;

	if (pgd_none(*dir))
		return;
	if (pgd_bad(*dir)) {
		printk("unmap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
		pgd_clear(dir);
		return;
	}
	pmd = pmd_offset(dir, address);
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		unmap_pte_range(pmd, address, end - address);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
}

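/*
 * zap_page_range() removes the user pages in the given range from the
 * given address space, releasing the underlying pages or swap entries.
 */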
int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size)
{
	pgd_t * dir;
	unsigned long end = address + size;

	dir = pgd_offset(mm, address);
	while (address < end) {
		unmap_pmd_range(dir, address, end - address);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate();
	return 0;
}

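/*
 * unmap_page_range() is the old interface: it zaps a range in the
 * current process' address space.
 */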
int unmap_page_range(unsigned long address, unsigned long size)
{
	return zap_page_range(current->mm, address, size);
}

static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pte_t zero_pte)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t oldpage = *pte;
		set_pte(pte, zero_pte);
		forget_pte(oldpage);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
}

static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, pte_t zero_pte)
{
	unsigned long end;

	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		pte_t * pte = pte_alloc(pmd, address);
		if (!pte)
			return -ENOMEM;
		zeromap_pte_range(pte, address, end - address, zero_pte);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}

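/*
 * zeromap_page_range() maps the whole range to the shared ZERO_PAGE,
 * write-protected so that a later write fault will supply the process
 * with a private copy.
 */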
int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long end = address + size;
	pte_t zero_pte;

	zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot));
	dir = pgd_offset(current->mm, address);
	while (address < end) {
		pmd_t *pmd = pmd_alloc(dir, address);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = zeromap_pmd_range(pmd, address, end - address, zero_pte);
		if (error)
			break;
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate();
	return error;
}

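/*
 * The remap_*_range() helpers below map physical addresses into a
 * process' virtual address space. Note that a pte is only set up for
 * addresses outside the mem_map (or for reserved pages), so normal RAM
 * pages cannot be remapped this way.
 */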
static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
	unsigned long offset, pgprot_t prot)
{
	unsigned long end;

	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t oldpage = *pte;
		pte_clear(pte);
		if (offset >= high_memory || mem_map[MAP_NR(offset)].reserved)
			set_pte(pte, mk_pte(offset, prot));
		forget_pte(oldpage);
		address += PAGE_SIZE;
		offset += PAGE_SIZE;
		pte++;
	} while (address < end);
}

static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
	unsigned long offset, pgprot_t prot)
{
	unsigned long end;

	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	offset -= address;
	do {
		pte_t * pte = pte_alloc(pmd, address);
		if (!pte)
			return -ENOMEM;
		remap_pte_range(pte, address, end - address, address + offset, prot);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}

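/*
 * remap_page_range() maps the physical range starting at "offset" into
 * the current process at virtual address "from". The typical caller is
 * a device driver's mmap operation; a (hypothetical) frame-buffer
 * driver might do roughly:
 *
 *	if (remap_page_range(vma->vm_start, fb_phys_addr,
 *			     vma->vm_end - vma->vm_start, vma->vm_page_prot))
 *		return -EAGAIN;
 *
 * where fb_phys_addr would be the physical address of the device memory.
 */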
int remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot)
{
	int error = 0;
	pgd_t * dir;
	unsigned long end = from + size;

	offset -= from;
	dir = pgd_offset(current->mm, from);
	while (from < end) {
		pmd_t *pmd = pmd_alloc(dir, from);
		error = -ENOMEM;
		if (!pmd)
			break;
		error = remap_pmd_range(pmd, from, end - from, offset + from, prot);
		if (error)
			break;
		from = (from + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate();
	return error;
}

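/*
 * put_page() installs a pte in a known-empty page table slot, freeing
 * the page instead if the slot was unexpectedly occupied.
 */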
static void put_page(pte_t * page_table, pte_t pte)
{
	if (!pte_none(*page_table)) {
		printk("put_page: page already exists %08lx\n", pte_val(*page_table));
		free_page(pte_page(pte));
		return;
	}
	/* no need to invalidate: the entry was not present before */
	*page_table = pte;
}

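/*
 * put_dirty_page() maps a page into a task's address space at the given
 * address, marked dirty and writable; it is used when the new stack
 * pages are set up at exec time.
 */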
unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	if (page >= high_memory)
		printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
	if (mem_map[MAP_NR(page)].count != 1)
		printk("mem_map disagrees with %08lx at %08lx\n",page,address);
	pgd = pgd_offset(tsk->mm,address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	pte = pte_alloc(pmd, address);
	if (!pte) {
		free_page(page);
		oom(tsk);
		return 0;
	}
	if (!pte_none(*pte)) {
		printk("put_dirty_page: page already exists\n");
		pte_clear(pte);
		invalidate();
	}
	set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));

	return page;
}

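/*
 * do_wp_page() handles write faults on present pages - the copy-on-write
 * case. If the page is shared (count != 1) it is copied into a new page
 * and the old one is released; if we hold the only reference, the pte
 * is simply made writable (and dirty) again.
 *
 * The new page is allocated up front, before we know whether it is
 * needed, because the allocation may block.
 */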
void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page, new_page;

	new_page = __get_free_page(GFP_KERNEL);
	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		goto end_wp_page;
	if (pgd_bad(*page_dir))
		goto bad_wp_pagedir;
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		goto end_wp_page;
	if (pmd_bad(*page_middle))
		goto bad_wp_pagemiddle;
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		goto end_wp_page;
	if (pte_write(pte))
		goto end_wp_page;
	old_page = pte_page(pte);
	if (old_page >= high_memory)
		goto bad_wp_page;
	tsk->min_flt++;
	/* Do we need to copy? */
	if (mem_map[MAP_NR(old_page)].count != 1) {
		if (new_page) {
			if (mem_map[MAP_NR(old_page)].reserved)
				++vma->vm_mm->rss;
			copy_page(old_page,new_page);
			set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
			free_page(old_page);
			invalidate();
			return;
		}
		set_pte(page_table, BAD_PAGE);
		free_page(old_page);
		oom(tsk);
		invalidate();
		return;
	}
	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
	invalidate();
	if (new_page)
		free_page(new_page);
	return;
bad_wp_page:
	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagemiddle:
	printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle));
	send_sig(SIGKILL, tsk, 1);
	goto end_wp_page;
bad_wp_pagedir:
	printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir));
	send_sig(SIGKILL, tsk, 1);
end_wp_page:
	if (new_page)
		free_page(new_page);
	return;
}

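/*
 * verify_area() checks that the current process may access a user-space
 * range of the given size for reading or writing. System calls that
 * touch user memory typically use it as:
 *
 *	if (verify_area(VERIFY_WRITE, buf, count))
 *		return -EFAULT;
 *
 * On CPUs whose write protection doesn't work from kernel mode
 * (!wp_works_ok), write access is verified by simulating a write fault
 * on every page by hand.
 */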
int verify_area(int type, const void * addr, unsigned long size)
{
	struct vm_area_struct * vma;
	unsigned long start = (unsigned long) addr;

	/*
	 * If the current fs segment is the kernel data segment, the
	 * "user" pointer actually refers to kernel space, which is
	 * always accessible: nothing to check.
	 */
	if (get_fs() == get_ds())
		return 0;

	vma = find_vma(current, start);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= start)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (vma->vm_end - start > current->rlim[RLIMIT_STACK].rlim_cur)
		goto bad_area;

good_area:
	if (type == VERIFY_WRITE)
		goto check_write;
	for (;;) {
		struct vm_area_struct * next;
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		if (vma->vm_end - start >= size)
			return 0;
		next = vma->vm_next;
		if (!next || vma->vm_end != next->vm_start)
			goto bad_area;
		vma = next;
	}

check_write:
	if (!(vma->vm_flags & VM_WRITE))
		goto bad_area;
	if (!wp_works_ok)
		goto check_wp_fault_by_hand;
	for (;;) {
		if (vma->vm_end - start >= size)
			break;
		if (!vma->vm_next || vma->vm_end != vma->vm_next->vm_start)
			goto bad_area;
		vma = vma->vm_next;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}
	return 0;

check_wp_fault_by_hand:
	size--;
	size += start & ~PAGE_MASK;
	size >>= PAGE_SHIFT;
	start &= PAGE_MASK;

	for (;;) {
		do_wp_page(current, vma, start, 1);
		if (!size)
			break;
		size--;
		start += PAGE_SIZE;
		if (start < vma->vm_end)
			continue;
		vma = vma->vm_next;
		if (!vma || vma->vm_start != start)
			goto bad_area;
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	}
	return 0;

bad_area:
	return -EFAULT;
}

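/*
 * get_empty_page() fills a page-table slot with a freshly allocated,
 * zeroed, writable page (or with BAD_PAGE if we are out of memory).
 */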
static inline void get_empty_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t * page_table)
{
	unsigned long tmp;

	if (!(tmp = get_free_page(GFP_KERNEL))) {
		oom(tsk);
		put_page(page_table, BAD_PAGE);
		return;
	}
	put_page(page_table, pte_mkwrite(mk_pte(tmp, vma->vm_page_prot)));
}

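/*
 * try_to_share() checks whether the page at "from_address" in the
 * "from" area exists and may be shared, and if so either shares it with
 * the "to" area or (if "newpage" is given) copies it into the new page.
 * Dirty or swap-cached pages can only be reused for shared mappings.
 * Returns 1 on success, 0 if sharing was not possible.
 */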
static int try_to_share(unsigned long to_address, struct vm_area_struct * to_area,
	unsigned long from_address, struct vm_area_struct * from_area,
	unsigned long newpage)
{
	pgd_t * from_dir, * to_dir;
	pmd_t * from_middle, * to_middle;
	pte_t * from_table, * to_table;
	pte_t from, to;

	from_dir = pgd_offset(from_area->vm_mm,from_address);
	/* is there a page-directory at from? */
	if (pgd_none(*from_dir))
		return 0;
	if (pgd_bad(*from_dir)) {
		printk("try_to_share: bad page directory %08lx\n", pgd_val(*from_dir));
		pgd_clear(from_dir);
		return 0;
	}
	from_middle = pmd_offset(from_dir, from_address);
	/* is there a mid-directory at from? */
	if (pmd_none(*from_middle))
		return 0;
	if (pmd_bad(*from_middle)) {
		printk("try_to_share: bad mid directory %08lx\n", pmd_val(*from_middle));
		pmd_clear(from_middle);
		return 0;
	}
	from_table = pte_offset(from_middle, from_address);
	from = *from_table;
	/* is the page present? */
	if (!pte_present(from))
		return 0;
	/* a dirty page can only be reused by a shared mapping */
	if (pte_dirty(from)) {
		if (!(from_area->vm_flags & VM_SHARED))
			return 0;
	}
	/* is the page reasonable at all? */
	if (pte_page(from) >= high_memory)
		return 0;
	if (mem_map[MAP_NR(pte_page(from))].reserved)
		return 0;

	to_dir = pgd_offset(to_area->vm_mm,to_address);
	/* is there a page-directory at to? */
	if (pgd_none(*to_dir))
		return 0;
	if (pgd_bad(*to_dir)) {
		printk("try_to_share: bad page directory %08lx\n", pgd_val(*to_dir));
		return 0;
	}
	to_middle = pmd_offset(to_dir, to_address);
	/* is there a mid-directory at to? */
	if (pmd_none(*to_middle))
		return 0;
	if (pmd_bad(*to_middle)) {
		printk("try_to_share: bad mid directory %08lx\n", pmd_val(*to_middle));
		return 0;
	}
	to_table = pte_offset(to_middle, to_address);
	to = *to_table;
	if (!pte_none(to))
		return 0;
	/* do we copy instead of sharing? */
	if (newpage) {
		/* if it's in the swap cache, it's dirty by implication */
		if (in_swap_cache(pte_page(from))) {
			if (!(from_area->vm_flags & VM_SHARED))
				return 0;
		}
		copy_page(pte_page(from), newpage);
		set_pte(to_table, mk_pte(newpage, to_area->vm_page_prot));
		return 1;
	}
	/*
	 * Share the page: if it's in the swap cache, remove it there and
	 * mark the pte dirty, as the page can no longer match what was
	 * last written to swap.
	 */
	if (in_swap_cache(pte_page(from))) {
		if (!(from_area->vm_flags & VM_SHARED))
			return 0;
		set_pte(from_table, pte_mkdirty(from));
		delete_from_swap_cache(pte_page(from));
	}
	mem_map[MAP_NR(pte_page(from))].count++;
	set_pte(to_table, mk_pte(pte_page(from), to_area->vm_page_prot));
	/* anything to do with the 'from' page? */
	if (!pte_write(from))
		return 1;
	if (from_area->vm_flags & VM_SHARED)
		return 1;
	/* it was writable: write-protect it and flush the stale TLB entry */
	set_pte(from_table, pte_wrprotect(from));
	invalidate();
	return 1;
}

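/*
 * share_page() tries to find a process that could share a page with the
 * current one: it walks the inode's i_mmap ring looking for another
 * mapping of the same file page. Sharing is feasible at all only if the
 * inode is in use more than once (i_count >= 2).
 */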
static int share_page(struct vm_area_struct * area, unsigned long address,
	int write_access, unsigned long newpage)
{
	struct inode * inode;
	unsigned long offset;
	unsigned long from_address;
	unsigned long give_page;
	struct vm_area_struct * mpnt;

	if (!area || !(inode = area->vm_inode) || inode->i_count < 2)
		return 0;

	/* a write to a private mapping cannot share: copy into the new page instead */
	give_page = 0;
	if (write_access && !(area->vm_flags & VM_SHARED)) {
		if (!newpage)
			return 0;
		give_page = newpage;
	}
	offset = address - area->vm_start + area->vm_offset;

	/* walk the ring of mappings of this inode */
	for (mpnt = area->vm_next_share; mpnt != area; mpnt = mpnt->vm_next_share) {
		/* must be same inode */
		if (mpnt->vm_inode != inode) {
			printk("Aiee! Corrupt vm_area_struct i_mmap ring\n");
			break;
		}
		/* offsets must be mutually page-aligned */
		if ((mpnt->vm_offset ^ area->vm_offset) & ~PAGE_MASK)
			continue;
		/* the other area must actually cover the wanted page.. */
		from_address = offset + mpnt->vm_start - mpnt->vm_offset;
		if (from_address < mpnt->vm_start || from_address >= mpnt->vm_end)
			continue;

		if (!try_to_share(address, area, from_address, mpnt, give_page))
			continue;
		/* if the new page wasn't handed over, it is no longer needed */
		if (give_page || !newpage)
			return 1;
		free_page(newpage);
		return 1;
	}
	return 0;
}

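/*
 * unshare() zeroes the tail of a partially truncated page, and if that
 * page is shared with the buffer cache it hands the buffer heads a
 * private copy, so the mapping and the buffer cache no longer share the
 * page. Returns 1 if "new_page" was consumed for the copy.
 */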
static int unshare(struct vm_area_struct *vma, unsigned long address, unsigned long new_page)
{
	pgd_t *page_dir;
	pmd_t *page_middle;
	pte_t *page_table, pte;
	unsigned long old_page;
	struct buffer_head * bh, * tmp;

	page_dir = pgd_offset(vma->vm_mm, address);
	if (pgd_none(*page_dir))
		return 0;
	if (pgd_bad(*page_dir)) {
		printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
		pgd_clear(page_dir);
		return 0;
	}
	page_middle = pmd_offset(page_dir, address);
	if (pmd_none(*page_middle))
		return 0;
	if (pmd_bad(*page_middle)) {
		printk("bad page table directory entry %p:[%lx]\n", page_middle, pmd_val(*page_middle));
		pmd_clear(page_middle);
		return 0;
	}
	page_table = pte_offset(page_middle, address);
	pte = *page_table;
	if (!pte_present(pte))
		return 0;
	old_page = pte_page(pte);
	if (MAP_NR(old_page) > MAP_NR(high_memory))
		return 0;
	address &= ~PAGE_MASK;
	memset((void *) (old_page + address), 0, PAGE_SIZE - address);
	bh = buffer_pages[MAP_NR(old_page)];
	if (!bh)
		return 0;
	if (!new_page) {
		printk("Aieee... unshare(): no page available\n");
		return 0;
	}
	buffer_pages[MAP_NR(old_page)] = NULL;
	copy_page(old_page, new_page);
	free_page(old_page);
	old_page -= new_page;
	buffer_pages[MAP_NR(new_page)] = bh;
	tmp = bh;
	do {
		tmp->b_data -= old_page;
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	return 1;
}

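/*
 * vmtruncate() handles all mappings that got truncated by a truncate()
 * system call: every mapping of the inode is walked and the pages past
 * the new end of file are zapped. A final incomplete page needs special
 * care, since it may still be shared with the buffer cache (see
 * unshare() above).
 */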
void vmtruncate(struct inode * inode, unsigned long offset)
{
	unsigned long page;
	struct vm_area_struct * mpnt;

	if (!inode->i_mmap)
		return;
	page = __get_free_page(GFP_KERNEL);
	mpnt = inode->i_mmap;
	if (!mpnt) {
		free_page(page);
		return;
	}
	do {
		unsigned long start = mpnt->vm_start;
		unsigned long len = mpnt->vm_end - start;
		unsigned long diff;

		/* mapping wholly truncated? */
		if (mpnt->vm_offset >= offset) {
			zap_page_range(mpnt->vm_mm, start, len);
			continue;
		}
		/* mapping wholly unaffected? */
		diff = offset - mpnt->vm_offset;
		if (diff >= len)
			continue;
		/* Ok, partially affected.. */
		start += diff;
		len = (len - diff) & PAGE_MASK;
		/* the last incomplete page may be shared with the buffer cache */
		if (start & ~PAGE_MASK) {
			if (unshare(mpnt, start, page))
				page = 0;
			start = (start + ~PAGE_MASK) & PAGE_MASK;
		}
		zap_page_range(mpnt->vm_mm, start, len);
	} while ((mpnt = mpnt->vm_next_share) != inode->i_mmap);
	free_page(page);
}

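/*
 * get_empty_pgtable() allocates (if necessary) the page-table levels
 * for the given address and returns a pointer to the pte slot, or NULL
 * after calling oom() if the tables could not be allocated.
 */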
static inline pte_t * get_empty_pgtable(struct task_struct * tsk,unsigned long address)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(tsk->mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd) {
		oom(tsk);
		return NULL;
	}
	pte = pte_alloc(pmd, address);
	if (!pte) {
		oom(tsk);
		return NULL;
	}
	return pte;
}

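/*
 * do_swap_page() brings a swapped-out page back in, either through the
 * generic swap_in() or through the vma's own swapin operation. The pte
 * is re-checked after the (possibly blocking) swapin, in case somebody
 * else handled the fault in the meantime.
 */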
static inline void do_swap_page(struct task_struct * tsk,
	struct vm_area_struct * vma, unsigned long address,
	pte_t * page_table, pte_t entry, int write_access)
{
	pte_t page;

	if (!vma->vm_ops || !vma->vm_ops->swapin) {
		swap_in(tsk, vma, page_table, pte_val(entry), write_access);
		return;
	}
	page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry));
	if (pte_val(*page_table) != pte_val(entry)) {
		free_page(pte_page(page));
		return;
	}
	if (mem_map[MAP_NR(pte_page(page))].count > 1 && !(vma->vm_flags & VM_SHARED))
		page = pte_wrprotect(page);
	++vma->vm_mm->rss;
	++tsk->maj_flt;
	set_pte(page_table, page);
	return;
}

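/*
 * do_no_page() handles faults on not-present pages that are not swap
 * entries: anonymous pages are simply given an empty page, file-backed
 * pages first try to share with an existing mapping of the same file
 * and otherwise go through the vma's nopage operation.
 */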
void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
	unsigned long address, int write_access)
{
	pte_t * page_table;
	pte_t entry;
	unsigned long page;

	page_table = get_empty_pgtable(tsk, address);
	if (!page_table)
		return;
	entry = *page_table;
	if (pte_present(entry))
		return;
	if (!pte_none(entry)) {
		do_swap_page(tsk, vma, address, page_table, entry, write_access);
		return;
	}
	address &= PAGE_MASK;
	if (!vma->vm_ops || !vma->vm_ops->nopage) {
		++vma->vm_mm->rss;
		++tsk->min_flt;
		get_empty_page(tsk, vma, page_table);
		return;
	}
	page = __get_free_page(GFP_KERNEL);
	if (share_page(vma, address, write_access, page)) {
		++vma->vm_mm->rss;
		++tsk->min_flt;
		return;
	}
	if (!page) {
		oom(tsk);
		put_page(page_table, BAD_PAGE);
		return;
	}
	++tsk->maj_flt;
	++vma->vm_mm->rss;
	/*
	 * The last argument is "no_share": a write to a private mapping
	 * must get its own copy even if the page could be shared - an
	 * early copy-on-write decision.
	 */
	page = vma->vm_ops->nopage(vma, address, page,
		write_access && !(vma->vm_flags & VM_SHARED));
	if (share_page(vma, address, write_access, 0)) {
		free_page(page);
		return;
	}
	/*
	 * If write_access is true we either got a private copy above or
	 * this is a shared mapping, so the page can be made writable and
	 * dirty right away, saving a later write-protect fault. A page
	 * that is still shared must be mapped read-only instead.
	 */
	entry = mk_pte(page, vma->vm_page_prot);
	if (write_access) {
		entry = pte_mkwrite(pte_mkdirty(entry));
	} else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED))
		entry = pte_wrprotect(entry);
	put_page(page_table, entry);
}

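/*
 * handle_pte_fault() dispatches a fault on a single pte: not-present
 * entries go to do_no_page(), writes to write-protected pages go to
 * do_wp_page(), and the accessed (and, for plain writes, the dirty) bit
 * is set by hand, for architectures that don't do it in hardware.
 * handle_mm_fault() is the entry point called from the low-level
 * architecture fault handler.
 */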
static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access, pte_t * pte)
{
	if (!pte_present(*pte)) {
		do_no_page(current, vma, address, write_access);
		return;
	}
	set_pte(pte, pte_mkyoung(*pte));
	if (!write_access)
		return;
	if (pte_write(*pte)) {
		set_pte(pte, pte_mkdirty(*pte));
		return;
	}
	do_wp_page(current, vma, address, write_access);
}

void handle_mm_fault(struct vm_area_struct * vma, unsigned long address,
	int write_access)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(vma->vm_mm, address);
	pmd = pmd_alloc(pgd, address);
	if (!pmd)
		goto no_memory;
	pte = pte_alloc(pmd, address);
	if (!pte)
		goto no_memory;
	handle_pte_fault(vma, address, write_access, pte);
	update_mmu_cache(vma, address, *pte);
	return;
no_memory:
	oom(current);
}