This source file includes the following definitions:
- invalidate_inode_pages
- truncate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- do_write_page
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
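/*
 * linux/mm/filemap.c
 *
 * Generic page-cache support: file read, memory-mapped files and
 * msync are implemented here on top of the page cache.
 */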
12 #include <linux/stat.h>
13 #include <linux/sched.h>
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/shm.h>
17 #include <linux/errno.h>
18 #include <linux/mman.h>
19 #include <linux/string.h>
20 #include <linux/malloc.h>
21 #include <linux/fs.h>
22 #include <linux/locks.h>
23 #include <linux/pagemap.h>
24 #include <linux/swap.h>
25
26 #include <asm/segment.h>
27 #include <asm/system.h>
28 #include <asm/pgtable.h>
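/*
 * The page cache: a hash table over all cached pages, plus a count of
 * how many pages the cache currently holds.
 */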
37 unsigned long page_cache_size = 0;
38 struct page * page_hash_table[PAGE_HASH_SIZE];
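/*
 * Invalidate the page cache for an inode: every unlocked page is
 * unhashed and freed without being written back; locked pages are
 * skipped.
 */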
49 void invalidate_inode_pages(struct inode * inode)
50 {
51 struct page ** p;
52 struct page * page;
53
54 p = &inode->i_pages;
55 while ((page = *p) != NULL) {
56 if (page->locked) {
57 p = &page->next;
58 continue;
59 }
60 inode->i_nrpages--;
61 if ((*p = page->next) != NULL)
62 (*p)->prev = page->prev;
63 page->dirty = 0;
64 page->next = NULL;
65 page->prev = NULL;
66 remove_page_from_hash_queue(page);
67 page->inode = NULL;
68 free_page(page_address(page));
69 continue;
70 }
71 }
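/*
 * Truncate the page cache at 'start': pages wholly past the new size
 * are freed (waiting for locked ones and rescanning), and the tail of
 * a partially truncated page is cleared.
 */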
77 void truncate_inode_pages(struct inode * inode, unsigned long start)
78 {
79 struct page ** p;
80 struct page * page;
81
82 repeat:
83 p = &inode->i_pages;
84 while ((page = *p) != NULL) {
85 unsigned long offset = page->offset;
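/* Page lies entirely beyond the truncation point: free it. */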
88 if (offset >= start) {
89 if (page->locked) {
90 wait_on_page(page);
91 goto repeat;
92 }
93 inode->i_nrpages--;
94 if ((*p = page->next) != NULL)
95 (*p)->prev = page->prev;
96 page->dirty = 0;
97 page->next = NULL;
98 page->prev = NULL;
99 remove_page_from_hash_queue(page);
100 page->inode = NULL;
101 free_page(page_address(page));
102 continue;
103 }
104 p = &page->next;
105 offset = start - offset;
106
107 if (offset < PAGE_SIZE)
108 memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
109 }
110 }
111
112 int shrink_mmap(int priority, int dma)
113 {
114 static int clock = 0;
115 struct page * page;
116 unsigned long limit = MAP_NR(high_memory);
117 struct buffer_head *tmp, *bh;
118
119 priority = (limit<<2) >> priority;
120 page = mem_map + clock;
121 while (priority-- > 0) {
122 if (page->locked)
123 goto next;
124 if (dma && !page->dma)
125 goto next;
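/* Re-derive the page's referenced bit from any buffers it carries. */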
128 bh = page->buffers;
129 if (bh) {
130 tmp = bh;
131 do {
132 if (buffer_touched(tmp)) {
133 clear_bit(BH_Touched, &tmp->b_state);
134 page->referenced = 1;
135 }
136 tmp = tmp->b_this_page;
137 } while (tmp != bh);
138 }
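/*
 * Shared pages can't be freed here, so just mark them referenced.
 * An unshared page that hasn't been referenced recently is a
 * candidate for freeing.
 */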
145 if (page->count > 1)
146 page->referenced = 1;
147 else if (page->referenced)
148 page->referenced = 0;
149 else if (page->count) {
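/* A page-cache page: unhash it and free it. */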
152 if (page->inode) {
153 remove_page_from_hash_queue(page);
154 remove_page_from_inode_queue(page);
155 free_page(page_address(page));
156 return 1;
157 }
158 if (bh && try_to_free_buffer(bh, &bh, 6))
159 return 1;
160 }
161 next:
162 page++;
163 clock++;
164 if (clock >= limit) {
165 clock = 0;
166 page = mem_map;
167 }
168 }
169 return 0;
170 }
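/*
 * Try to free a page that is held only by the page cache and the
 * caller (count == 2). Returns 1 if the page was released, otherwise
 * the current use count.
 */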
178 unsigned long page_unuse(unsigned long page)
179 {
180 struct page * p = mem_map + MAP_NR(page);
181 int count = p->count;
182
183 if (count != 2)
184 return count;
185 if (!p->inode)
186 return count;
187 remove_page_from_hash_queue(p);
188 remove_page_from_inode_queue(p);
189 free_page(page);
190 return 1;
191 }
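/*
 * Keep the page cache coherent with a write: copy the newly written
 * data into any cached pages that overlap the affected range.
 */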
197 void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
198 {
199 unsigned long offset, len;
200
201 offset = (pos & ~PAGE_MASK);
202 pos = pos & PAGE_MASK;
203 len = PAGE_SIZE - offset;
204 do {
205 struct page * page;
206
207 if (len > count)
208 len = count;
209 page = find_page(inode, pos);
210 if (page) {
211 unsigned long addr;
212
213 wait_on_page(page);
214 addr = page_address(page);
215 memcpy((void *) (offset + addr), buf, len);
216 free_page(addr);
217 }
218 count -= len;
219 buf += len;
220 len = PAGE_SIZE;
221 offset = 0;
222 pos += PAGE_SIZE;
223 } while (count);
224 }
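/*
 * Start read-ahead of one page at 'offset'. 'page_cache' is a spare
 * page that may be consumed for the new cache entry; the return value
 * is a spare page for the caller to reuse (0 if none is left).
 */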
231 static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
232 {
233 struct page * page;
234
235 offset &= PAGE_MASK;
236 if (!page_cache) {
237 page_cache = __get_free_page(GFP_KERNEL);
238 if (!page_cache)
239 return 0;
240 }
241 if (offset >= inode->i_size)
242 return page_cache;
243 #if 1
244 page = find_page(inode, offset);
245 if (page) {
246 page->count--;
247 return page_cache;
248 }
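/* Not cached yet: add the spare page to the cache and start the read. */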
252 page = mem_map + MAP_NR(page_cache);
253 page->count++;
254 page->uptodate = 0;
255 page->error = 0;
256 page->offset = offset;
257 add_page_to_inode_queue(inode, page);
258 add_page_to_hash_queue(inode, page);
259
260 inode->i_op->readpage(inode, page);
261
262 free_page(page_cache);
263 return 0;
264 #else
265 return page_cache;
266 #endif
267 }
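/*
 * Sleep (uninterruptibly) until the page is unlocked, holding an extra
 * reference so the page cannot be freed while we wait.
 */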
272 void __wait_on_page(struct page *page)
273 {
274 struct wait_queue wait = { current, NULL };
275
276 page->count++;
277 add_wait_queue(&page->wait, &wait);
278 repeat:
279 current->state = TASK_UNINTERRUPTIBLE;
280 if (page->locked) {
281 schedule();
282 goto repeat;
283 }
284 remove_wait_queue(&page->wait, &wait);
285 page->count--;
286 current->state = TASK_RUNNING;
287 }
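/*
 * Page-cache based file read. Data is copied straight from cache
 * pages; missing pages are read in, and while waiting on a locked
 * page up to MAX_READAHEAD bytes of read-ahead are started.
 */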
298 #define MAX_READAHEAD (PAGE_SIZE*8)
299 int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
300 {
301 int error, read;
302 unsigned long pos, page_cache;
303
304 if (count <= 0)
305 return 0;
306 error = 0;
307 read = 0;
308 page_cache = 0;
309
310 pos = filp->f_pos;
311 for (;;) {
312 struct page *page;
313 unsigned long offset, addr, nr;
314
315 if (pos >= inode->i_size)
316 break;
317 offset = pos & ~PAGE_MASK;
318 nr = PAGE_SIZE - offset;
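/* First, try to find the data in the page cache. */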
322 page = find_page(inode, pos & PAGE_MASK);
323 if (page)
324 goto found_page;
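/* Not cached: get a spare page (this may sleep), then look again. */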
330 if (page_cache)
331 goto new_page;
332
333 error = -ENOMEM;
334 page_cache = __get_free_page(GFP_KERNEL);
335 if (!page_cache)
336 break;
337 error = 0;
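/* The allocation may have slept, so re-check the file size and the cache. */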
342 if (pos >= inode->i_size)
343 break;
344 page = find_page(inode, pos & PAGE_MASK);
345 if (!page)
346 goto new_page;
347
348 found_page:
349 addr = page_address(page);
350 if (nr > count)
351 nr = count;
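/*
 * The page is being read in. Use the time we would otherwise spend
 * waiting to start read-ahead for the rest of the request, then wait.
 */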
360 if (page->locked) {
361 unsigned long max_ahead, ahead;
362
363 max_ahead = count - nr;
364 if (filp->f_reada || max_ahead > MAX_READAHEAD)
365 max_ahead = MAX_READAHEAD;
366 ahead = 0;
367 while (ahead < max_ahead) {
368 ahead += PAGE_SIZE;
369 page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
370 if (!page->locked)
371 goto unlocked_page;
372 }
373 __wait_on_page(page);
374 }
375 unlocked_page:
376 if (!page->uptodate)
377 goto read_page;
378 if (nr > inode->i_size - pos)
379 nr = inode->i_size - pos;
380 memcpy_tofs(buf, (void *) (addr + offset), nr);
381 free_page(addr);
382 buf += nr;
383 pos += nr;
384 read += nr;
385 count -= nr;
386 if (count)
387 continue;
388 break;
391 new_page:
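/* Take the spare page, add it to the cache and read it in. */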
395 addr = page_cache;
396 page = mem_map + MAP_NR(page_cache);
397 page_cache = 0;
398 page->count++;
399 page->uptodate = 0;
400 page->error = 0;
401 page->offset = pos & PAGE_MASK;
402 add_page_to_inode_queue(inode, page);
403 add_page_to_hash_queue(inode, page);
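/*
 * Start the actual read. On success, go back and use the page as a
 * normal cache hit; on error, drop our reference and give up.
 */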
413 read_page:
414 error = inode->i_op->readpage(inode, page);
415 if (!error)
416 goto found_page;
417 free_page(addr);
418 break;
419 }
420
421 filp->f_pos = pos;
422 filp->f_reada = 1;
423 if (page_cache)
424 free_page(page_cache);
425 if (!IS_RDONLY(inode)) {
426 inode->i_atime = CURRENT_TIME;
427 inode->i_dirt = 1;
428 }
429 if (!read)
430 read = error;
431 return read;
432 }
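/*
 * Find or create the cache page at 'offset', read it in if necessary,
 * wait for it and return its address with an extra reference held.
 */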
438 static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
439 {
440 struct page * page;
441 unsigned long new_page;
442
443 page = find_page(inode, offset);
444 if (page)
445 goto found_page_dont_free;
446 new_page = __get_free_page(GFP_KERNEL);
447 page = find_page(inode, offset);
448 if (page)
449 goto found_page;
450 if (!new_page)
451 return 0;
452 page = mem_map + MAP_NR(new_page);
453 new_page = 0;
454 page->count++;
455 page->uptodate = 0;
456 page->error = 0;
457 page->offset = offset;
458 add_page_to_inode_queue(inode, page);
459 add_page_to_hash_queue(inode, page);
460 inode->i_op->readpage(inode, page);
461 if (page->locked)
462 new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
463 found_page:
464 if (new_page)
465 free_page(new_page);
466 found_page_dont_free:
467 wait_on_page(page);
468 return page_address(page);
469 }
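/*
 * Page-fault handler for file mappings: fetch the page through the
 * page cache. For a private (no_share) fault, return a copy of the
 * cache page rather than the page itself.
 */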
476 static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
477 {
478 unsigned long offset;
479 struct inode * inode = area->vm_inode;
480 unsigned long page;
481
482 offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
483 if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
484 return 0;
485
486 page = fill_page(inode, offset);
487 if (page && no_share) {
488 unsigned long new_page = __get_free_page(GFP_KERNEL);
489 if (new_page)
490 memcpy((void *) new_page, (void *) page, PAGE_SIZE);
491 free_page(page);
492 return new_page;
493 }
494 return page;
495 }
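/*
 * Write one page back to the file through f_op->write, switching to
 * KERNEL_DS because the data lives in kernel space.
 */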
501 static inline int do_write_page(struct inode * inode, struct file * file,
502 const char * page, unsigned long offset)
503 {
504 int old_fs, retval;
505 unsigned long size;
506
507 size = offset + PAGE_SIZE;
508
509 if (S_ISREG(inode->i_mode)) {
510 if (size > inode->i_size)
511 size = inode->i_size;
512
513 if (size < offset)
514 return -EIO;
515 }
516 size -= offset;
517 old_fs = get_fs();
518 set_fs(KERNEL_DS);
519 retval = -EIO;
520 if (size == file->f_op->write(inode, file, (const char *) page, size))
521 retval = 0;
522 set_fs(old_fs);
523 return retval;
524 }
525
526 static int filemap_write_page(struct vm_area_struct * vma,
527 unsigned long offset,
528 unsigned long page)
529 {
530 int result;
531 struct file file;
532 struct inode * inode;
533 struct buffer_head * bh;
534
535 bh = mem_map[MAP_NR(page)].buffers;
536 if (bh) {
537
538 struct buffer_head * tmp = bh;
539 do {
540 mark_buffer_dirty(tmp, 0);
541 tmp = tmp->b_this_page;
542 } while (tmp != bh);
543 return 0;
544 }
545
546 inode = vma->vm_inode;
547 file.f_op = inode->i_op->default_file_ops;
548 if (!file.f_op->write)
549 return -EIO;
550 file.f_mode = 3;
551 file.f_flags = 0;
552 file.f_count = 1;
553 file.f_inode = inode;
554 file.f_pos = offset;
555 file.f_reada = 0;
556
557 down(&inode->i_sem);
558 result = do_write_page(inode, &file, (const char *) page, offset);
559 up(&inode->i_sem);
560 return result;
561 }
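/*
 * "Swap out" a page of a shared file mapping by writing it back to the
 * file. The pte temporarily holds a shared-memory swap entry while the
 * write is in progress.
 */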
574 int filemap_swapout(struct vm_area_struct * vma,
575 unsigned long offset,
576 pte_t *page_table)
577 {
578 int error;
579 unsigned long page = pte_page(*page_table);
580 unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));
581
582 flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
583 set_pte(page_table, __pte(entry));
584 flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
585 error = filemap_write_page(vma, offset, page);
586 if (pte_val(*page_table) == entry)
587 pte_clear(page_table);
588 return error;
589 }
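/*
 * Undo filemap_swapout: the "swap entry" encodes the page's mem_map
 * index, so just take a new reference and rebuild the pte.
 */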
597 static pte_t filemap_swapin(struct vm_area_struct * vma,
598 unsigned long offset,
599 unsigned long entry)
600 {
601 unsigned long page = SWP_OFFSET(entry);
602
603 mem_map[page].count++;
604 page = (page << PAGE_SHIFT) + PAGE_OFFSET;
605 return mk_pte(page,vma->vm_page_prot);
606 }
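/*
 * Write back (and, for MS_INVALIDATE, unmap) a single page of a shared
 * mapping, given its pte.
 */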
609 static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
610 unsigned long address, unsigned int flags)
611 {
612 pte_t pte = *ptep;
613 unsigned long page;
614 int error;
615
616 if (!(flags & MS_INVALIDATE)) {
617 if (!pte_present(pte))
618 return 0;
619 if (!pte_dirty(pte))
620 return 0;
621 flush_cache_page(vma, address);
622 set_pte(ptep, pte_mkclean(pte));
623 flush_tlb_page(vma, address);
624 page = pte_page(pte);
625 mem_map[MAP_NR(page)].count++;
626 } else {
627 if (pte_none(pte))
628 return 0;
629 flush_cache_page(vma, address);
630 pte_clear(ptep);
631 flush_tlb_page(vma, address);
632 if (!pte_present(pte)) {
633 swap_free(pte_val(pte));
634 return 0;
635 }
636 page = pte_page(pte);
637 if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
638 free_page(page);
639 return 0;
640 }
641 }
642 error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
643 free_page(page);
644 return error;
645 }
646
647 static inline int filemap_sync_pte_range(pmd_t * pmd,
648 unsigned long address, unsigned long size,
649 struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
650 {
651 pte_t * pte;
652 unsigned long end;
653 int error;
654
655 if (pmd_none(*pmd))
656 return 0;
657 if (pmd_bad(*pmd)) {
658 printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
659 pmd_clear(pmd);
660 return 0;
661 }
662 pte = pte_offset(pmd, address);
663 offset += address & PMD_MASK;
664 address &= ~PMD_MASK;
665 end = address + size;
666 if (end > PMD_SIZE)
667 end = PMD_SIZE;
668 error = 0;
669 do {
670 error |= filemap_sync_pte(pte, vma, address + offset, flags);
671 address += PAGE_SIZE;
672 pte++;
673 } while (address < end);
674 return error;
675 }
676
677 static inline int filemap_sync_pmd_range(pgd_t * pgd,
678 unsigned long address, unsigned long size,
679 struct vm_area_struct *vma, unsigned int flags)
680 {
681 pmd_t * pmd;
682 unsigned long offset, end;
683 int error;
684
685 if (pgd_none(*pgd))
686 return 0;
687 if (pgd_bad(*pgd)) {
688 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
689 pgd_clear(pgd);
690 return 0;
691 }
692 pmd = pmd_offset(pgd, address);
693 offset = address & PGDIR_MASK;
694 address &= ~PGDIR_MASK;
695 end = address + size;
696 if (end > PGDIR_SIZE)
697 end = PGDIR_SIZE;
698 error = 0;
699 do {
700 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
701 address = (address + PMD_SIZE) & PMD_MASK;
702 pmd++;
703 } while (address < end);
704 return error;
705 }
706
707 static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
708 size_t size, unsigned int flags)
709 {
710 pgd_t * dir;
711 unsigned long end = address + size;
712 int error = 0;
713
714 dir = pgd_offset(current->mm, address);
715 flush_cache_range(vma->vm_mm, end - size, end);
716 while (address < end) {
717 error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
718 address = (address + PGDIR_SIZE) & PGDIR_MASK;
719 dir++;
720 }
721 flush_tlb_range(vma->vm_mm, end - size, end);
722 return error;
723 }
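/* On unmap, write any dirty pages of the shared mapping back to the file. */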
728 static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
729 {
730 filemap_sync(vma, start, len, MS_ASYNC);
731 }
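/*
 * Shared mappings need unmap/sync/swap operations so that modified
 * pages eventually make it back to the file.
 */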
738 static struct vm_operations_struct file_shared_mmap = {
739 NULL, /* open */
740 NULL, /* close */
741 filemap_unmap, /* unmap - write back dirty pages */
742 NULL, /* protect */
743 filemap_sync, /* sync */
744 NULL, /* advise */
745 filemap_nopage, /* nopage */
746 NULL, /* wppage */
747 filemap_swapout, /* swapout */
748 filemap_swapin, /* swapin */
749 };
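/*
 * Private mappings just need to be able to load pages in; modifications
 * never go back to the file.
 */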
757 static struct vm_operations_struct file_private_mmap = {
758 NULL, /* open */
759 NULL, /* close */
760 NULL, /* unmap */
761 NULL, /* protect */
762 NULL, /* sync */
763 NULL, /* advise */
764 filemap_nopage, /* nopage */
765 NULL, /* wppage */
766 NULL, /* swapout */
767 NULL, /* swapin */
768 };
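/*
 * Set up a file-backed mapping. The inode must supply a readpage
 * operation; writable shared mappings additionally require the file
 * offset to be page aligned.
 */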
771 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
772 {
773 struct vm_operations_struct * ops;
774
775 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
776 ops = &file_shared_mmap;
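/* A writable shared mapping must start on a page boundary. */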
779 if (vma->vm_offset & (PAGE_SIZE - 1))
780 return -EINVAL;
781 } else {
782 ops = &file_private_mmap;
783 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
784 return -EINVAL;
785 }
786 if (!inode->i_sb || !S_ISREG(inode->i_mode))
787 return -EACCES;
788 if (!inode->i_op || !inode->i_op->readpage)
789 return -ENOEXEC;
790 if (!IS_RDONLY(inode)) {
791 inode->i_atime = CURRENT_TIME;
792 inode->i_dirt = 1;
793 }
794 vma->vm_inode = inode;
795 inode->i_count++;
796 vma->vm_ops = ops;
797 return 0;
798 }
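/*
 * Sync one vma over [start,end): write dirty pages via the vma's sync
 * operation and, for MS_SYNC, flush the inode's buffers with file_fsync.
 */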
805 static int msync_interval(struct vm_area_struct * vma,
806 unsigned long start, unsigned long end, int flags)
807 {
808 if (!vma->vm_inode)
809 return 0;
810 if (vma->vm_ops->sync) {
811 int error;
812 error = vma->vm_ops->sync(vma, start, end-start, flags);
813 if (error)
814 return error;
815 if (flags & MS_SYNC)
816 return file_fsync(vma->vm_inode, NULL);
817 return 0;
818 }
819 return 0;
820 }
821
822 asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
823 {
824 unsigned long end;
825 struct vm_area_struct * vma;
826 int unmapped_error, error;
827
828 if (start & ~PAGE_MASK)
829 return -EINVAL;
830 len = (len + ~PAGE_MASK) & PAGE_MASK;
831 end = start + len;
832 if (end < start)
833 return -EINVAL;
834 if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
835 return -EINVAL;
836 if (end == start)
837 return 0;
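/*
 * If part of the range is unmapped we will return -EFAULT, but any
 * mapped parts are still synced.
 */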
842 vma = find_vma(current, start);
843 unmapped_error = 0;
844 for (;;) {
845
846 if (!vma)
847 return -EFAULT;
848
849 if (start < vma->vm_start) {
850 unmapped_error = -EFAULT;
851 start = vma->vm_start;
852 }
853
854 if (end <= vma->vm_end) {
855 if (start < end) {
856 error = msync_interval(vma, start, end, flags);
857 if (error)
858 return error;
859 }
860 return unmapped_error;
861 }
862
863 error = msync_interval(vma, start, vma->vm_end, flags);
864 if (error)
865 return error;
866 start = vma->vm_end;
867 vma = vma->vm_next;
868 }
869 }