This source file includes following definitions.
- invalidate_inode_pages
- truncate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- do_write_page
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
1
2
3
4
5
6
7
8
9
10
11
12 #include <linux/stat.h>
13 #include <linux/sched.h>
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/shm.h>
17 #include <linux/errno.h>
18 #include <linux/mman.h>
19 #include <linux/string.h>
20 #include <linux/malloc.h>
21 #include <linux/fs.h>
22 #include <linux/locks.h>
23 #include <linux/pagemap.h>
24 #include <linux/swap.h>
25
26 #include <asm/segment.h>
27 #include <asm/system.h>
28 #include <asm/pgtable.h>
29
30
31
32
33
34
35
36
37 unsigned long page_cache_size = 0;
38 struct page * page_hash_table[PAGE_HASH_SIZE];
39
40
41
42
43
44
45
46
47
48
/*
 * Invalidate the in-memory page cache of an inode: every unlocked
 * page is taken off the inode's page list, unhashed and freed.
 * Pages locked for I/O are skipped (left on the list) rather than
 * waited for.
 */
void invalidate_inode_pages(struct inode * inode)
{
	struct page ** p;
	struct page * page;

	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		/* can't invalidate a page that is under I/O - skip it */
		if (page->locked) {
			p = &page->next;
			continue;
		}
		/* unlink from the inode's page list */
		inode->i_nrpages--;
		if ((*p = page->next) != NULL)
			(*p)->prev = page->prev;
		page->dirty = 0;
		page->next = NULL;
		page->prev = NULL;
		/* ..and from the hash queue, then drop the cache reference */
		remove_page_from_hash_queue(page);
		page->inode = NULL;
		free_page(page_address(page));
		continue;
	}
}
72
73
74
75
76
/*
 * Truncate the inode's page cache at byte offset 'start'.  Pages at
 * or beyond 'start' are unlinked, unhashed and freed; the page that
 * straddles 'start' has its tail zeroed so stale data is not exposed
 * if the file later grows again.  If a page is locked for I/O we
 * wait and then rescan the whole list, since it may have changed
 * while we slept.
 */
void truncate_inode_pages(struct inode * inode, unsigned long start)
{
	struct page ** p;
	struct page * page;

repeat:
	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		unsigned long offset = page->offset;

		/* page wholly truncated - free it */
		if (offset >= start) {
			if (page->locked) {
				wait_on_page(page);
				goto repeat;	/* list may have changed */
			}
			inode->i_nrpages--;
			if ((*p = page->next) != NULL)
				(*p)->prev = page->prev;
			page->dirty = 0;
			page->next = NULL;
			page->prev = NULL;
			remove_page_from_hash_queue(page);
			page->inode = NULL;
			free_page(page_address(page));
			continue;
		}
		p = &page->next;
		offset = start - offset;
		/* partial truncate: clear the tail of the page */
		if (offset < PAGE_SIZE)
			memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
	}
}
111
/*
 * Page-cache / buffer-cache shrinker, called when memory is tight.
 * Scans mem_map with a static clock hand; 'priority' sets the scan
 * budget (lower priority => larger scan) and 'dma' restricts the
 * scan to DMA-capable pages.  Returns 1 as soon as one page has been
 * freed, 0 if the budget ran out without freeing anything.
 */
int shrink_mmap(int priority, int dma)
{
	static int clock = 0;	/* clock hand - persists across calls */
	struct page * page;
	unsigned long limit = MAP_NR(high_memory);
	struct buffer_head *tmp, *bh;

	/* scan budget: (4 * nr of pages) >> priority */
	priority = (limit<<2) >> priority;
	page = mem_map + clock;
	while (priority-- > 0) {
		if (page->locked)
			goto next;
		if (dma && !page->dma)
			goto next;

		/* regenerate the page's referenced bit from any
		 * touched buffers in the page */
		bh = page->buffers;
		if (bh) {
			tmp = bh;
			do {
				if (buffer_touched(tmp)) {
					clear_bit(BH_Touched, &tmp->b_state);
					page->referenced = 1;
				}
				tmp = tmp->b_this_page;
			} while (tmp != bh);
		}

		/*
		 * Second-chance scheme: a multiply-used page is kept, a
		 * referenced page just loses its reference bit (a later
		 * pass may free it), and an unreferenced singly-used
		 * page is a candidate to free right now.
		 */
		if (page->count > 1)
			page->referenced = 1;
		else if (page->referenced)
			page->referenced = 0;
		else if (page->count) {
			/* page is used only by the page cache or
			 * the buffer cache */
			if (page->inode) {
				remove_page_from_hash_queue(page);
				remove_page_from_inode_queue(page);
				free_page(page_address(page));
				return 1;
			}
			if (bh && try_to_free_buffer(bh, &bh, 6))
				return 1;
		}
	next:
		page++;
		clock++;
		if (clock >= limit) {	/* wrap the clock hand */
			clock = 0;
			page = mem_map;
		}
	}
	return 0;
}
171
172
173
174
175
176
177
178 unsigned long page_unuse(unsigned long page)
179 {
180 struct page * p = mem_map + MAP_NR(page);
181 int count = p->count;
182
183 if (count != 2)
184 return count;
185 if (!p->inode)
186 return count;
187 remove_page_from_hash_queue(p);
188 remove_page_from_inode_queue(p);
189 free_page(page);
190 return 1;
191 }
192
193
194
195
196
197 void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
198 {
199 unsigned long offset, len;
200
201 offset = (pos & ~PAGE_MASK);
202 pos = pos & PAGE_MASK;
203 len = PAGE_SIZE - offset;
204 do {
205 struct page * page;
206
207 if (len > count)
208 len = count;
209 page = find_page(inode, pos);
210 if (page) {
211 unsigned long addr;
212
213 wait_on_page(page);
214 addr = page_address(page);
215 memcpy((void *) (offset + addr), buf, len);
216 free_page(addr);
217 }
218 count -= len;
219 buf += len;
220 len = PAGE_SIZE;
221 offset = 0;
222 pos += PAGE_SIZE;
223 } while (count);
224 }
225
226
227
228
229
230
/*
 * Start asynchronous read-ahead of one page at 'offset' of the inode.
 * 'page_cache' is a spare free page the caller may pass in (0 if it
 * has none).  The return value is a spare page handed back to the
 * caller: the original one if it wasn't needed, or 0 if it was
 * consumed (or none could be allocated).  Pages past EOF or already
 * in the cache are not read.
 */
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
	struct page * page;

	offset &= PAGE_MASK;
	if (!page_cache) {
		page_cache = __get_free_page(GFP_KERNEL);
		if (!page_cache)
			return 0;	/* no memory - give up quietly */
	}
	if (offset >= inode->i_size)
		return page_cache;
#if 1
	page = find_page(inode, offset);
	if (page) {
		/* already cached - drop the reference find_page took */
		page->count--;
		return page_cache;
	}
	/*
	 * Not cached: turn the spare page into a cache page and start
	 * the I/O on it.
	 */
	page = mem_map + MAP_NR(page_cache);
	page->count++;	/* cache's own reference */
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);

	inode->i_op->readpage(inode, page);

	/* drop our reference; the page cache keeps its own */
	free_page(page_cache);
	return 0;
#else
	return page_cache;
#endif
}
268
269
270
271
/*
 * Wait for the I/O lock on a page to clear.  We take an extra
 * reference across the sleep so the page cannot be freed under us,
 * and we re-test 'locked' after every wakeup, since wakeups may be
 * early or meant for someone else.
 */
void __wait_on_page(struct page *page)
{
	struct wait_queue wait = { current, NULL };

	page->count++;	/* pin the page while we sleep */
	add_wait_queue(&page->wait, &wait);
repeat:
	run_task_queue(&tq_disk);	/* kick any queued disk I/O */
	current->state = TASK_UNINTERRUPTIBLE;
	if (page->locked) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&page->wait, &wait);
	page->count--;
	current->state = TASK_RUNNING;
}
289
290
291
292
293
294
295
296
297
298
299 #define MAX_READAHEAD (PAGE_SIZE*8)
/*
 * Generic file read through the page cache, using the filesystem's
 * inode->i_op->readpage() for the low-level I/O.  A cached page is
 * copied straight to user space; a missing page is allocated,
 * inserted into the cache and read in.  While a page is locked for
 * I/O we opportunistically start read-ahead.  Returns the number of
 * bytes read, or a negative errno if nothing was read.
 */
int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
	int error, read;
	unsigned long pos, page_cache;
	unsigned long ra_pos, ra_end;	/* read-ahead window */

	if (count <= 0)
		return 0;
	error = 0;
	read = 0;
	page_cache = 0;	/* spare free page, reused across iterations */

	pos = filp->f_pos;
	ra_pos = filp->f_reada;
	ra_end = MAX_READAHEAD;
	if (!ra_pos) {
		/* no read-ahead history - start right after this read */
		ra_pos = (pos + PAGE_SIZE) & PAGE_MASK;
		ra_end = 0;
	}
	ra_end += pos + count;

	for (;;) {
		struct page *page;
		unsigned long offset, addr, nr;

		if (pos >= inode->i_size)
			break;
		offset = pos & ~PAGE_MASK;
		nr = PAGE_SIZE - offset;
		/*
		 * Try to find the data in the page cache..
		 */
		page = find_page(inode, pos & PAGE_MASK);
		if (page)
			goto found_page;

		/*
		 * Not cached - we need a new page.  Use the spare one
		 * if we already have it.
		 */
		if (page_cache)
			goto new_page;

		error = -ENOMEM;
		page_cache = __get_free_page(GFP_KERNEL);
		if (!page_cache)
			break;
		error = 0;

		/*
		 * The allocation may have slept, so re-check whether
		 * the page appeared (or the file shrank) meanwhile.
		 */
		if (pos >= inode->i_size)
			break;
		page = find_page(inode, pos & PAGE_MASK);
		if (!page)
			goto new_page;

found_page:
		addr = page_address(page);
		if (nr > count)
			nr = count;
		/*
		 * While the wanted page is locked for I/O, use the time
		 * to start read-ahead - but stop the moment it unlocks.
		 */
		if (page->locked) {
			while (ra_pos < ra_end) {
				page_cache = try_to_read_ahead(inode, ra_pos, page_cache);
				ra_pos += PAGE_SIZE;
				if (!page->locked)
					goto unlocked_page;
			}
			__wait_on_page(page);
		}
unlocked_page:
		if (!page->uptodate)
			goto read_page;	/* I/O failed or never happened */
		if (nr > inode->i_size - pos)
			nr = inode->i_size - pos;	/* don't read past EOF */
		memcpy_tofs(buf, (void *) (addr + offset), nr);
		free_page(addr);	/* drop the cache lookup reference */
		buf += nr;
		pos += nr;
		read += nr;
		count -= nr;
		if (count)
			continue;
		break;

new_page:
		/*
		 * Insert the spare page into the cache at 'pos' and fall
		 * through to reading it.
		 */
		addr = page_cache;
		page = mem_map + MAP_NR(page_cache);
		page_cache = 0;	/* spare is consumed */
		page->count++;
		page->uptodate = 0;
		page->error = 0;
		page->offset = pos & PAGE_MASK;
		add_page_to_inode_queue(inode, page);
		add_page_to_hash_queue(inode, page);

		/*
		 * On a read error the page stays in the cache with
		 * uptodate = 0, so the next accessor retries the read.
		 * NOTE(review): presumably deliberate for filesystems
		 * where the reader's identity matters (NFS-like) -
		 * confirm against the filesystems using this routine.
		 */
read_page:
		error = inode->i_op->readpage(inode, page);
		if (!error)
			goto found_page;
		free_page(addr);
		break;
	}

	if (read) {
		error = read;	/* partial success wins over the error */

		/*
		 * Fill out the remainder of the read-ahead window
		 * before returning.
		 */
		while (ra_pos < ra_end) {
			page_cache = try_to_read_ahead(inode, ra_pos, page_cache);
			ra_pos += PAGE_SIZE;
		}
		run_task_queue(&tq_disk);	/* start the queued I/O */

		filp->f_pos = pos;
		filp->f_reada = ra_pos;
		if (!IS_RDONLY(inode)) {
			inode->i_atime = CURRENT_TIME;
			inode->i_dirt = 1;
		}
	}
	if (page_cache)
		free_page(page_cache);	/* return the unused spare page */

	return error;
}
450
451
452
453
454
/*
 * Return the kernel address of an up-to-date cache page for 'offset'
 * of the inode, reading it in (and starting read-ahead of the next
 * page) if it wasn't cached.  __get_free_page() may sleep, so the
 * cache is probed again afterwards to handle the race where someone
 * else inserted the page meanwhile.  The caller receives a page
 * reference; returns 0 on out-of-memory.
 */
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
	struct page * page;
	unsigned long new_page;

	page = find_page(inode, offset);
	if (page)
		goto found_page_dont_free;
	new_page = __get_free_page(GFP_KERNEL);	/* may sleep.. */
	page = find_page(inode, offset);	/* ..so look again */
	if (page)
		goto found_page;
	if (!new_page)
		return 0;
	page = mem_map + MAP_NR(new_page);
	new_page = 0;	/* page is now owned by the cache */
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);
	inode->i_op->readpage(inode, page);
	if (page->locked)	/* I/O in flight - good moment to read ahead */
		new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
found_page:
	if (new_page)
		free_page(new_page);	/* lost the race - discard the extra page */
found_page_dont_free:
	wait_on_page(page);
	return page_address(page);
}
487
488
489
490
491
492
/*
 * Page-fault handler for file mappings: produce the page backing
 * 'address' in 'area'.  For shared mappings the cache page itself is
 * handed out; with 'no_share' set a private copy is made so writes
 * never touch the cache.  Returns 0 (fault fails) for a shared-map
 * access past EOF by the owning task, or on out-of-memory.
 */
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
	unsigned long offset;
	struct inode * inode = area->vm_inode;
	unsigned long page;

	offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
	if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
		return 0;

	page = fill_page(inode, offset);
	if (page && no_share) {
		/* private fault: hand back a copy, not the cache page */
		unsigned long new_page = __get_free_page(GFP_KERNEL);
		if (new_page)
			memcpy((void *) new_page, (void *) page, PAGE_SIZE);
		free_page(page);	/* drop the reference fill_page gave us */
		return new_page;
	}
	return page;
}
513
514
515
516
517
518 static inline int do_write_page(struct inode * inode, struct file * file,
519 const char * page, unsigned long offset)
520 {
521 int old_fs, retval;
522 unsigned long size;
523
524 size = offset + PAGE_SIZE;
525
526 if (S_ISREG(inode->i_mode)) {
527 if (size > inode->i_size)
528 size = inode->i_size;
529
530 if (size < offset)
531 return -EIO;
532 }
533 size -= offset;
534 old_fs = get_fs();
535 set_fs(KERNEL_DS);
536 retval = -EIO;
537 if (size == file->f_op->write(inode, file, (const char *) page, size))
538 retval = 0;
539 set_fs(old_fs);
540 return retval;
541 }
542
/*
 * Write a page of a shared file mapping back to the file.  If the
 * page still has buffer heads we only mark them dirty and let the
 * normal buffer writeback handle it.  Otherwise a throw-away
 * struct file is built on the stack and the data pushed through the
 * filesystem's write routine, serialized on the inode semaphore.
 */
static int filemap_write_page(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long page)
{
	int result;
	struct file file;
	struct inode * inode;
	struct buffer_head * bh;

	bh = mem_map[MAP_NR(page)].buffers;
	if (bh) {
		/* whee.. just mark the buffer heads dirty */
		struct buffer_head * tmp = bh;
		do {
			mark_buffer_dirty(tmp, 0);
			tmp = tmp->b_this_page;
		} while (tmp != bh);
		return 0;
	}

	inode = vma->vm_inode;
	file.f_op = inode->i_op->default_file_ops;
	if (!file.f_op->write)
		return -EIO;
	file.f_mode = 3;	/* read/write */
	file.f_flags = 0;
	file.f_count = 1;
	file.f_inode = inode;
	file.f_pos = offset;
	file.f_reada = 0;

	down(&inode->i_sem);	/* one writer at a time per inode */
	result = do_write_page(inode, &file, (const char *) page, offset);
	up(&inode->i_sem);
	return result;
}
579
580
581
582
583
584
585
586
587
588
589
590
/*
 * "Swap out" a page of a shared file mapping by writing it back to
 * the file instead of to swap.  The pte is first replaced with a
 * fake SHM_SWP_TYPE swap entry so the page stays findable while the
 * (possibly blocking) writeback runs; if nothing touched the pte in
 * the meantime it is cleared afterwards.
 */
int filemap_swapout(struct vm_area_struct * vma,
	unsigned long offset,
	pte_t *page_table)
{
	int error;
	unsigned long page = pte_page(*page_table);
	unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

	flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
	set_pte(page_table, __pte(entry));
	flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
	error = filemap_write_page(vma, offset, page);
	if (pte_val(*page_table) == entry)	/* still our fake entry? */
		pte_clear(page_table);
	return error;
}
607
608
609
610
611
612
613
614 static pte_t filemap_swapin(struct vm_area_struct * vma,
615 unsigned long offset,
616 unsigned long entry)
617 {
618 unsigned long page = SWP_OFFSET(entry);
619
620 mem_map[page].count++;
621 page = (page << PAGE_SHIFT) + PAGE_OFFSET;
622 return mk_pte(page,vma->vm_page_prot);
623 }
624
625
/*
 * Sync (or, with MS_INVALIDATE, remove) a single pte of a shared
 * file mapping.  Without MS_INVALIDATE a present dirty page is
 * cleaned and written back.  With MS_INVALIDATE the pte is torn
 * down: a swap-entry pte just drops its swap reference, a clean
 * page is freed outright, and a dirty page is written back first.
 * Returns 0 or the writeback error.
 */
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;
	unsigned long page;
	int error;

	if (!(flags & MS_INVALIDATE)) {
		if (!pte_present(pte))
			return 0;
		if (!pte_dirty(pte))
			return 0;
		flush_cache_page(vma, address);
		set_pte(ptep, pte_mkclean(pte));
		flush_tlb_page(vma, address);
		page = pte_page(pte);
		mem_map[MAP_NR(page)].count++;	/* hold the page across the write */
	} else {
		if (pte_none(pte))
			return 0;
		flush_cache_page(vma, address);
		pte_clear(ptep);
		flush_tlb_page(vma, address);
		if (!pte_present(pte)) {
			/* not in core: just release the swap reference */
			swap_free(pte_val(pte));
			return 0;
		}
		page = pte_page(pte);
		/* NOTE(review): the exact compare 'flags == MS_INVALIDATE'
		 * skips writeback when invalidate is requested alone -
		 * presumably intentional (pure invalidate discards dirty
		 * data); confirm against callers. */
		if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
			free_page(page);
			return 0;
		}
	}
	error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
	free_page(page);	/* drop the pte's (or our extra) reference */
	return error;
}
663
/*
 * Sync every pte covering [address, address+size) within a single
 * pmd entry.  'offset' converts the page-table-relative address back
 * to the vma-relative address filemap_sync_pte() needs.  Per-page
 * errors are OR-ed together and returned.
 */
static inline int filemap_sync_pte_range(pmd_t * pmd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
	pte_t * pte;
	unsigned long end;
	int error;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
		pmd_clear(pmd);
		return 0;
	}
	pte = pte_offset(pmd, address);
	/* split the address: base offset vs. position within this pmd */
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;	/* don't run off this pmd */
	error = 0;
	do {
		error |= filemap_sync_pte(pte, vma, address + offset, flags);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return error;
}
693
694 static inline int filemap_sync_pmd_range(pgd_t * pgd,
695 unsigned long address, unsigned long size,
696 struct vm_area_struct *vma, unsigned int flags)
697 {
698 pmd_t * pmd;
699 unsigned long offset, end;
700 int error;
701
702 if (pgd_none(*pgd))
703 return 0;
704 if (pgd_bad(*pgd)) {
705 printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
706 pgd_clear(pgd);
707 return 0;
708 }
709 pmd = pmd_offset(pgd, address);
710 offset = address & PMD_MASK;
711 address &= ~PMD_MASK;
712 end = address + size;
713 if (end > PGDIR_SIZE)
714 end = PGDIR_SIZE;
715 error = 0;
716 do {
717 error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
718 address = (address + PMD_SIZE) & PMD_MASK;
719 pmd++;
720 } while (address < end);
721 return error;
722 }
723
/*
 * Walk the current mm's page tables over [address, address+size) and
 * sync every mapped page of the vma according to 'flags' (see
 * filemap_sync_pte).  Caches and TLB for the range are flushed
 * around the walk.  Returns the OR of all per-page errors.
 */
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	dir = pgd_offset(current->mm, address);
	flush_cache_range(vma->vm_mm, end - size, end);
	while (address < end) {
		error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
		/* advance to the next pgd boundary */
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	flush_tlb_range(vma->vm_mm, end - size, end);
	return error;
}
741
742
743
744
/*
 * Called when (part of) a shared file mapping goes away: flush any
 * dirty pages in the range back to the file, asynchronously.
 */
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
	filemap_sync(vma, start, len, MS_ASYNC);
}
749
750
751
752
753
754
/*
 * Operations for shared (writable) file mappings: these need unmap/
 * sync hooks for writeback and file-backed swapout/swapin.
 * NOTE(review): slot comments below follow the conventional
 * vm_operations_struct layout - confirm against the struct
 * declaration in <linux/mm.h>.
 */
static struct vm_operations_struct file_shared_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	filemap_unmap,		/* unmap */
	NULL,			/* protect */
	filemap_sync,		/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage */
	NULL,			/* wppage */
	filemap_swapout,	/* swapout */
	filemap_swapin,		/* swapin */
};
767
768
769
770
771
772
773
/*
 * Operations for private (or read-only) file mappings: only the
 * nopage hook is needed, since modified pages never go back to the
 * file.  Slot order as in file_shared_mmap above.
 */
static struct vm_operations_struct file_private_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage */
	NULL,			/* wppage */
	NULL,			/* swapout */
	NULL,			/* swapin */
};
786
787
788 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
789 {
790 struct vm_operations_struct * ops;
791
792 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
793 ops = &file_shared_mmap;
794
795
796 if (vma->vm_offset & (PAGE_SIZE - 1))
797 return -EINVAL;
798 } else {
799 ops = &file_private_mmap;
800 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
801 return -EINVAL;
802 }
803 if (!inode->i_sb || !S_ISREG(inode->i_mode))
804 return -EACCES;
805 if (!inode->i_op || !inode->i_op->readpage)
806 return -ENOEXEC;
807 if (!IS_RDONLY(inode)) {
808 inode->i_atime = CURRENT_TIME;
809 inode->i_dirt = 1;
810 }
811 vma->vm_inode = inode;
812 inode->i_count++;
813 vma->vm_ops = ops;
814 return 0;
815 }
816
817
818
819
820
821
822 static int msync_interval(struct vm_area_struct * vma,
823 unsigned long start, unsigned long end, int flags)
824 {
825 if (!vma->vm_inode)
826 return 0;
827 if (vma->vm_ops->sync) {
828 int error;
829 error = vma->vm_ops->sync(vma, start, end-start, flags);
830 if (error)
831 return error;
832 if (flags & MS_SYNC)
833 return file_fsync(vma->vm_inode, NULL);
834 return 0;
835 }
836 return 0;
837 }
838
/*
 * msync(2): flush changes made to a mapped region back to the file.
 * 'start' must be page aligned and 'len' is rounded up to a whole
 * number of pages.  The region may span several vmas; unmapped holes
 * are skipped but remembered, and reported as -EFAULT only if every
 * mapped part synced cleanly.
 */
asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error, error;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	len = (len + ~PAGE_MASK) & PAGE_MASK;	/* round up to pages */
	end = start + len;
	if (end < start)	/* wrapped around */
		return -EINVAL;
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		return -EINVAL;
	if (end == start)
		return 0;
	/*
	 * Walk the vmas covering [start,end), syncing each piece and
	 * noting (but tolerating) unmapped gaps.
	 */
	vma = find_vma(current, start);
	unmapped_error = 0;
	for (;;) {
		/* no vma at or after start: rest of range is unmapped */
		if (!vma)
			return -EFAULT;
		/* gap before this vma counts as an unmapped hole */
		if (start < vma->vm_start) {
			unmapped_error = -EFAULT;
			start = vma->vm_start;
		}
		/* range ends inside this vma: sync the tail and finish */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					return error;
			}
			return unmapped_error;
		}
		/* range extends past this vma: sync it and move on */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			return error;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
}