This source file includes the following definitions:
- invalidate_inode_pages
- truncate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- do_write_page
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994, 1995  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (as well as the mmap() of the swap file).
 */

#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>

/*
 * The page cache: global count of cached pages, and the hash table
 * used to look pages up by (inode, offset).
 */
unsigned long page_cache_size = 0;
struct page * page_hash_table[PAGE_HASH_SIZE];

/*
 * Invalidate all cached pages of an inode: unlink them from the inode
 * and hash queues and free them, discarding any dirty data.  Locked
 * pages are skipped.
 */
void invalidate_inode_pages(struct inode * inode)
{
	struct page ** p;
	struct page * page;

	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		if (page->locked) {
			p = &page->next;
			continue;
		}
		inode->i_nrpages--;
		if ((*p = page->next) != NULL)
			(*p)->prev = page->prev;
		page->dirty = 0;
		page->next = NULL;
		page->prev = NULL;
		remove_page_from_hash_queue(page);
		page->inode = NULL;
		free_page(page_address(page));
		continue;
	}
}

/*
 * Truncate the page cache at a set offset, removing the pages that
 * are beyond that offset (and zeroing out partial pages).
 */
void truncate_inode_pages(struct inode * inode, unsigned long start)
{
	struct page ** p;
	struct page * page;

repeat:
	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		unsigned long offset = page->offset;

		/* page wholly truncated - free it */
		if (offset >= start) {
			if (page->locked) {
				wait_on_page(page);
				goto repeat;
			}
			inode->i_nrpages--;
			if ((*p = page->next) != NULL)
				(*p)->prev = page->prev;
			page->dirty = 0;
			page->next = NULL;
			page->prev = NULL;
			remove_page_from_hash_queue(page);
			page->inode = NULL;
			free_page(page_address(page));
			continue;
		}
		p = &page->next;
		offset = start - offset;
		/* partial truncate, clear end of page */
		if (offset < PAGE_SIZE)
			memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
	}
}
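
/*
 * Illustrative (hypothetical) caller, not part of this file: a
 * filesystem that has just shrunk a file to "newsize" would drop the
 * now-stale cached pages like this.
 */
#if 0	/* example only */
void example_shrink_file(struct inode * inode, unsigned long newsize)
{
	inode->i_size = newsize;
	inode->i_dirt = 1;
	truncate_inode_pages(inode, newsize);	/* free pages past EOF, zero-pad the last one */
}
#endif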

int shrink_mmap(int priority, int dma)
{
	static int clock = 0;
	struct page * page;
	unsigned long limit = MAP_NR(high_memory);
	struct buffer_head *tmp, *bh;

	priority = (limit<<2) >> priority;
	page = mem_map + clock;
	while (priority-- > 0) {
		if (page->locked)
			goto next;
		if (dma && !page->dma)
			goto next;
		/* First of all, regenerate the page's referenced bit
		   from any buffers in the page */
		bh = page->buffers;
		if (bh) {
			tmp = bh;
			do {
				if (buffer_touched(tmp)) {
					clear_bit(BH_Touched, &tmp->b_state);
					page->referenced = 1;
				}
				tmp = tmp->b_this_page;
			} while (tmp != bh);
		}

		/* We can't throw away shared pages, but we do mark
		   them as referenced.  This relies on the fact that
		   no page is currently in both the page cache and the
		   buffer cache; we'd have to modify the following
		   test to make it work otherwise. */
		if (page->count > 1)
			page->referenced = 1;
		else if (page->referenced)
			page->referenced = 0;
		else if (page->count) {
			/* The page is an old, unshared page --- try
			   to discard it. */
			if (page->inode) {
				remove_page_from_hash_queue(page);
				remove_page_from_inode_queue(page);
				free_page(page_address(page));
				return 1;
			}
			if (bh && try_to_free_buffer(bh, &bh, 6))
				return 1;
		}
next:
		page++;
		clock++;
		if (clock >= limit) {
			clock = 0;
			page = mem_map;
		}
	}
	return 0;
}
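
/*
 * Sketch of the caller side: the swap-out code (try_to_free_page() in
 * mm/vmscan.c) uses shrink_mmap() as one source of reclaimable pages,
 * retrying with more aggressive priorities.  Simplified and partly
 * hypothetical; see mm/vmscan.c for the real loop.
 */
#if 0	/* example only */
int example_try_to_free_page(int priority, int dma)
{
	while (priority >= 0) {
		if (shrink_mmap(priority, dma))	/* freed a cached page? */
			return 1;
		/* ... otherwise try buffers, swap, etc ... */
		priority--;
	}
	return 0;
}
#endif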

/*
 * This is called from try_to_swap_out() when we try to get rid of some
 * pages.  If we're unmapping the last occurrence of this page, we also
 * free its page from the page cache.
 */
unsigned long page_unuse(unsigned long page)
{
	struct page * p = mem_map + MAP_NR(page);
	int count = p->count;

	if (count != 2)
		return count;
	if (!p->inode)
		return count;
	remove_page_from_hash_queue(p);
	remove_page_from_inode_queue(p);
	free_page(page);
	return 1;
}

/*
 * Update a page cache copy when we're doing a "write()" system call
 * that bypasses the cache: copy the newly written data into any cached
 * pages it overlaps, so the cache stays coherent with the file.
 */
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
	unsigned long offset, len;

	offset = (pos & ~PAGE_MASK);
	pos = pos & PAGE_MASK;
	len = PAGE_SIZE - offset;
	do {
		struct page * page;

		if (len > count)
			len = count;
		page = find_page(inode, pos);
		if (page) {
			unsigned long addr;

			wait_on_page(page);
			addr = page_address(page);
			memcpy((void *) (offset + addr), buf, len);
			free_page(addr);
		}
		count -= len;
		buf += len;
		len = PAGE_SIZE;
		offset = 0;
		pos += PAGE_SIZE;
	} while (count);
}
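
/*
 * Sketch of the intended caller: a filesystem write routine that has
 * copied "count" bytes to the backing store at position "pos" calls
 * update_vm_cache() to keep cached pages coherent (ext2's write path
 * does the equivalent in this kernel generation).  The surrounding
 * names here are hypothetical.
 */
#if 0	/* example only */
	written = example_write_to_backing_store(inode, pos, buf, count);
	if (written > 0)
		update_vm_cache(inode, pos, buf, written);
#endif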

/*
 * Try to read ahead in the file. "page_cache" is a potentially free
 * page that we could use for the cache (if it is 0 we can try to
 * create one, this is all overlapped with the IO on the previous page
 * finishing anyway).  Returns the spare page if it was not consumed.
 */
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
	struct page * page;

	offset &= PAGE_MASK;
	if (!page_cache) {
		page_cache = __get_free_page(GFP_KERNEL);
		if (!page_cache)
			return 0;
	}
	if (offset >= inode->i_size)
		return page_cache;
#if 1
	page = find_page(inode, offset);
	if (page) {
		page->count--;
		return page_cache;
	}
	/*
	 * Ok, add the new page to the hash-queues...
	 */
	page = mem_map + MAP_NR(page_cache);
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);

	inode->i_op->readpage(inode, page);

	free_page(page_cache);
	return 0;
#else
	return page_cache;
#endif
}

/*
 * Wait for IO to complete on a locked page.  The page count is raised
 * while we sleep so the page cannot go away under us.
 */
void __wait_on_page(struct page *page)
{
	struct wait_queue wait = { current, NULL };

	page->count++;
	add_wait_queue(&page->wait, &wait);
repeat:
	run_task_queue(&tq_disk);
	current->state = TASK_UNINTERRUPTIBLE;
	if (page->locked) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&page->wait, &wait);
	page->count--;
	current->state = TASK_RUNNING;
}
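
/*
 * For reference: the wait_on_page() used throughout this file is a
 * small inline in <linux/pagemap.h> that avoids the function call in
 * the common (unlocked) case.  Reproduced here as a sketch; see the
 * header for the authoritative version.
 */
#if 0	/* defined in <linux/pagemap.h> */
static inline void wait_on_page(struct page * page)
{
	if (page->locked)
		__wait_on_page(page);
}
#endif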

/*
 * This is a generic file read routine, and uses the
 * inode->i_op->readpage() function for the actual low-level
 * stuff.
 *
 * This is really ugly, but the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */
#define MAX_READAHEAD (PAGE_SIZE*8)
int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
	int error, read;
	unsigned long pos, page_cache;
	unsigned long ra_pos, ra_end;

	if (count <= 0)
		return 0;
	error = 0;
	read = 0;
	page_cache = 0;

	pos = filp->f_pos;
	ra_pos = filp->f_reada;
	ra_end = MAX_READAHEAD;
	if (!ra_pos) {
		ra_pos = (pos + PAGE_SIZE) & PAGE_MASK;
		ra_end = 0;
	}
	ra_end += pos + count;

	for (;;) {
		struct page *page;
		unsigned long offset, addr, nr;

		if (pos >= inode->i_size)
			break;
		offset = pos & ~PAGE_MASK;
		nr = PAGE_SIZE - offset;
		/*
		 * Try to find the data in the page cache..
		 */
		page = find_page(inode, pos & PAGE_MASK);
		if (page)
			goto found_page;

		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		if (page_cache)
			goto new_page;

		error = -ENOMEM;
		page_cache = __get_free_page(GFP_KERNEL);
		if (!page_cache)
			break;
		error = 0;

		/*
		 * That could have slept, so we need to check again..
		 */
		if (pos >= inode->i_size)
			break;
		page = find_page(inode, pos & PAGE_MASK);
		if (!page)
			goto new_page;

found_page:
		addr = page_address(page);
		if (nr > count)
			nr = count;

		/*
		 * We may want to do read-ahead.. Do this only
		 * if we're waiting for the current page to be
		 * filled in, and if there is room for more
		 * read-ahead pages..
		 */
		if (page->locked) {
			while (ra_pos < ra_end) {
				page_cache = try_to_read_ahead(inode, ra_pos, page_cache);
				ra_pos += PAGE_SIZE;
				if (!page->locked)
					goto unlocked_page;
			}
			__wait_on_page(page);
		}
unlocked_page:
		if (!page->uptodate)
			goto read_page;
		if (nr > inode->i_size - pos)
			nr = inode->i_size - pos;
		memcpy_tofs(buf, (void *) (addr + offset), nr);
		free_page(addr);
		buf += nr;
		pos += nr;
		read += nr;
		count -= nr;
		if (count)
			continue;
		break;

new_page:
		/*
		 * Ok, add the new page to the hash-queues...
		 */
		addr = page_cache;
		page = mem_map + MAP_NR(page_cache);
		page_cache = 0;
		page->count++;
		page->uptodate = 0;
		page->error = 0;
		page->offset = pos & PAGE_MASK;
		add_page_to_inode_queue(inode, page);
		add_page_to_hash_queue(inode, page);

		/*
		 * Error handling is tricky. If we get a read error,
		 * the cached page stays in the cache (but uptodate=0),
		 * and the next process that accesses it will try to
		 * re-read it. This is needed for NFS etc, where the
		 * identity of the reader can decide if we can read the
		 * page or not..
		 */
read_page:
		error = inode->i_op->readpage(inode, page);
		if (!error)
			goto found_page;
		free_page(addr);
		break;
	}

	if (read) {
		error = read;

#ifdef WE_SHOULD_DO_SOME_EXTRA_CHECKS
		/*
		 * Start some extra read-ahead if we haven't used up
		 * the read-ahead window yet..
		 */
		while (ra_pos < ra_end) {
			page_cache = try_to_read_ahead(inode, ra_pos, page_cache);
			ra_pos += PAGE_SIZE;
		}
		run_task_queue(&tq_disk);
#endif

		filp->f_pos = pos;
		filp->f_reada = ra_pos;
		if (!IS_RDONLY(inode)) {
			inode->i_atime = CURRENT_TIME;
			inode->i_dirt = 1;
		}
	}
	if (page_cache)
		free_page(page_cache);

	return error;
}
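
/*
 * Wiring sketch: a filesystem whose inode_operations provide readpage()
 * can plug generic_file_read() straight into its file_operations (as
 * ext2 does in kernels of this vintage).  The names marked hypothetical
 * below are placeholders, not symbols from this file.
 */
#if 0	/* example only */
static struct file_operations example_file_operations = {
	NULL,			/* lseek - default */
	generic_file_read,	/* read */
	example_file_write,	/* write - filesystem specific (hypothetical) */
	NULL,			/* readdir */
	NULL,			/* select */
	NULL,			/* ioctl */
	generic_file_mmap,	/* mmap */
	NULL,			/* open */
	NULL,			/* release */
	example_sync_file,	/* fsync (hypothetical) */
};
#endif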

/*
 * Find a cached page (reading it in and starting read-ahead if
 * necessary), wait for it to become up-to-date, and return its
 * address.  The page count is incremented for the caller.
 */
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
	struct page * page;
	unsigned long new_page;

	page = find_page(inode, offset);
	if (page)
		goto found_page_dont_free;
	new_page = __get_free_page(GFP_KERNEL);
	page = find_page(inode, offset);
	if (page)
		goto found_page;
	if (!new_page)
		return 0;
	page = mem_map + MAP_NR(new_page);
	new_page = 0;
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);
	inode->i_op->readpage(inode, page);
	if (page->locked)
		new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0);
found_page:
	if (new_page)
		free_page(new_page);
found_page_dont_free:
	wait_on_page(page);
	return page_address(page);
}

/*
 * Page fault handler for file mappings: fetch the page at the faulting
 * offset, making a private copy if the caller cannot share it.
 */
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
	unsigned long offset;
	struct inode * inode = area->vm_inode;
	unsigned long page;

	offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
	if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
		return 0;

	page = fill_page(inode, offset);
	if (page && no_share) {
		unsigned long new_page = __get_free_page(GFP_KERNEL);
		if (new_page)
			memcpy((void *) new_page, (void *) page, PAGE_SIZE);
		free_page(page);
		return new_page;
	}
	return page;
}
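
/*
 * How this gets called: on a fault in a file-backed vma, the generic
 * fault handler in mm/memory.c looks up vma->vm_ops->nopage and maps
 * the returned page.  A simplified, not verbatim, sketch:
 */
#if 0	/* simplified sketch of the mm/memory.c caller */
	page = vma->vm_ops->nopage(vma, address,
		write_access && !(vma->vm_flags & VM_SHARED));
	if (!page)
		/* no page could be provided: raise SIGBUS */;
#endif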

/*
 * Write a mapped page back to the file, using the filesystem's normal
 * write routine with the address space set to kernel data.
 */
static inline int do_write_page(struct inode * inode, struct file * file,
	const char * page, unsigned long offset)
{
	int old_fs, retval;
	unsigned long size;

	size = offset + PAGE_SIZE;
	/* refuse to extend file size.. */
	if (S_ISREG(inode->i_mode)) {
		if (size > inode->i_size)
			size = inode->i_size;
		/* this should have been tested for earlier.. */
		if (size < offset)
			return -EIO;
	}
	size -= offset;
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	retval = -EIO;
	if (size == file->f_op->write(inode, file, (const char *) page, size))
		retval = 0;
	set_fs(old_fs);
	return retval;
}

static int filemap_write_page(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long page)
{
	int result;
	struct file file;
	struct inode * inode;
	struct buffer_head * bh;

	bh = mem_map[MAP_NR(page)].buffers;
	if (bh) {
		/* the page has buffers: just mark them dirty */
		struct buffer_head * tmp = bh;
		do {
			mark_buffer_dirty(tmp, 0);
			tmp = tmp->b_this_page;
		} while (tmp != bh);
		return 0;
	}

	inode = vma->vm_inode;
	file.f_op = inode->i_op->default_file_ops;
	if (!file.f_op->write)
		return -EIO;
	file.f_mode = 3;	/* read/write */
	file.f_flags = 0;
	file.f_count = 1;
	file.f_inode = inode;
	file.f_pos = offset;
	file.f_reada = 0;

	down(&inode->i_sem);
	result = do_write_page(inode, &file, (const char *) page, offset);
	up(&inode->i_sem);
	return result;
}

/*
 * "Swap out" a page from a shared file mapping: the page is written
 * back to the file rather than to swap space.  While the write is in
 * progress a fake swap entry marks the pte; if nothing has touched the
 * pte in the meantime, it is cleared afterwards.
 */
int filemap_swapout(struct vm_area_struct * vma,
	unsigned long offset,
	pte_t *page_table)
{
	int error;
	unsigned long page = pte_page(*page_table);
	unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

	flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset));
	set_pte(page_table, __pte(entry));
	flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset));
	error = filemap_write_page(vma, offset, page);
	if (pte_val(*page_table) == entry)
		pte_clear(page_table);
	return error;
}

/*
 * filemap_swapin() is called only if the fake swap entry installed by
 * filemap_swapout() is still in the page table: the page itself is
 * still in memory, so just rebuild the pte from it.
 */
static pte_t filemap_swapin(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long entry)
{
	unsigned long page = SWP_OFFSET(entry);

	mem_map[page].count++;
	page = (page << PAGE_SHIFT) + PAGE_OFFSET;
	return mk_pte(page,vma->vm_page_prot);
}
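
/*
 * Illustration of the swapout/swapin round trip above: the "swap entry"
 * never refers to swap space, it just encodes the mem_map index of a
 * page that stayed resident while being written to the file.
 */
#if 0	/* example only */
	entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));	/* filemap_swapout */
	nr = SWP_OFFSET(entry);				/* filemap_swapin: == MAP_NR(page) */
#endif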

static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;
	unsigned long page;
	int error;

	if (!(flags & MS_INVALIDATE)) {
		/* just clean: write back dirty present pages */
		if (!pte_present(pte))
			return 0;
		if (!pte_dirty(pte))
			return 0;
		flush_cache_page(vma, address);
		set_pte(ptep, pte_mkclean(pte));
		flush_tlb_page(vma, address);
		page = pte_page(pte);
		mem_map[MAP_NR(page)].count++;
	} else {
		/* invalidate: tear the pte down completely */
		if (pte_none(pte))
			return 0;
		flush_cache_page(vma, address);
		pte_clear(ptep);
		flush_tlb_page(vma, address);
		if (!pte_present(pte)) {
			swap_free(pte_val(pte));
			return 0;
		}
		page = pte_page(pte);
		if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
			free_page(page);
			return 0;
		}
	}
	error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
	free_page(page);
	return error;
}

static inline int filemap_sync_pte_range(pmd_t * pmd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
	pte_t * pte;
	unsigned long end;
	int error;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
		pmd_clear(pmd);
		return 0;
	}
	pte = pte_offset(pmd, address);
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte(pte, vma, address + offset, flags);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return error;
}

static inline int filemap_sync_pmd_range(pgd_t * pgd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned int flags)
{
	pmd_t * pmd;
	unsigned long offset, end;
	int error;

	if (pgd_none(*pgd))
		return 0;
	if (pgd_bad(*pgd)) {
		printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
		pgd_clear(pgd);
		return 0;
	}
	pmd = pmd_offset(pgd, address);
	offset = address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return error;
}

static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	dir = pgd_offset(current->mm, address);
	flush_cache_range(vma->vm_mm, end - size, end);
	while (address < end) {
		error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	flush_tlb_range(vma->vm_mm, end - size, end);
	return error;
}

/*
 * This handles (potentially partial) area unmaps..
 */
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
	filemap_sync(vma, start, len, MS_ASYNC);
}

/*
 * Shared mappings need to be able to do the right thing at
 * close/unmap/sync. They will also use the private file as
 * backing-store for swapping..
 */
static struct vm_operations_struct file_shared_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	filemap_unmap,		/* unmap - we need to sync the pages */
	NULL,			/* protect */
	filemap_sync,		/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage */
	NULL,			/* wppage */
	filemap_swapout,	/* swapout */
	filemap_swapin,		/* swapin */
};

/*
 * Private mappings just need to be able to load in the map.
 *
 * (This is actually used for shared mappings as well, if we
 * know they can't ever get write permissions..)
 */
static struct vm_operations_struct file_private_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage */
	NULL,			/* wppage */
	NULL,			/* swapout */
	NULL,			/* swapin */
};

/* This is used for a general mmap of a disk file */
int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
{
	struct vm_operations_struct * ops;

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
		ops = &file_shared_mmap;
		/* shared mappings can only be handled correctly if the
		 * offsets are all page aligned. */
		if (vma->vm_offset & (PAGE_SIZE - 1))
			return -EINVAL;
	} else {
		ops = &file_private_mmap;
		if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
			return -EINVAL;
	}
	if (!inode->i_sb || !S_ISREG(inode->i_mode))
		return -EACCES;
	if (!inode->i_op || !inode->i_op->readpage)
		return -ENOEXEC;
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	vma->vm_inode = inode;
	inode->i_count++;
	vma->vm_ops = ops;
	return 0;
}

/*
 * Synchronize one vma interval with its backing file, following up
 * with a full fsync for MS_SYNC.
 */
static int msync_interval(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int flags)
{
	if (!vma->vm_inode)
		return 0;
	if (vma->vm_ops->sync) {
		int error;
		error = vma->vm_ops->sync(vma, start, end-start, flags);
		if (error)
			return error;
		if (flags & MS_SYNC)
			return file_fsync(vma->vm_inode, NULL);
		return 0;
	}
	return 0;
}

asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error, error;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		return -EINVAL;
	if (end == start)
		return 0;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -EFAULT at the end.
	 */
	vma = find_vma(current, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		if (!vma)
			return -EFAULT;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -EFAULT;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					return error;
			}
			return unmapped_error;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			return error;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
}
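
/*
 * User-space view of the syscall above (standard POSIX usage, shown
 * for illustration only): flush changes made through a shared mapping
 * back to the file.
 */
#if 0	/* example only; compiles against <sys/mman.h> in user space */
	char *map = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
	if (map != MAP_FAILED) {
		map[0] = 'x';			/* dirty the first page */
		msync(map, len, MS_SYNC);	/* force it out to the file */
		munmap(map, len);
	}
#endif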