This source file includes the following definitions:
- invalidate_inode_pages
- truncate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- do_write_page
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
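/*
 * Page-cache and file-mapping support: the generic read, mmap and
 * msync routines used by most "normal" filesystems.
 */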
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>

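/*
 * Page-cache bookkeeping: the total number of cached pages, and the
 * hash table used by find_page() to look pages up by inode and offset.
 */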
unsigned long page_cache_size = 0;
struct page * page_hash_table[PAGE_HASH_SIZE];

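/*
 * Invalidate the in-core pages of an inode: unlink every unlocked page
 * from the inode and hash queues and free it. Locked pages are left
 * in place.
 */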
void invalidate_inode_pages(struct inode * inode)
{
        struct page ** p;
        struct page * page;

        p = &inode->i_pages;
        while ((page = *p) != NULL) {
                if (page->locked) {
                        p = &page->next;
                        continue;
                }
                inode->i_nrpages--;
                if ((*p = page->next) != NULL)
                        (*p)->prev = page->prev;
                page->dirty = 0;
                page->next = NULL;
                page->prev = NULL;
                remove_page_from_hash_queue(page);
                page->inode = NULL;
                free_page(page_address(page));
                continue;
        }
}

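/*
 * Truncate the page cache at a given offset: free every cached page
 * that lies entirely beyond 'start', and zero out the tail of the
 * page that straddles the truncation point.
 */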
void truncate_inode_pages(struct inode * inode, unsigned long start)
{
        struct page ** p;
        struct page * page;

repeat:
        p = &inode->i_pages;
        while ((page = *p) != NULL) {
                unsigned long offset = page->offset;

                /* page wholly beyond the truncation point? free it.. */
                if (offset >= start) {
                        if (page->locked) {
                                wait_on_page(page);
                                goto repeat;
                        }
                        inode->i_nrpages--;
                        if ((*p = page->next) != NULL)
                                (*p)->prev = page->prev;
                        page->dirty = 0;
                        page->next = NULL;
                        page->prev = NULL;
                        remove_page_from_hash_queue(page);
                        page->inode = NULL;
                        free_page(page_address(page));
                        continue;
                }
                p = &page->next;
                offset = start - offset;
                /* partially truncated page: zero out the discarded tail */
                if (offset < PAGE_SIZE)
                        memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
        }
}

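/*
 * Try to free one page of cache memory. Scans mem_map clock-wise from
 * where the last scan stopped, clearing referenced bits as it goes; an
 * unreferenced page-cache or buffer-cache page gets freed. Returns 1
 * if a page was freed, 0 otherwise.
 */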
int shrink_mmap(int priority, int dma)
{
        static int clock = 0;
        struct page * page;
        unsigned long limit = MAP_NR(high_memory);
        struct buffer_head *tmp, *bh;

        priority = (limit<<2) >> priority;
        page = mem_map + clock;
        while (priority-- > 0) {
                if (page->locked)
                        goto next;
                if (dma && !page->dma)
                        goto next;

                /* First, fold the "touched" bits of any buffers on
                   this page into the page's referenced bit. */
                bh = page->buffers;
                if (bh) {
                        tmp = bh;
                        do {
                                if (buffer_touched(tmp)) {
                                        clear_bit(BH_Touched, &tmp->b_state);
                                        page->referenced = 1;
                                }
                                tmp = tmp->b_this_page;
                        } while (tmp != bh);
                }

                /* Give a mapped or recently referenced page a second
                   chance; otherwise try to free it. */
                if (page->count > 1)
                        page->referenced = 1;
                else if (page->referenced)
                        page->referenced = 0;
                else if (page->count) {
                        /* is it a page-cache page? */
                        if (page->inode) {
                                remove_page_from_hash_queue(page);
                                remove_page_from_inode_queue(page);
                                free_page(page_address(page));
                                return 1;
                        }
                        /* or a buffer-cache page.. */
                        if (bh && try_to_free_buffer(bh, &bh, 6))
                                return 1;
                }
next:
                page++;
                clock++;
                if (clock >= limit) {
                        clock = 0;
                        page = mem_map;
                }
        }
        return 0;
}

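/*
 * Release a page when only the page cache and the caller still hold
 * references to it (count == 2): frees the page if it is a page-cache
 * page and returns 1, otherwise returns the current use count.
 */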
unsigned long page_unuse(unsigned long page)
{
        struct page * p = mem_map + MAP_NR(page);
        int count = p->count;

        if (count != 2)
                return count;
        if (!p->inode)
                return count;
        remove_page_from_hash_queue(p);
        remove_page_from_inode_queue(p);
        free_page(page);
        return 1;
}

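/*
 * Update the page cache when the file is written to: any cached page
 * that overlaps the written range gets the new data copied in, so
 * mmap'ed reads stay coherent with write().
 */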
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
        unsigned long offset, len;

        offset = (pos & ~PAGE_MASK);
        pos = pos & PAGE_MASK;
        len = PAGE_SIZE - offset;
        do {
                struct page * page;

                if (len > count)
                        len = count;
                page = find_page(inode, pos);
                if (page) {
                        unsigned long addr;

                        wait_on_page(page);
                        addr = page_address(page);
                        memcpy((void *) (offset + addr), buf, len);
                        free_page(addr);
                }
                count -= len;
                buf += len;
                len = PAGE_SIZE;
                offset = 0;
                pos += PAGE_SIZE;
        } while (count);
}

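/*
 * Try to read ahead one page at the given offset. Takes (and may
 * consume) a pre-allocated free page; returns a free page for the
 * next call, or 0 if the spare page was used for the read.
 */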
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
        struct page * page;

        offset &= PAGE_MASK;
        if (!page_cache) {
                page_cache = __get_free_page(GFP_KERNEL);
                if (!page_cache)
                        return 0;
        }
        if (offset >= inode->i_size)
                return page_cache;
#if 1
        page = find_page(inode, offset);
        if (page) {
                /* already cached: just drop the find_page() reference */
                page->count--;
                return page_cache;
        }
        /*
         * Not in the cache: add the spare page to the queues and
         * start the read on it.
         */
        page = mem_map + MAP_NR(page_cache);
        page->count++;
        page->uptodate = 0;
        page->error = 0;
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        add_page_to_hash_queue(inode, page);

        inode->i_op->readpage(inode, page);

        free_page(page_cache);
        return 0;
#else
        return page_cache;
#endif
}

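/*
 * Wait for a page to become unlocked. Takes an extra reference on the
 * page so it cannot be freed from under us while we sleep.
 */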
void __wait_on_page(struct page *page)
{
        struct wait_queue wait = { current, NULL };

        page->count++;
        add_wait_queue(&page->wait, &wait);
repeat:
        current->state = TASK_UNINTERRUPTIBLE;
        if (page->locked) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&page->wait, &wait);
        page->count--;
        current->state = TASK_RUNNING;
}

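/*
 * Generic file read routine: satisfies reads from the page cache,
 * using the inode's readpage() operation to fill pages as needed,
 * and doing read-ahead while waiting on a locked page.
 */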
#define MAX_READAHEAD (PAGE_SIZE*8)
int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
        int error, read;
        unsigned long pos, page_cache;

        if (count <= 0)
                return 0;
        error = 0;
        read = 0;
        page_cache = 0;

        pos = filp->f_pos;
        for (;;) {
                struct page *page;
                unsigned long offset, addr, nr;

                if (pos >= inode->i_size)
                        break;
                offset = pos & ~PAGE_MASK;
                nr = PAGE_SIZE - offset;

                /* try to find the data in the page cache.. */
                page = find_page(inode, pos & PAGE_MASK);
                if (page)
                        goto found_page;

                /*
                 * Not cached - we need a new page. Use any free page
                 * left over from a previous iteration first.
                 */
                if (page_cache)
                        goto new_page;

                error = -ENOMEM;
                page_cache = __get_free_page(GFP_KERNEL);
                if (!page_cache)
                        break;
                error = 0;

                /* the allocation may have slept: check again.. */
                if (pos >= inode->i_size)
                        break;
                page = find_page(inode, pos & PAGE_MASK);
                if (!page)
                        goto new_page;

found_page:
                addr = page_address(page);
                if (nr > count)
                        nr = count;

                /*
                 * While we wait for a locked page to be read in, use
                 * the time to start read-ahead on the pages that
                 * follow it.
                 */
                if (page->locked) {
                        unsigned long max_ahead, ahead;

                        max_ahead = count - nr;
                        if (filp->f_reada || max_ahead > MAX_READAHEAD)
                                max_ahead = MAX_READAHEAD;
                        ahead = 0;
                        while (ahead < max_ahead) {
                                ahead += PAGE_SIZE;
                                page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
                        }
                        __wait_on_page(page);
                }
                if (!page->uptodate)
                        goto read_page;
                if (nr > inode->i_size - pos)
                        nr = inode->i_size - pos;
                memcpy_tofs(buf, (void *) (addr + offset), nr);
                free_page(addr);
                buf += nr;
                pos += nr;
                read += nr;
                count -= nr;
                if (count)
                        continue;
                break;

new_page:
                /* add the new page to the inode and hash queues.. */
                addr = page_cache;
                page = mem_map + MAP_NR(page_cache);
                page_cache = 0;
                page->count++;
                page->uptodate = 0;
                page->error = 0;
                page->offset = pos & PAGE_MASK;
                add_page_to_inode_queue(inode, page);
                add_page_to_hash_queue(inode, page);

                /*
                 * On a read error the page stays in the cache with
                 * uptodate clear, so the next access retries the
                 * read instead of seeing stale data.
                 */
read_page:
                error = inode->i_op->readpage(inode, page);
                if (!error)
                        goto found_page;
                free_page(addr);
                break;
        }

        filp->f_pos = pos;
        filp->f_reada = 1;
        if (page_cache)
                free_page(page_cache);
        if (!IS_RDONLY(inode)) {
                inode->i_atime = CURRENT_TIME;
                inode->i_dirt = 1;
        }
        if (!read)
                read = error;
        return read;
}

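/*
 * Find a cached page for the given offset, creating and reading it in
 * if necessary. Returns the (referenced) page address once the page
 * is unlocked, or 0 if no memory was available.
 */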
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
        struct page * page;
        unsigned long new_page;

        page = find_page(inode, offset);
        if (page)
                goto found_page;
        new_page = __get_free_page(GFP_KERNEL);
        /* the allocation may have slept: check the cache again */
        page = find_page(inode, offset);
        if (page) {
                if (new_page)
                        free_page(new_page);
                goto found_page;
        }
        if (!new_page)
                return 0;
        page = mem_map + MAP_NR(new_page);
        new_page = 0;
        page->count++;
        page->uptodate = 0;
        page->error = 0;
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        add_page_to_hash_queue(inode, page);
        inode->i_op->readpage(inode, page);
found_page:
        wait_on_page(page);
        return page_address(page);
}

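/*
 * Page fault handler for file mappings: pull the page in through the
 * page cache. For a private (copy-on-write) fault, hand back a copy
 * of the cached page instead of the cache page itself.
 */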
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
        unsigned long offset;
        struct inode * inode = area->vm_inode;
        unsigned long page;

        offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
        /* don't allow shared faults beyond the end of the file */
        if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
                return 0;

        page = fill_page(inode, offset);
        if (page && no_share) {
                unsigned long new_page = __get_free_page(GFP_KERNEL);
                if (new_page)
                        memcpy((void *) new_page, (void *) page, PAGE_SIZE);
                free_page(page);
                return new_page;
        }
        return page;
}

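/*
 * Write one page of data back to the file with the inode's write
 * operation, from kernel space. For a regular file the write is
 * clipped so that it never extends the file size.
 */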
static inline int do_write_page(struct inode * inode, struct file * file,
        const char * page, unsigned long offset)
{
        int old_fs, retval;
        unsigned long size;

        size = offset + PAGE_SIZE;
        /* refuse to extend the size of a regular file.. */
        if (S_ISREG(inode->i_mode)) {
                if (size > inode->i_size)
                        size = inode->i_size;
                /* the page lies wholly beyond end-of-file */
                if (size < offset)
                        return -EIO;
        }
        size -= offset;
        old_fs = get_fs();
        set_fs(KERNEL_DS);
        retval = -EIO;
        if (size == file->f_op->write(inode, file, (const char *) page, size))
                retval = 0;
        set_fs(old_fs);
        return retval;
}

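/*
 * Write a dirty page back to the mapped file. A page backed by
 * buffers is just marked dirty for the buffer cache to write out;
 * otherwise a temporary struct file is set up and the data is
 * written out synchronously.
 */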
static int filemap_write_page(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long page)
{
        int result;
        struct file file;
        struct inode * inode;
        struct buffer_head * bh;

        bh = mem_map[MAP_NR(page)].buffers;
        if (bh) {
                /* the page is backed by buffers: just mark them dirty */
                struct buffer_head * tmp = bh;
                do {
                        mark_buffer_dirty(tmp, 0);
                        tmp = tmp->b_this_page;
                } while (tmp != bh);
                return 0;
        }

        inode = vma->vm_inode;
        file.f_op = inode->i_op->default_file_ops;
        if (!file.f_op->write)
                return -EIO;
        file.f_mode = 3;                /* read/write */
        file.f_flags = 0;
        file.f_count = 1;
        file.f_inode = inode;
        file.f_pos = offset;
        file.f_reada = 0;

        down(&inode->i_sem);
        result = do_write_page(inode, &file, (const char *) page, offset);
        up(&inode->i_sem);
        return result;
}

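/*
 * Swap out a page of a shared file mapping. While the write-out is in
 * progress the page table entry holds a special SHM swap entry naming
 * the physical page, so filemap_swapin() can reclaim the page directly
 * if it is touched in the meantime. Once the write completes, the
 * entry is cleared and a later fault pages the data back in from the
 * (now up-to-date) file.
 */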
int filemap_swapout(struct vm_area_struct * vma,
        unsigned long offset,
        pte_t *page_table)
{
        int error;
        unsigned long page = pte_page(*page_table);
        unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

        set_pte(page_table, __pte(entry));
        invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
        error = filemap_write_page(vma, offset, page);
        if (pte_val(*page_table) == entry)
                pte_clear(page_table);
        return error;
}

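/*
 * Swap a page back in from a filemap_swapout() entry: the swap entry
 * encodes the physical page number, so just regenerate the pte and
 * bump the page's use count.
 */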
static pte_t filemap_swapin(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long entry)
{
        unsigned long page = SWP_OFFSET(entry);

        mem_map[page].count++;
        page = (page << PAGE_SHIFT) + PAGE_OFFSET;
        return mk_pte(page, vma->vm_page_prot);
}

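/*
 * Sync one page table entry: write the page back if it is dirty, and
 * for MS_INVALIDATE also drop it from the page tables.
 */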
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
        unsigned long address, unsigned int flags)
{
        pte_t pte = *ptep;
        unsigned long page;
        int error;

        if (!(flags & MS_INVALIDATE)) {
                if (!pte_present(pte))
                        return 0;
                if (!pte_dirty(pte))
                        return 0;
                set_pte(ptep, pte_mkclean(pte));
                invalidate_page(vma, address);
                page = pte_page(pte);
                mem_map[MAP_NR(page)].count++;
        } else {
                if (pte_none(pte))
                        return 0;
                pte_clear(ptep);
                invalidate_page(vma, address);
                if (!pte_present(pte)) {
                        swap_free(pte_val(pte));
                        return 0;
                }
                page = pte_page(pte);
                if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
                        free_page(page);
                        return 0;
                }
        }
        error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
        free_page(page);
        return error;
}

static inline int filemap_sync_pte_range(pmd_t * pmd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
        pte_t * pte;
        unsigned long end;
        int error;

        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
                printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
                pmd_clear(pmd);
                return 0;
        }
        pte = pte_offset(pmd, address);
        offset += address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte(pte, vma, address + offset, flags);
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return error;
}

static inline int filemap_sync_pmd_range(pgd_t * pgd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned int flags)
{
        pmd_t * pmd;
        unsigned long offset, end;
        int error;

        if (pgd_none(*pgd))
                return 0;
        if (pgd_bad(*pgd)) {
                printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
                pgd_clear(pgd);
                return 0;
        }
        pmd = pmd_offset(pgd, address);
        offset = address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return error;
}

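/*
 * Sync a whole memory area: walk the page directory, sync (or
 * invalidate) every page in the range, then flush the TLB for it.
 */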
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
        size_t size, unsigned int flags)
{
        pgd_t * dir;
        unsigned long end = address + size;
        int error = 0;

        dir = pgd_offset(current->mm, address);
        while (address < end) {
                error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        invalidate_range(vma->vm_mm, end - size, end);
        return error;
}

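/*
 * Called when part of a shared mapping is unmapped: push any dirty
 * pages back to the file (asynchronously) before they go away.
 */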
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
        filemap_sync(vma, start, len, MS_ASYNC);
}

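/*
 * Operations for a shared file mapping: writes must eventually reach
 * the file, so unmap, sync, swapout and swapin all need file-aware
 * handlers.
 */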
static struct vm_operations_struct file_shared_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        filemap_unmap,          /* unmap */
        NULL,                   /* protect */
        filemap_sync,           /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        filemap_swapout,        /* swapout */
        filemap_swapin,         /* swapin */
};

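/*
 * Private (copy-on-write) mappings never write back to the file, so
 * they only need to be able to page data in via nopage().
 */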
static struct vm_operations_struct file_private_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        NULL,                   /* unmap */
        NULL,                   /* protect */
        NULL,                   /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        NULL,                   /* swapout */
        NULL,                   /* swapin */
};

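/*
 * Generic mmap entry point for regular files on filesystems that
 * provide a readpage() operation: pick shared or private vm_ops and
 * check the alignment of the file offset.
 */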
int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
{
        struct vm_operations_struct * ops;

        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
                ops = &file_shared_mmap;
                /* writable shared mappings must be page-aligned */
                if (vma->vm_offset & (PAGE_SIZE - 1))
                        return -EINVAL;
        } else {
                ops = &file_private_mmap;
                if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
                        return -EINVAL;
        }
        if (!inode->i_sb || !S_ISREG(inode->i_mode))
                return -EACCES;
        if (!inode->i_op || !inode->i_op->readpage)
                return -ENOEXEC;
        if (!IS_RDONLY(inode)) {
                inode->i_atime = CURRENT_TIME;
                inode->i_dirt = 1;
        }
        vma->vm_inode = inode;
        inode->i_count++;
        vma->vm_ops = ops;
        return 0;
}

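/*
 * Sync one vma interval with its file via the vma's sync operation;
 * for MS_SYNC, also fsync the inode so the data reaches the disk.
 */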
static int msync_interval(struct vm_area_struct * vma,
        unsigned long start, unsigned long end, int flags)
{
        if (!vma->vm_inode)
                return 0;
        if (vma->vm_ops->sync) {
                int error;
                error = vma->vm_ops->sync(vma, start, end-start, flags);
                if (error)
                        return error;
                if (flags & MS_SYNC)
                        return file_fsync(vma->vm_inode, NULL);
                return 0;
        }
        return 0;
}

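/*
 * The msync() system call: sync (and/or invalidate) the pages of the
 * address range [start, start+len) with their backing files.
 */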
asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct * vma;
        int unmapped_error, error;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                return -EINVAL;
        if (end == start)
                return 0;
        /*
         * If the interval [start,end) covers some unmapped address
         * ranges, just ignore them, but return -EFAULT at the end.
         */
        vma = find_vma(current, start);
        unmapped_error = 0;
        for (;;) {
                /* Still start < end. */
                if (!vma)
                        return -EFAULT;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -EFAULT;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags);
                                if (error)
                                        return error;
                        }
                        return unmapped_error;
                }
                /* Here vma->vm_start <= start < vma->vm_end < end. */
                error = msync_interval(vma, start, vma->vm_end, flags);
                if (error)
                        return error;
                start = vma->vm_end;
                vma = vma->vm_next;
        }
}