This source file includes the following definitions:
- invalidate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994, 1995  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem does this differently, for example)
 */
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>
/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 */
unsigned long page_cache_size = 0;
struct page * page_hash_table[PAGE_HASH_SIZE];
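/*
 * Invalidate the inode's cached pages from "start" onwards: pages
 * that lie entirely beyond "start" are dropped from the cache and
 * freed, and the part of a straddling page beyond "start" is
 * cleared.  Locked pages are waited for, and the scan is restarted.
 */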
void invalidate_inode_pages(struct inode * inode, unsigned long start)
{
        struct page ** p;
        struct page * page;

repeat:
        p = &inode->i_pages;
        while ((page = *p) != NULL) {
                unsigned long offset = page->offset;

                /* page wholly truncated - free it */
                if (offset >= start) {
                        if (page->locked) {
                                wait_on_page(page);
                                goto repeat;
                        }
                        inode->i_nrpages--;
                        if ((*p = page->next) != NULL)
                                (*p)->prev = page->prev;
                        page->dirty = 0;
                        page->next = NULL;
                        page->prev = NULL;
                        remove_page_from_hash_queue(page);
                        page->inode = NULL;
                        free_page(page_address(page));
                        continue;
                }
                p = &page->next;
                offset = start - offset;
                /* partial truncate, clear end of page */
                if (offset < PAGE_SIZE)
                        memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
        }
}
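/*
 * Scan the page map with a simple clock algorithm and try to free
 * one page.  Locked pages are skipped, and shared or recently
 * referenced pages only get their referenced bit adjusted; an old,
 * unshared page-cache page or buffer page is reclaimed.  Returns 1
 * if a page was freed, 0 otherwise.
 */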
int shrink_mmap(int priority, unsigned long limit)
{
        static int clock = 0;
        struct page * page;
        struct buffer_head *tmp, *bh;

        if (limit > high_memory)
                limit = high_memory;
        limit = MAP_NR(limit);
        if (clock >= limit)
                clock = 0;
        priority = (limit<<2) >> priority;
        page = mem_map + clock;
        while (priority-- > 0) {
                if (page->locked)
                        goto next;
                /* First of all, regenerate the page's referenced bit
                   from any buffers in the page */
                bh = buffer_pages[MAP_NR(page_address(page))];
                if (bh) {
                        tmp = bh;
                        do {
                                if (buffer_touched(tmp)) {
                                        clear_bit(BH_Touched, &tmp->b_state);
                                        page->referenced = 1;
                                }
                                tmp = tmp->b_this_page;
                        } while (tmp != bh);
                }

                /* We can't throw away shared pages, but we do mark
                   them as referenced.  This relies on the fact that
                   no page is currently in both the page cache and the
                   buffer cache; we'd have to modify the following
                   test to allow for that case. */
                if (page->count > 1)
                        page->referenced = 1;
                else if (page->referenced)
                        page->referenced = 0;
                else if (page->count) {
                        /* The page is an old, unshared page --- try
                           to discard it. */
                        if (page->inode) {
                                remove_page_from_hash_queue(page);
                                remove_page_from_inode_queue(page);
                                free_page(page_address(page));
                                return 1;
                        }
                        if (bh && try_to_free_buffer(bh, &bh, 6))
                                return 1;
                }
next:
                page++;
                clock++;
                if (clock >= limit) {
                        clock = 0;
                        page = mem_map;
                }
        }
        return 0;
}
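/*
 * Try to drop a page that is only held by the page cache: a use
 * count of exactly 2 means one reference from the cache and one
 * from our caller.  Returns the resulting use count, so 1 means
 * the page was freed.
 */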
unsigned long page_unuse(unsigned long page)
{
        struct page * p = mem_map + MAP_NR(page);
        int count = p->count;

        if (count != 2)
                return count;
        if (!p->inode)
                return count;
        remove_page_from_hash_queue(p);
        remove_page_from_inode_queue(p);
        free_page(page);
        return 1;
}
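/*
 * Update the page cache copy of data that was just written to the
 * file through the normal write path, so that cached pages stay
 * coherent with the new file contents.
 */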
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
        unsigned long offset, len;

        offset = (pos & ~PAGE_MASK);
        pos = pos & PAGE_MASK;
        len = PAGE_SIZE - offset;
        do {
                struct page * page;

                if (len > count)
                        len = count;
                page = find_page(inode, pos);
                if (page) {
                        unsigned long addr;

                        wait_on_page(page);
                        addr = page_address(page);
                        memcpy((void *) (offset + addr), buf, len);
                        free_page(addr);
                }
                count -= len;
                buf += len;
                len = PAGE_SIZE;
                offset = 0;
                pos += PAGE_SIZE;
        } while (count);
}
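/*
 * Try to start a read of the page at "offset" ahead of time.  A
 * pre-allocated page may be passed in as "page_cache", and one is
 * allocated here otherwise.  Returns the page if it is still
 * available for re-use (data already cached, or offset past end of
 * file), or 0 if the page was consumed by the read.
 */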
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
        struct page * page;

        offset &= PAGE_MASK;
        if (!page_cache) {
                page_cache = __get_free_page(GFP_KERNEL);
                if (!page_cache)
                        return 0;
        }
        if (offset >= inode->i_size)
                return page_cache;
#if 1
        page = find_page(inode, offset);
        if (page) {
                page->count--;
                return page_cache;
        }
        /*
         * Not cached yet: add the page to the cache and start the read.
         */
        page = mem_map + MAP_NR(page_cache);
        page->count++;
        page->uptodate = 0;
        page->error = 0;
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        add_page_to_hash_queue(inode, page);

        inode->i_op->readpage(inode, page);

        free_page(page_cache);
        return 0;
#else
        return page_cache;
#endif
}
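/*
 * Wait for a page to become unlocked: the slow path behind
 * wait_on_page().  The page's use count is raised across the sleep
 * so that the page cannot be freed while we wait.
 */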
void __wait_on_page(struct page *page)
{
        struct wait_queue wait = { current, NULL };

        page->count++;
        add_wait_queue(&page->wait, &wait);
repeat:
        current->state = TASK_UNINTERRUPTIBLE;
        if (page->locked) {
                schedule();
                goto repeat;
        }
        remove_wait_queue(&page->wait, &wait);
        page->count--;
        current->state = TASK_RUNNING;
}
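/*
 * Read a file through the page cache: each page is looked up in the
 * cache, and read in if it is missing.  Sequential reads trigger a
 * read-ahead of up to READAHEAD_PAGES pages, and big reads of new
 * data are done in chunks of up to MAX_IO_PAGES pages.  This is the
 * read() implementation used by most filesystems.
 */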
#define READAHEAD_PAGES 3
#define MAX_IO_PAGES 4

int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
        int read = 0, newpage = 0;
        unsigned long pos;
        unsigned long page_cache = 0;
        int pre_read = 0;

        if (count <= 0)
                return 0;

        pos = filp->f_pos;
        do {
                struct page *page;
                unsigned long offset, addr, nr;
                int i;
                off_t p;

                if (pos >= inode->i_size)
                        break;
                offset = pos & ~PAGE_MASK;
                nr = PAGE_SIZE - offset;
                /*
                 * Try to find the data in the page cache..
                 */
                page = find_page(inode, pos & PAGE_MASK);
                if (page)
                        goto found_page;

                /*
                 * Not cached: we need a page to read the data into.
                 * __get_free_page() may sleep, so everything has to
                 * be re-checked once we have the page.
                 */
                if (!page_cache) {
                        page_cache = __get_free_page(GFP_KERNEL);
                        if (!page_cache) {
                                if (!read)
                                        read = -ENOMEM;
                                break;
                        }
                }

                /*
                 * While we slept, the file may have been truncated
                 * or the page may have been read in by somebody else.
                 */
                if (pos >= inode->i_size)
                        break;
                page = find_page(inode, pos & PAGE_MASK);
                if (page)
                        goto found_page;

                /*
                 * Still not cached: add our private page to the
                 * cache and start the actual read.
                 */
                page = mem_map + MAP_NR(page_cache);
                page_cache = 0;
                page->count++;
                page->uptodate = 0;
                page->error = 0;
                page->offset = pos & PAGE_MASK;
                add_page_to_inode_queue(inode, page);
                add_page_to_hash_queue(inode, page);

                inode->i_op->readpage(inode, page);

                /*
                 * The read is now in flight; remember that this was
                 * a new page, so that the read-ahead heuristics
                 * below can keep the disk busy.
                 */
                newpage = 1;

found_page:
                addr = page_address(page);
                if (nr > count)
                        nr = count;
                /*
                 * A big read of a fresh page: read ahead in chunks
                 * of up to MAX_IO_PAGES pages.
                 */
                if (newpage && nr < count) {
                        if (pre_read > 0)
                                pre_read -= PAGE_SIZE;
                        else {
                                pre_read = (MAX_IO_PAGES-1) * PAGE_SIZE;
                                if (pre_read > (count - nr))
                                        pre_read = count - nr;
                                for (i=0, p=pos; i<pre_read; i+=PAGE_SIZE) {
                                        p += PAGE_SIZE;
                                        page_cache = try_to_read_ahead(inode, p, page_cache);
                                }
                        }
                } else
                /*
                 * A sequential read that ends on a page boundary:
                 * read the next few pages ahead as well.
                 */
                if (newpage && nr == count && filp->f_reada
                    && !((pos + nr) & ~PAGE_MASK)) {
                        for (i=0, p=pos; i<READAHEAD_PAGES; i++) {
                                p += PAGE_SIZE;
                                page_cache = try_to_read_ahead(inode, p, page_cache);
                        }
                }
                wait_on_page(page);
                if (nr > inode->i_size - pos)
                        nr = inode->i_size - pos;
                memcpy_tofs(buf, (void *) (addr + offset), nr);
                free_page(addr);
                buf += nr;
                pos += nr;
                read += nr;
                count -= nr;
        } while (count);

        filp->f_pos = pos;
        filp->f_reada = 1;
        if (page_cache)
                free_page(page_cache);
        if (!IS_RDONLY(inode)) {
                inode->i_atime = CURRENT_TIME;
                inode->i_dirt = 1;
        }
        return read;
}
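/*
 * Find a page in the page cache, or allocate a fresh one, insert it
 * into the cache and start a read of it.  The page address is
 * returned with the use count raised, after any pending I/O on the
 * page has completed; 0 means we are out of memory.
 */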
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
        struct page * page;
        unsigned long new_page;

        page = find_page(inode, offset);
        if (page)
                goto found_page;
        new_page = __get_free_page(GFP_KERNEL);
        page = find_page(inode, offset);
        if (page) {
                if (new_page)
                        free_page(new_page);
                goto found_page;
        }
        if (!new_page)
                return 0;
        page = mem_map + MAP_NR(new_page);
        new_page = 0;
        page->count++;
        page->uptodate = 0;
        page->error = 0;
        page->offset = offset;
        add_page_to_inode_queue(inode, page);
        add_page_to_hash_queue(inode, page);
        inode->i_op->readpage(inode, page);
found_page:
        wait_on_page(page);
        return page_address(page);
}
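/*
 * The nopage handler for file mappings: translate the faulting
 * address into a file offset and pull the page in through
 * fill_page().  If the caller must not share the cached page
 * (no_share), a private copy is returned instead.
 */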
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
        unsigned long offset;
        struct inode * inode = area->vm_inode;
        unsigned long page;

        offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
        if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
                return 0;

        page = fill_page(inode, offset);
        if (page && no_share) {
                unsigned long new_page = __get_free_page(GFP_KERNEL);
                if (new_page)
                        memcpy((void *) new_page, (void *) page, PAGE_SIZE);
                free_page(page);
                return new_page;
        }
        return page;
}
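/*
 * Write a page of a shared mapping back to the file.  If the page
 * is still around in the buffer cache, marking the buffers dirty is
 * enough; otherwise the page is written out synchronously through
 * the file's default write op, with get_fs/set_fs juggling because
 * the data source is in kernel space.
 */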
static int filemap_write_page(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long page)
{
        int old_fs;
        unsigned long size, result;
        struct file file;
        struct inode * inode;
        struct buffer_head * bh;

        bh = buffer_pages[MAP_NR(page)];
        if (bh) {
                /* whee.. just mark the buffer heads dirty */
                struct buffer_head * tmp = bh;
                do {
                        mark_buffer_dirty(tmp, 0);
                        tmp = tmp->b_this_page;
                } while (tmp != bh);
                return 0;
        }

        inode = vma->vm_inode;
        file.f_op = inode->i_op->default_file_ops;
        if (!file.f_op->write)
                return -EIO;
        size = offset + PAGE_SIZE;
        /* refuse to extend file size.. */
        if (S_ISREG(inode->i_mode)) {
                if (size > inode->i_size)
                        size = inode->i_size;
                /* Ho humm.. We should have tested for this earlier */
                if (size < offset)
                        return -EIO;
        }
        size -= offset;
        file.f_mode = 3;
        file.f_flags = 0;
        file.f_count = 1;
        file.f_inode = inode;
        file.f_pos = offset;
        file.f_reada = 0;
        old_fs = get_fs();
        set_fs(KERNEL_DS);
        result = file.f_op->write(inode, &file, (const char *) page, size);
        set_fs(old_fs);
        if (result != size)
                return -EIO;
        return 0;
}
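/*
 * Swap out a page of a shared file mapping.  The page table entry
 * is temporarily replaced by a SHM_SWP_TYPE swap entry that encodes
 * the page's map number while the page is being written back, so
 * that filemap_swapin() can find the page again in the meantime.
 * If the entry is still untouched after the write, it is cleared.
 */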
int filemap_swapout(struct vm_area_struct * vma,
        unsigned long offset,
        pte_t *page_table)
{
        int error;
        unsigned long page = pte_page(*page_table);
        unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

        set_pte(page_table, __pte(entry));

        invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
        error = filemap_write_page(vma, offset, page);
        if (pte_val(*page_table) == entry)
                pte_clear(page_table);
        return error;
}
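/*
 * Swap a page of a shared file mapping back in.  The "swap entry"
 * left behind by filemap_swapout() directly encodes the map number
 * of the page, so the pte can be rebuilt from it directly.
 */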
static pte_t filemap_swapin(struct vm_area_struct * vma,
        unsigned long offset,
        unsigned long entry)
{
        unsigned long page = SWP_OFFSET(entry);

        mem_map[page].count++;
        page = (page << PAGE_SHIFT) + PAGE_OFFSET;
        return mk_pte(page,vma->vm_page_prot);
}
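/*
 * The msync() helpers below walk the page tables of the region:
 * filemap_sync_pte() handles a single pte, writing dirty pages back
 * to the file and, for MS_INVALIDATE, tearing the mapping down.
 */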
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
        unsigned long address, unsigned int flags)
{
        pte_t pte = *ptep;
        unsigned long page;
        int error;

        if (!(flags & MS_INVALIDATE)) {
                if (!pte_present(pte))
                        return 0;
                if (!pte_dirty(pte))
                        return 0;
                set_pte(ptep, pte_mkclean(pte));
                invalidate_page(vma, address);
                page = pte_page(pte);
                mem_map[MAP_NR(page)].count++;
        } else {
                if (pte_none(pte))
                        return 0;
                pte_clear(ptep);
                invalidate_page(vma, address);
                if (!pte_present(pte)) {
                        swap_free(pte_val(pte));
                        return 0;
                }
                page = pte_page(pte);
                if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
                        free_page(page);
                        return 0;
                }
        }
        error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
        free_page(page);
        return error;
}
static inline int filemap_sync_pte_range(pmd_t * pmd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
        pte_t * pte;
        unsigned long end;
        int error;

        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
                printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
                pmd_clear(pmd);
                return 0;
        }
        pte = pte_offset(pmd, address);
        offset += address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte(pte, vma, address + offset, flags);
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return error;
}
static inline int filemap_sync_pmd_range(pgd_t * pgd,
        unsigned long address, unsigned long size,
        struct vm_area_struct *vma, unsigned int flags)
{
        pmd_t * pmd;
        unsigned long offset, end;
        int error;

        if (pgd_none(*pgd))
                return 0;
        if (pgd_bad(*pgd)) {
                printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
                pgd_clear(pgd);
                return 0;
        }
        pmd = pmd_offset(pgd, address);
        offset = address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        error = 0;
        do {
                error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return error;
}
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
        size_t size, unsigned int flags)
{
        pgd_t * dir;
        unsigned long end = address + size;
        int error = 0;

        dir = pgd_offset(current->mm, address);
        while (address < end) {
                error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        }
        invalidate_range(vma->vm_mm, end - size, end);
        return error;
}
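/*
 * Shared mappings need to be written back to the file on unmap, at
 * least asynchronously.
 */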
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
        filemap_sync(vma, start, len, MS_ASYNC);
}
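/*
 * Shared mappings need to be able to do the right thing at
 * close/unmap/sync.  They will also use the private file as
 * backing-store for swapping..
 */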
static struct vm_operations_struct file_shared_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        filemap_unmap,          /* unmap */
        NULL,                   /* protect */
        filemap_sync,           /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        filemap_swapout,        /* swapout */
        filemap_swapin,         /* swapin */
};
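/*
 * Private mappings just need to be able to load in the map.
 *
 * (this is actually used for shared mappings as well, if we
 * know they can't ever get write permissions..)
 */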
static struct vm_operations_struct file_private_mmap = {
        NULL,                   /* open */
        NULL,                   /* close */
        NULL,                   /* unmap */
        NULL,                   /* protect */
        NULL,                   /* sync */
        NULL,                   /* advise */
        filemap_nopage,         /* nopage */
        NULL,                   /* wppage */
        NULL,                   /* swapout */
        NULL,                   /* swapin */
};
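/*
 * Generic mmap() entry point for filesystems that use the page
 * cache: check that the inode is mappable, pick the shared or
 * private vm_ops, and enforce the alignment each one needs.
 */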
int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
{
        struct vm_operations_struct * ops;

        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
                ops = &file_shared_mmap;
                /* share_page() can only guarantee proper page sharing if
                 * the offsets are all page aligned. */
                if (vma->vm_offset & (PAGE_SIZE - 1))
                        return -EINVAL;
        } else {
                ops = &file_private_mmap;
                if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
                        return -EINVAL;
        }
        if (!inode->i_sb || !S_ISREG(inode->i_mode))
                return -EACCES;
        if (!inode->i_op || !inode->i_op->readpage)
                return -ENOEXEC;
        if (!IS_RDONLY(inode)) {
                inode->i_atime = CURRENT_TIME;
                inode->i_dirt = 1;
        }
        vma->vm_inode = inode;
        inode->i_count++;
        vma->vm_ops = ops;
        return 0;
}
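/*
 * Sync one region [start,end) of a vma to its backing file via the
 * vma's sync op; for MS_SYNC, the file's dirty buffers are also
 * flushed to disk before returning.
 */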
static int msync_interval(struct vm_area_struct * vma,
        unsigned long start, unsigned long end, int flags)
{
        if (!vma->vm_inode)
                return 0;
        if (vma->vm_ops->sync) {
                int error;
                error = vma->vm_ops->sync(vma, start, end-start, flags);
                if (error)
                        return error;
                if (flags & MS_SYNC)
                        return file_fsync(vma->vm_inode, NULL);
                return 0;
        }
        return 0;
}
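/*
 * The msync() system call: sync every vma that covers part of the
 * requested region.  Unmapped holes in the region are remembered
 * and reported as -EFAULT, but only after the mapped parts have
 * been synced.
 */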
asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct * vma;
        int unmapped_error, error;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                return -EINVAL;
        if (end == start)
                return 0;

        vma = find_vma(current, start);
        unmapped_error = 0;
        for (;;) {
                /* Still start < end. */
                if (!vma)
                        return -EFAULT;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -EFAULT;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags);
                                if (error)
                                        return error;
                        }
                        return unmapped_error;
                }
                /* Here vma->vm_start <= start < vma->vm_end < end. */
                error = msync_interval(vma, start, vma->vm_end, flags);
                if (error)
                        return error;
                start = vma->vm_end;
                vma = vma->vm_next;
        }
}