This source file includes the following definitions:
- invalidate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_mmap
- msync_interval
- sys_msync
/*
 * Generic file read and mmap handling: the page cache, read-ahead,
 * and the shared/private file-mapping and msync semantics used by
 * most "normal" filesystems.
 */

#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>

/*
 * Shared page-cache state: the total number of cached pages, and the
 * hash table used to look pages up by (inode, offset) - see
 * <linux/pagemap.h>.
 */
unsigned long page_cache_size = 0;
struct page * page_hash_table[PAGE_HASH_SIZE];

/*
 * Invalidate the page-cache pages of an inode from offset 'start'
 * onwards.  This is used on truncate: pages wholly beyond the new
 * size are freed, and a page that straddles it has its tail cleared.
 */
void invalidate_inode_pages(struct inode * inode, unsigned long start)
{
	struct page ** p;
	struct page * page;

repeat:
	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		unsigned long offset = page->offset;

		/* page wholly beyond the truncation point? Remove and free it. */
		if (offset >= start) {
			if (page->locked) {
				wait_on_page(page);
				goto repeat;
			}
			inode->i_nrpages--;
			if ((*p = page->next) != NULL)
				(*p)->prev = page->prev;
			page->dirty = 0;
			page->next = NULL;
			page->prev = NULL;
			remove_page_from_hash_queue(page);
			page->inode = NULL;
			free_page(page_address(page));
			continue;
		}
		p = &page->next;
		offset = start - offset;
		/* partial truncate: clear the part of the page beyond 'start' */
		if (offset < PAGE_SIZE)
			memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
	}
}
/*
 * Try to free a page from the page cache or the buffer cache, using
 * a simple clock scan over mem_map[].  'priority' controls how many
 * page slots are examined.  Returns 1 if a page was freed, 0 if not.
 */
int shrink_mmap(int priority, unsigned long limit)
{
	static int clock = 0;
	struct page * page;
	struct buffer_head *tmp, *bh;

	if (limit > high_memory)
		limit = high_memory;
	limit = MAP_NR(limit);
	if (clock >= limit)
		clock = 0;
	priority = (limit << 2) >> priority;
	page = mem_map + clock;
	while (priority-- > 0) {
		if (page->locked)
			goto next;

		/* regenerate the page's referenced bit from any
		   touched buffers in the page */
		bh = buffer_pages[MAP_NR(page_address(page))];
		if (bh) {
			tmp = bh;
			do {
				if (buffer_touched(tmp)) {
					clear_bit(BH_Touched, &tmp->b_state);
					page->referenced = 1;
				}
				tmp = tmp->b_this_page;
			} while (tmp != bh);
		}

		/* shared pages can't be freed: just mark them referenced;
		   referenced pages get one more trip around the clock */
		if (page->count > 1)
			page->referenced = 1;
		else if (page->referenced)
			page->referenced = 0;
		else if (page->count) {
			/* old, unshared, unreferenced: try to discard it */
			if (page->inode) {
				remove_page_from_hash_queue(page);
				remove_page_from_inode_queue(page);
				free_page(page_address(page));
				return 1;
			}
			if (bh && try_to_free_buffer(bh, &bh, 6))
				return 1;
		}
next:
		page++;
		clock++;
		if (clock >= limit) {
			clock = 0;
			page = mem_map;
		}
	}
	return 0;
}
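
/*
 * Example of the scan budget above (added for illustration): with
 * 'limit' page frames, "priority = (limit << 2) >> priority" examines
 * 4*limit page slots at priority 0 (the most desperate case, several
 * full clock sweeps), but only limit/16 slots at e.g. priority 6, so
 * routine calls do a little work and urgent calls do a lot.
 */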

/*
 * Drop a page from the page cache if the cache holds the only other
 * reference to it.  Returns 1 if the page was freed here, otherwise
 * the page's remaining use count.
 */
unsigned long page_unuse(unsigned long page)
{
	struct page * p = mem_map + MAP_NR(page);
	int count = p->count;

	if (count != 2)
		return count;
	if (!p->inode)
		return count;
	remove_page_from_hash_queue(p);
	remove_page_from_inode_queue(p);
	free_page(page);
	return 1;
}

/*
 * Update a page-cache copy of data that was written by some other
 * path (e.g. a regular write() going through the buffer cache), so
 * that mappings of the file stay coherent with it.
 */
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
	struct page * page;

	page = find_page(inode, pos & PAGE_MASK);
	if (page) {
		unsigned long addr;

		wait_on_page(page);
		addr = page_address(page);
		memcpy((void *) ((pos & ~PAGE_MASK) + addr), buf, count);
		free_page(addr);
	}
}
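
/*
 * Usage sketch (illustration only, not part of this file): a
 * filesystem whose write() path goes straight to the buffer cache
 * calls update_vm_cache() after each chunk is written, where a
 * "chunk" never crosses a page boundary.  "myfs_do_write" is a
 * made-up name:
 *
 *	written = myfs_do_write(inode, &file, buf, chunk);
 *	if (written > 0)
 *		update_vm_cache(inode, pos, buf, written);
 */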

/*
 * Try to read ahead in the file.  "page_cache" is a potentially free
 * page that we could use for the cache (if it is 0 we can try to
 * create one; the I/O is overlapped with the previous page finishing
 * anyway).  Returns a still-free page for re-use, or 0 if it was
 * consumed.
 */
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
	struct page * page;

	offset &= PAGE_MASK;
	if (!page_cache) {
		page_cache = __get_free_page(GFP_KERNEL);
		if (!page_cache)
			return 0;
	}
	if (offset >= inode->i_size)
		return page_cache;
#if 1
	page = find_page(inode, offset);
	if (page) {
		/* already cached: drop the reference find_page() took */
		page->count--;
		return page_cache;
	}
	/*
	 * Ok, add the new page to the inode and hash queues and
	 * start the I/O on it..
	 */
	page = mem_map + MAP_NR(page_cache);
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);

	inode->i_op->readpage(inode, page);

	/* drop the allocation reference: the page now lives in the cache */
	free_page(page_cache);
	return 0;
#else
	return page_cache;
#endif
}

/*
 * Wait for a page to become unlocked.  The page's use count is raised
 * while we sleep, so that it can't be freed from under us.
 */
void __wait_on_page(struct page *page)
{
	struct wait_queue wait = { current, NULL };

	page->count++;
	add_wait_queue(&page->wait, &wait);
repeat:
	current->state = TASK_UNINTERRUPTIBLE;
	if (page->locked) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&page->wait, &wait);
	page->count--;
	current->state = TASK_RUNNING;
}
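
/*
 * Note (added for illustration): callers normally use the
 * wait_on_page() inline from <linux/pagemap.h>, which only takes this
 * slow path when the page is actually locked; roughly (a sketch, the
 * header may differ in detail):
 *
 *	extern inline void wait_on_page(struct page * page)
 *	{
 *		if (page->locked)
 *			__wait_on_page(page);
 *	}
 */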

/*
 * This is a generic file read routine, and uses the
 * inode->i_op->readpage() function for the actual low-level stuff.
 * Read-ahead is done both for large reads (pre-reading up to
 * MAX_IO_PAGES pages) and for small sequential reads (prefetching
 * READAHEAD_PAGES pages past the current position).
 */
#define READAHEAD_PAGES 3
#define MAX_IO_PAGES 4

int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
	int read = 0, newpage = 0;
	unsigned long pos;
	unsigned long page_cache = 0;
	int pre_read = 0;

	if (count <= 0)
		return 0;

	pos = filp->f_pos;
	do {
		struct page *page;
		unsigned long offset, addr, nr;
		int i;
		off_t p;

		if (pos >= inode->i_size)
			break;
		offset = pos & ~PAGE_MASK;
		nr = PAGE_SIZE - offset;
		/*
		 * Try to find the data in the page cache..
		 */
		page = find_page(inode, pos & PAGE_MASK);
		if (page)
			goto found_page;

		/*
		 * Not cached: we need a free page frame to read into.
		 */
		if (!page_cache) {
			page_cache = __get_free_page(GFP_KERNEL);
			if (!page_cache) {
				if (!read)
					read = -ENOMEM;
				break;
			}
		}

		/*
		 * We may have slept in __get_free_page(): re-check the
		 * file size and the page cache before using the new page.
		 */
		if (pos >= inode->i_size)
			break;
		page = find_page(inode, pos & PAGE_MASK);
		if (page)
			goto found_page;

		/*
		 * Ok, add the new page to the inode and hash queues
		 * and start the I/O on it..
		 */
		page = mem_map + MAP_NR(page_cache);
		page_cache = 0;
		page->count++;
		page->uptodate = 0;
		page->error = 0;
		page->offset = pos & PAGE_MASK;
		add_page_to_inode_queue(inode, page);
		add_page_to_hash_queue(inode, page);

		inode->i_op->readpage(inode, page);

		/*
		 * Remember that this page had to be read in, for the
		 * read-ahead heuristics below.
		 */
		newpage = 1;

found_page:
		addr = page_address(page);
		if (nr > count)
			nr = count;
		/*
		 * A cache miss in the middle of a larger read: pre-read
		 * the following pages so the I/O can be batched.
		 */
		if (newpage && nr < count) {
			if (pre_read > 0)
				pre_read -= PAGE_SIZE;
			else {
				pre_read = (MAX_IO_PAGES-1) * PAGE_SIZE;
				if (pre_read > (count - nr))
					pre_read = count - nr;
				for (i=0, p=pos; i<pre_read; i+=PAGE_SIZE) {
					p += PAGE_SIZE;
					page_cache = try_to_read_ahead(inode, p, page_cache);
				}
			}
		}
		else
		/*
		 * A sequential reader that just consumed a page exactly
		 * to its boundary: prefetch the next READAHEAD_PAGES.
		 */
		if (newpage && nr == count && filp->f_reada
		    && !((pos + nr) & ~PAGE_MASK)) {
			for (i=0, p=pos; i<READAHEAD_PAGES; i++) {
				p += PAGE_SIZE;
				page_cache = try_to_read_ahead(inode, p, page_cache);
			}
		}
		wait_on_page(page);
		if (nr > inode->i_size - pos)
			nr = inode->i_size - pos;
		memcpy_tofs(buf, (void *) (addr + offset), nr);
		free_page(addr);
		buf += nr;
		pos += nr;
		read += nr;
		count -= nr;
	} while (count);

	filp->f_pos = pos;
	filp->f_reada = 1;
	if (page_cache)
		free_page(page_cache);
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	return read;
}
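
/*
 * Worked example of the read-ahead above (added for illustration,
 * assuming PAGE_SIZE = 4096): a 32kB read of an uncached file misses
 * on its first page and, since nr < count, pre-reads
 * (MAX_IO_PAGES-1)*PAGE_SIZE = 12kB; each later new page consumes one
 * page of that budget before the next 12kB batch is issued, so the
 * copy loop overlaps with the I/O.  A sequential reader that ends a
 * read exactly on a page boundary (nr == count, f_reada set) instead
 * prefetches the next READAHEAD_PAGES = 3 pages.
 */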

/*
 * Find (or create and read) the page at the given offset in the
 * inode's page cache, wait for it to become unlocked, and return its
 * address with the use count raised.  Returns 0 on allocation failure.
 */
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
	struct page * page;
	unsigned long new_page;

	page = find_page(inode, offset);
	if (page)
		goto found_page;
	new_page = __get_free_page(GFP_KERNEL);
	/* we may have slept: look in the cache again before using it */
	page = find_page(inode, offset);
	if (page) {
		if (new_page)
			free_page(new_page);
		goto found_page;
	}
	if (!new_page)
		return 0;
	page = mem_map + MAP_NR(new_page);
	new_page = 0;
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);
	inode->i_op->readpage(inode, page);
found_page:
	wait_on_page(page);
	return page_address(page);
}

/*
 * Page-fault handler for file mappings.  Semantics past the end of
 * the file differ: faulting a shared mapping there is an error (we
 * return 0), while a private mapping just gets a page.  With
 * "no_share" set, the page is copied so the faulting process gets
 * its own private instance.
 */
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
	unsigned long offset;
	struct inode * inode = area->vm_inode;
	unsigned long page;

	offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
	if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
		return 0;

	page = fill_page(inode, offset);
	if (page && no_share) {
		unsigned long new_page = __get_free_page(GFP_KERNEL);
		if (new_page)
			memcpy((void *) new_page, (void *) page, PAGE_SIZE);
		free_page(page);
		return new_page;
	}
	return page;
}

/*
 * Try to write a page of a shared mapping back to its backing store.
 * May return -EIO if, for example, the disk is full.
 */
static int filemap_write_page(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long page)
{
	int old_fs;
	unsigned long size, result;
	struct file file;
	struct inode * inode;
	struct buffer_head * bh;

	bh = buffer_pages[MAP_NR(page)];
	if (bh) {
		/* the page has buffers: just mark them all dirty */
		struct buffer_head * tmp = bh;
		do {
			mark_buffer_dirty(tmp, 0);
			tmp = tmp->b_this_page;
		} while (tmp != bh);
		return 0;
	}

	inode = vma->vm_inode;
	file.f_op = inode->i_op->default_file_ops;
	if (!file.f_op->write)
		return -EIO;
	size = offset + PAGE_SIZE;
	/* refuse to extend the file size.. */
	if (S_ISREG(inode->i_mode)) {
		if (size > inode->i_size)
			size = inode->i_size;
		/* 'size' should never be less than 'offset', but be safe.. */
		if (size < offset)
			return -EIO;
	}
	size -= offset;
	/* set up a dummy in-kernel file and use the write() method */
	file.f_mode = 3;
	file.f_flags = 0;
	file.f_count = 1;
	file.f_inode = inode;
	file.f_pos = offset;
	file.f_reada = 0;
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	result = file.f_op->write(inode, &file, (const char *) page, size);
	set_fs(old_fs);
	if (result != size)
		return -EIO;
	return 0;
}

/*
 * "Swap out" a page of a shared file mapping: while the page is being
 * written out (and still exists in memory), the page table holds a
 * non-present entry encoding the page's mem_map index, so that
 * filemap_swapin() can re-use the page immediately if it is touched
 * again.  Once the write is done, the entry is cleared: the data now
 * lives in the file itself.
 */
int filemap_swapout(struct vm_area_struct * vma,
	unsigned long offset,
	pte_t *page_table)
{
	int error;
	unsigned long page = pte_page(*page_table);
	unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

	set_pte(page_table, __pte(entry));
	/* the pte changed: flush it from the TLB */
	invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
	error = filemap_write_page(vma, offset, page);
	if (pte_val(*page_table) == entry)
		pte_clear(page_table);
	return error;
}

/*
 * filemap_swapin() is called only for entries written by
 * filemap_swapout() above: the "swap offset" is really the mem_map
 * index of a page that is busy being written out, so we can just map
 * the very same page back in.
 */
static pte_t filemap_swapin(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long entry)
{
	unsigned long page = SWP_OFFSET(entry);

	mem_map[page].count++;
	page = (page << PAGE_SHIFT) + PAGE_OFFSET;
	return mk_pte(page,vma->vm_page_prot);
}
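
/*
 * Worked example of the round trip above (added for illustration):
 * when the page at physical address P is unmapped by
 * filemap_swapout(), the pte becomes SWP_ENTRY(SHM_SWP_TYPE,
 * MAP_NR(P)) - a non-present entry whose "offset" is just the mem_map
 * index of the page - while the data is written back.  If the process
 * touches the address before the write finishes, filemap_swapin()
 * recovers the index with SWP_OFFSET(entry), bumps the page's count
 * and builds a new pte for the same physical page: no disk read is
 * ever needed.
 */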

/*
 * Sync one pte of a shared mapping: write the page back if it is
 * dirty, and/or drop the mapping entirely if MS_INVALIDATE is set.
 */
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;
	unsigned long page;
	int error;

	if (!(flags & MS_INVALIDATE)) {
		if (!pte_present(pte))
			return 0;
		if (!pte_dirty(pte))
			return 0;
		set_pte(ptep, pte_mkclean(pte));
		invalidate_page(vma, address);
		page = pte_page(pte);
		mem_map[MAP_NR(page)].count++;
	} else {
		if (pte_none(pte))
			return 0;
		pte_clear(ptep);
		invalidate_page(vma, address);
		if (!pte_present(pte)) {
			swap_free(pte_val(pte));
			return 0;
		}
		page = pte_page(pte);
		if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
			free_page(page);
			return 0;
		}
	}
	error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
	free_page(page);
	return error;
}

static inline int filemap_sync_pte_range(pmd_t * pmd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
	pte_t * pte;
	unsigned long end;
	int error;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
		pmd_clear(pmd);
		return 0;
	}
	pte = pte_offset(pmd, address);
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte(pte, vma, address + offset, flags);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return error;
}

static inline int filemap_sync_pmd_range(pgd_t * pgd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned int flags)
{
	pmd_t * pmd;
	unsigned long offset, end;
	int error;

	if (pgd_none(*pgd))
		return 0;
	if (pgd_bad(*pgd)) {
		printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
		pgd_clear(pgd);
		return 0;
	}
	pmd = pmd_offset(pgd, address);
	offset = address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return error;
}

/*
 * Write back (and perhaps invalidate) the pages of a shared mapping
 * in the given range, by walking the current process's page tables.
 */
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	dir = pgd_offset(current->mm, address);
	while (address < end) {
		error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	invalidate_range(vma->vm_mm, end - size, end);
	return error;
}

/*
 * This handles (potentially partial) area unmaps: sync the range
 * back to the file before the pages go away.
 */
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
	filemap_sync(vma, start, len, MS_ASYNC);
}

/*
 * Shared mappings need to be able to do the right thing at
 * close/unmap/sync, and use the mapped file as backing store
 * for "swapping".
 */
static struct vm_operations_struct file_shared_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	filemap_unmap,		/* unmap - we need to sync the pages */
	NULL,			/* protect */
	filemap_sync,		/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage */
	NULL,			/* wppage */
	filemap_swapout,	/* swapout */
	filemap_swapin,		/* swapin */
};

/*
 * Private mappings just need to be able to load in the map.
 * (This is also used for shared mappings that can never get
 * write permissions.)
 */
static struct vm_operations_struct file_private_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage */
	NULL,			/* wppage */
	NULL,			/* swapout */
	NULL,			/* swapin */
};

/* This is used for a general mmap of a disk file */
int generic_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
{
	struct vm_operations_struct * ops;

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
		ops = &file_shared_mmap;
		/* writable shared mappings need page-aligned offsets
		   for proper page sharing */
		if (vma->vm_offset & (PAGE_SIZE - 1))
			return -EINVAL;
	} else {
		ops = &file_private_mmap;
		if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
			return -EINVAL;
	}
	if (!inode->i_sb || !S_ISREG(inode->i_mode))
		return -EACCES;
	if (!inode->i_op || !inode->i_op->readpage)
		return -ENOEXEC;
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	vma->vm_inode = inode;
	inode->i_count++;
	vma->vm_ops = ops;
	return 0;
}
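
#if 0	/* Illustration only (not in the original file): how a
	   filesystem might plug the generic routines into its
	   file_operations.  The "myfs_*" names are hypothetical; the
	   slot order follows struct file_operations in <linux/fs.h>. */
static struct file_operations myfs_file_operations = {
	NULL,			/* lseek - default */
	generic_file_read,	/* read - through the page cache */
	myfs_file_write,	/* write */
	NULL,			/* readdir */
	NULL,			/* select */
	NULL,			/* ioctl */
	generic_mmap,		/* mmap - pages in via i_op->readpage */
	NULL,			/* open */
	NULL,			/* release */
	myfs_sync_file,		/* fsync */
};
#endif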

/*
 * Sync (and optionally invalidate) part of a single file mapping;
 * with MS_SYNC, also write out the inode and its dirty buffers.
 */
static int msync_interval(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int flags)
{
	if (!vma->vm_inode)
		return 0;
	if (vma->vm_ops->sync) {
		int error;
		error = vma->vm_ops->sync(vma, start, end-start, flags);
		if (error)
			return error;
		if (flags & MS_SYNC)
			return file_fsync(vma->vm_inode, NULL);
		return 0;
	}
	return 0;
}

asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error, error;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		return -EINVAL;
	if (end == start)
		return 0;
	/*
	 * Walk the vma's covering [start, end): if the range contains
	 * unmapped holes, remember -EFAULT but still sync what we can.
	 */
	vma = find_vma(current, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		if (!vma)
			return -EFAULT;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -EFAULT;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					return error;
			}
			return unmapped_error;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			return error;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
}
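
/*
 * Userspace view of the above (added for illustration): a process
 * that updated a file through a shared mapping forces the data out
 * with
 *
 *	addr = mmap(0, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	... modify the mapped data ...
 *	msync(addr, len, MS_SYNC);
 *
 * MS_SYNC makes msync_interval() follow the filemap_sync() pass with
 * a file_fsync() call, flushing the inode's dirty buffers as well.
 */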