This source file includes following definitions.
- invalidate_inode_pages
- truncate_inode_pages
- shrink_mmap
- page_unuse
- update_vm_cache
- try_to_read_ahead
- __wait_on_page
- generic_file_read
- fill_page
- filemap_nopage
- filemap_write_page
- filemap_swapout
- filemap_swapin
- filemap_sync_pte
- filemap_sync_pte_range
- filemap_sync_pmd_range
- filemap_sync
- filemap_unmap
- generic_file_mmap
- msync_interval
- sys_msync
1
2
3
4
5
6
7
8
9
10
11
12 #include <linux/stat.h>
13 #include <linux/sched.h>
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/shm.h>
17 #include <linux/errno.h>
18 #include <linux/mman.h>
19 #include <linux/string.h>
20 #include <linux/malloc.h>
21 #include <linux/fs.h>
22 #include <linux/locks.h>
23 #include <linux/pagemap.h>
24 #include <linux/swap.h>
25
26 #include <asm/segment.h>
27 #include <asm/system.h>
28 #include <asm/pgtable.h>
29
30
31
32
33
34
35
36
/* Number of pages currently held in the page cache. */
unsigned long page_cache_size = 0;
/* Hash table mapping (inode, offset) pairs to cached pages. */
struct page * page_hash_table[PAGE_HASH_SIZE];
39
40
41
42
43
44
45
46
47
48
/*
 * Invalidate the page cache for an inode: unlink every unlocked page
 * from the inode's page list and the global hash queue and free it.
 * Pages locked for I/O are skipped rather than waited for, so this is
 * a best-effort invalidation (contrast truncate_inode_pages, which
 * waits and restarts).
 */
void invalidate_inode_pages(struct inode * inode)
{
	struct page ** p;
	struct page * page;

	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		/* can't touch a page while it is locked for I/O */
		if (page->locked) {
			p = &page->next;
			continue;
		}
		inode->i_nrpages--;
		/* unlink from the inode's list; *p stays put so the loop
		 * naturally examines the successor next iteration */
		if ((*p = page->next) != NULL)
			(*p)->prev = page->prev;
		page->dirty = 0;
		page->next = NULL;
		page->prev = NULL;
		remove_page_from_hash_queue(page);
		page->inode = NULL;
		free_page(page_address(page));
		continue;
	}
}
72
73
74
75
76
/*
 * Throw away all cached pages at or beyond byte offset 'start'
 * (used when an inode is truncated).  Unlike invalidate_inode_pages
 * this waits for locked pages: it sleeps on the page and then restarts
 * the whole scan, since the list may have changed while sleeping.
 * The page that straddles 'start' is kept but its tail is zeroed so
 * stale data cannot be seen through existing mappings.
 */
void truncate_inode_pages(struct inode * inode, unsigned long start)
{
	struct page ** p;
	struct page * page;

repeat:
	p = &inode->i_pages;
	while ((page = *p) != NULL) {
		unsigned long offset = page->offset;

		/* page wholly beyond the new size: free it */
		if (offset >= start) {
			if (page->locked) {
				/* must restart: the list can change while we sleep */
				wait_on_page(page);
				goto repeat;
			}
			inode->i_nrpages--;
			if ((*p = page->next) != NULL)
				(*p)->prev = page->prev;
			page->dirty = 0;
			page->next = NULL;
			page->prev = NULL;
			remove_page_from_hash_queue(page);
			page->inode = NULL;
			free_page(page_address(page));
			continue;
		}
		p = &page->next;
		offset = start - offset;
		/* partial page at the truncation point: zero its tail */
		if (offset < PAGE_SIZE)
			memset((void *) (offset + page_address(page)), 0, PAGE_SIZE - offset);
	}
}
111
/*
 * Try to reclaim one page from the page cache / buffer cache using a
 * clock (second-chance) sweep over mem_map.  'priority' scales how many
 * pages are examined per call (lower priority value => longer sweep);
 * 'limit' caps the physical range scanned.  Returns 1 if a page (or a
 * buffer page) was freed, 0 otherwise.  The static 'clock' hand keeps
 * its position across calls.
 */
int shrink_mmap(int priority, unsigned long limit)
{
	static int clock = 0;
	struct page * page;
	struct buffer_head *tmp, *bh;

	if (limit > high_memory)
		limit = high_memory;
	limit = MAP_NR(limit);
	if (clock >= limit)
		clock = 0;
	/* number of pages to examine this pass */
	priority = (limit<<2) >> priority;
	page = mem_map + clock;
	while (priority-- > 0) {
		if (page->locked)
			goto next;

		/* a recently-touched buffer on this page counts as a
		 * reference to the page itself */
		bh = buffer_pages[MAP_NR(page_address(page))];
		if (bh) {
			tmp = bh;
			do {
				if (buffer_touched(tmp)) {
					clear_bit(BH_Touched, &tmp->b_state);
					page->referenced = 1;
				}
				tmp = tmp->b_this_page;
			} while (tmp != bh);
		}

		/* second-chance logic: pages with extra users or a set
		 * reference bit survive this pass; only an unreferenced
		 * page with a single (cache) user may be reclaimed */
		if (page->count > 1)
			page->referenced = 1;
		else if (page->referenced)
			page->referenced = 0;
		else if (page->count) {
			/* sole reference is the page cache: drop it */
			if (page->inode) {
				remove_page_from_hash_queue(page);
				remove_page_from_inode_queue(page);
				free_page(page_address(page));
				return 1;
			}
			/* otherwise try to free it as a buffer page */
			if (bh && try_to_free_buffer(bh, &bh, 6))
				return 1;
		}
next:
		page++;
		clock++;
		if (clock >= limit) {
			clock = 0;
			page = mem_map;
		}
	}
	return 0;
}
173
174
175
176
177
178
179
/*
 * Release a page from the page cache if the cache is its only other
 * user.  count == 2 means "the caller plus the page cache"; anything
 * else means someone else is still using the page, so we leave it
 * alone and return the current count.  Returns 1 after a successful
 * release.
 */
unsigned long page_unuse(unsigned long page)
{
	struct page * p = mem_map + MAP_NR(page);
	int count = p->count;

	if (count != 2)
		return count;
	if (!p->inode)
		return count;
	remove_page_from_hash_queue(p);
	remove_page_from_inode_queue(p);
	free_page(page);
	return 1;
}
194
195
196
197
198
/*
 * Keep the page cache coherent with a write: copy the data being
 * written (buf, count bytes at file position pos) into any cached
 * pages that overlap the range.  Pages not in the cache are simply
 * skipped.  find_page() takes a reference, hence the free_page()
 * after copying.
 */
void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count)
{
	unsigned long offset, len;

	offset = (pos & ~PAGE_MASK);
	pos = pos & PAGE_MASK;
	len = PAGE_SIZE - offset;
	do {
		struct page * page;

		if (len > count)
			len = count;
		page = find_page(inode, pos);
		if (page) {
			unsigned long addr;

			/* wait for any in-flight I/O before overwriting */
			wait_on_page(page);
			addr = page_address(page);
			memcpy((void *) (offset + addr), buf, len);
			free_page(addr);	/* drop find_page's reference */
		}
		count -= len;
		buf += len;
		/* subsequent iterations start at a page boundary */
		len = PAGE_SIZE;
		offset = 0;
		pos += PAGE_SIZE;
	} while (count);
}
227
228
229
230
231
232
/*
 * Opportunistically start asynchronous read-ahead of one page at
 * 'offset'.  'page_cache' is an optional pre-allocated spare page; the
 * function returns a spare page for the caller to reuse (either the
 * one passed in, if it went unused, or 0 if it was consumed to start
 * the read).  Past EOF, or if the page is already cached, the spare is
 * handed back untouched.
 */
static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache)
{
	struct page * page;

	offset &= PAGE_MASK;
	if (!page_cache) {
		page_cache = __get_free_page(GFP_KERNEL);
		if (!page_cache)
			return 0;
	}
	if (offset >= inode->i_size)
		return page_cache;
#if 1
	page = find_page(inode, offset);
	if (page) {
		/* already cached: drop the reference find_page took */
		page->count--;
		return page_cache;
	}

	/* insert the spare page into the cache and kick off the read */
	page = mem_map + MAP_NR(page_cache);
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);

	inode->i_op->readpage(inode, page);

	/* drop our extra reference; the cache keeps the page alive */
	free_page(page_cache);
	return 0;
#else
	return page_cache;
#endif
}
270
271
272
273
/*
 * Sleep (uninterruptibly) until the page is unlocked.  An extra page
 * reference is taken for the duration so the page cannot be freed from
 * under us while we sleep.  Re-checks 'locked' after every wakeup to
 * tolerate spurious wakeups.
 */
void __wait_on_page(struct page *page)
{
	struct wait_queue wait = { current, NULL };

	page->count++;
	add_wait_queue(&page->wait, &wait);
repeat:
	current->state = TASK_UNINTERRUPTIBLE;
	if (page->locked) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&page->wait, &wait);
	page->count--;
	current->state = TASK_RUNNING;
}
290
291
292
293
294
295
296
/* How far ahead of the current position we start asynchronous reads. */
#define MAX_READAHEAD (PAGE_SIZE*4)

/*
 * Generic read path through the page cache.  For each page in the
 * requested range: look it up in the cache, otherwise allocate a page,
 * insert it and start readpage() on it; while a page's I/O is pending,
 * use the wait time to fire off read-ahead for the following pages.
 * Copies data to the user buffer with memcpy_tofs.  Returns the number
 * of bytes read, or a negative errno if nothing could be read.
 */
int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count)
{
	int read = 0;
	unsigned long pos;
	unsigned long page_cache = 0;	/* spare pre-allocated page */

	if (count <= 0)
		return 0;

	pos = filp->f_pos;
	do {
		struct page *page;
		unsigned long offset, addr, nr;

		if (pos >= inode->i_size)
			break;
		offset = pos & ~PAGE_MASK;
		nr = PAGE_SIZE - offset;

		/* fast path: page already in the cache */
		page = find_page(inode, pos & PAGE_MASK);
		if (page)
			goto found_page;

		/* make sure we have a spare page before re-checking;
		 * __get_free_page may sleep */
		if (!page_cache) {
			page_cache = __get_free_page(GFP_KERNEL);
			if (!page_cache) {
				if (!read)
					read = -ENOMEM;
				break;
			}
		}

		/* re-check size and cache: both may have changed while
		 * we slept in the allocator */
		if (pos >= inode->i_size)
			break;
		page = find_page(inode, pos & PAGE_MASK);
		if (page)
			goto found_page;

		/* still missing: insert our spare page and start the read */
		page = mem_map + MAP_NR(page_cache);
		page_cache = 0;
		page->count++;
		page->uptodate = 0;
		page->error = 0;
		page->offset = pos & PAGE_MASK;
		add_page_to_inode_queue(inode, page);
		add_page_to_hash_queue(inode, page);

		inode->i_op->readpage(inode, page);

found_page:
		addr = page_address(page);
		if (nr > count)
			nr = count;

		/* if the page is still under I/O, use the wait to kick
		 * off read-ahead for subsequent pages (only when the
		 * caller will want more, or has a sequential pattern) */
		if (page->locked) {
			if (nr < count || filp->f_reada) {
				unsigned long ahead = 0;
				do {
					ahead += PAGE_SIZE;
					page_cache = try_to_read_ahead(inode, pos + ahead, page_cache);
				} while (ahead < MAX_READAHEAD);
			}
			__wait_on_page(page);
		}
		/* clamp to EOF (i_size may be mid-page) */
		if (nr > inode->i_size - pos)
			nr = inode->i_size - pos;
		memcpy_tofs(buf, (void *) (addr + offset), nr);
		free_page(addr);	/* drop the page reference */
		buf += nr;
		pos += nr;
		read += nr;
		count -= nr;
	} while (count);

	filp->f_pos = pos;
	filp->f_reada = 1;	/* mark sequential access for next time */
	if (page_cache)
		free_page(page_cache);	/* unused spare */
	if (!IS_RDONLY(inode)) {
		inode->i_atime = CURRENT_TIME;
		inode->i_dirt = 1;
	}
	return read;
}
401
402
403
404
405
/*
 * Find or create the cached page at 'offset' for this inode, start a
 * read on it if newly created, wait until the I/O completes and return
 * its (referenced) kernel address, or 0 on allocation failure.  Note
 * the second find_page() after the (possibly sleeping) allocation: it
 * closes the race where another process inserted the page meanwhile.
 */
static inline unsigned long fill_page(struct inode * inode, unsigned long offset)
{
	struct page * page;
	unsigned long new_page;

	page = find_page(inode, offset);
	if (page)
		goto found_page;
	new_page = __get_free_page(GFP_KERNEL);
	/* re-check: we may have slept in the allocator */
	page = find_page(inode, offset);
	if (page) {
		if (new_page)
			free_page(new_page);
		goto found_page;
	}
	if (!new_page)
		return 0;
	page = mem_map + MAP_NR(new_page);
	new_page = 0;
	page->count++;
	page->uptodate = 0;
	page->error = 0;
	page->offset = offset;
	add_page_to_inode_queue(inode, page);
	add_page_to_hash_queue(inode, page);
	inode->i_op->readpage(inode, page);
found_page:
	wait_on_page(page);
	return page_address(page);
}
436
437
438
439
440
441
/*
 * Page-fault (nopage) handler for file-backed mappings.  Translates
 * the faulting address to a file offset and pulls the page in via
 * fill_page().  If 'no_share' is set (private mapping about to be
 * written), a private copy is returned instead of the shared cache
 * page.  Returns 0 on failure (out of memory / out of range).
 */
static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share)
{
	unsigned long offset;
	struct inode * inode = area->vm_inode;
	unsigned long page;

	offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset;
	/* a shared mapping past EOF gets no page (SIGBUS upstream) */
	if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm)
		return 0;

	page = fill_page(inode, offset);
	if (page && no_share) {
		/* give the faulter a private copy of the cache page */
		unsigned long new_page = __get_free_page(GFP_KERNEL);
		if (new_page)
			memcpy((void *) new_page, (void *) page, PAGE_SIZE);
		free_page(page);
		return new_page;
	}
	return page;
}
462
463
464
465
466
/*
 * Write one page of a shared file mapping back to its file.  If the
 * page is backed by buffer-cache buffers, just mark them dirty and let
 * bdflush do the I/O.  Otherwise synthesize a temporary struct file
 * and call the filesystem's write() with the kernel segment selected,
 * clamping the length to i_size for regular files.  Returns 0 on
 * success, -EIO on failure.
 */
static int filemap_write_page(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long page)
{
	int old_fs;
	unsigned long size, result;
	struct file file;
	struct inode * inode;
	struct buffer_head * bh;

	bh = buffer_pages[MAP_NR(page)];
	if (bh) {
		/* buffer-backed page: dirty the buffers and let the
		 * buffer cache write them out */
		struct buffer_head * tmp = bh;
		do {
			mark_buffer_dirty(tmp, 0);
			tmp = tmp->b_this_page;
		} while (tmp != bh);
		return 0;
	}

	inode = vma->vm_inode;
	file.f_op = inode->i_op->default_file_ops;
	if (!file.f_op->write)
		return -EIO;
	size = offset + PAGE_SIZE;
	/* for regular files, never write past the end of the file */
	if (S_ISREG(inode->i_mode)) {
		if (size > inode->i_size)
			size = inode->i_size;
		/* offset beyond EOF: nothing sane to write */
		if (size < offset)
			return -EIO;
	}
	size -= offset;
	/* minimal fake file handle for the ->write call */
	file.f_mode = 3;
	file.f_flags = 0;
	file.f_count = 1;
	file.f_inode = inode;
	file.f_pos = offset;
	file.f_reada = 0;
	old_fs = get_fs();
	set_fs(KERNEL_DS);	/* 'page' is a kernel address */
	result = file.f_op->write(inode, &file, (const char *) page, size);
	set_fs(old_fs);
	if (result != size)
		return -EIO;
	return 0;
}
516
517
518
519
520
521
522
523
524
525
526
527
/*
 * Swap-out handler for shared file mappings: instead of going to swap,
 * the page is written back to the file.  The pte is temporarily set to
 * a SHM_SWP_TYPE swap entry encoding the page's frame number; if it is
 * still unchanged after the write-back (nobody faulted it back in), it
 * is cleared for good.  Returns the write-back error status.
 */
int filemap_swapout(struct vm_area_struct * vma,
	unsigned long offset,
	pte_t *page_table)
{
	int error;
	unsigned long page = pte_page(*page_table);
	unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page));

	set_pte(page_table, __pte(entry));
	/* flush the stale translation before writing the page out */
	invalidate_page(vma, (offset + vma->vm_start - vma->vm_offset));
	error = filemap_write_page(vma, offset, page);
	if (pte_val(*page_table) == entry)
		pte_clear(page_table);
	return error;
}
544
545
546
547
548
549
550
/*
 * Swap-in handler matching filemap_swapout: the "swap entry" is just
 * the physical frame number, so reconstruct the pte directly from it.
 * Bumps the frame's reference count for the new mapping.
 */
static pte_t filemap_swapin(struct vm_area_struct * vma,
	unsigned long offset,
	unsigned long entry)
{
	unsigned long page = SWP_OFFSET(entry);

	mem_map[page].count++;
	/* frame number -> kernel virtual address */
	page = (page << PAGE_SHIFT) + PAGE_OFFSET;
	return mk_pte(page,vma->vm_page_prot);
}
561
562
/*
 * Sync a single pte of a shared file mapping.  Without MS_INVALIDATE,
 * only present+dirty pages are cleaned and written back.  With
 * MS_INVALIDATE the pte is torn down as well: swap entries are freed,
 * clean pages (or a pure-invalidate request) are just dropped, dirty
 * pages are written back first.  Returns the write-back error status.
 */
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;
	unsigned long page;
	int error;

	if (!(flags & MS_INVALIDATE)) {
		if (!pte_present(pte))
			return 0;
		if (!pte_dirty(pte))
			return 0;
		/* clean the pte, keep the mapping */
		set_pte(ptep, pte_mkclean(pte));
		invalidate_page(vma, address);
		page = pte_page(pte);
		/* extra reference: keep the page across the write-back */
		mem_map[MAP_NR(page)].count++;
	} else {
		if (pte_none(pte))
			return 0;
		/* tear the mapping down */
		pte_clear(ptep);
		invalidate_page(vma, address);
		if (!pte_present(pte)) {
			/* it was a swap entry, not a real page */
			swap_free(pte_val(pte));
			return 0;
		}
		page = pte_page(pte);
		/* clean page, or invalidate-only request: no write-back */
		if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
			free_page(page);
			return 0;
		}
	}
	error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
	free_page(page);	/* drop the reference taken above */
	return error;
}
598
/*
 * Sync all ptes covered by one pmd entry, clamped to the pmd's span.
 * 'offset' accumulates the vma-relative base so filemap_sync_pte can
 * reconstruct the virtual address.  Errors from individual ptes are
 * OR-ed together.  A corrupt pmd is reported and cleared.
 */
static inline int filemap_sync_pte_range(pmd_t * pmd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
	pte_t * pte;
	unsigned long end;
	int error;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
		pmd_clear(pmd);
		return 0;
	}
	pte = pte_offset(pmd, address);
	/* switch to pmd-relative addressing, remembering the base */
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte(pte, vma, address + offset, flags);
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return error;
}
628
/*
 * Sync all pmd entries covered by one pgd entry, clamped to the pgd's
 * span; walks each pmd with filemap_sync_pte_range and OR-s the error
 * results together.  A corrupt pgd is reported and cleared.
 */
static inline int filemap_sync_pmd_range(pgd_t * pgd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned int flags)
{
	pmd_t * pmd;
	unsigned long offset, end;
	int error;

	if (pgd_none(*pgd))
		return 0;
	if (pgd_bad(*pgd)) {
		printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
		pgd_clear(pgd);
		return 0;
	}
	pmd = pmd_offset(pgd, address);
	/* switch to pmd-relative addressing, remembering the base */
	offset = address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return error;
}
658
/*
 * The 'sync' vm operation for shared file mappings: walk the page
 * tables over [address, address+size) and sync/invalidate each pte
 * according to 'flags' (MS_* semantics).  The TLB is flushed for the
 * whole range at the end.  Returns the OR of all per-pte errors.
 */
static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	dir = pgd_offset(current->mm, address);
	while (address < end) {
		error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	}
	/* 'address' has been advanced; recompute the original range */
	invalidate_range(vma->vm_mm, end - size, end);
	return error;
}
675
676
677
678
/*
 * The 'unmap' vm operation for shared mappings: write back (async) any
 * dirty pages in the range being unmapped.
 */
static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len)
{
	filemap_sync(vma, start, len, MS_ASYNC);
}
683
684
685
686
687
688
/*
 * vm_operations for shared, writable file mappings: pages come from
 * the page cache and dirty pages are written back to the file.
 * NOTE(review): field comments assume the vm_operations_struct slot
 * order (open, close, unmap, protect, sync, advise, nopage, wppage,
 * swapout, swapin) — confirm against the mm.h this tree uses.
 */
static struct vm_operations_struct file_shared_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	filemap_unmap,		/* unmap: write back before tearing down */
	NULL,			/* protect */
	filemap_sync,		/* sync: msync support */
	NULL,			/* advise */
	filemap_nopage,		/* nopage: demand-page from the file */
	NULL,			/* wppage */
	filemap_swapout,	/* swapout: write page back to the file */
	filemap_swapin,		/* swapin */
};
701
702
703
704
705
706
707
/*
 * vm_operations for private (copy-on-write) file mappings: only
 * demand paging is needed; modified pages go to swap via the default
 * mechanisms, never back to the file.  Slot order as noted on
 * file_shared_mmap above — confirm against mm.h.
 */
static struct vm_operations_struct file_private_mmap = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	filemap_nopage,		/* nopage: demand-page from the file */
	NULL,			/* wppage */
	NULL,			/* swapout */
	NULL,			/* swapin */
};
720
721
722 int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
723 {
724 struct vm_operations_struct * ops;
725
726 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
727 ops = &file_shared_mmap;
728
729
730 if (vma->vm_offset & (PAGE_SIZE - 1))
731 return -EINVAL;
732 } else {
733 ops = &file_private_mmap;
734 if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
735 return -EINVAL;
736 }
737 if (!inode->i_sb || !S_ISREG(inode->i_mode))
738 return -EACCES;
739 if (!inode->i_op || !inode->i_op->readpage)
740 return -ENOEXEC;
741 if (!IS_RDONLY(inode)) {
742 inode->i_atime = CURRENT_TIME;
743 inode->i_dirt = 1;
744 }
745 vma->vm_inode = inode;
746 inode->i_count++;
747 vma->vm_ops = ops;
748 return 0;
749 }
750
751
752
753
754
755
/*
 * msync one interval that lies entirely inside a single vma.
 * Anonymous mappings (no vm_inode) are a no-op.  If the vma has a
 * sync operation, run it; for MS_SYNC additionally flush the inode's
 * dirty state to disk with file_fsync.  Returns 0 or a negative errno.
 */
static int msync_interval(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int flags)
{
	if (!vma->vm_inode)
		return 0;
	if (vma->vm_ops->sync) {
		int error;
		error = vma->vm_ops->sync(vma, start, end-start, flags);
		if (error)
			return error;
		/* synchronous request: push metadata/data to disk too */
		if (flags & MS_SYNC)
			return file_fsync(vma->vm_inode, NULL);
		return 0;
	}
	return 0;
}
772
/*
 * The msync(2) system call: flush changes in [start, start+len) back
 * to the mapped files.  'start' must be page aligned; 'len' is rounded
 * up to a page multiple.  The range is walked vma by vma; holes in the
 * mapping are remembered as -EFAULT but do not abort the remaining
 * work (the first hard sync error does).
 */
asmlinkage int sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error, error;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	len = (len + ~PAGE_MASK) & PAGE_MASK;	/* round up to pages */
	end = start + len;
	if (end < start)	/* wrap-around check */
		return -EINVAL;
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		return -EINVAL;
	if (end == start)
		return 0;

	/* walk the vma list covering [start, end) */
	vma = find_vma(current, start);
	unmapped_error = 0;
	for (;;) {
		/* ran past the last vma: nothing maps the rest */
		if (!vma)
			return -EFAULT;
		/* gap before this vma counts as an unmapped hole */
		if (start < vma->vm_start) {
			unmapped_error = -EFAULT;
			start = vma->vm_start;
		}
		/* the remaining range ends inside this vma: finish up */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					return error;
			}
			return unmapped_error;
		}
		/* sync this vma's part and move to the next one */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			return error;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
}