This source file includes following definitions.
- show_swap_cache_info
- add_to_swap_cache
- init_swap_cache
- rw_swap_page
- get_swap_page
- swap_duplicate
- swap_free
- swap_in
- try_to_swap_out
- swap_out_pmd
- swap_out_pgd
- swap_out_vma
- swap_out_process
- swap_out
- try_to_free_page
- add_mem_queue
- remove_mem_queue
- free_pages_ok
- check_free_buffers
- free_pages
- mark_used
- __get_free_pages
- __get_dma_pages
- show_free_areas
- unuse_pte
- unuse_pmd
- unuse_pgd
- unuse_vma
- unuse_process
- try_to_unuse
- sys_swapoff
- sys_swapon
- si_swapinfo
- free_area_init
1
2
3
4
5
6
7
8
9
10
11
12 #include <linux/mm.h>
13 #include <linux/sched.h>
14 #include <linux/head.h>
15 #include <linux/kernel.h>
16 #include <linux/kernel_stat.h>
17 #include <linux/errno.h>
18 #include <linux/string.h>
19 #include <linux/stat.h>
20 #include <linux/swap.h>
21 #include <linux/fs.h>
22
23 #include <asm/dma.h>
24 #include <asm/system.h>
25 #include <asm/bitops.h>
26 #include <asm/pgtable.h>
27
#define MAX_SWAPFILES 8

/* Per-area flag bits: SWP_USED marks a configured slot; SWP_WRITEOK
 * (which includes SWP_USED) means new pages may be allocated on it. */
#define SWP_USED 1
#define SWP_WRITEOK 3

int min_free_pages = 20;	/* free-page low-water mark; retuned in free_area_init() */

static int nr_swapfiles = 0;
static struct wait_queue * lock_queue = NULL;	/* sleepers waiting on a swap_lockmap bit */

/* Priority-ordered list of active swap areas (indices into swap_info):
 * head is the highest-priority area, next is where get_swap_page() resumes. */
static struct {
	int head;	/* head of the priority list */
	int next;	/* swapfile to be used next */
} swap_list = {-1, -1};

static struct swap_info_struct {
	unsigned int flags;		/* SWP_USED / SWP_WRITEOK */
	unsigned int swap_device;	/* device number, 0 when swapping to a file */
	struct inode * swap_file;	/* inode of the swap file (NULL for a device) */
	unsigned char * swap_map;	/* per-slot use counts (0x80 = bad/reserved page) */
	unsigned char * swap_lockmap;	/* per-slot I/O lock bits */
	int lowest_bit;			/* lowest offset that may be free */
	int highest_bit;		/* highest offset that may be free */
	int prio;			/* swap priority of this area */
	int pages;			/* number of usable pages */
	unsigned long max;		/* highest valid offset + 1 */
	int next;			/* next area on the priority list */
} swap_info[MAX_SWAPFILES];

extern int shm_swap (int);

/* One entry per physical page: the swap entry this page is a clean
 * copy of, or 0 when the page is not in the swap cache. */
unsigned long *swap_cache;
59
#ifdef SWAP_CACHE_INFO
/* Swap-cache hit/miss statistics, kept only in debug builds. */
unsigned long swap_cache_add_total = 0;
unsigned long swap_cache_add_success = 0;
unsigned long swap_cache_del_total = 0;
unsigned long swap_cache_del_success = 0;
unsigned long swap_cache_find_total = 0;
unsigned long swap_cache_find_success = 0;

/* Dump the swap-cache statistics to the console. */
extern inline void show_swap_cache_info(void)
{
	printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
		swap_cache_add_total, swap_cache_add_success,
		swap_cache_del_total, swap_cache_del_success,
		swap_cache_find_total, swap_cache_find_success);
}
#endif
76
/*
 * Record that the page at 'addr' holds a clean copy of swap slot
 * 'entry'.  Only done when the area is writable; returns 1 on success,
 * 0 when the area cannot accept cache entries.
 */
static int add_to_swap_cache(unsigned long addr, unsigned long entry)
{
	struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];

#ifdef SWAP_CACHE_INFO
	swap_cache_add_total++;
#endif
	if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
		/* atomically install the entry; the slot should have been empty */
		entry = (unsigned long) xchg_ptr(swap_cache + MAP_NR(addr), (void *) entry);
		if (entry) {
			printk("swap_cache: replacing non-NULL entry\n");
		}
#ifdef SWAP_CACHE_INFO
		swap_cache_add_success++;
#endif
		return 1;
	}
	return 0;
}
96
97 static unsigned long init_swap_cache(unsigned long mem_start,
98 unsigned long mem_end)
99 {
100 unsigned long swap_cache_size;
101
102 mem_start = (mem_start + 15) & ~15;
103 swap_cache = (unsigned long *) mem_start;
104 swap_cache_size = MAP_NR(mem_end);
105 memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
106 return (unsigned long) (swap_cache + swap_cache_size);
107 }
108
109 void rw_swap_page(int rw, unsigned long entry, char * buf)
110 {
111 unsigned long type, offset;
112 struct swap_info_struct * p;
113
114 type = SWP_TYPE(entry);
115 if (type >= nr_swapfiles) {
116 printk("Internal error: bad swap-device\n");
117 return;
118 }
119 p = &swap_info[type];
120 offset = SWP_OFFSET(entry);
121 if (offset >= p->max) {
122 printk("rw_swap_page: weirdness\n");
123 return;
124 }
125 if (p->swap_map && !p->swap_map[offset]) {
126 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
127 return;
128 }
129 if (!(p->flags & SWP_USED)) {
130 printk("Trying to swap to unused swap-device\n");
131 return;
132 }
133 while (set_bit(offset,p->swap_lockmap))
134 sleep_on(&lock_queue);
135 if (rw == READ)
136 kstat.pswpin++;
137 else
138 kstat.pswpout++;
139 if (p->swap_device) {
140 ll_rw_page(rw,p->swap_device,offset,buf);
141 } else if (p->swap_file) {
142 struct inode *swapf = p->swap_file;
143 unsigned int zones[PAGE_SIZE/512];
144 int i;
145 if (swapf->i_op->bmap == NULL
146 && swapf->i_op->smap != NULL){
147
148
149
150
151
152
153
154
155
156
157
158
159 int j;
160 unsigned int block = offset << 3;
161
162 for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
163 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
164 printk("rw_swap_page: bad swap file\n");
165 return;
166 }
167 }
168 }else{
169 int j;
170 unsigned int block = offset
171 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
172
173 for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
174 if (!(zones[i] = bmap(swapf,block++))) {
175 printk("rw_swap_page: bad swap file\n");
176 return;
177 }
178 }
179 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
180 } else
181 printk("re_swap_page: no swap file or device\n");
182 if (offset && !clear_bit(offset,p->swap_lockmap))
183 printk("rw_swap_page: lock already cleared\n");
184 wake_up(&lock_queue);
185 }
186
/*
 * Allocate one swap slot, scanning the active areas in priority order.
 * Returns a swap entry (type+offset) or 0 when all swap is exhausted.
 */
unsigned long get_swap_page(void)
{
	struct swap_info_struct * p;
	unsigned long offset, entry;
	int type, wrapped = 0;

	type = swap_list.next;
	if (type < 0)
		return 0;	/* no swap areas configured */

	while (1) {
		p = &swap_info[type];
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
				if (p->swap_map[offset])
					continue;	/* in use (or bad: 0x80) */
				if (test_bit(offset, p->swap_lockmap))
					continue;	/* I/O in flight on this slot */
				p->swap_map[offset] = 1;
				nr_swap_pages--;
				if (offset == p->highest_bit)
					p->highest_bit--;
				p->lowest_bit = offset;
				entry = SWP_ENTRY(type,offset);

				/* Round-robin among areas of equal priority:
				 * restart from the head when the next area has
				 * a different priority (or the list ends). */
				type = swap_info[type].next;
				if (type < 0 || p->prio != swap_info[type].prio) {
					swap_list.next = swap_list.head;
				} else {
					swap_list.next = type;
				}
				return entry;
			}
		}
		type = p->next;
		if (!wrapped) {
			/* exhausted this priority band: wrap once to retry
			 * from the head before giving up */
			if (type < 0 || p->prio != swap_info[type].prio) {
				type = swap_list.head;
				wrapped = 1;
			}
		} else if (type < 0) {
			return 0;	/* scanned everything: no free slot */
		}
	}
}
232
233 void swap_duplicate(unsigned long entry)
234 {
235 struct swap_info_struct * p;
236 unsigned long offset, type;
237
238 if (!entry)
239 return;
240 offset = SWP_OFFSET(entry);
241 type = SWP_TYPE(entry);
242 if (type & SHM_SWP_TYPE)
243 return;
244 if (type >= nr_swapfiles) {
245 printk("Trying to duplicate nonexistent swap-page\n");
246 return;
247 }
248 p = type + swap_info;
249 if (offset >= p->max) {
250 printk("swap_duplicate: weirdness\n");
251 return;
252 }
253 if (!p->swap_map[offset]) {
254 printk("swap_duplicate: trying to duplicate unused page\n");
255 return;
256 }
257 p->swap_map[offset]++;
258 return;
259 }
260
261 void swap_free(unsigned long entry)
262 {
263 struct swap_info_struct * p;
264 unsigned long offset, type;
265
266 if (!entry)
267 return;
268 type = SWP_TYPE(entry);
269 if (type & SHM_SWP_TYPE)
270 return;
271 if (type >= nr_swapfiles) {
272 printk("Trying to free nonexistent swap-page\n");
273 return;
274 }
275 p = & swap_info[type];
276 offset = SWP_OFFSET(entry);
277 if (offset >= p->max) {
278 printk("swap_free: weirdness\n");
279 return;
280 }
281 if (!(p->flags & SWP_USED)) {
282 printk("Trying to free swap from unused swap-device\n");
283 return;
284 }
285 if (offset < p->lowest_bit)
286 p->lowest_bit = offset;
287 if (offset > p->highest_bit)
288 p->highest_bit = offset;
289 if (!p->swap_map[offset])
290 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
291 else
292 if (!--p->swap_map[offset])
293 nr_swap_pages++;
294 if (p->prio > swap_info[swap_list.next].prio) {
295 swap_list.next = swap_list.head;
296 }
297 }
298
299
300
301
302
303
304
305
/*
 * Service a fault on a swapped-out page.  On entry *page_table holds
 * the swap entry; on success it is replaced with a present pte.  The
 * pte is re-checked after each potential sleep because another thread
 * sharing the mm may have serviced the fault meanwhile.
 */
void swap_in(struct vm_area_struct * vma, pte_t * page_table,
	unsigned long entry, int write_access)
{
	unsigned long page = __get_free_page(GFP_KERNEL);

	if (pte_val(*page_table) != entry) {
		/* raced while allocating.  NOTE(review): page may be 0 here;
		 * free_page(0) is assumed to be a harmless no-op -- verify */
		free_page(page);
		return;
	}
	if (!page) {
		*page_table = BAD_PAGE;
		swap_free(entry);
		oom(current);
		return;
	}
	read_swap_page(entry, (char *) page);
	if (pte_val(*page_table) != entry) {
		free_page(page);	/* raced during the swap I/O */
		return;
	}
	vma->vm_task->mm->rss++;
	vma->vm_task->mm->maj_flt++;
	/* Read fault: keep the swap slot and remember this page is a clean
	 * copy of it, so it can be dropped again without I/O. */
	if (!write_access && add_to_swap_cache(page, entry)) {
		*page_table = mk_pte(page, vma->vm_page_prot);
		return;
	}
	/* Write fault (or cache refusal): map writable+dirty, release slot. */
	*page_table = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	swap_free(entry);
	return;
}
336
337
338
339
340
341
342
343
344
345
346
347
/*
 * Try to evict the page mapped by *page_table.  Returns 0 when nothing
 * was freed, 1 when a page was written to swap or recovered via the
 * swap cache, or the page's old map count when the mapping was simply
 * dropped (page clean and uncached).
 */
static inline int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pte_t * page_table)
{
	pte_t pte;
	unsigned long entry;
	unsigned long page;

	pte = *page_table;
	if (!pte_present(pte))
		return 0;
	page = pte_page(pte);
	if (page >= high_memory)
		return 0;
	if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
		return 0;
	/* Recently referenced (or dirtied while swap-cached): just age it. */
	if ((pte_dirty(pte) && delete_from_swap_cache(page)) || pte_young(pte)) {
		*page_table = pte_mkold(pte);
		return 0;
	}
	if (pte_dirty(pte)) {
		if (mem_map[MAP_NR(page)] != 1)
			return 0;	/* shared dirty page: leave it alone */
		if (vma->vm_ops && vma->vm_ops->swapout) {
			/* mapping has its own writeout method */
			vma->vm_task->mm->rss--;
			vma->vm_ops->swapout(vma, address-vma->vm_start, page_table);
		} else {
			if (!(entry = get_swap_page()))
				return 0;
			vma->vm_task->mm->rss--;
			pte_val(*page_table) = entry;
			invalidate();
			write_swap_page(entry, (char *) page);
		}
		free_page(page);
		return 1;
	}
	/* Clean page with a valid swap-cache copy: point the pte back at
	 * the swap entry and drop the page without any I/O. */
	if ((entry = find_in_swap_cache(page))) {
		if (mem_map[MAP_NR(page)] != 1) {
			*page_table = pte_mkdirty(pte);
			printk("Aiee.. duplicated cached swap-cache entry\n");
			return 0;
		}
		vma->vm_task->mm->rss--;
		pte_val(*page_table) = entry;
		invalidate();
		free_page(page);
		return 1;
	}
	/* Clean and uncached: drop the mapping entirely. */
	vma->vm_task->mm->rss--;
	pte_clear(page_table);
	invalidate();
	entry = mem_map[MAP_NR(page)];
	free_page(page);
	return entry;
}
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
/*
 * Bounds on the per-process swap quantum: each selected process is
 * scanned for between SWAP_MIN and SWAP_MAX pages per swap_out() pass.
 */
#define SWAP_MIN 4
#define SWAP_MAX 32

/*
 * The quantum is SWAP_RATIO / dec_flt (clamped to the bounds above),
 * so heavily-faulting processes are scanned more gently.
 */
#define SWAP_RATIO 128
429
/*
 * Scan the ptes under one pmd for a page to evict, recording the
 * resume point in mm->swap_address before each attempt.  Returns the
 * first nonzero try_to_swap_out() result, else 0.
 */
static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir,
	unsigned long address, unsigned long end)
{
	pte_t * pte;
	unsigned long pmd_end;

	if (pmd_none(*dir))
		return 0;
	if (pmd_bad(*dir)) {
		printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
		pmd_clear(dir);
		return 0;
	}

	pte = pte_offset(dir, address);

	/* clamp the scan to this pmd's span */
	pmd_end = (address + PMD_SIZE) & PMD_MASK;
	if (end > pmd_end)
		end = pmd_end;

	do {
		int result;
		vma->vm_task->mm->swap_address = address + PAGE_SIZE;
		result = try_to_swap_out(vma, address, pte);
		if (result)
			return result;
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return 0;
}
461
/*
 * Scan the pmds under one pgd entry for a page to evict.  Returns the
 * first nonzero swap_out_pmd() result, else 0.
 */
static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir,
	unsigned long address, unsigned long end)
{
	pmd_t * pmd;
	unsigned long pgd_end;

	if (pgd_none(*dir))
		return 0;
	if (pgd_bad(*dir)) {
		printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
		pgd_clear(dir);
		return 0;
	}

	pmd = pmd_offset(dir, address);

	/* clamp the scan to this pgd entry's span */
	pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
	if (end > pgd_end)
		end = pgd_end;

	do {
		int result = swap_out_pmd(vma, pmd, address, end);
		if (result)
			return result;
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}
491
492 static int swap_out_vma(struct vm_area_struct * vma, pgd_t *pgdir,
493 unsigned long start)
494 {
495 unsigned long end;
496
497
498
499 if (vma->vm_flags & VM_SHM)
500 return 0;
501
502 end = vma->vm_end;
503 while (start < end) {
504 int result = swap_out_pgd(vma, pgdir, start, end);
505 if (result)
506 return result;
507 start = (start + PGDIR_SIZE) & PGDIR_MASK;
508 pgdir++;
509 }
510 return 0;
511 }
512
/*
 * Walk one process's address space looking for a page to evict,
 * resuming where the previous scan stopped (mm->swap_address).
 * Returns nonzero when something was freed.
 */
static int swap_out_process(struct task_struct * p)
{
	unsigned long address;
	struct vm_area_struct* vma;

	/* Pick up the scan where it left off last time. */
	address = p->mm->swap_address;
	p->mm->swap_address = 0;

	/* Find the vma containing (or following) that address. */
	vma = find_vma(p, address);
	if (!vma)
		return 0;
	if (address < vma->vm_start)
		address = vma->vm_start;

	for (;;) {
		int result = swap_out_vma(vma, pgd_offset(p, address), address);
		if (result)
			return result;
		vma = vma->vm_next;
		if (!vma)
			break;
		address = vma->vm_start;
	}
	p->mm->swap_address = 0;	/* scanned everything: restart from 0 next time */
	return 0;
}
545
/*
 * Select processes round-robin and try to steal pages from them.  Each
 * process gets a quantum (mm->swap_cnt) derived from its decaying
 * major-fault rate, so busy processes lose fewer pages.  Returns 1
 * when a page was freed.
 */
static int swap_out(unsigned int priority)
{
	static int swap_task;	/* round-robin position, persists across calls */
	int loop, counter;
	struct task_struct *p;

	counter = 6*nr_tasks >> priority;
	for(; counter >= 0; counter--) {
		/* Find the next swappable task with resident pages, giving
		 * up after one full wrap of the task table. */
		loop = 0;
		while(1) {
			if (swap_task >= NR_TASKS) {
				swap_task = 1;
				if (loop)
					return 0;
				loop = 1;
			}

			p = task[swap_task];
			if (p && p->mm->swappable && p->mm->rss)
				break;

			swap_task++;
		}

		/* Quantum used up: compute a fresh one from the decayed
		 * fault rate (more faults => smaller quantum). */
		if (!p->mm->swap_cnt) {
			p->mm->dec_flt = (p->mm->dec_flt * 3) / 4 + p->mm->maj_flt - p->mm->old_maj_flt;
			p->mm->old_maj_flt = p->mm->maj_flt;

			if (p->mm->dec_flt >= SWAP_RATIO / SWAP_MIN) {
				p->mm->dec_flt = SWAP_RATIO / SWAP_MIN;
				p->mm->swap_cnt = SWAP_MIN;
			} else if (p->mm->dec_flt <= SWAP_RATIO / SWAP_MAX)
				p->mm->swap_cnt = SWAP_MAX;
			else
				p->mm->swap_cnt = SWAP_RATIO / p->mm->dec_flt;
		}
		if (!--p->mm->swap_cnt)
			swap_task++;	/* quantum exhausted: next task next round */
		switch (swap_out_process(p)) {
			case 0:
				/* nothing freed here: move on even if quantum remains */
				if (p->mm->swap_cnt)
					swap_task++;
				break;
			case 1:
				return 1;
			default:
				break;
		}
	}
	return 0;
}
605
606
607
608
609
610
611
612
613
614
/*
 * Try to free one page, rotating among three sources: the buffer
 * cache, SysV shared memory, and process pages.  The switch-into-
 * do/while construct resumes at the source recorded in 'state'
 * (which persists across calls), cycling up to 6 passes.
 */
static int try_to_free_page(int priority)
{
	static int state = 0;
	int i=6;

	switch (state) {
		do {
		case 0:
			if (priority != GFP_NOBUFFER && shrink_buffers(i))
				return 1;
			state = 1;
			/* fall through */
		case 1:
			if (shm_swap(i))
				return 1;
			state = 2;
			/* fall through */
		default:
			if (swap_out(i))
				return 1;
			state = 0;
		} while(i--);
	}
	return 0;
}
638
639 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
640 {
641 entry->prev = head;
642 (entry->next = head->next)->prev = entry;
643 head->next = entry;
644 }
645
646 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
647 {
648 entry->next->prev = entry->prev;
649 entry->prev->next = entry->next;
650 }
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
/*
 * Return a block of 2^order pages to the buddy free lists, merging
 * with its buddy repeatedly while the buddy is also free (tracked by
 * the free_area_map pair-bits).  Caller disables interrupts.
 */
static inline void free_pages_ok(unsigned long addr, unsigned long order)
{
	unsigned long index = MAP_NR(addr) >> (1 + order);	/* one bit per buddy pair */
	unsigned long mask = PAGE_MASK << order;

	addr &= mask;
	nr_free_pages += 1 << order;
	while (order < NR_MEM_LISTS-1) {
		/* change_bit returns the old value: 0 means the buddy is
		 * still allocated, so no merge is possible -- stop here */
		if (!change_bit(index, free_area_map[order]))
			break;
		/* buddy address: flip this block's size bit */
		remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
		order++;
		index >>= 1;
		mask <<= 1;
		addr &= mask;
	}
	add_mem_queue(free_area_list+order, (struct mem_list *) addr);
}
689
690 static inline void check_free_buffers(unsigned long addr)
691 {
692 struct buffer_head * bh;
693
694 bh = buffer_pages[MAP_NR(addr)];
695 if (bh) {
696 struct buffer_head *tmp = bh;
697 do {
698 if (tmp->b_list == BUF_SHARED && tmp->b_dev != 0xffff)
699 refile_buffer(tmp);
700 tmp = tmp->b_this_page;
701 } while (tmp != bh);
702 }
703 }
704
/*
 * Drop one reference to a block of 2^order pages, returning it to the
 * buddy allocator when the count reaches zero.  Reserved pages are
 * never freed; freeing an already-free page is reported.
 */
void free_pages(unsigned long addr, unsigned long order)
{
	if (addr < high_memory) {
		unsigned long flag;
		mem_map_t * map = mem_map + MAP_NR(addr);
		if (*map) {
			if (!(*map & MAP_PAGE_RESERVED)) {
				save_flags(flag);
				cli();
				if (!--*map) {
					free_pages_ok(addr, order);
					delete_from_swap_cache(addr);
				}
				restore_flags(flag);
				/* down to one user: its shared buffers may
				 * now be refiled */
				if (*map == 1)
					check_free_buffers(addr);
			}
			return;
		}
		printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
		printk("PC = %p\n", __builtin_return_address(0));
		return;
	}
}
729
730
731
732
/*
 * RMQUEUE: scan the free lists from 'order' upward; on the first
 * non-empty list, unlink the block, split it back down to the wanted
 * size with EXPAND, and return its address *from the enclosing
 * function*.  Expects 'flags' in scope and interrupts disabled.
 */
#define RMQUEUE(order) \
do { struct mem_list * queue = free_area_list+order; \
     unsigned long new_order = order; \
	do { struct mem_list *next = queue->next; \
		if (queue != next) { \
			(queue->next = next->next)->prev = queue; \
			mark_used((unsigned long) next, new_order); \
			nr_free_pages -= 1 << order; \
			restore_flags(flags); \
			EXPAND(next, order, new_order); \
			return (unsigned long) next; \
		} new_order++; queue++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)
747
/* Toggle the buddy-pair bit for the block at addr; returns the old bit. */
static inline int mark_used(unsigned long addr, unsigned long order)
{
	return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
}
752
/*
 * EXPAND: repeatedly halve a block of order 'high' down to order
 * 'low', queueing each upper half back on the free lists, then mark
 * the remaining block as a single allocated reference.
 */
#define EXPAND(addr,low,high) \
do { unsigned long size = PAGE_SIZE << high; \
	while (high > low) { \
		high--; size >>= 1; cli(); \
		add_mem_queue(free_area_list+high, addr); \
		mark_used((unsigned long) addr, high); \
		restore_flags(flags); \
		addr = (struct mem_list *) (size + (unsigned long) addr); \
	} mem_map[MAP_NR((unsigned long) addr)] = 1; \
} while (0)
763
/*
 * Allocate 2^order contiguous pages.  GFP_ATOMIC requests may dip
 * into the reserved pool and never sleep; other priorities try to
 * free pages and retry.  Returns the page address or 0 on failure.
 */
unsigned long __get_free_pages(int priority, unsigned long order)
{
	unsigned long flags;
	int reserved_pages;

	/* Sleeping allocation from interrupt context is a caller bug:
	 * complain (a few times) and degrade to atomic. */
	if (intr_count && priority != GFP_ATOMIC) {
		static int count = 0;
		if (++count < 5) {
			printk("gfp called nonatomically from interrupt %p\n",
				__builtin_return_address(0));
			priority = GFP_ATOMIC;
		}
	}
	reserved_pages = 5;
	if (priority != GFP_NFS)
		reserved_pages = min_free_pages;
	save_flags(flags);
repeat:
	cli();
	if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
		/* RMQUEUE returns from this function on success */
		RMQUEUE(order);
		restore_flags(flags);
		return 0;	/* no block of sufficient order on the free lists */
	}
	restore_flags(flags);
	if (priority != GFP_BUFFER && try_to_free_page(priority))
		goto repeat;
	return 0;
}
793
794
795
796
/*
 * Allocate 2^order pages below MAX_DMA_ADDRESS.  Unsuitable (too-high)
 * blocks are chained into a temporary list and released once a
 * suitable block -- or an allocation failure (result 0) -- ends the loop.
 */
unsigned long __get_dma_pages(int priority, unsigned long order)
{
	unsigned long list = 0;
	unsigned long result;
	unsigned long limit = MAX_DMA_ADDRESS;

	/* must not sleep while rejected blocks are held on the chain */
	if (priority != GFP_ATOMIC)
		priority = GFP_BUFFER;
	for (;;) {
		result = __get_free_pages(priority, order);
		if (result < limit) /* also covers failure: result == 0 */
			break;
		/* stash the rejected block, keep trying */
		*(unsigned long *) result = list;
		list = result;
	}
	while (list) {
		unsigned long tmp = list;
		list = *(unsigned long *) list;
		free_pages(tmp, order);
	}
	return result;
}
820
821
822
823
824
825
/*
 * Print the free-page total and a per-order breakdown of the buddy
 * free lists (debugging aid).
 */
void show_free_areas(void)
{
	unsigned long order, flags;
	unsigned long total = 0;

	printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
	save_flags(flags);
	cli();
	for (order=0 ; order < NR_MEM_LISTS; order++) {
		struct mem_list * tmp;
		unsigned long nr = 0;
		/* count the blocks on this order's circular list */
		for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
			nr ++;
		}
		total += nr * ((PAGE_SIZE>>10) << order);
		printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
	}
	restore_flags(flags);
	printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}
849
850
851
852
853
854
855
856
857
858
/*
 * Undo one pte's use of swap area 'type': a present pte loses its
 * swap-cache link (and is made dirty so it won't be re-linked); a
 * swapped-out pte has its data read into 'page', which is then mapped
 * in.  Returns 1 when 'page' was consumed and the caller must supply
 * a fresh one.
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		/* NOTE: this local deliberately shadows the 'page' parameter */
		unsigned long page = pte_page(pte);
		if (page >= high_memory)
			return 0;
		if (!in_swap_cache(page))
			return 0;
		if (SWP_TYPE(in_swap_cache(page)) != type)
			return 0;
		delete_from_swap_cache(page);
		*dir = pte_mkdirty(pte);
		return 0;
	}
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	read_swap_page(pte_val(pte), (char *) page);
	if (pte_val(*dir) != pte_val(pte)) {
		/* raced during I/O: discard, but report the page as used */
		free_page(page);
		return 1;
	}
	*dir = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	++vma->vm_task->mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
890
/*
 * Walk the ptes under one pmd for unuse_pte().  'offset' accumulates
 * the distance from the vma base so unuse_pte() receives each page's
 * virtual offset.  Returns 1 when the spare page was consumed.
 */
static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
	unsigned long address, unsigned long size, unsigned long offset,
	unsigned int type, unsigned long page)
{
	pte_t * pte;
	unsigned long end;

	if (pmd_none(*dir))
		return 0;
	if (pmd_bad(*dir)) {
		printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
		pmd_clear(dir);
		return 0;
	}
	pte = pte_offset(dir, address);
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
			return 1;
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return 0;
}
919
/*
 * Walk the pmds under one pgd entry for unuse_pmd().  Returns 1 when
 * the spare page was consumed.
 */
static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
	unsigned long address, unsigned long size,
	unsigned int type, unsigned long page)
{
	pmd_t * pmd;
	unsigned long offset, end;

	if (pgd_none(*dir))
		return 0;
	if (pgd_bad(*dir)) {
		printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
		pgd_clear(dir);
		return 0;
	}
	pmd = pmd_offset(dir, address);
	offset = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
			return 1;
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}
948
949 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
950 unsigned long start, unsigned long end,
951 unsigned int type, unsigned long page)
952 {
953 while (start < end) {
954 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
955 return 1;
956 start = (start + PGDIR_SIZE) & PGDIR_MASK;
957 pgdir++;
958 }
959 return 0;
960 }
961
962 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
963 {
964 struct vm_area_struct* vma;
965
966
967
968
969 vma = p->mm->mmap;
970 while (vma) {
971 pgd_t * pgd = pgd_offset(p, vma->vm_start);
972 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
973 return 1;
974 vma = vma->vm_next;
975 }
976 return 0;
977 }
978
979
980
981
982
983
/*
 * Bring every page of swap area 'type' back into memory by walking all
 * tasks' page tables.  Whenever the spare page is consumed a new one
 * is allocated and the same task is rescanned from the start.
 * Returns 0 on success or -ENOMEM.
 */
static int try_to_unuse(unsigned int type)
{
	int nr;
	unsigned long page = get_free_page(GFP_KERNEL);

	if (!page)
		return -ENOMEM;
	nr = 0;
	while (nr < NR_TASKS) {
		if (task[nr]) {
			if (unuse_process(task[nr], type, page)) {
				/* spare page used up: grab another and
				 * rescan this task */
				page = get_free_page(GFP_KERNEL);
				if (!page)
					return -ENOMEM;
				continue;
			}
		}
		nr++;
	}
	free_page(page);
	return 0;
}
1006
/*
 * sys_swapoff: disable swapping on the named file or block device.
 * All pages still out on that area are forced back into memory first.
 */
asmlinkage int sys_swapoff(const char * specialfile)
{
	struct swap_info_struct * p;
	struct inode * inode;
	struct file filp;
	int i, type, prev;

	if (!suser())
		return -EPERM;
	i = namei(specialfile,&inode);
	if (i)
		return i;
	/* Locate the matching active area on the priority list. */
	prev = -1;
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			if (p->swap_file) {
				if (p->swap_file == inode)
					break;
			} else {
				if (S_ISBLK(inode->i_mode)
					&& (p->swap_device == inode->i_rdev))
					break;
			}
		}
		prev = type;
	}
	if (type < 0){
		iput(inode);
		return -EINVAL;
	}
	/* Unlink it from the priority list. */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		swap_list.next = swap_list.head;
	}
	/* Keep SWP_USED but drop write permission while draining. */
	p->flags = SWP_USED;
	i = try_to_unuse(type);
	if (i) {
		/* NOTE(review): on failure the area is made writable again
		 * but is NOT re-linked into swap_list -- verify intent */
		iput(inode);
		p->flags = SWP_WRITEOK;
		return i;
	}

	if(p->swap_device){
		memset(&filp, 0, sizeof(filp));
		filp.f_inode = inode;
		filp.f_mode = 3;	/* read-write */
		/* NOTE(review): release is called twice, presumably once for
		 * this open and once to balance sys_swapon's open -- verify */
		if( !blkdev_open(inode, &filp) &&
			filp.f_op && filp.f_op->release){
			filp.f_op->release(inode,&filp);
			filp.f_op->release(inode,&filp);
		}
	}
	iput(inode);

	/* Tear down the area's bookkeeping. */
	nr_swap_pages -= p->pages;
	iput(p->swap_file);
	p->swap_file = NULL;
	p->swap_device = 0;
	vfree(p->swap_map);
	p->swap_map = NULL;
	free_page((long) p->swap_lockmap);
	p->swap_lockmap = NULL;
	p->flags = 0;
	return 0;
}
1079
1080
1081
1082
1083
1084
/*
 * sys_swapon: enable swapping on a file or block device.  Reads and
 * validates the "SWAP-SPACE" header page, builds the use-count map and
 * lock map, and inserts the new area into the priority list.
 */
asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
{
	struct swap_info_struct * p;
	struct inode * swap_inode;
	unsigned int type;
	int i, j, prev;
	int error;
	struct file filp;
	static int least_priority = 0;

	memset(&filp, 0, sizeof(filp));
	if (!suser())
		return -EPERM;
	/* find a free swap_info slot */
	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++)
		if (!(p->flags & SWP_USED))
			break;
	if (type >= MAX_SWAPFILES)
		return -EPERM;
	if (type >= nr_swapfiles)
		nr_swapfiles = type+1;
	p->flags = SWP_USED;
	p->swap_file = NULL;
	p->swap_device = 0;
	p->swap_map = NULL;
	p->swap_lockmap = NULL;
	p->lowest_bit = 0;
	p->highest_bit = 0;
	p->max = 1;
	p->next = -1;
	if (swap_flags & SWAP_FLAG_PREFER) {
		p->prio =
			(swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
	} else {
		p->prio = --least_priority;	/* later areas default to lower priority */
	}
	error = namei(specialfile,&swap_inode);
	if (error)
		goto bad_swap_2;
	p->swap_file = swap_inode;
	error = -EBUSY;
	if (swap_inode->i_count != 1)
		goto bad_swap_2;
	error = -EINVAL;

	if (S_ISBLK(swap_inode->i_mode)) {
		p->swap_device = swap_inode->i_rdev;

		filp.f_inode = swap_inode;
		filp.f_mode = 3;	/* read-write */
		error = blkdev_open(swap_inode, &filp);
		p->swap_file = NULL;
		iput(swap_inode);
		if(error)
			goto bad_swap_2;
		error = -ENODEV;
		if (!p->swap_device)
			goto bad_swap;
		error = -EBUSY;
		/* refuse a device already in use for swap */
		for (i = 0 ; i < nr_swapfiles ; i++) {
			if (i == type)
				continue;
			if (p->swap_device == swap_info[i].swap_device)
				goto bad_swap;
		}
	} else if (!S_ISREG(swap_inode->i_mode))
		goto bad_swap;
	p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
	if (!p->swap_lockmap) {
		printk("Unable to start swapping: out of memory :-)\n");
		error = -ENOMEM;
		goto bad_swap;
	}
	/* the lockmap page doubles as a buffer for the header page */
	read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
	if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
		printk("Unable to find swap-space signature\n");
		error = -EINVAL;
		goto bad_swap;
	}
	memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
	/* header bitmap: bit i set => page i is usable */
	j = 0;
	p->lowest_bit = 0;
	p->highest_bit = 0;
	for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
		if (test_bit(i,p->swap_lockmap)) {
			if (!p->lowest_bit)
				p->lowest_bit = i;
			p->highest_bit = i;
			p->max = i+1;
			j++;
		}
	}
	if (!j) {
		printk("Empty swap-file\n");
		error = -EINVAL;
		goto bad_swap;
	}
	p->swap_map = (unsigned char *) vmalloc(p->max);
	if (!p->swap_map) {
		error = -ENOMEM;
		goto bad_swap;
	}
	/* usable pages start free (0); bad pages are pinned at 0x80 */
	for (i = 1 ; i < p->max ; i++) {
		if (test_bit(i,p->swap_lockmap))
			p->swap_map[i] = 0;
		else
			p->swap_map[i] = 0x80;
	}
	p->swap_map[0] = 0x80;	/* never allocate the header page */
	memset(p->swap_lockmap,0,PAGE_SIZE);
	p->flags = SWP_WRITEOK;
	p->pages = j;
	nr_swap_pages += j;
	printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));

	/* insert the new area into the priority-ordered list */
	prev = -1;
	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
		if (p->prio >= swap_info[i].prio) {
			break;
		}
		prev = i;
	}
	p->next = i;
	if (prev < 0) {
		swap_list.head = swap_list.next = p - swap_info;
	} else {
		swap_info[prev].next = p - swap_info;
	}
	return 0;
bad_swap:
	if(filp.f_op && filp.f_op->release)
		filp.f_op->release(filp.f_inode,&filp);
bad_swap_2:
	free_page((long) p->swap_lockmap);
	vfree(p->swap_map);
	iput(p->swap_file);
	p->swap_device = 0;
	p->swap_file = NULL;
	p->swap_map = NULL;
	p->swap_lockmap = NULL;
	p->flags = 0;
	return error;
}
1229
/*
 * Fill in swap totals for sysinfo: totalswap counts usable slots,
 * freeswap those currently unallocated.  Counts are in pages until the
 * final shift converts them to bytes.
 */
void si_swapinfo(struct sysinfo *val)
{
	unsigned int i, j;

	val->freeswap = val->totalswap = 0;
	for (i = 0; i < nr_swapfiles; i++) {
		if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
			continue;
		for (j = 0; j < swap_info[i].max; ++j)
			switch (swap_info[i].swap_map[j]) {
				case 128:
					continue;	/* bad page: counted in neither total */
				case 0:
					++val->freeswap;
					/* fall through: free slots count as total too */
				default:
					++val->totalswap;
			}
	}
	val->freeswap <<= PAGE_SHIFT;
	val->totalswap <<= PAGE_SHIFT;
	return;
}
1252
1253
1254
1255
1256
1257
1258
/*
 * Boot-time setup: size min_free_pages, allocate the swap cache, the
 * mem_map array (all pages start reserved) and the per-order buddy
 * bitmaps.  Returns the new start of free memory.
 */
unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
{
	mem_map_t * p;
	unsigned long mask = PAGE_MASK;
	int i;

	/* Reserve roughly 1/64 of memory for the free-page pool, 16 pages min. */
	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
	if (i < 16)
		i = 16;
	min_free_pages = i;
	start_mem = init_swap_cache(start_mem, end_mem);
	mem_map = (mem_map_t *) start_mem;
	p = mem_map + MAP_NR(end_mem);
	start_mem = (unsigned long) p;
	/* mark everything reserved; presumably usable pages are released
	 * later by the arch's mem init -- verify */
	while (p > mem_map)
		*--p = MAP_PAGE_RESERVED;

	for (i = 0 ; i < NR_MEM_LISTS ; i++) {
		unsigned long bitmap_size;
		/* empty circular free list for this order */
		free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
		mask += mask;	/* alignment mask for order-i blocks */
		end_mem = (end_mem + ~mask) & mask;
		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
		bitmap_size = (bitmap_size + 7) >> 3;	/* bits -> bytes */
		bitmap_size = (bitmap_size + sizeof(unsigned long) - 1) & ~(sizeof(unsigned long)-1);
		free_area_map[i] = (unsigned char *) start_mem;
		memset((void *) start_mem, 0, bitmap_size);
		start_mem += bitmap_size;
	}
	return start_mem;
}