This source file includes following definitions.
- show_swap_cache_info
- add_to_swap_cache
- init_swap_cache
- swap_setup
- buff_setup
- rw_swap_page
- get_swap_page
- swap_duplicate
- swap_free
- swap_in
- try_to_swap_out
- swap_out_pmd
- swap_out_pgd
- swap_out_vma
- swap_out_process
- swap_out
- try_to_free_page
- add_mem_queue
- remove_mem_queue
- free_pages_ok
- check_free_buffers
- free_pages
- mark_used
- __get_free_pages
- show_free_areas
- unuse_pte
- unuse_pmd
- unuse_pgd
- unuse_vma
- unuse_process
- try_to_unuse
- sys_swapoff
- sys_swapon
- si_swapinfo
- free_area_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #include <linux/mm.h>
15 #include <linux/sched.h>
16 #include <linux/head.h>
17 #include <linux/kernel.h>
18 #include <linux/kernel_stat.h>
19 #include <linux/errno.h>
20 #include <linux/string.h>
21 #include <linux/stat.h>
22 #include <linux/swap.h>
23 #include <linux/fs.h>
24 #include <linux/swapctl.h>
25
26 #include <asm/dma.h>
27 #include <asm/system.h>
28 #include <asm/segment.h>
29 #include <asm/bitops.h>
30 #include <asm/pgtable.h>
31
#define MAX_SWAPFILES 8

/* swap_info_struct.flags: area is in use / in use and writable */
#define SWP_USED 1
#define SWP_WRITEOK 3

/* Non-atomic allocations start reclaiming once free pages drop below this. */
int min_free_pages = 20;

/*
 * Paging/aging policy tunables; field order must match swap_control_t in
 * <linux/swapctl.h>.  Individual fields can be overridden at boot time
 * via swap_setup() and buff_setup().
 */
swap_control_t swap_control = {
	20, 3, 1, 3,
	10, 2, 2, 0,
	32, 4,
	8192, 4096,
	-200,
	1, 1,
	RCL_ROUND_ROBIN
};

static int nr_swapfiles = 0;
/* processes sleeping on a locked swap page wait here */
static struct wait_queue * lock_queue = NULL;
/*
 * Priority-ordered chain through swap_info[]; -1 terminates.
 * `next` is where get_swap_page() allocates from next.
 */
static struct {
	int head;
	int next;
} swap_list = {-1, -1};

static struct swap_info_struct {
	unsigned int flags;		/* SWP_USED / SWP_WRITEOK */
	kdev_t swap_device;		/* block device, if device-backed */
	struct inode * swap_file;	/* inode, if file-backed */
	unsigned char * swap_map;	/* per-slot use counts (0x80 = bad page) */
	unsigned char * swap_lockmap;	/* per-slot I/O lock bits */
	int lowest_bit;			/* search window into swap_map */
	int highest_bit;
	int prio;			/* allocation priority of this area */
	int pages;			/* number of usable slots */
	unsigned long max;		/* one past the highest usable slot */
	int next;			/* next area in swap_list order */
} swap_info[MAX_SWAPFILES];

extern int shm_swap (int, unsigned long);

/*
 * One swap entry per physical page frame: remembers where a still-clean
 * page already lives on swap, so it can be dropped without rewriting it.
 */
unsigned long *swap_cache;
86
#ifdef SWAP_CACHE_INFO
/* swap-cache hit/miss statistics, reported by show_swap_cache_info() */
unsigned long swap_cache_add_total = 0;
unsigned long swap_cache_add_success = 0;
unsigned long swap_cache_del_total = 0;
unsigned long swap_cache_del_success = 0;
unsigned long swap_cache_find_total = 0;
unsigned long swap_cache_find_success = 0;

/* Print the attempted/successful counters for each cache operation. */
extern inline void show_swap_cache_info(void)
{
	printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
		swap_cache_add_total, swap_cache_add_success,
		swap_cache_del_total, swap_cache_del_success,
		swap_cache_find_total, swap_cache_find_success);
}
#endif
103
/*
 * Record that the physical page at `addr` has an up-to-date copy in swap
 * slot `entry`.  Only done when the owning swap area is writable.
 * Returns 1 if the entry was cached, 0 otherwise.
 */
static int add_to_swap_cache(unsigned long addr, unsigned long entry)
{
	struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];

#ifdef SWAP_CACHE_INFO
	swap_cache_add_total++;
#endif
	if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
		/* atomic exchange so a stale previous entry is detected */
		entry = xchg(swap_cache + MAP_NR(addr), entry);
		if (entry) {
			printk("swap_cache: replacing non-NULL entry\n");
		}
#ifdef SWAP_CACHE_INFO
		swap_cache_add_success++;
#endif
		return 1;
	}
	return 0;
}
123
/*
 * Carve the swap cache array (one unsigned long per page frame up to
 * mem_end) out of boot memory starting at mem_start, zero it, and return
 * the first free address after it.
 */
static unsigned long init_swap_cache(unsigned long mem_start,
	unsigned long mem_end)
{
	unsigned long swap_cache_size;

	mem_start = (mem_start + 15) & ~15;	/* 16-byte align the array */
	swap_cache = (unsigned long *) mem_start;
	swap_cache_size = MAP_NR(mem_end);
	memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
	return (unsigned long) (swap_cache + swap_cache_size);
}
135
136
137
138
139 void swap_setup(char *str, int *ints)
140 {
141 int * swap_vars[8] = {
142 &MAX_PAGE_AGE,
143 &PAGE_ADVANCE,
144 &PAGE_DECLINE,
145 &PAGE_INITIAL_AGE,
146 &AGE_CLUSTER_FRACT,
147 &AGE_CLUSTER_MIN,
148 &PAGEOUT_WEIGHT,
149 &BUFFEROUT_WEIGHT
150 };
151 int i;
152 for (i=0; i < ints[0] && i < 8; i++) {
153 if (ints[i+1])
154 *(swap_vars[i]) = ints[i+1];
155 }
156 }
157
158
159 void buff_setup(char *str, int *ints)
160 {
161 int * buff_vars[6] = {
162 &MAX_BUFF_AGE,
163 &BUFF_ADVANCE,
164 &BUFF_DECLINE,
165 &BUFF_INITIAL_AGE,
166 &BUFFEROUT_WEIGHT,
167 &BUFFERMEM_GRACE
168 };
169 int i;
170 for (i=0; i < ints[0] && i < 6; i++) {
171 if (ints[i+1])
172 *(buff_vars[i]) = ints[i+1];
173 }
174 }
175
176
177
178 void rw_swap_page(int rw, unsigned long entry, char * buf)
179 {
180 unsigned long type, offset;
181 struct swap_info_struct * p;
182
183 type = SWP_TYPE(entry);
184 if (type >= nr_swapfiles) {
185 printk("Internal error: bad swap-device\n");
186 return;
187 }
188 p = &swap_info[type];
189 offset = SWP_OFFSET(entry);
190 if (offset >= p->max) {
191 printk("rw_swap_page: weirdness\n");
192 return;
193 }
194 if (p->swap_map && !p->swap_map[offset]) {
195 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
196 return;
197 }
198 if (!(p->flags & SWP_USED)) {
199 printk("Trying to swap to unused swap-device\n");
200 return;
201 }
202 while (set_bit(offset,p->swap_lockmap))
203 sleep_on(&lock_queue);
204 if (rw == READ)
205 kstat.pswpin++;
206 else
207 kstat.pswpout++;
208 if (p->swap_device) {
209 ll_rw_page(rw,p->swap_device,offset,buf);
210 } else if (p->swap_file) {
211 struct inode *swapf = p->swap_file;
212 unsigned int zones[PAGE_SIZE/512];
213 int i;
214 if (swapf->i_op->bmap == NULL
215 && swapf->i_op->smap != NULL){
216
217
218
219
220
221
222
223
224
225
226
227
228 int j;
229 unsigned int block = offset << 3;
230
231 for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
232 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
233 printk("rw_swap_page: bad swap file\n");
234 return;
235 }
236 }
237 }else{
238 int j;
239 unsigned int block = offset
240 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
241
242 for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
243 if (!(zones[i] = bmap(swapf,block++))) {
244 printk("rw_swap_page: bad swap file\n");
245 return;
246 }
247 }
248 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
249 } else
250 printk("re_swap_page: no swap file or device\n");
251 if (offset && !clear_bit(offset,p->swap_lockmap))
252 printk("rw_swap_page: lock already cleared\n");
253 wake_up(&lock_queue);
254 }
255
/*
 * Allocate one free swap slot and return its swap entry, or 0 if swap is
 * full.  Scans areas in swap_list order, starting at swap_list.next;
 * areas of equal priority are used round-robin, higher priority first.
 */
unsigned long get_swap_page(void)
{
	struct swap_info_struct * p;
	unsigned long offset, entry;
	int type, wrapped = 0;

	type = swap_list.next;
	if (type < 0)
		return 0;	/* no swap areas active */

	while (1) {
		p = &swap_info[type];
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			/* scan the known-free window of this area's map */
			for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
				if (p->swap_map[offset])
					continue;	/* in use or bad */
				if (test_bit(offset, p->swap_lockmap))
					continue;	/* I/O in flight */
				p->swap_map[offset] = 1;
				nr_swap_pages--;
				if (offset == p->highest_bit)
					p->highest_bit--;
				p->lowest_bit = offset;
				entry = SWP_ENTRY(type,offset);

				/*
				 * Round-robin among same-priority areas:
				 * advance to the next area of equal prio,
				 * else restart from the list head.
				 */
				type = swap_info[type].next;
				if (type < 0 || p->prio != swap_info[type].prio) {
					swap_list.next = swap_list.head;
				} else {
					swap_list.next = type;
				}
				return entry;
			}
		}
		/* this area is full/unwritable: try the next one */
		type = p->next;
		if (!wrapped) {
			/* crossed into a lower priority band: retry from head once */
			if (type < 0 || p->prio != swap_info[type].prio) {
				type = swap_list.head;
				wrapped = 1;
			}
		} else if (type < 0) {
			return 0;	/* scanned everything, no free slot */
		}
	}
}
301
/*
 * Take an extra reference on swap slot `entry` (e.g. when a swapped-out
 * page table is shared at fork).  Silently ignores empty and SysV-shm
 * entries; complains about obviously bad ones.
 *
 * NOTE(review): the use count is an unsigned char and 0x80 marks a bad
 * page — no overflow guard here; confirm counts stay well below 0x80.
 */
void swap_duplicate(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry)
		return;
	offset = SWP_OFFSET(entry);
	type = SWP_TYPE(entry);
	if (type & SHM_SWP_TYPE)
		return;		/* shm entries are managed by ipc/shm */
	if (type >= nr_swapfiles) {
		printk("Trying to duplicate nonexistent swap-page\n");
		return;
	}
	p = type + swap_info;
	if (offset >= p->max) {
		printk("swap_duplicate: weirdness\n");
		return;
	}
	if (!p->swap_map[offset]) {
		printk("swap_duplicate: trying to duplicate unused page\n");
		return;
	}
	p->swap_map[offset]++;
	return;
}
329
/*
 * Drop one reference on swap slot `entry`; when the count reaches zero
 * the slot becomes allocatable again.  Also widens the area's free-slot
 * search window and lets the allocator revisit higher-priority areas.
 */
void swap_free(unsigned long entry)
{
	struct swap_info_struct * p;
	unsigned long offset, type;

	if (!entry)
		return;
	type = SWP_TYPE(entry);
	if (type & SHM_SWP_TYPE)
		return;		/* shm entries are managed by ipc/shm */
	if (type >= nr_swapfiles) {
		printk("Trying to free nonexistent swap-page\n");
		return;
	}
	p = & swap_info[type];
	offset = SWP_OFFSET(entry);
	if (offset >= p->max) {
		printk("swap_free: weirdness\n");
		return;
	}
	if (!(p->flags & SWP_USED)) {
		printk("Trying to free swap from unused swap-device\n");
		return;
	}
	/* widen the lowest/highest window so get_swap_page() can find this slot */
	if (offset < p->lowest_bit)
		p->lowest_bit = offset;
	if (offset > p->highest_bit)
		p->highest_bit = offset;
	if (!p->swap_map[offset])
		printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
	else
		if (!--p->swap_map[offset])
			nr_swap_pages++;
	/* a higher-priority area just gained space: restart allocation there */
	if (p->prio > swap_info[swap_list.next].prio) {
		swap_list.next = swap_list.head;
	}
}
367
368
369
370
371
372
373
374
/*
 * Handle a fault on a swapped-out pte: allocate a page, read the swap
 * slot into it and map it in.  The pte is re-checked against `entry`
 * before and after the (sleeping) allocation and I/O, since another
 * thread sharing the mm may have faulted the page in meanwhile.
 *
 * Read-only faults keep the swap copy (via the swap cache) and map the
 * page clean; write faults free the slot and map it dirty+writable.
 */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
	pte_t * page_table, unsigned long entry, int write_access)
{
	unsigned long page = __get_free_page(GFP_KERNEL);

	/* someone else resolved the fault while we slept in the allocator */
	if (pte_val(*page_table) != entry) {
		free_page(page);	/* free_page(0) is a no-op */
		return;
	}
	if (!page) {
		set_pte(page_table, BAD_PAGE);
		swap_free(entry);
		oom(tsk);
		return;
	}
	read_swap_page(entry, (char *) page);
	/* re-check: the read sleeps, the race can recur */
	if (pte_val(*page_table) != entry) {
		free_page(page);
		return;
	}
	vma->vm_mm->rss++;
	tsk->maj_flt++;
	/* read fault: keep the swap copy so a clean page can be dropped cheaply */
	if (!write_access && add_to_swap_cache(page, entry)) {
		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
		return;
	}
	set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	swap_free(entry);
	return;
}
405
406
407
408
409
410
411
412
413
414
415
416
/*
 * Try to evict the page mapped by *page_table (virtual `address` in
 * `vma`).  `limit` excludes high pages (e.g. for DMA-capable requests).
 *
 * Returns 0 if nothing was freed, otherwise the number of references
 * the freed page had (1 for a normal swap-out); callers treat non-zero
 * as progress.
 */
static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
	unsigned long address, pte_t * page_table, unsigned long limit)
{
	pte_t pte;
	unsigned long entry;
	unsigned long page;

	pte = *page_table;
	if (!pte_present(pte))
		return 0;
	page = pte_page(pte);
	if (page >= high_memory)
		return 0;
	if (page >= limit)
		return 0;

	if (mem_map[MAP_NR(page)].reserved)
		return 0;

	/*
	 * Recently used, or dirtied while a (now stale) swap-cache copy
	 * existed: clear the young bit, refresh the page's age, keep it.
	 */
	if ((pte_dirty(pte) && delete_from_swap_cache(page))
		|| pte_young(pte)) {
		set_pte(page_table, pte_mkold(pte));
		touch_page(page);
		return 0;
	}
	age_page(page);
	if (age_of(page))
		return 0;	/* not old enough to evict yet */
	if (pte_dirty(pte)) {
		if (vma->vm_ops && vma->vm_ops->swapout) {
			/* mapped object provides its own writeback */
			pid_t pid = tsk->pid;
			vma->vm_mm->rss--;
			if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
				kill_proc(pid, SIGBUS, 1);
		} else {
			/* anonymous dirty page: write it to a fresh swap slot */
			if (mem_map[MAP_NR(page)].count != 1)
				return 0;	/* shared; can't retarget all ptes here */
			if (!(entry = get_swap_page()))
				return 0;
			vma->vm_mm->rss--;
			set_pte(page_table, __pte(entry));
			invalidate();
			tsk->nswap++;
			write_swap_page(entry, (char *) page);
		}
		free_page(page);
		return 1;
	}
	/* clean page with a valid swap-cache copy: drop it without I/O */
	if ((entry = find_in_swap_cache(page))) {
		if (mem_map[MAP_NR(page)].count != 1) {
			set_pte(page_table, pte_mkdirty(pte));
			printk("Aiee.. duplicated cached swap-cache entry\n");
			return 0;
		}
		vma->vm_mm->rss--;
		set_pte(page_table, __pte(entry));
		invalidate();
		free_page(page);
		return 1;
	}
	/* clean, unbacked (e.g. file-backed or zero-fill): just unmap it */
	vma->vm_mm->rss--;
	pte_clear(page_table);
	invalidate();
	entry = mem_map[MAP_NR(page)].count;	/* refcount before our free */
	free_page(page);
	return entry;
}
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
/*
 * Walk the ptes under one pmd in [address, end), attempting to swap each
 * page out.  Records the resume point in tsk->swap_address before each
 * attempt so the scan can continue where it stopped.  Returns the first
 * non-zero try_to_swap_out() result, or 0.
 */
static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
	pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
{
	pte_t * pte;
	unsigned long pmd_end;

	if (pmd_none(*dir))
		return 0;
	if (pmd_bad(*dir)) {
		printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
		pmd_clear(dir);
		return 0;
	}

	pte = pte_offset(dir, address);

	/* clamp the range to this pmd's coverage */
	pmd_end = (address + PMD_SIZE) & PMD_MASK;
	if (end > pmd_end)
		end = pmd_end;

	do {
		int result;
		tsk->swap_address = address + PAGE_SIZE;	/* resume point */
		result = try_to_swap_out(tsk, vma, address, pte, limit);
		if (result)
			return result;
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return 0;
}
532
/*
 * Walk the pmds under one pgd entry in [address, end), delegating to
 * swap_out_pmd().  Returns the first non-zero result, or 0.
 */
static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
	pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
{
	pmd_t * pmd;
	unsigned long pgd_end;

	if (pgd_none(*dir))
		return 0;
	if (pgd_bad(*dir)) {
		printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
		pgd_clear(dir);
		return 0;
	}

	pmd = pmd_offset(dir, address);

	/* clamp the range to this pgd entry's coverage */
	pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
	if (end > pgd_end)
		end = pgd_end;

	do {
		int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
		if (result)
			return result;
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}
562
/*
 * Scan one vma from `start` to its end, trying to swap pages out.
 * Shared-memory and explicitly locked (VM_DONTSWAP) mappings are
 * skipped entirely.  Returns the first non-zero result, or 0.
 */
static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
	pgd_t *pgdir, unsigned long start, unsigned long limit)
{
	unsigned long end;

	if (vma->vm_flags & VM_SHM)
		return 0;

	if (vma->vm_flags & VM_DONTSWAP)
		return 0;

	end = vma->vm_end;
	while (start < end) {
		int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
		if (result)
			return result;
		/* advance to the next pgd-aligned boundary */
		start = (start + PGDIR_SIZE) & PGDIR_MASK;
		pgdir++;
	}
	return 0;
}
588
/*
 * Try to swap one page out of process `p`, resuming the address-space
 * scan at p->swap_address (updated by swap_out_pmd()).  Returns the
 * first non-zero per-page result, or 0 when the whole space was scanned
 * without progress (swap_address is then reset for the next pass).
 */
static int swap_out_process(struct task_struct * p, unsigned long limit)
{
	unsigned long address;
	struct vm_area_struct* vma;

	address = p->swap_address;
	p->swap_address = 0;

	/* find the vma containing (or following) the resume address */
	vma = find_vma(p, address);
	if (!vma)
		return 0;
	if (address < vma->vm_start)
		address = vma->vm_start;

	for (;;) {
		int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
		if (result)
			return result;
		vma = vma->vm_next;
		if (!vma)
			break;
		address = vma->vm_start;
	}
	p->swap_address = 0;
	return 0;
}
621
/*
 * Pick processes round-robin (static swap_task cursor) and try to swap
 * a page out of each.  The per-process budget p->swap_cnt spreads the
 * cost across tasks proportionally to their RSS.  `priority` scales how
 * many attempts are made.  Returns 1 once a page was freed, else 0.
 */
static int swap_out(unsigned int priority, unsigned long limit)
{
	static int swap_task;	/* persistent round-robin cursor into task[] */
	int loop, counter;
	struct task_struct *p;

	counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
	for(; counter >= 0; counter--) {
		/* find the next swappable task with resident pages */
		loop = 0;
		while(1) {
			if (swap_task >= NR_TASKS) {
				swap_task = 1;	/* skip task 0 (idle) */
				if (loop)
					/* all tasks scanned twice: give up */
					return 0;
				loop = 1;
			}

			p = task[swap_task];
			if (p && p->swappable && p->mm->rss)
				break;

			swap_task++;
		}

		/* refill this task's budget in proportion to its RSS */
		if (!p->swap_cnt) {
			p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
		}
		if (!--p->swap_cnt)
			swap_task++;	/* budget exhausted: move on next time */
		switch (swap_out_process(p, limit)) {
			case 0:
				/* no progress: also advance the cursor */
				if (p->swap_cnt)
					swap_task++;
				break;
			case 1:
				return 1;
			default:
				break;
		}
	}
	return 0;
}
674
675
676
677
678
679
/*
 * Free one page by trying, in rotating order, the three reclaim sources:
 * buffer cache, SysV shared memory, process pages.  The switch is woven
 * through the do-while (Duff's-device style) so `state` resumes with the
 * source *after* the one that last succeeded, and `i` raises the urgency
 * on each full rotation.  Returns 1 when a page was freed, else 0.
 */
static int try_to_free_page(int priority, unsigned long limit)
{
	static int state = 0;	/* which reclaim source to try first */
	int i=6;		/* urgency, increases as i counts down */

	switch (state) {
		do {
		case 0:
			if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
				return 1;
			state = 1;
		case 1:
			if (shm_swap(i, limit))
				return 1;
			state = 2;
		default:
			if (swap_out(i, limit))
				return 1;
			state = 0;
		} while(i--);
	}
	return 0;
}
703
704 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
705 {
706 entry->prev = head;
707 (entry->next = head->next)->prev = entry;
708 head->next = entry;
709 }
710
711 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
712 {
713 entry->next->prev = entry->prev;
714 entry->prev->next = entry->next;
715 }
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
/*
 * Return a 2^order block at `addr` to the buddy free lists, coalescing
 * with its buddy at each level while the buddy is also free.  The
 * free_area_map bit for a buddy pair toggles on every single-buddy
 * free/alloc: change_bit() returning 0 means the buddy was not free,
 * so coalescing stops there.  Callers hold interrupts disabled.
 */
static inline void free_pages_ok(unsigned long addr, unsigned long order)
{
	unsigned long index = MAP_NR(addr) >> (1 + order);	/* buddy-pair bit index */
	unsigned long mask = PAGE_MASK << order;

	addr &= mask;
	nr_free_pages += 1 << order;
	while (order < NR_MEM_LISTS-1) {
		if (!change_bit(index, free_area_map[order]))
			break;	/* buddy still allocated: can't merge further */
		/* buddy address = addr with the order-bit flipped */
		remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
		order++;
		index >>= 1;
		mask <<= 1;
		addr &= mask;	/* align to the merged, larger block */
	}
	add_mem_queue(free_area_list+order, (struct mem_list *) addr);
}
754
/*
 * After a page's share count dropped to one, walk any buffers living in
 * that page and refile the ones still on the shared list, since they
 * are no longer actually shared.
 */
static inline void check_free_buffers(unsigned long addr)
{
	struct buffer_head * bh;

	bh = buffer_pages[MAP_NR(addr)];
	if (bh) {
		struct buffer_head *tmp = bh;
		do {
			if (tmp->b_list == BUF_SHARED
			    && tmp->b_dev != B_FREE)
				refile_buffer(tmp);
			tmp = tmp->b_this_page;	/* circular per-page buffer ring */
		} while (tmp != bh);
	}
}
770
/*
 * Drop one reference on the 2^order block at `addr`.  Reserved pages are
 * never freed; freeing an already-free page is reported, not acted on.
 * When the count hits zero the block goes back to the buddy lists and
 * any stale swap-cache entry for it is dropped.
 */
void free_pages(unsigned long addr, unsigned long order)
{
	if (MAP_NR(addr) < MAP_NR(high_memory)) {
		unsigned long flag;
		mem_map_t * map = mem_map + MAP_NR(addr);
		if (map->reserved)
			return;
		if (map->count) {
			save_flags(flag);
			cli();	/* count/free lists are touched from interrupts */
			if (!--map->count) {
				free_pages_ok(addr, order);
				delete_from_swap_cache(addr);
			}
			restore_flags(flag);
			/* went from shared to single-owner: refile its buffers */
			if (map->count == 1)
				check_free_buffers(addr);
			return;
		}
		printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
		printk("PC = %p\n", __builtin_return_address(0));
		return;
	}
}
795
796
797
798
/*
 * RMQUEUE: allocation fast path, expanded inside __get_free_pages().
 * Starting at `order`, scan each free list for a block below `limit`;
 * on success mark it used, split off any excess via EXPAND, and RETURN
 * the block's address directly from the enclosing function.  Falls
 * through when nothing suitable exists.  Runs with interrupts disabled;
 * restores `flags` (from the enclosing scope) before returning.
 */
#define RMQUEUE(order, limit) \
do { struct mem_list * queue = free_area_list+order; \
     unsigned long new_order = order; \
	do { struct mem_list *prev = queue, *ret; \
		while (queue != (ret = prev->next)) { \
			if ((unsigned long) ret < (limit)) { \
				(prev->next = ret->next)->prev = prev; \
				mark_used((unsigned long) ret, new_order); \
				nr_free_pages -= 1 << order; \
				restore_flags(flags); \
				EXPAND(ret, order, new_order); \
				return (unsigned long) ret; \
			} \
			prev = ret; \
		} \
		new_order++; queue++; \
	} while (new_order < NR_MEM_LISTS); \
} while (0)

/*
 * Toggle the buddy-pair bit for the 2^order block at `addr`; returns the
 * previous bit value (see free_pages_ok() for the toggling protocol).
 */
static inline int mark_used(unsigned long addr, unsigned long order)
{
	return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
}

/*
 * EXPAND: split a 2^high block down to 2^low, returning each unneeded
 * upper half to its free list, then initialize the final page's map
 * entry.  Re-disables interrupts around each list insertion; `flags`
 * comes from the enclosing __get_free_pages() scope.
 */
#define EXPAND(addr,low,high) \
do { unsigned long size = PAGE_SIZE << high; \
	while (high > low) { \
		high--; size >>= 1; cli(); \
		add_mem_queue(free_area_list+high, addr); \
		mark_used((unsigned long) addr, high); \
		restore_flags(flags); \
		addr = (struct mem_list *) (size + (unsigned long) addr); \
	} mem_map[MAP_NR((unsigned long) addr)].count = 1; \
	mem_map[MAP_NR((unsigned long) addr)].age = PAGE_INITIAL_AGE; \
} while (0)
834
/*
 * Allocate a 2^order block of pages below `limit`.  Atomic requests may
 * dip into the reserve; others retry after try_to_free_page() makes
 * progress.  Returns the block's address, or 0 on failure.  (RMQUEUE
 * returns directly from this function on success.)
 */
unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
{
	unsigned long flags;
	int reserved_pages;

	if (order >= NR_MEM_LISTS)
		return 0;
	/* sleeping allocation from interrupt context: warn and degrade to atomic */
	if (intr_count && priority != GFP_ATOMIC) {
		static int count = 0;
		if (++count < 5) {
			printk("gfp called nonatomically from interrupt %p\n",
				__builtin_return_address(0));
			priority = GFP_ATOMIC;
		}
	}
	reserved_pages = 5;	/* NFS gets a smaller reserve floor */
	if (priority != GFP_NFS)
		reserved_pages = min_free_pages;
	save_flags(flags);
repeat:
	cli();
	if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
		RMQUEUE(order, limit);	/* returns on success */
		restore_flags(flags);
		return 0;
	}
	restore_flags(flags);
	if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
		goto repeat;
	return 0;
}
866
867
868
869
870
871
/*
 * Debug dump: print the free-page total and a per-order breakdown of the
 * buddy free lists (and swap-cache stats when compiled in).
 */
void show_free_areas(void)
{
	unsigned long order, flags;
	unsigned long total = 0;

	printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
	save_flags(flags);
	cli();	/* keep the lists stable while walking them */
	for (order=0 ; order < NR_MEM_LISTS; order++) {
		struct mem_list * tmp;
		unsigned long nr = 0;
		for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
			nr ++;
		}
		total += nr * ((PAGE_SIZE>>10) << order);
		printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
	}
	restore_flags(flags);
	printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
	show_swap_cache_info();
#endif
}
895
896
897
898
899
900
901
902
903
904
/*
 * swapoff helper: detach one pte from swap area `type`.  For a present
 * page, drop its swap-cache entry and mark the pte dirty (the swap copy
 * is going away).  For a swapped-out pte, read the slot into the scratch
 * page `page`, map it in, and free the slot.
 *
 * Returns 1 when the scratch page was consumed (caller must get a new
 * one and restart), 0 otherwise.  NOTE: the inner `page` intentionally
 * shadows the scratch-page parameter in the present-page branch, where
 * the scratch page is not needed.
 */
static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
	pte_t *dir, unsigned int type, unsigned long page)
{
	pte_t pte = *dir;

	if (pte_none(pte))
		return 0;
	if (pte_present(pte)) {
		unsigned long page = pte_page(pte);	/* shadows scratch param */
		if (page >= high_memory)
			return 0;
		if (!in_swap_cache(page))
			return 0;
		if (SWP_TYPE(in_swap_cache(page)) != type)
			return 0;
		delete_from_swap_cache(page);
		/* no swap copy any more: page must be considered dirty */
		set_pte(dir, pte_mkdirty(pte));
		return 0;
	}
	if (SWP_TYPE(pte_val(pte)) != type)
		return 0;
	read_swap_page(pte_val(pte), (char *) page);
	/* pte changed while we slept in the read: discard and retry */
	if (pte_val(*dir) != pte_val(pte)) {
		free_page(page);
		return 1;
	}
	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
	++vma->vm_mm->rss;
	swap_free(pte_val(pte));
	return 1;
}
936
/*
 * swapoff helper: apply unuse_pte() to every pte under one pmd covering
 * [address, address+size).  `offset` accumulates the virtual base so the
 * pte-level call receives the vma-relative address.  Returns 1 as soon
 * as the scratch page was consumed, else 0.
 */
static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
	unsigned long address, unsigned long size, unsigned long offset,
	unsigned int type, unsigned long page)
{
	pte_t * pte;
	unsigned long end;

	if (pmd_none(*dir))
		return 0;
	if (pmd_bad(*dir)) {
		printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
		pmd_clear(dir);
		return 0;
	}
	pte = pte_offset(dir, address);
	/* split address into pmd base (folded into offset) and intra-pmd part */
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
			return 1;
		address += PAGE_SIZE;
		pte++;
	} while (address < end);
	return 0;
}
965
/*
 * swapoff helper: apply unuse_pmd() to every pmd under one pgd entry
 * covering [address, address+size).  Returns 1 as soon as the scratch
 * page was consumed, else 0.
 */
static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
	unsigned long address, unsigned long size,
	unsigned int type, unsigned long page)
{
	pmd_t * pmd;
	unsigned long offset, end;

	if (pgd_none(*dir))
		return 0;
	if (pgd_bad(*dir)) {
		printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
		pgd_clear(dir);
		return 0;
	}
	pmd = pmd_offset(dir, address);
	/* split address into pgd base (offset) and intra-pgd part */
	offset = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	do {
		if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
			return 1;
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address < end);
	return 0;
}
994
995 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
996 unsigned long start, unsigned long end,
997 unsigned int type, unsigned long page)
998 {
999 while (start < end) {
1000 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
1001 return 1;
1002 start = (start + PGDIR_SIZE) & PGDIR_MASK;
1003 pgdir++;
1004 }
1005 return 0;
1006 }
1007
/*
 * swapoff helper: run unuse_vma() over every mapping of process `p`.
 * Processes sharing a page directory (pgd_inuse) are skipped — the
 * owner's pass covers them.  Returns 1 when the scratch page was
 * consumed, else 0.
 */
static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
{
	struct vm_area_struct* vma;

	if (!p->mm || pgd_inuse(p->mm->pgd))
		return 0;
	vma = p->mm->mmap;
	while (vma) {
		pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
		if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
			return 1;
		vma = vma->vm_next;
	}
	return 0;
}
1026
1027
1028
1029
1030
1031
/*
 * swapoff core: pull every page of swap area `type` back into memory.
 * A single scratch page is reused across processes; whenever a process
 * consumes it (unuse_process() returns 1) a fresh page is allocated and
 * the same process is rescanned from the top.  Returns 0 on success or
 * -ENOMEM if a scratch page cannot be allocated.
 */
static int try_to_unuse(unsigned int type)
{
	int nr;
	unsigned long page = get_free_page(GFP_KERNEL);

	if (!page)
		return -ENOMEM;
	nr = 0;
	while (nr < NR_TASKS) {
		if (task[nr]) {
			if (unuse_process(task[nr], type, page)) {
				/* scratch page consumed: get another, rescan task */
				page = get_free_page(GFP_KERNEL);
				if (!page)
					return -ENOMEM;
				continue;
			}
		}
		nr++;
	}
	free_page(page);
	return 0;
}
1054
/*
 * swapoff(2): deactivate the swap area named by `specialfile`.
 * Looks the area up by inode (file-backed) or device number
 * (device-backed), unlinks it from the priority list, marks it
 * non-writable, pulls all its pages back in via try_to_unuse(), then
 * releases the device/inode and frees the area's maps.
 * Returns 0 or a negative errno.
 */
asmlinkage int sys_swapoff(const char * specialfile)
{
	struct swap_info_struct * p;
	struct inode * inode;
	struct file filp;
	int i, type, prev;

	if (!suser())
		return -EPERM;
	i = namei(specialfile,&inode);
	if (i)
		return i;
	/* find the matching active area, remembering its list predecessor */
	prev = -1;
	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
		p = swap_info + type;
		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
			if (p->swap_file) {
				if (p->swap_file == inode)
					break;
			} else {
				if (S_ISBLK(inode->i_mode)
				    && (p->swap_device == inode->i_rdev))
					break;
			}
		}
		prev = type;
	}
	if (type < 0){
		iput(inode);
		return -EINVAL;
	}
	/* unlink the area from the priority list */
	if (prev < 0) {
		swap_list.head = p->next;
	} else {
		swap_info[prev].next = p->next;
	}
	if (type == swap_list.next) {
		swap_list.next = swap_list.head;
	}
	/* still SWP_USED so rw_swap_page works, but no new allocations */
	p->flags = SWP_USED;
	i = try_to_unuse(type);
	if (i) {
		/* failed (out of memory): reactivate the area */
		iput(inode);
		p->flags = SWP_WRITEOK;
		return i;
	}

	if(p->swap_device){
		memset(&filp, 0, sizeof(filp));
		filp.f_inode = inode;
		filp.f_mode = 3; /* read write */
		/*
		 * NOTE(review): release() is called twice — apparently once
		 * for this open and once for the open done at swapon time;
		 * confirm against blkdev_open() reference counting.
		 */
		if( !blkdev_open(inode, &filp) &&
		   filp.f_op && filp.f_op->release){
			filp.f_op->release(inode,&filp);
			filp.f_op->release(inode,&filp);
		}
	}
	iput(inode);

	nr_swap_pages -= p->pages;
	iput(p->swap_file);
	p->swap_file = NULL;
	p->swap_device = 0;
	vfree(p->swap_map);
	p->swap_map = NULL;
	free_page((long) p->swap_lockmap);
	p->swap_lockmap = NULL;
	p->flags = 0;
	return 0;
}
1127
1128
1129
1130
1131
1132
/*
 * swapon(2): activate the swap file or device named by `specialfile`.
 * Validates the "SWAP-SPACE" signature page, builds the per-slot use-
 * count map (bad slots premarked 0x80), and inserts the area into the
 * priority-ordered swap list.  `swap_flags` may carry an explicit
 * priority (SWAP_FLAG_PREFER); otherwise a decreasing default is used
 * so earlier swapons rank higher.  Returns 0 or a negative errno.
 */
asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
{
	struct swap_info_struct * p;
	struct inode * swap_inode;
	unsigned int type;
	int i, j, prev;
	int error;
	struct file filp;
	static int least_priority = 0;

	memset(&filp, 0, sizeof(filp));
	if (!suser())
		return -EPERM;
	/* grab the first unused swap_info slot */
	p = swap_info;
	for (type = 0 ; type < nr_swapfiles ; type++,p++)
		if (!(p->flags & SWP_USED))
			break;
	if (type >= MAX_SWAPFILES)
		return -EPERM;
	if (type >= nr_swapfiles)
		nr_swapfiles = type+1;
	p->flags = SWP_USED;
	p->swap_file = NULL;
	p->swap_device = 0;
	p->swap_map = NULL;
	p->swap_lockmap = NULL;
	p->lowest_bit = 0;
	p->highest_bit = 0;
	p->max = 1;
	p->next = -1;
	if (swap_flags & SWAP_FLAG_PREFER) {
		p->prio =
		  (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
	} else {
		p->prio = --least_priority;	/* later swapons rank lower */
	}
	error = namei(specialfile,&swap_inode);
	if (error)
		goto bad_swap_2;
	p->swap_file = swap_inode;
	error = -EBUSY;
	if (swap_inode->i_count != 1)
		goto bad_swap_2;
	error = -EINVAL;

	if (S_ISBLK(swap_inode->i_mode)) {
		p->swap_device = swap_inode->i_rdev;

		filp.f_inode = swap_inode;
		filp.f_mode = 3; /* read write */
		error = blkdev_open(swap_inode, &filp);
		p->swap_file = NULL;
		iput(swap_inode);
		if(error)
			goto bad_swap_2;
		error = -ENODEV;
		if (!p->swap_device)
			goto bad_swap;
		error = -EBUSY;
		/* refuse to activate the same device twice */
		for (i = 0 ; i < nr_swapfiles ; i++) {
			if (i == type)
				continue;
			if (p->swap_device == swap_info[i].swap_device)
				goto bad_swap;
		}
	} else if (!S_ISREG(swap_inode->i_mode))
		goto bad_swap;
	p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
	if (!p->swap_lockmap) {
		printk("Unable to start swapping: out of memory :-)\n");
		error = -ENOMEM;
		goto bad_swap;
	}
	/* slot 0 holds the header: signature + bad-block bitmap */
	read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
	if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
		printk("Unable to find swap-space signature\n");
		error = -EINVAL;
		goto bad_swap;
	}
	memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
	/* header bitmap: set bits mark usable slots; slot 0 is never usable */
	j = 0;
	p->lowest_bit = 0;
	p->highest_bit = 0;
	for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
		if (test_bit(i,p->swap_lockmap)) {
			if (!p->lowest_bit)
				p->lowest_bit = i;
			p->highest_bit = i;
			p->max = i+1;
			j++;
		}
	}
	if (!j) {
		printk("Empty swap-file\n");
		error = -EINVAL;
		goto bad_swap;
	}
	p->swap_map = (unsigned char *) vmalloc(p->max);
	if (!p->swap_map) {
		error = -ENOMEM;
		goto bad_swap;
	}
	/* usable slots start free (0); unusable ones are premarked bad (0x80) */
	for (i = 1 ; i < p->max ; i++) {
		if (test_bit(i,p->swap_lockmap))
			p->swap_map[i] = 0;
		else
			p->swap_map[i] = 0x80;
	}
	p->swap_map[0] = 0x80;	/* header slot is never allocatable */
	memset(p->swap_lockmap,0,PAGE_SIZE);
	p->flags = SWP_WRITEOK;
	p->pages = j;
	nr_swap_pages += j;
	printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));

	/* insert into the priority-ordered list (stable for equal prio) */
	prev = -1;
	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
		if (p->prio >= swap_info[i].prio) {
			break;
		}
		prev = i;
	}
	p->next = i;
	if (prev < 0) {
		swap_list.head = swap_list.next = p - swap_info;
	} else {
		swap_info[prev].next = p - swap_info;
	}
	return 0;
bad_swap:
	if(filp.f_op && filp.f_op->release)
		filp.f_op->release(filp.f_inode,&filp);
bad_swap_2:
	free_page((long) p->swap_lockmap);
	vfree(p->swap_map);
	iput(p->swap_file);
	p->swap_device = 0;
	p->swap_file = NULL;
	p->swap_map = NULL;
	p->swap_lockmap = NULL;
	p->flags = 0;
	return error;
}
1277
/*
 * Fill in the swap fields of a sysinfo structure: total and free swap,
 * in bytes, summed over all writable areas.  Bad slots (map value 128)
 * count toward neither figure.
 */
void si_swapinfo(struct sysinfo *val)
{
	unsigned int i, j;

	val->freeswap = val->totalswap = 0;
	for (i = 0; i < nr_swapfiles; i++) {
		if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
			continue;
		for (j = 0; j < swap_info[i].max; ++j)
			switch (swap_info[i].swap_map[j]) {
				case 128:
					continue;	/* bad slot: skip entirely */
				case 0:
					++val->freeswap;
					/* fallthrough: free slots count in total too */
				default:
					++val->totalswap;
			}
	}
	val->freeswap <<= PAGE_SHIFT;	/* pages -> bytes */
	val->totalswap <<= PAGE_SHIFT;
	return;
}
1300
/* round x up to the next multiple of sizeof(long) */
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * Boot-time setup of the page allocator: size min_free_pages from the
 * amount of memory, place the swap cache, the mem_map array (every page
 * initially reserved; mem_init frees the usable ones later), and one
 * buddy bitmap per order.  Returns the first free address after all the
 * allocator structures.
 */
unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
{
	mem_map_t * p;
	unsigned long mask = PAGE_MASK;
	int i;

	/* reserve ~1/64 of memory as the free-page floor, at least 16 pages */
	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
	if (i < 16)
		i = 16;
	min_free_pages = i;
	start_mem = init_swap_cache(start_mem, end_mem);
	mem_map = (mem_map_t *) start_mem;
	p = mem_map + MAP_NR(end_mem);
	start_mem = LONG_ALIGN((unsigned long) p);
	/* everything starts reserved; mem_init() releases usable pages */
	while (p > mem_map) {
		--p;
		p->count = 0;
		p->dirty = 0;
		p->reserved = 1;
	}

	for (i = 0 ; i < NR_MEM_LISTS ; i++) {
		unsigned long bitmap_size;
		/* empty circular free list for this order */
		free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
		mask += mask;	/* mask = PAGE_MASK << (i+1) */
		end_mem = (end_mem + ~mask) & mask;
		/* one bit per buddy pair at this order */
		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
		bitmap_size = (bitmap_size + 7) >> 3;
		bitmap_size = LONG_ALIGN(bitmap_size);
		free_area_map[i] = (unsigned char *) start_mem;
		memset((void *) start_mem, 0, bitmap_size);
		start_mem += bitmap_size;
	}
	return start_mem;
}