This source file includes following definitions.
- show_swap_cache_info
- add_to_swap_cache
- init_swap_cache
- swap_setup
- buff_setup
- rw_swap_page
- get_swap_page
- swap_duplicate
- swap_free
- swap_in
- try_to_swap_out
- swap_out_pmd
- swap_out_pgd
- swap_out_vma
- swap_out_process
- swap_out
- try_to_free_page
- add_mem_queue
- remove_mem_queue
- free_pages_ok
- check_free_buffers
- free_pages
- mark_used
- __get_free_pages
- show_free_areas
- unuse_pte
- unuse_pmd
- unuse_pgd
- unuse_vma
- unuse_process
- try_to_unuse
- sys_swapoff
- sys_swapon
- si_swapinfo
- free_area_init
/*
 * Swap-space management: swap entry allocation/freeing, the swap
 * cache, page aging and swap-out, swapon/swapoff, and the buddy
 * allocator's free-area handling.
 */
14 #include <linux/mm.h>
15 #include <linux/sched.h>
16 #include <linux/head.h>
17 #include <linux/kernel.h>
18 #include <linux/kernel_stat.h>
19 #include <linux/errno.h>
20 #include <linux/string.h>
21 #include <linux/stat.h>
22 #include <linux/swap.h>
23 #include <linux/fs.h>
24 #include <linux/swapctl.h>
25 #include <linux/pagemap.h>
26
27 #include <asm/dma.h>
28 #include <asm/system.h>
29 #include <asm/segment.h>
30 #include <asm/bitops.h>
31 #include <asm/pgtable.h>
32
33 #define MAX_SWAPFILES 8
34
35 #define SWP_USED 1
36 #define SWP_WRITEOK 3
37
38 int min_free_pages = 20;
39
40
41
42
43
44
45
46 swap_control_t swap_control = {
47 20, 3, 1, 3,
48 10, 2, 2, 4,
49 32, 4,
50 8192, 8192,
51 -200,
52 1, 1,
53 RCL_ROUND_ROBIN
54 };
55
56 static int nr_swapfiles = 0;
57 static struct wait_queue * lock_queue = NULL;
58 static struct {
59 int head;
60 int next;
61 } swap_list = {-1, -1};
62
63 static struct swap_info_struct {
64 unsigned int flags;
65 kdev_t swap_device;
66 struct inode * swap_file;
67 unsigned char * swap_map;
68 unsigned char * swap_lockmap;
69 int lowest_bit;
70 int highest_bit;
71 int prio;
72 int pages;
73 unsigned long max;
74 int next;
75 } swap_info[MAX_SWAPFILES];
76
77 extern int shm_swap (int, unsigned long);
78
79
80
81
82
83
84
85
/*
 * swap_cache maps each physical page number to the swap entry the
 * page is a clean copy of; 0 means "not in the swap cache".
 */
unsigned long *swap_cache;

#ifdef SWAP_CACHE_INFO
/* Attempt/success counters for the swap-cache operations below. */
unsigned long swap_cache_add_total = 0;
unsigned long swap_cache_add_success = 0;
unsigned long swap_cache_del_total = 0;
unsigned long swap_cache_del_success = 0;
unsigned long swap_cache_find_total = 0;
unsigned long swap_cache_find_success = 0;

/* Dump the swap-cache statistics to the kernel log. */
extern inline void show_swap_cache_info(void)
{
	printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
		swap_cache_add_total, swap_cache_add_success,
		swap_cache_del_total, swap_cache_del_success,
		swap_cache_find_total, swap_cache_find_success);
}
#endif
104
105 static int add_to_swap_cache(unsigned long addr, unsigned long entry)
106 {
107 struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
108
109 #ifdef SWAP_CACHE_INFO
110 swap_cache_add_total++;
111 #endif
112 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
113 entry = xchg(swap_cache + MAP_NR(addr), entry);
114 if (entry) {
115 printk("swap_cache: replacing non-NULL entry\n");
116 }
117 #ifdef SWAP_CACHE_INFO
118 swap_cache_add_success++;
119 #endif
120 return 1;
121 }
122 return 0;
123 }
124
125 static unsigned long init_swap_cache(unsigned long mem_start,
126 unsigned long mem_end)
127 {
128 unsigned long swap_cache_size;
129
130 mem_start = (mem_start + 15) & ~15;
131 swap_cache = (unsigned long *) mem_start;
132 swap_cache_size = MAP_NR(mem_end);
133 memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
134 return (unsigned long) (swap_cache + swap_cache_size);
135 }
136
137
138
139
140 void swap_setup(char *str, int *ints)
141 {
142 int * swap_vars[8] = {
143 &MAX_PAGE_AGE,
144 &PAGE_ADVANCE,
145 &PAGE_DECLINE,
146 &PAGE_INITIAL_AGE,
147 &AGE_CLUSTER_FRACT,
148 &AGE_CLUSTER_MIN,
149 &PAGEOUT_WEIGHT,
150 &BUFFEROUT_WEIGHT
151 };
152 int i;
153 for (i=0; i < ints[0] && i < 8; i++) {
154 if (ints[i+1])
155 *(swap_vars[i]) = ints[i+1];
156 }
157 }
158
159
160 void buff_setup(char *str, int *ints)
161 {
162 int * buff_vars[6] = {
163 &MAX_BUFF_AGE,
164 &BUFF_ADVANCE,
165 &BUFF_DECLINE,
166 &BUFF_INITIAL_AGE,
167 &BUFFEROUT_WEIGHT,
168 &BUFFERMEM_GRACE
169 };
170 int i;
171 for (i=0; i < ints[0] && i < 6; i++) {
172 if (ints[i+1])
173 *(buff_vars[i]) = ints[i+1];
174 }
175 }
176
177
178
179 void rw_swap_page(int rw, unsigned long entry, char * buf)
180 {
181 unsigned long type, offset;
182 struct swap_info_struct * p;
183
184 type = SWP_TYPE(entry);
185 if (type >= nr_swapfiles) {
186 printk("Internal error: bad swap-device\n");
187 return;
188 }
189 p = &swap_info[type];
190 offset = SWP_OFFSET(entry);
191 if (offset >= p->max) {
192 printk("rw_swap_page: weirdness\n");
193 return;
194 }
195 if (p->swap_map && !p->swap_map[offset]) {
196 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
197 return;
198 }
199 if (!(p->flags & SWP_USED)) {
200 printk("Trying to swap to unused swap-device\n");
201 return;
202 }
203 while (set_bit(offset,p->swap_lockmap))
204 sleep_on(&lock_queue);
205 if (rw == READ)
206 kstat.pswpin++;
207 else
208 kstat.pswpout++;
209 if (p->swap_device) {
210 ll_rw_page(rw,p->swap_device,offset,buf);
211 } else if (p->swap_file) {
212 struct inode *swapf = p->swap_file;
213 unsigned int zones[PAGE_SIZE/512];
214 int i;
215 if (swapf->i_op->bmap == NULL
216 && swapf->i_op->smap != NULL){
217
218
219
220
221
222
223
224
225
226
227
228
229 int j;
230 unsigned int block = offset << 3;
231
232 for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
233 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
234 printk("rw_swap_page: bad swap file\n");
235 return;
236 }
237 }
238 }else{
239 int j;
240 unsigned int block = offset
241 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
242
243 for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
244 if (!(zones[i] = bmap(swapf,block++))) {
245 printk("rw_swap_page: bad swap file\n");
246 return;
247 }
248 }
249 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
250 } else
251 printk("re_swap_page: no swap file or device\n");
252 if (offset && !clear_bit(offset,p->swap_lockmap))
253 printk("rw_swap_page: lock already cleared\n");
254 wake_up(&lock_queue);
255 }
256
257 unsigned long get_swap_page(void)
258 {
259 struct swap_info_struct * p;
260 unsigned long offset, entry;
261 int type, wrapped = 0;
262
263 type = swap_list.next;
264 if (type < 0)
265 return 0;
266
267 while (1) {
268 p = &swap_info[type];
269 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
270 for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
271 if (p->swap_map[offset])
272 continue;
273 if (test_bit(offset, p->swap_lockmap))
274 continue;
275 p->swap_map[offset] = 1;
276 nr_swap_pages--;
277 if (offset == p->highest_bit)
278 p->highest_bit--;
279 p->lowest_bit = offset;
280 entry = SWP_ENTRY(type,offset);
281
282 type = swap_info[type].next;
283 if (type < 0 || p->prio != swap_info[type].prio) {
284 swap_list.next = swap_list.head;
285 } else {
286 swap_list.next = type;
287 }
288 return entry;
289 }
290 }
291 type = p->next;
292 if (!wrapped) {
293 if (type < 0 || p->prio != swap_info[type].prio) {
294 type = swap_list.head;
295 wrapped = 1;
296 }
297 } else if (type < 0) {
298 return 0;
299 }
300 }
301 }
302
303 void swap_duplicate(unsigned long entry)
304 {
305 struct swap_info_struct * p;
306 unsigned long offset, type;
307
308 if (!entry)
309 return;
310 offset = SWP_OFFSET(entry);
311 type = SWP_TYPE(entry);
312 if (type & SHM_SWP_TYPE)
313 return;
314 if (type >= nr_swapfiles) {
315 printk("Trying to duplicate nonexistent swap-page\n");
316 return;
317 }
318 p = type + swap_info;
319 if (offset >= p->max) {
320 printk("swap_duplicate: weirdness\n");
321 return;
322 }
323 if (!p->swap_map[offset]) {
324 printk("swap_duplicate: trying to duplicate unused page\n");
325 return;
326 }
327 p->swap_map[offset]++;
328 return;
329 }
330
331 void swap_free(unsigned long entry)
332 {
333 struct swap_info_struct * p;
334 unsigned long offset, type;
335
336 if (!entry)
337 return;
338 type = SWP_TYPE(entry);
339 if (type & SHM_SWP_TYPE)
340 return;
341 if (type >= nr_swapfiles) {
342 printk("Trying to free nonexistent swap-page\n");
343 return;
344 }
345 p = & swap_info[type];
346 offset = SWP_OFFSET(entry);
347 if (offset >= p->max) {
348 printk("swap_free: weirdness\n");
349 return;
350 }
351 if (!(p->flags & SWP_USED)) {
352 printk("Trying to free swap from unused swap-device\n");
353 return;
354 }
355 if (offset < p->lowest_bit)
356 p->lowest_bit = offset;
357 if (offset > p->highest_bit)
358 p->highest_bit = offset;
359 if (!p->swap_map[offset])
360 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
361 else
362 if (!--p->swap_map[offset])
363 nr_swap_pages++;
364 if (p->prio > swap_info[swap_list.next].prio) {
365 swap_list.next = swap_list.head;
366 }
367 }
368
369
370
371
372
373
374
375
376 void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
377 pte_t * page_table, unsigned long entry, int write_access)
378 {
379 unsigned long page = __get_free_page(GFP_KERNEL);
380
381 if (pte_val(*page_table) != entry) {
382 free_page(page);
383 return;
384 }
385 if (!page) {
386 set_pte(page_table, BAD_PAGE);
387 swap_free(entry);
388 oom(tsk);
389 return;
390 }
391 read_swap_page(entry, (char *) page);
392 if (pte_val(*page_table) != entry) {
393 free_page(page);
394 return;
395 }
396 vma->vm_mm->rss++;
397 tsk->maj_flt++;
398 if (!write_access && add_to_swap_cache(page, entry)) {
399 set_pte(page_table, mk_pte(page, vma->vm_page_prot));
400 return;
401 }
402 set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
403 swap_free(entry);
404 return;
405 }
406
407
408
409
410
411
412
413
414
415
416
417
418 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
419 unsigned long address, pte_t * page_table, unsigned long limit)
420 {
421 pte_t pte;
422 unsigned long entry;
423 unsigned long page;
424 struct page * page_map;
425
426 pte = *page_table;
427 if (!pte_present(pte))
428 return 0;
429 page = pte_page(pte);
430 if (MAP_NR(page) >= MAP_NR(high_memory))
431 return 0;
432 if (page >= limit)
433 return 0;
434
435 page_map = mem_map + MAP_NR(page);
436 if (page_map->reserved)
437 return 0;
438
439
440
441 if ((pte_dirty(pte) && delete_from_swap_cache(page))
442 || pte_young(pte)) {
443 set_pte(page_table, pte_mkold(pte));
444 page_age_update(page_map, 1);
445 return 0;
446 }
447 if (page_age_update(page_map, pte_young(pte)))
448 return 0;
449 if (pte_dirty(pte)) {
450 if (vma->vm_ops && vma->vm_ops->swapout) {
451 pid_t pid = tsk->pid;
452 vma->vm_mm->rss--;
453 if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
454 kill_proc(pid, SIGBUS, 1);
455 } else {
456 if (page_map->count != 1)
457 return 0;
458 if (!(entry = get_swap_page()))
459 return 0;
460 vma->vm_mm->rss--;
461 set_pte(page_table, __pte(entry));
462 invalidate_page(vma, address);
463 tsk->nswap++;
464 write_swap_page(entry, (char *) page);
465 }
466 free_page(page);
467 return 1;
468 }
469 if ((entry = find_in_swap_cache(page))) {
470 if (page_map->count != 1) {
471 set_pte(page_table, pte_mkdirty(pte));
472 printk("Aiee.. duplicated cached swap-cache entry\n");
473 return 0;
474 }
475 vma->vm_mm->rss--;
476 set_pte(page_table, __pte(entry));
477 invalidate_page(vma, address);
478 free_page(page);
479 return 1;
480 }
481 vma->vm_mm->rss--;
482 pte_clear(page_table);
483 invalidate_page(vma, address);
484 entry = page_unuse(page);
485 free_page(page);
486 return entry;
487 }
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
504 pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
505 {
506 pte_t * pte;
507 unsigned long pmd_end;
508
509 if (pmd_none(*dir))
510 return 0;
511 if (pmd_bad(*dir)) {
512 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
513 pmd_clear(dir);
514 return 0;
515 }
516
517 pte = pte_offset(dir, address);
518
519 pmd_end = (address + PMD_SIZE) & PMD_MASK;
520 if (end > pmd_end)
521 end = pmd_end;
522
523 do {
524 int result;
525 tsk->swap_address = address + PAGE_SIZE;
526 result = try_to_swap_out(tsk, vma, address, pte, limit);
527 if (result)
528 return result;
529 address += PAGE_SIZE;
530 pte++;
531 } while (address < end);
532 return 0;
533 }
534
535 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
536 pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
537 {
538 pmd_t * pmd;
539 unsigned long pgd_end;
540
541 if (pgd_none(*dir))
542 return 0;
543 if (pgd_bad(*dir)) {
544 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
545 pgd_clear(dir);
546 return 0;
547 }
548
549 pmd = pmd_offset(dir, address);
550
551 pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
552 if (end > pgd_end)
553 end = pgd_end;
554
555 do {
556 int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
557 if (result)
558 return result;
559 address = (address + PMD_SIZE) & PMD_MASK;
560 pmd++;
561 } while (address < end);
562 return 0;
563 }
564
565 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
566 pgd_t *pgdir, unsigned long start, unsigned long limit)
567 {
568 unsigned long end;
569
570
571
572 if (vma->vm_flags & (VM_SHM | VM_LOCKED))
573 return 0;
574
575 end = vma->vm_end;
576 while (start < end) {
577 int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
578 if (result)
579 return result;
580 start = (start + PGDIR_SIZE) & PGDIR_MASK;
581 pgdir++;
582 }
583 return 0;
584 }
585
586 static int swap_out_process(struct task_struct * p, unsigned long limit)
587 {
588 unsigned long address;
589 struct vm_area_struct* vma;
590
591
592
593
594 address = p->swap_address;
595 p->swap_address = 0;
596
597
598
599
600 vma = find_vma(p, address);
601 if (!vma)
602 return 0;
603 if (address < vma->vm_start)
604 address = vma->vm_start;
605
606 for (;;) {
607 int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
608 if (result)
609 return result;
610 vma = vma->vm_next;
611 if (!vma)
612 break;
613 address = vma->vm_start;
614 }
615 p->swap_address = 0;
616 return 0;
617 }
618
619 static int swap_out(unsigned int priority, unsigned long limit)
620 {
621 static int swap_task;
622 int loop, counter;
623 struct task_struct *p;
624
625 counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
626 for(; counter >= 0; counter--) {
627
628
629
630
631 loop = 0;
632 while(1) {
633 if (swap_task >= NR_TASKS) {
634 swap_task = 1;
635 if (loop)
636
637 return 0;
638 loop = 1;
639 }
640
641 p = task[swap_task];
642 if (p && p->swappable && p->mm->rss)
643 break;
644
645 swap_task++;
646 }
647
648
649
650
651 if (!p->swap_cnt) {
652
653
654 p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
655 }
656 if (!--p->swap_cnt)
657 swap_task++;
658 switch (swap_out_process(p, limit)) {
659 case 0:
660 if (p->swap_cnt)
661 swap_task++;
662 break;
663 case 1:
664 return 1;
665 default:
666 break;
667 }
668 }
669 return 0;
670 }
671
672
673
674
675
676
677 static int try_to_free_page(int priority, unsigned long limit)
678 {
679 static int state = 0;
680 int i=6;
681
682 switch (state) {
683 do {
684 case 0:
685 if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
686 return 1;
687 state = 1;
688 case 1:
689 if (shm_swap(i, limit))
690 return 1;
691 state = 2;
692 case 2:
693 if (shrink_mmap(i, limit))
694 return 1;
695 state = 3;
696 default:
697 if (swap_out(i, limit))
698 return 1;
699 state = 0;
700 } while(i--);
701 }
702 return 0;
703 }
704
705 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
706 {
707 entry->prev = head;
708 (entry->next = head->next)->prev = entry;
709 head->next = entry;
710 }
711
712 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
713 {
714 struct mem_list * next = entry->next;
715 (next->prev = entry->prev)->next = next;
716 }
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737 static inline void free_pages_ok(unsigned long addr, unsigned long order)
738 {
739 unsigned long index = MAP_NR(addr) >> (1 + order);
740 unsigned long mask = PAGE_MASK << order;
741
742 addr &= mask;
743 nr_free_pages += 1 << order;
744 while (order < NR_MEM_LISTS-1) {
745 if (!change_bit(index, free_area_map[order]))
746 break;
747 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
748 order++;
749 index >>= 1;
750 mask <<= 1;
751 addr &= mask;
752 }
753 add_mem_queue(free_area_list+order, (struct mem_list *) addr);
754 }
755
756 static inline void check_free_buffers(unsigned long addr)
757 {
758 struct buffer_head * bh;
759
760 bh = buffer_pages[MAP_NR(addr)];
761 if (bh) {
762 struct buffer_head *tmp = bh;
763 do {
764 if (tmp->b_list == BUF_SHARED
765 && tmp->b_dev != B_FREE)
766 refile_buffer(tmp);
767 tmp = tmp->b_this_page;
768 } while (tmp != bh);
769 }
770 }
771
772 void free_pages(unsigned long addr, unsigned long order)
773 {
774 if (MAP_NR(addr) < MAP_NR(high_memory)) {
775 unsigned long flag;
776 mem_map_t * map = mem_map + MAP_NR(addr);
777 if (map->reserved)
778 return;
779 if (map->count) {
780 save_flags(flag);
781 cli();
782 if (!--map->count) {
783 free_pages_ok(addr, order);
784 delete_from_swap_cache(addr);
785 }
786 restore_flags(flag);
787 if (map->count == 1)
788 check_free_buffers(addr);
789 return;
790 }
791 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
792 printk("PC = %p\n", __builtin_return_address(0));
793 return;
794 }
795 }
796
797
798
799
800 #define RMQUEUE(order, limit) \
801 do { struct mem_list * queue = free_area_list+order; \
802 unsigned long new_order = order; \
803 do { struct mem_list *prev = queue, *ret; \
804 while (queue != (ret = prev->next)) { \
805 if ((unsigned long) ret < (limit)) { \
806 (prev->next = ret->next)->prev = prev; \
807 mark_used((unsigned long) ret, new_order); \
808 nr_free_pages -= 1 << order; \
809 restore_flags(flags); \
810 EXPAND(ret, order, new_order); \
811 return (unsigned long) ret; \
812 } \
813 prev = ret; \
814 } \
815 new_order++; queue++; \
816 } while (new_order < NR_MEM_LISTS); \
817 } while (0)
818
819 static inline int mark_used(unsigned long addr, unsigned long order)
820 {
821 return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
822 }
823
824 #define EXPAND(addr,low,high) \
825 do { unsigned long size = PAGE_SIZE << high; \
826 while (high > low) { \
827 high--; size >>= 1; cli(); \
828 add_mem_queue(free_area_list+high, addr); \
829 mark_used((unsigned long) addr, high); \
830 restore_flags(flags); \
831 addr = (struct mem_list *) (size + (unsigned long) addr); \
832 } mem_map[MAP_NR((unsigned long) addr)].count = 1; \
833 mem_map[MAP_NR((unsigned long) addr)].age = PAGE_INITIAL_AGE; \
834 } while (0)
835
836 unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
837 {
838 unsigned long flags;
839 int reserved_pages;
840
841 if (order >= NR_MEM_LISTS)
842 return 0;
843 if (intr_count && priority != GFP_ATOMIC) {
844 static int count = 0;
845 if (++count < 5) {
846 printk("gfp called nonatomically from interrupt %p\n",
847 __builtin_return_address(0));
848 priority = GFP_ATOMIC;
849 }
850 }
851 reserved_pages = 5;
852 if (priority != GFP_NFS)
853 reserved_pages = min_free_pages;
854 save_flags(flags);
855 repeat:
856 cli();
857 if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
858 RMQUEUE(order, limit);
859 restore_flags(flags);
860 return 0;
861 }
862 restore_flags(flags);
863 if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
864 goto repeat;
865 return 0;
866 }
867
868
869
870
871
872
873 void show_free_areas(void)
874 {
875 unsigned long order, flags;
876 unsigned long total = 0;
877
878 printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
879 save_flags(flags);
880 cli();
881 for (order=0 ; order < NR_MEM_LISTS; order++) {
882 struct mem_list * tmp;
883 unsigned long nr = 0;
884 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
885 nr ++;
886 }
887 total += nr * ((PAGE_SIZE>>10) << order);
888 printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
889 }
890 restore_flags(flags);
891 printk("= %lukB)\n", total);
892 #ifdef SWAP_CACHE_INFO
893 show_swap_cache_info();
894 #endif
895 }
896
897
898
899
900
901
902
903
904
905
906 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
907 pte_t *dir, unsigned int type, unsigned long page)
908 {
909 pte_t pte = *dir;
910
911 if (pte_none(pte))
912 return 0;
913 if (pte_present(pte)) {
914 unsigned long page = pte_page(pte);
915 if (page >= high_memory)
916 return 0;
917 if (!in_swap_cache(page))
918 return 0;
919 if (SWP_TYPE(in_swap_cache(page)) != type)
920 return 0;
921 delete_from_swap_cache(page);
922 set_pte(dir, pte_mkdirty(pte));
923 return 0;
924 }
925 if (SWP_TYPE(pte_val(pte)) != type)
926 return 0;
927 read_swap_page(pte_val(pte), (char *) page);
928 if (pte_val(*dir) != pte_val(pte)) {
929 free_page(page);
930 return 1;
931 }
932 set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
933 ++vma->vm_mm->rss;
934 swap_free(pte_val(pte));
935 return 1;
936 }
937
938 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
939 unsigned long address, unsigned long size, unsigned long offset,
940 unsigned int type, unsigned long page)
941 {
942 pte_t * pte;
943 unsigned long end;
944
945 if (pmd_none(*dir))
946 return 0;
947 if (pmd_bad(*dir)) {
948 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
949 pmd_clear(dir);
950 return 0;
951 }
952 pte = pte_offset(dir, address);
953 offset += address & PMD_MASK;
954 address &= ~PMD_MASK;
955 end = address + size;
956 if (end > PMD_SIZE)
957 end = PMD_SIZE;
958 do {
959 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
960 return 1;
961 address += PAGE_SIZE;
962 pte++;
963 } while (address < end);
964 return 0;
965 }
966
967 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
968 unsigned long address, unsigned long size,
969 unsigned int type, unsigned long page)
970 {
971 pmd_t * pmd;
972 unsigned long offset, end;
973
974 if (pgd_none(*dir))
975 return 0;
976 if (pgd_bad(*dir)) {
977 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
978 pgd_clear(dir);
979 return 0;
980 }
981 pmd = pmd_offset(dir, address);
982 offset = address & PGDIR_MASK;
983 address &= ~PGDIR_MASK;
984 end = address + size;
985 if (end > PGDIR_SIZE)
986 end = PGDIR_SIZE;
987 do {
988 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
989 return 1;
990 address = (address + PMD_SIZE) & PMD_MASK;
991 pmd++;
992 } while (address < end);
993 return 0;
994 }
995
996 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
997 unsigned long start, unsigned long end,
998 unsigned int type, unsigned long page)
999 {
1000 while (start < end) {
1001 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
1002 return 1;
1003 start = (start + PGDIR_SIZE) & PGDIR_MASK;
1004 pgdir++;
1005 }
1006 return 0;
1007 }
1008
1009 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
1010 {
1011 struct vm_area_struct* vma;
1012
1013
1014
1015
1016 if (!p->mm || pgd_inuse(p->mm->pgd))
1017 return 0;
1018 vma = p->mm->mmap;
1019 while (vma) {
1020 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
1021 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
1022 return 1;
1023 vma = vma->vm_next;
1024 }
1025 return 0;
1026 }
1027
1028
1029
1030
1031
1032
1033 static int try_to_unuse(unsigned int type)
1034 {
1035 int nr;
1036 unsigned long page = get_free_page(GFP_KERNEL);
1037
1038 if (!page)
1039 return -ENOMEM;
1040 nr = 0;
1041 while (nr < NR_TASKS) {
1042 if (task[nr]) {
1043 if (unuse_process(task[nr], type, page)) {
1044 page = get_free_page(GFP_KERNEL);
1045 if (!page)
1046 return -ENOMEM;
1047 continue;
1048 }
1049 }
1050 nr++;
1051 }
1052 free_page(page);
1053 return 0;
1054 }
1055
1056 asmlinkage int sys_swapoff(const char * specialfile)
1057 {
1058 struct swap_info_struct * p;
1059 struct inode * inode;
1060 struct file filp;
1061 int i, type, prev;
1062
1063 if (!suser())
1064 return -EPERM;
1065 i = namei(specialfile,&inode);
1066 if (i)
1067 return i;
1068 prev = -1;
1069 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1070 p = swap_info + type;
1071 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
1072 if (p->swap_file) {
1073 if (p->swap_file == inode)
1074 break;
1075 } else {
1076 if (S_ISBLK(inode->i_mode)
1077 && (p->swap_device == inode->i_rdev))
1078 break;
1079 }
1080 }
1081 prev = type;
1082 }
1083 if (type < 0){
1084 iput(inode);
1085 return -EINVAL;
1086 }
1087 if (prev < 0) {
1088 swap_list.head = p->next;
1089 } else {
1090 swap_info[prev].next = p->next;
1091 }
1092 if (type == swap_list.next) {
1093
1094 swap_list.next = swap_list.head;
1095 }
1096 p->flags = SWP_USED;
1097 i = try_to_unuse(type);
1098 if (i) {
1099 iput(inode);
1100 p->flags = SWP_WRITEOK;
1101 return i;
1102 }
1103
1104 if(p->swap_device){
1105 memset(&filp, 0, sizeof(filp));
1106 filp.f_inode = inode;
1107 filp.f_mode = 3;
1108
1109 if( !blkdev_open(inode, &filp) &&
1110 filp.f_op && filp.f_op->release){
1111 filp.f_op->release(inode,&filp);
1112 filp.f_op->release(inode,&filp);
1113 }
1114 }
1115 iput(inode);
1116
1117 nr_swap_pages -= p->pages;
1118 iput(p->swap_file);
1119 p->swap_file = NULL;
1120 p->swap_device = 0;
1121 vfree(p->swap_map);
1122 p->swap_map = NULL;
1123 free_page((long) p->swap_lockmap);
1124 p->swap_lockmap = NULL;
1125 p->flags = 0;
1126 return 0;
1127 }
1128
1129
1130
1131
1132
1133
1134 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
1135 {
1136 struct swap_info_struct * p;
1137 struct inode * swap_inode;
1138 unsigned int type;
1139 int i, j, prev;
1140 int error;
1141 struct file filp;
1142 static int least_priority = 0;
1143
1144 memset(&filp, 0, sizeof(filp));
1145 if (!suser())
1146 return -EPERM;
1147 p = swap_info;
1148 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1149 if (!(p->flags & SWP_USED))
1150 break;
1151 if (type >= MAX_SWAPFILES)
1152 return -EPERM;
1153 if (type >= nr_swapfiles)
1154 nr_swapfiles = type+1;
1155 p->flags = SWP_USED;
1156 p->swap_file = NULL;
1157 p->swap_device = 0;
1158 p->swap_map = NULL;
1159 p->swap_lockmap = NULL;
1160 p->lowest_bit = 0;
1161 p->highest_bit = 0;
1162 p->max = 1;
1163 p->next = -1;
1164 if (swap_flags & SWAP_FLAG_PREFER) {
1165 p->prio =
1166 (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1167 } else {
1168 p->prio = --least_priority;
1169 }
1170 error = namei(specialfile,&swap_inode);
1171 if (error)
1172 goto bad_swap_2;
1173 p->swap_file = swap_inode;
1174 error = -EBUSY;
1175 if (swap_inode->i_count != 1)
1176 goto bad_swap_2;
1177 error = -EINVAL;
1178
1179 if (S_ISBLK(swap_inode->i_mode)) {
1180 p->swap_device = swap_inode->i_rdev;
1181
1182 filp.f_inode = swap_inode;
1183 filp.f_mode = 3;
1184 error = blkdev_open(swap_inode, &filp);
1185 p->swap_file = NULL;
1186 iput(swap_inode);
1187 if(error)
1188 goto bad_swap_2;
1189 error = -ENODEV;
1190 if (!p->swap_device)
1191 goto bad_swap;
1192 error = -EBUSY;
1193 for (i = 0 ; i < nr_swapfiles ; i++) {
1194 if (i == type)
1195 continue;
1196 if (p->swap_device == swap_info[i].swap_device)
1197 goto bad_swap;
1198 }
1199 } else if (!S_ISREG(swap_inode->i_mode))
1200 goto bad_swap;
1201 p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
1202 if (!p->swap_lockmap) {
1203 printk("Unable to start swapping: out of memory :-)\n");
1204 error = -ENOMEM;
1205 goto bad_swap;
1206 }
1207 read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
1208 if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
1209 printk("Unable to find swap-space signature\n");
1210 error = -EINVAL;
1211 goto bad_swap;
1212 }
1213 memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
1214 j = 0;
1215 p->lowest_bit = 0;
1216 p->highest_bit = 0;
1217 for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
1218 if (test_bit(i,p->swap_lockmap)) {
1219 if (!p->lowest_bit)
1220 p->lowest_bit = i;
1221 p->highest_bit = i;
1222 p->max = i+1;
1223 j++;
1224 }
1225 }
1226 if (!j) {
1227 printk("Empty swap-file\n");
1228 error = -EINVAL;
1229 goto bad_swap;
1230 }
1231 p->swap_map = (unsigned char *) vmalloc(p->max);
1232 if (!p->swap_map) {
1233 error = -ENOMEM;
1234 goto bad_swap;
1235 }
1236 for (i = 1 ; i < p->max ; i++) {
1237 if (test_bit(i,p->swap_lockmap))
1238 p->swap_map[i] = 0;
1239 else
1240 p->swap_map[i] = 0x80;
1241 }
1242 p->swap_map[0] = 0x80;
1243 memset(p->swap_lockmap,0,PAGE_SIZE);
1244 p->flags = SWP_WRITEOK;
1245 p->pages = j;
1246 nr_swap_pages += j;
1247 printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
1248
1249
1250 prev = -1;
1251 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1252 if (p->prio >= swap_info[i].prio) {
1253 break;
1254 }
1255 prev = i;
1256 }
1257 p->next = i;
1258 if (prev < 0) {
1259 swap_list.head = swap_list.next = p - swap_info;
1260 } else {
1261 swap_info[prev].next = p - swap_info;
1262 }
1263 return 0;
1264 bad_swap:
1265 if(filp.f_op && filp.f_op->release)
1266 filp.f_op->release(filp.f_inode,&filp);
1267 bad_swap_2:
1268 free_page((long) p->swap_lockmap);
1269 vfree(p->swap_map);
1270 iput(p->swap_file);
1271 p->swap_device = 0;
1272 p->swap_file = NULL;
1273 p->swap_map = NULL;
1274 p->swap_lockmap = NULL;
1275 p->flags = 0;
1276 return error;
1277 }
1278
1279 void si_swapinfo(struct sysinfo *val)
1280 {
1281 unsigned int i, j;
1282
1283 val->freeswap = val->totalswap = 0;
1284 for (i = 0; i < nr_swapfiles; i++) {
1285 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
1286 continue;
1287 for (j = 0; j < swap_info[i].max; ++j)
1288 switch (swap_info[i].swap_map[j]) {
1289 case 128:
1290 continue;
1291 case 0:
1292 ++val->freeswap;
1293 default:
1294 ++val->totalswap;
1295 }
1296 }
1297 val->freeswap <<= PAGE_SHIFT;
1298 val->totalswap <<= PAGE_SHIFT;
1299 return;
1300 }
1301
1302 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
1303
1304
1305
1306
1307
1308
1309
1310 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
1311 {
1312 mem_map_t * p;
1313 unsigned long mask = PAGE_MASK;
1314 int i;
1315
1316
1317
1318
1319
1320 i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
1321 if (i < 16)
1322 i = 16;
1323 min_free_pages = i;
1324 start_mem = init_swap_cache(start_mem, end_mem);
1325 mem_map = (mem_map_t *) start_mem;
1326 p = mem_map + MAP_NR(end_mem);
1327 start_mem = LONG_ALIGN((unsigned long) p);
1328 memset(mem_map, 0, start_mem - (unsigned long) mem_map);
1329 do {
1330 --p;
1331 p->reserved = 1;
1332 } while (p > mem_map);
1333
1334 for (i = 0 ; i < NR_MEM_LISTS ; i++) {
1335 unsigned long bitmap_size;
1336 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
1337 mask += mask;
1338 end_mem = (end_mem + ~mask) & mask;
1339 bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
1340 bitmap_size = (bitmap_size + 7) >> 3;
1341 bitmap_size = LONG_ALIGN(bitmap_size);
1342 free_area_map[i] = (unsigned int *) start_mem;
1343 memset((void *) start_mem, 0, bitmap_size);
1344 start_mem += bitmap_size;
1345 }
1346 return start_mem;
1347 }