This source file includes following definitions.
- show_swap_cache_info
- add_to_swap_cache
- init_swap_cache
- rw_swap_page
- get_swap_page
- swap_duplicate
- swap_free
- swap_in
- try_to_swap_out
- swap_out_pmd
- swap_out_pgd
- swap_out_vma
- swap_out_process
- swap_out
- try_to_free_page
- add_mem_queue
- remove_mem_queue
- free_pages_ok
- check_free_buffers
- free_pages
- mark_used
- __get_free_pages
- show_free_areas
- unuse_pte
- unuse_pmd
- unuse_pgd
- unuse_vma
- unuse_process
- try_to_unuse
- sys_swapoff
- sys_swapon
- si_swapinfo
- free_area_init
1
2
3
4
5
6
7
8
9
10
11
12 #include <linux/mm.h>
13 #include <linux/sched.h>
14 #include <linux/head.h>
15 #include <linux/kernel.h>
16 #include <linux/kernel_stat.h>
17 #include <linux/errno.h>
18 #include <linux/string.h>
19 #include <linux/stat.h>
20 #include <linux/swap.h>
21 #include <linux/fs.h>
22
23 #include <asm/dma.h>
24 #include <asm/system.h>
25 #include <asm/bitops.h>
26 #include <asm/pgtable.h>
27
28 #define MAX_SWAPFILES 8
29
/* SWP_USED marks a swap area slot as allocated; SWP_WRITEOK (which
   includes the SWP_USED bit) additionally allows new swap allocations. */
30 #define SWP_USED 1
31 #define SWP_WRITEOK 3
32
/* Free-page low-water mark; recomputed from memory size in free_area_init(). */
33 int min_free_pages = 20;
34
35 static int nr_swapfiles = 0;
/* Processes sleeping on a swap-slot lock bit (see rw_swap_page()). */
36 static struct wait_queue * lock_queue = NULL;
/* Priority-ordered list of active swap areas: head = highest priority,
   next = where get_swap_page() resumes its round-robin scan. */
37 static struct {
38 int head;
39 int next;
40 } swap_list = {-1, -1};
41
/* Per swap area bookkeeping, one entry per configured swap file/device. */
42 static struct swap_info_struct {
43 unsigned int flags;
44 kdev_t swap_device;
45 struct inode * swap_file;
/* per-slot use count: 0 = free, 0x80 = bad/unusable (see sys_swapon()) */
46 unsigned char * swap_map;
/* per-slot lock bit, held across swap I/O (see rw_swap_page()) */
47 unsigned char * swap_lockmap;
48 int lowest_bit;
49 int highest_bit;
50 int prio;
51 int pages;
52 unsigned long max;
53 int next;
54 } swap_info[MAX_SWAPFILES];
55
56 extern int shm_swap (int, unsigned long);
57
58
59
60
61
62
63
64
/* One entry per physical page, indexed by MAP_NR(): the swap entry the
   page is a clean copy of, or 0 if the page is not in the swap cache. */
65 unsigned long *swap_cache;
66
66
67 #ifdef SWAP_CACHE_INFO
/* Swap-cache hit/miss statistics, maintained by the add/delete/find
   helpers and dumped by show_free_areas(). */
68 unsigned long swap_cache_add_total = 0;
69 unsigned long swap_cache_add_success = 0;
70 unsigned long swap_cache_del_total = 0;
71 unsigned long swap_cache_del_success = 0;
72 unsigned long swap_cache_find_total = 0;
73 unsigned long swap_cache_find_success = 0;
74
/* Print the attempted/successful counters for each cache operation. */
75 extern inline void show_swap_cache_info(void)
76 {
77 printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
78 swap_cache_add_total, swap_cache_add_success,
79 swap_cache_del_total, swap_cache_del_success,
80 swap_cache_find_total, swap_cache_find_success);
81 }
82 #endif
83
/* Record that the page at `addr` is a clean in-memory copy of swap slot
   `entry`.  Only done when the owning swap area is writable; returns 1
   on success, 0 if the area cannot be used. */
84 static int add_to_swap_cache(unsigned long addr, unsigned long entry)
85 {
86 struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
87
88 #ifdef SWAP_CACHE_INFO
89 swap_cache_add_total++;
90 #endif
91 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
/* xchg returns the previous cache entry, which should have been 0. */
92 entry = xchg(swap_cache + MAP_NR(addr), entry);
93 if (entry) {
94 printk("swap_cache: replacing non-NULL entry\n");
95 }
96 #ifdef SWAP_CACHE_INFO
97 swap_cache_add_success++;
98 #endif
99 return 1;
100 }
101 return 0;
102 }
103
/* Boot-time setup: carve the swap-cache array (one unsigned long per
   physical page frame) out of memory at mem_start, zero it, and return
   the new start of free memory. */
104 static unsigned long init_swap_cache(unsigned long mem_start,
105 unsigned long mem_end)
106 {
107 unsigned long swap_cache_size;
108
/* 16-byte align the table */
109 mem_start = (mem_start + 15) & ~15;
110 swap_cache = (unsigned long *) mem_start;
111 swap_cache_size = MAP_NR(mem_end);
112 memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
113 return (unsigned long) (swap_cache + swap_cache_size);
114 }
115
116 void rw_swap_page(int rw, unsigned long entry, char * buf)
117 {
118 unsigned long type, offset;
119 struct swap_info_struct * p;
120
121 type = SWP_TYPE(entry);
122 if (type >= nr_swapfiles) {
123 printk("Internal error: bad swap-device\n");
124 return;
125 }
126 p = &swap_info[type];
127 offset = SWP_OFFSET(entry);
128 if (offset >= p->max) {
129 printk("rw_swap_page: weirdness\n");
130 return;
131 }
132 if (p->swap_map && !p->swap_map[offset]) {
133 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
134 return;
135 }
136 if (!(p->flags & SWP_USED)) {
137 printk("Trying to swap to unused swap-device\n");
138 return;
139 }
140 while (set_bit(offset,p->swap_lockmap))
141 sleep_on(&lock_queue);
142 if (rw == READ)
143 kstat.pswpin++;
144 else
145 kstat.pswpout++;
146 if (p->swap_device) {
147 ll_rw_page(rw,p->swap_device,offset,buf);
148 } else if (p->swap_file) {
149 struct inode *swapf = p->swap_file;
150 unsigned int zones[PAGE_SIZE/512];
151 int i;
152 if (swapf->i_op->bmap == NULL
153 && swapf->i_op->smap != NULL){
154
155
156
157
158
159
160
161
162
163
164
165
166 int j;
167 unsigned int block = offset << 3;
168
169 for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
170 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
171 printk("rw_swap_page: bad swap file\n");
172 return;
173 }
174 }
175 }else{
176 int j;
177 unsigned int block = offset
178 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
179
180 for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
181 if (!(zones[i] = bmap(swapf,block++))) {
182 printk("rw_swap_page: bad swap file\n");
183 return;
184 }
185 }
186 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
187 } else
188 printk("re_swap_page: no swap file or device\n");
189 if (offset && !clear_bit(offset,p->swap_lockmap))
190 printk("rw_swap_page: lock already cleared\n");
191 wake_up(&lock_queue);
192 }
193
/* Allocate one free slot from the highest-priority swap area with room,
   mark it in use, and return its SWP_ENTRY() cookie (0 when all swap is
   full).  Areas of equal priority are used round-robin via
   swap_list.next; `wrapped` guarantees each area is scanned at most
   once per call. */
194 unsigned long get_swap_page(void)
195 {
196 struct swap_info_struct * p;
197 unsigned long offset, entry;
198 int type, wrapped = 0;
199
200 type = swap_list.next;
201 if (type < 0)
202 return 0;
203
204 while (1) {
205 p = &swap_info[type];
206 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
207 for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
208 if (p->swap_map[offset])
209 continue;
/* skip slots with swap I/O in flight */
210 if (test_bit(offset, p->swap_lockmap))
211 continue;
212 p->swap_map[offset] = 1;
213 nr_swap_pages--;
/* shrink the scan window around the remaining free slots */
214 if (offset == p->highest_bit)
215 p->highest_bit--;
216 p->lowest_bit = offset;
217 entry = SWP_ENTRY(type,offset);
218
/* advance the round-robin pointer within the same priority group,
   or restart from the list head when the group ends */
219 type = swap_info[type].next;
220 if (type < 0 || p->prio != swap_info[type].prio) {
221 swap_list.next = swap_list.head;
222 } else {
223 swap_list.next = type;
224 }
225 return entry;
226 }
227 }
228 type = p->next;
229 if (!wrapped) {
230 if (type < 0 || p->prio != swap_info[type].prio) {
231 type = swap_list.head;
232 wrapped = 1;
233 }
234 } else if (type < 0) {
235 return 0;
236 }
237 }
238 }
239
240 void swap_duplicate(unsigned long entry)
241 {
242 struct swap_info_struct * p;
243 unsigned long offset, type;
244
245 if (!entry)
246 return;
247 offset = SWP_OFFSET(entry);
248 type = SWP_TYPE(entry);
249 if (type & SHM_SWP_TYPE)
250 return;
251 if (type >= nr_swapfiles) {
252 printk("Trying to duplicate nonexistent swap-page\n");
253 return;
254 }
255 p = type + swap_info;
256 if (offset >= p->max) {
257 printk("swap_duplicate: weirdness\n");
258 return;
259 }
260 if (!p->swap_map[offset]) {
261 printk("swap_duplicate: trying to duplicate unused page\n");
262 return;
263 }
264 p->swap_map[offset]++;
265 return;
266 }
267
/* Drop one reference to swap slot `entry`.  When the use count reaches
   zero the slot becomes allocatable again and nr_swap_pages grows. */
268 void swap_free(unsigned long entry)
269 {
270 struct swap_info_struct * p;
271 unsigned long offset, type;
272
273 if (!entry)
274 return;
275 type = SWP_TYPE(entry);
276 if (type & SHM_SWP_TYPE)
277 return;
278 if (type >= nr_swapfiles) {
279 printk("Trying to free nonexistent swap-page\n");
280 return;
281 }
282 p = & swap_info[type];
283 offset = SWP_OFFSET(entry);
284 if (offset >= p->max) {
285 printk("swap_free: weirdness\n");
286 return;
287 }
288 if (!(p->flags & SWP_USED)) {
289 printk("Trying to free swap from unused swap-device\n");
290 return;
291 }
/* widen the allocation scan window to include this slot */
292 if (offset < p->lowest_bit)
293 p->lowest_bit = offset;
294 if (offset > p->highest_bit)
295 p->highest_bit = offset;
296 if (!p->swap_map[offset])
297 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
298 else
299 if (!--p->swap_map[offset])
300 nr_swap_pages++;
/* a higher-priority area gained room: restart allocation at list head */
301 if (p->prio > swap_info[swap_list.next].prio) {
302 swap_list.next = swap_list.head;
303 }
304 }
305
306
307
308
309
310
311
312
/* Handle a fault on a swapped-out pte: allocate a page, read the swap
   slot into it, and map it.  The pte is re-checked against `entry` both
   before and after the (possibly blocking) allocation and read, so a
   concurrent fault that already serviced the page makes us back off. */
313 void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
314 pte_t * page_table, unsigned long entry, int write_access)
315 {
316 unsigned long page = __get_free_page(GFP_KERNEL);
317
318 if (pte_val(*page_table) != entry) {
/* someone else beat us to it (free_page(0) is a no-op) */
319 free_page(page);
320 return;
321 }
322 if (!page) {
323 set_pte(page_table, BAD_PAGE);
324 swap_free(entry);
325 oom(tsk);
326 return;
327 }
328 read_swap_page(entry, (char *) page);
329 if (pte_val(*page_table) != entry) {
330 free_page(page);
331 return;
332 }
333 vma->vm_mm->rss++;
334 tsk->maj_flt++;
/* read faults keep the page clean/read-only and leave it in the swap
   cache, so the swap slot stays valid; write faults give up the slot */
335 if (!write_access && add_to_swap_cache(page, entry)) {
336 set_pte(page_table, mk_pte(page, vma->vm_page_prot));
337 return;
338 }
339 set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
340 swap_free(entry);
341 return;
342 }
343
344
345
346
347
348
349
350
351
352
353
354
/* Try to evict the page mapped at *page_table (only pages below `limit`
   are eligible).  Returns 0 if nothing was freed; nonzero when a page
   reference was dropped (the last branch returns the page's previous
   use count). */
355 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
356 unsigned long address, pte_t * page_table, unsigned long limit)
357 {
358 pte_t pte;
359 unsigned long entry;
360 unsigned long page;
361
362 pte = *page_table;
363 if (!pte_present(pte))
364 return 0;
365 page = pte_page(pte);
366 if (page >= high_memory)
367 return 0;
368 if (page >= limit)
369 return 0;
370 if (mem_map[MAP_NR(page)].reserved)
371 return 0;
/* recently used (or dirtied while cached): just age the pte this pass */
372 if ((pte_dirty(pte) && delete_from_swap_cache(page)) || pte_young(pte)) {
373 set_pte(page_table, pte_mkold(pte));
374 return 0;
375 }
376 if (pte_dirty(pte)) {
/* dirty page: either the vma's own swapout op writes it back ... */
377 if (vma->vm_ops && vma->vm_ops->swapout) {
378 pid_t pid = tsk->pid;
379 vma->vm_mm->rss--;
380 if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
381 kill_proc(pid, SIGBUS, 1);
382 } else {
/* ... or an anonymous page goes to a fresh swap slot (only if we hold
   the sole reference) */
383 if (mem_map[MAP_NR(page)].count != 1)
384 return 0;
385 if (!(entry = get_swap_page()))
386 return 0;
387 vma->vm_mm->rss--;
388 set_pte(page_table, __pte(entry));
389 invalidate();
390 write_swap_page(entry, (char *) page);
391 }
392 free_page(page);
393 return 1;
394 }
/* clean page still in the swap cache: re-point the pte at the cached
   swap entry without any I/O */
395 if ((entry = find_in_swap_cache(page))) {
396 if (mem_map[MAP_NR(page)].count != 1) {
397 set_pte(page_table, pte_mkdirty(pte));
398 printk("Aiee.. duplicated cached swap-cache entry\n");
399 return 0;
400 }
401 vma->vm_mm->rss--;
402 set_pte(page_table, __pte(entry));
403 invalidate();
404 free_page(page);
405 return 1;
406 }
/* clean, uncached page: just drop this mapping's reference */
407 vma->vm_mm->rss--;
408 pte_clear(page_table);
409 invalidate();
410 entry = mem_map[MAP_NR(page)].count;
411 free_page(page);
412 return entry;
413 }
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
/* Per-process eviction quota used by swap_out(): between SWAP_MIN and
   SWAP_MAX pages per turn, scaled as SWAP_RATIO / recent-fault-rate so
   heavily faulting processes give up fewer pages. */
433 #define SWAP_MIN 4
434 #define SWAP_MAX 32
435
436
437
438
439
440 #define SWAP_RATIO 128
441
/* Walk the ptes under one pmd, trying to evict each mapped page until
   try_to_swap_out() reports success.  tsk->swap_address records where
   to resume on the next pass. */
442 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
443 pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
444 {
445 pte_t * pte;
446 unsigned long pmd_end;
447
448 if (pmd_none(*dir))
449 return 0;
450 if (pmd_bad(*dir)) {
451 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
452 pmd_clear(dir);
453 return 0;
454 }
455
456 pte = pte_offset(dir, address);
457
/* clamp the range to this pmd */
458 pmd_end = (address + PMD_SIZE) & PMD_MASK;
459 if (end > pmd_end)
460 end = pmd_end;
461
462 do {
463 int result;
/* remember the resume point before possibly blocking in swap I/O */
464 tsk->swap_address = address + PAGE_SIZE;
465 result = try_to_swap_out(tsk, vma, address, pte, limit);
466 if (result)
467 return result;
468 address += PAGE_SIZE;
469 pte++;
470 } while (address < end);
471 return 0;
472 }
473
/* Walk the pmds under one pgd entry, delegating to swap_out_pmd();
   stops and propagates the first nonzero (success) result. */
474 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
475 pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
476 {
477 pmd_t * pmd;
478 unsigned long pgd_end;
479
480 if (pgd_none(*dir))
481 return 0;
482 if (pgd_bad(*dir)) {
483 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
484 pgd_clear(dir);
485 return 0;
486 }
487
488 pmd = pmd_offset(dir, address);
489
/* clamp the range to this pgd entry */
490 pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
491 if (end > pgd_end)
492 end = pgd_end;
493
494 do {
495 int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
496 if (result)
497 return result;
498 address = (address + PMD_SIZE) & PMD_MASK;
499 pmd++;
500 } while (address < end);
501 return 0;
502 }
503
/* Try to evict pages from one vma, starting at `start`.  Shared-memory
   vmas are skipped (shm has its own swapper, shm_swap()). */
504 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
505 pgd_t *pgdir, unsigned long start, unsigned long limit)
506 {
507 unsigned long end;
508
509
510
511 if (vma->vm_flags & VM_SHM)
512 return 0;
513
514 end = vma->vm_end;
515 while (start < end) {
516 int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
517 if (result)
518 return result;
519 start = (start + PGDIR_SIZE) & PGDIR_MASK;
520 pgdir++;
521 }
522 return 0;
523 }
524
/* Try to evict one page from process `p`, resuming the address-space
   scan at p->swap_address and walking vmas from there to the end. */
525 static int swap_out_process(struct task_struct * p, unsigned long limit)
526 {
527 unsigned long address;
528 struct vm_area_struct* vma;
529
530
531
532
533 address = p->swap_address;
534 p->swap_address = 0;
535
536
537
538
539 vma = find_vma(p, address);
540 if (!vma)
541 return 0;
542 if (address < vma->vm_start)
543 address = vma->vm_start;
544
545 for (;;) {
546 int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
547 if (result)
548 return result;
549 vma = vma->vm_next;
550 if (!vma)
551 break;
552 address = vma->vm_start;
553 }
/* reached the end of the address space: restart from 0 next time */
554 p->swap_address = 0;
555 return 0;
556 }
557
/* Pick victim processes round-robin (static swap_task remembers where
   we stopped) and try to evict a page from each.  Each process gets a
   quota (swap_cnt) derived from its recent major-fault rate, so busy
   processes lose fewer pages.  Returns 1 when a page was freed. */
558 static int swap_out(unsigned int priority, unsigned long limit)
559 {
560 static int swap_task;
561 int loop, counter;
562 struct task_struct *p;
563
564 counter = 6*nr_tasks >> priority;
565 for(; counter >= 0; counter--) {
566
567
568
569
/* find the next swappable task with resident pages; give up after one
   full wrap of the task table */
570 loop = 0;
571 while(1) {
572 if (swap_task >= NR_TASKS) {
573 swap_task = 1;
574 if (loop)
575
576 return 0;
577 loop = 1;
578 }
579
580 p = task[swap_task];
581 if (p && p->swappable && p->mm->rss)
582 break;
583
584 swap_task++;
585 }
586
587
588
589
/* quota exhausted: recompute it from the decaying fault rate */
590 if (!p->swap_cnt) {
591 p->dec_flt = (p->dec_flt * 3) / 4 + p->maj_flt - p->old_maj_flt;
592 p->old_maj_flt = p->maj_flt;
593
594 if (p->dec_flt >= SWAP_RATIO / SWAP_MIN) {
595 p->dec_flt = SWAP_RATIO / SWAP_MIN;
596 p->swap_cnt = SWAP_MIN;
597 } else if (p->dec_flt <= SWAP_RATIO / SWAP_MAX)
598 p->swap_cnt = SWAP_MAX;
599 else
600 p->swap_cnt = SWAP_RATIO / p->dec_flt;
601 }
602 if (!--p->swap_cnt)
603 swap_task++;
604 switch (swap_out_process(p, limit)) {
605 case 0:
/* nothing evictable in this process: move on to the next one */
606 if (p->swap_cnt)
607 swap_task++;
608 break;
609 case 1:
610 return 1;
611 default:
612 break;
613 }
614 }
615 return 0;
616 }
617
618
619
620
621
622
623
624
625
626
/* Try to reclaim one page by, in turn, shrinking the buffer cache,
   swapping shm segments, and swapping out process pages.  NOTE: this is
   a deliberate Duff's-device construct -- the switch jumps into the
   middle of the do/while so each call resumes at the stage (`state`)
   after the one that last succeeded; `i` counts down the urgency passes
   from 6 to 0. */
627 static int try_to_free_page(int priority, unsigned long limit)
628 {
629 static int state = 0;
630 int i=6;
631
632 switch (state) {
633 do {
634 case 0:
635 if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
636 return 1;
637 state = 1;
638 case 1:
639 if (shm_swap(i, limit))
640 return 1;
641 state = 2;
642 default:
643 if (swap_out(i, limit))
644 return 1;
645 state = 0;
646 } while(i--);
647 }
648 return 0;
649 }
650
651 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
652 {
653 entry->prev = head;
654 (entry->next = head->next)->prev = entry;
655 head->next = entry;
656 }
657
658 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
659 {
660 entry->next->prev = entry->prev;
661 entry->prev->next = entry->next;
662 }
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
/* Return a 2^order block of pages to the buddy allocator, coalescing
   with its buddy at each level while the buddy is also free.  The
   change_bit() toggle tracks pairing: bit set afterwards means the
   buddy is still allocated, so coalescing stops. */
683 static inline void free_pages_ok(unsigned long addr, unsigned long order)
684 {
685 unsigned long index = MAP_NR(addr) >> (1 + order);
686 unsigned long mask = PAGE_MASK << order;
687
688 addr &= mask;
689 nr_free_pages += 1 << order;
690 while (order < NR_MEM_LISTS-1) {
691 if (!change_bit(index, free_area_map[order]))
692 break;
/* buddy was free: pull it off its list and merge up one order
   (addr ^ -mask flips the bit that distinguishes the two buddies) */
693 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
694 order++;
695 index >>= 1;
696 mask <<= 1;
697 addr &= mask;
698 }
699 add_mem_queue(free_area_list+order, (struct mem_list *) addr);
700 }
701
702 static inline void check_free_buffers(unsigned long addr)
703 {
704 struct buffer_head * bh;
705
706 bh = buffer_pages[MAP_NR(addr)];
707 if (bh) {
708 struct buffer_head *tmp = bh;
709 do {
710 if (tmp->b_list == BUF_SHARED
711 && tmp->b_dev != B_FREE)
712 refile_buffer(tmp);
713 tmp = tmp->b_this_page;
714 } while (tmp != bh);
715 }
716 }
717
/* Public page-free entry point: drop one reference to the 2^order block
   at `addr`; the block is returned to the buddy lists (and purged from
   the swap cache) only when the count reaches zero.  Reserved pages are
   never freed. */
718 void free_pages(unsigned long addr, unsigned long order)
719 {
720 if (MAP_NR(addr) < MAP_NR(high_memory)) {
721 unsigned long flag;
722 mem_map_t * map = mem_map + MAP_NR(addr);
723 if (map->reserved)
724 return;
725 if (map->count) {
/* count manipulation and list insertion must not race with interrupts */
726 save_flags(flag);
727 cli();
728 if (!--map->count) {
729 free_pages_ok(addr, order);
730 delete_from_swap_cache(addr);
731 }
732 restore_flags(flag);
/* dropped to a single user: shared buffers can be refiled */
733 if (map->count == 1)
734 check_free_buffers(addr);
735 return;
736 }
737 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
738 printk("PC = %p\n", __builtin_return_address(0));
739 return;
740 }
741 }
742
743
744
745
/* Pull the first suitable block (address below `limit`) from the free
   list of the requested order, or from successively larger orders,
   splitting via EXPAND.  On success this RETURNS from the enclosing
   function (__get_free_pages); falling out means no block was found.
   Caller must have interrupts disabled; `flags` must be in scope. */
746 #define RMQUEUE(order, limit) \
747 do { struct mem_list * queue = free_area_list+order; \
748 unsigned long new_order = order; \
749 do { struct mem_list *prev = queue, *ret; \
750 while (queue != (ret = prev->next)) { \
751 if ((unsigned long) ret < (limit)) { \
752 (prev->next = ret->next)->prev = prev; \
753 mark_used((unsigned long) ret, new_order); \
754 nr_free_pages -= 1 << order; \
755 restore_flags(flags); \
756 EXPAND(ret, order, new_order); \
757 return (unsigned long) ret; \
758 } \
759 prev = ret; \
760 } \
761 new_order++; queue++; \
762 } while (new_order < NR_MEM_LISTS); \
763 } while (0)
764
/* Toggle the buddy-pair bit for the 2^order block at `addr`; returns
   the bit's previous value (see free_pages_ok() for the pairing rule). */
765 static inline int mark_used(unsigned long addr, unsigned long order)
766 {
767 return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
768 }
769
/* Split a block taken from order `high` down to the requested order
   `low`: at each step the upper half is kept and the lower half is put
   back on the next-smaller free list.  The surviving piece gets use
   count 1.  Briefly re-disables interrupts around each list insert;
   `flags` must be in scope. */
770 #define EXPAND(addr,low,high) \
771 do { unsigned long size = PAGE_SIZE << high; \
772 while (high > low) { \
773 high--; size >>= 1; cli(); \
774 add_mem_queue(free_area_list+high, addr); \
775 mark_used((unsigned long) addr, high); \
776 restore_flags(flags); \
777 addr = (struct mem_list *) (size + (unsigned long) addr); \
778 } mem_map[MAP_NR((unsigned long) addr)].count = 1; \
779 } while (0)
780
/* Allocate a 2^order block of pages below `limit`.  GFP_ATOMIC requests
   may dip into the reserved pool and never sleep; other priorities
   retry after reclaiming memory via try_to_free_page().  Returns the
   block address or 0 on failure. */
781 unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
782 {
783 unsigned long flags;
784 int reserved_pages;
785
/* non-atomic allocation from interrupt context is a caller bug; warn a
   few times, then silently treat it as atomic */
786 if (intr_count && priority != GFP_ATOMIC) {
787 static int count = 0;
788 if (++count < 5) {
789 printk("gfp called nonatomically from interrupt %p\n",
790 __builtin_return_address(0));
791 priority = GFP_ATOMIC;
792 }
793 }
794 reserved_pages = 5;
795 if (priority != GFP_NFS)
796 reserved_pages = min_free_pages;
797 save_flags(flags);
798 repeat:
799 cli();
800 if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
/* NOTE: RMQUEUE returns from this function on success; reaching the
   next line means no suitable block was found */
801 RMQUEUE(order, limit);
802 restore_flags(flags);
803 return 0;
804 }
805 restore_flags(flags);
806 if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
807 goto repeat;
808 return 0;
809 }
810
811
812
813
814
815
/* Debug dump: print the number of free blocks on each buddy order's
   list and the totals (interrupts disabled while walking the lists). */
816 void show_free_areas(void)
817 {
818 unsigned long order, flags;
819 unsigned long total = 0;
820
821 printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
822 save_flags(flags);
823 cli();
824 for (order=0 ; order < NR_MEM_LISTS; order++) {
825 struct mem_list * tmp;
826 unsigned long nr = 0;
827 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
828 nr ++;
829 }
830 total += nr * ((PAGE_SIZE>>10) << order);
831 printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
832 }
833 restore_flags(flags);
834 printk("= %lukB)\n", total);
835 #ifdef SWAP_CACHE_INFO
836 show_swap_cache_info();
837 #endif
838 }
839
840
841
842
843
844
845
846
847
848
/* For one pte, eliminate any dependence on swap area `type`: a present
   page is removed from the swap cache and marked dirty; a swapped-out
   pte of that type is read back into `page` (a pre-allocated buffer)
   and mapped in.  Returns 1 when `page` was consumed (or the read
   raced), so the caller must get a fresh buffer and retry. */
849 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
850 pte_t *dir, unsigned int type, unsigned long page)
851 {
852 pte_t pte = *dir;
853
854 if (pte_none(pte))
855 return 0;
856 if (pte_present(pte)) {
/* NOTE: this local deliberately shadows the `page` parameter */
857 unsigned long page = pte_page(pte);
858 if (page >= high_memory)
859 return 0;
860 if (!in_swap_cache(page))
861 return 0;
862 if (SWP_TYPE(in_swap_cache(page)) != type)
863 return 0;
/* dirty the pte so the page can no longer silently fall back to swap */
864 delete_from_swap_cache(page);
865 set_pte(dir, pte_mkdirty(pte));
866 return 0;
867 }
868 if (SWP_TYPE(pte_val(pte)) != type)
869 return 0;
870 read_swap_page(pte_val(pte), (char *) page);
/* pte changed while we slept in the read: discard and let caller retry */
871 if (pte_val(*dir) != pte_val(pte)) {
872 free_page(page);
873 return 1;
874 }
875 set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
876 ++vma->vm_mm->rss;
877 swap_free(pte_val(pte));
878 return 1;
879 }
880
/* Apply unuse_pte() to every pte under one pmd; `offset` carries the
   vma-relative base so unuse_pte() sees the full virtual address.
   Returns 1 as soon as the page buffer was consumed. */
881 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
882 unsigned long address, unsigned long size, unsigned long offset,
883 unsigned int type, unsigned long page)
884 {
885 pte_t * pte;
886 unsigned long end;
887
888 if (pmd_none(*dir))
889 return 0;
890 if (pmd_bad(*dir)) {
891 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
892 pmd_clear(dir);
893 return 0;
894 }
895 pte = pte_offset(dir, address);
896 offset += address & PMD_MASK;
897 address &= ~PMD_MASK;
898 end = address + size;
899 if (end > PMD_SIZE)
900 end = PMD_SIZE;
901 do {
902 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
903 return 1;
904 address += PAGE_SIZE;
905 pte++;
906 } while (address < end);
907 return 0;
908 }
909
/* Apply unuse_pmd() to every pmd under one pgd entry.  Returns 1 as
   soon as the page buffer was consumed. */
910 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
911 unsigned long address, unsigned long size,
912 unsigned int type, unsigned long page)
913 {
914 pmd_t * pmd;
915 unsigned long offset, end;
916
917 if (pgd_none(*dir))
918 return 0;
919 if (pgd_bad(*dir)) {
920 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
921 pgd_clear(dir);
922 return 0;
923 }
924 pmd = pmd_offset(dir, address);
925 offset = address & PGDIR_MASK;
926 address &= ~PGDIR_MASK;
927 end = address + size;
928 if (end > PGDIR_SIZE)
929 end = PGDIR_SIZE;
930 do {
931 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
932 return 1;
933 address = (address + PMD_SIZE) & PMD_MASK;
934 pmd++;
935 } while (address < end);
936 return 0;
937 }
938
939 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
940 unsigned long start, unsigned long end,
941 unsigned int type, unsigned long page)
942 {
943 while (start < end) {
944 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
945 return 1;
946 start = (start + PGDIR_SIZE) & PGDIR_MASK;
947 pgdir++;
948 }
949 return 0;
950 }
951
/* Pull every page belonging to swap area `type` out of process `p`'s
   address space, vma by vma.  Returns 1 when the page buffer was
   consumed (caller retries with a fresh buffer). */
952 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
953 {
954 struct vm_area_struct* vma;
955
956
957
958
/* skip tasks without an mm or whose page directory is shared/in use */
959 if (!p->mm || pgd_inuse(p->mm->pgd))
960 return 0;
961 vma = p->mm->mmap;
962 while (vma) {
963 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
964 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
965 return 1;
966 vma = vma->vm_next;
967 }
968 return 0;
969 }
970
971
972
973
974
975
/* Evacuate swap area `type`: for every task, pull its pages off that
   area.  `page` is a scratch buffer that unuse_process() consumes when
   it maps it in; in that case we allocate a fresh one and retry the
   SAME task (the `continue` deliberately skips nr++). */
976 static int try_to_unuse(unsigned int type)
977 {
978 int nr;
979 unsigned long page = get_free_page(GFP_KERNEL);
980
981 if (!page)
982 return -ENOMEM;
983 nr = 0;
984 while (nr < NR_TASKS) {
985 if (task[nr]) {
986 if (unuse_process(task[nr], type, page)) {
987 page = get_free_page(GFP_KERNEL);
988 if (!page)
989 return -ENOMEM;
990 continue;
991 }
992 }
993 nr++;
994 }
995 free_page(page);
996 return 0;
997 }
998
/* swapoff(2): disable the swap area named by `specialfile`.  The area
   is unlinked from the priority list, all its in-use pages are pulled
   back into memory, and its resources are released. */
999 asmlinkage int sys_swapoff(const char * specialfile)
1000 {
1001 struct swap_info_struct * p;
1002 struct inode * inode;
1003 struct file filp;
1004 int i, type, prev;
1005
1006 if (!suser())
1007 return -EPERM;
1008 i = namei(specialfile,&inode);
1009 if (i)
1010 return i;
/* find the active area matching this file or block device */
1011 prev = -1;
1012 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1013 p = swap_info + type;
1014 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
1015 if (p->swap_file) {
1016 if (p->swap_file == inode)
1017 break;
1018 } else {
1019 if (S_ISBLK(inode->i_mode)
1020 && (p->swap_device == inode->i_rdev))
1021 break;
1022 }
1023 }
1024 prev = type;
1025 }
1026 if (type < 0){
1027 iput(inode);
1028 return -EINVAL;
1029 }
/* unlink it from the priority list so no new allocations land here */
1030 if (prev < 0) {
1031 swap_list.head = p->next;
1032 } else {
1033 swap_info[prev].next = p->next;
1034 }
1035 if (type == swap_list.next) {
1036
1037 swap_list.next = swap_list.head;
1038 }
/* keep SWP_USED (so the slot is not reallocated) but clear writability
   while we drain it */
1039 p->flags = SWP_USED;
1040 i = try_to_unuse(type);
1041 if (i) {
1042 iput(inode);
1043 p->flags = SWP_WRITEOK;
1044 return i;
1045 }
1046
1047 if(p->swap_device){
1048 memset(&filp, 0, sizeof(filp));
1049 filp.f_inode = inode;
1050 filp.f_mode = 3;
1051
/* NOTE(review): release is called twice -- presumably once for this
   open and once for the open done at swapon time; verify against
   blkdev_open()'s reference counting */
1052 if( !blkdev_open(inode, &filp) &&
1053 filp.f_op && filp.f_op->release){
1054 filp.f_op->release(inode,&filp);
1055 filp.f_op->release(inode,&filp);
1056 }
1057 }
1058 iput(inode);
1059
1060 nr_swap_pages -= p->pages;
1061 iput(p->swap_file);
1062 p->swap_file = NULL;
1063 p->swap_device = 0;
1064 vfree(p->swap_map);
1065 p->swap_map = NULL;
1066 free_page((long) p->swap_lockmap);
1067 p->swap_lockmap = NULL;
1068 p->flags = 0;
1069 return 0;
1070 }
1071
1072
1073
1074
1075
1076
/* swapon(2): enable the file or block device `specialfile` as a swap
   area.  Validates the "SWAP-SPACE" signature page, builds the use-count
   map (0x80 marks bad/unusable slots), and inserts the area into the
   priority-ordered swap list.  swap_flags may carry an explicit priority
   (SWAP_FLAG_PREFER); otherwise each new area gets a lower priority than
   all previous ones. */
1077 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
1078 {
1079 struct swap_info_struct * p;
1080 struct inode * swap_inode;
1081 unsigned int type;
1082 int i, j, prev;
1083 int error;
1084 struct file filp;
1085 static int least_priority = 0;
1086
1087 memset(&filp, 0, sizeof(filp));
1088 if (!suser())
1089 return -EPERM;
/* grab the first unused swap_info slot */
1090 p = swap_info;
1091 for (type = 0 ; type < nr_swapfiles ; type++,p++)
1092 if (!(p->flags & SWP_USED))
1093 break;
1094 if (type >= MAX_SWAPFILES)
1095 return -EPERM;
1096 if (type >= nr_swapfiles)
1097 nr_swapfiles = type+1;
1098 p->flags = SWP_USED;
1099 p->swap_file = NULL;
1100 p->swap_device = 0;
1101 p->swap_map = NULL;
1102 p->swap_lockmap = NULL;
1103 p->lowest_bit = 0;
1104 p->highest_bit = 0;
1105 p->max = 1;
1106 p->next = -1;
1107 if (swap_flags & SWAP_FLAG_PREFER) {
1108 p->prio =
1109 (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1110 } else {
1111 p->prio = --least_priority;
1112 }
1113 error = namei(specialfile,&swap_inode);
1114 if (error)
1115 goto bad_swap_2;
1116 p->swap_file = swap_inode;
1117 error = -EBUSY;
1118 if (swap_inode->i_count != 1)
1119 goto bad_swap_2;
1120 error = -EINVAL;
1121
1122 if (S_ISBLK(swap_inode->i_mode)) {
1123 p->swap_device = swap_inode->i_rdev;
1124
1125 filp.f_inode = swap_inode;
1126 filp.f_mode = 3;
1127 error = blkdev_open(swap_inode, &filp);
1128 p->swap_file = NULL;
1129 iput(swap_inode);
1130 if(error)
1131 goto bad_swap_2;
1132 error = -ENODEV;
1133 if (!p->swap_device)
1134 goto bad_swap;
/* refuse to enable the same device twice */
1135 error = -EBUSY;
1136 for (i = 0 ; i < nr_swapfiles ; i++) {
1137 if (i == type)
1138 continue;
1139 if (p->swap_device == swap_info[i].swap_device)
1140 goto bad_swap;
1141 }
1142 } else if (!S_ISREG(swap_inode->i_mode))
1143 goto bad_swap;
1144 p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
1145 if (!p->swap_lockmap) {
1146 printk("Unable to start swapping: out of memory :-)\n");
1147 error = -ENOMEM;
1148 goto bad_swap;
1149 }
/* page 0 of the area holds the signature and the good-page bitmap */
1150 read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
1151 if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
1152 printk("Unable to find swap-space signature\n");
1153 error = -EINVAL;
1154 goto bad_swap;
1155 }
1156 memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
/* count the usable pages and find the bitmap's extent */
1157 j = 0;
1158 p->lowest_bit = 0;
1159 p->highest_bit = 0;
1160 for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
1161 if (test_bit(i,p->swap_lockmap)) {
1162 if (!p->lowest_bit)
1163 p->lowest_bit = i;
1164 p->highest_bit = i;
1165 p->max = i+1;
1166 j++;
1167 }
1168 }
1169 if (!j) {
1170 printk("Empty swap-file\n");
1171 error = -EINVAL;
1172 goto bad_swap;
1173 }
1174 p->swap_map = (unsigned char *) vmalloc(p->max);
1175 if (!p->swap_map) {
1176 error = -ENOMEM;
1177 goto bad_swap;
1178 }
/* usable slots start free (0); everything else is bad (0x80) */
1179 for (i = 1 ; i < p->max ; i++) {
1180 if (test_bit(i,p->swap_lockmap))
1181 p->swap_map[i] = 0;
1182 else
1183 p->swap_map[i] = 0x80;
1184 }
1185 p->swap_map[0] = 0x80;
1186 memset(p->swap_lockmap,0,PAGE_SIZE);
1187 p->flags = SWP_WRITEOK;
1188 p->pages = j;
1189 nr_swap_pages += j;
1190 printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
1191
1192
/* insert into the swap list, keeping it sorted by descending priority */
1193 prev = -1;
1194 for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1195 if (p->prio >= swap_info[i].prio) {
1196 break;
1197 }
1198 prev = i;
1199 }
1200 p->next = i;
1201 if (prev < 0) {
1202 swap_list.head = swap_list.next = p - swap_info;
1203 } else {
1204 swap_info[prev].next = p - swap_info;
1205 }
1206 return 0;
1207 bad_swap:
1208 if(filp.f_op && filp.f_op->release)
1209 filp.f_op->release(filp.f_inode,&filp);
1210 bad_swap_2:
1211 free_page((long) p->swap_lockmap);
1212 vfree(p->swap_map);
1213 iput(p->swap_file);
1214 p->swap_device = 0;
1215 p->swap_file = NULL;
1216 p->swap_map = NULL;
1217 p->swap_lockmap = NULL;
1218 p->flags = 0;
1219 return error;
1220 }
1221
/* Fill in the swap totals for sysinfo(2): freeswap/totalswap in bytes,
   counted over all writable swap areas. */
1222 void si_swapinfo(struct sysinfo *val)
1223 {
1224 unsigned int i, j;
1225
1226 val->freeswap = val->totalswap = 0;
1227 for (i = 0; i < nr_swapfiles; i++) {
1228 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
1229 continue;
/* 128 (0x80) marks a bad/reserved slot: counted in neither total.
   case 0 deliberately falls through so free slots count in both. */
1230 for (j = 0; j < swap_info[i].max; ++j)
1231 switch (swap_info[i].swap_map[j]) {
1232 case 128:
1233 continue;
1234 case 0:
1235 ++val->freeswap;
1236 default:
1237 ++val->totalswap;
1238 }
1239 }
1240 val->freeswap <<= PAGE_SHIFT;
1241 val->totalswap <<= PAGE_SHIFT;
1242 return;
1243 }
1244
/* Round x up to the next multiple of sizeof(long). */
1245 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
1246
1247
1248
1249
1250
1251
1252
/* Boot-time initialization of the page allocator: sets min_free_pages
   from the memory size, lays out the swap cache, the mem_map array
   (all pages start reserved; the arch code un-reserves usable ones),
   the empty buddy free lists, and one buddy bitmap per order.  Returns
   the new start of free memory. */
1253 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
1254 {
1255 mem_map_t * p;
1256 unsigned long mask = PAGE_MASK;
1257 int i;
1258
1259
1260
1261
/* reserve 1/64th of memory, but at least 16 pages */
1262
1263 i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
1264 if (i < 16)
1265 i = 16;
1266 min_free_pages = i;
1267 start_mem = init_swap_cache(start_mem, end_mem);
1268 mem_map = (mem_map_t *) start_mem;
1269 p = mem_map + MAP_NR(end_mem);
1270 start_mem = LONG_ALIGN((unsigned long) p);
1271 while (p > mem_map) {
1272 --p;
1273 p->count = 0;
1274 p->dirty = 0;
1275 p->reserved = 1;
1276 }
1277
1278 for (i = 0 ; i < NR_MEM_LISTS ; i++) {
1279 unsigned long bitmap_size;
1280 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
/* one bit per buddy pair at this order */
1281 mask += mask;
1282 end_mem = (end_mem + ~mask) & mask;
1283 bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
1284 bitmap_size = (bitmap_size + 7) >> 3;
1285 bitmap_size = LONG_ALIGN(bitmap_size);
1286 free_area_map[i] = (unsigned char *) start_mem;
1287 memset((void *) start_mem, 0, bitmap_size);
1288 start_mem += bitmap_size;
1289 }
1290 return start_mem;
1291 }