root/mm/swap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. swap_setup
  5. buff_setup
  6. rw_swap_page
  7. get_swap_page
  8. swap_duplicate
  9. swap_free
  10. swap_in
  11. try_to_swap_out
  12. swap_out_pmd
  13. swap_out_pgd
  14. swap_out_vma
  15. swap_out_process
  16. swap_out
  17. try_to_free_page
  18. add_mem_queue
  19. remove_mem_queue
  20. free_pages_ok
  21. check_free_buffers
  22. free_pages
  23. mark_used
  24. __get_free_pages
  25. show_free_areas
  26. unuse_pte
  27. unuse_pmd
  28. unuse_pgd
  29. unuse_vma
  30. unuse_process
  31. try_to_unuse
  32. sys_swapoff
  33. sys_swapon
  34. si_swapinfo
  35. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  *
  11  * Swap aging added 23.2.95, Stephen Tweedie.
  12  */
  13 
  14 #include <linux/mm.h>
  15 #include <linux/sched.h>
  16 #include <linux/head.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/errno.h>
  20 #include <linux/string.h>
  21 #include <linux/stat.h>
  22 #include <linux/swap.h>
  23 #include <linux/fs.h>
  24 #include <linux/swapctl.h>
  25 #include <linux/pagemap.h>
  26 
  27 #include <asm/dma.h>
  28 #include <asm/system.h> /* for cli()/sti() */
  29 #include <asm/segment.h> /* for memcpy_to/fromfs */
  30 #include <asm/bitops.h>
  31 #include <asm/pgtable.h>
  32 
#define MAX_SWAPFILES 8

/* swap_info[].flags values: SWP_USED marks the slot allocated;
   SWP_WRITEOK (which includes the SWP_USED bit) marks it ready
   for paging traffic. */
#define SWP_USED        1
#define SWP_WRITEOK     3

/* Free-page threshold — presumably the level the swapper tries to
   keep available; not referenced in this chunk, used elsewhere. */
int min_free_pages = 20;

/*
 * Constants for the page aging mechanism: the maximum age (actually,
 * the maximum "youthfulness"); the quanta by which pages rejuvenate
 * and age; and the initial age for new pages.
 */

swap_control_t swap_control = {
        20, 3, 1, 3,            /* Page aging */
        10, 2, 2, 4,            /* Buffer aging */
        32, 4,                  /* Aging cluster */
        8192, 8192,             /* Pageout and bufferout weights */
        -200,                   /* Buffer grace */
        1, 1,                   /* Buffs/pages to free */
        RCL_ROUND_ROBIN         /* Balancing policy */
};

static int nr_swapfiles = 0;
/* Processes waiting for a locked swap page sleep here (rw_swap_page). */
static struct wait_queue * lock_queue = NULL;
static struct {
        int head;       /* head of priority-ordered swapfile list */
        int next;       /* swapfile to be used next */
} swap_list = {-1, -1};

/* Per-swapfile bookkeeping.  swap_map is a byte-per-page use count;
   swap_lockmap holds one I/O-lock bit per swap page; lowest_bit and
   highest_bit bound the free-slot scan in get_swap_page(). */
static struct swap_info_struct {
        unsigned int flags;
        kdev_t swap_device;
        struct inode * swap_file;
        unsigned char * swap_map;
        unsigned char * swap_lockmap;
        int lowest_bit;
        int highest_bit;
        int prio;                       /* swap priority */
        int pages;
        unsigned long max;              /* one past the last valid offset */
        int next;                       /* next entry on swap list */
} swap_info[MAX_SWAPFILES];

extern int shm_swap (int, unsigned long);

/*
 * To save us from swapping out pages which have just been swapped in and
 * have not been modified since then, we keep in swap_cache[page>>PAGE_SHIFT]
 * the swap entry which was last used to fill the page, or zero if the
 * page does not currently correspond to a page in swap. PAGE_DIRTY makes
 * this info useless.
 */
unsigned long *swap_cache;
  87 
#ifdef SWAP_CACHE_INFO
/* Attempt/success counters for swap-cache operations, maintained by
   the corresponding add/del/find paths (add is in this file; the
   del/find counterparts live elsewhere) when profiling is enabled. */
unsigned long swap_cache_add_total = 0;
unsigned long swap_cache_add_success = 0;
unsigned long swap_cache_del_total = 0;
unsigned long swap_cache_del_success = 0;
unsigned long swap_cache_find_total = 0;
unsigned long swap_cache_find_success = 0;

/* Dump the accumulated swap-cache statistics to the kernel log. */
extern inline void show_swap_cache_info(void)
{
        printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
                swap_cache_add_total, swap_cache_add_success, 
                swap_cache_del_total, swap_cache_del_success,
                swap_cache_find_total, swap_cache_find_success);
}
#endif
 104 
 105 static int add_to_swap_cache(unsigned long addr, unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 106 {
 107         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
 108 
 109 #ifdef SWAP_CACHE_INFO
 110         swap_cache_add_total++;
 111 #endif
 112         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 113                 entry = xchg(swap_cache + MAP_NR(addr), entry);
 114                 if (entry)  {
 115                         printk("swap_cache: replacing non-NULL entry\n");
 116                 }
 117 #ifdef SWAP_CACHE_INFO
 118                 swap_cache_add_success++;
 119 #endif
 120                 return 1;
 121         }
 122         return 0;
 123 }
 124 
 125 static unsigned long init_swap_cache(unsigned long mem_start,
     /* [previous][next][first][last][top][bottom][index][help] */
 126         unsigned long mem_end)
 127 {
 128         unsigned long swap_cache_size;
 129 
 130         mem_start = (mem_start + 15) & ~15;
 131         swap_cache = (unsigned long *) mem_start;
 132         swap_cache_size = MAP_NR(mem_end);
 133         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 134         return (unsigned long) (swap_cache + swap_cache_size);
 135 }
 136 
 137 /* General swap control */
 138 
 139 /* Parse the kernel command line "swap=" option at load time: */
 140 void swap_setup(char *str, int *ints)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142         int * swap_vars[8] = {
 143                 &MAX_PAGE_AGE,
 144                 &PAGE_ADVANCE,
 145                 &PAGE_DECLINE,
 146                 &PAGE_INITIAL_AGE,
 147                 &AGE_CLUSTER_FRACT,
 148                 &AGE_CLUSTER_MIN,
 149                 &PAGEOUT_WEIGHT,
 150                 &BUFFEROUT_WEIGHT
 151         };
 152         int i;
 153         for (i=0; i < ints[0] && i < 8; i++) {
 154                 if (ints[i+1])
 155                         *(swap_vars[i]) = ints[i+1];
 156         }
 157 }
 158 
 159 /* Parse the kernel command line "buff=" option at load time: */
 160 void buff_setup(char *str, int *ints)
     /* [previous][next][first][last][top][bottom][index][help] */
 161 {
 162         int * buff_vars[6] = {
 163                 &MAX_BUFF_AGE,
 164                 &BUFF_ADVANCE,
 165                 &BUFF_DECLINE,
 166                 &BUFF_INITIAL_AGE,
 167                 &BUFFEROUT_WEIGHT,
 168                 &BUFFERMEM_GRACE
 169         };
 170         int i;
 171         for (i=0; i < ints[0] && i < 6; i++) {
 172                 if (ints[i+1])
 173                         *(buff_vars[i]) = ints[i+1];
 174         }
 175 }
 176 
 177 /* Page aging */
 178 
 179 void rw_swap_page(int rw, unsigned long entry, char * buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181         unsigned long type, offset;
 182         struct swap_info_struct * p;
 183 
 184         type = SWP_TYPE(entry);
 185         if (type >= nr_swapfiles) {
 186                 printk("Internal error: bad swap-device\n");
 187                 return;
 188         }
 189         p = &swap_info[type];
 190         offset = SWP_OFFSET(entry);
 191         if (offset >= p->max) {
 192                 printk("rw_swap_page: weirdness\n");
 193                 return;
 194         }
 195         if (p->swap_map && !p->swap_map[offset]) {
 196                 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
 197                 return;
 198         }
 199         if (!(p->flags & SWP_USED)) {
 200                 printk("Trying to swap to unused swap-device\n");
 201                 return;
 202         }
 203         while (set_bit(offset,p->swap_lockmap))
 204                 sleep_on(&lock_queue);
 205         if (rw == READ)
 206                 kstat.pswpin++;
 207         else
 208                 kstat.pswpout++;
 209         if (p->swap_device) {
 210                 ll_rw_page(rw,p->swap_device,offset,buf);
 211         } else if (p->swap_file) {
 212                 struct inode *swapf = p->swap_file;
 213                 unsigned int zones[PAGE_SIZE/512];
 214                 int i;
 215                 if (swapf->i_op->bmap == NULL
 216                         && swapf->i_op->smap != NULL){
 217                         /*
 218                                 With MsDOS, we use msdos_smap which return
 219                                 a sector number (not a cluster or block number).
 220                                 It is a patch to enable the UMSDOS project.
 221                                 Other people are working on better solution.
 222 
 223                                 It sounds like ll_rw_swap_file defined
 224                                 it operation size (sector size) based on
 225                                 PAGE_SIZE and the number of block to read.
 226                                 So using bmap or smap should work even if
 227                                 smap will require more blocks.
 228                         */
 229                         int j;
 230                         unsigned int block = offset << 3;
 231 
 232                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 233                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 234                                         printk("rw_swap_page: bad swap file\n");
 235                                         return;
 236                                 }
 237                         }
 238                 }else{
 239                         int j;
 240                         unsigned int block = offset
 241                                 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
 242 
 243                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 244                                 if (!(zones[i] = bmap(swapf,block++))) {
 245                                         printk("rw_swap_page: bad swap file\n");
 246                                         return;
 247                                 }
 248                 }
 249                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 250         } else
 251                 printk("re_swap_page: no swap file or device\n");
 252         if (offset && !clear_bit(offset,p->swap_lockmap))
 253                 printk("rw_swap_page: lock already cleared\n");
 254         wake_up(&lock_queue);
 255 }
 256 
/*
 * Allocate one page of swap space and return its swap entry, or 0 if
 * none is available.  Devices are scanned starting at swap_list.next;
 * within a device the free-slot scan is bounded by the lowest_bit /
 * highest_bit hints maintained here and in swap_free().
 */
unsigned long get_swap_page(void)
{
        struct swap_info_struct * p;
        unsigned long offset, entry;
        int type, wrapped = 0;

        type = swap_list.next;
        if (type < 0)
          return 0;

        while (1) {
                p = &swap_info[type];
                if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
                        for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
                                if (p->swap_map[offset])
                                  continue;
                                /* page locked for I/O: skip rather than sleep */
                                if (test_bit(offset, p->swap_lockmap))
                                  continue;
                                p->swap_map[offset] = 1;
                                nr_swap_pages--;
                                if (offset == p->highest_bit)
                                  p->highest_bit--;
                                p->lowest_bit = offset;
                                entry = SWP_ENTRY(type,offset);

                                /* Round-robin among devices of equal priority:
                                   advance to the next device, restarting from
                                   the list head when the priority level ends. */
                                type = swap_info[type].next;
                                if (type < 0 || p->prio != swap_info[type].prio) {
                                    swap_list.next = swap_list.head;
                                } else {
                                    swap_list.next = type;
                                }
                                return entry;
                        }
                }
                /* This device is full or unwritable: move down the list,
                   wrapping over the whole list at most once. */
                type = p->next;
                if (!wrapped) {
                        if (type < 0 || p->prio != swap_info[type].prio) {
                                type = swap_list.head;
                                wrapped = 1;
                        }
                } else if (type < 0) {
                        return 0;       /* out of swap space */
                }
        }
}
 302 
/*
 * Take an extra reference on swap entry 'entry' (its swap_map use
 * count), e.g. when a swapped-out page is shared by another mapping.
 * Zero entries and SysV shared-memory pseudo-entries are ignored.
 */
void swap_duplicate(unsigned long entry)
{
        struct swap_info_struct * p;
        unsigned long offset, type;

        if (!entry)
                return;
        offset = SWP_OFFSET(entry);
        type = SWP_TYPE(entry);
        if (type & SHM_SWP_TYPE)
                return;
        if (type >= nr_swapfiles) {
                printk("Trying to duplicate nonexistent swap-page\n");
                return;
        }
        p = type + swap_info;
        if (offset >= p->max) {
                printk("swap_duplicate: weirdness\n");
                return;
        }
        if (!p->swap_map[offset]) {
                printk("swap_duplicate: trying to duplicate unused page\n");
                return;
        }
        /* NOTE(review): swap_map is an unsigned char count with no
           saturation check here — 256 duplicates would wrap it to 0. */
        p->swap_map[offset]++;
        return;
}
 330 
 331 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 332 {
 333         struct swap_info_struct * p;
 334         unsigned long offset, type;
 335 
 336         if (!entry)
 337                 return;
 338         type = SWP_TYPE(entry);
 339         if (type & SHM_SWP_TYPE)
 340                 return;
 341         if (type >= nr_swapfiles) {
 342                 printk("Trying to free nonexistent swap-page\n");
 343                 return;
 344         }
 345         p = & swap_info[type];
 346         offset = SWP_OFFSET(entry);
 347         if (offset >= p->max) {
 348                 printk("swap_free: weirdness\n");
 349                 return;
 350         }
 351         if (!(p->flags & SWP_USED)) {
 352                 printk("Trying to free swap from unused swap-device\n");
 353                 return;
 354         }
 355         if (offset < p->lowest_bit)
 356                 p->lowest_bit = offset;
 357         if (offset > p->highest_bit)
 358                 p->highest_bit = offset;
 359         if (!p->swap_map[offset])
 360                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 361         else
 362                 if (!--p->swap_map[offset])
 363                         nr_swap_pages++;
 364         if (p->prio > swap_info[swap_list.next].prio) {
 365             swap_list.next = swap_list.head;
 366         }
 367 }
 368 
 369 /*
 370  * The tests may look silly, but it essentially makes sure that
 371  * no other process did a swap-in on us just as we were waiting.
 372  *
 373  * Also, don't bother to add to the swap cache if this page-in
 374  * was due to a write access.
 375  */
void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
        pte_t * page_table, unsigned long entry, int write_access)
{
        unsigned long page = __get_free_page(GFP_KERNEL);

        /* __get_free_page may have slept: did someone else already
           service this fault? */
        if (pte_val(*page_table) != entry) {
                free_page(page);
                return;
        }
        if (!page) {
                set_pte(page_table, BAD_PAGE);
                swap_free(entry);
                oom(tsk);
                return;
        }
        read_swap_page(entry, (char *) page);
        /* re-check: the swap I/O above sleeps as well */
        if (pte_val(*page_table) != entry) {
                free_page(page);
                return;
        }
        vma->vm_mm->rss++;
        tsk->maj_flt++;
        /* Read fault: map the page clean and remember its swap entry
           in the swap cache, so an unmodified page can later be
           discarded without being written back. */
        if (!write_access && add_to_swap_cache(page, entry)) {
                set_pte(page_table, mk_pte(page, vma->vm_page_prot));
                return;
        }
        /* Write fault (or the cache insert failed): map it dirty and
           writable, and release the swap slot. */
        set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
        swap_free(entry);
        return;
}
 406 
 407 /*
 408  * The swap-out functions return 1 if they successfully
 409  * threw something out, and we got a free page. It returns
 410  * zero if it couldn't do anything, and any other value
 411  * indicates it decreased rss, but the page was shared.
 412  *
 413  * NOTE! If it sleeps, it *must* return 1 to make sure we
 414  * don't continue with the swap-out. Otherwise we may be
 415  * using a process that no longer actually exists (it might
 416  * have died while we slept).
 417  */
static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
        unsigned long address, pte_t * page_table, unsigned long limit)
{
        pte_t pte;
        unsigned long entry;
        unsigned long page;
        struct page * page_map;

        pte = *page_table;
        if (!pte_present(pte))
                return 0;
        page = pte_page(pte);
        if (MAP_NR(page) >= MAP_NR(high_memory))
                return 0;
        /* caller only wants pages below 'limit' — presumably to
           satisfy allocations restricted to low memory (cf. RMQUEUE) */
        if (page >= limit)
                return 0;

        page_map = mem_map + MAP_NR(page);
        if (page_map->reserved)
                return 0;
        /* Deal with page aging.  Pages age from being unused; they
         * rejuvinate on being accessed.  Only swap old pages (age==0
         * is oldest). */
        if ((pte_dirty(pte) && delete_from_swap_cache(page)) 
            || pte_young(pte))  {
                /* recently used (or dirtied while cached): clear the
                   accessed bit and rejuvenate instead of swapping */
                set_pte(page_table, pte_mkold(pte));
                page_age_update(page_map, 1);
                return 0;
        }       
        if (page_age_update(page_map, pte_young(pte)))
                return 0;
        if (pte_dirty(pte)) {
                if (vma->vm_ops && vma->vm_ops->swapout) {
                        /* the mapping provides its own swapout (e.g. a
                           shared file mapping); it may sleep, so save the
                           pid first and signal the task on failure */
                        pid_t pid = tsk->pid;
                        vma->vm_mm->rss--;
                        if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
                                kill_proc(pid, SIGBUS, 1);
                } else {
                        /* anonymous dirty page: write it to swap, but only
                           if we hold the sole reference to it */
                        if (page_map->count != 1)
                                return 0;
                        if (!(entry = get_swap_page()))
                                return 0;
                        vma->vm_mm->rss--;
                        set_pte(page_table, __pte(entry));
                        invalidate_page(vma, address);
                        tsk->nswap++;
                        write_swap_page(entry, (char *) page);
                }
                free_page(page);
                return 1;       /* we slept: the process may not exist any more */
        }
        /* clean page still described by the swap cache: repoint the
           pte at the on-disk copy — no I/O needed */
        if ((entry = find_in_swap_cache(page)))  {
                if (page_map->count != 1) {
                        set_pte(page_table, pte_mkdirty(pte));
                        printk("Aiee.. duplicated cached swap-cache entry\n");
                        return 0;
                }
                vma->vm_mm->rss--;
                set_pte(page_table, __pte(entry));
                invalidate_page(vma, address);
                free_page(page);
                return 1;
        } 
        /* clean, old, and not in the swap cache: just drop it */
        vma->vm_mm->rss--;
        pte_clear(page_table);
        invalidate_page(vma, address);
        entry = page_unuse(page);
        free_page(page);
        return entry;
}
 488 
 489 /*
 490  * A new implementation of swap_out().  We do not swap complete processes,
 491  * but only a small number of blocks, before we continue with the next
 492  * process.  The number of blocks actually swapped is determined on the
 493  * number of page faults, that this process actually had in the last time,
 494  * so we won't swap heavily used processes all the time ...
 495  *
 496  * Note: the priority argument is a hint on much CPU to waste with the
 497  *       swap block search, not a hint, of how much blocks to swap with
 498  *       each process.
 499  *
 500  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 501  */
 502 
/*
 * Walk the pte entries under one pmd slot, trying to swap out each
 * page in [address, end).  tsk->swap_address is advanced before each
 * attempt so the next pass resumes after the page we stopped at.
 * Returns the first non-zero try_to_swap_out() result, else 0.
 */
static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
        pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
{
        pte_t * pte;
        unsigned long pmd_end;

        if (pmd_none(*dir))
                return 0;
        if (pmd_bad(*dir)) {
                printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
                pmd_clear(dir);
                return 0;
        }
        
        pte = pte_offset(dir, address);
        
        /* clamp the scan to this pmd's span */
        pmd_end = (address + PMD_SIZE) & PMD_MASK;
        if (end > pmd_end)
                end = pmd_end;

        do {
                int result;
                tsk->swap_address = address + PAGE_SIZE;
                result = try_to_swap_out(tsk, vma, address, pte, limit);
                if (result)
                        return result;
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return 0;
}
 534 
/*
 * Walk the pmd entries under one pgd slot for [address, end).
 * Returns the first non-zero swap_out_pmd() result, else 0.
 */
static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
        pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
{
        pmd_t * pmd;
        unsigned long pgd_end;

        if (pgd_none(*dir))
                return 0;
        if (pgd_bad(*dir)) {
                printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
                pgd_clear(dir);
                return 0;
        }

        pmd = pmd_offset(dir, address);

        /* clamp the scan to this pgd's span */
        pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;  
        if (end > pgd_end)
                end = pgd_end;
        
        do {
                int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
                if (result)
                        return result;
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return 0;
}
 564 
 565 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 566         pgd_t *pgdir, unsigned long start, unsigned long limit)
 567 {
 568         unsigned long end;
 569 
 570         /* Don't swap out areas like shared memory which have their
 571             own separate swapping mechanism or areas which are locked down */
 572         if (vma->vm_flags & (VM_SHM | VM_LOCKED))
 573                 return 0;
 574 
 575         end = vma->vm_end;
 576         while (start < end) {
 577                 int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
 578                 if (result)
 579                         return result;
 580                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 581                 pgdir++;
 582         }
 583         return 0;
 584 }
 585 
/*
 * Resume the page-table scan of one process at p->swap_address and
 * try to swap something out.  Returns the first non-zero result from
 * swap_out_vma(), or 0 when the whole address space was covered.
 */
static int swap_out_process(struct task_struct * p, unsigned long limit)
{
        unsigned long address;
        struct vm_area_struct* vma;

        /*
         * Go through process' page directory.
         */
        address = p->swap_address;
        p->swap_address = 0;

        /*
         * Find the proper vm-area
         */
        vma = find_vma(p, address);
        if (!vma)
                return 0;
        if (address < vma->vm_start)
                address = vma->vm_start;

        for (;;) {
                int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
                if (result)
                        return result;
                vma = vma->vm_next;
                if (!vma)
                        break;
                address = vma->vm_start;
        }
        p->swap_address = 0;    /* wrapped: next pass restarts from 0 */
        return 0;
}
 618 
/*
 * Scan tasks round robin, trying to page a few blocks out of each.
 * 'priority' scales the total scanning effort down; 'limit' restricts
 * which physical pages may be reclaimed.  Returns 1 as soon as one
 * page was freed, 0 if nothing could be swapped out.
 */
static int swap_out(unsigned int priority, unsigned long limit)
{
        static int swap_task;   /* task slot to resume at on the next call */
        int loop, counter;
        struct task_struct *p;

        counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
        for(; counter >= 0; counter--) {
                /*
                 * Check that swap_task is suitable for swapping.  If not, look for
                 * the next suitable process.
                 */
                loop = 0;
                while(1) {
                        if (swap_task >= NR_TASKS) {
                                swap_task = 1;
                                if (loop)
                                        /* all processes are unswappable or already swapped out */
                                        return 0;
                                loop = 1;
                        }

                        p = task[swap_task];
                        if (p && p->swappable && p->mm->rss)
                                break;

                        swap_task++;
                }

                /*
                 * Determine the number of pages to swap from this process.
                 */
                if (!p->swap_cnt) {
                        /* Normalise the number of pages swapped by
                           multiplying by (RSS / 1MB) */
                        p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
                }
                /* budget for this task exhausted: move on next round */
                if (!--p->swap_cnt)
                        swap_task++;
                switch (swap_out_process(p, limit)) {
                        case 0:
                                /* nothing swapped: advance unless the budget
                                   just ran out (then we already advanced) */
                                if (p->swap_cnt)
                                        swap_task++;
                                break;
                        case 1:
                                return 1;       /* freed a page */
                        default:
                                /* rss decreased but the page was shared */
                                break;
                }
        }
        return 0;
}
 671 
 672 /*
 673  * We are much more aggressive about trying to swap out than we used
 674  * to be.  This works out OK, because we now do proper aging on page
 675  * contents. 
 676  */
/*
 * Try to free one physical page below 'limit'.  The switch jumping
 * into the middle of the do-while resumes the reclaim sequence
 * (buffers -> SysV shm -> page cache -> process pages) at the stage
 * after the one that last succeeded, so pressure rotates among the
 * four sources; 'i' decays the per-stage effort each full round.
 */
static int try_to_free_page(int priority, unsigned long limit)
{
        static int state = 0;   /* stage to try first on the next call */
        int i=6;

        switch (state) {
                do {
                case 0:
                        if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
                                return 1;
                        state = 1;
                case 1:
                        if (shm_swap(i, limit))
                                return 1;
                        state = 2;
                case 2:
                        if (shrink_mmap(i, limit))
                                return 1;
                        state = 3;
                default:
                        if (swap_out(i, limit))
                                return 1;
                        state = 0;
                } while(i--);
        }
        return 0;
}
 704 
 705 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 706 {
 707         entry->prev = head;
 708         (entry->next = head->next)->prev = entry;
 709         head->next = entry;
 710 }
 711 
 712 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 713 {
 714         struct mem_list * next = entry->next;
 715         (next->prev = entry->prev)->next = next;
 716 }
 717 
 718 /*
 719  * Free_page() adds the page to the free lists. This is optimized for
 720  * fast normal cases (no error jumps taken normally).
 721  *
 722  * The way to optimize jumps for gcc-2.2.2 is to:
 723  *  - select the "normal" case and put it inside the if () { XXX }
 724  *  - no else-statements if you can avoid them
 725  *
 726  * With the above two rules, you get a straight-line execution path
 727  * for the normal case, giving better asm-code.
 728  *
 729  * free_page() may sleep since the page being freed may be a buffer
 730  * page or present in the swap cache. It will not sleep, however,
 731  * for a freshly allocated page (get_free_page()).
 732  */
 733 
 734 /*
 735  * Buddy system. Hairy. You really aren't expected to understand this
 736  */
/*
 * Return 2^order pages starting at 'addr' to the buddy free lists,
 * coalescing with the buddy block at each order while possible.
 * Called with interrupts disabled by free_pages().
 */
static inline void free_pages_ok(unsigned long addr, unsigned long order)
{
        /* each bitmap bit covers a buddy pair at this order */
        unsigned long index = MAP_NR(addr) >> (1 + order);
        unsigned long mask = PAGE_MASK << order;

        addr &= mask;
        nr_free_pages += 1 << order;
        while (order < NR_MEM_LISTS-1) {
                /* change_bit toggles the pair bit and returns the old
                   value: zero means the buddy is still in use — stop. */
                if (!change_bit(index, free_area_map[order]))
                        break;
                /* buddy address = addr XOR block size (1+~mask == -mask) */
                remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
                order++;
                index >>= 1;
                mask <<= 1;
                addr &= mask;
        }
        add_mem_queue(free_area_list+order, (struct mem_list *) addr);
}
 755 
 756 static inline void check_free_buffers(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 757 {
 758         struct buffer_head * bh;
 759 
 760         bh = buffer_pages[MAP_NR(addr)];
 761         if (bh) {
 762                 struct buffer_head *tmp = bh;
 763                 do {
 764                         if (tmp->b_list == BUF_SHARED
 765                             && tmp->b_dev != B_FREE)
 766                                 refile_buffer(tmp);
 767                         tmp = tmp->b_this_page;
 768                 } while (tmp != bh);
 769         }
 770 }
 771 
/*
 * Drop one reference on the 2^order-page block at 'addr'; the block
 * goes back to the buddy lists when the count hits zero.  Reserved
 * pages and addresses beyond high_memory are ignored.  May sleep (see
 * the header comment above: the page may be in the swap cache).
 */
void free_pages(unsigned long addr, unsigned long order)
{
        if (MAP_NR(addr) < MAP_NR(high_memory)) {
                unsigned long flag;
                mem_map_t * map = mem_map + MAP_NR(addr);
                if (map->reserved)
                        return;
                if (map->count) {
                        /* count decrement and list insertion must be
                           atomic with respect to interrupts */
                        save_flags(flag);
                        cli();
                        if (!--map->count) {
                                free_pages_ok(addr, order);
                                delete_from_swap_cache(addr);
                        }
                        restore_flags(flag);
                        /* still one user left: its buffers may now be
                           eligible for refiling */
                        if (map->count == 1)
                                check_free_buffers(addr);
                        return;
                }
                printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
                printk("PC = %p\n", __builtin_return_address(0));
                return;
        }
}
 796 
/*
 * Some ugly macros to speed up __get_free_pages()..
 */

/*
 * RMQUEUE: scan the free lists from 'order' upwards for a block whose
 * address lies below 'limit'.  On success it unlinks the block, toggles
 * its bitmap bit, splits off any excess via EXPAND, restores interrupt
 * flags and returns straight out of __get_free_pages().  Falls through
 * when nothing suitable is free.  Must be entered with interrupts
 * disabled and a local 'flags' holding the saved flags.
 */
#define RMQUEUE(order, limit) \
do { struct mem_list * queue = free_area_list+order; \
     unsigned long new_order = order; \
        do { struct mem_list *prev = queue, *ret; \
                while (queue != (ret = prev->next)) { \
                        if ((unsigned long) ret < (limit)) { \
                                (prev->next = ret->next)->prev = prev; \
                                mark_used((unsigned long) ret, new_order); \
                                nr_free_pages -= 1 << order; \
                                restore_flags(flags); \
                                EXPAND(ret, order, new_order); \
                                return (unsigned long) ret; \
                        } \
                        prev = ret; \
                } \
                new_order++; queue++; \
        } while (new_order < NR_MEM_LISTS); \
} while (0)
 818 
 819 static inline int mark_used(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 820 {
 821         return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
 822 }
 823 
/*
 * EXPAND: split a block taken from the 'high'-order free list down to
 * the requested 'low' order, queueing each unused upper half back on
 * the appropriate free list, then initialise the resulting page's
 * reference count and age.  Relies on a local 'flags' variable holding
 * the caller's saved interrupt flags.
 */
#define EXPAND(addr,low,high) \
do { unsigned long size = PAGE_SIZE << high; \
        while (high > low) { \
                high--; size >>= 1; cli(); \
                add_mem_queue(free_area_list+high, addr); \
                mark_used((unsigned long) addr, high); \
                restore_flags(flags); \
                addr = (struct mem_list *) (size + (unsigned long) addr); \
        } mem_map[MAP_NR((unsigned long) addr)].count = 1; \
        mem_map[MAP_NR((unsigned long) addr)].age = PAGE_INITIAL_AGE; \
} while (0)
 835 
/*
 * Allocate a 2^order block of pages with its address below 'limit' and
 * return that address, or 0 on failure.  GFP_ATOMIC requests never
 * sleep and may dip into the reserved-page pool; other priorities try
 * to free pages and retry when memory is tight.
 */
unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
{
        unsigned long flags;
        int reserved_pages;

        if (order >= NR_MEM_LISTS)
                return 0;
        if (intr_count && priority != GFP_ATOMIC) {
                /* we must not sleep in interrupt context: warn (at most
                 * a few times) and degrade the request to atomic */
                static int count = 0;
                if (++count < 5) {
                        printk("gfp called nonatomically from interrupt %p\n",
                                __builtin_return_address(0));
                        priority = GFP_ATOMIC;
                }
        }
        /* GFP_NFS keeps a smaller reserve threshold (5 pages), letting
         * it dig deeper into free memory than normal requests */
        reserved_pages = 5;
        if (priority != GFP_NFS)
                reserved_pages = min_free_pages;
        save_flags(flags);
repeat:
        cli();
        if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
                /* RMQUEUE returns from this function on success */
                RMQUEUE(order, limit);
                restore_flags(flags);
                return 0;
        }
        restore_flags(flags);
        /* GFP_BUFFER must not recurse into page reclaim */
        if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
                goto repeat;
        return 0;
}
 867 
 868 /*
 869  * Show free area list (used inside shift_scroll-lock stuff)
 870  * We also calculate the percentage fragmentation. We do this by counting the
 871  * memory on each free list with the exception of the first item on the list.
 872  */
 873 void show_free_areas(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 874 {
 875         unsigned long order, flags;
 876         unsigned long total = 0;
 877 
 878         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 879         save_flags(flags);
 880         cli();
 881         for (order=0 ; order < NR_MEM_LISTS; order++) {
 882                 struct mem_list * tmp;
 883                 unsigned long nr = 0;
 884                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 885                         nr ++;
 886                 }
 887                 total += nr * ((PAGE_SIZE>>10) << order);
 888                 printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
 889         }
 890         restore_flags(flags);
 891         printk("= %lukB)\n", total);
 892 #ifdef SWAP_CACHE_INFO
 893         show_swap_cache_info();
 894 #endif  
 895 }
 896 
 897 /*
 898  * Trying to stop swapping from a file is fraught with races, so
 899  * we repeat quite a bit here when we have to pause. swapoff()
 900  * isn't exactly timing-critical, so who cares (but this is /really/
 901  * inefficient, ugh).
 902  *
 903  * We return 1 after having slept, which makes the process start over
 904  * from the beginning for this process..
 905  */
 906 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 907         pte_t *dir, unsigned int type, unsigned long page)
 908 {
 909         pte_t pte = *dir;
 910 
 911         if (pte_none(pte))
 912                 return 0;
 913         if (pte_present(pte)) {
 914                 unsigned long page = pte_page(pte);
 915                 if (page >= high_memory)
 916                         return 0;
 917                 if (!in_swap_cache(page))
 918                         return 0;
 919                 if (SWP_TYPE(in_swap_cache(page)) != type)
 920                         return 0;
 921                 delete_from_swap_cache(page);
 922                 set_pte(dir, pte_mkdirty(pte));
 923                 return 0;
 924         }
 925         if (SWP_TYPE(pte_val(pte)) != type)
 926                 return 0;
 927         read_swap_page(pte_val(pte), (char *) page);
 928         if (pte_val(*dir) != pte_val(pte)) {
 929                 free_page(page);
 930                 return 1;
 931         }
 932         set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 933         ++vma->vm_mm->rss;
 934         swap_free(pte_val(pte));
 935         return 1;
 936 }
 937 
/*
 * Walk the ptes covered by one pmd entry, un-swapping entries that
 * belong to swap area 'type'.  Returns 1 as soon as unuse_pte slept
 * (caller restarts the scan), 0 when the range finished cleanly.
 */
static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
        unsigned long address, unsigned long size, unsigned long offset,
        unsigned int type, unsigned long page)
{
        pte_t * pte;
        unsigned long end;

        if (pmd_none(*dir))
                return 0;
        if (pmd_bad(*dir)) {
                printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
                pmd_clear(dir);
                return 0;
        }
        pte = pte_offset(dir, address);
        /* fold the pmd-aligned part of the address into 'offset' and
         * keep only the offset within this pmd in 'address' */
        offset += address & PMD_MASK;
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
                /* second argument is the page's offset within the vma */
                if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
                        return 1;
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return 0;
}
 966 
/*
 * Walk the pmds covered by one pgd entry, un-swapping entries that
 * belong to swap area 'type'.  Returns 1 as soon as something was
 * swapped in (caller restarts the scan), 0 otherwise.
 */
static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
        unsigned long address, unsigned long size,
        unsigned int type, unsigned long page)
{
        pmd_t * pmd;
        unsigned long offset, end;

        if (pgd_none(*dir))
                return 0;
        if (pgd_bad(*dir)) {
                printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
                pgd_clear(dir);
                return 0;
        }
        pmd = pmd_offset(dir, address);
        /* virtual base of this pgd's range; rest of 'address' is the
         * offset within the range */
        offset = address & PGDIR_MASK;
        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;       /* clamp to this pgd's span */
        do {
                if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
                        return 1;
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return 0;
}
 995 
 996 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
     /* [previous][next][first][last][top][bottom][index][help] */
 997         unsigned long start, unsigned long end,
 998         unsigned int type, unsigned long page)
 999 {
1000         while (start < end) {
1001                 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
1002                         return 1;
1003                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
1004                 pgdir++;
1005         }
1006         return 0;
1007 }
1008 
1009 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
     /* [previous][next][first][last][top][bottom][index][help] */
1010 {
1011         struct vm_area_struct* vma;
1012 
1013         /*
1014          * Go through process' page directory.
1015          */
1016         if (!p->mm || pgd_inuse(p->mm->pgd))
1017                 return 0;
1018         vma = p->mm->mmap;
1019         while (vma) {
1020                 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
1021                 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
1022                         return 1;
1023                 vma = vma->vm_next;
1024         }
1025         return 0;
1026 }
1027 
1028 /*
1029  * To avoid races, we repeat for each process after having
1030  * swapped something in. That gets rid of a few pesky races,
1031  * and "swapoff" isn't exactly timing critical.
1032  */
1033 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
1034 {
1035         int nr;
1036         unsigned long page = get_free_page(GFP_KERNEL);
1037 
1038         if (!page)
1039                 return -ENOMEM;
1040         nr = 0;
1041         while (nr < NR_TASKS) {
1042                 if (task[nr]) {
1043                         if (unuse_process(task[nr], type, page)) {
1044                                 page = get_free_page(GFP_KERNEL);
1045                                 if (!page)
1046                                         return -ENOMEM;
1047                                 continue;
1048                         }
1049                 }
1050                 nr++;
1051         }
1052         free_page(page);
1053         return 0;
1054 }
1055 
/*
 * The swapoff system call: stop swapping to the given file or device,
 * swap everything on it back in, and release the swap area.
 */
asmlinkage int sys_swapoff(const char * specialfile)
{
        struct swap_info_struct * p;
        struct inode * inode;
        struct file filp;
        int i, type, prev;

        if (!suser())
                return -EPERM;
        i = namei(specialfile,&inode);
        if (i)
                return i;
        /* find the active swap area matching this inode or device */
        prev = -1;
        for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
                p = swap_info + type;
                if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
                        if (p->swap_file) {
                                if (p->swap_file == inode)
                                  break;
                        } else {
                                if (S_ISBLK(inode->i_mode)
                                    && (p->swap_device == inode->i_rdev))
                                  break;
                        }
                }
                prev = type;
        }
        if (type < 0){
                iput(inode);
                return -EINVAL;
        }
        /* unlink the entry from the priority-ordered swap list */
        if (prev < 0) {
                swap_list.head = p->next;
        } else {
                swap_info[prev].next = p->next;
        }
        if (type == swap_list.next) {
                /* just pick something that's safe... */
                swap_list.next = swap_list.head;
        }
        /* SWP_USED without SWP_WRITEOK: no new pages may be allocated
         * on this area while we swap everything back in */
        p->flags = SWP_USED;
        i = try_to_unuse(type);
        if (i) {
                /* could not empty the area: re-enable it and fail */
                iput(inode);
                p->flags = SWP_WRITEOK;
                return i;
        }

        if(p->swap_device){
                memset(&filp, 0, sizeof(filp));         
                filp.f_inode = inode;
                filp.f_mode = 3; /* read write */
                /* open it again to get fops */
                if( !blkdev_open(inode, &filp) &&
                   filp.f_op && filp.f_op->release){
                        /* NOTE(review): release is deliberately called
                         * twice -- presumably balancing this open and
                         * the one done at swapon time; confirm against
                         * blkdev use counting */
                        filp.f_op->release(inode,&filp);
                        filp.f_op->release(inode,&filp);
                }
        }
        iput(inode);

        /* tear down the bookkeeping for this swap area */
        nr_swap_pages -= p->pages;
        iput(p->swap_file);
        p->swap_file = NULL;
        p->swap_device = 0;
        vfree(p->swap_map);
        p->swap_map = NULL;
        free_page((long) p->swap_lockmap);
        p->swap_lockmap = NULL;
        p->flags = 0;
        return 0;
}
1128 
/*
 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 *
 * The swapon system call: validate the given swap file or block device,
 * read and check its signature page, build the per-page usage map and
 * insert the area into the priority-ordered swap list.
 */
asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
{
        struct swap_info_struct * p;
        struct inode * swap_inode;
        unsigned int type;
        int i, j, prev;
        int error;
        struct file filp;
        static int least_priority = 0;

        memset(&filp, 0, sizeof(filp));
        if (!suser())
                return -EPERM;
        /* find a free slot in the swap_info table */
        p = swap_info;
        for (type = 0 ; type < nr_swapfiles ; type++,p++)
                if (!(p->flags & SWP_USED))
                        break;
        if (type >= MAX_SWAPFILES)
                return -EPERM;
        if (type >= nr_swapfiles)
                nr_swapfiles = type+1;
        /* claim the slot and reset all its fields */
        p->flags = SWP_USED;
        p->swap_file = NULL;
        p->swap_device = 0;
        p->swap_map = NULL;
        p->swap_lockmap = NULL;
        p->lowest_bit = 0;
        p->highest_bit = 0;
        p->max = 1;
        p->next = -1;
        if (swap_flags & SWAP_FLAG_PREFER) {
                p->prio =
                  (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
        } else {
                /* unprioritized areas get successively lower priorities */
                p->prio = --least_priority;
        }
        error = namei(specialfile,&swap_inode);
        if (error)
                goto bad_swap_2;
        p->swap_file = swap_inode;
        error = -EBUSY;
        if (swap_inode->i_count != 1)
                goto bad_swap_2;
        error = -EINVAL;

        if (S_ISBLK(swap_inode->i_mode)) {
                p->swap_device = swap_inode->i_rdev;

                /* open the device; keep only the device number, drop
                 * the inode reference again */
                filp.f_inode = swap_inode;
                filp.f_mode = 3; /* read write */
                error = blkdev_open(swap_inode, &filp);
                p->swap_file = NULL;
                iput(swap_inode);
                if(error)
                        goto bad_swap_2;
                error = -ENODEV;
                if (!p->swap_device)
                        goto bad_swap;
                error = -EBUSY;
                /* refuse a device that is already being swapped to */
                for (i = 0 ; i < nr_swapfiles ; i++) {
                        if (i == type)
                                continue;
                        if (p->swap_device == swap_info[i].swap_device)
                                goto bad_swap;
                }
        } else if (!S_ISREG(swap_inode->i_mode))
                goto bad_swap;
        p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
        if (!p->swap_lockmap) {
                printk("Unable to start swapping: out of memory :-)\n");
                error = -ENOMEM;
                goto bad_swap;
        }
        /* page 0 of the area holds the signature and good-page bitmap */
        read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
        if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
                printk("Unable to find swap-space signature\n");
                error = -EINVAL;
                goto bad_swap;
        }
        memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
        /* count usable pages: a set bit marks a good page; page 0 is
         * never usable (it holds the header) */
        j = 0;
        p->lowest_bit = 0;
        p->highest_bit = 0;
        for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
                if (test_bit(i,p->swap_lockmap)) {
                        if (!p->lowest_bit)
                                p->lowest_bit = i;
                        p->highest_bit = i;
                        p->max = i+1;
                        j++;
                }
        }
        if (!j) {
                printk("Empty swap-file\n");
                error = -EINVAL;
                goto bad_swap;
        }
        /* per-page usage map: 0 = free, 0x80 = bad/unusable */
        p->swap_map = (unsigned char *) vmalloc(p->max);
        if (!p->swap_map) {
                error = -ENOMEM;
                goto bad_swap;
        }
        for (i = 1 ; i < p->max ; i++) {
                if (test_bit(i,p->swap_lockmap))
                        p->swap_map[i] = 0;
                else
                        p->swap_map[i] = 0x80;
        }
        p->swap_map[0] = 0x80;  /* header page is never allocated */
        memset(p->swap_lockmap,0,PAGE_SIZE);
        p->flags = SWP_WRITEOK;
        p->pages = j;
        nr_swap_pages += j;
        printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));

        /* insert swap space into swap_list: */
        /* the list is kept ordered by descending priority */
        prev = -1;
        for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
                if (p->prio >= swap_info[i].prio) {
                        break;
                }
                prev = i;
        }
        p->next = i;
        if (prev < 0) {
                swap_list.head = swap_list.next = p - swap_info;
        } else {
                swap_info[prev].next = p - swap_info;
        }
        return 0;
bad_swap:
        /* undo the block-device open, if any */
        if(filp.f_op && filp.f_op->release)
                filp.f_op->release(filp.f_inode,&filp);
bad_swap_2:
        free_page((long) p->swap_lockmap);
        vfree(p->swap_map);
        iput(p->swap_file);
        p->swap_device = 0;
        p->swap_file = NULL;
        p->swap_map = NULL;
        p->swap_lockmap = NULL;
        p->flags = 0;
        return error;
}
1278 
1279 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
1280 {
1281         unsigned int i, j;
1282 
1283         val->freeswap = val->totalswap = 0;
1284         for (i = 0; i < nr_swapfiles; i++) {
1285                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
1286                         continue;
1287                 for (j = 0; j < swap_info[i].max; ++j)
1288                         switch (swap_info[i].swap_map[j]) {
1289                                 case 128:
1290                                         continue;
1291                                 case 0:
1292                                         ++val->freeswap;
1293                                 default:
1294                                         ++val->totalswap;
1295                         }
1296         }
1297         val->freeswap <<= PAGE_SHIFT;
1298         val->totalswap <<= PAGE_SHIFT;
1299         return;
1300 }
1301 
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))

/*
 * set up the free-area data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 * Allocates mem_map and the buddy bitmaps from 'start_mem' and returns
 * the new start of free memory.
 */
unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
{
        mem_map_t * p;
        unsigned long mask = PAGE_MASK;
        int i;

        /*
         * select nr of pages we try to keep free for important stuff
         * with a minimum of 16 pages. This is totally arbitrary
         */
        i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);  /* ~1/64 of memory */
        if (i < 16)
                i = 16;
        min_free_pages = i;
        /* the swap cache allocates its tables from start_mem first */
        start_mem = init_swap_cache(start_mem, end_mem);
        mem_map = (mem_map_t *) start_mem;
        p = mem_map + MAP_NR(end_mem);
        start_mem = LONG_ALIGN((unsigned long) p);
        memset(mem_map, 0, start_mem - (unsigned long) mem_map);
        /* every page starts out reserved -- NOTE(review): the usable
         * ones are presumably freed later by arch init code; confirm */
        do {
                --p;
                p->reserved = 1;
        } while (p > mem_map);

        for (i = 0 ; i < NR_MEM_LISTS ; i++) {
                unsigned long bitmap_size;
                /* empty circular free list for this order */
                free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
                mask += mask;   /* doubles each order */
                end_mem = (end_mem + ~mask) & mask;     /* round up to block size */
                /* one bit per buddy pair at this order, byte-rounded
                 * and long-aligned */
                bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
                bitmap_size = (bitmap_size + 7) >> 3;
                bitmap_size = LONG_ALIGN(bitmap_size);
                free_area_map[i] = (unsigned int *) start_mem;
                memset((void *) start_mem, 0, bitmap_size);
                start_mem += bitmap_size;
        }
        return start_mem;
}

/* [previous][next][first][last][top][bottom][index][help] */