root/mm/swap.c


DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. swap_setup
  5. buff_setup
  6. rw_swap_page
  7. get_swap_page
  8. swap_duplicate
  9. swap_free
  10. swap_in
  11. try_to_swap_out
  12. swap_out_pmd
  13. swap_out_pgd
  14. swap_out_vma
  15. swap_out_process
  16. swap_out
  17. try_to_free_page
  18. add_mem_queue
  19. remove_mem_queue
  20. free_pages_ok
  21. check_free_buffers
  22. free_pages
  23. mark_used
  24. __get_free_pages
  25. show_free_areas
  26. unuse_pte
  27. unuse_pmd
  28. unuse_pgd
  29. unuse_vma
  30. unuse_process
  31. try_to_unuse
  32. sys_swapoff
  33. sys_swapon
  34. si_swapinfo
  35. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  *
  11  * Swap aging added 23.2.95, Stephen Tweedie.
  12  */
  13 
  14 #include <linux/mm.h>
  15 #include <linux/sched.h>
  16 #include <linux/head.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/errno.h>
  20 #include <linux/string.h>
  21 #include <linux/stat.h>
  22 #include <linux/swap.h>
  23 #include <linux/fs.h>
  24 #include <linux/swapctl.h>
  25 
  26 #include <asm/dma.h>
  27 #include <asm/system.h> /* for cli()/sti() */
  28 #include <asm/segment.h> /* for memcpy_to/fromfs */
  29 #include <asm/bitops.h>
  30 #include <asm/pgtable.h>
  31 
  32 #define MAX_SWAPFILES 8
  33 
  34 #define SWP_USED        1
  35 #define SWP_WRITEOK     3
  36 
  37 int min_free_pages = 20;
  38 
  39 /*
  40  * Constants for the page aging mechanism: the maximum age (actually,
   41  * the maximum "youthfulness"); the quanta by which pages rejuvenate
  42  * and age; and the initial age for new pages. 
  43  */
  44 
  45 swap_control_t swap_control = {
  46         20, 3, 1, 3,            /* Page aging */
  47         10, 2, 2, 0,            /* Buffer aging */
  48         32, 4,                  /* Aging cluster */
  49         8192, 4096,             /* Pageout and bufferout weights */
  50         -200,                   /* Buffer grace */
  51         1, 1,                   /* Buffs/pages to free */
  52         RCL_ROUND_ROBIN         /* Balancing policy */
  53 };
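      /*
       * The positional initializers above presumably correspond, in this
       * order, to MAX_PAGE_AGE, PAGE_ADVANCE, PAGE_DECLINE and
       * PAGE_INITIAL_AGE; their BUFF_* counterparts; AGE_CLUSTER_FRACT and
       * AGE_CLUSTER_MIN; PAGEOUT_WEIGHT and BUFFEROUT_WEIGHT; and
       * BUFFERMEM_GRACE, i.e. the <linux/swapctl.h> macros that
       * swap_setup() and buff_setup() below poke by address.
       */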
  54 
  55 static int nr_swapfiles = 0;
  56 static struct wait_queue * lock_queue = NULL;
  57 static struct {
  58         int head;       /* head of priority-ordered swapfile list */
  59         int next;       /* swapfile to be used next */
  60 } swap_list = {-1, -1};
  61 
  62 static struct swap_info_struct {
  63         unsigned int flags;
  64         kdev_t swap_device;
  65         struct inode * swap_file;
  66         unsigned char * swap_map;
  67         unsigned char * swap_lockmap;
  68         int lowest_bit;
  69         int highest_bit;
  70         int prio;                       /* swap priority */
  71         int pages;
  72         unsigned long max;
  73         int next;                       /* next entry on swap list */
  74 } swap_info[MAX_SWAPFILES];
  75 
  76 extern int shm_swap (int, unsigned long);
  77 
  78 /*
  79  * To save us from swapping out pages which have just been swapped in and
  80  * have not been modified since then, we keep in swap_cache[page>>PAGE_SHIFT]
  81  * the swap entry which was last used to fill the page, or zero if the
  82  * page does not currently correspond to a page in swap. PAGE_DIRTY makes
  83  * this info useless.
  84  */
  85 unsigned long *swap_cache;
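      /*
       * A minimal sketch of the intended lookup (using MAP_NR() exactly as
       * add_to_swap_cache() below does):
       *
       *      unsigned long entry = swap_cache[MAP_NR(page)];
       *
       * A non-zero entry means the page still matches that swap slot, so a
       * clean copy can simply be dropped instead of being written out again.
       */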
  86 
  87 #ifdef SWAP_CACHE_INFO
  88 unsigned long swap_cache_add_total = 0;
  89 unsigned long swap_cache_add_success = 0;
  90 unsigned long swap_cache_del_total = 0;
  91 unsigned long swap_cache_del_success = 0;
  92 unsigned long swap_cache_find_total = 0;
  93 unsigned long swap_cache_find_success = 0;
  94 
  95 extern inline void show_swap_cache_info(void)
  96 {
  97         printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
  98                 swap_cache_add_total, swap_cache_add_success, 
  99                 swap_cache_del_total, swap_cache_del_success,
 100                 swap_cache_find_total, swap_cache_find_success);
 101 }
 102 #endif
 103 
 104 static int add_to_swap_cache(unsigned long addr, unsigned long entry)
 105 {
 106         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
 107 
 108 #ifdef SWAP_CACHE_INFO
 109         swap_cache_add_total++;
 110 #endif
 111         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 112                 entry = xchg(swap_cache + MAP_NR(addr), entry);
 113                 if (entry)  {
 114                         printk("swap_cache: replacing non-NULL entry\n");
 115                 }
 116 #ifdef SWAP_CACHE_INFO
 117                 swap_cache_add_success++;
 118 #endif
 119                 return 1;
 120         }
 121         return 0;
 122 }
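      /*
       * The xchg() above stores the new entry and fetches the old one in a
       * single atomic step, so a non-NULL return reliably indicates a cache
       * slot that was not cleaned up properly.
       */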
 123 
 124 static unsigned long init_swap_cache(unsigned long mem_start,
 125         unsigned long mem_end)
 126 {
 127         unsigned long swap_cache_size;
 128 
 129         mem_start = (mem_start + 15) & ~15;
 130         swap_cache = (unsigned long *) mem_start;
 131         swap_cache_size = MAP_NR(mem_end);
 132         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 133         return (unsigned long) (swap_cache + swap_cache_size);
 134 }
 135 
 136 /* General swap control */
 137 
 138 /* Parse the kernel command line "swap=" option at load time: */
 139 void swap_setup(char *str, int *ints)
 140 {
 141         int * swap_vars[8] = {
 142                 &MAX_PAGE_AGE,
 143                 &PAGE_ADVANCE,
 144                 &PAGE_DECLINE,
 145                 &PAGE_INITIAL_AGE,
 146                 &AGE_CLUSTER_FRACT,
 147                 &AGE_CLUSTER_MIN,
 148                 &PAGEOUT_WEIGHT,
 149                 &BUFFEROUT_WEIGHT
 150         };
 151         int i;
 152         for (i=0; i < ints[0] && i < 8; i++) {
 153                 if (ints[i+1])
 154                         *(swap_vars[i]) = ints[i+1];
 155         }
 156 }
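      /*
       * Example (values assumed purely for illustration): booting with
       *
       *      swap=30,3,1,5
       *
       * leaves the count 4 in ints[0] and the values in ints[1..4], setting
       * MAX_PAGE_AGE=30, PAGE_ADVANCE=3, PAGE_DECLINE=1 and
       * PAGE_INITIAL_AGE=5 while the remaining four variables keep their
       * defaults.  A zero entry likewise leaves its variable untouched.
       */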
 157 
 158 /* Parse the kernel command line "buff=" option at load time: */
 159 void buff_setup(char *str, int *ints)
 160 {
 161         int * buff_vars[6] = {
 162                 &MAX_BUFF_AGE,
 163                 &BUFF_ADVANCE,
 164                 &BUFF_DECLINE,
 165                 &BUFF_INITIAL_AGE,
 166                 &BUFFEROUT_WEIGHT,
 167                 &BUFFERMEM_GRACE
 168         };
 169         int i;
 170         for (i=0; i < ints[0] && i < 6; i++) {
 171                 if (ints[i+1])
 172                         *(buff_vars[i]) = ints[i+1];
 173         }
 174 }
 175 
 176 /* Page aging */
 177 
 178 void rw_swap_page(int rw, unsigned long entry, char * buf)
 179 {
 180         unsigned long type, offset;
 181         struct swap_info_struct * p;
 182 
 183         type = SWP_TYPE(entry);
 184         if (type >= nr_swapfiles) {
 185                 printk("Internal error: bad swap-device\n");
 186                 return;
 187         }
 188         p = &swap_info[type];
 189         offset = SWP_OFFSET(entry);
 190         if (offset >= p->max) {
 191                 printk("rw_swap_page: weirdness\n");
 192                 return;
 193         }
 194         if (p->swap_map && !p->swap_map[offset]) {
 195                 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
 196                 return;
 197         }
 198         if (!(p->flags & SWP_USED)) {
 199                 printk("Trying to swap to unused swap-device\n");
 200                 return;
 201         }
 202         while (set_bit(offset,p->swap_lockmap))
 203                 sleep_on(&lock_queue);
 204         if (rw == READ)
 205                 kstat.pswpin++;
 206         else
 207                 kstat.pswpout++;
 208         if (p->swap_device) {
 209                 ll_rw_page(rw,p->swap_device,offset,buf);
 210         } else if (p->swap_file) {
 211                 struct inode *swapf = p->swap_file;
 212                 unsigned int zones[PAGE_SIZE/512];
 213                 int i;
 214                 if (swapf->i_op->bmap == NULL
 215                         && swapf->i_op->smap != NULL){
  216                         /*
  217                                 With MS-DOS, we use msdos_smap, which returns
  218                                 a sector number (not a cluster or block number).
  219                                 It is a patch to enable the UMSDOS project;
  220                                 other people are working on a better solution.
  221 
  222                                 ll_rw_swap_file sizes its operation (the
  223                                 sector size) based on PAGE_SIZE and the
  224                                 number of blocks to read, so using either
  225                                 bmap or smap should work, even though
  226                                 smap requires more blocks.
  227                         */
 228                         int j;
 229                         unsigned int block = offset << 3;
 230 
 231                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 232                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 233                                         printk("rw_swap_page: bad swap file\n");
 234                                         return;
 235                                 }
 236                         }
 237                 }else{
 238                         int j;
 239                         unsigned int block = offset
 240                                 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
 241 
 242                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 243                                 if (!(zones[i] = bmap(swapf,block++))) {
 244                                         printk("rw_swap_page: bad swap file\n");
 245                                         return;
 246                                 }
 247                 }
 248                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 249         } else
  250                 printk("rw_swap_page: no swap file or device\n");
 251         if (offset && !clear_bit(offset,p->swap_lockmap))
 252                 printk("rw_swap_page: lock already cleared\n");
 253         wake_up(&lock_queue);
 254 }
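      /*
       * A worked example of the block arithmetic above (i386 values assumed):
       * with PAGE_SIZE = 4096 and a 1024-byte filesystem block size,
       * s_blocksize_bits = 10, so a page spans 4096/1024 = 4 blocks and swap
       * offset `offset' starts at file block (offset << 2).  The smap() path
       * always works in 512-byte sectors, hence the fixed (offset << 3) and
       * the j += 512 stride.
       */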
 255 
 256 unsigned long get_swap_page(void)
 257 {
 258         struct swap_info_struct * p;
 259         unsigned long offset, entry;
 260         int type, wrapped = 0;
 261 
 262         type = swap_list.next;
 263         if (type < 0)
 264           return 0;
 265 
 266         while (1) {
 267                 p = &swap_info[type];
 268                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 269                         for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
 270                                 if (p->swap_map[offset])
 271                                   continue;
 272                                 if (test_bit(offset, p->swap_lockmap))
 273                                   continue;
 274                                 p->swap_map[offset] = 1;
 275                                 nr_swap_pages--;
 276                                 if (offset == p->highest_bit)
 277                                   p->highest_bit--;
 278                                 p->lowest_bit = offset;
 279                                 entry = SWP_ENTRY(type,offset);
 280 
 281                                 type = swap_info[type].next;
 282                                 if (type < 0 || p->prio != swap_info[type].prio) {
 283                                     swap_list.next = swap_list.head;
 284                                 } else {
 285                                     swap_list.next = type;
 286                                 }
 287                                 return entry;
 288                         }
 289                 }
 290                 type = p->next;
 291                 if (!wrapped) {
 292                         if (type < 0 || p->prio != swap_info[type].prio) {
 293                                 type = swap_list.head;
 294                                 wrapped = 1;
 295                         }
 296                 } else if (type < 0) {
 297                         return 0;       /* out of swap space */
 298                 }
 299         }
 300 }
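      /*
       * Sketch of the returned encoding, assuming the usual layout of the
       * SWP_* macros in <linux/swap.h>: SWP_ENTRY(type, offset) packs the
       * swap area index and page offset into one word with the pte "present"
       * bit clear, such that
       *
       *      SWP_TYPE(SWP_ENTRY(type, offset)) == type
       *      SWP_OFFSET(SWP_ENTRY(type, offset)) == offset
       *
       * so an entry stored in a page table (see try_to_swap_out() below) is
       * never mistaken for a mapped page.
       */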
 301 
 302 void swap_duplicate(unsigned long entry)
 303 {
 304         struct swap_info_struct * p;
 305         unsigned long offset, type;
 306 
 307         if (!entry)
 308                 return;
 309         offset = SWP_OFFSET(entry);
 310         type = SWP_TYPE(entry);
 311         if (type & SHM_SWP_TYPE)
 312                 return;
 313         if (type >= nr_swapfiles) {
 314                 printk("Trying to duplicate nonexistent swap-page\n");
 315                 return;
 316         }
 317         p = type + swap_info;
 318         if (offset >= p->max) {
 319                 printk("swap_duplicate: weirdness\n");
 320                 return;
 321         }
 322         if (!p->swap_map[offset]) {
 323                 printk("swap_duplicate: trying to duplicate unused page\n");
 324                 return;
 325         }
 326         p->swap_map[offset]++;
 327         return;
 328 }
 329 
 330 void swap_free(unsigned long entry)
 331 {
 332         struct swap_info_struct * p;
 333         unsigned long offset, type;
 334 
 335         if (!entry)
 336                 return;
 337         type = SWP_TYPE(entry);
 338         if (type & SHM_SWP_TYPE)
 339                 return;
 340         if (type >= nr_swapfiles) {
 341                 printk("Trying to free nonexistent swap-page\n");
 342                 return;
 343         }
 344         p = & swap_info[type];
 345         offset = SWP_OFFSET(entry);
 346         if (offset >= p->max) {
 347                 printk("swap_free: weirdness\n");
 348                 return;
 349         }
 350         if (!(p->flags & SWP_USED)) {
 351                 printk("Trying to free swap from unused swap-device\n");
 352                 return;
 353         }
 354         if (offset < p->lowest_bit)
 355                 p->lowest_bit = offset;
 356         if (offset > p->highest_bit)
 357                 p->highest_bit = offset;
 358         if (!p->swap_map[offset])
 359                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 360         else
 361                 if (!--p->swap_map[offset])
 362                         nr_swap_pages++;
 363         if (p->prio > swap_info[swap_list.next].prio) {
 364             swap_list.next = swap_list.head;
 365         }
 366 }
 367 
 368 /*
  369  * The tests may look silly, but they essentially make sure that
 370  * no other process did a swap-in on us just as we were waiting.
 371  *
 372  * Also, don't bother to add to the swap cache if this page-in
 373  * was due to a write access.
 374  */
 375 void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
 376         pte_t * page_table, unsigned long entry, int write_access)
 377 {
 378         unsigned long page = __get_free_page(GFP_KERNEL);
 379 
 380         if (pte_val(*page_table) != entry) {
 381                 free_page(page);
 382                 return;
 383         }
 384         if (!page) {
 385                 set_pte(page_table, BAD_PAGE);
 386                 swap_free(entry);
 387                 oom(tsk);
 388                 return;
 389         }
 390         read_swap_page(entry, (char *) page);
 391         if (pte_val(*page_table) != entry) {
 392                 free_page(page);
 393                 return;
 394         }
 395         vma->vm_mm->rss++;
 396         tsk->maj_flt++;
 397         if (!write_access && add_to_swap_cache(page, entry)) {
 398                 set_pte(page_table, mk_pte(page, vma->vm_page_prot));
 399                 return;
 400         }
 401         set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 402         swap_free(entry);
 403         return;
 404 }
 405 
 406 /*
 407  * The swap-out functions return 1 if they successfully
 408  * threw something out, and we got a free page. It returns
 409  * zero if it couldn't do anything, and any other value
 410  * indicates it decreased rss, but the page was shared.
 411  *
 412  * NOTE! If it sleeps, it *must* return 1 to make sure we
 413  * don't continue with the swap-out. Otherwise we may be
 414  * using a process that no longer actually exists (it might
 415  * have died while we slept).
 416  */
 417 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
 418         unsigned long address, pte_t * page_table, unsigned long limit)
 419 {
 420         pte_t pte;
 421         unsigned long entry;
 422         unsigned long page;
 423 
 424         pte = *page_table;
 425         if (!pte_present(pte))
 426                 return 0;
 427         page = pte_page(pte);
 428         if (page >= high_memory)
 429                 return 0;
 430         if (page >= limit)
 431                 return 0;
 432 
 433         if (mem_map[MAP_NR(page)].reserved)
 434                 return 0;
 435         /* Deal with page aging.  Pages age from being unused; they
  436          * rejuvenate on being accessed.  Only swap old pages (age==0
 437          * is oldest). */
 438         if ((pte_dirty(pte) && delete_from_swap_cache(page)) 
 439             || pte_young(pte))  {
 440                 set_pte(page_table, pte_mkold(pte));
 441                 touch_page(page);
 442                 return 0;
 443         }       
 444         age_page(page);
 445         if (age_of(page))
 446                 return 0;
 447         if (pte_dirty(pte)) {
 448                 if (vma->vm_ops && vma->vm_ops->swapout) {
 449                         pid_t pid = tsk->pid;
 450                         vma->vm_mm->rss--;
 451                         if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
 452                                 kill_proc(pid, SIGBUS, 1);
 453                 } else {
 454                         if (mem_map[MAP_NR(page)].count != 1)
 455                                 return 0;
 456                         if (!(entry = get_swap_page()))
 457                                 return 0;
 458                         vma->vm_mm->rss--;
 459                         set_pte(page_table, __pte(entry));
 460                         invalidate_page(vma, address);
 461                         tsk->nswap++;
 462                         write_swap_page(entry, (char *) page);
 463                 }
 464                 free_page(page);
 465                 return 1;       /* we slept: the process may not exist any more */
 466         }
 467         if ((entry = find_in_swap_cache(page)))  {
 468                 if (mem_map[MAP_NR(page)].count != 1) {
 469                         set_pte(page_table, pte_mkdirty(pte));
 470                         printk("Aiee.. duplicated cached swap-cache entry\n");
 471                         return 0;
 472                 }
 473                 vma->vm_mm->rss--;
 474                 set_pte(page_table, __pte(entry));
 475                 invalidate_page(vma, address);
 476                 free_page(page);
 477                 return 1;
 478         } 
 479         vma->vm_mm->rss--;
 480         pte_clear(page_table);
 481         invalidate_page(vma, address);
 482         entry = page_unuse(page);
 483         free_page(page);
 484         return entry;
 485 }
 486 
 487 /*
 488  * A new implementation of swap_out().  We do not swap complete processes,
  489  * but only a small number of blocks before we continue with the next
  490  * process.  The number of blocks actually swapped is determined by the
  491  * number of page faults the process recently had, so we won't keep
  492  * swapping heavily used processes all the time ...
  493  *
  494  * Note: the priority argument is a hint on how much CPU to spend on the
  495  *       swap block search, not on how many blocks to swap from each
  496  *       process.
 497  *
 498  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 499  */
 500 
 501 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
 502         pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
 503 {
 504         pte_t * pte;
 505         unsigned long pmd_end;
 506 
 507         if (pmd_none(*dir))
 508                 return 0;
 509         if (pmd_bad(*dir)) {
 510                 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 511                 pmd_clear(dir);
 512                 return 0;
 513         }
 514         
 515         pte = pte_offset(dir, address);
 516         
 517         pmd_end = (address + PMD_SIZE) & PMD_MASK;
 518         if (end > pmd_end)
 519                 end = pmd_end;
 520 
 521         do {
 522                 int result;
 523                 tsk->swap_address = address + PAGE_SIZE;
 524                 result = try_to_swap_out(tsk, vma, address, pte, limit);
 525                 if (result)
 526                         return result;
 527                 address += PAGE_SIZE;
 528                 pte++;
 529         } while (address < end);
 530         return 0;
 531 }
 532 
 533 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
 534         pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
 535 {
 536         pmd_t * pmd;
 537         unsigned long pgd_end;
 538 
 539         if (pgd_none(*dir))
 540                 return 0;
 541         if (pgd_bad(*dir)) {
 542                 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 543                 pgd_clear(dir);
 544                 return 0;
 545         }
 546 
 547         pmd = pmd_offset(dir, address);
 548 
 549         pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;  
 550         if (end > pgd_end)
 551                 end = pgd_end;
 552         
 553         do {
 554                 int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
 555                 if (result)
 556                         return result;
 557                 address = (address + PMD_SIZE) & PMD_MASK;
 558                 pmd++;
 559         } while (address < end);
 560         return 0;
 561 }
 562 
 563 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
 564         pgd_t *pgdir, unsigned long start, unsigned long limit)
 565 {
 566         unsigned long end;
 567 
 568         /* Don't swap out areas like shared memory which have their
 569             own separate swapping mechanism or areas which are locked down */
 570         if (vma->vm_flags & (VM_SHM | VM_LOCKED))
 571                 return 0;
 572 
 573         end = vma->vm_end;
 574         while (start < end) {
 575                 int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
 576                 if (result)
 577                         return result;
 578                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 579                 pgdir++;
 580         }
 581         return 0;
 582 }
 583 
 584 static int swap_out_process(struct task_struct * p, unsigned long limit)
 585 {
 586         unsigned long address;
 587         struct vm_area_struct* vma;
 588 
 589         /*
 590          * Go through process' page directory.
 591          */
 592         address = p->swap_address;
 593         p->swap_address = 0;
 594 
 595         /*
 596          * Find the proper vm-area
 597          */
 598         vma = find_vma(p, address);
 599         if (!vma)
 600                 return 0;
 601         if (address < vma->vm_start)
 602                 address = vma->vm_start;
 603 
 604         for (;;) {
 605                 int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
 606                 if (result)
 607                         return result;
 608                 vma = vma->vm_next;
 609                 if (!vma)
 610                         break;
 611                 address = vma->vm_start;
 612         }
 613         p->swap_address = 0;
 614         return 0;
 615 }
 616 
 617 static int swap_out(unsigned int priority, unsigned long limit)
 618 {
 619         static int swap_task;
 620         int loop, counter;
 621         struct task_struct *p;
 622 
 623         counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
 624         for(; counter >= 0; counter--) {
 625                 /*
 626                  * Check that swap_task is suitable for swapping.  If not, look for
 627                  * the next suitable process.
 628                  */
 629                 loop = 0;
 630                 while(1) {
 631                         if (swap_task >= NR_TASKS) {
 632                                 swap_task = 1;
 633                                 if (loop)
 634                                         /* all processes are unswappable or already swapped out */
 635                                         return 0;
 636                                 loop = 1;
 637                         }
 638 
 639                         p = task[swap_task];
 640                         if (p && p->swappable && p->mm->rss)
 641                                 break;
 642 
 643                         swap_task++;
 644                 }
 645 
 646                 /*
 647                  * Determine the number of pages to swap from this process.
 648                  */
 649                 if (!p->swap_cnt) {
 650                         /* Normalise the number of pages swapped by
 651                            multiplying by (RSS / 1MB) */
 652                         p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
 653                 }
 654                 if (!--p->swap_cnt)
 655                         swap_task++;
 656                 switch (swap_out_process(p, limit)) {
 657                         case 0:
 658                                 if (p->swap_cnt)
 659                                         swap_task++;
 660                                 break;
 661                         case 1:
 662                                 return 1;
 663                         default:
 664                                 break;
 665                 }
 666         }
 667         return 0;
 668 }
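      /*
       * A worked example of the effort budget above (values assumed for
       * illustration): with the default PAGEOUT_WEIGHT of 8192, 64 tasks and
       * priority 6, counter = ((8192 * 64) >> 10) >> 6 = 512 >> 6 = 8, so at
       * most eight candidates are examined on this call; smaller priority
       * values spend proportionally more effort.
       */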
 669 
 670 /*
 671  * We are much more aggressive about trying to swap out than we used
 672  * to be.  This works out OK, because we now do proper aging on page
 673  * contents. 
 674  */
 675 static int try_to_free_page(int priority, unsigned long limit)
 676 {
 677         static int state = 0;
 678         int i=6;
 679 
 680         switch (state) {
 681                 do {
 682                 case 0:
 683                         if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
 684                                 return 1;
 685                         state = 1;
 686                 case 1:
 687                         if (shm_swap(i, limit))
 688                                 return 1;
 689                         state = 2;
 690                 case 2:
 691                         if (shrink_mmap(i, limit))
 692                                 return 1;
 693                         state = 3;
 694                 default:
 695                         if (swap_out(i, limit))
 696                                 return 1;
 697                         state = 0;
 698                 } while(i--);
 699         }
 700         return 0;
 701 }
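      /*
       * Note on the control flow above: the switch jumps into the middle of
       * the do-while loop.  `state' advances each time a source comes up
       * empty, so the next call resumes past the sources that just failed,
       * cycling through buffers, SysV shm, the page cache and process pages
       * until one of them frees a page or `i' runs out.
       */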
 702 
 703 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
 704 {
 705         entry->prev = head;
 706         (entry->next = head->next)->prev = entry;
 707         head->next = entry;
 708 }
 709 
 710 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
 711 {
 712         entry->next->prev = entry->prev;
 713         entry->prev->next = entry->next;
 714 }
 715 
 716 /*
 717  * Free_page() adds the page to the free lists. This is optimized for
 718  * fast normal cases (no error jumps taken normally).
 719  *
 720  * The way to optimize jumps for gcc-2.2.2 is to:
 721  *  - select the "normal" case and put it inside the if () { XXX }
 722  *  - no else-statements if you can avoid them
 723  *
 724  * With the above two rules, you get a straight-line execution path
 725  * for the normal case, giving better asm-code.
 726  *
 727  * free_page() may sleep since the page being freed may be a buffer
 728  * page or present in the swap cache. It will not sleep, however,
 729  * for a freshly allocated page (get_free_page()).
 730  */
 731 
 732 /*
 733  * Buddy system. Hairy. You really aren't expected to understand this
 734  */
 735 static inline void free_pages_ok(unsigned long addr, unsigned long order)
 736 {
 737         unsigned long index = MAP_NR(addr) >> (1 + order);
 738         unsigned long mask = PAGE_MASK << order;
 739 
 740         addr &= mask;
 741         nr_free_pages += 1 << order;
 742         while (order < NR_MEM_LISTS-1) {
 743                 if (!change_bit(index, free_area_map[order]))
 744                         break;
 745                 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
 746                 order++;
 747                 index >>= 1;
 748                 mask <<= 1;
 749                 addr &= mask;
 750         }
 751         add_mem_queue(free_area_list+order, (struct mem_list *) addr);
 752 }
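      /*
       * A worked example of the buddy arithmetic above (i386 values assumed,
       * PAGE_SIZE = 4096): for order 0, mask = 0xfffff000 and 1+~mask =
       * 0x1000, so `addr ^ (1+~mask)' flips exactly the bit that separates a
       * block from its buddy.  change_bit() returns the old value of the
       * pair's shared map bit: 0 means the buddy is still allocated (stop and
       * queue this block), 1 means the buddy was free as well (unlink it and
       * retry one order higher).
       */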
 753 
 754 static inline void check_free_buffers(unsigned long addr)
 755 {
 756         struct buffer_head * bh;
 757 
 758         bh = buffer_pages[MAP_NR(addr)];
 759         if (bh) {
 760                 struct buffer_head *tmp = bh;
 761                 do {
 762                         if (tmp->b_list == BUF_SHARED
 763                             && tmp->b_dev != B_FREE)
 764                                 refile_buffer(tmp);
 765                         tmp = tmp->b_this_page;
 766                 } while (tmp != bh);
 767         }
 768 }
 769 
 770 void free_pages(unsigned long addr, unsigned long order)
 771 {
 772         if (MAP_NR(addr) < MAP_NR(high_memory)) {
 773                 unsigned long flag;
 774                 mem_map_t * map = mem_map + MAP_NR(addr);
 775                 if (map->reserved)
 776                         return;
 777                 if (map->count) {
 778                         save_flags(flag);
 779                         cli();
 780                         if (!--map->count) {
 781                                 free_pages_ok(addr, order);
 782                                 delete_from_swap_cache(addr);
 783                         }
 784                         restore_flags(flag);
 785                         if (map->count == 1)
 786                                 check_free_buffers(addr);
 787                         return;
 788                 }
 789                 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
 790                 printk("PC = %p\n", __builtin_return_address(0));
 791                 return;
 792         }
 793 }
 794 
 795 /*
 796  * Some ugly macros to speed up __get_free_pages()..
 797  */
 798 #define RMQUEUE(order, limit) \
 799 do { struct mem_list * queue = free_area_list+order; \
 800      unsigned long new_order = order; \
 801         do { struct mem_list *prev = queue, *ret; \
 802                 while (queue != (ret = prev->next)) { \
 803                         if ((unsigned long) ret < (limit)) { \
 804                                 (prev->next = ret->next)->prev = prev; \
 805                                 mark_used((unsigned long) ret, new_order); \
 806                                 nr_free_pages -= 1 << order; \
 807                                 restore_flags(flags); \
 808                                 EXPAND(ret, order, new_order); \
 809                                 return (unsigned long) ret; \
 810                         } \
 811                         prev = ret; \
 812                 } \
 813                 new_order++; queue++; \
 814         } while (new_order < NR_MEM_LISTS); \
 815 } while (0)
 816 
 817 static inline int mark_used(unsigned long addr, unsigned long order)
 818 {
 819         return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
 820 }
 821 
 822 #define EXPAND(addr,low,high) \
 823 do { unsigned long size = PAGE_SIZE << high; \
 824         while (high > low) { \
 825                 high--; size >>= 1; cli(); \
 826                 add_mem_queue(free_area_list+high, addr); \
 827                 mark_used((unsigned long) addr, high); \
 828                 restore_flags(flags); \
 829                 addr = (struct mem_list *) (size + (unsigned long) addr); \
 830         } mem_map[MAP_NR((unsigned long) addr)].count = 1; \
 831         mem_map[MAP_NR((unsigned long) addr)].age = PAGE_INITIAL_AGE; \
 832 } while (0)
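      /*
       * EXPAND splits a larger block down to the requested order, queueing
       * the unused lower halves as it goes.  For example (PAGE_SIZE assumed
       * 4096): satisfying an order-0 request from an order-2 block at 0x8000
       * queues 0x8000 as an order-1 block and 0xa000 as an order-0 block,
       * then hands back the remaining page at 0xb000 with count = 1 and a
       * fresh age.
       */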
 833 
 834 unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
 835 {
 836         unsigned long flags;
 837         int reserved_pages;
 838 
 839         if (order >= NR_MEM_LISTS)
 840                 return 0;
 841         if (intr_count && priority != GFP_ATOMIC) {
 842                 static int count = 0;
 843                 if (++count < 5) {
 844                         printk("gfp called nonatomically from interrupt %p\n",
 845                                 __builtin_return_address(0));
 846                         priority = GFP_ATOMIC;
 847                 }
 848         }
 849         reserved_pages = 5;
 850         if (priority != GFP_NFS)
 851                 reserved_pages = min_free_pages;
 852         save_flags(flags);
 853 repeat:
 854         cli();
 855         if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
 856                 RMQUEUE(order, limit);
 857                 restore_flags(flags);
 858                 return 0;
 859         }
 860         restore_flags(flags);
 861         if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
 862                 goto repeat;
 863         return 0;
 864 }
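      /*
       * Note that RMQUEUE() contains the success path's `return' itself, so
       * control only reaches the `return 0' statements above when no
       * suitably sized block below `limit' could be found.
       */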
 865 
 866 /*
  867  * Show the free area list (used inside the shift-scroll-lock stuff).
  868  * We also show how fragmented the free memory is, by counting the
  869  * free blocks on each free list.
 870  */
 871 void show_free_areas(void)
 872 {
 873         unsigned long order, flags;
 874         unsigned long total = 0;
 875 
 876         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 877         save_flags(flags);
 878         cli();
 879         for (order=0 ; order < NR_MEM_LISTS; order++) {
 880                 struct mem_list * tmp;
 881                 unsigned long nr = 0;
 882                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 883                         nr ++;
 884                 }
 885                 total += nr * ((PAGE_SIZE>>10) << order);
 886                 printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
 887         }
 888         restore_flags(flags);
 889         printk("= %lukB)\n", total);
 890 #ifdef SWAP_CACHE_INFO
 891         show_swap_cache_info();
 892 #endif  
 893 }
 894 
 895 /*
 896  * Trying to stop swapping from a file is fraught with races, so
 897  * we repeat quite a bit here when we have to pause. swapoff()
 898  * isn't exactly timing-critical, so who cares (but this is /really/
 899  * inefficient, ugh).
 900  *
  901  * We return 1 after having slept, which makes the caller start over
  902  * from the beginning for this process.
 903  */
 904 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
 905         pte_t *dir, unsigned int type, unsigned long page)
 906 {
 907         pte_t pte = *dir;
 908 
 909         if (pte_none(pte))
 910                 return 0;
 911         if (pte_present(pte)) {
 912                 unsigned long page = pte_page(pte);
 913                 if (page >= high_memory)
 914                         return 0;
 915                 if (!in_swap_cache(page))
 916                         return 0;
 917                 if (SWP_TYPE(in_swap_cache(page)) != type)
 918                         return 0;
 919                 delete_from_swap_cache(page);
 920                 set_pte(dir, pte_mkdirty(pte));
 921                 return 0;
 922         }
 923         if (SWP_TYPE(pte_val(pte)) != type)
 924                 return 0;
 925         read_swap_page(pte_val(pte), (char *) page);
 926         if (pte_val(*dir) != pte_val(pte)) {
 927                 free_page(page);
 928                 return 1;
 929         }
 930         set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 931         ++vma->vm_mm->rss;
 932         swap_free(pte_val(pte));
 933         return 1;
 934 }
 935 
 936 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 937         unsigned long address, unsigned long size, unsigned long offset,
 938         unsigned int type, unsigned long page)
 939 {
 940         pte_t * pte;
 941         unsigned long end;
 942 
 943         if (pmd_none(*dir))
 944                 return 0;
 945         if (pmd_bad(*dir)) {
 946                 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 947                 pmd_clear(dir);
 948                 return 0;
 949         }
 950         pte = pte_offset(dir, address);
 951         offset += address & PMD_MASK;
 952         address &= ~PMD_MASK;
 953         end = address + size;
 954         if (end > PMD_SIZE)
 955                 end = PMD_SIZE;
 956         do {
 957                 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
 958                         return 1;
 959                 address += PAGE_SIZE;
 960                 pte++;
 961         } while (address < end);
 962         return 0;
 963 }
 964 
 965 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
 966         unsigned long address, unsigned long size,
 967         unsigned int type, unsigned long page)
 968 {
 969         pmd_t * pmd;
 970         unsigned long offset, end;
 971 
 972         if (pgd_none(*dir))
 973                 return 0;
 974         if (pgd_bad(*dir)) {
 975                 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 976                 pgd_clear(dir);
 977                 return 0;
 978         }
 979         pmd = pmd_offset(dir, address);
 980         offset = address & PGDIR_MASK;
 981         address &= ~PGDIR_MASK;
 982         end = address + size;
 983         if (end > PGDIR_SIZE)
 984                 end = PGDIR_SIZE;
 985         do {
 986                 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
 987                         return 1;
 988                 address = (address + PMD_SIZE) & PMD_MASK;
 989                 pmd++;
 990         } while (address < end);
 991         return 0;
 992 }
 993 
 994 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
 995         unsigned long start, unsigned long end,
 996         unsigned int type, unsigned long page)
 997 {
 998         while (start < end) {
 999                 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
1000                         return 1;
1001                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
1002                 pgdir++;
1003         }
1004         return 0;
1005 }
1006 
1007 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
1008 {
1009         struct vm_area_struct* vma;
1010 
1011         /*
1012          * Go through process' page directory.
1013          */
1014         if (!p->mm || pgd_inuse(p->mm->pgd))
1015                 return 0;
1016         vma = p->mm->mmap;
1017         while (vma) {
1018                 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
1019                 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
1020                         return 1;
1021                 vma = vma->vm_next;
1022         }
1023         return 0;
1024 }
1025 
1026 /*
1027  * To avoid races, we repeat for each process after having
1028  * swapped something in. That gets rid of a few pesky races,
1029  * and "swapoff" isn't exactly timing critical.
1030  */
1031 static int try_to_unuse(unsigned int type)
1032 {
1033         int nr;
1034         unsigned long page = get_free_page(GFP_KERNEL);
1035 
1036         if (!page)
1037                 return -ENOMEM;
1038         nr = 0;
1039         while (nr < NR_TASKS) {
1040                 if (task[nr]) {
1041                         if (unuse_process(task[nr], type, page)) {
1042                                 page = get_free_page(GFP_KERNEL);
1043                                 if (!page)
1044                                         return -ENOMEM;
1045                                 continue;
1046                         }
1047                 }
1048                 nr++;
1049         }
1050         free_page(page);
1051         return 0;
1052 }
1053 
1054 asmlinkage int sys_swapoff(const char * specialfile)
1055 {
1056         struct swap_info_struct * p;
1057         struct inode * inode;
1058         struct file filp;
1059         int i, type, prev;
1060 
1061         if (!suser())
1062                 return -EPERM;
1063         i = namei(specialfile,&inode);
1064         if (i)
1065                 return i;
1066         prev = -1;
1067         for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1068                 p = swap_info + type;
1069                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
1070                         if (p->swap_file) {
1071                                 if (p->swap_file == inode)
1072                                   break;
1073                         } else {
1074                                 if (S_ISBLK(inode->i_mode)
1075                                     && (p->swap_device == inode->i_rdev))
1076                                   break;
1077                         }
1078                 }
1079                 prev = type;
1080         }
1081         if (type < 0){
1082                 iput(inode);
1083                 return -EINVAL;
1084         }
1085         if (prev < 0) {
1086                 swap_list.head = p->next;
1087         } else {
1088                 swap_info[prev].next = p->next;
1089         }
1090         if (type == swap_list.next) {
1091                 /* just pick something that's safe... */
1092                 swap_list.next = swap_list.head;
1093         }
1094         p->flags = SWP_USED;
1095         i = try_to_unuse(type);
1096         if (i) {
1097                 iput(inode);
1098                 p->flags = SWP_WRITEOK;
1099                 return i;
1100         }
1101 
1102         if(p->swap_device){
1103                 memset(&filp, 0, sizeof(filp));         
1104                 filp.f_inode = inode;
1105                 filp.f_mode = 3; /* read write */
1106                 /* open it again to get fops */
1107                 if( !blkdev_open(inode, &filp) &&
1108                    filp.f_op && filp.f_op->release){
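                              /* Two releases are intended here: one for the
                                 blkdev_open() just above, and (presumably)
                                 one for the open done back at swapon time. */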
1109                         filp.f_op->release(inode,&filp);
1110                         filp.f_op->release(inode,&filp);
1111                 }
1112         }
1113         iput(inode);
1114 
1115         nr_swap_pages -= p->pages;
1116         iput(p->swap_file);
1117         p->swap_file = NULL;
1118         p->swap_device = 0;
1119         vfree(p->swap_map);
1120         p->swap_map = NULL;
1121         free_page((long) p->swap_lockmap);
1122         p->swap_lockmap = NULL;
1123         p->flags = 0;
1124         return 0;
1125 }
1126 
1127 /*
1128  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1129  *
1130  * The swapon system call
1131  */
1132 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
1133 {
1134         struct swap_info_struct * p;
1135         struct inode * swap_inode;
1136         unsigned int type;
1137         int i, j, prev;
1138         int error;
1139         struct file filp;
1140         static int least_priority = 0;
1141 
1142         memset(&filp, 0, sizeof(filp));
1143         if (!suser())
1144                 return -EPERM;
1145         p = swap_info;
1146         for (type = 0 ; type < nr_swapfiles ; type++,p++)
1147                 if (!(p->flags & SWP_USED))
1148                         break;
1149         if (type >= MAX_SWAPFILES)
1150                 return -EPERM;
1151         if (type >= nr_swapfiles)
1152                 nr_swapfiles = type+1;
1153         p->flags = SWP_USED;
1154         p->swap_file = NULL;
1155         p->swap_device = 0;
1156         p->swap_map = NULL;
1157         p->swap_lockmap = NULL;
1158         p->lowest_bit = 0;
1159         p->highest_bit = 0;
1160         p->max = 1;
1161         p->next = -1;
1162         if (swap_flags & SWAP_FLAG_PREFER) {
1163                 p->prio =
1164                   (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1165         } else {
1166                 p->prio = --least_priority;
1167         }
1168         error = namei(specialfile,&swap_inode);
1169         if (error)
1170                 goto bad_swap_2;
1171         p->swap_file = swap_inode;
1172         error = -EBUSY;
1173         if (swap_inode->i_count != 1)
1174                 goto bad_swap_2;
1175         error = -EINVAL;
1176 
1177         if (S_ISBLK(swap_inode->i_mode)) {
1178                 p->swap_device = swap_inode->i_rdev;
1179 
1180                 filp.f_inode = swap_inode;
1181                 filp.f_mode = 3; /* read write */
1182                 error = blkdev_open(swap_inode, &filp);
1183                 p->swap_file = NULL;
1184                 iput(swap_inode);
1185                 if(error)
1186                         goto bad_swap_2;
1187                 error = -ENODEV;
1188                 if (!p->swap_device)
1189                         goto bad_swap;
1190                 error = -EBUSY;
1191                 for (i = 0 ; i < nr_swapfiles ; i++) {
1192                         if (i == type)
1193                                 continue;
1194                         if (p->swap_device == swap_info[i].swap_device)
1195                                 goto bad_swap;
1196                 }
1197         } else if (!S_ISREG(swap_inode->i_mode))
1198                 goto bad_swap;
1199         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
1200         if (!p->swap_lockmap) {
1201                 printk("Unable to start swapping: out of memory :-)\n");
1202                 error = -ENOMEM;
1203                 goto bad_swap;
1204         }
1205         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
1206         if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
1207                 printk("Unable to find swap-space signature\n");
1208                 error = -EINVAL;
1209                 goto bad_swap;
1210         }
1211         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
1212         j = 0;
1213         p->lowest_bit = 0;
1214         p->highest_bit = 0;
1215         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
1216                 if (test_bit(i,p->swap_lockmap)) {
1217                         if (!p->lowest_bit)
1218                                 p->lowest_bit = i;
1219                         p->highest_bit = i;
1220                         p->max = i+1;
1221                         j++;
1222                 }
1223         }
1224         if (!j) {
1225                 printk("Empty swap-file\n");
1226                 error = -EINVAL;
1227                 goto bad_swap;
1228         }
1229         p->swap_map = (unsigned char *) vmalloc(p->max);
1230         if (!p->swap_map) {
1231                 error = -ENOMEM;
1232                 goto bad_swap;
1233         }
1234         for (i = 1 ; i < p->max ; i++) {
1235                 if (test_bit(i,p->swap_lockmap))
1236                         p->swap_map[i] = 0;
1237                 else
1238                         p->swap_map[i] = 0x80;
1239         }
1240         p->swap_map[0] = 0x80;
1241         memset(p->swap_lockmap,0,PAGE_SIZE);
1242         p->flags = SWP_WRITEOK;
1243         p->pages = j;
1244         nr_swap_pages += j;
1245         printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
1246 
1247         /* insert swap space into swap_list: */
1248         prev = -1;
1249         for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1250                 if (p->prio >= swap_info[i].prio) {
1251                         break;
1252                 }
1253                 prev = i;
1254         }
1255         p->next = i;
1256         if (prev < 0) {
1257                 swap_list.head = swap_list.next = p - swap_info;
1258         } else {
1259                 swap_info[prev].next = p - swap_info;
1260         }
1261         return 0;
1262 bad_swap:
1263         if(filp.f_op && filp.f_op->release)
1264                 filp.f_op->release(filp.f_inode,&filp);
1265 bad_swap_2:
1266         free_page((long) p->swap_lockmap);
1267         vfree(p->swap_map);
1268         iput(p->swap_file);
1269         p->swap_device = 0;
1270         p->swap_file = NULL;
1271         p->swap_map = NULL;
1272         p->swap_lockmap = NULL;
1273         p->flags = 0;
1274         return error;
1275 }
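      /*
       * Sketch of the on-disk header parsed by sys_swapon() above: page 0 of
       * the swap area holds a bitmap in which a set bit marks a usable page,
       * with the final 10 bytes overlaid by the "SWAP-SPACE" signature.
       * Page 0 itself is never usable, since it holds this header.
       */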
1276 
1277 void si_swapinfo(struct sysinfo *val)
1278 {
1279         unsigned int i, j;
1280 
1281         val->freeswap = val->totalswap = 0;
1282         for (i = 0; i < nr_swapfiles; i++) {
1283                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
1284                         continue;
1285                 for (j = 0; j < swap_info[i].max; ++j)
1286                         switch (swap_info[i].swap_map[j]) {
1287                                 case 128:
1288                                         continue;
1289                                 case 0:
1290                                         ++val->freeswap;
1291                                 default:
1292                                         ++val->totalswap;
1293                         }
1294         }
1295         val->freeswap <<= PAGE_SHIFT;
1296         val->totalswap <<= PAGE_SHIFT;
1297         return;
1298 }
1299 
1300 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
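      /* For example, with sizeof(long) == 4: LONG_ALIGN(13) == 16 and
         LONG_ALIGN(16) == 16. */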
1301 
1302 /*
1303  * set up the free-area data structures:
1304  *   - mark all pages reserved
1305  *   - mark all memory queues empty
1306  *   - clear the memory bitmaps
1307  */
1308 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
1309 {
1310         mem_map_t * p;
1311         unsigned long mask = PAGE_MASK;
1312         int i;
1313 
1314         /*
1315          * select nr of pages we try to keep free for important stuff
1316          * with a minimum of 16 pages. This is totally arbitrary
1317          */
1318         i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
1319         if (i < 16)
1320                 i = 16;
1321         min_free_pages = i;
1322         start_mem = init_swap_cache(start_mem, end_mem);
1323         mem_map = (mem_map_t *) start_mem;
1324         p = mem_map + MAP_NR(end_mem);
1325         start_mem = LONG_ALIGN((unsigned long) p);
1326         while (p > mem_map) {
1327                 --p;
1328                 p->count = 0;
1329                 p->dirty = 0;
1330                 p->reserved = 1;
1331                 p->inode = NULL;
1332                 p->offset = 0;
1333                 p->write_list = NULL;
1334                 p->next = p->prev = NULL;
 1335                 p->next_hash = p->prev_hash = NULL;
1336         }
1337 
1338         for (i = 0 ; i < NR_MEM_LISTS ; i++) {
1339                 unsigned long bitmap_size;
1340                 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
1341                 mask += mask;
1342                 end_mem = (end_mem + ~mask) & mask;
1343                 bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
1344                 bitmap_size = (bitmap_size + 7) >> 3;
1345                 bitmap_size = LONG_ALIGN(bitmap_size);
1346                 free_area_map[i] = (unsigned char *) start_mem;
1347                 memset((void *) start_mem, 0, bitmap_size);
1348                 start_mem += bitmap_size;
1349         }
1350         return start_mem;
1351 }
