root/mm/swap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. swap_setup
  5. buff_setup
  6. rw_swap_page
  7. get_swap_page
  8. swap_duplicate
  9. swap_free
  10. swap_in
  11. try_to_swap_out
  12. swap_out_pmd
  13. swap_out_pgd
  14. swap_out_vma
  15. swap_out_process
  16. swap_out
  17. try_to_free_page
  18. add_mem_queue
  19. remove_mem_queue
  20. free_pages_ok
  21. check_free_buffers
  22. free_pages
  23. mark_used
  24. __get_free_pages
  25. show_free_areas
  26. unuse_pte
  27. unuse_pmd
  28. unuse_pgd
  29. unuse_vma
  30. unuse_process
  31. try_to_unuse
  32. sys_swapoff
  33. sys_swapon
  34. si_swapinfo
  35. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  *
  11  * Swap aging added 23.2.95, Stephen Tweedie.
  12  */
  13 
  14 #include <linux/mm.h>
  15 #include <linux/sched.h>
  16 #include <linux/head.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/errno.h>
  20 #include <linux/string.h>
  21 #include <linux/stat.h>
  22 #include <linux/swap.h>
  23 #include <linux/fs.h>
  24 #include <linux/swapctl.h>
  25 
  26 #include <asm/dma.h>
  27 #include <asm/system.h> /* for cli()/sti() */
  28 #include <asm/segment.h> /* for memcpy_to/fromfs */
  29 #include <asm/bitops.h>
  30 #include <asm/pgtable.h>
  31 
   32 #define MAX_SWAPFILES 8         /* number of slots in the swap_info[] table */
   33 
      /*
       * Values tested against swap_info_struct.flags.  SWP_WRITEOK (3) includes
       * the SWP_USED bit (1), which is why users compare with
       * (flags & SWP_WRITEOK) == SWP_WRITEOK instead of testing a single bit.
       */
   34 #define SWP_USED        1
   35 #define SWP_WRITEOK     3
   36 
      /* NOTE(review): not referenced in this part of the file; presumably a
       * free-page threshold used by the allocator -- confirm at the users. */
   37 int min_free_pages = 20;
  38 
   39 /*
   40  * Constants for the page aging mechanism: the maximum age (actually,
   41  * the maximum "youthfulness"); the quanta by which pages rejuvenate
   42  * and age; and the initial age for new pages. 
       *
       * The initializer below is positional, so the values must stay in the
       * same order as the fields of swap_control_t.
       * NOTE(review): swap_control_t is declared outside this file (presumably
       * in <linux/swapctl.h>, together with the MAX_PAGE_AGE-style names that
       * swap_setup() and buff_setup() take the address of) -- confirm the
       * field order there before editing these values.
   43  */
   44 
   45 swap_control_t swap_control = {
   46         20, 3, 1, 3,            /* Page aging */
   47         10, 2, 2, 0,            /* Buffer aging */
   48         32, 4,                  /* Aging cluster */
   49         8192, 4096,             /* Pageout and bufferout weights */
   50         -200,                   /* Buffer grace */
   51         1, 1,                   /* Buffs/pages to free */
   52         RCL_ROUND_ROBIN         /* Balancing policy */
   53 };
  54 
   55 static int nr_swapfiles = 0;    /* swap types >= this are rejected as nonexistent */
   56 static struct wait_queue * lock_queue = NULL;   /* slept on in rw_swap_page() while a slot's lock bit is held */
   57 static struct {
   58         int head;       /* head of priority-ordered swapfile list */
   59         int next;       /* swapfile to be used next */
   60 } swap_list = {-1, -1};         /* -1 means "no swap area"; get_swap_page() checks for it */
   61 
      /* One entry per swap area, indexed by the type field of a swap entry. */
   62 static struct swap_info_struct {
   63         unsigned int flags;             /* SWP_USED / SWP_WRITEOK */
   64         kdev_t swap_device;             /* if non-zero, I/O goes through ll_rw_page() */
   65         struct inode * swap_file;       /* otherwise I/O is mapped through this inode */
   66         unsigned char * swap_map;       /* per-slot use count; 0 = slot is free */
   67         unsigned char * swap_lockmap;   /* one bit per slot, set while I/O is in flight */
   68         int lowest_bit;                 /* first slot get_swap_page() examines */
   69         int highest_bit;                /* last slot get_swap_page() examines */
   70         int prio;                       /* swap priority */
   71         int pages;                      /* NOTE(review): not referenced in this part of the file */
   72         unsigned long max;              /* offsets >= max are rejected */
   73         int next;                       /* next entry on swap list */
   74 } swap_info[MAX_SWAPFILES];
   75 
      /* Defined elsewhere; called from try_to_free_page() as one of its reclaim steps. */
   76 extern int shm_swap (int, unsigned long);
  77 
   78 /*
   79  * To save us from swapping out pages which have just been swapped in and
   80  * have not been modified since then, we keep in swap_cache[page>>PAGE_SHIFT]
   81  * the swap entry which was last used to fill the page, or zero if the
   82  * page does not currently correspond to a page in swap. PAGE_DIRTY makes
   83  * this info useless.
       *
       * The table is placed and zeroed by init_swap_cache(), which sizes it at
       * MAP_NR(mem_end) unsigned long slots; add_to_swap_cache() indexes it
       * with MAP_NR(addr).
   84  */
   85 unsigned long *swap_cache;
  86 
  87 #ifdef SWAP_CACHE_INFO
   88 unsigned long swap_cache_add_total = 0;         /* calls to add_to_swap_cache() */
   89 unsigned long swap_cache_add_success = 0;       /* ...of which the entry was stored */
      /* NOTE(review): the delete/find counters are updated outside this part of
       * the file; only show_swap_cache_info() reads them here. */
   90 unsigned long swap_cache_del_total = 0;
   91 unsigned long swap_cache_del_success = 0;
   92 unsigned long swap_cache_find_total = 0;
   93 unsigned long swap_cache_find_success = 0;
  94 
  95 extern inline void show_swap_cache_info(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  96 {
  97         printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
  98                 swap_cache_add_total, swap_cache_add_success, 
  99                 swap_cache_del_total, swap_cache_del_success,
 100                 swap_cache_find_total, swap_cache_find_success);
 101 }
 102 #endif
 103 
 104 static int add_to_swap_cache(unsigned long addr, unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 105 {
 106         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
 107 
 108 #ifdef SWAP_CACHE_INFO
 109         swap_cache_add_total++;
 110 #endif
 111         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 112                 entry = xchg(swap_cache + MAP_NR(addr), entry);
 113                 if (entry)  {
 114                         printk("swap_cache: replacing non-NULL entry\n");
 115                 }
 116 #ifdef SWAP_CACHE_INFO
 117                 swap_cache_add_success++;
 118 #endif
 119                 return 1;
 120         }
 121         return 0;
 122 }
 123 
 124 static unsigned long init_swap_cache(unsigned long mem_start,
     /* [previous][next][first][last][top][bottom][index][help] */
 125         unsigned long mem_end)
 126 {
 127         unsigned long swap_cache_size;
 128 
 129         mem_start = (mem_start + 15) & ~15;
 130         swap_cache = (unsigned long *) mem_start;
 131         swap_cache_size = MAP_NR(mem_end);
 132         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 133         return (unsigned long) (swap_cache + swap_cache_size);
 134 }
 135 
 136 /* General swap control */
 137 
 138 /* Parse the kernel command line "swap=" option at load time: */
 139 void swap_setup(char *str, int *ints)
     /* [previous][next][first][last][top][bottom][index][help] */
 140 {
 141         int * swap_vars[8] = {
 142                 &MAX_PAGE_AGE,
 143                 &PAGE_ADVANCE,
 144                 &PAGE_DECLINE,
 145                 &PAGE_INITIAL_AGE,
 146                 &AGE_CLUSTER_FRACT,
 147                 &AGE_CLUSTER_MIN,
 148                 &PAGEOUT_WEIGHT,
 149                 &BUFFEROUT_WEIGHT
 150         };
 151         int i;
 152         for (i=0; i < ints[0] && i < 8; i++) {
 153                 if (ints[i+1])
 154                         *(swap_vars[i]) = ints[i+1];
 155         }
 156 }
 157 
 158 /* Parse the kernel command line "buff=" option at load time: */
 159 void buff_setup(char *str, int *ints)
     /* [previous][next][first][last][top][bottom][index][help] */
 160 {
 161         int * buff_vars[6] = {
 162                 &MAX_BUFF_AGE,
 163                 &BUFF_ADVANCE,
 164                 &BUFF_DECLINE,
 165                 &BUFF_INITIAL_AGE,
 166                 &BUFFEROUT_WEIGHT,
 167                 &BUFFERMEM_GRACE
 168         };
 169         int i;
 170         for (i=0; i < ints[0] && i < 6; i++) {
 171                 if (ints[i+1])
 172                         *(buff_vars[i]) = ints[i+1];
 173         }
 174 }
 175 
 176 /* Page aging */
 177 
 178 void rw_swap_page(int rw, unsigned long entry, char * buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180         unsigned long type, offset;
 181         struct swap_info_struct * p;
 182 
 183         type = SWP_TYPE(entry);
 184         if (type >= nr_swapfiles) {
 185                 printk("Internal error: bad swap-device\n");
 186                 return;
 187         }
 188         p = &swap_info[type];
 189         offset = SWP_OFFSET(entry);
 190         if (offset >= p->max) {
 191                 printk("rw_swap_page: weirdness\n");
 192                 return;
 193         }
 194         if (p->swap_map && !p->swap_map[offset]) {
 195                 printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
 196                 return;
 197         }
 198         if (!(p->flags & SWP_USED)) {
 199                 printk("Trying to swap to unused swap-device\n");
 200                 return;
 201         }
 202         while (set_bit(offset,p->swap_lockmap))
 203                 sleep_on(&lock_queue);
 204         if (rw == READ)
 205                 kstat.pswpin++;
 206         else
 207                 kstat.pswpout++;
 208         if (p->swap_device) {
 209                 ll_rw_page(rw,p->swap_device,offset,buf);
 210         } else if (p->swap_file) {
 211                 struct inode *swapf = p->swap_file;
 212                 unsigned int zones[PAGE_SIZE/512];
 213                 int i;
 214                 if (swapf->i_op->bmap == NULL
 215                         && swapf->i_op->smap != NULL){
 216                         /*
 217                                 With MsDOS, we use msdos_smap which return
 218                                 a sector number (not a cluster or block number).
 219                                 It is a patch to enable the UMSDOS project.
 220                                 Other people are working on better solution.
 221 
 222                                 It sounds like ll_rw_swap_file defined
 223                                 it operation size (sector size) based on
 224                                 PAGE_SIZE and the number of block to read.
 225                                 So using bmap or smap should work even if
 226                                 smap will require more blocks.
 227                         */
 228                         int j;
 229                         unsigned int block = offset << 3;
 230 
 231                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 232                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 233                                         printk("rw_swap_page: bad swap file\n");
 234                                         return;
 235                                 }
 236                         }
 237                 }else{
 238                         int j;
 239                         unsigned int block = offset
 240                                 << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
 241 
 242                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 243                                 if (!(zones[i] = bmap(swapf,block++))) {
 244                                         printk("rw_swap_page: bad swap file\n");
 245                                         return;
 246                                 }
 247                 }
 248                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 249         } else
 250                 printk("re_swap_page: no swap file or device\n");
 251         if (offset && !clear_bit(offset,p->swap_lockmap))
 252                 printk("rw_swap_page: lock already cleared\n");
 253         wake_up(&lock_queue);
 254 }
 255 
 256 unsigned long get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 257 {
 258         struct swap_info_struct * p;
 259         unsigned long offset, entry;
 260         int type, wrapped = 0;
 261 
 262         type = swap_list.next;
 263         if (type < 0)
 264           return 0;
 265 
 266         while (1) {
 267                 p = &swap_info[type];
 268                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 269                         for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
 270                                 if (p->swap_map[offset])
 271                                   continue;
 272                                 if (test_bit(offset, p->swap_lockmap))
 273                                   continue;
 274                                 p->swap_map[offset] = 1;
 275                                 nr_swap_pages--;
 276                                 if (offset == p->highest_bit)
 277                                   p->highest_bit--;
 278                                 p->lowest_bit = offset;
 279                                 entry = SWP_ENTRY(type,offset);
 280 
 281                                 type = swap_info[type].next;
 282                                 if (type < 0 || p->prio != swap_info[type].prio) {
 283                                     swap_list.next = swap_list.head;
 284                                 } else {
 285                                     swap_list.next = type;
 286                                 }
 287                                 return entry;
 288                         }
 289                 }
 290                 type = p->next;
 291                 if (!wrapped) {
 292                         if (type < 0 || p->prio != swap_info[type].prio) {
 293                                 type = swap_list.head;
 294                                 wrapped = 1;
 295                         }
 296                 } else if (type < 0) {
 297                         return 0;       /* out of swap space */
 298                 }
 299         }
 300 }
 301 
 302 void swap_duplicate(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 303 {
 304         struct swap_info_struct * p;
 305         unsigned long offset, type;
 306 
 307         if (!entry)
 308                 return;
 309         offset = SWP_OFFSET(entry);
 310         type = SWP_TYPE(entry);
 311         if (type & SHM_SWP_TYPE)
 312                 return;
 313         if (type >= nr_swapfiles) {
 314                 printk("Trying to duplicate nonexistent swap-page\n");
 315                 return;
 316         }
 317         p = type + swap_info;
 318         if (offset >= p->max) {
 319                 printk("swap_duplicate: weirdness\n");
 320                 return;
 321         }
 322         if (!p->swap_map[offset]) {
 323                 printk("swap_duplicate: trying to duplicate unused page\n");
 324                 return;
 325         }
 326         p->swap_map[offset]++;
 327         return;
 328 }
 329 
 330 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 331 {
 332         struct swap_info_struct * p;
 333         unsigned long offset, type;
 334 
 335         if (!entry)
 336                 return;
 337         type = SWP_TYPE(entry);
 338         if (type & SHM_SWP_TYPE)
 339                 return;
 340         if (type >= nr_swapfiles) {
 341                 printk("Trying to free nonexistent swap-page\n");
 342                 return;
 343         }
 344         p = & swap_info[type];
 345         offset = SWP_OFFSET(entry);
 346         if (offset >= p->max) {
 347                 printk("swap_free: weirdness\n");
 348                 return;
 349         }
 350         if (!(p->flags & SWP_USED)) {
 351                 printk("Trying to free swap from unused swap-device\n");
 352                 return;
 353         }
 354         if (offset < p->lowest_bit)
 355                 p->lowest_bit = offset;
 356         if (offset > p->highest_bit)
 357                 p->highest_bit = offset;
 358         if (!p->swap_map[offset])
 359                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 360         else
 361                 if (!--p->swap_map[offset])
 362                         nr_swap_pages++;
 363         if (p->prio > swap_info[swap_list.next].prio) {
 364             swap_list.next = swap_list.head;
 365         }
 366 }
 367 
 368 /*
 369  * The tests may look silly, but it essentially makes sure that
 370  * no other process did a swap-in on us just as we were waiting.
 371  *
 372  * Also, don't bother to add to the swap cache if this page-in
 373  * was due to a write access.
 374  */
 375 void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 376         pte_t * page_table, unsigned long entry, int write_access)
 377 {
 378         unsigned long page = __get_free_page(GFP_KERNEL);
 379 
 380         if (pte_val(*page_table) != entry) {
 381                 free_page(page);
 382                 return;
 383         }
 384         if (!page) {
 385                 set_pte(page_table, BAD_PAGE);
 386                 swap_free(entry);
 387                 oom(tsk);
 388                 return;
 389         }
 390         read_swap_page(entry, (char *) page);
 391         if (pte_val(*page_table) != entry) {
 392                 free_page(page);
 393                 return;
 394         }
 395         vma->vm_mm->rss++;
 396         tsk->maj_flt++;
 397         if (!write_access && add_to_swap_cache(page, entry)) {
 398                 set_pte(page_table, mk_pte(page, vma->vm_page_prot));
 399                 return;
 400         }
 401         set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 402         swap_free(entry);
 403         return;
 404 }
 405 
 406 /*
 407  * The swap-out functions return 1 if they successfully
 408  * threw something out, and we got a free page. It returns
 409  * zero if it couldn't do anything, and any other value
 410  * indicates it decreased rss, but the page was shared.
 411  *
 412  * NOTE! If it sleeps, it *must* return 1 to make sure we
 413  * don't continue with the swap-out. Otherwise we may be
 414  * using a process that no longer actually exists (it might
 415  * have died while we slept).
 416  */
  417 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
     /*
      * Try to evict the single page mapped by *page_table.
      *
      *   tsk         task that owns the mapping (its nswap is counted here)
      *   vma         area containing address
      *   address     virtual address mapped by page_table
      *   page_table  pte to examine
      *   limit       pages at or above this address are left alone
      *
      * Returns 0 if nothing was done, 1 if a page was written out or
      * released back to its swap entry, and otherwise the page's use count
      * from just before free_page() on the clean, uncached path.
      */
  418         unsigned long address, pte_t * page_table, unsigned long limit)
  419 {
  420         pte_t pte;
  421         unsigned long entry;
  422         unsigned long page;
  423 
  424         pte = *page_table;
  425         if (!pte_present(pte))
  426                 return 0;
  427         page = pte_page(pte);
  428         if (page >= high_memory)
  429                 return 0;
  430         if (page >= limit)
  431                 return 0;
  432 
  433         if (mem_map[MAP_NR(page)].reserved)
  434                 return 0;
  435         /* Deal with page aging.  Pages age from being unused; they
  436          * rejuvenate on being accessed.  Only swap old pages (age==0
  437          * is oldest). */
              /* The && means delete_from_swap_cache() is only called for a
               * dirty pte.  Either way the pte is marked old and the page is
               * touched rather than evicted on this pass. */
  438         if ((pte_dirty(pte) && delete_from_swap_cache(page)) 
  439             || pte_young(pte))  {
  440                 set_pte(page_table, pte_mkold(pte));
  441                 touch_page(page);
  442                 return 0;
  443         }       
  444         age_page(page);
  445         if (age_of(page))
  446                 return 0;
  447         if (pte_dirty(pte)) {
                      /* Dirty page: either the vma's own swapout operation
                       * writes it, or it goes to a freshly allocated swap slot. */
  448                 if (vma->vm_ops && vma->vm_ops->swapout) {
                              /* pid is saved before the call so the signal
                               * does not depend on tsk afterwards. */
  449                         pid_t pid = tsk->pid;
  450                         vma->vm_mm->rss--;
  451                         if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
  452                                 kill_proc(pid, SIGBUS, 1);
  453                 } else {
                              /* Only a page with a single user is written to
                               * swap, and only if a swap slot is available. */
  454                         if (mem_map[MAP_NR(page)].count != 1)
  455                                 return 0;
  456                         if (!(entry = get_swap_page()))
  457                                 return 0;
  458                         vma->vm_mm->rss--;
  459                         set_pte(page_table, __pte(entry));
  460                         invalidate_page(vma, address);
  461                         tsk->nswap++;
  462                         write_swap_page(entry, (char *) page);
  463                 }
  464                 free_page(page);
  465                 return 1;       /* we slept: the process may not exist any more */
  466         }
              /* Clean page that still has a swap-cache entry: point the pte back
               * at that entry and drop the page without any I/O. */
  467         if ((entry = find_in_swap_cache(page)))  {
  468                 if (mem_map[MAP_NR(page)].count != 1) {
  469                         set_pte(page_table, pte_mkdirty(pte));
  470                         printk("Aiee.. duplicated cached swap-cache entry\n");
  471                         return 0;
  472                 }
  473                 vma->vm_mm->rss--;
  474                 set_pte(page_table, __pte(entry));
  475                 invalidate_page(vma, address);
  476                 free_page(page);
  477                 return 1;
  478         } 
              /* Clean page with no swap-cache entry: just unmap it.  The use
               * count is read before free_page() so it can be returned. */
  479         vma->vm_mm->rss--;
  480         pte_clear(page_table);
  481         invalidate_page(vma, address);
  482         entry = mem_map[MAP_NR(page)].count;
  483         free_page(page);
  484         return entry;
  485 }
 486 
 487 /*
 488  * A new implementation of swap_out().  We do not swap complete processes,
 489  * but only a small number of blocks, before we continue with the next
 490  * process.  The number of blocks actually swapped is determined on the
 491  * number of page faults, that this process actually had in the last time,
 492  * so we won't swap heavily used processes all the time ...
 493  *
  494  * Note: the priority argument is a hint on how much CPU to waste with the
  495  *       swap block search, not a hint of how many blocks to swap with
  496  *       each process.
 497  *
 498  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 499  */
 500 
 501 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 502         pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
 503 {
 504         pte_t * pte;
 505         unsigned long pmd_end;
 506 
 507         if (pmd_none(*dir))
 508                 return 0;
 509         if (pmd_bad(*dir)) {
 510                 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 511                 pmd_clear(dir);
 512                 return 0;
 513         }
 514         
 515         pte = pte_offset(dir, address);
 516         
 517         pmd_end = (address + PMD_SIZE) & PMD_MASK;
 518         if (end > pmd_end)
 519                 end = pmd_end;
 520 
 521         do {
 522                 int result;
 523                 tsk->swap_address = address + PAGE_SIZE;
 524                 result = try_to_swap_out(tsk, vma, address, pte, limit);
 525                 if (result)
 526                         return result;
 527                 address += PAGE_SIZE;
 528                 pte++;
 529         } while (address < end);
 530         return 0;
 531 }
 532 
 533 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 534         pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
 535 {
 536         pmd_t * pmd;
 537         unsigned long pgd_end;
 538 
 539         if (pgd_none(*dir))
 540                 return 0;
 541         if (pgd_bad(*dir)) {
 542                 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 543                 pgd_clear(dir);
 544                 return 0;
 545         }
 546 
 547         pmd = pmd_offset(dir, address);
 548 
 549         pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;  
 550         if (end > pgd_end)
 551                 end = pgd_end;
 552         
 553         do {
 554                 int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
 555                 if (result)
 556                         return result;
 557                 address = (address + PMD_SIZE) & PMD_MASK;
 558                 pmd++;
 559         } while (address < end);
 560         return 0;
 561 }
 562 
 563 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 564         pgd_t *pgdir, unsigned long start, unsigned long limit)
 565 {
 566         unsigned long end;
 567 
 568         /* Don't swap out areas like shared memory which have their
 569             own separate swapping mechanism or areas which are locked down */
 570         if (vma->vm_flags & (VM_SHM | VM_LOCKED))
 571                 return 0;
 572 
 573         end = vma->vm_end;
 574         while (start < end) {
 575                 int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
 576                 if (result)
 577                         return result;
 578                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 579                 pgdir++;
 580         }
 581         return 0;
 582 }
 583 
 584 static int swap_out_process(struct task_struct * p, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
 585 {
 586         unsigned long address;
 587         struct vm_area_struct* vma;
 588 
 589         /*
 590          * Go through process' page directory.
 591          */
 592         address = p->swap_address;
 593         p->swap_address = 0;
 594 
 595         /*
 596          * Find the proper vm-area
 597          */
 598         vma = find_vma(p, address);
 599         if (!vma)
 600                 return 0;
 601         if (address < vma->vm_start)
 602                 address = vma->vm_start;
 603 
 604         for (;;) {
 605                 int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
 606                 if (result)
 607                         return result;
 608                 vma = vma->vm_next;
 609                 if (!vma)
 610                         break;
 611                 address = vma->vm_start;
 612         }
 613         p->swap_address = 0;
 614         return 0;
 615 }
 616 
 617 static int swap_out(unsigned int priority, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
 618 {
 619         static int swap_task;
 620         int loop, counter;
 621         struct task_struct *p;
 622 
 623         counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
 624         for(; counter >= 0; counter--) {
 625                 /*
 626                  * Check that swap_task is suitable for swapping.  If not, look for
 627                  * the next suitable process.
 628                  */
 629                 loop = 0;
 630                 while(1) {
 631                         if (swap_task >= NR_TASKS) {
 632                                 swap_task = 1;
 633                                 if (loop)
 634                                         /* all processes are unswappable or already swapped out */
 635                                         return 0;
 636                                 loop = 1;
 637                         }
 638 
 639                         p = task[swap_task];
 640                         if (p && p->swappable && p->mm->rss)
 641                                 break;
 642 
 643                         swap_task++;
 644                 }
 645 
 646                 /*
 647                  * Determine the number of pages to swap from this process.
 648                  */
 649                 if (!p->swap_cnt) {
 650                         /* Normalise the number of pages swapped by
 651                            multiplying by (RSS / 1MB) */
 652                         p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
 653                 }
 654                 if (!--p->swap_cnt)
 655                         swap_task++;
 656                 switch (swap_out_process(p, limit)) {
 657                         case 0:
 658                                 if (p->swap_cnt)
 659                                         swap_task++;
 660                                 break;
 661                         case 1:
 662                                 return 1;
 663                         default:
 664                                 break;
 665                 }
 666         }
 667         return 0;
 668 }
 669 
 670 /*
 671  * We are much more aggressive about trying to swap out than we used
 672  * to be.  This works out OK, because we now do proper aging on page
 673  * contents. 
 674  */
  675 static int try_to_free_page(int priority, unsigned long limit)
     /*
      * Try the three reclaim sources in turn -- shrink_buffers(), shm_swap()
      * and swap_out() -- and return 1 as soon as one of them succeeds.
      * Returns 0 if every pass fails.
      *
      * The static 'state' remembers which source to start with.  It is only
      * advanced after a source has failed, so a source that succeeded is
      * tried first again on the next call.  shrink_buffers() is skipped when
      * priority is GFP_NOBUFFER.
      *
      * The switch jumps into the middle of the do/while body so that the
      * first pass starts at the remembered source; later passes run all
      * three.  'i' is passed to each source as its own priority argument and
      * counts down from 6 to 0.
      */
  676 {
  677         static int state = 0;
  678         int i=6;
  679 
  680         switch (state) {
  681                 do {
  682                 case 0:
  683                         if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
  684                                 return 1;
  685                         state = 1;
                      /* deliberate fall through to the next source */
  686                 case 1:
  687                         if (shm_swap(i, limit))
  688                                 return 1;
  689                         state = 2;
                      /* deliberate fall through to the next source */
  690                 default:
  691                         if (swap_out(i, limit))
  692                                 return 1;
  693                         state = 0;
  694                 } while(i--);
  695         }
  696         return 0;
  697 }
 698 
 699 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 700 {
 701         entry->prev = head;
 702         (entry->next = head->next)->prev = entry;
 703         head->next = entry;
 704 }
 705 
 706 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 707 {
 708         entry->next->prev = entry->prev;
 709         entry->prev->next = entry->next;
 710 }
 711 
 712 /*
 713  * Free_page() adds the page to the free lists. This is optimized for
 714  * fast normal cases (no error jumps taken normally).
 715  *
 716  * The way to optimize jumps for gcc-2.2.2 is to:
 717  *  - select the "normal" case and put it inside the if () { XXX }
 718  *  - no else-statements if you can avoid them
 719  *
 720  * With the above two rules, you get a straight-line execution path
 721  * for the normal case, giving better asm-code.
 722  *
 723  * free_page() may sleep since the page being freed may be a buffer
 724  * page or present in the swap cache. It will not sleep, however,
 725  * for a freshly allocated page (get_free_page()).
 726  */
 727 
 728 /*
 729  * Buddy system. Hairy. You really aren't expected to understand this
 730  */
 731 static inline void free_pages_ok(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 732 {
 733         unsigned long index = MAP_NR(addr) >> (1 + order);
 734         unsigned long mask = PAGE_MASK << order;
 735 
 736         addr &= mask;
 737         nr_free_pages += 1 << order;
 738         while (order < NR_MEM_LISTS-1) {
 739                 if (!change_bit(index, free_area_map[order]))
 740                         break;
 741                 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
 742                 order++;
 743                 index >>= 1;
 744                 mask <<= 1;
 745                 addr &= mask;
 746         }
 747         add_mem_queue(free_area_list+order, (struct mem_list *) addr);
 748 }
 749 
 750 static inline void check_free_buffers(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 751 {
 752         struct buffer_head * bh;
 753 
 754         bh = buffer_pages[MAP_NR(addr)];
 755         if (bh) {
 756                 struct buffer_head *tmp = bh;
 757                 do {
 758                         if (tmp->b_list == BUF_SHARED
 759                             && tmp->b_dev != B_FREE)
 760                                 refile_buffer(tmp);
 761                         tmp = tmp->b_this_page;
 762                 } while (tmp != bh);
 763         }
 764 }
 765 
 766 void free_pages(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * free_pages: drop one reference to the page at addr; when the last
      * reference goes away, release the (1 << order)-page block.
      *
      *  - Addresses whose map number is not below MAP_NR(high_memory) are
      *    ignored silently.
      *  - Pages marked reserved in mem_map are left untouched.
      *  - If the use count is non-zero it is decremented with interrupts
      *    disabled; on reaching zero the block goes to free_pages_ok() and
      *    delete_from_swap_cache() is called for addr.
      *  - After the flags are restored, a remaining count of exactly 1
      *    triggers check_free_buffers(addr).
      *  - If the use count was already zero, a diagnostic with the caller's
      *    return address is printed and nothing is freed.
      */
 767 {
 768         if (MAP_NR(addr) < MAP_NR(high_memory)) {
 769                 unsigned long flag;
 770                 mem_map_t * map = mem_map + MAP_NR(addr);
 771                 if (map->reserved)
 772                         return;
 773                 if (map->count) {
 774                         save_flags(flag);
 775                         cli();
 776                         if (!--map->count) {
 777                                 free_pages_ok(addr, order);
 778                                 delete_from_swap_cache(addr);
 779                         }
 780                         restore_flags(flag);
                             /* note: count is re-read here, outside the cli() section */
 781                         if (map->count == 1)
 782                                 check_free_buffers(addr);
 783                         return;
 784                 }
 785                 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
 786                 printk("PC = %p\n", __builtin_return_address(0));
 787                 return;
 788         }
 789 }
 790 
 791 /*
 792  * Some ugly macros to speed up __get_free_pages()..
      *
      * RMQUEUE(order, limit): search the free lists, starting at `order`
      * and moving to larger orders, for the first block whose address is
      * below `limit`.  When one is found it is unlinked from its list, its
      * bitmap bit is flipped with mark_used() at the order where it was
      * found, nr_free_pages is lowered by (1 << order), the saved flags
      * are restored, EXPAND() gives back the unused part of the block, and
      * the macro RETURNS the resulting address from the enclosing
      * function.  If no list holds a suitable block the macro simply
      * falls through.
      *
      * The macro relies on a variable named `flags` in the enclosing
      * function (used by restore_flags() here and inside EXPAND()), and
      * `order` must be an lvalue-free expression that EXPAND() can compare
      * against.
 793  */
 794 #define RMQUEUE(order, limit) \
 795 do { struct mem_list * queue = free_area_list+order; \
 796      unsigned long new_order = order; \
 797         do { struct mem_list *prev = queue, *ret; \
 798                 while (queue != (ret = prev->next)) { \
 799                         if ((unsigned long) ret < (limit)) { \
 800                                 (prev->next = ret->next)->prev = prev; \
 801                                 mark_used((unsigned long) ret, new_order); \
 802                                 nr_free_pages -= 1 << order; \
 803                                 restore_flags(flags); \
 804                                 EXPAND(ret, order, new_order); \
 805                                 return (unsigned long) ret; \
 806                         } \
 807                         prev = ret; \
 808                 } \
 809                 new_order++; queue++; \
 810         } while (new_order < NR_MEM_LISTS); \
 811 } while (0)
 812 
 813 static inline int mark_used(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * mark_used: flip the bit for the block at addr in the bitmap of the
      * given order (bit number MAP_NR(addr) >> (1+order)) and return
      * whatever change_bit() returns.
      */
 814 {
 815         return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
 816 }
 817 
     /*
      * EXPAND(addr,low,high): cut a block of order `high` down to order
      * `low`.  On each step `high` is decremented and the size halved; the
      * half that starts at `addr` is queued on free_area_list[high] and its
      * bitmap bit flipped (both with interrupts disabled), then `addr` is
      * advanced by `size` so the other half is what gets split further.
      * Finally the mem_map entry of the page at the resulting `addr` gets
      * count 1 and age PAGE_INITIAL_AGE.
      *
      * Both `addr` and `high` are modified, so they must be lvalues, and a
      * variable named `flags` must exist in the enclosing scope for
      * restore_flags().
      */
 818 #define EXPAND(addr,low,high) \
 819 do { unsigned long size = PAGE_SIZE << high; \
 820         while (high > low) { \
 821                 high--; size >>= 1; cli(); \
 822                 add_mem_queue(free_area_list+high, addr); \
 823                 mark_used((unsigned long) addr, high); \
 824                 restore_flags(flags); \
 825                 addr = (struct mem_list *) (size + (unsigned long) addr); \
 826         } mem_map[MAP_NR((unsigned long) addr)].count = 1; \
 827         mem_map[MAP_NR((unsigned long) addr)].age = PAGE_INITIAL_AGE; \
 828 } while (0)
 829 
 830 unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * __get_free_pages: allocate a block of (1 << order) pages located
      * below `limit`.
      *
      * Returns the address of the block, or 0 when order is out of range
      * (>= NR_MEM_LISTS) or no block could be obtained.
      *
      * A request made from interrupt context (intr_count non-zero) with a
      * priority other than GFP_ATOMIC is always turned into a GFP_ATOMIC
      * request, so the try_to_free_page() path below is never entered from
      * an interrupt; only the warning about it is limited to the first
      * four occurrences.
      *
      * Non-atomic requests are only served while more than reserved_pages
      * pages are free: 5 for GFP_NFS, min_free_pages for everything else.
      * When that test passes, RMQUEUE() either returns a block directly
      * from this function or falls through, in which case 0 is returned
      * without retrying.  When the test fails, every priority except
      * GFP_BUFFER calls try_to_free_page() and retries for as long as it
      * reports success.
      */
 831 {
 832         unsigned long flags;
 833         int reserved_pages;
 834 
 835         if (order >= NR_MEM_LISTS)
 836                 return 0;
 837         if (intr_count && priority != GFP_ATOMIC) {
 838                 static int count = 0;
                     /* rate-limit the message only, not the downgrade to atomic */
 839                 if (++count < 5) {
 840                         printk("gfp called nonatomically from interrupt %p\n",
 841                                 __builtin_return_address(0));
 842                 }
 843                 priority = GFP_ATOMIC;
 844         }
 845         reserved_pages = 5;
 846         if (priority != GFP_NFS)
 847                 reserved_pages = min_free_pages;
 848         save_flags(flags);
 849 repeat:
 850         cli();
 851         if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
                     /* RMQUEUE restores flags and returns from here on success */
 852                 RMQUEUE(order, limit);
 853                 restore_flags(flags);
 854                 return 0;
 855         }
 856         restore_flags(flags);
 857         if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
 858                 goto repeat;
 859         return 0;
 860 }
 861 
 862 /*
 863  * Show free area list (used inside shift_scroll-lock stuff)
 864  * For every order we count the blocks queued on that free list and print the
 865  * count and block size, followed by the total amount of memory on the lists.
 866  */
 867 void show_free_areas(void)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * show_free_areas: print a summary of the free page lists via printk.
      *
      * First the nr_free_pages counter is printed in kB.  Then, with
      * interrupts disabled, each free_area_list[order] is walked to count
      * its blocks; the count and the block size in kB are printed per
      * order, and the per-order sizes are summed into the total printed at
      * the end.  When SWAP_CACHE_INFO is defined, show_swap_cache_info()
      * is called as well.
      */
 868 {
 869         unsigned long order, flags;
 870         unsigned long total = 0;
 871 
 872         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 873         save_flags(flags);
 874         cli();
 875         for (order=0 ; order < NR_MEM_LISTS; order++) {
 876                 struct mem_list * tmp;
 877                 unsigned long nr = 0;
                     /* the list is circular: the head itself marks the end */
 878                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 879                         nr ++;
 880                 }
 881                 total += nr * ((PAGE_SIZE>>10) << order);
 882                 printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
 883         }
 884         restore_flags(flags);
 885         printk("= %lukB)\n", total);
 886 #ifdef SWAP_CACHE_INFO
 887         show_swap_cache_info();
 888 #endif  
 889 }
 890 
 891 /*
 892  * Trying to stop swapping from a file is fraught with races, so
 893  * we repeat quite a bit here when we have to pause. swapoff()
 894  * isn't exactly timing-critical, so who cares (but this is /really/
 895  * inefficient, ugh).
 896  *
 897  * We return 1 after having slept, which makes the process start over
 898  * from the beginning for this process..
 899  */
 900 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 901         pte_t *dir, unsigned int type, unsigned long page)
     /*
      * unuse_pte: remove one page table entry's dependence on swap area
      * `type`.
      *
      * vma     - area the entry belongs to (supplies vm_page_prot, vm_mm)
      * address - not used by this function
      * dir     - the page table entry to examine
      * type    - index of the swap area being shut down
      * page    - spare page, used as the target when data must be read in
      *
      * Returns 0 when the spare page is still unused, 1 when it has been
      * consumed (either mapped into the page table or freed), in which
      * case the caller needs a new one before continuing.
      */
 902 {
 903         pte_t pte = *dir;
 904 
 905         if (pte_none(pte))
 906                 return 0;
 907         if (pte_present(pte)) {
                     /* this local deliberately hides the `page` parameter */
 908                 unsigned long page = pte_page(pte);
 909                 if (page >= high_memory)
 910                         return 0;
 911                 if (!in_swap_cache(page))
 912                         return 0;
 913                 if (SWP_TYPE(in_swap_cache(page)) != type)
 914                         return 0;
                     /*
                      * Page is resident but cached against this swap area:
                      * drop the cache entry and mark the pte dirty.
                      */
 915                 delete_from_swap_cache(page);
 916                 set_pte(dir, pte_mkdirty(pte));
 917                 return 0;
 918         }
             /* not present: the pte value is a swap entry */
 919         if (SWP_TYPE(pte_val(pte)) != type)
 920                 return 0;
 921         read_swap_page(pte_val(pte), (char *) page);
             /* the entry changed while we were reading: give up on this page */
 922         if (pte_val(*dir) != pte_val(pte)) {
 923                 free_page(page);
 924                 return 1;
 925         }
 926         set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 927         ++vma->vm_mm->rss;
 928         swap_free(pte_val(pte));
 929         return 1;
 930 }
 931 
 932 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 933         unsigned long address, unsigned long size, unsigned long offset,
 934         unsigned int type, unsigned long page)
     /*
      * unuse_pmd: run unuse_pte() over the page table entries below one
      * pmd entry, covering `size` bytes from `address` but never past the
      * end of the region mapped by this pmd (PMD_SIZE).
      *
      * An empty pmd is skipped; a bad one is reported, cleared and
      * skipped.  Returns 1 as soon as unuse_pte() returns 1 (spare page
      * consumed), otherwise 0 after the whole range has been visited.
      */
 935 {
 936         pte_t * pte;
 937         unsigned long end;
 938 
 939         if (pmd_none(*dir))
 940                 return 0;
 941         if (pmd_bad(*dir)) {
 942                 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 943                 pmd_clear(dir);
 944                 return 0;
 945         }
 946         pte = pte_offset(dir, address);
             /* fold the pmd base into offset; keep only the in-pmd part in address */
 947         offset += address & PMD_MASK;
 948         address &= ~PMD_MASK;
 949         end = address + size;
 950         if (end > PMD_SIZE)
 951                 end = PMD_SIZE;
 952         do {
 953                 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
 954                         return 1;
 955                 address += PAGE_SIZE;
 956                 pte++;
 957         } while (address < end);
 958         return 0;
 959 }
 960 
 961 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 962         unsigned long address, unsigned long size,
 963         unsigned int type, unsigned long page)
     /*
      * unuse_pgd: run unuse_pmd() over the pmd entries below one page
      * directory entry, covering `size` bytes from `address` but never
      * past the end of the region mapped by this entry (PGDIR_SIZE).
      *
      * An empty pgd entry is skipped; a bad one is reported, cleared and
      * skipped.  Returns 1 as soon as unuse_pmd() returns 1, otherwise 0.
      */
 964 {
 965         pmd_t * pmd;
 966         unsigned long offset, end;
 967 
 968         if (pgd_none(*dir))
 969                 return 0;
 970         if (pgd_bad(*dir)) {
 971                 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 972                 pgd_clear(dir);
 973                 return 0;
 974         }
 975         pmd = pmd_offset(dir, address);
             /* offset keeps the pgd base, address the part within this pgd entry */
 976         offset = address & PGDIR_MASK;
 977         address &= ~PGDIR_MASK;
 978         end = address + size;
 979         if (end > PGDIR_SIZE)
 980                 end = PGDIR_SIZE;
 981         do {
 982                 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
 983                         return 1;
                     /* step to the start of the next pmd-sized region */
 984                 address = (address + PMD_SIZE) & PMD_MASK;
 985                 pmd++;
 986         } while (address < end);
 987         return 0;
 988 }
 989 
 990 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
     /* [previous][next][first][last][top][bottom][index][help] */
 991         unsigned long start, unsigned long end,
 992         unsigned int type, unsigned long page)
     /*
      * unuse_vma: run unuse_pgd() over every page directory entry that
      * covers [start, end), beginning with pgdir.  Returns 1 as soon as
      * unuse_pgd() returns 1, otherwise 0 once the range is exhausted.
      */
 993 {
 994         while (start < end) {
 995                 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
 996                         return 1;
                     /* advance to the start of the next page directory entry */
 997                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 998                 pgdir++;
 999         }
1000         return 0;
1001 }
1002 
1003 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * unuse_process: run unuse_vma() over every area on the mmap list of
      * task p.  Tasks without an mm, or for which pgd_inuse() is true for
      * their page directory, are skipped.  Returns 1 as soon as
      * unuse_vma() returns 1, otherwise 0.
      *
      * NOTE(review): the exact meaning of pgd_inuse() is not visible here -
      * confirm against asm/pgtable.h.
      */
1004 {
1005         struct vm_area_struct* vma;
1006 
1007         /*
1008          * Go through process' page directory.
1009          */
1010         if (!p->mm || pgd_inuse(p->mm->pgd))
1011                 return 0;
1012         vma = p->mm->mmap;
1013         while (vma) {
1014                 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
1015                 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
1016                         return 1;
1017                 vma = vma->vm_next;
1018         }
1019         return 0;
1020 }
1021 
1022 /*
1023  * To avoid races, we repeat for each process after having
1024  * swapped something in. That gets rid of a few pesky races,
1025  * and "swapoff" isn't exactly timing critical.
1026  */
1027 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * try_to_unuse: go through all task slots and bring back everything
      * that lives in swap area `type`.
      *
      * One spare page is kept in hand.  Whenever unuse_process() returns
      * 1 the spare page has been consumed, so a new one is allocated and
      * the same task is scanned again from the start; the task index only
      * advances once a scan of that task returns 0.
      *
      * Returns 0 on success (after freeing the unused spare page) or
      * -ENOMEM when a spare page cannot be allocated.
      */
1028 {
1029         int nr;
1030         unsigned long page = get_free_page(GFP_KERNEL);
1031 
1032         if (!page)
1033                 return -ENOMEM;
1034         nr = 0;
1035         while (nr < NR_TASKS) {
1036                 if (task[nr]) {
1037                         if (unuse_process(task[nr], type, page)) {
1038                                 page = get_free_page(GFP_KERNEL);
1039                                 if (!page)
1040                                         return -ENOMEM;
                                     /* same nr again: rescan this task */
1041                                 continue;
1042                         }
1043                 }
1044                 nr++;
1045         }
1046         free_page(page);
1047         return 0;
1048 }
1049 
1050 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * sys_swapoff: stop swapping to the file or block device named by
      * specialfile.
      *
      * Returns 0 on success, -EPERM when the caller is not the superuser,
      * the namei() error when the name cannot be resolved, -EINVAL when
      * the inode does not match any write-enabled area on swap_list, or
      * the error from try_to_unuse().
      *
      * Steps: find the area on swap_list, unlink it, clear its write-ok
      * state, pull its contents back with try_to_unuse(), release the
      * block device if there is one, then give back the inode, the swap
      * map and the lock map and mark the slot free.
      *
      * NOTE(review): when try_to_unuse() fails the flags go back to
      * SWP_WRITEOK, but the area has already been unlinked from swap_list
      * and is not linked back in.  The search loop below only walks
      * swap_list, so a later swapoff would not find it - confirm and fix.
      */
1051 {
1052         struct swap_info_struct * p;
1053         struct inode * inode;
1054         struct file filp;
1055         int i, type, prev;
1056 
1057         if (!suser())
1058                 return -EPERM;
1059         i = namei(specialfile,&inode);
1060         if (i)
1061                 return i;
             /* prev trails one step behind type so the match can be unlinked */
1062         prev = -1;
1063         for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1064                 p = swap_info + type;
1065                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
1066                         if (p->swap_file) {
1067                                 if (p->swap_file == inode)
1068                                   break;
1069                         } else {
1070                                 if (S_ISBLK(inode->i_mode)
1071                                     && (p->swap_device == inode->i_rdev))
1072                                   break;
1073                         }
1074                 }
1075                 prev = type;
1076         }
1077         if (type < 0){
1078                 iput(inode);
1079                 return -EINVAL;
1080         }
1081         if (prev < 0) {
1082                 swap_list.head = p->next;
1083         } else {
1084                 swap_info[prev].next = p->next;
1085         }
1086         if (type == swap_list.next) {
1087                 /* just pick something that's safe... */
1088                 swap_list.next = swap_list.head;
1089         }
             /* slot stays allocated, but no longer passes the SWP_WRITEOK test */
1090         p->flags = SWP_USED;
1091         i = try_to_unuse(type);
1092         if (i) {
1093                 iput(inode);
1094                 p->flags = SWP_WRITEOK;
1095                 return i;
1096         }
1097 
1098         if(p->swap_device){
1099                 memset(&filp, 0, sizeof(filp));         
1100                 filp.f_inode = inode;
1101                 filp.f_mode = 3; /* read write */
1102                 /* open it again to get fops */
1103                 if( !blkdev_open(inode, &filp) &&
1104                    filp.f_op && filp.f_op->release){
                             /*
                              * Two releases: presumably one for the open
                              * just above and one for the open done by
                              * sys_swapon() - TODO confirm.
                              */
1105                         filp.f_op->release(inode,&filp);
1106                         filp.f_op->release(inode,&filp);
1107                 }
1108         }
1109         iput(inode);
1110 
1111         nr_swap_pages -= p->pages;
1112         iput(p->swap_file);
1113         p->swap_file = NULL;
1114         p->swap_device = 0;
1115         vfree(p->swap_map);
1116         p->swap_map = NULL;
1117         free_page((long) p->swap_lockmap);
1118         p->swap_lockmap = NULL;
1119         p->flags = 0;
1120         return 0;
1121 }
1122 
1123 /*
1124  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1125  *
1126  * The swapon system call
1127  */
1128 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * sys_swapon: start swapping to the regular file or block device
      * named by specialfile.
      *
      * swap_flags may carry SWAP_FLAG_PREFER together with a priority in
      * the SWAP_FLAG_PRIO_MASK bits; without it each new area gets the
      * next lower value of a static, decreasing counter as its priority.
      *
      * Returns 0 on success.  Errors visible in this function:
      *   -EPERM   caller is not the superuser, or all MAX_SWAPFILES slots
      *            are in use
      *   -EBUSY   the inode has other users (i_count != 1), or the block
      *            device is already recorded in another slot
      *   -ENODEV  block device with a zero i_rdev
      *   -EINVAL  neither a block device nor a regular file, missing
      *            "SWAP-SPACE" signature, or no usable page in the bitmap
      *   -ENOMEM  the lock map page or the swap map cannot be allocated
      *   plus whatever namei() or blkdev_open() return.
      *
      * The first page of the area is read into swap_lockmap; its last 10
      * bytes must hold the signature, the rest is a bitmap in which a set
      * bit marks a usable page (bit 0 is never considered).  swap_map gets
      * 0 for usable pages and 0x80 for the others, and the lock map is
      * then cleared for later use.  Finally the area is linked into
      * swap_list ahead of the first entry whose priority is not higher.
      *
      * On any failure the slot is reset and marked free again; the
      * bad_swap label additionally calls the file's release() when
      * blkdev_open() had supplied file operations.
      */
1129 {
1130         struct swap_info_struct * p;
1131         struct inode * swap_inode;
1132         unsigned int type;
1133         int i, j, prev;
1134         int error;
1135         struct file filp;
1136         static int least_priority = 0;
1137 
1138         memset(&filp, 0, sizeof(filp));
1139         if (!suser())
1140                 return -EPERM;
             /* take the first slot not marked SWP_USED, growing nr_swapfiles if needed */
1141         p = swap_info;
1142         for (type = 0 ; type < nr_swapfiles ; type++,p++)
1143                 if (!(p->flags & SWP_USED))
1144                         break;
1145         if (type >= MAX_SWAPFILES)
1146                 return -EPERM;
1147         if (type >= nr_swapfiles)
1148                 nr_swapfiles = type+1;
1149         p->flags = SWP_USED;
1150         p->swap_file = NULL;
1151         p->swap_device = 0;
1152         p->swap_map = NULL;
1153         p->swap_lockmap = NULL;
1154         p->lowest_bit = 0;
1155         p->highest_bit = 0;
1156         p->max = 1;
1157         p->next = -1;
1158         if (swap_flags & SWAP_FLAG_PREFER) {
1159                 p->prio =
1160                   (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
1161         } else {
1162                 p->prio = --least_priority;
1163         }
1164         error = namei(specialfile,&swap_inode);
1165         if (error)
1166                 goto bad_swap_2;
1167         p->swap_file = swap_inode;
1168         error = -EBUSY;
1169         if (swap_inode->i_count != 1)
1170                 goto bad_swap_2;
1171         error = -EINVAL;
1172 
1173         if (S_ISBLK(swap_inode->i_mode)) {
1174                 p->swap_device = swap_inode->i_rdev;
1175 
1176                 filp.f_inode = swap_inode;
1177                 filp.f_mode = 3; /* read write */
1178                 error = blkdev_open(swap_inode, &filp);
                     /* devices are tracked by swap_device only; drop the inode */
1179                 p->swap_file = NULL;
1180                 iput(swap_inode);
1181                 if(error)
1182                         goto bad_swap_2;
1183                 error = -ENODEV;
1184                 if (!p->swap_device)
1185                         goto bad_swap;
1186                 error = -EBUSY;
1187                 for (i = 0 ; i < nr_swapfiles ; i++) {
1188                         if (i == type)
1189                                 continue;
1190                         if (p->swap_device == swap_info[i].swap_device)
1191                                 goto bad_swap;
1192                 }
1193         } else if (!S_ISREG(swap_inode->i_mode))
1194                 goto bad_swap;
1195         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
1196         if (!p->swap_lockmap) {
1197                 printk("Unable to start swapping: out of memory :-)\n");
1198                 error = -ENOMEM;
1199                 goto bad_swap;
1200         }
             /* page 0 of the area: usable-page bitmap plus trailing signature */
1201         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
1202         if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
1203                 printk("Unable to find swap-space signature\n");
1204                 error = -EINVAL;
1205                 goto bad_swap;
1206         }
             /* wipe the signature so its bytes are not taken for bitmap bits */
1207         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
1208         j = 0;
1209         p->lowest_bit = 0;
1210         p->highest_bit = 0;
             /* j counts usable pages; max ends up one past the highest usable page */
1211         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
1212                 if (test_bit(i,p->swap_lockmap)) {
1213                         if (!p->lowest_bit)
1214                                 p->lowest_bit = i;
1215                         p->highest_bit = i;
1216                         p->max = i+1;
1217                         j++;
1218                 }
1219         }
1220         if (!j) {
1221                 printk("Empty swap-file\n");
1222                 error = -EINVAL;
1223                 goto bad_swap;
1224         }
1225         p->swap_map = (unsigned char *) vmalloc(p->max);
1226         if (!p->swap_map) {
1227                 error = -ENOMEM;
1228                 goto bad_swap;
1229         }
1230         for (i = 1 ; i < p->max ; i++) {
1231                 if (test_bit(i,p->swap_lockmap))
1232                         p->swap_map[i] = 0;
1233                 else
1234                         p->swap_map[i] = 0x80;
1235         }
1236         p->swap_map[0] = 0x80;
1237         memset(p->swap_lockmap,0,PAGE_SIZE);
1238         p->flags = SWP_WRITEOK;
1239         p->pages = j;
1240         nr_swap_pages += j;
1241         printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
1242 
1243         /* insert swap space into swap_list: */
1244         prev = -1;
1245         for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
1246                 if (p->prio >= swap_info[i].prio) {
1247                         break;
1248                 }
1249                 prev = i;
1250         }
1251         p->next = i;
1252         if (prev < 0) {
1253                 swap_list.head = swap_list.next = p - swap_info;
1254         } else {
1255                 swap_info[prev].next = p - swap_info;
1256         }
1257         return 0;
1258 bad_swap:
1259         if(filp.f_op && filp.f_op->release)
1260                 filp.f_op->release(filp.f_inode,&filp);
1261 bad_swap_2:
1262         free_page((long) p->swap_lockmap);
1263         vfree(p->swap_map);
1264         iput(p->swap_file);
1265         p->swap_device = 0;
1266         p->swap_file = NULL;
1267         p->swap_map = NULL;
1268         p->swap_lockmap = NULL;
1269         p->flags = 0;
1270         return error;
1271 }
1272 
1273 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * si_swapinfo: fill in val->totalswap and val->freeswap, in bytes.
      *
      * Only areas with both SWP_WRITEOK bits set are looked at.  For each
      * of them every swap_map slot below `max` is examined: slots holding
      * 128 are skipped, slots holding 0 count as free and as total, and
      * any other value counts as total only.  The page counts are turned
      * into bytes by shifting with PAGE_SHIFT.
      */
1274 {
1275         unsigned int i, j;
1276 
1277         val->freeswap = val->totalswap = 0;
1278         for (i = 0; i < nr_swapfiles; i++) {
1279                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
1280                         continue;
1281                 for (j = 0; j < swap_info[i].max; ++j)
1282                         switch (swap_info[i].swap_map[j]) {
1283                                 case 128:
                                             /* `continue` applies to the for loop: next slot */
1284                                         continue;
1285                                 case 0:
1286                                         ++val->freeswap;
                                             /* no break: a free slot is counted in the total too */
1287                                 default:
1288                                         ++val->totalswap;
1289                         }
1290         }
1291         val->freeswap <<= PAGE_SHIFT;
1292         val->totalswap <<= PAGE_SHIFT;
1293         return;
1294 }
1295 
1296 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
     /* LONG_ALIGN(x): x rounded up to the next multiple of sizeof(long). */
1297 
1298 /*
1299  * set up the free-area data structures:
1300  *   - mark all pages reserved
1301  *   - mark all memory queues empty
1302  *   - clear the memory bitmaps
1303  */
1304 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * free_area_init: lay out the page allocator's bookkeeping at
      * start_mem and return the first address after it.
      *
      * start_mem - first free address; the swap cache (through
      *             init_swap_cache()), then mem_map, then one bitmap per
      *             order are placed from here on
      * end_mem   - end of memory; determines the number of mem_map entries
      *             (MAP_NR(end_mem)) and the size of the bitmaps
      *
      * Side effects: sets min_free_pages, mem_map, free_area_list[] and
      * free_area_map[].  Every mem_map entry starts out reserved with
      * count and dirty cleared, and every free list starts out empty.
      */
1305 {
1306         mem_map_t * p;
1307         unsigned long mask = PAGE_MASK;
1308         int i;
1309 
1310         /*
1311          * select nr of pages we try to keep free for important stuff
1312          * with a minimum of 16 pages. This is totally arbitrary
1313          */
1314         i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
1315         if (i < 16)
1316                 i = 16;
1317         min_free_pages = i;
1318         start_mem = init_swap_cache(start_mem, end_mem);
1319         mem_map = (mem_map_t *) start_mem;
1320         p = mem_map + MAP_NR(end_mem);
1321         start_mem = LONG_ALIGN((unsigned long) p);
             /* walk backwards from the end of mem_map down to its first entry */
1322         while (p > mem_map) {
1323                 --p;
1324                 p->count = 0;
1325                 p->dirty = 0;
1326                 p->reserved = 1;
1327         }
1328 
1329         for (i = 0 ; i < NR_MEM_LISTS ; i++) {
1330                 unsigned long bitmap_size;
1331                 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
                     /* doubling the mask shifts it left by one; end_mem is rounded up to match */
1332                 mask += mask;
1333                 end_mem = (end_mem + ~mask) & mask;
                     /* one bit per (1 << i) pages, rounded up to bytes, then to longs */
1334                 bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
1335                 bitmap_size = (bitmap_size + 7) >> 3;
1336                 bitmap_size = LONG_ALIGN(bitmap_size);
1337                 free_area_map[i] = (unsigned char *) start_mem;
1338                 memset((void *) start_mem, 0, bitmap_size);
1339                 start_mem += bitmap_size;
1340         }
1341         return start_mem;
1342 }

/* [previous][next][first][last][top][bottom][index][help] */