root/mm/swap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. rw_swap_page
  5. get_swap_page
  6. swap_duplicate
  7. swap_free
  8. swap_in
  9. try_to_swap_out
  10. swap_out_process
  11. swap_out
  12. try_to_free_page
  13. add_mem_queue
  14. remove_mem_queue
  15. free_pages_ok
  16. check_free_buffers
  17. free_pages
  18. mark_used
  19. __get_free_pages
  20. __get_dma_pages
  21. show_free_areas
  22. try_to_unuse
  23. sys_swapoff
  24. sys_swapon
  25. si_swapinfo
  26. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  */
  11 
  12 #include <linux/mm.h>
  13 #include <linux/sched.h>
  14 #include <linux/head.h>
  15 #include <linux/kernel.h>
  16 #include <linux/kernel_stat.h>
  17 #include <linux/errno.h>
  18 #include <linux/string.h>
  19 #include <linux/stat.h>
  20 #include <linux/fs.h>
  21 
  22 #include <asm/dma.h>
  23 #include <asm/system.h> /* for cli()/sti() */
  24 #include <asm/bitops.h>
  25 
/* Number of slots in the swap_info[] table declared below. */
   26 #define MAX_SWAPFILES 8
   27 
/*
 * Bits tested in swap_info_struct.flags.  SWP_WRITEOK (3) contains the
 * SWP_USED bit, which is why the code in this file compares
 * (flags & SWP_WRITEOK) against SWP_WRITEOK instead of testing for non-zero.
 */
   28 #define SWP_USED        1
   29 #define SWP_WRITEOK     3
   30 
/*
 * Swap entry layout: SWP_TYPE() extracts bits 1-7 (the index into
 * swap_info[]), SWP_OFFSET() extracts everything from bit PAGE_SHIFT upwards
 * (the page slot inside that area), and SWP_ENTRY() builds an entry from the
 * two.  Bit 0 is never set by SWP_ENTRY().
 * NOTE(review): bit 0 is presumably left clear so that an entry stored in a
 * page-table slot is not taken for a present page -- confirm against the
 * PAGE_PRESENT definition.
 */
   31 #define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
   32 #define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
   33 #define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))
   34 
/*
 * Free-page reserve used by __get_free_pages() for every priority except
 * GFP_NFS (which uses 5) and GFP_ATOMIC (which ignores the reserve).
 */
   35 int min_free_pages = 20;
   36 
/* Swap entries whose type is >= nr_swapfiles are rejected by the functions below. */
   37 static int nr_swapfiles = 0;
/* Tasks waiting for a swap_lockmap bit sleep here; woken after every unlock. */
   38 static struct wait_queue * lock_queue = NULL;
  39 
/*
 * Per-swap-area bookkeeping; one slot per possible swap area, indexed by
 * SWP_TYPE(entry).
 */
   40 static struct swap_info_struct {
   41         unsigned long flags;            /* SWP_USED / SWP_WRITEOK bits */
   42         struct inode * swap_file;       /* used for I/O when swap_device is 0 */
   43         unsigned int swap_device;       /* non-zero: I/O goes through ll_rw_page() on this device */
   44         unsigned char * swap_map;       /* per-slot use count: 0 = free, bumped by swap_duplicate() */
   45         unsigned char * swap_lockmap;   /* per-slot lock bits (set_bit/clear_bit) */
   46         int pages;                      /* not referenced in the visible part of this file */
   47         int lowest_bit;                 /* get_swap_page() starts its scan here */
   48         int highest_bit;                /* get_swap_page() ends its scan here */
   49         unsigned long max;              /* offsets >= max are rejected as invalid */
   50 } swap_info[MAX_SWAPFILES];
   51 
/* Called from try_to_free_page(); defined outside this file. */
   52 extern int shm_swap (int);
   53 
/*
 * One unsigned long per page frame, indexed by (address >> PAGE_SHIFT);
 * allocated and zeroed by init_swap_cache(), filled by add_to_swap_cache().
 */
   54 unsigned long *swap_cache;
  55 
  56 #ifdef SWAP_CACHE_INFO
  57 unsigned long swap_cache_add_total = 0;
  58 unsigned long swap_cache_add_success = 0;
  59 unsigned long swap_cache_del_total = 0;
  60 unsigned long swap_cache_del_success = 0;
  61 unsigned long swap_cache_find_total = 0;
  62 unsigned long swap_cache_find_success = 0;
  63 
  64 extern inline void show_swap_cache_info(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66         printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
  67                 swap_cache_add_total, swap_cache_add_success, 
  68                 swap_cache_del_total, swap_cache_del_success,
  69                 swap_cache_find_total, swap_cache_find_success);
  70 }
  71 #endif
  72 
  73 extern inline int add_to_swap_cache(unsigned long addr, unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
  74 {
  75         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
  76         
  77 #ifdef SWAP_CACHE_INFO
  78         swap_cache_add_total++;
  79 #endif
  80         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) { 
  81                 __asm__ __volatile__ (
  82                                       "xchgl %0,%1\n"
  83                                       : "=m" (swap_cache[addr >> PAGE_SHIFT]),
  84                                        "=r" (entry)
  85                                       : "0" (swap_cache[addr >> PAGE_SHIFT]),
  86                                        "1" (entry)
  87                                       );
  88                 if (entry)  {
  89                         printk("swap_cache: replacing non-NULL entry\n");
  90                 }
  91 #ifdef SWAP_CACHE_INFO
  92                 swap_cache_add_success++;
  93 #endif
  94                 return 1;
  95         }
  96         return 0;
  97 }
  98 
  99 static unsigned long init_swap_cache(unsigned long mem_start,
     /* [previous][next][first][last][top][bottom][index][help] */
 100         unsigned long mem_end)
 101 {
 102         unsigned long swap_cache_size;
 103 
 104         mem_start = (mem_start + 15) & ~15;
 105         swap_cache = (unsigned long *) mem_start;
 106         swap_cache_size = mem_end >> PAGE_SHIFT;
 107         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 108         return (unsigned long) (swap_cache + swap_cache_size);
 109 }
 110 
 111 void rw_swap_page(int rw, unsigned long entry, char * buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 112 {
 113         unsigned long type, offset;
 114         struct swap_info_struct * p;
 115 
 116         type = SWP_TYPE(entry);
 117         if (type >= nr_swapfiles) {
 118                 printk("Internal error: bad swap-device\n");
 119                 return;
 120         }
 121         p = &swap_info[type];
 122         offset = SWP_OFFSET(entry);
 123         if (offset >= p->max) {
 124                 printk("rw_swap_page: weirdness\n");
 125                 return;
 126         }
 127         if (!(p->flags & SWP_USED)) {
 128                 printk("Trying to swap to unused swap-device\n");
 129                 return;
 130         }
 131         while (set_bit(offset,p->swap_lockmap))
 132                 sleep_on(&lock_queue);
 133         if (rw == READ)
 134                 kstat.pswpin++;
 135         else
 136                 kstat.pswpout++;
 137         if (p->swap_device) {
 138                 ll_rw_page(rw,p->swap_device,offset,buf);
 139         } else if (p->swap_file) {
 140                 struct inode *swapf = p->swap_file;
 141                 unsigned int zones[8];
 142                 int i;
 143                 if (swapf->i_op->bmap == NULL
 144                         && swapf->i_op->smap != NULL){
 145                         /*
 146                                 With MsDOS, we use msdos_smap which return
 147                                 a sector number (not a cluster or block number).
 148                                 It is a patch to enable the UMSDOS project.
 149                                 Other people are working on better solution.
 150 
 151                                 It sounds like ll_rw_swap_file defined
 152                                 it operation size (sector size) based on
 153                                 PAGE_SIZE and the number of block to read.
 154                                 So using bmap or smap should work even if
 155                                 smap will require more blocks.
 156                         */
 157                         int j;
 158                         unsigned int block = offset << 3;
 159 
 160                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 161                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 162                                         printk("rw_swap_page: bad swap file\n");
 163                                         return;
 164                                 }
 165                         }
 166                 }else{
 167                         int j;
 168                         unsigned int block = offset
 169                                 << (12 - swapf->i_sb->s_blocksize_bits);
 170 
 171                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 172                                 if (!(zones[i] = bmap(swapf,block++))) {
 173                                         printk("rw_swap_page: bad swap file\n");
 174                                         return;
 175                                 }
 176                 }
 177                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 178         } else
 179                 printk("re_swap_page: no swap file or device\n");
 180         if (offset && !clear_bit(offset,p->swap_lockmap))
 181                 printk("rw_swap_page: lock already cleared\n");
 182         wake_up(&lock_queue);
 183 }
 184 
 185 unsigned int get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 186 {
 187         struct swap_info_struct * p;
 188         unsigned int offset, type;
 189 
 190         p = swap_info;
 191         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 192                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 193                         continue;
 194                 for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
 195                         if (p->swap_map[offset])
 196                                 continue;
 197                         p->swap_map[offset] = 1;
 198                         nr_swap_pages--;
 199                         if (offset == p->highest_bit)
 200                                 p->highest_bit--;
 201                         p->lowest_bit = offset;
 202                         return SWP_ENTRY(type,offset);
 203                 }
 204         }
 205         return 0;
 206 }
 207 
 208 unsigned long swap_duplicate(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 209 {
 210         struct swap_info_struct * p;
 211         unsigned long offset, type;
 212 
 213         if (!entry)
 214                 return 0;
 215         offset = SWP_OFFSET(entry);
 216         type = SWP_TYPE(entry);
 217         if (type == SHM_SWP_TYPE)
 218                 return entry;
 219         if (type >= nr_swapfiles) {
 220                 printk("Trying to duplicate nonexistent swap-page\n");
 221                 return 0;
 222         }
 223         p = type + swap_info;
 224         if (offset >= p->max) {
 225                 printk("swap_duplicate: weirdness\n");
 226                 return 0;
 227         }
 228         if (!p->swap_map[offset]) {
 229                 printk("swap_duplicate: trying to duplicate unused page\n");
 230                 return 0;
 231         }
 232         p->swap_map[offset]++;
 233         return entry;
 234 }
 235 
 236 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 237 {
 238         struct swap_info_struct * p;
 239         unsigned long offset, type;
 240 
 241         if (!entry)
 242                 return;
 243         type = SWP_TYPE(entry);
 244         if (type == SHM_SWP_TYPE)
 245                 return;
 246         if (type >= nr_swapfiles) {
 247                 printk("Trying to free nonexistent swap-page\n");
 248                 return;
 249         }
 250         p = & swap_info[type];
 251         offset = SWP_OFFSET(entry);
 252         if (offset >= p->max) {
 253                 printk("swap_free: weirdness\n");
 254                 return;
 255         }
 256         if (!(p->flags & SWP_USED)) {
 257                 printk("Trying to free swap from unused swap-device\n");
 258                 return;
 259         }
 260         while (set_bit(offset,p->swap_lockmap))
 261                 sleep_on(&lock_queue);
 262         if (offset < p->lowest_bit)
 263                 p->lowest_bit = offset;
 264         if (offset > p->highest_bit)
 265                 p->highest_bit = offset;
 266         if (!p->swap_map[offset])
 267                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 268         else
 269                 if (!--p->swap_map[offset])
 270                         nr_swap_pages++;
 271         if (!clear_bit(offset,p->swap_lockmap))
 272                 printk("swap_free: lock already cleared\n");
 273         wake_up(&lock_queue);
 274 }
 275 
 276 unsigned long swap_in(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 277 {
 278         unsigned long page;
 279 
 280         if (!(page = get_free_page(GFP_KERNEL))) {
 281                 oom(current);
 282                 return BAD_PAGE;
 283         }
 284         read_swap_page(entry, (char *) page);
 285         if (add_to_swap_cache(page, entry))
 286                 return page | PAGE_PRESENT;
 287         swap_free(entry);
 288         return page | PAGE_DIRTY | PAGE_PRESENT;
 289 }
 290 
 291 static inline int try_to_swap_out(struct vm_area_struct* vma, unsigned offset, unsigned long * table_ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 292 {
 293         unsigned long page, entry;
 294 
 295         page = *table_ptr;
 296         if (!(PAGE_PRESENT & page))
 297                 return 0;
 298         if (page >= high_memory)
 299                 return 0;
 300         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
 301                 return 0;
 302         
 303         if ((PAGE_DIRTY & page) && delete_from_swap_cache(page))  {
 304                 *table_ptr &= ~PAGE_ACCESSED;
 305                 return 0;
 306         }
 307         if (PAGE_ACCESSED & page) {
 308                 *table_ptr &= ~PAGE_ACCESSED;
 309                 return 0;
 310         }
 311         if (PAGE_DIRTY & page) {
 312                 page &= PAGE_MASK;
 313                 if (mem_map[MAP_NR(page)] != 1)
 314                         return 0;
 315                 if (vma->vm_ops && vma->vm_ops->swapout)
 316                         vma->vm_ops->swapout(vma, offset, table_ptr);
 317                 else
 318                 {
 319                         if (!(entry = get_swap_page()))
 320                                 return 0;
 321                         *table_ptr = entry;
 322                         invalidate();
 323                         write_swap_page(entry, (char *) page);
 324                 }
 325                 free_page(page);
 326                 return 1 + mem_map[MAP_NR(page)];
 327         }
 328         if ((entry = find_in_swap_cache(page)))  {
 329                 if (mem_map[MAP_NR(page)] != 1) {
 330                         *table_ptr |= PAGE_DIRTY;
 331                         printk("Aiee.. duplicated cached swap-cache entry\n");
 332                         return 0;
 333                 }
 334                 *table_ptr = entry;
 335                 invalidate();
 336                 free_page(page & PAGE_MASK);
 337                 return 1;
 338         } 
 339         page &= PAGE_MASK;
 340         *table_ptr = 0;
 341         invalidate();
 342         free_page(page);
 343         return 1 + mem_map[MAP_NR(page)];
 344 }
 345 
 346 /*
 347  * A new implementation of swap_out().  We do not swap complete processes,
 348  * but only a small number of blocks, before we continue with the next
 349  * process.  The number of blocks actually swapped is determined on the
 350  * number of page faults, that this process actually had in the last time,
 351  * so we won't swap heavily used processes all the time ...
 352  *
  353  * Note: the priority argument is a hint on how much CPU to waste with the
  354  *       swap block search, not a hint of how many blocks to swap with
  355  *       each process.
 356  *
 357  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 358  */
 359 
 360 /*
 361  * These are the minimum and maximum number of pages to swap from one process,
 362  * before proceeding to the next:
 363  */
 364 #define SWAP_MIN        4
 365 #define SWAP_MAX        32
 366 
 367 /*
 368  * The actual number of pages to swap is determined as:
 369  * SWAP_RATIO / (number of recent major page faults)
 370  */
 371 #define SWAP_RATIO      128
 372 
 373 static int swap_out_process(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
 374 {
 375         unsigned long address;
 376         unsigned long offset;
 377         unsigned long *pgdir;
 378         unsigned long pg_table;
 379         struct vm_area_struct* vma;
 380 
 381         /*
 382          * Go through process' page directory.
 383          */
 384         address = p->mm->swap_address;
 385         p->mm->swap_address = 0;
 386 
 387         /*
 388          * Find the proper vm-area
 389          */
 390         vma = p->mm->mmap;
 391         for (;;) {
 392                 if (!vma)
 393                         return 0;
 394                 if (address <= vma->vm_end)
 395                         break;
 396                 vma = vma->vm_next;
 397         }
 398         if (address < vma->vm_start)
 399                 address = vma->vm_start;
 400 
 401         pgdir = (address >> PGDIR_SHIFT) + (unsigned long *) p->tss.cr3;
 402         offset = address & ~PGDIR_MASK;
 403         address &= PGDIR_MASK;
 404         for ( ; address < TASK_SIZE ;
 405         pgdir++, address = address + PGDIR_SIZE, offset = 0) {
 406                 pg_table = *pgdir;
 407                 if (pg_table >= high_memory)
 408                         continue;
 409                 if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
 410                         continue;
 411                 if (!(PAGE_PRESENT & pg_table)) {
 412                         printk("swap_out_process (%s): bad page-table at vm %08lx: %08lx\n",
 413                                         p->comm, address + offset, pg_table);
 414                         *pgdir = 0;
 415                         continue;
 416                 }
 417                 pg_table &= 0xfffff000;
 418 
 419                 /*
 420                  * Go through this page table.
 421                  */
 422                 for( ; offset < ~PGDIR_MASK ; offset += PAGE_SIZE) {
 423                         /*
 424                          * Update vma again..
 425                          */
 426                         for (;;) {
 427                                 if (address+offset < vma->vm_end)
 428                                         break;
 429                                 vma = vma->vm_next;
 430                                 if (!vma)
 431                                         return 0;
 432                         }
 433 
 434                         switch(try_to_swap_out(vma, offset+address-vma->vm_start, (unsigned long *) (pg_table + (offset >> 10)))) {
 435                                 case 0:
 436                                         break;
 437 
 438                                 case 1:
 439                                         p->mm->rss--;
 440                                         /* continue with the following page the next time */
 441                                         p->mm->swap_address = address + offset + PAGE_SIZE;
 442                                         return 1;
 443 
 444                                 default:
 445                                         p->mm->rss--;
 446                                         break;
 447                         }
 448                 }
 449         }
 450         /*
 451          * Finish work with this process, if we reached the end of the page
 452          * directory.
 453          */
 454         return 0;
 455 }
 456 
 457 static int swap_out(unsigned int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 458 {
 459         static int swap_task;
 460         int loop;
 461         int counter = NR_TASKS * 2 >> priority;
 462         struct task_struct *p;
 463 
 464         counter = NR_TASKS * 2 >> priority;
 465         for(; counter >= 0; counter--, swap_task++) {
 466                 /*
 467                  * Check that swap_task is suitable for swapping.  If not, look for
 468                  * the next suitable process.
 469                  */
 470                 loop = 0;
 471                 while(1) {
 472                         if (swap_task >= NR_TASKS) {
 473                                 swap_task = 1;
 474                                 if (loop)
 475                                         /* all processes are unswappable or already swapped out */
 476                                         return 0;
 477                                 loop = 1;
 478                         }
 479 
 480                         p = task[swap_task];
 481                         if (p && p->mm->swappable && p->mm->rss)
 482                                 break;
 483 
 484                         swap_task++;
 485                 }
 486 
 487                 /*
 488                  * Determine the number of pages to swap from this process.
 489                  */
 490                 if (!p->mm->swap_cnt) {
 491                         p->mm->dec_flt = (p->mm->dec_flt * 3) / 4 + p->mm->maj_flt - p->mm->old_maj_flt;
 492                         p->mm->old_maj_flt = p->mm->maj_flt;
 493 
 494                         if (p->mm->dec_flt >= SWAP_RATIO / SWAP_MIN) {
 495                                 p->mm->dec_flt = SWAP_RATIO / SWAP_MIN;
 496                                 p->mm->swap_cnt = SWAP_MIN;
 497                         } else if (p->mm->dec_flt <= SWAP_RATIO / SWAP_MAX)
 498                                 p->mm->swap_cnt = SWAP_MAX;
 499                         else
 500                                 p->mm->swap_cnt = SWAP_RATIO / p->mm->dec_flt;
 501                 }
 502                 if (swap_out_process(p)) {
 503                         if ((--p->mm->swap_cnt) == 0)
 504                                 swap_task++;
 505                         return 1;
 506                 }
 507         }
 508         return 0;
 509 }
 510 
 511 static int try_to_free_page(int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 512 {
 513         int i=6;
 514 
 515         while (i--) {
 516                 if (priority != GFP_NOBUFFER && shrink_buffers(i))
 517                         return 1;
 518                 if (shm_swap(i))
 519                         return 1;
 520                 if (swap_out(i))
 521                         return 1;
 522         }
 523         return 0;
 524 }
 525 
 526 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 527 {
 528         entry->prev = head;
 529         entry->next = head->next;
 530         entry->next->prev = entry;
 531         head->next = entry;
 532 }
 533 
 534 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 535 {
 536         entry->next->prev = entry->prev;
 537         entry->prev->next = entry->next;
 538 }
 539 
 540 /*
 541  * Free_page() adds the page to the free lists. This is optimized for
 542  * fast normal cases (no error jumps taken normally).
 543  *
 544  * The way to optimize jumps for gcc-2.2.2 is to:
 545  *  - select the "normal" case and put it inside the if () { XXX }
 546  *  - no else-statements if you can avoid them
 547  *
 548  * With the above two rules, you get a straight-line execution path
 549  * for the normal case, giving better asm-code.
 550  */
 551 
 552 /*
 553  * Buddy system. Hairy. You really aren't expected to understand this
 554  */
 555 static inline void free_pages_ok(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 556 {
 557         unsigned long index = addr >> (PAGE_SHIFT + 1 + order);
 558         unsigned long mask = PAGE_MASK << order;
 559 
 560         addr &= mask;
 561         nr_free_pages += 1 << order;
 562         while (order < NR_MEM_LISTS-1) {
 563                 if (!change_bit(index, free_area_map[order]))
 564                         break;
 565                 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
 566                 order++;
 567                 index >>= 1;
 568                 mask <<= 1;
 569                 addr &= mask;
 570         }
 571         add_mem_queue(free_area_list+order, (struct mem_list *) addr);
 572 }
 573 
 574 static inline void check_free_buffers(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 575 {
 576         struct buffer_head * bh;
 577 
 578         bh = buffer_pages[MAP_NR(addr)];
 579         if (bh) {
 580                 struct buffer_head *tmp = bh;
 581                 do {
 582                         if (tmp->b_list == BUF_SHARED && tmp->b_dev != 0xffff)
 583                                 refile_buffer(tmp);
 584                         tmp = tmp->b_this_page;
 585                 } while (tmp != bh);
 586         }
 587 }
 588 
 589 void free_pages(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 590 {
 591         if (addr < high_memory) {
 592                 unsigned long flag;
 593                 unsigned short * map = mem_map + MAP_NR(addr);
 594                 if (*map) {
 595                         if (!(*map & MAP_PAGE_RESERVED)) {
 596                                 save_flags(flag);
 597                                 cli();
 598                                 if (!--*map)  {
 599                                         free_pages_ok(addr, order);
 600                                         delete_from_swap_cache(addr);
 601                                 }
 602                                 restore_flags(flag);
 603                                 if (*map == 1)
 604                                         check_free_buffers(addr);
 605                         }
 606                         return;
 607                 }
 608                 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
 609                 printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
 610                 return;
 611         }
 612 }
 613 
 614 /*
 615  * Some ugly macros to speed up __get_free_pages()..
 616  */
/*
 * RMQUEUE(order): take a block off the free lists and RETURN it from the
 * enclosing function.
 *
 * Starting with free_area_list[order] and moving to larger orders, the
 * first non-empty list is used: its first block is unlinked, the block's
 * bitmap bit is toggled with mark_used(), nr_free_pages is reduced by
 * (1 << order), the caller's saved interrupt flags are restored, the
 * unused part of a larger block is handed back by EXPAND(), and the
 * enclosing function returns the block address.  If every list is empty
 * the macro simply falls through.
 *
 * The macro relies on a local variable named `flags` in the calling
 * function.  `order` is expanded without parentheses, so pass a plain
 * identifier.
 */
  617 #define RMQUEUE(order) \
  618 do { struct mem_list * queue = free_area_list+order; \
  619      unsigned long new_order = order; \
  620         do { struct mem_list *next = queue->next; \
  621                 if (queue != next) { \
  622                         (queue->next = next->next)->prev = queue; \
  623                         mark_used((unsigned long) next, new_order); \
  624                         nr_free_pages -= 1 << order; \
  625                         restore_flags(flags); \
  626                         EXPAND(next, order, new_order); \
  627                         return (unsigned long) next; \
  628                 } new_order++; queue++; \
  629         } while (new_order < NR_MEM_LISTS); \
  630 } while (0)
 631 
 632 static inline int mark_used(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 633 {
 634         return change_bit(addr >> (PAGE_SHIFT+1+order), free_area_map[order]);
 635 }
 636 
/*
 * EXPAND(addr, low, high): cut a block of order `high` down to order `low`.
 *
 * While high > low: high is decremented and the size halved, interrupts
 * are disabled with cli(), the block at addr is queued on
 * free_area_list[high] and its bitmap bit toggled with mark_used(), the
 * caller's saved flags are restored, and addr advances by the halved
 * size.  Finally the mem_map entry of the page at addr is set to 1.
 *
 * `addr` and `high` are MODIFIED, so both must be variables of the
 * caller (addr a struct mem_list pointer).  Like RMQUEUE, the macro
 * relies on a local variable named `flags` in the calling function.
 */
  637 #define EXPAND(addr,low,high) \
  638 do { unsigned long size = PAGE_SIZE << high; \
  639         while (high > low) { \
  640                 high--; size >>= 1; cli(); \
  641                 add_mem_queue(free_area_list+high, addr); \
  642                 mark_used((unsigned long) addr, high); \
  643                 restore_flags(flags); \
  644                 addr = (struct mem_list *) (size + (unsigned long) addr); \
  645         } mem_map[MAP_NR((unsigned long) addr)] = 1; \
  646 } while (0)
 647 
 648 unsigned long __get_free_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 649 {
 650         unsigned long flags;
 651         int reserved_pages;
 652 
 653         if (intr_count && priority != GFP_ATOMIC) {
 654                 static int count = 0;
 655                 if (++count < 5) {
 656                         printk("gfp called nonatomically from interrupt %p\n",
 657                                 __builtin_return_address(0));
 658                         priority = GFP_ATOMIC;
 659                 }
 660         }
 661         reserved_pages = 5;
 662         if (priority != GFP_NFS)
 663                 reserved_pages = min_free_pages;
 664         save_flags(flags);
 665 repeat:
 666         cli();
 667         if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
 668                 RMQUEUE(order);
 669                 restore_flags(flags);
 670                 return 0;
 671         }
 672         restore_flags(flags);
 673         if (priority != GFP_BUFFER && try_to_free_page(priority))
 674                 goto repeat;
 675         return 0;
 676 }
 677 
 678 /*
 679  * Yes, I know this is ugly. Don't tell me.
 680  */
 681 unsigned long __get_dma_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 682 {
 683         unsigned long list = 0;
 684         unsigned long result;
 685         unsigned long limit = MAX_DMA_ADDRESS;
 686 
 687         /* if (EISA_bus) limit = ~0UL; */
 688         if (priority != GFP_ATOMIC)
 689                 priority = GFP_BUFFER;
 690         for (;;) {
 691                 result = __get_free_pages(priority, order);
 692                 if (result < limit) /* covers failure as well */
 693                         break;
 694                 *(unsigned long *) result = list;
 695                 list = result;
 696         }
 697         while (list) {
 698                 unsigned long tmp = list;
 699                 list = *(unsigned long *) list;
 700                 free_pages(tmp, order);
 701         }
 702         return result;
 703 }
 704 
 705 /*
 706  * Show free area list (used inside shift_scroll-lock stuff)
 707  * We also calculate the percentage fragmentation. We do this by counting the
 708  * memory on each free list with the exception of the first item on the list.
 709  */
 710 void show_free_areas(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 711 {
 712         unsigned long order, flags;
 713         unsigned long total = 0;
 714 
 715         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 716         save_flags(flags);
 717         cli();
 718         for (order=0 ; order < NR_MEM_LISTS; order++) {
 719                 struct mem_list * tmp;
 720                 unsigned long nr = 0;
 721                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 722                         nr ++;
 723                 }
 724                 total += nr * (4 << order);
 725                 printk("%lu*%ukB ", nr, 4 << order);
 726         }
 727         restore_flags(flags);
 728         printk("= %lukB)\n", total);
 729 #ifdef SWAP_CACHE_INFO
 730         show_swap_cache_info();
 731 #endif  
 732 }
 733 
 734 /*
 735  * Trying to stop swapping from a file is fraught with races, so
 736  * we repeat quite a bit here when we have to pause. swapoff()
 737  * isn't exactly timing-critical, so who cares?
      *
      * try_to_unuse() walks the two-level page tables of every task and
      * removes all references to swap area "type":
      *   - a present page that is also in the swap cache for this area
      *     is marked dirty and dropped from the swap cache;
      *   - a non-present, non-zero entry naming this area is read back
      *     from swap into a freshly allocated page, which then replaces
      *     the entry.
      *
      * Returns 0 once the scan has run to completion, or -ENOMEM if the
      * page needed for reading data back in could not be allocated.
 738  */
 739 static int try_to_unuse(unsigned int type)
 740 {
 741         int nr, pgt, pg;
 742         unsigned long page, *ppage;
             /* spare page for swap-in; 0 means "none currently held" */
 743         unsigned long tmp = 0;
 744         struct task_struct *p;
 745 
 746         nr = 0;
 747         
 748 /*
 749  * When we have to sleep, we restart the whole algorithm from the same
 750  * task we stopped in. That at least rids us of all races.
      *
      * Note that "nr" is deliberately not reset at the label: tasks that
      * were already scanned completely are not visited again.
 751  */
 752 repeat:
 753         for (; nr < NR_TASKS ; nr++) {
 754                 p = task[nr];
 755                 if (!p)
 756                         continue;
                     /* first level: the table p->tss.cr3 points at */
 757                 for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++) {
 758                         ppage = pgt + ((unsigned long *) p->tss.cr3);
 759                         page = *ppage;
 760                         if (!page)
 761                                 continue;
                             /* skip entries that are not present or point beyond high_memory */
 762                         if (!(page & PAGE_PRESENT) || (page >= high_memory))
 763                                 continue;
                             /* skip second-level tables living in reserved pages */
 764                         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
 765                                 continue;
                             /* second level: walk every entry of this page table */
 766                         ppage = (unsigned long *) (page & PAGE_MASK);   
 767                         for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++) {
 768                                 page = *ppage;
 769                                 if (!page)
 770                                         continue;
 771                                 if (page & PAGE_PRESENT) {
 772                                         if (page >= high_memory)
 773                                                 continue;
                                             /*
                                              * in_swap_cache() yields 0 or a value whose
                                              * SWP_TYPE() identifies a swap area; only
                                              * entries of the area being removed matter.
                                              */
 774                                         if (!(page = in_swap_cache(page)))
 775                                                 continue;
 776                                         if (SWP_TYPE(page) != type)
 777                                                 continue;
                                             /*
                                              * Mark the mapping dirty before dropping the
                                              * cache entry - presumably because the copy
                                              * in this swap area is about to go away.
                                              */
 778                                         *ppage |= PAGE_DIRTY;
 779                                         delete_from_swap_cache(*ppage);
 780                                         continue;
 781                                 }
                                     /* not present and non-zero: treated as a swap entry */
 782                                 if (SWP_TYPE(page) != type)
 783                                         continue;
                                     /*
                                      * Get the spare page first, then rescan: per the
                                      * comment at "repeat", anything that may have
                                      * slept invalidates what we have looked at.
                                      */
 784                                 if (!tmp) {
 785                                         if (!(tmp = __get_free_page(GFP_KERNEL)))
 786                                                 return -ENOMEM;
 787                                         goto repeat;
 788                                 }
 789                                 read_swap_page(page, (char *) tmp);
                                     /*
                                      * Install the page only if the entry still holds
                                      * the same swap entry after the read; otherwise
                                      * keep tmp as the spare page for the next round.
                                      */
 790                                 if (*ppage == page) {
 791                                         *ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
 792                                         ++p->mm->rss;
 793                                         swap_free(page);
 794                                         tmp = 0;
 795                                 }
 796                                 goto repeat;
 797                         }
 798                 }
 799         }
             /* release the spare page; tmp is 0 here if it was consumed or never allocated */
 800         free_page(tmp);
 801         return 0;
 802 }
 803 
 804 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 805 {
 806         struct swap_info_struct * p;
 807         struct inode * inode;
 808         unsigned int type;
 809         struct file filp;
 810         int i;
 811 
 812         if (!suser())
 813                 return -EPERM;
 814         i = namei(specialfile,&inode);
 815         if (i)
 816                 return i;
 817         p = swap_info;
 818         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 819                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 820                         continue;
 821                 if (p->swap_file) {
 822                         if (p->swap_file == inode)
 823                                 break;
 824                 } else {
 825                         if (!S_ISBLK(inode->i_mode))
 826                                 continue;
 827                         if (p->swap_device == inode->i_rdev)
 828                                 break;
 829                 }
 830         }
 831 
 832         if (type >= nr_swapfiles){
 833                 iput(inode);
 834                 return -EINVAL;
 835         }
 836         p->flags = SWP_USED;
 837         i = try_to_unuse(type);
 838         if (i) {
 839                 iput(inode);
 840                 p->flags = SWP_WRITEOK;
 841                 return i;
 842         }
 843 
 844         if(p->swap_device){
 845                 memset(&filp, 0, sizeof(filp));         
 846                 filp.f_inode = inode;
 847                 filp.f_mode = 3; /* read write */
 848                 /* open it again to get fops */
 849                 if( !blkdev_open(inode, &filp) &&
 850                    filp.f_op && filp.f_op->release){
 851                         filp.f_op->release(inode,&filp);
 852                         filp.f_op->release(inode,&filp);
 853                 }
 854         }
 855         iput(inode);
 856 
 857         nr_swap_pages -= p->pages;
 858         iput(p->swap_file);
 859         p->swap_file = NULL;
 860         p->swap_device = 0;
 861         vfree(p->swap_map);
 862         p->swap_map = NULL;
 863         free_page((long) p->swap_lockmap);
 864         p->swap_lockmap = NULL;
 865         p->flags = 0;
 866         return 0;
 867 }
 868 
 869 /*
 870  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 871  *
 872  * The swapon system call
 873  */
 874 asmlinkage int sys_swapon(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 875 {
 876         struct swap_info_struct * p;
 877         struct inode * swap_inode;
 878         unsigned int type;
 879         int i,j;
 880         int error;
 881         struct file filp;
 882 
 883         memset(&filp, 0, sizeof(filp));
 884         if (!suser())
 885                 return -EPERM;
 886         p = swap_info;
 887         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 888                 if (!(p->flags & SWP_USED))
 889                         break;
 890         if (type >= MAX_SWAPFILES)
 891                 return -EPERM;
 892         if (type >= nr_swapfiles)
 893                 nr_swapfiles = type+1;
 894         p->flags = SWP_USED;
 895         p->swap_file = NULL;
 896         p->swap_device = 0;
 897         p->swap_map = NULL;
 898         p->swap_lockmap = NULL;
 899         p->lowest_bit = 0;
 900         p->highest_bit = 0;
 901         p->max = 1;
 902         error = namei(specialfile,&swap_inode);
 903         if (error)
 904                 goto bad_swap_2;
 905         p->swap_file = swap_inode;
 906         error = -EBUSY;
 907         if (swap_inode->i_count != 1)
 908                 goto bad_swap_2;
 909         error = -EINVAL;
 910 
 911         if (S_ISBLK(swap_inode->i_mode)) {
 912                 p->swap_device = swap_inode->i_rdev;
 913 
 914                 filp.f_inode = swap_inode;
 915                 filp.f_mode = 3; /* read write */
 916                 error = blkdev_open(swap_inode, &filp);
 917                 p->swap_file = NULL;
 918                 iput(swap_inode);
 919                 if(error)
 920                         goto bad_swap_2;
 921                 error = -ENODEV;
 922                 if (!p->swap_device)
 923                         goto bad_swap;
 924                 error = -EBUSY;
 925                 for (i = 0 ; i < nr_swapfiles ; i++) {
 926                         if (i == type)
 927                                 continue;
 928                         if (p->swap_device == swap_info[i].swap_device)
 929                                 goto bad_swap;
 930                 }
 931         } else if (!S_ISREG(swap_inode->i_mode))
 932                 goto bad_swap;
 933         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 934         if (!p->swap_lockmap) {
 935                 printk("Unable to start swapping: out of memory :-)\n");
 936                 error = -ENOMEM;
 937                 goto bad_swap;
 938         }
 939         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 940         if (memcmp("SWAP-SPACE",p->swap_lockmap+4086,10)) {
 941                 printk("Unable to find swap-space signature\n");
 942                 error = -EINVAL;
 943                 goto bad_swap;
 944         }
 945         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 946         j = 0;
 947         p->lowest_bit = 0;
 948         p->highest_bit = 0;
 949         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 950                 if (test_bit(i,p->swap_lockmap)) {
 951                         if (!p->lowest_bit)
 952                                 p->lowest_bit = i;
 953                         p->highest_bit = i;
 954                         p->max = i+1;
 955                         j++;
 956                 }
 957         }
 958         if (!j) {
 959                 printk("Empty swap-file\n");
 960                 error = -EINVAL;
 961                 goto bad_swap;
 962         }
 963         p->swap_map = (unsigned char *) vmalloc(p->max);
 964         if (!p->swap_map) {
 965                 error = -ENOMEM;
 966                 goto bad_swap;
 967         }
 968         for (i = 1 ; i < p->max ; i++) {
 969                 if (test_bit(i,p->swap_lockmap))
 970                         p->swap_map[i] = 0;
 971                 else
 972                         p->swap_map[i] = 0x80;
 973         }
 974         p->swap_map[0] = 0x80;
 975         memset(p->swap_lockmap,0,PAGE_SIZE);
 976         p->flags = SWP_WRITEOK;
 977         p->pages = j;
 978         nr_swap_pages += j;
 979         printk("Adding Swap: %dk swap-space\n",j<<2);
 980         return 0;
 981 bad_swap:
 982         if(filp.f_op && filp.f_op->release)
 983                 filp.f_op->release(filp.f_inode,&filp);
 984 bad_swap_2:
 985         free_page((long) p->swap_lockmap);
 986         vfree(p->swap_map);
 987         iput(p->swap_file);
 988         p->swap_device = 0;
 989         p->swap_file = NULL;
 990         p->swap_map = NULL;
 991         p->swap_lockmap = NULL;
 992         p->flags = 0;
 993         return error;
 994 }
 995 
 996 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 997 {
 998         unsigned int i, j;
 999 
1000         val->freeswap = val->totalswap = 0;
1001         for (i = 0; i < nr_swapfiles; i++) {
1002                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
1003                         continue;
1004                 for (j = 0; j < swap_info[i].max; ++j)
1005                         switch (swap_info[i].swap_map[j]) {
1006                                 case 128:
1007                                         continue;
1008                                 case 0:
1009                                         ++val->freeswap;
1010                                 default:
1011                                         ++val->totalswap;
1012                         }
1013         }
1014         val->freeswap <<= PAGE_SHIFT;
1015         val->totalswap <<= PAGE_SHIFT;
1016         return;
1017 }
1018 
1019 /*
1020  * set up the free-area data structures:
1021  *   - mark all pages MAP_PAGE_RESERVED
1022  *   - mark all memory queues empty
1023  *   - clear the memory bitmaps
1024  */
1025 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
     /* [previous][next][first][last][top][bottom][index][help] */
1026 {
1027         unsigned short * p;
1028         unsigned long mask = PAGE_MASK;
1029         int i;
1030 
1031         /*
1032          * select nr of pages we try to keep free for important stuff
1033          * with a minimum of 16 pages. This is totally arbitrary
1034          */
1035         i = end_mem >> (PAGE_SHIFT+6);
1036         if (i < 16)
1037                 i = 16;
1038         min_free_pages = i;
1039         start_mem = init_swap_cache(start_mem, end_mem);
1040         mem_map = (unsigned short *) start_mem;
1041         p = mem_map + MAP_NR(end_mem);
1042         start_mem = (unsigned long) p;
1043         while (p > mem_map)
1044                 *--p = MAP_PAGE_RESERVED;
1045 
1046         for (i = 0 ; i < NR_MEM_LISTS ; i++, mask <<= 1) {
1047                 unsigned long bitmap_size;
1048                 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
1049                 end_mem = (end_mem + ~mask) & mask;
1050                 bitmap_size = end_mem >> (PAGE_SHIFT + i);
1051                 bitmap_size = (bitmap_size + 7) >> 3;
1052                 free_area_map[i] = (unsigned char *) start_mem;
1053                 memset((void *) start_mem, 0, bitmap_size);
1054                 start_mem += bitmap_size;
1055         }
1056         return start_mem;
1057 }

/* [previous][next][first][last][top][bottom][index][help] */