root/mm/swap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. rw_swap_page
  5. get_swap_page
  6. swap_duplicate
  7. swap_free
  8. swap_in
  9. try_to_swap_out
  10. swap_out_process
  11. swap_out
  12. try_to_free_page
  13. add_mem_queue
  14. remove_mem_queue
  15. free_pages_ok
  16. check_free_buffers
  17. free_pages
  18. mark_used
  19. __get_free_pages
  20. show_free_areas
  21. try_to_unuse
  22. sys_swapoff
  23. sys_swapon
  24. si_swapinfo
  25. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  */
  11 
  12 #include <linux/mm.h>
  13 #include <linux/sched.h>
  14 #include <linux/head.h>
  15 #include <linux/kernel.h>
  16 #include <linux/kernel_stat.h>
  17 #include <linux/errno.h>
  18 #include <linux/string.h>
  19 #include <linux/stat.h>
  20 #include <linux/fs.h>
  21 
  22 #include <asm/system.h> /* for cli()/sti() */
  23 #include <asm/bitops.h>
  24 
  25 #define MAX_SWAPFILES 8                 /* number of slots in swap_info[] */
  26 
  27 #define SWP_USED        1               /* flag bit: slot is in use */
  28 #define SWP_WRITEOK     3               /* includes the SWP_USED bit; always tested as a full mask */
  29 
     /*
      * Swap entry encoding implemented by the three macros below: the swap
      * area type (index into swap_info[]) lives in bits 1-7, the page offset
      * inside that area starts at bit PAGE_SHIFT.
      */
  30 #define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
  31 #define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
  32 #define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))
  33 
  34 static int nr_swapfiles = 0;            /* upper bound on valid swap_info[] indexes */
  35 static struct wait_queue * lock_queue = NULL;   /* sleepers waiting for a swap_lockmap bit */
  36 
     /* One entry per swap area (file or device). */
  37 static struct swap_info_struct {
  38         unsigned long flags;            /* SWP_USED / SWP_WRITEOK */
  39         struct inode * swap_file;       /* backing inode when swapping to a file */
  40         unsigned int swap_device;       /* device number; rw_swap_page() tests this before swap_file */
  41         unsigned char * swap_map;       /* per-page use count, 0 means free */
  42         unsigned char * swap_lockmap;   /* per-page lock bits, handled with set_bit()/clear_bit() */
  43         int pages;                      /* amount removed from nr_swap_pages by sys_swapoff() */
  44         int lowest_bit;                 /* first offset scanned by get_swap_page() */
  45         int highest_bit;                /* last offset scanned by get_swap_page() */
  46         unsigned long max;              /* offsets >= max are rejected as invalid */
  47 } swap_info[MAX_SWAPFILES];
  48 
  49 extern int shm_swap (int);
  50 
     /* Indexed by (address >> PAGE_SHIFT); holds the swap entry cached for that page, 0 if none. */
  51 unsigned long *swap_cache;
  52 
  53 #ifdef SWAP_CACHE_INFO
  54 unsigned long swap_cache_add_total = 0;
  55 unsigned long swap_cache_add_success = 0;
  56 unsigned long swap_cache_del_total = 0;
  57 unsigned long swap_cache_del_success = 0;
  58 unsigned long swap_cache_find_total = 0;
  59 unsigned long swap_cache_find_success = 0;
  60 
  61 extern inline void show_swap_cache_info(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  62 {
  63         printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
  64                 swap_cache_add_total, swap_cache_add_success, 
  65                 swap_cache_del_total, swap_cache_del_success,
  66                 swap_cache_find_total, swap_cache_find_success);
  67 }
  68 #endif
  69 
  70 extern inline int add_to_swap_cache(unsigned long addr, unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
  71 {
  72         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
  73         
  74 #ifdef SWAP_CACHE_INFO
  75         swap_cache_add_total++;
  76 #endif
  77         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) { 
  78                 __asm__ __volatile__ (
  79                                       "xchgl %0,%1\n"
  80                                       : "=m" (swap_cache[addr >> PAGE_SHIFT]),
  81                                        "=r" (entry)
  82                                       : "0" (swap_cache[addr >> PAGE_SHIFT]),
  83                                        "1" (entry)
  84                                       );
  85                 if (entry)  {
  86                         printk("swap_cache: replacing non-NULL entry\n");
  87                 }
  88 #ifdef SWAP_CACHE_INFO
  89                 swap_cache_add_success++;
  90 #endif
  91                 return 1;
  92         }
  93         return 0;
  94 }
  95 
  96 static unsigned long init_swap_cache(unsigned long mem_start,
     /* [previous][next][first][last][top][bottom][index][help] */
  97         unsigned long mem_end)
  98 {
  99         unsigned long swap_cache_size;
 100 
 101         mem_start = (mem_start + 15) & ~15;
 102         swap_cache = (unsigned long *) mem_start;
 103         swap_cache_size = mem_end >> PAGE_SHIFT;
 104         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 105 #ifdef SWAP_CACHE_INFO
 106         printk("%ld bytes for swap cache allocated\n",
 107                swap_cache_size * sizeof(unsigned long));
 108 #endif  
 109         
 110         return (unsigned long) (swap_cache + swap_cache_size);
 111 }
 112 
 113 void rw_swap_page(int rw, unsigned long entry, char * buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 114 {
 115         unsigned long type, offset;
 116         struct swap_info_struct * p;
 117 
 118         type = SWP_TYPE(entry);
 119         if (type >= nr_swapfiles) {
 120                 printk("Internal error: bad swap-device\n");
 121                 return;
 122         }
 123         p = &swap_info[type];
 124         offset = SWP_OFFSET(entry);
 125         if (offset >= p->max) {
 126                 printk("rw_swap_page: weirdness\n");
 127                 return;
 128         }
 129         if (!(p->flags & SWP_USED)) {
 130                 printk("Trying to swap to unused swap-device\n");
 131                 return;
 132         }
 133         while (set_bit(offset,p->swap_lockmap))
 134                 sleep_on(&lock_queue);
 135         if (rw == READ)
 136                 kstat.pswpin++;
 137         else
 138                 kstat.pswpout++;
 139         if (p->swap_device) {
 140                 ll_rw_page(rw,p->swap_device,offset,buf);
 141         } else if (p->swap_file) {
 142                 struct inode *swapf = p->swap_file;
 143                 unsigned int zones[8];
 144                 int i;
 145                 if (swapf->i_op->bmap == NULL
 146                         && swapf->i_op->smap != NULL){
 147                         /*
 148                                 With MsDOS, we use msdos_smap which return
 149                                 a sector number (not a cluster or block number).
 150                                 It is a patch to enable the UMSDOS project.
 151                                 Other people are working on better solution.
 152 
 153                                 It sounds like ll_rw_swap_file defined
 154                                 it operation size (sector size) based on
 155                                 PAGE_SIZE and the number of block to read.
 156                                 So using bmap ou smap should work even if
 157                                 smap will requiered more blocks.
 158                         */
 159                         int j;
 160                         unsigned int block = offset << 3;
 161 
 162                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 163                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 164                                         printk("rw_swap_page: bad swap file\n");
 165                                         return;
 166                                 }
 167                         }
 168                 }else{
 169                         int j;
 170                         unsigned int block = offset
 171                                 << (12 - swapf->i_sb->s_blocksize_bits);
 172 
 173                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 174                                 if (!(zones[i] = bmap(swapf,block++))) {
 175                                         printk("rw_swap_page: bad swap file\n");
 176                                         return;
 177                                 }
 178                 }
 179                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 180         } else
 181                 printk("re_swap_page: no swap file or device\n");
 182         if (offset && !clear_bit(offset,p->swap_lockmap))
 183                 printk("rw_swap_page: lock already cleared\n");
 184         wake_up(&lock_queue);
 185 }
 186 
 187 unsigned int get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 188 {
 189         struct swap_info_struct * p;
 190         unsigned int offset, type;
 191 
 192         p = swap_info;
 193         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 194                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 195                         continue;
 196                 for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
 197                         if (p->swap_map[offset])
 198                                 continue;
 199                         p->swap_map[offset] = 1;
 200                         nr_swap_pages--;
 201                         if (offset == p->highest_bit)
 202                                 p->highest_bit--;
 203                         p->lowest_bit = offset;
 204                         return SWP_ENTRY(type,offset);
 205                 }
 206         }
 207         return 0;
 208 }
 209 
 210 unsigned long swap_duplicate(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 211 {
 212         struct swap_info_struct * p;
 213         unsigned long offset, type;
 214 
 215         if (!entry)
 216                 return 0;
 217         offset = SWP_OFFSET(entry);
 218         type = SWP_TYPE(entry);
 219         if (type == SHM_SWP_TYPE)
 220                 return entry;
 221         if (type >= nr_swapfiles) {
 222                 printk("Trying to duplicate nonexistent swap-page\n");
 223                 return 0;
 224         }
 225         p = type + swap_info;
 226         if (offset >= p->max) {
 227                 printk("swap_duplicate: weirdness\n");
 228                 return 0;
 229         }
 230         if (!p->swap_map[offset]) {
 231                 printk("swap_duplicate: trying to duplicate unused page\n");
 232                 return 0;
 233         }
 234         p->swap_map[offset]++;
 235         return entry;
 236 }
 237 
 238 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 239 {
 240         struct swap_info_struct * p;
 241         unsigned long offset, type;
 242 
 243         if (!entry)
 244                 return;
 245         type = SWP_TYPE(entry);
 246         if (type == SHM_SWP_TYPE)
 247                 return;
 248         if (type >= nr_swapfiles) {
 249                 printk("Trying to free nonexistent swap-page\n");
 250                 return;
 251         }
 252         p = & swap_info[type];
 253         offset = SWP_OFFSET(entry);
 254         if (offset >= p->max) {
 255                 printk("swap_free: weirdness\n");
 256                 return;
 257         }
 258         if (!(p->flags & SWP_USED)) {
 259                 printk("Trying to free swap from unused swap-device\n");
 260                 return;
 261         }
 262         while (set_bit(offset,p->swap_lockmap))
 263                 sleep_on(&lock_queue);
 264         if (offset < p->lowest_bit)
 265                 p->lowest_bit = offset;
 266         if (offset > p->highest_bit)
 267                 p->highest_bit = offset;
 268         if (!p->swap_map[offset])
 269                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 270         else
 271                 if (!--p->swap_map[offset])
 272                         nr_swap_pages++;
 273         if (!clear_bit(offset,p->swap_lockmap))
 274                 printk("swap_free: lock already cleared\n");
 275         wake_up(&lock_queue);
 276 }
 277 
 278 unsigned long swap_in(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 279 {
 280         unsigned long page;
 281 
 282         if (!(page = get_free_page(GFP_KERNEL))) {
 283                 oom(current);
 284                 return BAD_PAGE;
 285         }
 286         read_swap_page(entry, (char *) page);
 287         if (add_to_swap_cache(page, entry))
 288                 return page | PAGE_PRIVATE;
 289         swap_free(entry);
 290         return page | PAGE_DIRTY | PAGE_PRIVATE;
 291 }
 292 
 293 static inline int try_to_swap_out(unsigned long * table_ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Try to evict the page mapped by the page-table entry *table_ptr.
      *
      * Returns 0 when the entry was left mapped, 1 when the entry was
      * replaced by a swap entry and the page was released, and
      * 1 + (mem_map[] count read after free_page()) when a clean page
      * that is not in the swap cache was simply unmapped.
      */
 294 {
 295         unsigned long page, entry;
 296 
 297         page = *table_ptr;
             /* nothing to do for non-present, out-of-range or reserved pages */
 298         if (!(PAGE_PRESENT & page))
 299                 return 0;
 300         if (page >= high_memory)
 301                 return 0;
 302         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
 303                 return 0;
 304         
             /*
              * A dirty page can no longer match its cached swap copy.
              * NOTE(review): delete_from_swap_cache() is defined outside this
              * chunk; a non-zero return presumably means an entry was removed.
              * In that case the page is only aged (accessed bit cleared).
              */
 305         if ((PAGE_DIRTY & page) && delete_from_swap_cache(page))  {
 306                 *table_ptr &= ~PAGE_ACCESSED;
 307                 return 0;
 308         }
             /* recently used: clear the accessed bit and leave the page alone */
 309         if (PAGE_ACCESSED & page) {
 310                 *table_ptr &= ~PAGE_ACCESSED;
 311                 return 0;
 312         }
             /* dirty page: needs a swap slot and a write, only if we are the sole user */
 313         if (PAGE_DIRTY & page) {
 314                 page &= PAGE_MASK;
 315                 if (mem_map[MAP_NR(page)] != 1)
 316                         return 0;
 317                 if (!(entry = get_swap_page()))
 318                         return 0;
                     /* the entry is installed before the write is started */
 319                 *table_ptr = entry;
 320                 invalidate();
 321                 write_swap_page(entry, (char *) page);
 322                 free_page(page);
 323                 return 1;
 324         }
             /* clean page with a swap-cache entry: point the pte at it, no write needed */
 325         if ((entry = find_in_swap_cache(page)))  {
 326                 if (mem_map[MAP_NR(page)] != 1) {
 327                         *table_ptr |= PAGE_DIRTY;
 328                         printk("Aiee.. duplicated cached swap-cache entry\n");
 329                         return 0;
 330                 }
 331                 *table_ptr = entry;
 332                 invalidate();
 333                 free_page(page & PAGE_MASK);
 334                 return 1;
 335         } 
             /* clean page without swap copy: just drop the mapping */
 336         page &= PAGE_MASK;
 337         *table_ptr = 0;
 338         invalidate();
 339         free_page(page);
             /* mem_map[] is read after free_page(), so the result is 1 once the count hit zero */
 340         return 1 + mem_map[MAP_NR(page)];
 341 }
 342 
 343 /*
 344  * A new implementation of swap_out().  We do not swap complete processes,
 345  * but only a small number of blocks, before we continue with the next
 346  * process.  The number of blocks actually swapped is determined on the
 347  * number of page faults, that this process actually had in the last time,
 348  * so we won't swap heavily used processes all the time ...
 349  *
 350  * Note: the priority argument is a hint on how much CPU to waste with the
 351  *       swap block search, not a hint of how many blocks to swap with
 352  *       each process.
 353  *
 354  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 355  */
 356 
 357 /*
 358  * These are the minimum and maximum number of pages to swap from one process,
 359  * before proceeding to the next:
 360  */
 361 #define SWAP_MIN        4
 362 #define SWAP_MAX        32
 363 
 364 /*
 365  * The actual number of pages to swap is determined as:
 366  * SWAP_RATIO / (number of recent major page faults)
 367  */
 368 #define SWAP_RATIO      128
 369 
 370 static int swap_out_process(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Scan the page tables of task `p`, starting at p->mm->swap_address,
      * and hand each page-table entry to try_to_swap_out().
      *
      * Returns 1 as soon as try_to_swap_out() returns 1; the address of the
      * following page is saved in p->mm->swap_address so the next call
      * resumes there.  Returns 0 once TASK_SIZE has been reached, after
      * resetting swap_address to 0.
      */
 371 {
 372         unsigned long address;
 373         unsigned long offset;
 374         unsigned long *pgdir;
 375         unsigned long pg_table;
 376 
 377         /*
 378          * Go through process' page directory.
 379          */
 380         address = p->mm->swap_address;
             /* split the resume address into a directory slot and an offset below it */
 381         pgdir = (address >> PGDIR_SHIFT) + (unsigned long *) p->tss.cr3;
 382         offset = address & ~PGDIR_MASK;
 383         address &= PGDIR_MASK;
             /* offset is only non-zero for the first directory entry visited */
 384         for ( ; address < TASK_SIZE ;
 385         pgdir++, address = address + PGDIR_SIZE, offset = 0) {
 386                 pg_table = *pgdir;
 387                 if (pg_table >= high_memory)
 388                         continue;
 389                 if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
 390                         continue;
                     /* a non-present entry that got this far is reported and cleared */
 391                 if (!(PAGE_PRESENT & pg_table)) {
 392                         printk("swap_out_process (%s): bad page-table at vm %08lx: %08lx\n",
 393                                         p->comm, address + offset, pg_table);
 394                         *pgdir = 0;
 395                         continue;
 396                 }
                     /* strip the low 12 flag bits to get the page-table address */
 397                 pg_table &= 0xfffff000;
 398 
 399                 /*
 400                  * Go through this page table.
 401                  */
 402                 for( ; offset < ~PGDIR_MASK ; offset += PAGE_SIZE) {
                             /*
                              * offset >> 10 turns the byte offset into the byte position
                              * of the entry inside the page table (assumes 4kB pages and
                              * 4-byte entries - TODO confirm).
                              */
 403                         switch(try_to_swap_out((unsigned long *) (pg_table + (offset >> 10)))) {
 404                                 case 0:
 405                                         break;
 406 
 407                                 case 1:
 408                                         p->mm->rss--;
 409                                         /* continue with the following page the next time */
 410                                         p->mm->swap_address = address + offset + PAGE_SIZE;
 411                                         return 1;
 412 
                                     /* any other value: account for the page but keep scanning */
 413                                 default:
 414                                         p->mm->rss--;
 415                                         break;
 416                         }
 417                 }
 418         }
 419         /*
 420          * Finish work with this process, if we reached the end of the page
 421          * directory.  Mark restart from the beginning the next time.
 422          */
 423         p->mm->swap_address = 0;
 424         return 0;
 425 }
 426 
 427 static int swap_out(unsigned int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 428 {
 429         static int swap_task;
 430         int loop;
 431         int counter = NR_TASKS * 2 >> priority;
 432         struct task_struct *p;
 433 
 434         counter = NR_TASKS * 2 >> priority;
 435         for(; counter >= 0; counter--, swap_task++) {
 436                 /*
 437                  * Check that swap_task is suitable for swapping.  If not, look for
 438                  * the next suitable process.
 439                  */
 440                 loop = 0;
 441                 while(1) {
 442                         if (swap_task >= NR_TASKS) {
 443                                 swap_task = 1;
 444                                 if (loop)
 445                                         /* all processes are unswappable or already swapped out */
 446                                         return 0;
 447                                 loop = 1;
 448                         }
 449 
 450                         p = task[swap_task];
 451                         if (p && p->mm->swappable && p->mm->rss)
 452                                 break;
 453 
 454                         swap_task++;
 455                 }
 456 
 457                 /*
 458                  * Determine the number of pages to swap from this process.
 459                  */
 460                 if (!p->mm->swap_cnt) {
 461                         p->mm->dec_flt = (p->mm->dec_flt * 3) / 4 + p->mm->maj_flt - p->mm->old_maj_flt;
 462                         p->mm->old_maj_flt = p->mm->maj_flt;
 463 
 464                         if (p->mm->dec_flt >= SWAP_RATIO / SWAP_MIN) {
 465                                 p->mm->dec_flt = SWAP_RATIO / SWAP_MIN;
 466                                 p->mm->swap_cnt = SWAP_MIN;
 467                         } else if (p->mm->dec_flt <= SWAP_RATIO / SWAP_MAX)
 468                                 p->mm->swap_cnt = SWAP_MAX;
 469                         else
 470                                 p->mm->swap_cnt = SWAP_RATIO / p->mm->dec_flt;
 471                 }
 472                 if (swap_out_process(p)) {
 473                         if ((--p->mm->swap_cnt) == 0)
 474                                 swap_task++;
 475                         return 1;
 476                 }
 477         }
 478         return 0;
 479 }
 480 
 481 static int try_to_free_page(int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 482 {
 483         int i=6;
 484 
 485         while (i--) {
 486                 if (priority != GFP_NOBUFFER && shrink_buffers(i))
 487                         return 1;
 488                 if (shm_swap(i))
 489                         return 1;
 490                 if (swap_out(i))
 491                         return 1;
 492         }
 493         return 0;
 494 }
 495 
 496 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 497 {
 498         entry->prev = head;
 499         entry->next = head->next;
 500         entry->next->prev = entry;
 501         head->next = entry;
 502 }
 503 
 504 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 505 {
 506         entry->next->prev = entry->prev;
 507         entry->prev->next = entry->next;
 508 }
 509 
 510 /*
 511  * Free_page() adds the page to the free lists. This is optimized for
 512  * fast normal cases (no error jumps taken normally).
 513  *
 514  * The way to optimize jumps for gcc-2.2.2 is to:
 515  *  - select the "normal" case and put it inside the if () { XXX }
 516  *  - no else-statements if you can avoid them
 517  *
 518  * With the above two rules, you get a straight-line execution path
 519  * for the normal case, giving better asm-code.
 520  */
 521 
 522 /*
 523  * Buddy system. Hairy. You really aren't expected to understand this
 524  */
 525 static inline void free_pages_ok(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Put the block of (1 << order) pages at `addr` on the free lists,
      * merging it with its buddy for as long as the buddy is free too.
      * nr_free_pages is increased by the number of pages released.
      */
 526 {
             /* one bit in free_area_map[order] covers a pair of buddies */
 527         unsigned long index = addr >> (PAGE_SHIFT + 1 + order);
 528         unsigned long mask = PAGE_MASK << order;
 529 
 530         addr &= mask;
 531         nr_free_pages += 1 << order;
 532         while (order < NR_MEM_LISTS-1) {
                     /*
                      * NOTE(review): change_bit() is assumed to toggle the bit and
                      * return its previous value; a previous value of 0 stops the
                      * merge here.
                      */
 533                 if (!change_bit(index, free_area_map[order]))
 534                         break;
                     /* 1+~mask is the block size in bytes, so the XOR yields the buddy address */
 535                 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
                     /* move up one order: the merged block starts at the lower buddy */
 536                 order++;
 537                 index >>= 1;
 538                 mask <<= 1;
 539                 addr &= mask;
 540         }
 541         add_mem_queue(free_area_list+order, (struct mem_list *) addr);
 542 }
 543 
 544 static inline void check_free_buffers(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 545 {
 546         struct buffer_head * bh;
 547 
 548         bh = buffer_pages[MAP_NR(addr)];
 549         if (bh) {
 550                 struct buffer_head *tmp = bh;
 551                 do {
 552                         if (tmp->b_list == BUF_SHARED && tmp->b_dev != 0xffff)
 553                                 refile_buffer(tmp);
 554                         tmp = tmp->b_this_page;
 555                 } while (tmp != bh);
 556         }
 557 }
 558 
 559 void free_pages(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 560 {
 561         if (addr < high_memory) {
 562                 unsigned long flag;
 563                 unsigned short * map = mem_map + MAP_NR(addr);
 564                 if (*map) {
 565                         if (!(*map & MAP_PAGE_RESERVED)) {
 566                                 save_flags(flag);
 567                                 cli();
 568                                 if (!--*map)  {
 569                                         free_pages_ok(addr, order);
 570                                         delete_from_swap_cache(addr);
 571                                 }
 572                                 restore_flags(flag);
 573                                 if (*map == 1)
 574                                         check_free_buffers(addr);
 575                         }
 576                         return;
 577                 }
 578                 printk("Trying to free free memory (%08lx): memory probabably corrupted\n",addr);
 579                 printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
 580                 return;
 581         }
 582 }
 583 
 584 /*
 585  * Some ugly macros to speed up __get_free_pages()..
      *
      * RMQUEUE(order) walks free_area_list[] from `order` upwards looking
      * for a non-empty list.  On the first hit it unlinks the first block,
      * toggles its bitmap bit with mark_used(), subtracts (1 << order) from
      * nr_free_pages, restores the interrupt flags, splits the block down
      * to the requested order with EXPAND() and RETURNS FROM THE ENCLOSING
      * FUNCTION with the block address.  If every list is empty the macro
      * falls through.  It relies on a local variable named `flags` in the
      * function that uses it.
 586  */
 587 #define RMQUEUE(order) \
 588 do { struct mem_list * queue = free_area_list+order; \
 589      unsigned long new_order = order; \
 590         do { struct mem_list *next = queue->next; \
 591                 if (queue != next) { \
 592                         queue->next = next->next; \
 593                         next->next->prev = queue; \
 594                         mark_used((unsigned long) next, new_order); \
 595                         nr_free_pages -= 1 << order; \
 596                         restore_flags(flags); \
 597                         EXPAND(next, order, new_order); \
 598                         return (unsigned long) next; \
 599                 } new_order++; queue++; \
 600         } while (new_order < NR_MEM_LISTS); \
 601 } while (0)
 602 
 603 static inline int mark_used(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 604 {
 605         return change_bit(addr >> (PAGE_SHIFT+1+order), free_area_map[order]);
 606 }
 607 
     /*
      * EXPAND(addr,low,high) splits a block of order `high` down to order
      * `low`.  Each round lowers `high` by one, puts the lower half (at
      * `addr`) on the free list of that order, toggles its bitmap bit and
      * then moves `addr` up to the upper half.  When the loop ends,
      * mem_map[] of the page at the final `addr` is set to 1.
      *
      * Both `addr` and `high` are modified in the caller's scope, and the
      * macro uses a variable named `flags` from the enclosing function.
      */
 608 #define EXPAND(addr,low,high) \
 609 do { unsigned long size = PAGE_SIZE << high; \
 610         while (high > low) { \
 611                 high--; size >>= 1; cli(); \
 612                 add_mem_queue(free_area_list+high, addr); \
 613                 mark_used((unsigned long) addr, high); \
 614                 restore_flags(flags); \
 615                 addr = (struct mem_list *) (size + (unsigned long) addr); \
 616         } mem_map[MAP_NR((unsigned long) addr)] = 1; \
 617 } while (0)
 618 
 619 unsigned long __get_free_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 620 {
 621         unsigned long flags;
 622 
 623         if (intr_count && priority != GFP_ATOMIC) {
 624                 static int count = 0;
 625                 if (++count < 5) {
 626                         printk("gfp called nonatomically from interrupt %08lx\n",
 627                                 ((unsigned long *)&priority)[-1]);
 628                         priority = GFP_ATOMIC;
 629                 }
 630         }
 631         save_flags(flags);
 632 repeat:
 633         cli();
 634         if ((priority==GFP_ATOMIC) || nr_free_pages > MAX_SECONDARY_PAGES) {
 635                 RMQUEUE(order);
 636                 restore_flags(flags);
 637                 return 0;
 638         }
 639         restore_flags(flags);
 640         if (priority != GFP_BUFFER && try_to_free_page(priority))
 641                 goto repeat;
 642         return 0;
 643 }
 644 
 645 /*
 646  * Show free area list (used inside shift_scroll-lock stuff)
 647  * We also calculate the percentage fragmentation. We do this by counting the
 648  * memory on each free list with the exception of the first item on the list.
 649  */
 650 void show_free_areas(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 651 {
 652         unsigned long order, flags;
 653         unsigned long total = 0;
 654 
 655         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 656         save_flags(flags);
 657         cli();
 658         for (order=0 ; order < NR_MEM_LISTS; order++) {
 659                 struct mem_list * tmp;
 660                 unsigned long nr = 0;
 661                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 662                         nr ++;
 663                 }
 664                 total += nr * (4 << order);
 665                 printk("%lu*%ukB ", nr, 4 << order);
 666         }
 667         restore_flags(flags);
 668         printk("= %lukB)\n", total);
 669 #ifdef SWAP_CACHE_INFO
 670         show_swap_cache_info();
 671 #endif  
 672 }
 673 
 674 /*
 675  * Trying to stop swapping from a file is fraught with races, so
 676  * we repeat quite a bit here when we have to pause. swapoff()
 677  * isn't exactly timing-critical, so who cares?
 678  */
 679 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Remove every reference to swap area `type` from the page tables of
      * all tasks: swapped-out pages of that area are read back in, and
      * present pages that are still cached against that area are detached
      * from the swap cache.
      *
      * Returns 0 on success, -ENOMEM when no page could be allocated for
      * reading a swapped-out page back in.
      */
 680 {
 681         int nr, pgt, pg;
 682         unsigned long page, *ppage;
             /* spare page for the next swap-in; 0 while none is held */
 683         unsigned long tmp = 0;
 684         struct task_struct *p;
 685 
             /* nr is set once, before the label, so a restart resumes at the same task */
 686         nr = 0;
 687         
 688 /*
 689  * When we have to sleep, we restart the whole algorithm from the same
 690  * task we stopped in. That at least rids us of all races.
 691  */
 692 repeat:
 693         for (; nr < NR_TASKS ; nr++) {
 694                 p = task[nr];
 695                 if (!p)
 696                         continue;
                     /* outer loop: entries of the page directory found via tss.cr3 */
 697                 for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++) {
 698                         ppage = pgt + ((unsigned long *) p->tss.cr3);
 699                         page = *ppage;
 700                         if (!page)
 701                                 continue;
 702                         if (!(page & PAGE_PRESENT) || (page >= high_memory))
 703                                 continue;
 704                         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
 705                                 continue;
                             /* inner loop: entries of this page table */
 706                         ppage = (unsigned long *) (page & PAGE_MASK);   
 707                         for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++) {
 708                                 page = *ppage;
 709                                 if (!page)
 710                                         continue;
                                     /*
                                      * Present page: if it is cached against this swap
                                      * area, mark it dirty and drop the cache entry.
                                      */
 711                                 if (page & PAGE_PRESENT) {
 712                                         if (!(page = in_swap_cache(page)))
 713                                                 continue;
 714                                         if (SWP_TYPE(page) != type)
 715                                                 continue;
 716                                         *ppage |= PAGE_DIRTY;
 717                                         delete_from_swap_cache(*ppage);
 718                                         continue;
 719                                 }
                                     /* not present: `page` is a swap entry */
 720                                 if (SWP_TYPE(page) != type)
 721                                         continue;
                                     /* get a spare page first, then rescan from the same task */
 722                                 if (!tmp) {
 723                                         if (!(tmp = __get_free_page(GFP_KERNEL)))
 724                                                 return -ENOMEM;
 725                                         goto repeat;
 726                                 }
 727                                 read_swap_page(page, (char *) tmp);
                                     /* install the page only if the entry is unchanged after the read */
 728                                 if (*ppage == page) {
 729                                         *ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
 730                                         ++p->mm->rss;
 731                                         swap_free(page);
 732                                         tmp = 0;
 733                                 }
 734                                 goto repeat;
 735                         }
 736                 }
 737         }
             /* release the unused spare page; tmp may also be 0 at this point */
 738         free_page(tmp);
 739         return 0;
 740 }
 741 
 742 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 743 {
 744         struct swap_info_struct * p;
 745         struct inode * inode;
 746         unsigned int type;
 747         int i;
 748 
 749         if (!suser())
 750                 return -EPERM;
 751         i = namei(specialfile,&inode);
 752         if (i)
 753                 return i;
 754         p = swap_info;
 755         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 756                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 757                         continue;
 758                 if (p->swap_file) {
 759                         if (p->swap_file == inode)
 760                                 break;
 761                 } else {
 762                         if (!S_ISBLK(inode->i_mode))
 763                                 continue;
 764                         if (p->swap_device == inode->i_rdev)
 765                                 break;
 766                 }
 767         }
 768         iput(inode);
 769         if (type >= nr_swapfiles)
 770                 return -EINVAL;
 771         p->flags = SWP_USED;
 772         i = try_to_unuse(type);
 773         if (i) {
 774                 p->flags = SWP_WRITEOK;
 775                 return i;
 776         }
 777         nr_swap_pages -= p->pages;
 778         iput(p->swap_file);
 779         p->swap_file = NULL;
 780         p->swap_device = 0;
 781         vfree(p->swap_map);
 782         p->swap_map = NULL;
 783         free_page((long) p->swap_lockmap);
 784         p->swap_lockmap = NULL;
 785         p->flags = 0;
 786         return 0;
 787 }
 788 
 789 /*
 790  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 791  *
 792  * The swapon system call
 793  */
 794 asmlinkage int sys_swapon(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 795 {
 796         struct swap_info_struct * p;
 797         struct inode * swap_inode;
 798         unsigned int type;
 799         int i,j;
 800         int error;
 801 
 802         if (!suser())
 803                 return -EPERM;
 804         p = swap_info;
 805         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 806                 if (!(p->flags & SWP_USED))
 807                         break;
 808         if (type >= MAX_SWAPFILES)
 809                 return -EPERM;
 810         if (type >= nr_swapfiles)
 811                 nr_swapfiles = type+1;
 812         p->flags = SWP_USED;
 813         p->swap_file = NULL;
 814         p->swap_device = 0;
 815         p->swap_map = NULL;
 816         p->swap_lockmap = NULL;
 817         p->lowest_bit = 0;
 818         p->highest_bit = 0;
 819         p->max = 1;
 820         error = namei(specialfile,&swap_inode);
 821         if (error)
 822                 goto bad_swap;
 823         p->swap_file = swap_inode;
 824         error = -EBUSY;
 825         if (swap_inode->i_count != 1)
 826                 goto bad_swap;
 827         error = -EINVAL;
 828         if (S_ISBLK(swap_inode->i_mode)) {
 829                 p->swap_device = swap_inode->i_rdev;
 830                 p->swap_file = NULL;
 831                 iput(swap_inode);
 832                 error = -ENODEV;
 833                 if (!p->swap_device)
 834                         goto bad_swap;
 835                 error = -EBUSY;
 836                 for (i = 0 ; i < nr_swapfiles ; i++) {
 837                         if (i == type)
 838                                 continue;
 839                         if (p->swap_device == swap_info[i].swap_device)
 840                                 goto bad_swap;
 841                 }
 842         } else if (!S_ISREG(swap_inode->i_mode))
 843                 goto bad_swap;
 844         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 845         if (!p->swap_lockmap) {
 846                 printk("Unable to start swapping: out of memory :-)\n");
 847                 error = -ENOMEM;
 848                 goto bad_swap;
 849         }
 850         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 851         if (memcmp("SWAP-SPACE",p->swap_lockmap+4086,10)) {
 852                 printk("Unable to find swap-space signature\n");
 853                 error = -EINVAL;
 854                 goto bad_swap;
 855         }
 856         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 857         j = 0;
 858         p->lowest_bit = 0;
 859         p->highest_bit = 0;
 860         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 861                 if (test_bit(i,p->swap_lockmap)) {
 862                         if (!p->lowest_bit)
 863                                 p->lowest_bit = i;
 864                         p->highest_bit = i;
 865                         p->max = i+1;
 866                         j++;
 867                 }
 868         }
 869         if (!j) {
 870                 printk("Empty swap-file\n");
 871                 error = -EINVAL;
 872                 goto bad_swap;
 873         }
 874         p->swap_map = (unsigned char *) vmalloc(p->max);
 875         if (!p->swap_map) {
 876                 error = -ENOMEM;
 877                 goto bad_swap;
 878         }
 879         for (i = 1 ; i < p->max ; i++) {
 880                 if (test_bit(i,p->swap_lockmap))
 881                         p->swap_map[i] = 0;
 882                 else
 883                         p->swap_map[i] = 0x80;
 884         }
 885         p->swap_map[0] = 0x80;
 886         memset(p->swap_lockmap,0,PAGE_SIZE);
 887         p->flags = SWP_WRITEOK;
 888         p->pages = j;
 889         nr_swap_pages += j;
 890         printk("Adding Swap: %dk swap-space\n",j<<2);
 891         return 0;
 892 bad_swap:
 893         free_page((long) p->swap_lockmap);
 894         vfree(p->swap_map);
 895         iput(p->swap_file);
 896         p->swap_device = 0;
 897         p->swap_file = NULL;
 898         p->swap_map = NULL;
 899         p->swap_lockmap = NULL;
 900         p->flags = 0;
 901         return error;
 902 }
 903 
 904 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 905 {
 906         unsigned int i, j;
 907 
 908         val->freeswap = val->totalswap = 0;
 909         for (i = 0; i < nr_swapfiles; i++) {
 910                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 911                         continue;
 912                 for (j = 0; j < swap_info[i].max; ++j)
 913                         switch (swap_info[i].swap_map[j]) {
 914                                 case 128:
 915                                         continue;
 916                                 case 0:
 917                                         ++val->freeswap;
 918                                 default:
 919                                         ++val->totalswap;
 920                         }
 921         }
 922         val->freeswap <<= PAGE_SHIFT;
 923         val->totalswap <<= PAGE_SHIFT;
 924         return;
 925 }
 926 
 927 /*
 928  * set up the free-area data structures:
 929  *   - mark all pages MAP_PAGE_RESERVED
 930  *   - mark all memory queues empty
 931  *   - clear the memory bitmaps
 932  */
 933 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
     /* [previous][next][first][last][top][bottom][index][help] */
 934 {
 935         unsigned short * p;
 936         unsigned long mask = PAGE_MASK;
 937         int i;
 938 
 939         start_mem = init_swap_cache(start_mem, end_mem);
 940         mem_map = (unsigned short *) start_mem;
 941         p = mem_map + MAP_NR(end_mem);
 942         start_mem = (unsigned long) p;
 943         while (p > mem_map)
 944                 *--p = MAP_PAGE_RESERVED;
 945 
 946         for (i = 0 ; i < NR_MEM_LISTS ; i++, mask <<= 1) {
 947                 unsigned long bitmap_size;
 948                 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
 949                 end_mem = (end_mem + ~mask) & mask;
 950                 bitmap_size = end_mem >> (PAGE_SHIFT + i);
 951                 bitmap_size = (bitmap_size + 7) >> 3;
 952                 free_area_map[i] = (unsigned char *) start_mem;
 953                 memset((void *) start_mem, 0, bitmap_size);
 954                 start_mem += bitmap_size;
 955         }
 956         return start_mem;
 957 }

/* [previous][next][first][last][top][bottom][index][help] */