root/mm/swap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. rw_swap_page
  5. get_swap_page
  6. swap_duplicate
  7. swap_free
  8. swap_in
  9. try_to_swap_out
  10. swap_out_process
  11. swap_out
  12. try_to_free_page
  13. add_mem_queue
  14. remove_mem_queue
  15. free_pages_ok
  16. check_free_buffers
  17. free_pages
  18. mark_used
  19. __get_free_pages
  20. __get_dma_pages
  21. show_free_areas
  22. try_to_unuse
  23. sys_swapoff
  24. sys_swapon
  25. si_swapinfo
  26. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  */
  11 
  12 #include <linux/mm.h>
  13 #include <linux/sched.h>
  14 #include <linux/head.h>
  15 #include <linux/kernel.h>
  16 #include <linux/kernel_stat.h>
  17 #include <linux/errno.h>
  18 #include <linux/string.h>
  19 #include <linux/stat.h>
  20 #include <linux/fs.h>
  21 
  22 #include <asm/system.h> /* for cli()/sti() */
  23 #include <asm/bitops.h>
  24 
  25 #define MAX_SWAPFILES 8         /* number of slots in swap_info[] */
  26 
  27 #define SWP_USED        1
  28 #define SWP_WRITEOK     3       /* value 3: the SWP_USED bit plus bit 1 */
  29 
     /*
      * Layout of a swap entry as built by SWP_ENTRY():
      *   bit 0              - always left clear by SWP_ENTRY()
      *                        (NOTE(review): presumably so that an entry never
      *                        looks like a present page-table entry - confirm)
      *   bits 1..7          - swap area index ("type"), see SWP_TYPE()
      *   bits PAGE_SHIFT..  - page offset inside that swap area, see SWP_OFFSET()
      */
  30 #define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
  31 #define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
  32 #define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))
  33 
  34 static int min_free_pages = 20;
  35 
  36 static int nr_swapfiles = 0;
  37 static struct wait_queue * lock_queue = NULL;
  38 
  /*
   * Per swap area bookkeeping; swap_info[] is indexed by the "type" field
   * of a swap entry (SWP_TYPE).
   */
  39 static struct swap_info_struct {
  40         unsigned long flags;            /* tested against SWP_USED / SWP_WRITEOK */
  41         struct inode * swap_file;       /* used by rw_swap_page() when swap_device is 0 */
  42         unsigned int swap_device;       /* if non-zero, I/O goes through ll_rw_page() */
  43         unsigned char * swap_map;       /* per-page use count; 0 means the page is free */
  44         unsigned char * swap_lockmap;   /* per-page lock bit (set_bit/clear_bit) */
  45         int pages;                      /* not referenced in the visible part of this file */
  46         int lowest_bit;                 /* first offset get_swap_page() scans */
  47         int highest_bit;                /* last offset get_swap_page() scans */
  48         unsigned long max;              /* offsets >= max are rejected as invalid */
  49 } swap_info[MAX_SWAPFILES];
  50 
  51 extern int shm_swap (int);
  52 
  53 unsigned long *swap_cache;
  54 
  55 #ifdef SWAP_CACHE_INFO
  56 unsigned long swap_cache_add_total = 0;
  57 unsigned long swap_cache_add_success = 0;
  58 unsigned long swap_cache_del_total = 0;
  59 unsigned long swap_cache_del_success = 0;
  60 unsigned long swap_cache_find_total = 0;
  61 unsigned long swap_cache_find_success = 0;
  62 
  63 extern inline void show_swap_cache_info(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  64 {
  65         printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
  66                 swap_cache_add_total, swap_cache_add_success, 
  67                 swap_cache_del_total, swap_cache_del_success,
  68                 swap_cache_find_total, swap_cache_find_success);
  69 }
  70 #endif
  71 
  /*
   * add_to_swap_cache: record `entry` in the swap cache slot of the page
   * at `addr` (slot index is addr >> PAGE_SHIFT).
   *
   * Nothing is recorded unless the swap area named by the entry has all
   * SWP_WRITEOK bits set.  Returns 1 when the entry was stored, 0 otherwise.
   */
  72 extern inline int add_to_swap_cache(unsigned long addr, unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
  73 {
  74         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
  75         
  76 #ifdef SWAP_CACHE_INFO
  77         swap_cache_add_total++;
  78 #endif
  79         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) { 
                     /*
                      * xchgl swaps the slot and `entry` in one instruction:
                      * afterwards the slot holds the new entry and `entry`
                      * holds whatever was in the slot before.
                      */
  80                 __asm__ __volatile__ (
  81                                       "xchgl %0,%1\n"
  82                                       : "=m" (swap_cache[addr >> PAGE_SHIFT]),
  83                                        "=r" (entry)
  84                                       : "0" (swap_cache[addr >> PAGE_SHIFT]),
  85                                        "1" (entry)
  86                                       );
                     /* `entry` is now the previous slot content; it should have been 0 */
  87                 if (entry)  {
  88                         printk("swap_cache: replacing non-NULL entry\n");
  89                 }
  90 #ifdef SWAP_CACHE_INFO
  91                 swap_cache_add_success++;
  92 #endif
  93                 return 1;
  94         }
  95         return 0;
  96 }
  97 
  98 static unsigned long init_swap_cache(unsigned long mem_start,
     /* [previous][next][first][last][top][bottom][index][help] */
  99         unsigned long mem_end)
 100 {
 101         unsigned long swap_cache_size;
 102 
 103         mem_start = (mem_start + 15) & ~15;
 104         swap_cache = (unsigned long *) mem_start;
 105         swap_cache_size = mem_end >> PAGE_SHIFT;
 106         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 107         return (unsigned long) (swap_cache + swap_cache_size);
 108 }
 109 
 110 void rw_swap_page(int rw, unsigned long entry, char * buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 111 {
 112         unsigned long type, offset;
 113         struct swap_info_struct * p;
 114 
 115         type = SWP_TYPE(entry);
 116         if (type >= nr_swapfiles) {
 117                 printk("Internal error: bad swap-device\n");
 118                 return;
 119         }
 120         p = &swap_info[type];
 121         offset = SWP_OFFSET(entry);
 122         if (offset >= p->max) {
 123                 printk("rw_swap_page: weirdness\n");
 124                 return;
 125         }
 126         if (!(p->flags & SWP_USED)) {
 127                 printk("Trying to swap to unused swap-device\n");
 128                 return;
 129         }
 130         while (set_bit(offset,p->swap_lockmap))
 131                 sleep_on(&lock_queue);
 132         if (rw == READ)
 133                 kstat.pswpin++;
 134         else
 135                 kstat.pswpout++;
 136         if (p->swap_device) {
 137                 ll_rw_page(rw,p->swap_device,offset,buf);
 138         } else if (p->swap_file) {
 139                 struct inode *swapf = p->swap_file;
 140                 unsigned int zones[8];
 141                 int i;
 142                 if (swapf->i_op->bmap == NULL
 143                         && swapf->i_op->smap != NULL){
 144                         /*
 145                                 With MsDOS, we use msdos_smap which return
 146                                 a sector number (not a cluster or block number).
 147                                 It is a patch to enable the UMSDOS project.
 148                                 Other people are working on better solution.
 149 
 150                                 It sounds like ll_rw_swap_file defined
 151                                 it operation size (sector size) based on
 152                                 PAGE_SIZE and the number of block to read.
 153                                 So using bmap or smap should work even if
 154                                 smap will require more blocks.
 155                         */
 156                         int j;
 157                         unsigned int block = offset << 3;
 158 
 159                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 160                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 161                                         printk("rw_swap_page: bad swap file\n");
 162                                         return;
 163                                 }
 164                         }
 165                 }else{
 166                         int j;
 167                         unsigned int block = offset
 168                                 << (12 - swapf->i_sb->s_blocksize_bits);
 169 
 170                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 171                                 if (!(zones[i] = bmap(swapf,block++))) {
 172                                         printk("rw_swap_page: bad swap file\n");
 173                                         return;
 174                                 }
 175                 }
 176                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 177         } else
 178                 printk("re_swap_page: no swap file or device\n");
 179         if (offset && !clear_bit(offset,p->swap_lockmap))
 180                 printk("rw_swap_page: lock already cleared\n");
 181         wake_up(&lock_queue);
 182 }
 183 
 184 unsigned int get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 185 {
 186         struct swap_info_struct * p;
 187         unsigned int offset, type;
 188 
 189         p = swap_info;
 190         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 191                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 192                         continue;
 193                 for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
 194                         if (p->swap_map[offset])
 195                                 continue;
 196                         p->swap_map[offset] = 1;
 197                         nr_swap_pages--;
 198                         if (offset == p->highest_bit)
 199                                 p->highest_bit--;
 200                         p->lowest_bit = offset;
 201                         return SWP_ENTRY(type,offset);
 202                 }
 203         }
 204         return 0;
 205 }
 206 
 207 unsigned long swap_duplicate(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209         struct swap_info_struct * p;
 210         unsigned long offset, type;
 211 
 212         if (!entry)
 213                 return 0;
 214         offset = SWP_OFFSET(entry);
 215         type = SWP_TYPE(entry);
 216         if (type == SHM_SWP_TYPE)
 217                 return entry;
 218         if (type >= nr_swapfiles) {
 219                 printk("Trying to duplicate nonexistent swap-page\n");
 220                 return 0;
 221         }
 222         p = type + swap_info;
 223         if (offset >= p->max) {
 224                 printk("swap_duplicate: weirdness\n");
 225                 return 0;
 226         }
 227         if (!p->swap_map[offset]) {
 228                 printk("swap_duplicate: trying to duplicate unused page\n");
 229                 return 0;
 230         }
 231         p->swap_map[offset]++;
 232         return entry;
 233 }
 234 
 235 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 236 {
 237         struct swap_info_struct * p;
 238         unsigned long offset, type;
 239 
 240         if (!entry)
 241                 return;
 242         type = SWP_TYPE(entry);
 243         if (type == SHM_SWP_TYPE)
 244                 return;
 245         if (type >= nr_swapfiles) {
 246                 printk("Trying to free nonexistent swap-page\n");
 247                 return;
 248         }
 249         p = & swap_info[type];
 250         offset = SWP_OFFSET(entry);
 251         if (offset >= p->max) {
 252                 printk("swap_free: weirdness\n");
 253                 return;
 254         }
 255         if (!(p->flags & SWP_USED)) {
 256                 printk("Trying to free swap from unused swap-device\n");
 257                 return;
 258         }
 259         while (set_bit(offset,p->swap_lockmap))
 260                 sleep_on(&lock_queue);
 261         if (offset < p->lowest_bit)
 262                 p->lowest_bit = offset;
 263         if (offset > p->highest_bit)
 264                 p->highest_bit = offset;
 265         if (!p->swap_map[offset])
 266                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 267         else
 268                 if (!--p->swap_map[offset])
 269                         nr_swap_pages++;
 270         if (!clear_bit(offset,p->swap_lockmap))
 271                 printk("swap_free: lock already cleared\n");
 272         wake_up(&lock_queue);
 273 }
 274 
 275 unsigned long swap_in(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277         unsigned long page;
 278 
 279         if (!(page = get_free_page(GFP_KERNEL))) {
 280                 oom(current);
 281                 return BAD_PAGE;
 282         }
 283         read_swap_page(entry, (char *) page);
 284         if (add_to_swap_cache(page, entry))
 285                 return page | PAGE_PRESENT;
 286         swap_free(entry);
 287         return page | PAGE_DIRTY | PAGE_PRESENT;
 288 }
 289 
 /*
  * try_to_swap_out: try to get rid of the page mapped by *table_ptr.
  *
  * Return value:
  *   0  - nothing was freed (page absent, out of range, reserved,
  *        recently accessed, shared, or no swap space)
  *   1  - the mapping was replaced by a swap entry and the page released
  *   1 + remaining use count - a clean page without swap-cache entry was
  *        simply unmapped (see the last lines)
  */
 290 static inline int try_to_swap_out(unsigned long * table_ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292         unsigned long page, entry;
 293 
 294         page = *table_ptr;
 295         if (!(PAGE_PRESENT & page))
 296                 return 0;
 297         if (page >= high_memory)
 298                 return 0;
 299         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
 300                 return 0;
 301         
             /*
              * Dirty page for which delete_from_swap_cache() reports success:
              * only the accessed bit is cleared on this pass.
              */
 302         if ((PAGE_DIRTY & page) && delete_from_swap_cache(page))  {
 303                 *table_ptr &= ~PAGE_ACCESSED;
 304                 return 0;
 305         }
             /* recently used: clear the accessed bit and leave the page alone */
 306         if (PAGE_ACCESSED & page) {
 307                 *table_ptr &= ~PAGE_ACCESSED;
 308                 return 0;
 309         }
             /* dirty and sole user: write it to a freshly allocated swap page */
 310         if (PAGE_DIRTY & page) {
 311                 page &= PAGE_MASK;
 312                 if (mem_map[MAP_NR(page)] != 1)
 313                         return 0;
 314                 if (!(entry = get_swap_page()))
 315                         return 0;
 316                 *table_ptr = entry;
 317                 invalidate();
 318                 write_swap_page(entry, (char *) page);
 319                 free_page(page);
 320                 return 1;
 321         }
             /* clean page with a swap-cache entry: reuse that entry, no write needed */
 322         if ((entry = find_in_swap_cache(page)))  {
 323                 if (mem_map[MAP_NR(page)] != 1) {
 324                         *table_ptr |= PAGE_DIRTY;
 325                         printk("Aiee.. duplicated cached swap-cache entry\n");
 326                         return 0;
 327                 }
 328                 *table_ptr = entry;
 329                 invalidate();
 330                 free_page(page & PAGE_MASK);
 331                 return 1;
 332         } 
             /* clean page without swap copy: just drop the mapping */
 333         page &= PAGE_MASK;
 334         *table_ptr = 0;
 335         invalidate();
 336         free_page(page);
             /* use count is read after free_page(): 1 if freed, more if still in use */
 337         return 1 + mem_map[MAP_NR(page)];
 338 }
 339 
 340 /*
 341  * A new implementation of swap_out().  We do not swap complete processes,
 342  * but only a small number of blocks, before we continue with the next
 343  * process.  The number of blocks actually swapped is determined by the
 344  * number of page faults that this process has had recently,
 345  * so we won't swap heavily used processes all the time ...
 346  *
 347  * Note: the priority argument is a hint on how much CPU to waste with the
 348  *       swap block search, not a hint of how many blocks to swap with
 349  *       each process.
 350  *
 351  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 352  */
 353 
 354 /*
 355  * These are the minimum and maximum number of pages to swap from one process,
 356  * before proceeding to the next:
 357  */
 358 #define SWAP_MIN        4
 359 #define SWAP_MAX        32
 360 
 361 /*
 362  * The actual number of pages to swap is determined as:
 363  * SWAP_RATIO / (number of recent major page faults)
 364  */
 365 #define SWAP_RATIO      128
 366 
 /*
  * swap_out_process: try to swap out one page of task `p`.
  *
  * The scan resumes at p->mm->swap_address and walks the page directory
  * up to TASK_SIZE.  Returns 1 as soon as try_to_swap_out() returns 1
  * for a page (the resume address is then advanced past it), or 0 after
  * the whole address space has been scanned (the resume address is then
  * reset to 0).
  */
 367 static int swap_out_process(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
 368 {
 369         unsigned long address;
 370         unsigned long offset;
 371         unsigned long *pgdir;
 372         unsigned long pg_table;
 373 
 374         /*
 375          * Go through process' page directory.
 376          */
 377         address = p->mm->swap_address;
 378         pgdir = (address >> PGDIR_SHIFT) + (unsigned long *) p->tss.cr3;
             /* split into the directory-entry base (address) and the offset within it */
 379         offset = address & ~PGDIR_MASK;
 380         address &= PGDIR_MASK;
 381         for ( ; address < TASK_SIZE ;
 382         pgdir++, address = address + PGDIR_SIZE, offset = 0) {
 383                 pg_table = *pgdir;
 384                 if (pg_table >= high_memory)
 385                         continue;
 386                 if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
 387                         continue;
                     /* a non-present directory entry that got this far is reported and cleared */
 388                 if (!(PAGE_PRESENT & pg_table)) {
 389                         printk("swap_out_process (%s): bad page-table at vm %08lx: %08lx\n",
 390                                         p->comm, address + offset, pg_table);
 391                         *pgdir = 0;
 392                         continue;
 393                 }
 394                 pg_table &= 0xfffff000;
 395 
 396                 /*
 397                  * Go through this page table.
 398                  */
 399                 for( ; offset < ~PGDIR_MASK ; offset += PAGE_SIZE) {
                             /*
                              * offset >> 10 turns the virtual offset into the byte
                              * offset of the entry inside the page table
                              * (assumes 4 kB pages and 4-byte entries - TODO confirm).
                              */
 400                         switch(try_to_swap_out((unsigned long *) (pg_table + (offset >> 10)))) {
 401                                 case 0:
 402                                         break;
 403 
 404                                 case 1:
 405                                         p->mm->rss--;
 406                                         /* continue with the following page the next time */
 407                                         p->mm->swap_address = address + offset + PAGE_SIZE;
 408                                         return 1;
 409 
                                     /* any other value: the page was unmapped; keep scanning */
 410                                 default:
 411                                         p->mm->rss--;
 412                                         break;
 413                         }
 414                 }
 415         }
 416         /*
 417          * Finish work with this process, if we reached the end of the page
 418          * directory.  Mark restart from the beginning the next time.
 419          */
 420         p->mm->swap_address = 0;
 421         return 0;
 422 }
 423 
 424 static int swap_out(unsigned int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 425 {
 426         static int swap_task;
 427         int loop;
 428         int counter = NR_TASKS * 2 >> priority;
 429         struct task_struct *p;
 430 
 431         counter = NR_TASKS * 2 >> priority;
 432         for(; counter >= 0; counter--, swap_task++) {
 433                 /*
 434                  * Check that swap_task is suitable for swapping.  If not, look for
 435                  * the next suitable process.
 436                  */
 437                 loop = 0;
 438                 while(1) {
 439                         if (swap_task >= NR_TASKS) {
 440                                 swap_task = 1;
 441                                 if (loop)
 442                                         /* all processes are unswappable or already swapped out */
 443                                         return 0;
 444                                 loop = 1;
 445                         }
 446 
 447                         p = task[swap_task];
 448                         if (p && p->mm->swappable && p->mm->rss)
 449                                 break;
 450 
 451                         swap_task++;
 452                 }
 453 
 454                 /*
 455                  * Determine the number of pages to swap from this process.
 456                  */
 457                 if (!p->mm->swap_cnt) {
 458                         p->mm->dec_flt = (p->mm->dec_flt * 3) / 4 + p->mm->maj_flt - p->mm->old_maj_flt;
 459                         p->mm->old_maj_flt = p->mm->maj_flt;
 460 
 461                         if (p->mm->dec_flt >= SWAP_RATIO / SWAP_MIN) {
 462                                 p->mm->dec_flt = SWAP_RATIO / SWAP_MIN;
 463                                 p->mm->swap_cnt = SWAP_MIN;
 464                         } else if (p->mm->dec_flt <= SWAP_RATIO / SWAP_MAX)
 465                                 p->mm->swap_cnt = SWAP_MAX;
 466                         else
 467                                 p->mm->swap_cnt = SWAP_RATIO / p->mm->dec_flt;
 468                 }
 469                 if (swap_out_process(p)) {
 470                         if ((--p->mm->swap_cnt) == 0)
 471                                 swap_task++;
 472                         return 1;
 473                 }
 474         }
 475         return 0;
 476 }
 477 
 478 static int try_to_free_page(int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 479 {
 480         int i=6;
 481 
 482         while (i--) {
 483                 if (priority != GFP_NOBUFFER && shrink_buffers(i))
 484                         return 1;
 485                 if (shm_swap(i))
 486                         return 1;
 487                 if (swap_out(i))
 488                         return 1;
 489         }
 490         return 0;
 491 }
 492 
 493 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 494 {
 495         entry->prev = head;
 496         entry->next = head->next;
 497         entry->next->prev = entry;
 498         head->next = entry;
 499 }
 500 
 501 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 502 {
 503         entry->next->prev = entry->prev;
 504         entry->prev->next = entry->next;
 505 }
 506 
 507 /*
 508  * Free_page() adds the page to the free lists. This is optimized for
 509  * fast normal cases (no error jumps taken normally).
 510  *
 511  * The way to optimize jumps for gcc-2.2.2 is to:
 512  *  - select the "normal" case and put it inside the if () { XXX }
 513  *  - no else-statements if you can avoid them
 514  *
 515  * With the above two rules, you get a straight-line execution path
 516  * for the normal case, giving better asm-code.
 517  */
 518 
 519 /*
 520  * Buddy system. Hairy. You really aren't expected to understand this
 521  */
 /*
  * free_pages_ok: put a block of 2^order pages starting at `addr` back on
  * the free lists, merging it with its buddy as long as that is possible.
  *
  * free_area_map[order] holds one bit per pair of buddies (hence the
  * "+ 1" in the index shift).  NOTE(review): change_bit() presumably
  * toggles the bit and returns its previous value - confirm.
  */
 522 static inline void free_pages_ok(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 523 {
 524         unsigned long index = addr >> (PAGE_SHIFT + 1 + order);
 525         unsigned long mask = PAGE_MASK << order;
 526 
             /* align addr to the start of its block */
 527         addr &= mask;
 528         nr_free_pages += 1 << order;
 529         while (order < NR_MEM_LISTS-1) {
                     /* previous bit value 0: no merge at this order, stop here */
 530                 if (!change_bit(index, free_area_map[order]))
 531                         break;
                     /* 1+~mask is the block size, so addr ^ size is the buddy's address */
 532                 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
                     /* go one order up; the merged block starts at the lower of the two */
 533                 order++;
 534                 index >>= 1;
 535                 mask <<= 1;
 536                 addr &= mask;
 537         }
 538         add_mem_queue(free_area_list+order, (struct mem_list *) addr);
 539 }
 540 
 541 static inline void check_free_buffers(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 542 {
 543         struct buffer_head * bh;
 544 
 545         bh = buffer_pages[MAP_NR(addr)];
 546         if (bh) {
 547                 struct buffer_head *tmp = bh;
 548                 do {
 549                         if (tmp->b_list == BUF_SHARED && tmp->b_dev != 0xffff)
 550                                 refile_buffer(tmp);
 551                         tmp = tmp->b_this_page;
 552                 } while (tmp != bh);
 553         }
 554 }
 555 
 /*
  * free_pages: drop one reference on the block of 2^order pages at `addr`.
  *
  * Addresses at or above high_memory and reserved pages are silently
  * ignored.  When the use count reaches zero the block goes back to the
  * free lists and its swap cache entry is removed.  Freeing a page whose
  * count is already zero is reported with printk().
  */
 556 void free_pages(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 557 {
 558         if (addr < high_memory) {
 559                 unsigned long flag;
 560                 unsigned short * map = mem_map + MAP_NR(addr);
 561                 if (*map) {
 562                         if (!(*map & MAP_PAGE_RESERVED)) {
                                     /* count update and free-list insertion run with interrupts off */
 563                                 save_flags(flag);
 564                                 cli();
 565                                 if (!--*map)  {
 566                                         free_pages_ok(addr, order);
 567                                         delete_from_swap_cache(addr);
 568                                 }
 569                                 restore_flags(flag);
                                     /* one user left: let the page's shared buffers be refiled */
 570                                 if (*map == 1)
 571                                         check_free_buffers(addr);
 572                         }
 573                         return;
 574                 }
 575                 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
                     /*
                      * Prints the word stored just below the `addr` argument.
                      * NOTE(review): presumably the caller's return address on
                      * the i386 stack layout - confirm.
                      */
 576                 printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
 577                 return;
 578         }
 579 }
 580 
 581 /*
 582  * Some ugly macros to speed up __get_free_pages()..
 583  */
 /*
  * RMQUEUE(order): take a block of at least 2^order pages off the free lists.
  *
  * Starting at free_area_list[order] and going up, the first non-empty
  * list loses its first element.  On success the macro toggles the block's
  * map bit, lowers nr_free_pages by 1 << order, restores the interrupt
  * flags, splits the block down with EXPAND() and RETURNS the block address
  * FROM THE ENCLOSING FUNCTION.  If every list is empty it simply falls
  * through.
  *
  * It relies on a local variable named `flags` in the enclosing function.
  * The `order` argument is expanded several times and without parentheses,
  * so only pass a plain variable.
  */
 584 #define RMQUEUE(order) \
 585 do { struct mem_list * queue = free_area_list+order; \
 586      unsigned long new_order = order; \
 587         do { struct mem_list *next = queue->next; \
 588                 if (queue != next) { \
 589                         queue->next = next->next; \
 590                         next->next->prev = queue; \
 591                         mark_used((unsigned long) next, new_order); \
 592                         nr_free_pages -= 1 << order; \
 593                         restore_flags(flags); \
 594                         EXPAND(next, order, new_order); \
 595                         return (unsigned long) next; \
 596                 } new_order++; queue++; \
 597         } while (new_order < NR_MEM_LISTS); \
 598 } while (0)
 599 
 600 static inline int mark_used(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 601 {
 602         return change_bit(addr >> (PAGE_SHIFT+1+order), free_area_map[order]);
 603 }
 604 
 /*
  * EXPAND(addr, low, high): split a free block of order `high` down to
  * order `low`.
  *
  * On every step the block is halved: the lower half (at addr) is put on
  * the free list of the next smaller order and its map bit toggled, with
  * interrupts disabled; addr then moves on to the upper half.  When the
  * loop ends, the use count of the page at addr is set to 1.
  *
  * `addr` and `high` are modified, so both must be variables; `flags`
  * must be a local of the enclosing function.
  */
 605 #define EXPAND(addr,low,high) \
 606 do { unsigned long size = PAGE_SIZE << high; \
 607         while (high > low) { \
 608                 high--; size >>= 1; cli(); \
 609                 add_mem_queue(free_area_list+high, addr); \
 610                 mark_used((unsigned long) addr, high); \
 611                 restore_flags(flags); \
 612                 addr = (struct mem_list *) (size + (unsigned long) addr); \
 613         } mem_map[MAP_NR((unsigned long) addr)] = 1; \
 614 } while (0)
 615 
 616 unsigned long __get_free_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 617 {
 618         unsigned long flags;
 619         int reserved_pages;
 620 
 621         if (intr_count && priority != GFP_ATOMIC) {
 622                 static int count = 0;
 623                 if (++count < 5) {
 624                         printk("gfp called nonatomically from interrupt %p\n",
 625                                 __builtin_return_address(0));
 626                         priority = GFP_ATOMIC;
 627                 }
 628         }
 629         reserved_pages = 5;
 630         if (priority != GFP_NFS)
 631                 reserved_pages = min_free_pages;
 632         save_flags(flags);
 633 repeat:
 634         cli();
 635         if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
 636                 RMQUEUE(order);
 637                 restore_flags(flags);
 638                 return 0;
 639         }
 640         restore_flags(flags);
 641         if (priority != GFP_BUFFER && try_to_free_page(priority))
 642                 goto repeat;
 643         return 0;
 644 }
 645 
 646 /*
 647  * Yes, I know this is ugly. Don't tell me.
 648  */
 649 unsigned long __get_dma_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 650 {
 651         unsigned long list = 0; 
 652         unsigned long result;
 653         unsigned long limit = 16*1024*1024;
 654         
 655         /* if (EISA_bus) limit = ~0UL; */
 656         if (priority != GFP_ATOMIC)
 657                 priority = GFP_BUFFER;
 658         for (;;) {
 659                 result = __get_free_pages(priority, order);
 660                 if (result < limit) /* covers failure as well */
 661                         break;
 662                 *(unsigned long *) result = list;
 663                 list = result;
 664         }
 665         while (list) {
 666                 unsigned long tmp = list;
 667                 list = *(unsigned long *) list;
 668                 free_pages(tmp, order);
 669         }
 670         return result;
 671 }
 672 
 673 /*
 674  * Show free area list (used inside shift_scroll-lock stuff)
 675  * We also calculate the percentage fragmentation. We do this by counting the
 676  * memory on each free list with the exception of the first item on the list.
 677  */
 678 void show_free_areas(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 679 {
 680         unsigned long order, flags;
 681         unsigned long total = 0;
 682 
 683         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 684         save_flags(flags);
 685         cli();
 686         for (order=0 ; order < NR_MEM_LISTS; order++) {
 687                 struct mem_list * tmp;
 688                 unsigned long nr = 0;
 689                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 690                         nr ++;
 691                 }
 692                 total += nr * (4 << order);
 693                 printk("%lu*%ukB ", nr, 4 << order);
 694         }
 695         restore_flags(flags);
 696         printk("= %lukB)\n", total);
 697 #ifdef SWAP_CACHE_INFO
 698         show_swap_cache_info();
 699 #endif  
 700 }
 701 
 702 /*
 703  * Trying to stop swapping from a file is fraught with races, so
 704  * we repeat quite a bit here when we have to pause. swapoff()
 705  * isn't exactly timing-critical, so who cares?
 706  */
 /*
  * try_to_unuse: remove every reference to swap area `type` from the page
  * tables of all tasks, reading swapped-out pages back into memory.
  *
  * Returns 0 on success, -ENOMEM if a page for reading back could not be
  * allocated.
  */
 707 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
 708 {
 709         int nr, pgt, pg;
 710         unsigned long page, *ppage;
             /* spare page for the next read-back; 0 when none is held */
 711         unsigned long tmp = 0;
 712         struct task_struct *p;
 713 
 714         nr = 0;
 715         
 716 /*
 717  * When we have to sleep, we restart the whole algorithm from the same
 718  * task we stopped in. That at least rids us of all races.
 719  */
 720 repeat:
 721         for (; nr < NR_TASKS ; nr++) {
 722                 p = task[nr];
 723                 if (!p)
 724                         continue;
                     /* walk the task's page directory */
 725                 for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++) {
 726                         ppage = pgt + ((unsigned long *) p->tss.cr3);
 727                         page = *ppage;
 728                         if (!page)
 729                                 continue;
 730                         if (!(page & PAGE_PRESENT) || (page >= high_memory))
 731                                 continue;
 732                         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
 733                                 continue;
                             /* walk the page table this directory entry points to */
 734                         ppage = (unsigned long *) (page & PAGE_MASK);   
 735                         for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++) {
 736                                 page = *ppage;
 737                                 if (!page)
 738                                         continue;
                                     /*
                                      * Page is in memory: if it has a swap-cache entry
                                      * in this area, mark it dirty and drop the entry.
                                      */
 739                                 if (page & PAGE_PRESENT) {
 740                                         if (!(page = in_swap_cache(page)))
 741                                                 continue;
 742                                         if (SWP_TYPE(page) != type)
 743                                                 continue;
 744                                         *ppage |= PAGE_DIRTY;
 745                                         delete_from_swap_cache(*ppage);
 746                                         continue;
 747                                 }
                                     /* not present: `page` is a swap entry */
 748                                 if (SWP_TYPE(page) != type)
 749                                         continue;
                                     /* get a spare page first, then rescan from the current task */
 750                                 if (!tmp) {
 751                                         if (!(tmp = __get_free_page(GFP_KERNEL)))
 752                                                 return -ENOMEM;
 753                                         goto repeat;
 754                                 }
 755                                 read_swap_page(page, (char *) tmp);
                                     /* install the page only if the entry is unchanged after the read */
 756                                 if (*ppage == page) {
 757                                         *ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
 758                                         ++p->mm->rss;
 759                                         swap_free(page);
 760                                         tmp = 0;
 761                                 }
 762                                 goto repeat;
 763                         }
 764                 }
 765         }
             /* release the spare page; tmp may be 0 here */
 766         free_page(tmp);
 767         return 0;
 768 }
 769 
 770 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 771 {
 772         struct swap_info_struct * p;
 773         struct inode * inode;
 774         unsigned int type;
 775         int i;
 776 
 777         if (!suser())
 778                 return -EPERM;
 779         i = namei(specialfile,&inode);
 780         if (i)
 781                 return i;
 782         p = swap_info;
 783         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 784                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 785                         continue;
 786                 if (p->swap_file) {
 787                         if (p->swap_file == inode)
 788                                 break;
 789                 } else {
 790                         if (!S_ISBLK(inode->i_mode))
 791                                 continue;
 792                         if (p->swap_device == inode->i_rdev)
 793                                 break;
 794                 }
 795         }
 796         iput(inode);
 797         if (type >= nr_swapfiles)
 798                 return -EINVAL;
 799         p->flags = SWP_USED;
 800         i = try_to_unuse(type);
 801         if (i) {
 802                 p->flags = SWP_WRITEOK;
 803                 return i;
 804         }
 805         nr_swap_pages -= p->pages;
 806         iput(p->swap_file);
 807         p->swap_file = NULL;
 808         p->swap_device = 0;
 809         vfree(p->swap_map);
 810         p->swap_map = NULL;
 811         free_page((long) p->swap_lockmap);
 812         p->swap_lockmap = NULL;
 813         p->flags = 0;
 814         return 0;
 815 }
 816 
 817 /*
 818  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 819  *
 820  * The swapon system call
 821  */
 822 asmlinkage int sys_swapon(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 823 {
 824         struct swap_info_struct * p;
 825         struct inode * swap_inode;
 826         unsigned int type;
 827         int i,j;
 828         int error;
 829 
 830         if (!suser())
 831                 return -EPERM;
 832         p = swap_info;
 833         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 834                 if (!(p->flags & SWP_USED))
 835                         break;
 836         if (type >= MAX_SWAPFILES)
 837                 return -EPERM;
 838         if (type >= nr_swapfiles)
 839                 nr_swapfiles = type+1;
 840         p->flags = SWP_USED;
 841         p->swap_file = NULL;
 842         p->swap_device = 0;
 843         p->swap_map = NULL;
 844         p->swap_lockmap = NULL;
 845         p->lowest_bit = 0;
 846         p->highest_bit = 0;
 847         p->max = 1;
 848         error = namei(specialfile,&swap_inode);
 849         if (error)
 850                 goto bad_swap;
 851         p->swap_file = swap_inode;
 852         error = -EBUSY;
 853         if (swap_inode->i_count != 1)
 854                 goto bad_swap;
 855         error = -EINVAL;
 856         if (S_ISBLK(swap_inode->i_mode)) {
 857                 p->swap_device = swap_inode->i_rdev;
 858                 p->swap_file = NULL;
 859                 iput(swap_inode);
 860                 error = -ENODEV;
 861                 if (!p->swap_device)
 862                         goto bad_swap;
 863                 error = -EBUSY;
 864                 for (i = 0 ; i < nr_swapfiles ; i++) {
 865                         if (i == type)
 866                                 continue;
 867                         if (p->swap_device == swap_info[i].swap_device)
 868                                 goto bad_swap;
 869                 }
 870         } else if (!S_ISREG(swap_inode->i_mode))
 871                 goto bad_swap;
 872         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 873         if (!p->swap_lockmap) {
 874                 printk("Unable to start swapping: out of memory :-)\n");
 875                 error = -ENOMEM;
 876                 goto bad_swap;
 877         }
 878         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 879         if (memcmp("SWAP-SPACE",p->swap_lockmap+4086,10)) {
 880                 printk("Unable to find swap-space signature\n");
 881                 error = -EINVAL;
 882                 goto bad_swap;
 883         }
 884         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 885         j = 0;
 886         p->lowest_bit = 0;
 887         p->highest_bit = 0;
 888         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 889                 if (test_bit(i,p->swap_lockmap)) {
 890                         if (!p->lowest_bit)
 891                                 p->lowest_bit = i;
 892                         p->highest_bit = i;
 893                         p->max = i+1;
 894                         j++;
 895                 }
 896         }
 897         if (!j) {
 898                 printk("Empty swap-file\n");
 899                 error = -EINVAL;
 900                 goto bad_swap;
 901         }
 902         p->swap_map = (unsigned char *) vmalloc(p->max);
 903         if (!p->swap_map) {
 904                 error = -ENOMEM;
 905                 goto bad_swap;
 906         }
 907         for (i = 1 ; i < p->max ; i++) {
 908                 if (test_bit(i,p->swap_lockmap))
 909                         p->swap_map[i] = 0;
 910                 else
 911                         p->swap_map[i] = 0x80;
 912         }
 913         p->swap_map[0] = 0x80;
 914         memset(p->swap_lockmap,0,PAGE_SIZE);
 915         p->flags = SWP_WRITEOK;
 916         p->pages = j;
 917         nr_swap_pages += j;
 918         printk("Adding Swap: %dk swap-space\n",j<<2);
 919         return 0;
 920 bad_swap:
 921         free_page((long) p->swap_lockmap);
 922         vfree(p->swap_map);
 923         iput(p->swap_file);
 924         p->swap_device = 0;
 925         p->swap_file = NULL;
 926         p->swap_map = NULL;
 927         p->swap_lockmap = NULL;
 928         p->flags = 0;
 929         return error;
 930 }
 931 
 932 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 933 {
 934         unsigned int i, j;
 935 
 936         val->freeswap = val->totalswap = 0;
 937         for (i = 0; i < nr_swapfiles; i++) {
 938                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 939                         continue;
 940                 for (j = 0; j < swap_info[i].max; ++j)
 941                         switch (swap_info[i].swap_map[j]) {
 942                                 case 128:
 943                                         continue;
 944                                 case 0:
 945                                         ++val->freeswap;
 946                                 default:
 947                                         ++val->totalswap;
 948                         }
 949         }
 950         val->freeswap <<= PAGE_SHIFT;
 951         val->totalswap <<= PAGE_SHIFT;
 952         return;
 953 }
 954 
 955 /*
 956  * set up the free-area data structures:
 957  *   - mark all pages MAP_PAGE_RESERVED
 958  *   - mark all memory queues empty
 959  *   - clear the memory bitmaps
 960  */
 961 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
     /* [previous][next][first][last][top][bottom][index][help] */
 962 {
 963         unsigned short * p;
 964         unsigned long mask = PAGE_MASK;
 965         int i;
 966 
 967         /*
 968          * select nr of pages we try to keep free for important stuff
 969          * with a minimum of 16 pages. This is totally arbitrary
 970          */
 971         i = end_mem >> (PAGE_SHIFT+6);
 972         if (i < 16)
 973                 i = 16;
 974         min_free_pages = i;
 975         start_mem = init_swap_cache(start_mem, end_mem);
 976         mem_map = (unsigned short *) start_mem;
 977         p = mem_map + MAP_NR(end_mem);
 978         start_mem = (unsigned long) p;
 979         while (p > mem_map)
 980                 *--p = MAP_PAGE_RESERVED;
 981 
 982         for (i = 0 ; i < NR_MEM_LISTS ; i++, mask <<= 1) {
 983                 unsigned long bitmap_size;
 984                 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
 985                 end_mem = (end_mem + ~mask) & mask;
 986                 bitmap_size = end_mem >> (PAGE_SHIFT + i);
 987                 bitmap_size = (bitmap_size + 7) >> 3;
 988                 free_area_map[i] = (unsigned char *) start_mem;
 989                 memset((void *) start_mem, 0, bitmap_size);
 990                 start_mem += bitmap_size;
 991         }
 992         return start_mem;
 993 }

/* [previous][next][first][last][top][bottom][index][help] */