root/mm/swap.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. show_swap_cache_info
  2. add_to_swap_cache
  3. init_swap_cache
  4. rw_swap_page
  5. get_swap_page
  6. swap_duplicate
  7. swap_free
  8. swap_in
  9. try_to_swap_out
  10. swap_out_process
  11. swap_out
  12. try_to_free_page
  13. add_mem_queue
  14. remove_mem_queue
  15. free_pages_ok
  16. check_free_buffers
  17. free_pages
  18. mark_used
  19. __get_free_pages
  20. __get_dma_pages
  21. show_free_areas
  22. try_to_unuse
  23. sys_swapoff
  24. sys_swapon
  25. si_swapinfo
  26. free_area_init

   1 /*
   2  *  linux/mm/swap.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  */
   6 
   7 /*
   8  * This file should contain most things doing the swapping from/to disk.
   9  * Started 18.12.91
  10  */
  11 
  12 #include <linux/mm.h>
  13 #include <linux/sched.h>
  14 #include <linux/head.h>
  15 #include <linux/kernel.h>
  16 #include <linux/kernel_stat.h>
  17 #include <linux/errno.h>
  18 #include <linux/string.h>
  19 #include <linux/stat.h>
  20 #include <linux/fs.h>
  21 
  22 #include <asm/dma.h>
  23 #include <asm/system.h> /* for cli()/sti() */
  24 #include <asm/bitops.h>
  25 
  26 #define MAX_SWAPFILES 8
  27 
   /* Bits kept in swap_info_struct.flags; SWP_WRITEOK (3) includes the SWP_USED bit (1). */
   28 #define SWP_USED        1
   29 #define SWP_WRITEOK     3
   30 
   /*
    * A swap entry packs the swap area index ("type") into bits 1-7 and the
    * page offset inside that area into the bits from PAGE_SHIFT upwards.
    * Bit 0 is left clear by SWP_ENTRY().
    */
   31 #define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
   32 #define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
   33 #define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))
  34 
  35 int min_free_pages = 20;
  36 
  37 static int nr_swapfiles = 0;
  38 static struct wait_queue * lock_queue = NULL;
  39 
   /* Per swap area state; one slot for each of the MAX_SWAPFILES possible areas. */
   40 static struct swap_info_struct {
   41         unsigned long flags;            /* tested against SWP_USED / SWP_WRITEOK */
   42         struct inode * swap_file;       /* used via bmap/smap when swap_device is 0 */
   43         unsigned int swap_device;       /* passed to ll_rw_page() when non-zero */
   44         unsigned char * swap_map;       /* per-offset use count; 0 means free */
   45         unsigned char * swap_lockmap;   /* per-offset lock bits (set_bit/clear_bit) */
   46         int pages;                      /* NOTE(review): not used in the code visible here */
   47         int lowest_bit;                 /* get_swap_page() starts scanning here */
   48         int highest_bit;                /* get_swap_page() stops scanning here */
   49         unsigned long max;              /* offsets >= max are rejected */
   50 } swap_info[MAX_SWAPFILES];
  51 
  52 extern int shm_swap (int);
  53 
  54 unsigned long *swap_cache;
  55 
  56 #ifdef SWAP_CACHE_INFO
  57 unsigned long swap_cache_add_total = 0;
  58 unsigned long swap_cache_add_success = 0;
  59 unsigned long swap_cache_del_total = 0;
  60 unsigned long swap_cache_del_success = 0;
  61 unsigned long swap_cache_find_total = 0;
  62 unsigned long swap_cache_find_success = 0;
  63 
  64 extern inline void show_swap_cache_info(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66         printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
  67                 swap_cache_add_total, swap_cache_add_success, 
  68                 swap_cache_del_total, swap_cache_del_success,
  69                 swap_cache_find_total, swap_cache_find_success);
  70 }
  71 #endif
  72 
  73 extern inline int add_to_swap_cache(unsigned long addr, unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
  74 {
  75         struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
  76         
  77 #ifdef SWAP_CACHE_INFO
  78         swap_cache_add_total++;
  79 #endif
  80         if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
  81                 entry = (unsigned long) xchg_ptr(swap_cache + MAP_NR(addr), (void *) entry);
  82                 if (entry)  {
  83                         printk("swap_cache: replacing non-NULL entry\n");
  84                 }
  85 #ifdef SWAP_CACHE_INFO
  86                 swap_cache_add_success++;
  87 #endif
  88                 return 1;
  89         }
  90         return 0;
  91 }
  92 
  93 static unsigned long init_swap_cache(unsigned long mem_start,
     /* [previous][next][first][last][top][bottom][index][help] */
  94         unsigned long mem_end)
  95 {
  96         unsigned long swap_cache_size;
  97 
  98         mem_start = (mem_start + 15) & ~15;
  99         swap_cache = (unsigned long *) mem_start;
 100         swap_cache_size = MAP_NR(mem_end);
 101         memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
 102         return (unsigned long) (swap_cache + swap_cache_size);
 103 }
 104 
 105 void rw_swap_page(int rw, unsigned long entry, char * buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 106 {
 107         unsigned long type, offset;
 108         struct swap_info_struct * p;
 109 
 110         type = SWP_TYPE(entry);
 111         if (type >= nr_swapfiles) {
 112                 printk("Internal error: bad swap-device\n");
 113                 return;
 114         }
 115         p = &swap_info[type];
 116         offset = SWP_OFFSET(entry);
 117         if (offset >= p->max) {
 118                 printk("rw_swap_page: weirdness\n");
 119                 return;
 120         }
 121         if (!(p->flags & SWP_USED)) {
 122                 printk("Trying to swap to unused swap-device\n");
 123                 return;
 124         }
 125         while (set_bit(offset,p->swap_lockmap))
 126                 sleep_on(&lock_queue);
 127         if (rw == READ)
 128                 kstat.pswpin++;
 129         else
 130                 kstat.pswpout++;
 131         if (p->swap_device) {
 132                 ll_rw_page(rw,p->swap_device,offset,buf);
 133         } else if (p->swap_file) {
 134                 struct inode *swapf = p->swap_file;
 135                 unsigned int zones[8];
 136                 int i;
 137                 if (swapf->i_op->bmap == NULL
 138                         && swapf->i_op->smap != NULL){
 139                         /*
 140                                 With MsDOS, we use msdos_smap which return
 141                                 a sector number (not a cluster or block number).
 142                                 It is a patch to enable the UMSDOS project.
 143                                 Other people are working on better solution.
 144 
 145                                 It sounds like ll_rw_swap_file defined
 146                                 it operation size (sector size) based on
 147                                 PAGE_SIZE and the number of block to read.
 148                                 So using bmap or smap should work even if
 149                                 smap will require more blocks.
 150                         */
 151                         int j;
 152                         unsigned int block = offset << 3;
 153 
 154                         for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
 155                                 if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
 156                                         printk("rw_swap_page: bad swap file\n");
 157                                         return;
 158                                 }
 159                         }
 160                 }else{
 161                         int j;
 162                         unsigned int block = offset
 163                                 << (12 - swapf->i_sb->s_blocksize_bits);
 164 
 165                         for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
 166                                 if (!(zones[i] = bmap(swapf,block++))) {
 167                                         printk("rw_swap_page: bad swap file\n");
 168                                         return;
 169                                 }
 170                 }
 171                 ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
 172         } else
 173                 printk("re_swap_page: no swap file or device\n");
 174         if (offset && !clear_bit(offset,p->swap_lockmap))
 175                 printk("rw_swap_page: lock already cleared\n");
 176         wake_up(&lock_queue);
 177 }
 178 
 179 unsigned int get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181         struct swap_info_struct * p;
 182         unsigned int offset, type;
 183 
 184         p = swap_info;
 185         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 186                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 187                         continue;
 188                 for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
 189                         if (p->swap_map[offset])
 190                                 continue;
 191                         p->swap_map[offset] = 1;
 192                         nr_swap_pages--;
 193                         if (offset == p->highest_bit)
 194                                 p->highest_bit--;
 195                         p->lowest_bit = offset;
 196                         return SWP_ENTRY(type,offset);
 197                 }
 198         }
 199         return 0;
 200 }
 201 
 202 unsigned long swap_duplicate(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 203 {
 204         struct swap_info_struct * p;
 205         unsigned long offset, type;
 206 
 207         if (!entry)
 208                 return 0;
 209         offset = SWP_OFFSET(entry);
 210         type = SWP_TYPE(entry);
 211         if (type == SHM_SWP_TYPE)
 212                 return entry;
 213         if (type >= nr_swapfiles) {
 214                 printk("Trying to duplicate nonexistent swap-page\n");
 215                 return 0;
 216         }
 217         p = type + swap_info;
 218         if (offset >= p->max) {
 219                 printk("swap_duplicate: weirdness\n");
 220                 return 0;
 221         }
 222         if (!p->swap_map[offset]) {
 223                 printk("swap_duplicate: trying to duplicate unused page\n");
 224                 return 0;
 225         }
 226         p->swap_map[offset]++;
 227         return entry;
 228 }
 229 
  230 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Drop one reference to the swap page named by entry.
      *
      * A zero entry and entries of type SHM_SWP_TYPE are ignored.  A bad
      * type, an offset beyond the area, or an area without SWP_USED is
      * reported with printk and nothing is changed.  Otherwise the use
      * count in swap_map is decremented; when it reaches zero
      * nr_swap_pages is incremented.
      */
  231 {
  232         struct swap_info_struct * p;
  233         unsigned long offset, type;
  234 
  235         if (!entry)
  236                 return;
  237         type = SWP_TYPE(entry);
  238         if (type == SHM_SWP_TYPE)
  239                 return;
  240         if (type >= nr_swapfiles) {
  241                 printk("Trying to free nonexistent swap-page\n");
  242                 return;
  243         }
  244         p = & swap_info[type];
  245         offset = SWP_OFFSET(entry);
  246         if (offset >= p->max) {
  247                 printk("swap_free: weirdness\n");
  248                 return;
  249         }
  250         if (!(p->flags & SWP_USED)) {
  251                 printk("Trying to free swap from unused swap-device\n");
  252                 return;
  253         }
              /* Take the per-page lock bit, sleeping while it is already set. */
  254         while (set_bit(offset,p->swap_lockmap))
  255                 sleep_on(&lock_queue);
              /* Widen the scan bounds used by get_swap_page() to cover this offset. */
  256         if (offset < p->lowest_bit)
  257                 p->lowest_bit = offset;
  258         if (offset > p->highest_bit)
  259                 p->highest_bit = offset;
  260         if (!p->swap_map[offset])
  261                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
  262         else
  263                 if (!--p->swap_map[offset])
  264                         nr_swap_pages++;
              /* Release the lock bit and wake anybody sleeping on lock_queue. */
  265         if (!clear_bit(offset,p->swap_lockmap))
  266                 printk("swap_free: lock already cleared\n");
  267         wake_up(&lock_queue);
  268 }
 269 
 270 unsigned long swap_in(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 271 {
 272         unsigned long page;
 273 
 274         if (!(page = get_free_page(GFP_KERNEL))) {
 275                 oom(current);
 276                 return BAD_PAGE;
 277         }
 278         read_swap_page(entry, (char *) page);
 279         if (add_to_swap_cache(page, entry))
 280                 return page | PAGE_PRESENT;
 281         swap_free(entry);
 282         return page | PAGE_DIRTY | PAGE_PRESENT;
 283 }
 284 
  285 static inline int try_to_swap_out(struct vm_area_struct* vma, unsigned offset, unsigned long * table_ptr)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Try to evict the page mapped by the page-table entry *table_ptr.
      *
      * vma:       area the entry belongs to; its vm_ops->swapout is used for
      *            dirty pages when present.
      * offset:    passed through unchanged to vm_ops->swapout.
      * table_ptr: the page-table entry to examine and possibly rewrite.
      *
      * Returns 0 if the mapping was left in place.  Returns 1, or
      * 1 + the page's mem_map count after free_page(), when the mapping was
      * removed; swap_out_process() stops on exactly 1 and keeps scanning on
      * any other non-zero value.
      */
  286 {
  287         unsigned long page, entry;
  288 
  289         page = *table_ptr;
  290         if (!(PAGE_PRESENT & page))
  291                 return 0;
  292         if (page >= high_memory)
  293                 return 0;
  294         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
  295                 return 0;
  296         
              /* A dirty page that was still in the swap cache: drop the stale cache entry. */
  297         if ((PAGE_DIRTY & page) && delete_from_swap_cache(page))  {
  298                 *table_ptr &= ~PAGE_ACCESSED;
  299                 return 0;
  300         }
              /* Recently used: clear the accessed bit and leave the page for a later pass. */
  301         if (PAGE_ACCESSED & page) {
  302                 *table_ptr &= ~PAGE_ACCESSED;
  303                 return 0;
  304         }
              /* Dirty page: only handled when mem_map says there is a single user. */
  305         if (PAGE_DIRTY & page) {
  306                 page &= PAGE_MASK;
  307                 if (mem_map[MAP_NR(page)] != 1)
  308                         return 0;
  309                 if (vma->vm_ops && vma->vm_ops->swapout)
  310                         vma->vm_ops->swapout(vma, offset, table_ptr);
  311                 else
  312                 {
  313                         if (!(entry = get_swap_page()))
  314                                 return 0;
                              /* The entry now holds the swap entry instead of the page. */
  315                         *table_ptr = entry;
  316                         invalidate();
  317                         write_swap_page(entry, (char *) page);
  318                 }
  319                 free_page(page);
  320                 return 1 + mem_map[MAP_NR(page)];
  321         }
              /* Clean page with a swap-cache entry: point the entry back at swap. */
  322         if ((entry = find_in_swap_cache(page)))  {
  323                 if (mem_map[MAP_NR(page)] != 1) {
  324                         *table_ptr |= PAGE_DIRTY;
  325                         printk("Aiee.. duplicated cached swap-cache entry\n");
  326                         return 0;
  327                 }
  328                 *table_ptr = entry;
  329                 invalidate();
  330                 free_page(page & PAGE_MASK);
  331                 return 1;
  332         } 
              /* Clean page without a swap copy: just drop the mapping. */
  333         page &= PAGE_MASK;
  334         *table_ptr = 0;
  335         invalidate();
  336         free_page(page);
  337         return 1 + mem_map[MAP_NR(page)];
  338 }
 339 
 340 /*
 341  * A new implementation of swap_out().  We do not swap complete processes,
 342  * but only a small number of blocks, before we continue with the next
 343  * process.  The number of blocks actually swapped is determined on the
 344  * number of page faults, that this process actually had in the last time,
 345  * so we won't swap heavily used processes all the time ...
 346  *
  347  * Note: the priority argument is a hint on how much CPU to waste with the
  348  *       swap block search, not a hint on how many blocks to swap with
  349  *       each process.
 350  *
 351  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 352  */
 353 
 354 /*
 355  * These are the minimum and maximum number of pages to swap from one process,
 356  * before proceeding to the next:
 357  */
 358 #define SWAP_MIN        4
 359 #define SWAP_MAX        32
 360 
 361 /*
 362  * The actual number of pages to swap is determined as:
 363  * SWAP_RATIO / (number of recent major page faults)
 364  */
 365 #define SWAP_RATIO      128
 366 
  367 static int swap_out_process(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Try to swap out one page of process p.
      *
      * Scanning resumes at p->mm->swap_address, which is reset to 0 here and
      * set again only when a page is swapped out with result 1.
      *
      * Returns 1 when try_to_swap_out() returned exactly 1 (rss is
      * decremented and the resume address saved), 0 when the scan ran out
      * of vm areas or address space without such a result.
      */
  368 {
  369         unsigned long address;
  370         unsigned long offset;
  371         unsigned long *pgdir;
  372         unsigned long pg_table;
  373         struct vm_area_struct* vma;
  374 
  375         /*
  376          * Go through process' page directory.
  377          */
  378         address = p->mm->swap_address;
  379         p->mm->swap_address = 0;
  380 
  381         /*
  382          * Find the proper vm-area
  383          */
  384         vma = p->mm->mmap;
  385         for (;;) {
  386                 if (!vma)
  387                         return 0;
  388                 if (address <= vma->vm_end)
  389                         break;
  390                 vma = vma->vm_next;
  391         }
  392         if (address < vma->vm_start)
  393                 address = vma->vm_start;
  394 
              /* Split the address into a page-directory base and an offset below it. */
  395         pgdir = PAGE_DIR_OFFSET(p, address);
  396         offset = address & ~PGDIR_MASK;
  397         address &= PGDIR_MASK;
  398         for ( ; address < TASK_SIZE ;
  399         pgdir++, address = address + PGDIR_SIZE, offset = 0) {
  400                 pg_table = *pgdir;
  401                 if (pg_table >= high_memory)
  402                         continue;
  403                 if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
  404                         continue;
  405                 if (!(PAGE_PRESENT & pg_table)) {
  406                         printk("swap_out_process (%s): bad page-table at vm %08lx: %08lx\n",
  407                                         p->comm, address + offset, pg_table);
  408                         *pgdir = 0;
  409                         continue;
  410                 }
                      /* Strip the flag bits; NOTE(review): 0xfffff000 assumes 4 kB pages -- confirm. */
  411                 pg_table &= 0xfffff000;
  412 
  413                 /*
  414                  * Go through this page table.
  415                  */
  416                 for( ; offset < ~PGDIR_MASK ; offset += PAGE_SIZE) {
  417                         /*
  418                          * Update vma again..
  419                          */
  420                         for (;;) {
  421                                 if (address+offset < vma->vm_end)
  422                                         break;
  423                                 vma = vma->vm_next;
  424                                 if (!vma)
  425                                         return 0;
  426                         }
  427 
                              /*
                               * offset >> 10 is used as the byte offset of the entry inside
                               * the page table; presumably 4-byte entries and 4 kB pages --
                               * TODO confirm.
                               */
  428                         switch(try_to_swap_out(vma, offset+address-vma->vm_start, (unsigned long *) (pg_table + (offset >> 10)))) {
  429                                 case 0:
  430                                         break;
  431 
  432                                 case 1:
  433                                         p->mm->rss--;
  434                                         /* continue with the following page the next time */
  435                                         p->mm->swap_address = address + offset + PAGE_SIZE;
  436                                         return 1;
  437 
  438                                 default:
  439                                         p->mm->rss--;
  440                                         break;
  441                         }
  442                 }
  443         }
  444         /*
  445          * Finish work with this process, if we reached the end of the page
  446          * directory.
  447          */
  448         return 0;
  449 }
 450 
 451 static int swap_out(unsigned int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 452 {
 453         static int swap_task;
 454         int loop;
 455         int counter = NR_TASKS * 2 >> priority;
 456         struct task_struct *p;
 457 
 458         counter = NR_TASKS * 2 >> priority;
 459         for(; counter >= 0; counter--, swap_task++) {
 460                 /*
 461                  * Check that swap_task is suitable for swapping.  If not, look for
 462                  * the next suitable process.
 463                  */
 464                 loop = 0;
 465                 while(1) {
 466                         if (swap_task >= NR_TASKS) {
 467                                 swap_task = 1;
 468                                 if (loop)
 469                                         /* all processes are unswappable or already swapped out */
 470                                         return 0;
 471                                 loop = 1;
 472                         }
 473 
 474                         p = task[swap_task];
 475                         if (p && p->mm->swappable && p->mm->rss)
 476                                 break;
 477 
 478                         swap_task++;
 479                 }
 480 
 481                 /*
 482                  * Determine the number of pages to swap from this process.
 483                  */
 484                 if (!p->mm->swap_cnt) {
 485                         p->mm->dec_flt = (p->mm->dec_flt * 3) / 4 + p->mm->maj_flt - p->mm->old_maj_flt;
 486                         p->mm->old_maj_flt = p->mm->maj_flt;
 487 
 488                         if (p->mm->dec_flt >= SWAP_RATIO / SWAP_MIN) {
 489                                 p->mm->dec_flt = SWAP_RATIO / SWAP_MIN;
 490                                 p->mm->swap_cnt = SWAP_MIN;
 491                         } else if (p->mm->dec_flt <= SWAP_RATIO / SWAP_MAX)
 492                                 p->mm->swap_cnt = SWAP_MAX;
 493                         else
 494                                 p->mm->swap_cnt = SWAP_RATIO / p->mm->dec_flt;
 495                 }
 496                 if (swap_out_process(p)) {
 497                         if ((--p->mm->swap_cnt) == 0)
 498                                 swap_task++;
 499                         return 1;
 500                 }
 501         }
 502         return 0;
 503 }
 504 
 505 static int try_to_free_page(int priority)
     /* [previous][next][first][last][top][bottom][index][help] */
 506 {
 507         int i=6;
 508 
 509         while (i--) {
 510                 if (priority != GFP_NOBUFFER && shrink_buffers(i))
 511                         return 1;
 512                 if (shm_swap(i))
 513                         return 1;
 514                 if (swap_out(i))
 515                         return 1;
 516         }
 517         return 0;
 518 }
 519 
 520 static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 521 {
 522         entry->prev = head;
 523         (entry->next = head->next)->prev = entry;
 524         head->next = entry;
 525 }
 526 
 527 static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 528 {
 529         entry->next->prev = entry->prev;
 530         entry->prev->next = entry->next;
 531 }
 532 
 533 /*
 534  * Free_page() adds the page to the free lists. This is optimized for
 535  * fast normal cases (no error jumps taken normally).
 536  *
 537  * The way to optimize jumps for gcc-2.2.2 is to:
 538  *  - select the "normal" case and put it inside the if () { XXX }
 539  *  - no else-statements if you can avoid them
 540  *
 541  * With the above two rules, you get a straight-line execution path
 542  * for the normal case, giving better asm-code.
 543  */
 544 
 545 /*
 546  * Buddy system. Hairy. You really aren't expected to understand this
 547  */
  548 static inline void free_pages_ok(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Put the block of 2^order pages at addr on the free lists, merging it
      * with its buddy for as long as change_bit() returns non-zero for the
      * pair's bit, up to order NR_MEM_LISTS-1.  nr_free_pages is increased
      * by 1 << order.
      */
  549 {
              /* One bit in free_area_map[order] per pair of buddies. */
  550         unsigned long index = MAP_NR(addr) >> (1 + order);
  551         unsigned long mask = PAGE_MASK << order;
  552 
  553         addr &= mask;
  554         nr_free_pages += 1 << order;
  555         while (order < NR_MEM_LISTS-1) {
                      /* presumably change_bit() toggles the bit and returns its old value -- confirm */
  556                 if (!change_bit(index, free_area_map[order]))
  557                         break;
                      /* 1+~mask is the block size in bytes, so the XOR gives the buddy's address. */
  558                 remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
  559                 order++;
  560                 index >>= 1;
  561                 mask <<= 1;
  562                 addr &= mask;
  563         }
  564         add_mem_queue(free_area_list+order, (struct mem_list *) addr);
  565 }
 566 
 567 static inline void check_free_buffers(unsigned long addr)
     /* [previous][next][first][last][top][bottom][index][help] */
 568 {
 569         struct buffer_head * bh;
 570 
 571         bh = buffer_pages[MAP_NR(addr)];
 572         if (bh) {
 573                 struct buffer_head *tmp = bh;
 574                 do {
 575                         if (tmp->b_list == BUF_SHARED && tmp->b_dev != 0xffff)
 576                                 refile_buffer(tmp);
 577                         tmp = tmp->b_this_page;
 578                 } while (tmp != bh);
 579         }
 580 }
 581 
  582 void free_pages(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Drop one reference to the block of 2^order pages at addr.
      *
      * Addresses at or above high_memory are ignored, and pages marked
      * MAP_PAGE_RESERVED are left alone.  When the use count in mem_map
      * drops to zero the block goes back to the free lists and its swap
      * cache entry is removed.  A use count that is already zero is
      * reported with printk.
      */
  583 {
  584         if (addr < high_memory) {
  585                 unsigned long flag;
  586                 mem_map_t * map = mem_map + MAP_NR(addr);
  587                 if (*map) {
  588                         if (!(*map & MAP_PAGE_RESERVED)) {
                                      /* The count update and free-list insertion run with interrupts off. */
  589                                 save_flags(flag);
  590                                 cli();
  591                                 if (!--*map)  {
  592                                         free_pages_ok(addr, order);
  593                                         delete_from_swap_cache(addr);
  594                                 }
  595                                 restore_flags(flag);
                                      /* One user left: let check_free_buffers() refile its shared buffers. */
  596                                 if (*map == 1)
  597                                         check_free_buffers(addr);
  598                         }
  599                         return;
  600                 }
  601                 printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
                      /* Prints the word stored just below the addr argument; presumably the return address -- confirm. */
  602                 printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
  603                 return;
  604         }
  605 }
 606 
 607 /*
 608  * Some ugly macros to speed up __get_free_pages()..
 609  */
  /*
   * RMQUEUE(order): take a block of 2^order pages off the free lists.
   *
   * The lists are searched from `order` upwards.  On the first non-empty
   * list the first block is unlinked, its bitmap bit is flipped with
   * mark_used(), nr_free_pages is reduced by 1 << order, flags are
   * restored, EXPAND() deals with any excess, and the ENCLOSING FUNCTION
   * returns the block address.  If every list is empty the macro falls
   * through.  It relies on a variable named `flags` in the caller.
   */
  610 #define RMQUEUE(order) \
  611 do { struct mem_list * queue = free_area_list+order; \
  612      unsigned long new_order = order; \
  613         do { struct mem_list *next = queue->next; \
  614                 if (queue != next) { \
  615                         (queue->next = next->next)->prev = queue; \
  616                         mark_used((unsigned long) next, new_order); \
  617                         nr_free_pages -= 1 << order; \
  618                         restore_flags(flags); \
  619                         EXPAND(next, order, new_order); \
  620                         return (unsigned long) next; \
  621                 } new_order++; queue++; \
  622         } while (new_order < NR_MEM_LISTS); \
  623 } while (0)
 624 
 625 static inline int mark_used(unsigned long addr, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 626 {
 627         return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
 628 }
 629 
  /*
   * EXPAND(addr, low, high): cut a block of order `high` down to order `low`.
   *
   * Each step lowers `high` by one, halves the size, puts the half at
   * `addr` on the free list of the new order (with interrupts disabled
   * around the list and bitmap update) and advances `addr` by that size.
   * Finally the mem_map use count of the page at `addr` is set to 1.
   * `addr` and `high` are modified, so both must be lvalues; `flags` must
   * exist in the caller.
   */
  630 #define EXPAND(addr,low,high) \
  631 do { unsigned long size = PAGE_SIZE << high; \
  632         while (high > low) { \
  633                 high--; size >>= 1; cli(); \
  634                 add_mem_queue(free_area_list+high, addr); \
  635                 mark_used((unsigned long) addr, high); \
  636                 restore_flags(flags); \
  637                 addr = (struct mem_list *) (size + (unsigned long) addr); \
  638         } mem_map[MAP_NR((unsigned long) addr)] = 1; \
  639 } while (0)
 640 
 641 unsigned long __get_free_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
 642 {
 643         unsigned long flags;
 644         int reserved_pages;
 645 
 646         if (intr_count && priority != GFP_ATOMIC) {
 647                 static int count = 0;
 648                 if (++count < 5) {
 649                         printk("gfp called nonatomically from interrupt %p\n",
 650                                 __builtin_return_address(0));
 651                         priority = GFP_ATOMIC;
 652                 }
 653         }
 654         reserved_pages = 5;
 655         if (priority != GFP_NFS)
 656                 reserved_pages = min_free_pages;
 657         save_flags(flags);
 658 repeat:
 659         cli();
 660         if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
 661                 RMQUEUE(order);
 662                 restore_flags(flags);
 663                 return 0;
 664         }
 665         restore_flags(flags);
 666         if (priority != GFP_BUFFER && try_to_free_page(priority))
 667                 goto repeat;
 668         return 0;
 669 }
 670 
 671 /*
 672  * Yes, I know this is ugly. Don't tell me.
 673  */
  674 unsigned long __get_dma_pages(int priority, unsigned long order)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Allocate a block of 2^order pages that lies below MAX_DMA_ADDRESS.
      *
      * Blocks are requested from __get_free_pages() until one is below the
      * limit or the allocation fails (0 is below the limit as well).
      * Unsuitable blocks are chained through their first word and handed
      * back with free_pages() afterwards.  Any priority other than
      * GFP_ATOMIC is replaced by GFP_BUFFER.
      *
      * Returns the block address, or 0 on failure.
      */
  675 {
  676         unsigned long list = 0;
  677         unsigned long result;
  678         unsigned long limit = MAX_DMA_ADDRESS;
  679 
  680         /* if (EISA_bus) limit = ~0UL; */
  681         if (priority != GFP_ATOMIC)
  682                 priority = GFP_BUFFER;
  683         for (;;) {
  684                 result = __get_free_pages(priority, order);
  685                 if (result < limit) /* covers failure as well */
  686                         break;
                      /* Not usable for DMA: push it on the list of blocks to give back. */
  687                 *(unsigned long *) result = list;
  688                 list = result;
  689         }
  690         while (list) {
  691                 unsigned long tmp = list;
  692                 list = *(unsigned long *) list;
  693                 free_pages(tmp, order);
  694         }
  695         return result;
  696 }
 697 
 698 /*
 699  * Show free area list (used inside shift_scroll-lock stuff)
 700  * We also calculate the percentage fragmentation. We do this by counting the
 701  * memory on each free list with the exception of the first item on the list.
 702  */
 703 void show_free_areas(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 704 {
 705         unsigned long order, flags;
 706         unsigned long total = 0;
 707 
 708         printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
 709         save_flags(flags);
 710         cli();
 711         for (order=0 ; order < NR_MEM_LISTS; order++) {
 712                 struct mem_list * tmp;
 713                 unsigned long nr = 0;
 714                 for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
 715                         nr ++;
 716                 }
 717                 total += nr * (4 << order);
 718                 printk("%lu*%ukB ", nr, 4 << order);
 719         }
 720         restore_flags(flags);
 721         printk("= %lukB)\n", total);
 722 #ifdef SWAP_CACHE_INFO
 723         show_swap_cache_info();
 724 #endif  
 725 }
 726 
 727 /*
 728  * Trying to stop swapping from a file is fraught with races, so
 729  * we repeat quite a bit here when we have to pause. swapoff()
 730  * isn't exactly timing-critical, so who cares?
 731  */
  732 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Remove every reference to swap area `type` from the page tables of
      * all tasks.
      *
      * Present pages whose swap-cache entry belongs to the area are marked
      * dirty and dropped from the swap cache.  Non-present entries that
      * point into the area are read back into a fresh page.
      *
      * Returns 0 on success, -ENOMEM if no page could be allocated.
      */
  733 {
  734         int nr, pgt, pg;
  735         unsigned long page, *ppage;
              /* Spare page for the next swap-in; 0 while none is held. */
  736         unsigned long tmp = 0;
  737         struct task_struct *p;
  738 
  739         nr = 0;
  740         
  741 /*
  742  * When we have to sleep, we restart the whole algorithm from the same
  743  * task we stopped in. That at least rids us of all races.
  744  */
  745 repeat:
  746         for (; nr < NR_TASKS ; nr++) {
  747                 p = task[nr];
  748                 if (!p)
  749                         continue;
  750                 for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++) {
  751                         ppage = pgt + PAGE_DIR_OFFSET(p, 0);
  752                         page = *ppage;
  753                         if (!page)
  754                                 continue;
  755                         if (!(page & PAGE_PRESENT) || (page >= high_memory))
  756                                 continue;
  757                         if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
  758                                 continue;
                              /* From here on ppage walks the entries of this page table. */
  759                         ppage = (unsigned long *) (page & PAGE_MASK);   
  760                         for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++) {
  761                                 page = *ppage;
  762                                 if (!page)
  763                                         continue;
  764                                 if (page & PAGE_PRESENT) {
  765                                         if (page >= high_memory)
  766                                                 continue;
                                              /* page now holds the swap entry cached for this page, if any. */
  767                                         if (!(page = in_swap_cache(page)))
  768                                                 continue;
  769                                         if (SWP_TYPE(page) != type)
  770                                                 continue;
  771                                         *ppage |= PAGE_DIRTY;
  772                                         delete_from_swap_cache(*ppage);
  773                                         continue;
  774                                 }
  775                                 if (SWP_TYPE(page) != type)
  776                                         continue;
                                      /* Get the spare page first, then rescan from the current task. */
  777                                 if (!tmp) {
  778                                         if (!(tmp = __get_free_page(GFP_KERNEL)))
  779                                                 return -ENOMEM;
  780                                         goto repeat;
  781                                 }
  782                                 read_swap_page(page, (char *) tmp);
                                      /* Install the page only if the entry is still the one that was read. */
  783                                 if (*ppage == page) {
  784                                         *ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
  785                                         ++p->mm->rss;
  786                                         swap_free(page);
  787                                         tmp = 0;
  788                                 }
  789                                 goto repeat;
  790                         }
  791                 }
  792         }
              /* Give back the spare page; tmp may be 0 here. */
  793         free_page(tmp);
  794         return 0;
  795 }
 796 
 797 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 798 {
 799         struct swap_info_struct * p;
 800         struct inode * inode;
 801         unsigned int type;
 802         struct file filp;
 803         int i;
 804 
 805         if (!suser())
 806                 return -EPERM;
 807         i = namei(specialfile,&inode);
 808         if (i)
 809                 return i;
 810         p = swap_info;
 811         for (type = 0 ; type < nr_swapfiles ; type++,p++) {
 812                 if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
 813                         continue;
 814                 if (p->swap_file) {
 815                         if (p->swap_file == inode)
 816                                 break;
 817                 } else {
 818                         if (!S_ISBLK(inode->i_mode))
 819                                 continue;
 820                         if (p->swap_device == inode->i_rdev)
 821                                 break;
 822                 }
 823         }
 824 
 825         if (type >= nr_swapfiles){
 826                 iput(inode);
 827                 return -EINVAL;
 828         }
 829         p->flags = SWP_USED;
 830         i = try_to_unuse(type);
 831         if (i) {
 832                 iput(inode);
 833                 p->flags = SWP_WRITEOK;
 834                 return i;
 835         }
 836 
 837         if(p->swap_device){
 838                 memset(&filp, 0, sizeof(filp));         
 839                 filp.f_inode = inode;
 840                 filp.f_mode = 3; /* read write */
 841                 /* open it again to get fops */
 842                 if( !blkdev_open(inode, &filp) &&
 843                    filp.f_op && filp.f_op->release){
 844                         filp.f_op->release(inode,&filp);
 845                         filp.f_op->release(inode,&filp);
 846                 }
 847         }
 848         iput(inode);
 849 
 850         nr_swap_pages -= p->pages;
 851         iput(p->swap_file);
 852         p->swap_file = NULL;
 853         p->swap_device = 0;
 854         vfree(p->swap_map);
 855         p->swap_map = NULL;
 856         free_page((long) p->swap_lockmap);
 857         p->swap_lockmap = NULL;
 858         p->flags = 0;
 859         return 0;
 860 }
 861 
 862 /*
 863  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 864  *
 865  * The swapon system call
 866  */
 867 asmlinkage int sys_swapon(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 868 {
 869         struct swap_info_struct * p;
 870         struct inode * swap_inode;
 871         unsigned int type;
 872         int i,j;
 873         int error;
 874         struct file filp;
 875 
 876         memset(&filp, 0, sizeof(filp));
 877         if (!suser())
 878                 return -EPERM;
 879         p = swap_info;
 880         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 881                 if (!(p->flags & SWP_USED))
 882                         break;
 883         if (type >= MAX_SWAPFILES)
 884                 return -EPERM;
 885         if (type >= nr_swapfiles)
 886                 nr_swapfiles = type+1;
 887         p->flags = SWP_USED;
 888         p->swap_file = NULL;
 889         p->swap_device = 0;
 890         p->swap_map = NULL;
 891         p->swap_lockmap = NULL;
 892         p->lowest_bit = 0;
 893         p->highest_bit = 0;
 894         p->max = 1;
 895         error = namei(specialfile,&swap_inode);
 896         if (error)
 897                 goto bad_swap_2;
 898         p->swap_file = swap_inode;
 899         error = -EBUSY;
 900         if (swap_inode->i_count != 1)
 901                 goto bad_swap_2;
 902         error = -EINVAL;
 903 
 904         if (S_ISBLK(swap_inode->i_mode)) {
 905                 p->swap_device = swap_inode->i_rdev;
 906 
 907                 filp.f_inode = swap_inode;
 908                 filp.f_mode = 3; /* read write */
 909                 error = blkdev_open(swap_inode, &filp);
 910                 p->swap_file = NULL;
 911                 iput(swap_inode);
 912                 if(error)
 913                         goto bad_swap_2;
 914                 error = -ENODEV;
 915                 if (!p->swap_device)
 916                         goto bad_swap;
 917                 error = -EBUSY;
 918                 for (i = 0 ; i < nr_swapfiles ; i++) {
 919                         if (i == type)
 920                                 continue;
 921                         if (p->swap_device == swap_info[i].swap_device)
 922                                 goto bad_swap;
 923                 }
 924         } else if (!S_ISREG(swap_inode->i_mode))
 925                 goto bad_swap;
 926         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 927         if (!p->swap_lockmap) {
 928                 printk("Unable to start swapping: out of memory :-)\n");
 929                 error = -ENOMEM;
 930                 goto bad_swap;
 931         }
 932         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 933         if (memcmp("SWAP-SPACE",p->swap_lockmap+4086,10)) {
 934                 printk("Unable to find swap-space signature\n");
 935                 error = -EINVAL;
 936                 goto bad_swap;
 937         }
 938         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 939         j = 0;
 940         p->lowest_bit = 0;
 941         p->highest_bit = 0;
 942         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 943                 if (test_bit(i,p->swap_lockmap)) {
 944                         if (!p->lowest_bit)
 945                                 p->lowest_bit = i;
 946                         p->highest_bit = i;
 947                         p->max = i+1;
 948                         j++;
 949                 }
 950         }
 951         if (!j) {
 952                 printk("Empty swap-file\n");
 953                 error = -EINVAL;
 954                 goto bad_swap;
 955         }
 956         p->swap_map = (unsigned char *) vmalloc(p->max);
 957         if (!p->swap_map) {
 958                 error = -ENOMEM;
 959                 goto bad_swap;
 960         }
 961         for (i = 1 ; i < p->max ; i++) {
 962                 if (test_bit(i,p->swap_lockmap))
 963                         p->swap_map[i] = 0;
 964                 else
 965                         p->swap_map[i] = 0x80;
 966         }
 967         p->swap_map[0] = 0x80;
 968         memset(p->swap_lockmap,0,PAGE_SIZE);
 969         p->flags = SWP_WRITEOK;
 970         p->pages = j;
 971         nr_swap_pages += j;
 972         printk("Adding Swap: %dk swap-space\n",j<<2);
 973         return 0;
 974 bad_swap:
 975         if(filp.f_op && filp.f_op->release)
 976                 filp.f_op->release(filp.f_inode,&filp);
 977 bad_swap_2:
 978         free_page((long) p->swap_lockmap);
 979         vfree(p->swap_map);
 980         iput(p->swap_file);
 981         p->swap_device = 0;
 982         p->swap_file = NULL;
 983         p->swap_map = NULL;
 984         p->swap_lockmap = NULL;
 985         p->flags = 0;
 986         return error;
 987 }
 988 
 989 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 990 {
 991         unsigned int i, j;
 992 
 993         val->freeswap = val->totalswap = 0;
 994         for (i = 0; i < nr_swapfiles; i++) {
 995                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 996                         continue;
 997                 for (j = 0; j < swap_info[i].max; ++j)
 998                         switch (swap_info[i].swap_map[j]) {
 999                                 case 128:
1000                                         continue;
1001                                 case 0:
1002                                         ++val->freeswap;
1003                                 default:
1004                                         ++val->totalswap;
1005                         }
1006         }
1007         val->freeswap <<= PAGE_SHIFT;
1008         val->totalswap <<= PAGE_SHIFT;
1009         return;
1010 }
1011 
1012 /*
1013  * set up the free-area data structures:
1014  *   - mark all pages MAP_PAGE_RESERVED
1015  *   - mark all memory queues empty
1016  *   - clear the memory bitmaps
1017  */
1018 unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
     /* [previous][next][first][last][top][bottom][index][help] */
1019 {
1020         mem_map_t * p;
1021         unsigned long mask = PAGE_MASK;
1022         int i;
1023 
1024         /*
1025          * select nr of pages we try to keep free for important stuff
1026          * with a minimum of 16 pages. This is totally arbitrary
1027          */
1028         i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
1029         if (i < 16)
1030                 i = 16;
1031         min_free_pages = i;
1032         start_mem = init_swap_cache(start_mem, end_mem);
1033         mem_map = (mem_map_t *) start_mem;
1034         p = mem_map + MAP_NR(end_mem);
1035         start_mem = (unsigned long) p;
1036         while (p > mem_map)
1037                 *--p = MAP_PAGE_RESERVED;
1038 
1039         for (i = 0 ; i < NR_MEM_LISTS ; i++) {
1040                 unsigned long bitmap_size;
1041                 free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
1042                 mask += mask;
1043                 end_mem = (end_mem + ~mask) & mask;
1044                 bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
1045                 bitmap_size = (bitmap_size + 7) >> 3;
1046                 bitmap_size = (bitmap_size + sizeof(unsigned long) - 1) & ~(sizeof(unsigned long)-1);
1047                 free_area_map[i] = (unsigned char *) start_mem;
1048                 memset((void *) start_mem, 0, bitmap_size);
1049                 start_mem += bitmap_size;
1050         }
1051         return start_mem;
1052 }

/* [previous][next][first][last][top][bottom][index][help] */