root/mm/vmscan.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. try_to_swap_out
  2. swap_out_pmd
  3. swap_out_pgd
  4. swap_out_vma
  5. swap_out_process
  6. swap_out
  7. try_to_free_page
  8. kswapd
  9. swap_tick
  10. init_swap_timer

   1 /*
   2  *  linux/mm/vmscan.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  *
   6  *  Swap reorganised 29.12.95, Stephen Tweedie.
   7  *  kswapd added: 7.1.96  sct
   8  *  Version: $Id: vmscan.c,v 1.4.2.2 1996/01/20 18:22:47 linux Exp $
   9  */
  10 
  11 #include <linux/mm.h>
  12 #include <linux/sched.h>
  13 #include <linux/head.h>
  14 #include <linux/kernel.h>
  15 #include <linux/kernel_stat.h>
  16 #include <linux/errno.h>
  17 #include <linux/string.h>
  18 #include <linux/stat.h>
  19 #include <linux/swap.h>
  20 #include <linux/fs.h>
  21 #include <linux/swapctl.h>
  22 #include <linux/smp_lock.h>
  23 
  24 #include <asm/dma.h>
  25 #include <asm/system.h> /* for cli()/sti() */
  26 #include <asm/segment.h> /* for memcpy_to/fromfs */
  27 #include <asm/bitops.h>
  28 #include <asm/pgtable.h>
  29 
  30 /* 
  31  * When are we next due for a page scan? 
      * Compared against jiffies in swap_tick(), which re-arms it to
      * jiffies + swapout_interval each time the scan condition is met.
  32  */
  33 static int next_swap_jiffies = 0;
  34 
  35 /* 
  36  * How often do we do a pageout scan during normal conditions?
  37  * Default is four times a second.
      * The value is added to jiffies in swap_tick(), so it is a tick count.
  38  */
  39 int swapout_interval = HZ / 4;
  40 
  41 /* 
  42  * The wait queue for waking up the pageout daemon:
      * kswapd() sleeps on it, swap_tick() issues the wake_up().
  43  */
  44 static struct wait_queue * kswapd_wait = NULL;
  45 
  46 /* 
  47  * We avoid doing a reschedule if the pageout daemon is already awake;
      * kswapd() clears this flag before sleeping and sets it after waking,
      * swap_tick() sets it when it issues the wakeup.
  48  */
  49 static int kswapd_awake = 0;
  50 
  51 /*
  52  * sysctl-modifiable parameters to control the aggressiveness of the
  53  * page-searching within the kswapd page recovery daemon.
      * Only the maxpages member is used in this file: as the per-wakeup loop
      * bound in kswapd() and as a "> 0" gate in swap_tick().
      * NOTE(review): which initialiser below lands in which member depends on
      * the declaration of kswapd_control_t -- confirm in <linux/swapctl.h>.
  54  */
  55 kswapd_control_t kswapd_ctl = {4, -1, -1, -1, -1};
  56 
      /* Forward declaration: kswapd() calls this before its definition. */
  57 static void init_swap_timer(void);
  58 
  59 /*
  60  * The swap-out functions return 1 if they successfully
  61  * threw something out, and we got a free page. It returns
  62  * zero if it couldn't do anything, and any other value
  63  * indicates it decreased rss, but the page was shared.
  64  *
  65  * NOTE! If it sleeps, it *must* return 1 to make sure we
  66  * don't continue with the swap-out. Otherwise we may be
  67  * using a process that no longer actually exists (it might
  68  * have died while we slept).
  69  */
  70 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
     /* [previous][next][first][last][top][bottom][index][help] */
  71         unsigned long address, pte_t * page_table, int dma, int wait)
  72 {
  73         pte_t pte;
  74         unsigned long entry;
  75         unsigned long page;
  76         struct page * page_map;
  77 
  78         pte = *page_table;
  79         if (!pte_present(pte))
  80                 return 0;
  81         page = pte_page(pte);
  82         if (MAP_NR(page) >= MAP_NR(high_memory))
  83                 return 0;
  84 
  85         page_map = mem_map + MAP_NR(page);
  86         if (PageReserved(page_map)
  87             || PageLocked(page_map)
  88             || (dma && !PageDMA(page_map)))
  89                 return 0;
  90         /* Deal with page aging.  Pages age from being unused; they
  91          * rejuvenate on being accessed.  Only swap old pages (age==0
  92          * is oldest). */
  93         if ((pte_dirty(pte) && delete_from_swap_cache(MAP_NR(page))) 
  94             || pte_young(pte))  {
  95                 set_pte(page_table, pte_mkold(pte));
  96                 touch_page(page_map);
  97                 return 0;
  98         }
  99         age_page(page_map);
 100         if (page_map->age)
 101                 return 0;
 102         if (pte_dirty(pte)) {
 103                 if (vma->vm_ops && vma->vm_ops->swapout) {
 104                         pid_t pid = tsk->pid;
 105                         vma->vm_mm->rss--;
 106                         if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
 107                                 kill_proc(pid, SIGBUS, 1);
 108                 } else {
 109                         if (page_map->count != 1)
 110                                 return 0;
 111                         if (!(entry = get_swap_page()))
 112                                 return 0;
 113                         vma->vm_mm->rss--;
 114                         flush_cache_page(vma, address);
 115                         set_pte(page_table, __pte(entry));
 116                         flush_tlb_page(vma, address);
 117                         tsk->nswap++;
 118                         rw_swap_page(WRITE, entry, (char *) page, wait);
 119                 }
 120                 free_page(page);
 121                 return 1;       /* we slept: the process may not exist any more */
 122         }
 123         if ((entry = find_in_swap_cache(MAP_NR(page))))  {
 124                 if (page_map->count != 1) {
 125                         set_pte(page_table, pte_mkdirty(pte));
 126                         printk("Aiee.. duplicated cached swap-cache entry\n");
 127                         return 0;
 128                 }
 129                 vma->vm_mm->rss--;
 130                 flush_cache_page(vma, address);
 131                 set_pte(page_table, __pte(entry));
 132                 flush_tlb_page(vma, address);
 133                 free_page(page);
 134                 return 1;
 135         } 
 136         vma->vm_mm->rss--;
 137         flush_cache_page(vma, address);
 138         pte_clear(page_table);
 139         flush_tlb_page(vma, address);
 140         entry = page_unuse(page);
 141         free_page(page);
 142         return entry;
 143 }
 144 
 145 /*
 146  * A new implementation of swap_out().  We do not swap complete processes,
 147  * but only a small number of blocks, before we continue with the next
 148  * process.  The number of blocks actually swapped is determined by the
 149  * number of page faults that this process has had recently,
 150  * so we won't swap heavily used processes all the time ...
 151  *
 152  * Note: the priority argument is a hint on how much CPU to waste with the
 153  *       swap block search, not a hint of how many blocks to swap with
 154  *       each process.
 155  *
 156  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 157  */
 158 
 159 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 160         pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
 161 {
 162         pte_t * pte;
 163         unsigned long pmd_end;
 164 
 165         if (pmd_none(*dir))
 166                 return 0;
 167         if (pmd_bad(*dir)) {
 168                 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 169                 pmd_clear(dir);
 170                 return 0;
 171         }
 172         
 173         pte = pte_offset(dir, address);
 174         
 175         pmd_end = (address + PMD_SIZE) & PMD_MASK;
 176         if (end > pmd_end)
 177                 end = pmd_end;
 178 
 179         do {
 180                 int result;
 181                 tsk->swap_address = address + PAGE_SIZE;
 182                 result = try_to_swap_out(tsk, vma, address, pte, dma, wait);
 183                 if (result)
 184                         return result;
 185                 address += PAGE_SIZE;
 186                 pte++;
 187         } while (address < end);
 188         return 0;
 189 }
 190 
 191 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 192         pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
 193 {
 194         pmd_t * pmd;
 195         unsigned long pgd_end;
 196 
 197         if (pgd_none(*dir))
 198                 return 0;
 199         if (pgd_bad(*dir)) {
 200                 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 201                 pgd_clear(dir);
 202                 return 0;
 203         }
 204 
 205         pmd = pmd_offset(dir, address);
 206 
 207         pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;  
 208         if (end > pgd_end)
 209                 end = pgd_end;
 210         
 211         do {
 212                 int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait);
 213                 if (result)
 214                         return result;
 215                 address = (address + PMD_SIZE) & PMD_MASK;
 216                 pmd++;
 217         } while (address < end);
 218         return 0;
 219 }
 220 
 221 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 222         pgd_t *pgdir, unsigned long start, int dma, int wait)
 223 {
 224         unsigned long end;
 225 
 226         /* Don't swap out areas like shared memory which have their
 227             own separate swapping mechanism or areas which are locked down */
 228         if (vma->vm_flags & (VM_SHM | VM_LOCKED))
 229                 return 0;
 230 
 231         end = vma->vm_end;
 232         while (start < end) {
 233                 int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait);
 234                 if (result)
 235                         return result;
 236                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 237                 pgdir++;
 238         }
 239         return 0;
 240 }
 241 
 242 static int swap_out_process(struct task_struct * p, int dma, int wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 243 {
 244         unsigned long address;
 245         struct vm_area_struct* vma;
 246 
 247         /*
 248          * Go through process' page directory.
 249          */
 250         address = p->swap_address;
 251         p->swap_address = 0;
 252 
 253         /*
 254          * Find the proper vm-area
 255          */
 256         vma = find_vma(p, address);
 257         if (!vma)
 258                 return 0;
 259         if (address < vma->vm_start)
 260                 address = vma->vm_start;
 261 
 262         for (;;) {
 263                 int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait);
 264                 if (result)
 265                         return result;
 266                 vma = vma->vm_next;
 267                 if (!vma)
 268                         break;
 269                 address = vma->vm_start;
 270         }
 271         p->swap_address = 0;
 272         return 0;
 273 }
 274 
 275 static int swap_out(unsigned int priority, int dma, int wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277         static int swap_task;
 278         int loop, counter;
 279         struct task_struct *p;
 280 
 281         counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
 282         for(; counter >= 0; counter--) {
 283                 /*
 284                  * Check that swap_task is suitable for swapping.  If not, look for
 285                  * the next suitable process.
 286                  */
 287                 loop = 0;
 288                 while(1) {
 289                         if (swap_task >= NR_TASKS) {
 290                                 swap_task = 1;
 291                                 if (loop)
 292                                         /* all processes are unswappable or already swapped out */
 293                                         return 0;
 294                                 loop = 1;
 295                         }
 296 
 297                         p = task[swap_task];
 298                         if (p && p->swappable && p->mm->rss)
 299                                 break;
 300 
 301                         swap_task++;
 302                 }
 303 
 304                 /*
 305                  * Determine the number of pages to swap from this process.
 306                  */
 307                 if (!p->swap_cnt) {
 308                         /* Normalise the number of pages swapped by
 309                            multiplying by (RSS / 1MB) */
 310                         p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
 311                 }
 312                 if (!--p->swap_cnt)
 313                         swap_task++;
 314                 switch (swap_out_process(p, dma, wait)) {
 315                         case 0:
 316                                 if (p->swap_cnt)
 317                                         swap_task++;
 318                                 break;
 319                         case 1:
 320                                 return 1;
 321                         default:
 322                                 break;
 323                 }
 324         }
 325         return 0;
 326 }
 327 
 328 /*
 329  * We are much more aggressive about trying to swap out than we used
 330  * to be.  This works out OK, because we now do proper aging on page
 331  * contents. 
 332  */
 333 int try_to_free_page(int priority, int dma, int wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 334 {
 335         static int state = 0;
 336         int i=6;
 337 
 338         switch (state) {
 339                 do {
 340                 case 0:
 341                         if (shrink_mmap(i, dma))
 342                                 return 1;
 343                         state = 1;
 344                 case 1:
 345                         if (shm_swap(i, dma))
 346                                 return 1;
 347                         state = 2;
 348                 default:
 349                         if (swap_out(i, dma, wait))
 350                                 return 1;
 351                         state = 0;
 352                 } while (i--);
 353         }
 354         return 0;
 355 }
 356 
 357 
 358 /*
 359  * The background pageout daemon.
 360  * Started as a kernel thread from the init process.
 361  */
 362 int kswapd(void *unused)
     /* [previous][next][first][last][top][bottom][index][help] */
 363 {
 364         int i;
 365         char *revision="$Revision: 1.4.2.2 $", *s, *e;
 366         
 367         current->session = 1;
 368         current->pgrp = 1;
 369         sprintf(current->comm, "kswapd");
 370         current->blocked = ~0UL;
 371         
 372         /*
 373          *      As a kernel thread we want to tamper with system buffers
 374          *      and other internals and thus be subject to the SMP locking
 375          *      rules. (On a uniprocessor box this does nothing).
 376          */
 377          
 378 #ifdef __SMP__
 379         lock_kernel();
 380         syscall_count++;
 381 #endif
 382 
 383         /* Give kswapd a realtime priority. */
 384         current->policy = SCHED_FIFO;
 385         current->priority = 32;  /* Fixme --- we need to standardise our
 386                                     namings for POSIX.4 realtime scheduling
 387                                     priorities.  */
 388 
 389         init_swap_timer();
 390         
 391         if ((s = strchr(revision, ':')) &&
 392             (e = strchr(s, '$')))
 393                 s++, i = e - s;
 394         else
 395                 s = revision, i = -1;
 396         printk ("Started kswapd v%.*s\n", i, s);
 397 
 398         while (1) {
 399                 kswapd_awake = 0;
 400                 current->signal = 0;
 401                 interruptible_sleep_on(&kswapd_wait);
 402                 kswapd_awake = 1;
 403                 swapstats.wakeups++;
 404                 /* Do the background pageout: */
 405                 for (i=0; i < kswapd_ctl.maxpages; i++)
 406                         try_to_free_page(GFP_KERNEL, 0, 0);
 407         }
 408 }
 409 
 410 /* 
 411  * The swap_tick function gets called on every clock tick.
 412  */
 413 
 414 void swap_tick(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 415 {
 416         if ((nr_free_pages + nr_async_pages) < free_pages_low ||
 417             ((nr_free_pages + nr_async_pages) < free_pages_high && 
 418              jiffies >= next_swap_jiffies)) {
 419                 if (!kswapd_awake && kswapd_ctl.maxpages > 0) {
 420                         wake_up(&kswapd_wait);
 421                         need_resched = 1;
 422                         kswapd_awake = 1;
 423                 }
 424                 next_swap_jiffies = jiffies + swapout_interval;
 425         }
 426         timer_active |= (1<<SWAP_TIMER);
 427 }
 428 
 429 
 430 /* 
 431  * Initialise the swap timer
 432  */
 433 
 434 void init_swap_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 435 {
 436         timer_table[SWAP_TIMER].expires = 0;
 437         timer_table[SWAP_TIMER].fn = swap_tick;
 438         timer_active |= (1<<SWAP_TIMER);
 439 }

/* [previous][next][first][last][top][bottom][index][help] */