root/mm/vmscan.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. try_to_swap_out
  2. swap_out_pmd
  3. swap_out_pgd
  4. swap_out_vma
  5. swap_out_process
  6. swap_out
  7. try_to_free_page
  8. kswapd
  9. swap_tick
  10. init_swap_timer

   1 /*
   2  *  linux/mm/vmscan.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  *
   6  *  Swap reorganised 29.12.95, 
   7  */
   8 
   9 #include <linux/mm.h>
  10 #include <linux/sched.h>
  11 #include <linux/head.h>
  12 #include <linux/kernel.h>
  13 #include <linux/kernel_stat.h>
  14 #include <linux/errno.h>
  15 #include <linux/string.h>
  16 #include <linux/stat.h>
  17 #include <linux/swap.h>
  18 #include <linux/fs.h>
  19 #include <linux/swapctl.h>
  20 #include <linux/smp_lock.h>
  21 
  22 #include <asm/dma.h>
  23 #include <asm/system.h> /* for cli()/sti() */
  24 #include <asm/segment.h> /* for memcpy_to/fromfs */
  25 #include <asm/bitops.h>
  26 #include <asm/pgtable.h>
  27 
  28 /* 
  29  * When are we next due for a page scan? 
      * Compared against jiffies in swap_tick(), which also moves it
      * swapout_interval ticks ahead whenever its memory test passes.
  30  */
  31 static int next_swap_jiffies = 0;
  32 
  33 /* 
  34  * How often do we do a pageout scan during normal conditions?
  35  * Default is four times a second.
      * The value is a tick count: swap_tick() adds it to jiffies.
  36  */
  37 int swapout_interval = HZ / 4;
  38 
  39 /* 
  40  * The wait queue for waking up the pageout daemon:
      * kswapd() sleeps on it and swap_tick() wakes it.
  41  */
  42 static struct wait_queue * kswapd_wait = NULL;
  43 
  44 /* 
  45  * We avoid doing a reschedule if the pageout daemon is already awake;
      * kswapd() clears this flag just before sleeping and sets it again on
      * waking, and swap_tick() sets it when it issues a wake-up.
  46  */
  47 static int kswapd_awake = 0;
  48 
  49 /*
  50  * sysctl-modifiable parameters to control the aggressiveness of the
  51  * page-searching within the kswapd page recovery daemon.
      * Only the maxpages member is read in this file (by kswapd() and
      * swap_tick()).
      * NOTE(review): the initialiser is positional, so which member gets
      * the 4 depends on the layout of kswapd_control_t, which is declared
      * elsewhere -- confirm against that declaration.
  52  */
  53 kswapd_control_t kswapd_ctl = {4, -1, -1, -1, -1};
  54 
     /* Defined at the end of this file; called from kswapd() at start-up. */
  55 static void init_swap_timer(void);
  56 
  57 /*
  58  * The swap-out functions return 1 if they successfully
  59  * threw something out, and we got a free page. It returns
  60  * zero if it couldn't do anything, and any other value
  61  * indicates it decreased rss, but the page was shared.
  62  *
  63  * NOTE! If it sleeps, it *must* return 1 to make sure we
  64  * don't continue with the swap-out. Otherwise we may be
  65  * using a process that no longer actually exists (it might
  66  * have died while we slept).
      *
      * try_to_swap_out() looks at the single pte at page_table, which
      * maps `address' inside `vma' of task `tsk'.  A page whose address
      * is at or above `limit' is left alone.
  67  */
  68 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
     /* [previous][next][first][last][top][bottom][index][help] */
  69         unsigned long address, pte_t * page_table, unsigned long limit)
  70 {
  71         pte_t pte;
  72         unsigned long entry;
  73         unsigned long page;
  74         struct page * page_map;
  75 
  76         pte = *page_table;
             /* Only a present pte has a page that could be reclaimed. */
  77         if (!pte_present(pte))
  78                 return 0;
  79         page = pte_page(pte);
             /* Ignore pages at or beyond high_memory ... */
  80         if (MAP_NR(page) >= MAP_NR(high_memory))
  81                 return 0;
             /* ... and pages at or above the caller-supplied limit. */
  82         if (page >= limit)
  83                 return 0;
  84 
  85         page_map = mem_map + MAP_NR(page);
             /* Reserved pages are never candidates. */
  86         if (page_map->reserved)
  87                 return 0;
  88         /* Deal with page aging.  Pages age from being unused; they
  89          * rejuvenate on being accessed.  Only swap old pages (age==0
  90          * is oldest).
              *
              * A young pte, or a dirty one for which
              * delete_from_swap_cache() returns non-zero, only gets its pte
              * marked old and its page touched; nothing is freed this time. */
  91         if ((pte_dirty(pte) && delete_from_swap_cache(page)) 
  92             || pte_young(pte))  {
  93                 set_pte(page_table, pte_mkold(pte));
  94                 touch_page(page_map);
  95                 return 0;
  96         }
             /* Age the page once; leave it alone while its age is non-zero. */
  97         age_page(page_map);
  98         if (page_map->age)
  99                 return 0;
 100         if (pte_dirty(pte)) {
 101                 if (vma->vm_ops && vma->vm_ops->swapout) {
                             /* The area supplies its own swapout operation.
                              * The pid is saved first; presumably the
                              * operation can sleep, after which tsk may be
                              * gone (see the NOTE in the header comment). */
 102                         pid_t pid = tsk->pid;
 103                         vma->vm_mm->rss--;
                             /* The second argument is the page's offset as
                              * address - vm_start + vm_offset; a non-zero
                              * return sends SIGBUS to the saved pid. */
 104                         if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
 105                                 kill_proc(pid, SIGBUS, 1);
 106                 } else {
                             /* No swapout operation: write the page to swap,
                              * but only if its count is exactly 1 and
                              * get_swap_page() yields a non-zero entry. */
 107                         if (page_map->count != 1)
 108                                 return 0;
 109                         if (!(entry = get_swap_page()))
 110                                 return 0;
 111                         vma->vm_mm->rss--;
                             /* The pte now holds the swap entry, not the page. */
 112                         set_pte(page_table, __pte(entry));
 113                         invalidate_page(vma, address);
 114                         tsk->nswap++;
 115                         write_swap_page(entry, (char *) page);
 116                 }
 117                 free_page(page);
 118                 return 1;       /* we slept: the process may not exist any more */
 119         }
             /* Clean page with an entry in the swap cache: point the pte at
              * that entry and drop the page; no write is issued here. */
 120         if ((entry = find_in_swap_cache(page)))  {
 121                 if (page_map->count != 1) {
                             /* Unexpected extra reference: mark the pte
                              * dirty, complain, and free nothing. */
 122                         set_pte(page_table, pte_mkdirty(pte));
 123                         printk("Aiee.. duplicated cached swap-cache entry\n");
 124                         return 0;
 125                 }
 126                 vma->vm_mm->rss--;
 127                 set_pte(page_table, __pte(entry));
 128                 invalidate_page(vma, address);
 129                 free_page(page);
 130                 return 1;
 131         } 
             /* Clean page with no swap-cache entry: clear the pte and drop
              * the page.  The return value is whatever page_unuse() gave. */
 132         vma->vm_mm->rss--;
 133         pte_clear(page_table);
 134         invalidate_page(vma, address);
 135         entry = page_unuse(page);
 136         free_page(page);
 137         return entry;
 138 }
 139 
 140 /*
 141  * A new implementation of swap_out().  We do not swap complete processes,
 142  * but only a small number of blocks, before we continue with the next
 143  * process.  The number of blocks actually swapped is determined on the
 144  * number of page faults, that this process actually had in the last time,
 145  * so we won't swap heavily used processes all the time ...
 146  *
 147  * Note: the priority argument is a hint on how much CPU to waste with the
 148  *       swap block search, not a hint of how many blocks to swap with
 149  *       each process.
 150  *
 151  * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
 152  */
 153 
 154 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 155         pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
 156 {
 157         pte_t * pte;
 158         unsigned long pmd_end;
 159 
 160         if (pmd_none(*dir))
 161                 return 0;
 162         if (pmd_bad(*dir)) {
 163                 printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 164                 pmd_clear(dir);
 165                 return 0;
 166         }
 167         
 168         pte = pte_offset(dir, address);
 169         
 170         pmd_end = (address + PMD_SIZE) & PMD_MASK;
 171         if (end > pmd_end)
 172                 end = pmd_end;
 173 
 174         do {
 175                 int result;
 176                 tsk->swap_address = address + PAGE_SIZE;
 177                 result = try_to_swap_out(tsk, vma, address, pte, limit);
 178                 if (result)
 179                         return result;
 180                 address += PAGE_SIZE;
 181                 pte++;
 182         } while (address < end);
 183         return 0;
 184 }
 185 
 186 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 187         pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
 188 {
 189         pmd_t * pmd;
 190         unsigned long pgd_end;
 191 
 192         if (pgd_none(*dir))
 193                 return 0;
 194         if (pgd_bad(*dir)) {
 195                 printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 196                 pgd_clear(dir);
 197                 return 0;
 198         }
 199 
 200         pmd = pmd_offset(dir, address);
 201 
 202         pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;  
 203         if (end > pgd_end)
 204                 end = pgd_end;
 205         
 206         do {
 207                 int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
 208                 if (result)
 209                         return result;
 210                 address = (address + PMD_SIZE) & PMD_MASK;
 211                 pmd++;
 212         } while (address < end);
 213         return 0;
 214 }
 215 
 216 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
     /* [previous][next][first][last][top][bottom][index][help] */
 217         pgd_t *pgdir, unsigned long start, unsigned long limit)
 218 {
 219         unsigned long end;
 220 
 221         /* Don't swap out areas like shared memory which have their
 222             own separate swapping mechanism or areas which are locked down */
 223         if (vma->vm_flags & (VM_SHM | VM_LOCKED))
 224                 return 0;
 225 
 226         end = vma->vm_end;
 227         while (start < end) {
 228                 int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
 229                 if (result)
 230                         return result;
 231                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 232                 pgdir++;
 233         }
 234         return 0;
 235 }
 236 
 237 static int swap_out_process(struct task_struct * p, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
 238 {
 239         unsigned long address;
 240         struct vm_area_struct* vma;
 241 
 242         /*
 243          * Go through process' page directory.
 244          */
 245         address = p->swap_address;
 246         p->swap_address = 0;
 247 
 248         /*
 249          * Find the proper vm-area
 250          */
 251         vma = find_vma(p, address);
 252         if (!vma)
 253                 return 0;
 254         if (address < vma->vm_start)
 255                 address = vma->vm_start;
 256 
 257         for (;;) {
 258                 int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
 259                 if (result)
 260                         return result;
 261                 vma = vma->vm_next;
 262                 if (!vma)
 263                         break;
 264                 address = vma->vm_start;
 265         }
 266         p->swap_address = 0;
 267         return 0;
 268 }
 269 
 270 static int swap_out(unsigned int priority, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
 271 {
 272         static int swap_task;
 273         int loop, counter;
 274         struct task_struct *p;
 275 
 276         counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
 277         for(; counter >= 0; counter--) {
 278                 /*
 279                  * Check that swap_task is suitable for swapping.  If not, look for
 280                  * the next suitable process.
 281                  */
 282                 loop = 0;
 283                 while(1) {
 284                         if (swap_task >= NR_TASKS) {
 285                                 swap_task = 1;
 286                                 if (loop)
 287                                         /* all processes are unswappable or already swapped out */
 288                                         return 0;
 289                                 loop = 1;
 290                         }
 291 
 292                         p = task[swap_task];
 293                         if (p && p->swappable && p->mm->rss)
 294                                 break;
 295 
 296                         swap_task++;
 297                 }
 298 
 299                 /*
 300                  * Determine the number of pages to swap from this process.
 301                  */
 302                 if (!p->swap_cnt) {
 303                         /* Normalise the number of pages swapped by
 304                            multiplying by (RSS / 1MB) */
 305                         p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
 306                 }
 307                 if (!--p->swap_cnt)
 308                         swap_task++;
 309                 switch (swap_out_process(p, limit)) {
 310                         case 0:
 311                                 if (p->swap_cnt)
 312                                         swap_task++;
 313                                 break;
 314                         case 1:
 315                                 return 1;
 316                         default:
 317                                 break;
 318                 }
 319         }
 320         return 0;
 321 }
 322 
 323 /*
 324  * We are much more aggressive about trying to swap out than we used
 325  * to be.  This works out OK, because we now do proper aging on page
 326  * contents. 
      *
      * try_to_free_page() cycles through three stages -- shrink_mmap(),
      * shm_swap() and swap_out() -- and returns 1 as soon as one of them
      * returns non-zero.  Each stage is called with the loop counter i,
      * which runs from 6 down to 0, and with the caller's `limit'.
      * Returns 0 once i is exhausted without success.
      *
      * The `priority' parameter is not used in the body.
 327  */
 328 int try_to_free_page(int priority, unsigned long limit)
     /* [previous][next][first][last][top][bottom][index][help] */
 329 {
             /* Stage to resume at on the next call.  It is advanced only
              * after a stage fails, so a stage that succeeded is tried first
              * again next time. */
 330         static int state = 0;
 331         int i=6;
 332 
             /* The switch jumps into the middle of the do/while loop: the
              * first pass starts at the remembered stage, and every later
              * pass runs all three stages in order by falling through the
              * case labels. */
 333         switch (state) {
 334                 do {
 335                 case 0:
 336                         if (shrink_mmap(i, limit))
 337                                 return 1;
 338                         state = 1;
                     /* falls through */
 339                 case 1:
 340                         if (shm_swap(i, limit))
 341                                 return 1;
 342                         state = 2;
                     /* falls through */
 343                 default:
 344                         if (swap_out(i, limit))
 345                                 return 1;
 346                         state = 0;
                     /* i-- is tested before the decrement takes effect, so
                      * the stages also run with i == 0 before the loop ends. */
 347                 } while(i--);
 348         }
 349         return 0;
 350 }
 351 
 352 
 353 /*
 354  * The background pageout daemon.
 355  * Started as a kernel thread from the init process.
 356  */
 357 int kswapd(void *unused)
     /* [previous][next][first][last][top][bottom][index][help] */
 358 {
 359         int i;
 360         
 361         current->session = 1;
 362         current->pgrp = 1;
 363         sprintf(current->comm, "kswapd");
 364         current->blocked = ~0UL;
 365         
 366         /*
 367          *      As a kernel thread we want to tamper with system buffers
 368          *      and other internals and thus be subject to the SMP locking
 369          *      rules. (On a uniprocessor box this does nothing).
 370          */
 371          
 372 #ifdef __SMP__
 373         lock_kernel();
 374         syscall_count++;
 375 #endif
 376 
 377         /* Give kswapd a realtime priority. */
 378         current->policy = SCHED_FIFO;
 379         current->priority = 32;  /* Fixme --- we need to standardise our
 380                                     namings for POSIX.4 realtime scheduling
 381                                     priorities.  */
 382 
 383         printk ("Started kswapd v$Revision: 1.1.2.3 $\n");
 384         init_swap_timer();
 385         
 386         while (1) {
 387                 kswapd_awake = 0;
 388                 current->signal = 0;
 389                 interruptible_sleep_on(&kswapd_wait);
 390                 kswapd_awake = 1;
 391                 
 392                 /* Do the background pageout: */
 393                 for (i=0; i < kswapd_ctl.maxpages; i++)
 394                         try_to_free_page(GFP_KERNEL, ~0UL);
 395         }
 396 }
 397 
 398 /* 
 399  * The swap_tick function gets called on every clock tick.
 400  */
 401 
 402 void swap_tick(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 403 {
 404         if (nr_free_pages < free_pages_low ||
 405             (nr_free_pages < free_pages_high && 
 406              jiffies >= next_swap_jiffies)) {
 407                 if (!kswapd_awake && kswapd_ctl.maxpages > 0) {
 408                         wake_up(&kswapd_wait);
 409                         need_resched = 1;
 410                         kswapd_awake = 1;
 411                 }
 412                 next_swap_jiffies = jiffies + swapout_interval;
 413         }
 414         timer_active |= (1<<SWAP_TIMER);
 415 }
 416 
 417 
 418 /* 
 419  * Initialise the swap timer
 420  */
 421 
 422 void init_swap_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 423 {
 424         timer_table[SWAP_TIMER].expires = 0;
 425         timer_table[SWAP_TIMER].fn = swap_tick;
 426         timer_active |= (1<<SWAP_TIMER);
 427 }

/* [previous][next][first][last][top][bottom][index][help] */