root/mm/swapfile.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. scan_swap_map
  2. get_swap_page
  3. swap_free
  4. unuse_pte
  5. unuse_pmd
  6. unuse_pgd
  7. unuse_vma
  8. unuse_process
  9. try_to_unuse
  10. sys_swapoff
  11. sys_swapon
  12. si_swapinfo

   1 /*
   2  *  linux/mm/swapfile.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  *  Swap reorganised 29.12.95, Stephen Tweedie
   6  */
   7 
   8 #include <linux/mm.h>
   9 #include <linux/sched.h>
  10 #include <linux/head.h>
  11 #include <linux/kernel.h>
  12 #include <linux/kernel_stat.h>
  13 #include <linux/errno.h>
  14 #include <linux/string.h>
  15 #include <linux/stat.h>
  16 #include <linux/swap.h>
  17 #include <linux/fs.h>
  18 #include <linux/swapctl.h>
  19 
  20 #include <asm/dma.h>
  21 #include <asm/system.h> /* for cli()/sti() */
  22 #include <asm/segment.h> /* for memcpy_to/fromfs */
  23 #include <asm/bitops.h>
  24 #include <asm/pgtable.h>
  25 
  26 int nr_swapfiles = 0;
  27 static struct {
  28         int head;       /* head of priority-ordered swapfile list */
  29         int next;       /* swapfile to be used next */
  30 } swap_list = {-1, -1};
  31 
  32 struct swap_info_struct swap_info[MAX_SWAPFILES];
  33 
  34 
  35 static inline int scan_swap_map(struct swap_info_struct *si)
     /* [previous][next][first][last][top][bottom][index][help] */
  36 {
  37         int offset;
  38         /* 
  39          * We try to cluster swap pages by allocating them
  40          * sequentially in swap.  Once we've allocated
  41          * SWAP_CLUSTER_MAX pages this way, however, we resort to
  42          * first-free allocation, starting a new cluster.  This
  43          * prevents us from scattering swap pages all over the entire
  44          * swap partition, so that we reduce overall disk seek times
  45          * between swap pages.  -- sct */
  46         if (si->cluster_nr) {
  47                 while (si->cluster_next <= si->highest_bit) {
  48                         offset = si->cluster_next++;
  49                         if (si->swap_map[offset])
  50                                 continue;
  51                         if (test_bit(offset, si->swap_lockmap))
  52                                 continue;
  53                         si->cluster_nr--;
  54                         goto got_page;
  55                 }
  56         }
  57         si->cluster_nr = SWAP_CLUSTER_MAX;
  58         for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
  59                 if (si->swap_map[offset])
  60                         continue;
  61                 if (test_bit(offset, si->swap_lockmap))
  62                         continue;
  63                 si->lowest_bit = offset;
  64 got_page:
  65                 si->swap_map[offset] = 1;
  66                 nr_swap_pages--;
  67                 if (offset == si->highest_bit)
  68                         si->highest_bit--;
  69                 si->cluster_next = offset;
  70                 return offset;
  71         }
  72         return 0;
  73 }
  74 
  75 unsigned long get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  76 {
  77         struct swap_info_struct * p;
  78         unsigned long offset, entry;
  79         int type, wrapped = 0;
  80 
  81         type = swap_list.next;
  82         if (type < 0)
  83           return 0;
  84 
  85         while (1) {
  86                 p = &swap_info[type];
  87                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
  88                         offset = scan_swap_map(p);
  89                         if (offset) {
  90                                 entry = SWP_ENTRY(type,offset);
  91                                 type = swap_info[type].next;
  92                                 if (type < 0 ||
  93                                         p->prio != swap_info[type].prio) 
  94                                 {
  95                                                 swap_list.next = swap_list.head;
  96                                 }
  97                                 else
  98                                 {
  99                                         swap_list.next = type;
 100                                 }
 101                                 return entry;
 102                         }
 103                 }
 104                 type = p->next;
 105                 if (!wrapped) {
 106                         if (type < 0 || p->prio != swap_info[type].prio) {
 107                                 type = swap_list.head;
 108                                 wrapped = 1;
 109                         }
 110                 } else if (type < 0) {
 111                         return 0;       /* out of swap space */
 112                 }
 113         }
 114 }
 115 
 116 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 117 {
 118         struct swap_info_struct * p;
 119         unsigned long offset, type;
 120 
 121         if (!entry)
 122                 return;
 123         type = SWP_TYPE(entry);
 124         if (type & SHM_SWP_TYPE)
 125                 return;
 126         if (type >= nr_swapfiles) {
 127                 printk("Trying to free nonexistent swap-page\n");
 128                 return;
 129         }
 130         p = & swap_info[type];
 131         offset = SWP_OFFSET(entry);
 132         if (offset >= p->max) {
 133                 printk("swap_free: weirdness\n");
 134                 return;
 135         }
 136         if (!(p->flags & SWP_USED)) {
 137                 printk("Trying to free swap from unused swap-device\n");
 138                 return;
 139         }
 140         if (offset < p->lowest_bit)
 141                 p->lowest_bit = offset;
 142         if (offset > p->highest_bit)
 143                 p->highest_bit = offset;
 144         if (!p->swap_map[offset])
 145                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 146         else
 147                 if (!--p->swap_map[offset])
 148                         nr_swap_pages++;
 149         if (p->prio > swap_info[swap_list.next].prio) {
 150             swap_list.next = swap_list.head;
 151         }
 152 }
 153 
 154 /*
 155  * Trying to stop swapping from a file is fraught with races, so
 156  * we repeat quite a bit here when we have to pause. swapoff()
 157  * isn't exactly timing-critical, so who cares (but this is /really/
 158  * inefficient, ugh).
 159  *
 160  * We return 1 after having slept, which makes the process start over
 161  * from the beginning for this process..
 162  */
 163 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 164         pte_t *dir, unsigned int type, unsigned long page)
 165 {
 166         pte_t pte = *dir;
 167 
 168         if (pte_none(pte))
 169                 return 0;
 170         if (pte_present(pte)) {
 171                 unsigned long page = pte_page(pte);
 172                 if (page >= high_memory)
 173                         return 0;
 174                 if (!in_swap_cache(page))
 175                         return 0;
 176                 if (SWP_TYPE(in_swap_cache(page)) != type)
 177                         return 0;
 178                 delete_from_swap_cache(page);
 179                 set_pte(dir, pte_mkdirty(pte));
 180                 return 0;
 181         }
 182         if (SWP_TYPE(pte_val(pte)) != type)
 183                 return 0;
 184         read_swap_page(pte_val(pte), (char *) page);
 185         if (pte_val(*dir) != pte_val(pte)) {
 186                 free_page(page);
 187                 return 1;
 188         }
 189         set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 190         ++vma->vm_mm->rss;
 191         swap_free(pte_val(pte));
 192         return 1;
 193 }
 194 
 195 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 196         unsigned long address, unsigned long size, unsigned long offset,
 197         unsigned int type, unsigned long page)
 198 {
 199         pte_t * pte;
 200         unsigned long end;
 201 
 202         if (pmd_none(*dir))
 203                 return 0;
 204         if (pmd_bad(*dir)) {
 205                 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 206                 pmd_clear(dir);
 207                 return 0;
 208         }
 209         pte = pte_offset(dir, address);
 210         offset += address & PMD_MASK;
 211         address &= ~PMD_MASK;
 212         end = address + size;
 213         if (end > PMD_SIZE)
 214                 end = PMD_SIZE;
 215         do {
 216                 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
 217                         return 1;
 218                 address += PAGE_SIZE;
 219                 pte++;
 220         } while (address < end);
 221         return 0;
 222 }
 223 
 224 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 225         unsigned long address, unsigned long size,
 226         unsigned int type, unsigned long page)
 227 {
 228         pmd_t * pmd;
 229         unsigned long offset, end;
 230 
 231         if (pgd_none(*dir))
 232                 return 0;
 233         if (pgd_bad(*dir)) {
 234                 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 235                 pgd_clear(dir);
 236                 return 0;
 237         }
 238         pmd = pmd_offset(dir, address);
 239         offset = address & PGDIR_MASK;
 240         address &= ~PGDIR_MASK;
 241         end = address + size;
 242         if (end > PGDIR_SIZE)
 243                 end = PGDIR_SIZE;
 244         do {
 245                 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
 246                         return 1;
 247                 address = (address + PMD_SIZE) & PMD_MASK;
 248                 pmd++;
 249         } while (address < end);
 250         return 0;
 251 }
 252 
 253 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
     /* [previous][next][first][last][top][bottom][index][help] */
 254         unsigned long start, unsigned long end,
 255         unsigned int type, unsigned long page)
 256 {
 257         while (start < end) {
 258                 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
 259                         return 1;
 260                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 261                 pgdir++;
 262         }
 263         return 0;
 264 }
 265 
 266 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
     /* [previous][next][first][last][top][bottom][index][help] */
 267 {
 268         struct vm_area_struct* vma;
 269 
 270         /*
 271          * Go through process' page directory.
 272          */
 273         if (!p->mm || pgd_inuse(p->mm->pgd))
 274                 return 0;
 275         vma = p->mm->mmap;
 276         while (vma) {
 277                 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
 278                 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
 279                         return 1;
 280                 vma = vma->vm_next;
 281         }
 282         return 0;
 283 }
 284 
 285 /*
 286  * To avoid races, we repeat for each process after having
 287  * swapped something in. That gets rid of a few pesky races,
 288  * and "swapoff" isn't exactly timing critical.
 289  */
 290 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292         int nr;
 293         unsigned long page = get_free_page(GFP_KERNEL);
 294 
 295         if (!page)
 296                 return -ENOMEM;
 297         nr = 0;
 298         while (nr < NR_TASKS) {
 299                 if (task[nr]) {
 300                         if (unuse_process(task[nr], type, page)) {
 301                                 page = get_free_page(GFP_KERNEL);
 302                                 if (!page)
 303                                         return -ENOMEM;
 304                                 continue;
 305                         }
 306                 }
 307                 nr++;
 308         }
 309         free_page(page);
 310         return 0;
 311 }
 312 
 313 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 314 {
 315         struct swap_info_struct * p;
 316         struct inode * inode;
 317         struct file filp;
 318         int i, type, prev;
 319 
 320         if (!suser())
 321                 return -EPERM;
 322         i = namei(specialfile,&inode);
 323         if (i)
 324                 return i;
 325         prev = -1;
 326         for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 327                 p = swap_info + type;
 328                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 329                         if (p->swap_file) {
 330                                 if (p->swap_file == inode)
 331                                   break;
 332                         } else {
 333                                 if (S_ISBLK(inode->i_mode)
 334                                     && (p->swap_device == inode->i_rdev))
 335                                   break;
 336                         }
 337                 }
 338                 prev = type;
 339         }
 340         if (type < 0){
 341                 iput(inode);
 342                 return -EINVAL;
 343         }
 344         if (prev < 0) {
 345                 swap_list.head = p->next;
 346         } else {
 347                 swap_info[prev].next = p->next;
 348         }
 349         if (type == swap_list.next) {
 350                 /* just pick something that's safe... */
 351                 swap_list.next = swap_list.head;
 352         }
 353         p->flags = SWP_USED;
 354         i = try_to_unuse(type);
 355         if (i) {
 356                 iput(inode);
 357                 p->flags = SWP_WRITEOK;
 358                 return i;
 359         }
 360 
 361         if(p->swap_device){
 362                 memset(&filp, 0, sizeof(filp));         
 363                 filp.f_inode = inode;
 364                 filp.f_mode = 3; /* read write */
 365                 /* open it again to get fops */
 366                 if( !blkdev_open(inode, &filp) &&
 367                    filp.f_op && filp.f_op->release){
 368                         filp.f_op->release(inode,&filp);
 369                         filp.f_op->release(inode,&filp);
 370                 }
 371         }
 372         iput(inode);
 373 
 374         nr_swap_pages -= p->pages;
 375         iput(p->swap_file);
 376         p->swap_file = NULL;
 377         p->swap_device = 0;
 378         vfree(p->swap_map);
 379         p->swap_map = NULL;
 380         free_page((long) p->swap_lockmap);
 381         p->swap_lockmap = NULL;
 382         p->flags = 0;
 383         return 0;
 384 }
 385 
 386 /*
 387  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 388  *
 389  * The swapon system call
 390  */
 391 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 392 {
 393         struct swap_info_struct * p;
 394         struct inode * swap_inode;
 395         unsigned int type;
 396         int i, j, prev;
 397         int error;
 398         struct file filp;
 399         static int least_priority = 0;
 400 
 401         memset(&filp, 0, sizeof(filp));
 402         if (!suser())
 403                 return -EPERM;
 404         p = swap_info;
 405         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 406                 if (!(p->flags & SWP_USED))
 407                         break;
 408         if (type >= MAX_SWAPFILES)
 409                 return -EPERM;
 410         if (type >= nr_swapfiles)
 411                 nr_swapfiles = type+1;
 412         p->flags = SWP_USED;
 413         p->swap_file = NULL;
 414         p->swap_device = 0;
 415         p->swap_map = NULL;
 416         p->swap_lockmap = NULL;
 417         p->lowest_bit = 0;
 418         p->highest_bit = 0;
 419         p->cluster_nr = 0;
 420         p->max = 1;
 421         p->next = -1;
 422         if (swap_flags & SWAP_FLAG_PREFER) {
 423                 p->prio =
 424                   (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
 425         } else {
 426                 p->prio = --least_priority;
 427         }
 428         error = namei(specialfile,&swap_inode);
 429         if (error)
 430                 goto bad_swap_2;
 431         p->swap_file = swap_inode;
 432         error = -EBUSY;
 433         if (swap_inode->i_count != 1)
 434                 goto bad_swap_2;
 435         error = -EINVAL;
 436 
 437         if (S_ISBLK(swap_inode->i_mode)) {
 438                 p->swap_device = swap_inode->i_rdev;
 439                 set_blocksize(p->swap_device, PAGE_SIZE);
 440                 
 441                 filp.f_inode = swap_inode;
 442                 filp.f_mode = 3; /* read write */
 443                 error = blkdev_open(swap_inode, &filp);
 444                 p->swap_file = NULL;
 445                 iput(swap_inode);
 446                 if(error)
 447                         goto bad_swap_2;
 448                 error = -ENODEV;
 449                 if (!p->swap_device)
 450                         goto bad_swap;
 451                 error = -EBUSY;
 452                 for (i = 0 ; i < nr_swapfiles ; i++) {
 453                         if (i == type)
 454                                 continue;
 455                         if (p->swap_device == swap_info[i].swap_device)
 456                                 goto bad_swap;
 457                 }
 458         } else if (!S_ISREG(swap_inode->i_mode))
 459                 goto bad_swap;
 460         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 461         if (!p->swap_lockmap) {
 462                 printk("Unable to start swapping: out of memory :-)\n");
 463                 error = -ENOMEM;
 464                 goto bad_swap;
 465         }
 466         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 467         if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
 468                 printk("Unable to find swap-space signature\n");
 469                 error = -EINVAL;
 470                 goto bad_swap;
 471         }
 472         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 473         j = 0;
 474         p->lowest_bit = 0;
 475         p->highest_bit = 0;
 476         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 477                 if (test_bit(i,p->swap_lockmap)) {
 478                         if (!p->lowest_bit)
 479                                 p->lowest_bit = i;
 480                         p->highest_bit = i;
 481                         p->max = i+1;
 482                         j++;
 483                 }
 484         }
 485         if (!j) {
 486                 printk("Empty swap-file\n");
 487                 error = -EINVAL;
 488                 goto bad_swap;
 489         }
 490         p->swap_map = (unsigned char *) vmalloc(p->max);
 491         if (!p->swap_map) {
 492                 error = -ENOMEM;
 493                 goto bad_swap;
 494         }
 495         for (i = 1 ; i < p->max ; i++) {
 496                 if (test_bit(i,p->swap_lockmap))
 497                         p->swap_map[i] = 0;
 498                 else
 499                         p->swap_map[i] = 0x80;
 500         }
 501         p->swap_map[0] = 0x80;
 502         memset(p->swap_lockmap,0,PAGE_SIZE);
 503         p->flags = SWP_WRITEOK;
 504         p->pages = j;
 505         nr_swap_pages += j;
 506         printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
 507 
 508         /* insert swap space into swap_list: */
 509         prev = -1;
 510         for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
 511                 if (p->prio >= swap_info[i].prio) {
 512                         break;
 513                 }
 514                 prev = i;
 515         }
 516         p->next = i;
 517         if (prev < 0) {
 518                 swap_list.head = swap_list.next = p - swap_info;
 519         } else {
 520                 swap_info[prev].next = p - swap_info;
 521         }
 522         return 0;
 523 bad_swap:
 524         if(filp.f_op && filp.f_op->release)
 525                 filp.f_op->release(filp.f_inode,&filp);
 526 bad_swap_2:
 527         free_page((long) p->swap_lockmap);
 528         vfree(p->swap_map);
 529         iput(p->swap_file);
 530         p->swap_device = 0;
 531         p->swap_file = NULL;
 532         p->swap_map = NULL;
 533         p->swap_lockmap = NULL;
 534         p->flags = 0;
 535         return error;
 536 }
 537 
 538 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 539 {
 540         unsigned int i, j;
 541 
 542         val->freeswap = val->totalswap = 0;
 543         for (i = 0; i < nr_swapfiles; i++) {
 544                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 545                         continue;
 546                 for (j = 0; j < swap_info[i].max; ++j)
 547                         switch (swap_info[i].swap_map[j]) {
 548                                 case 128:
 549                                         continue;
 550                                 case 0:
 551                                         ++val->freeswap;
 552                                 default:
 553                                         ++val->totalswap;
 554                         }
 555         }
 556         val->freeswap <<= PAGE_SHIFT;
 557         val->totalswap <<= PAGE_SHIFT;
 558         return;
 559 }
 560 

/* [previous][next][first][last][top][bottom][index][help] */