root/mm/swapfile.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. scan_swap_map
  2. get_swap_page
  3. swap_free
  4. unuse_pte
  5. unuse_pmd
  6. unuse_pgd
  7. unuse_vma
  8. unuse_process
  9. try_to_unuse
  10. sys_swapoff
  11. sys_swapon
  12. si_swapinfo

   1 /*
   2  *  linux/mm/swapfile.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  *  Swap reorganised 29.12.95, Stephen Tweedie
   6  */
   7 
   8 #include <linux/mm.h>
   9 #include <linux/sched.h>
  10 #include <linux/head.h>
  11 #include <linux/kernel.h>
  12 #include <linux/kernel_stat.h>
  13 #include <linux/errno.h>
  14 #include <linux/string.h>
  15 #include <linux/stat.h>
  16 #include <linux/swap.h>
  17 #include <linux/fs.h>
  18 #include <linux/swapctl.h>
  19 
  20 #include <asm/dma.h>
  21 #include <asm/system.h> /* for cli()/sti() */
  22 #include <asm/segment.h> /* for memcpy_to/fromfs */
  23 #include <asm/bitops.h>
  24 #include <asm/pgtable.h>
  25 
  26 int nr_swapfiles = 0;       /* one past the highest swap_info[] slot sys_swapon has handed out */
     /*
      * Active swap areas are chained through swap_info[].next in
      * priority order.  Both fields below are indices into swap_info[];
      * -1 means "none" (the list starts out empty).
      */
  27 static struct {
  28         int head;       /* head of priority-ordered swapfile list */
  29         int next;       /* swapfile to be used next */
  30 } swap_list = {-1, -1};
  31 
     /* One descriptor per swap area; a swap entry's type is the index into this table. */
  32 struct swap_info_struct swap_info[MAX_SWAPFILES];
  33 
  34 
  35 static inline int scan_swap_map(struct swap_info_struct *si)
     /* [previous][next][first][last][top][bottom][index][help] */
  36 {
  37         int offset;
  38         /* 
  39          * We try to cluster swap pages by allocating them
  40          * sequentially in swap.  Once we've allocated
  41          * SWAP_CLUSTER_MAX pages this way, however, we resort to
  42          * first-free allocation, starting a new cluster.  This
  43          * prevents us from scattering swap pages all over the entire
  44          * swap partition, so that we reduce overall disk seek times
  45          * between swap pages.  -- sct */
  46         if (si->cluster_nr) {
  47                 while (si->cluster_next <= si->highest_bit) {
  48                         offset = si->cluster_next++;
  49                         if (si->swap_map[offset])
  50                                 continue;
  51                         if (test_bit(offset, si->swap_lockmap))
  52                                 continue;
  53                         si->cluster_nr--;
  54                         goto got_page;
  55                 }
  56         }
  57         si->cluster_nr = SWAP_CLUSTER_MAX;
  58         for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
  59                 if (si->swap_map[offset])
  60                         continue;
  61                 if (test_bit(offset, si->swap_lockmap))
  62                         continue;
  63                 si->lowest_bit = offset;
  64 got_page:
  65                 si->swap_map[offset] = 1;
  66                 nr_swap_pages--;
  67                 if (offset == si->highest_bit)
  68                         si->highest_bit--;
  69                 si->cluster_next = offset;
  70                 return offset;
  71         }
  72         return 0;
  73 }
  74 
  75 unsigned long get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  76 {
  77         struct swap_info_struct * p;
  78         unsigned long offset, entry;
  79         int type, wrapped = 0;
  80 
  81         type = swap_list.next;
  82         if (type < 0)
  83           return 0;
  84 
  85         while (1) {
  86                 p = &swap_info[type];
  87                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
  88                         offset = scan_swap_map(p);
  89                         if (offset) {
  90                                 entry = SWP_ENTRY(type,offset);
  91                                 type = swap_info[type].next;
  92                                 if (type < 0 ||
  93                                         p->prio != swap_info[type].prio) 
  94                                 {
  95                                                 swap_list.next = swap_list.head;
  96                                 }
  97                                 else
  98                                 {
  99                                         swap_list.next = type;
 100                                 }
 101                                 return entry;
 102                         }
 103                 }
 104                 type = p->next;
 105                 if (!wrapped) {
 106                         if (type < 0 || p->prio != swap_info[type].prio) {
 107                                 type = swap_list.head;
 108                                 wrapped = 1;
 109                         }
 110                 } else if (type < 0) {
 111                         return 0;       /* out of swap space */
 112                 }
 113         }
 114 }
 115 
 116 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
 117 {
 118         struct swap_info_struct * p;
 119         unsigned long offset, type;
 120 
 121         if (!entry)
 122                 return;
 123         type = SWP_TYPE(entry);
 124         if (type & SHM_SWP_TYPE)
 125                 return;
 126         if (type >= nr_swapfiles) {
 127                 printk("Trying to free nonexistent swap-page\n");
 128                 return;
 129         }
 130         p = & swap_info[type];
 131         offset = SWP_OFFSET(entry);
 132         if (offset >= p->max) {
 133                 printk("swap_free: weirdness\n");
 134                 return;
 135         }
 136         if (!(p->flags & SWP_USED)) {
 137                 printk("Trying to free swap from unused swap-device\n");
 138                 return;
 139         }
 140         if (offset < p->lowest_bit)
 141                 p->lowest_bit = offset;
 142         if (offset > p->highest_bit)
 143                 p->highest_bit = offset;
 144         if (!p->swap_map[offset])
 145                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 146         else
 147                 if (!--p->swap_map[offset])
 148                         nr_swap_pages++;
 149         if (p->prio > swap_info[swap_list.next].prio) {
 150             swap_list.next = swap_list.head;
 151         }
 152 }
 153 
 154 /*
 155  * Trying to stop swapping from a file is fraught with races, so
 156  * we repeat quite a bit here when we have to pause. swapoff()
 157  * isn't exactly timing-critical, so who cares (but this is /really/
 158  * inefficient, ugh).
 159  *
 160  * We return 1 after having slept, which makes the caller start over
 161  * from the beginning for this process.
 162  */
 163 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Examine one page-table entry and detach it from swap area `type`.
      *
      * `page` is a spare page supplied by the caller.  Returns 1 when
      * that page has been consumed (either mapped into the pte or
      * freed), in which case the caller must obtain a new page and
      * rescan; returns 0 when the pte needed no swap-in.
      * `address` is not used by this function.
      */
 164         pte_t *dir, unsigned int type, unsigned long page)
 165 {
 166         pte_t pte = *dir;
 167 
 168         if (pte_none(pte))
 169                 return 0;
             /*
              * Page is in memory: if it still has a swap-cache entry on
              * this area, drop that entry and mark the pte dirty.
              * NOTE(review): presumably dirty so the page is not later
              * treated as having a valid copy on swap -- confirm.
              */
 170         if (pte_present(pte)) {
 171                 unsigned long page_nr = MAP_NR(pte_page(pte));
                     /* pages at or above high_memory are left alone */
 172                 if (page_nr >= MAP_NR(high_memory))
 173                         return 0;
 174                 if (!in_swap_cache(page_nr))
 175                         return 0;
 176                 if (SWP_TYPE(in_swap_cache(page_nr)) != type)
 177                         return 0;
 178                 delete_from_swap_cache(page_nr);
 179                 set_pte(dir, pte_mkdirty(pte));
 180                 return 0;
 181         }
             /* Not present: the pte holds a swap entry; ignore other areas. */
 182         if (SWP_TYPE(pte_val(pte)) != type)
 183                 return 0;
 184         read_swap_page(pte_val(pte), (char *) page);
             /*
              * Re-read the pte: if it no longer holds the entry we just
              * read in, discard the page and let the caller start over.
              */
 185         if (pte_val(*dir) != pte_val(pte)) {
 186                 free_page(page);
 187                 return 1;
 188         }
             /* Map the freshly read page writable and dirty, then drop the swap reference. */
 189         set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 190         ++vma->vm_mm->rss;
 191         swap_free(pte_val(pte));
 192         return 1;
 193 }
 194 
 195 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 196         unsigned long address, unsigned long size, unsigned long offset,
 197         unsigned int type, unsigned long page)
 198 {
 199         pte_t * pte;
 200         unsigned long end;
 201 
 202         if (pmd_none(*dir))
 203                 return 0;
 204         if (pmd_bad(*dir)) {
 205                 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 206                 pmd_clear(dir);
 207                 return 0;
 208         }
 209         pte = pte_offset(dir, address);
 210         offset += address & PMD_MASK;
 211         address &= ~PMD_MASK;
 212         end = address + size;
 213         if (end > PMD_SIZE)
 214                 end = PMD_SIZE;
 215         do {
 216                 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
 217                         return 1;
 218                 address += PAGE_SIZE;
 219                 pte++;
 220         } while (address < end);
 221         return 0;
 222 }
 223 
 224 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 225         unsigned long address, unsigned long size,
 226         unsigned int type, unsigned long page)
 227 {
 228         pmd_t * pmd;
 229         unsigned long offset, end;
 230 
 231         if (pgd_none(*dir))
 232                 return 0;
 233         if (pgd_bad(*dir)) {
 234                 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 235                 pgd_clear(dir);
 236                 return 0;
 237         }
 238         pmd = pmd_offset(dir, address);
 239         offset = address & PGDIR_MASK;
 240         address &= ~PGDIR_MASK;
 241         end = address + size;
 242         if (end > PGDIR_SIZE)
 243                 end = PGDIR_SIZE;
 244         do {
 245                 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
 246                         return 1;
 247                 address = (address + PMD_SIZE) & PMD_MASK;
 248                 pmd++;
 249         } while (address < end);
 250         return 0;
 251 }
 252 
 253 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
     /* [previous][next][first][last][top][bottom][index][help] */
 254         unsigned long start, unsigned long end,
 255         unsigned int type, unsigned long page)
 256 {
 257         while (start < end) {
 258                 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
 259                         return 1;
 260                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 261                 pgdir++;
 262         }
 263         return 0;
 264 }
 265 
 266 static int unuse_process(struct mm_struct * mm, unsigned int type, unsigned long page)
     /* [previous][next][first][last][top][bottom][index][help] */
 267 {
 268         struct vm_area_struct* vma;
 269 
 270         /*
 271          * Go through process' page directory.
 272          */
 273         if (!mm || mm == &init_mm)
 274                 return 0;
 275         vma = mm->mmap;
 276         while (vma) {
 277                 pgd_t * pgd = pgd_offset(mm, vma->vm_start);
 278                 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
 279                         return 1;
 280                 vma = vma->vm_next;
 281         }
 282         return 0;
 283 }
 284 
 285 /*
 286  * To avoid races, we repeat for each process after having
 287  * swapped something in. That gets rid of a few pesky races,
 288  * and "swapoff" isn't exactly timing critical.
 289  */
 290 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292         int nr;
 293         unsigned long page = get_free_page(GFP_KERNEL);
 294 
 295         if (!page)
 296                 return -ENOMEM;
 297         nr = 0;
 298         while (nr < NR_TASKS) {
 299                 struct task_struct * p = task[nr];
 300                 if (p) {
 301                         if (unuse_process(p->mm, type, page)) {
 302                                 page = get_free_page(GFP_KERNEL);
 303                                 if (!page)
 304                                         return -ENOMEM;
 305                                 continue;
 306                         }
 307                 }
 308                 nr++;
 309         }
 310         free_page(page);
 311         return 0;
 312 }
 313 
 314 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 315 {
 316         struct swap_info_struct * p;
 317         struct inode * inode;
 318         struct file filp;
 319         int i, type, prev;
 320 
 321         if (!suser())
 322                 return -EPERM;
 323         i = namei(specialfile,&inode);
 324         if (i)
 325                 return i;
 326         prev = -1;
 327         for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 328                 p = swap_info + type;
 329                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 330                         if (p->swap_file) {
 331                                 if (p->swap_file == inode)
 332                                   break;
 333                         } else {
 334                                 if (S_ISBLK(inode->i_mode)
 335                                     && (p->swap_device == inode->i_rdev))
 336                                   break;
 337                         }
 338                 }
 339                 prev = type;
 340         }
 341         if (type < 0){
 342                 iput(inode);
 343                 return -EINVAL;
 344         }
 345         if (prev < 0) {
 346                 swap_list.head = p->next;
 347         } else {
 348                 swap_info[prev].next = p->next;
 349         }
 350         if (type == swap_list.next) {
 351                 /* just pick something that's safe... */
 352                 swap_list.next = swap_list.head;
 353         }
 354         p->flags = SWP_USED;
 355         i = try_to_unuse(type);
 356         if (i) {
 357                 iput(inode);
 358                 p->flags = SWP_WRITEOK;
 359                 return i;
 360         }
 361 
 362         if(p->swap_device){
 363                 memset(&filp, 0, sizeof(filp));         
 364                 filp.f_inode = inode;
 365                 filp.f_mode = 3; /* read write */
 366                 /* open it again to get fops */
 367                 if( !blkdev_open(inode, &filp) &&
 368                    filp.f_op && filp.f_op->release){
 369                         filp.f_op->release(inode,&filp);
 370                         filp.f_op->release(inode,&filp);
 371                 }
 372         }
 373         iput(inode);
 374 
 375         nr_swap_pages -= p->pages;
 376         iput(p->swap_file);
 377         p->swap_file = NULL;
 378         p->swap_device = 0;
 379         vfree(p->swap_map);
 380         p->swap_map = NULL;
 381         free_page((long) p->swap_lockmap);
 382         p->swap_lockmap = NULL;
 383         p->flags = 0;
 384         return 0;
 385 }
 386 
 387 /*
 388  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 389  *
 390  * The swapon system call
 391  */
 392 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
     /* [previous][next][first][last][top][bottom][index][help] */
     /*
      * Start swapping on the regular file or block device named by
      * specialfile.
      *
      * Takes the first swap_info[] slot that does not have SWP_USED set,
      * reads page 0 of the area, checks for the "SWAP-SPACE" signature
      * in its last 10 bytes, builds swap_map from the bitmap held in
      * that page, and links the area into swap_list by priority.
      *
      * Priority comes from swap_flags when SWAP_FLAG_PREFER is set;
      * otherwise each call gets a value one lower than the previous.
      *
      * Returns 0 on success or a negative errno: -EPERM (not superuser,
      * or no free slot), the namei()/blkdev_open() error, -EBUSY (inode
      * has other users, or device already registered), -EINVAL (neither
      * block device nor regular file, bad signature, or no usable
      * pages), -ENODEV, -ENOMEM.
      */
 393 {
 394         struct swap_info_struct * p;
 395         struct inode * swap_inode;
 396         unsigned int type;
 397         int i, j, prev;
 398         int error;
 399         struct file filp;
             /* decremented on every call without SWAP_FLAG_PREFER, including calls that later fail */
 400         static int least_priority = 0;
 401 
 402         memset(&filp, 0, sizeof(filp));
 403         if (!suser())
 404                 return -EPERM;
             /* look for a free slot among those already handed out, else take the next new one */
 405         p = swap_info;
 406         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 407                 if (!(p->flags & SWP_USED))
 408                         break;
 409         if (type >= MAX_SWAPFILES)
 410                 return -EPERM;
 411         if (type >= nr_swapfiles)
 412                 nr_swapfiles = type+1;
             /* reserve the slot and reset it; SWP_USED alone means "claimed but not yet writable" */
 413         p->flags = SWP_USED;
 414         p->swap_file = NULL;
 415         p->swap_device = 0;
 416         p->swap_map = NULL;
 417         p->swap_lockmap = NULL;
 418         p->lowest_bit = 0;
 419         p->highest_bit = 0;
 420         p->cluster_nr = 0;
             /* NOTE(review): presumably max = 1 so that page 0 can be read below before the real size is known -- confirm */
 421         p->max = 1;
 422         p->next = -1;
 423         if (swap_flags & SWAP_FLAG_PREFER) {
 424                 p->prio =
 425                   (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
 426         } else {
 427                 p->prio = --least_priority;
 428         }
 429         error = namei(specialfile,&swap_inode);
 430         if (error)
 431                 goto bad_swap_2;
 432         p->swap_file = swap_inode;
 433         error = -EBUSY;
             /* refuse if anyone else holds a reference to the inode */
 434         if (swap_inode->i_count != 1)
 435                 goto bad_swap_2;
 436         error = -EINVAL;
 437 
 438         if (S_ISBLK(swap_inode->i_mode)) {
 439                 p->swap_device = swap_inode->i_rdev;
 440                 set_blocksize(p->swap_device, PAGE_SIZE);
 441                 
 442                 filp.f_inode = swap_inode;
 443                 filp.f_mode = 3; /* read write */
 444                 error = blkdev_open(swap_inode, &filp);
                     /* block devices are tracked by swap_device only; the inode reference is dropped here */
 445                 p->swap_file = NULL;
 446                 iput(swap_inode);
 447                 if(error)
 448                         goto bad_swap_2;
 449                 error = -ENODEV;
 450                 if (!p->swap_device)
 451                         goto bad_swap;
 452                 error = -EBUSY;
                     /* reject a device that is already registered in another slot */
 453                 for (i = 0 ; i < nr_swapfiles ; i++) {
 454                         if (i == type)
 455                                 continue;
 456                         if (p->swap_device == swap_info[i].swap_device)
 457                                 goto bad_swap;
 458                 }
 459         } else if (!S_ISREG(swap_inode->i_mode))
 460                 goto bad_swap;
             /* this page first holds the on-disk header (page 0), and is reused as the lock bitmap afterwards */
 461         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 462         if (!p->swap_lockmap) {
 463                 printk("Unable to start swapping: out of memory :-)\n");
 464                 error = -ENOMEM;
 465                 goto bad_swap;
 466         }
 467         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 468         if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
 469                 printk("Unable to find swap-space signature\n");
 470                 error = -EINVAL;
 471                 goto bad_swap;
 472         }
             /* wipe the signature so the bit scan below does not count its bytes as usable-page bits */
 473         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 474         j = 0;
 475         p->lowest_bit = 0;
 476         p->highest_bit = 0;
             /*
              * A set bit i in the header means page i is usable.  Record the
              * first and last usable page, size the area (max), and count
              * usable pages in j.  Page 0 (the header itself) is not scanned.
              */
 477         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 478                 if (test_bit(i,p->swap_lockmap)) {
 479                         if (!p->lowest_bit)
 480                                 p->lowest_bit = i;
 481                         p->highest_bit = i;
 482                         p->max = i+1;
 483                         j++;
 484                 }
 485         }
 486         if (!j) {
 487                 printk("Empty swap-file\n");
 488                 error = -EINVAL;
 489                 goto bad_swap;
 490         }
             /* one use-count byte per page: 0 = free, 0x80 = not usable */
 491         p->swap_map = (unsigned char *) vmalloc(p->max);
 492         if (!p->swap_map) {
 493                 error = -ENOMEM;
 494                 goto bad_swap;
 495         }
 496         for (i = 1 ; i < p->max ; i++) {
 497                 if (test_bit(i,p->swap_lockmap))
 498                         p->swap_map[i] = 0;
 499                 else
 500                         p->swap_map[i] = 0x80;
 501         }
 502         p->swap_map[0] = 0x80;
             /* header contents no longer needed: clear the page so it starts out as an empty lock bitmap */
 503         memset(p->swap_lockmap,0,PAGE_SIZE);
 504         p->flags = SWP_WRITEOK;
 505         p->pages = j;
 506         nr_swap_pages += j;
 507         printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
 508 
 509         /* insert swap space into swap_list: */
             /* the list is kept in descending priority; the new area goes in front of the first entry whose prio is not higher */
 510         prev = -1;
 511         for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
 512                 if (p->prio >= swap_info[i].prio) {
 513                         break;
 514                 }
 515                 prev = i;
 516         }
 517         p->next = i;
 518         if (prev < 0) {
                     /* new head of the list: also make it the next area to allocate from */
 519                 swap_list.head = swap_list.next = p - swap_info;
 520         } else {
 521                 swap_info[prev].next = p - swap_info;
 522         }
 523         return 0;
     /* error exit once blkdev_open() may have succeeded: release the device if a release op is set */
 524 bad_swap:
 525         if(filp.f_op && filp.f_op->release)
 526                 filp.f_op->release(filp.f_inode,&filp);
     /*
      * common error exit: free whatever was set up and mark the slot unused.
      * NOTE(review): relies on free_page(), vfree() and iput() accepting
      * 0/NULL for fields that were never filled in -- confirm.
      */
 527 bad_swap_2:
 528         free_page((long) p->swap_lockmap);
 529         vfree(p->swap_map);
 530         iput(p->swap_file);
 531         p->swap_device = 0;
 532         p->swap_file = NULL;
 533         p->swap_map = NULL;
 534         p->swap_lockmap = NULL;
 535         p->flags = 0;
 536         return error;
 537 }
 538 
 539 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 540 {
 541         unsigned int i, j;
 542 
 543         val->freeswap = val->totalswap = 0;
 544         for (i = 0; i < nr_swapfiles; i++) {
 545                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 546                         continue;
 547                 for (j = 0; j < swap_info[i].max; ++j)
 548                         switch (swap_info[i].swap_map[j]) {
 549                                 case 128:
 550                                         continue;
 551                                 case 0:
 552                                         ++val->freeswap;
 553                                 default:
 554                                         ++val->totalswap;
 555                         }
 556         }
 557         val->freeswap <<= PAGE_SHIFT;
 558         val->totalswap <<= PAGE_SHIFT;
 559         return;
 560 }
 561 

/* [previous][next][first][last][top][bottom][index][help] */