root/mm/swapfile.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. get_swap_page
  2. swap_free
  3. unuse_pte
  4. unuse_pmd
  5. unuse_pgd
  6. unuse_vma
  7. unuse_process
  8. try_to_unuse
  9. sys_swapoff
  10. sys_swapon
  11. si_swapinfo

   1 /*
   2  *  linux/mm/swapfile.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
   5  *  Swap reorganised 29.12.95, Stephen Tweedie
   6  */
   7 
   8 #include <linux/mm.h>
   9 #include <linux/sched.h>
  10 #include <linux/head.h>
  11 #include <linux/kernel.h>
  12 #include <linux/kernel_stat.h>
  13 #include <linux/errno.h>
  14 #include <linux/string.h>
  15 #include <linux/stat.h>
  16 #include <linux/swap.h>
  17 #include <linux/fs.h>
  18 #include <linux/swapctl.h>
  19 
  20 #include <asm/dma.h>
  21 #include <asm/system.h> /* for cli()/sti() */
  22 #include <asm/segment.h> /* for memcpy_to/fromfs */
  23 #include <asm/bitops.h>
  24 #include <asm/pgtable.h>
  25 
  26 int nr_swapfiles = 0;
  27 static struct {
  28         int head;       /* head of priority-ordered swapfile list */
  29         int next;       /* swapfile to be used next */
  30 } swap_list = {-1, -1};
  31 
  32 struct swap_info_struct swap_info[MAX_SWAPFILES];
  33 
  34 unsigned long get_swap_page(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  35 {
  36         struct swap_info_struct * p;
  37         unsigned long offset, entry;
  38         int type, wrapped = 0;
  39 
  40         type = swap_list.next;
  41         if (type < 0)
  42           return 0;
  43 
  44         while (1) {
  45                 p = &swap_info[type];
  46                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
  47                         for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
  48                                 if (p->swap_map[offset])
  49                                   continue;
  50                                 if (test_bit(offset, p->swap_lockmap))
  51                                   continue;
  52                                 p->swap_map[offset] = 1;
  53                                 nr_swap_pages--;
  54                                 if (offset == p->highest_bit)
  55                                   p->highest_bit--;
  56                                 p->lowest_bit = offset;
  57                                 entry = SWP_ENTRY(type,offset);
  58 
  59                                 type = swap_info[type].next;
  60                                 if (type < 0 || p->prio != swap_info[type].prio) {
  61                                     swap_list.next = swap_list.head;
  62                                 } else {
  63                                     swap_list.next = type;
  64                                 }
  65                                 return entry;
  66                         }
  67                 }
  68                 type = p->next;
  69                 if (!wrapped) {
  70                         if (type < 0 || p->prio != swap_info[type].prio) {
  71                                 type = swap_list.head;
  72                                 wrapped = 1;
  73                         }
  74                 } else if (type < 0) {
  75                         return 0;       /* out of swap space */
  76                 }
  77         }
  78 }
  79 
  80 void swap_free(unsigned long entry)
     /* [previous][next][first][last][top][bottom][index][help] */
  81 {
  82         struct swap_info_struct * p;
  83         unsigned long offset, type;
  84 
  85         if (!entry)
  86                 return;
  87         type = SWP_TYPE(entry);
  88         if (type & SHM_SWP_TYPE)
  89                 return;
  90         if (type >= nr_swapfiles) {
  91                 printk("Trying to free nonexistent swap-page\n");
  92                 return;
  93         }
  94         p = & swap_info[type];
  95         offset = SWP_OFFSET(entry);
  96         if (offset >= p->max) {
  97                 printk("swap_free: weirdness\n");
  98                 return;
  99         }
 100         if (!(p->flags & SWP_USED)) {
 101                 printk("Trying to free swap from unused swap-device\n");
 102                 return;
 103         }
 104         if (offset < p->lowest_bit)
 105                 p->lowest_bit = offset;
 106         if (offset > p->highest_bit)
 107                 p->highest_bit = offset;
 108         if (!p->swap_map[offset])
 109                 printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
 110         else
 111                 if (!--p->swap_map[offset])
 112                         nr_swap_pages++;
 113         if (p->prio > swap_info[swap_list.next].prio) {
 114             swap_list.next = swap_list.head;
 115         }
 116 }
 117 
 118 /*
 119  * Trying to stop swapping from a file is fraught with races, so
 120  * we repeat quite a bit here when we have to pause. swapoff()
 121  * isn't exactly timing-critical, so who cares (but this is /really/
 122  * inefficient, ugh).
 123  *
 124  * We return 1 after having slept, which makes the process start over
 125  * from the beginning for this process..
 126  */
 127 static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
     /* [previous][next][first][last][top][bottom][index][help] */
 128         pte_t *dir, unsigned int type, unsigned long page)
 129 {
 130         pte_t pte = *dir;
 131 
 132         if (pte_none(pte))
 133                 return 0;
 134         if (pte_present(pte)) {
 135                 unsigned long page = pte_page(pte);
 136                 if (page >= high_memory)
 137                         return 0;
 138                 if (!in_swap_cache(page))
 139                         return 0;
 140                 if (SWP_TYPE(in_swap_cache(page)) != type)
 141                         return 0;
 142                 delete_from_swap_cache(page);
 143                 set_pte(dir, pte_mkdirty(pte));
 144                 return 0;
 145         }
 146         if (SWP_TYPE(pte_val(pte)) != type)
 147                 return 0;
 148         read_swap_page(pte_val(pte), (char *) page);
 149         if (pte_val(*dir) != pte_val(pte)) {
 150                 free_page(page);
 151                 return 1;
 152         }
 153         set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
 154         ++vma->vm_mm->rss;
 155         swap_free(pte_val(pte));
 156         return 1;
 157 }
 158 
 159 static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 160         unsigned long address, unsigned long size, unsigned long offset,
 161         unsigned int type, unsigned long page)
 162 {
 163         pte_t * pte;
 164         unsigned long end;
 165 
 166         if (pmd_none(*dir))
 167                 return 0;
 168         if (pmd_bad(*dir)) {
 169                 printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
 170                 pmd_clear(dir);
 171                 return 0;
 172         }
 173         pte = pte_offset(dir, address);
 174         offset += address & PMD_MASK;
 175         address &= ~PMD_MASK;
 176         end = address + size;
 177         if (end > PMD_SIZE)
 178                 end = PMD_SIZE;
 179         do {
 180                 if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
 181                         return 1;
 182                 address += PAGE_SIZE;
 183                 pte++;
 184         } while (address < end);
 185         return 0;
 186 }
 187 
 188 static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
     /* [previous][next][first][last][top][bottom][index][help] */
 189         unsigned long address, unsigned long size,
 190         unsigned int type, unsigned long page)
 191 {
 192         pmd_t * pmd;
 193         unsigned long offset, end;
 194 
 195         if (pgd_none(*dir))
 196                 return 0;
 197         if (pgd_bad(*dir)) {
 198                 printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
 199                 pgd_clear(dir);
 200                 return 0;
 201         }
 202         pmd = pmd_offset(dir, address);
 203         offset = address & PGDIR_MASK;
 204         address &= ~PGDIR_MASK;
 205         end = address + size;
 206         if (end > PGDIR_SIZE)
 207                 end = PGDIR_SIZE;
 208         do {
 209                 if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
 210                         return 1;
 211                 address = (address + PMD_SIZE) & PMD_MASK;
 212                 pmd++;
 213         } while (address < end);
 214         return 0;
 215 }
 216 
 217 static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
     /* [previous][next][first][last][top][bottom][index][help] */
 218         unsigned long start, unsigned long end,
 219         unsigned int type, unsigned long page)
 220 {
 221         while (start < end) {
 222                 if (unuse_pgd(vma, pgdir, start, end - start, type, page))
 223                         return 1;
 224                 start = (start + PGDIR_SIZE) & PGDIR_MASK;
 225                 pgdir++;
 226         }
 227         return 0;
 228 }
 229 
 230 static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232         struct vm_area_struct* vma;
 233 
 234         /*
 235          * Go through process' page directory.
 236          */
 237         if (!p->mm || pgd_inuse(p->mm->pgd))
 238                 return 0;
 239         vma = p->mm->mmap;
 240         while (vma) {
 241                 pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
 242                 if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
 243                         return 1;
 244                 vma = vma->vm_next;
 245         }
 246         return 0;
 247 }
 248 
 249 /*
 250  * To avoid races, we repeat for each process after having
 251  * swapped something in. That gets rid of a few pesky races,
 252  * and "swapoff" isn't exactly timing critical.
 253  */
 254 static int try_to_unuse(unsigned int type)
     /* [previous][next][first][last][top][bottom][index][help] */
 255 {
 256         int nr;
 257         unsigned long page = get_free_page(GFP_KERNEL);
 258 
 259         if (!page)
 260                 return -ENOMEM;
 261         nr = 0;
 262         while (nr < NR_TASKS) {
 263                 if (task[nr]) {
 264                         if (unuse_process(task[nr], type, page)) {
 265                                 page = get_free_page(GFP_KERNEL);
 266                                 if (!page)
 267                                         return -ENOMEM;
 268                                 continue;
 269                         }
 270                 }
 271                 nr++;
 272         }
 273         free_page(page);
 274         return 0;
 275 }
 276 
 277 asmlinkage int sys_swapoff(const char * specialfile)
     /* [previous][next][first][last][top][bottom][index][help] */
 278 {
 279         struct swap_info_struct * p;
 280         struct inode * inode;
 281         struct file filp;
 282         int i, type, prev;
 283 
 284         if (!suser())
 285                 return -EPERM;
 286         i = namei(specialfile,&inode);
 287         if (i)
 288                 return i;
 289         prev = -1;
 290         for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 291                 p = swap_info + type;
 292                 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
 293                         if (p->swap_file) {
 294                                 if (p->swap_file == inode)
 295                                   break;
 296                         } else {
 297                                 if (S_ISBLK(inode->i_mode)
 298                                     && (p->swap_device == inode->i_rdev))
 299                                   break;
 300                         }
 301                 }
 302                 prev = type;
 303         }
 304         if (type < 0){
 305                 iput(inode);
 306                 return -EINVAL;
 307         }
 308         if (prev < 0) {
 309                 swap_list.head = p->next;
 310         } else {
 311                 swap_info[prev].next = p->next;
 312         }
 313         if (type == swap_list.next) {
 314                 /* just pick something that's safe... */
 315                 swap_list.next = swap_list.head;
 316         }
 317         p->flags = SWP_USED;
 318         i = try_to_unuse(type);
 319         if (i) {
 320                 iput(inode);
 321                 p->flags = SWP_WRITEOK;
 322                 return i;
 323         }
 324 
 325         if(p->swap_device){
 326                 memset(&filp, 0, sizeof(filp));         
 327                 filp.f_inode = inode;
 328                 filp.f_mode = 3; /* read write */
 329                 /* open it again to get fops */
 330                 if( !blkdev_open(inode, &filp) &&
 331                    filp.f_op && filp.f_op->release){
 332                         filp.f_op->release(inode,&filp);
 333                         filp.f_op->release(inode,&filp);
 334                 }
 335         }
 336         iput(inode);
 337 
 338         nr_swap_pages -= p->pages;
 339         iput(p->swap_file);
 340         p->swap_file = NULL;
 341         p->swap_device = 0;
 342         vfree(p->swap_map);
 343         p->swap_map = NULL;
 344         free_page((long) p->swap_lockmap);
 345         p->swap_lockmap = NULL;
 346         p->flags = 0;
 347         return 0;
 348 }
 349 
 350 /*
 351  * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
 352  *
 353  * The swapon system call
 354  */
 355 asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 356 {
 357         struct swap_info_struct * p;
 358         struct inode * swap_inode;
 359         unsigned int type;
 360         int i, j, prev;
 361         int error;
 362         struct file filp;
 363         static int least_priority = 0;
 364 
 365         memset(&filp, 0, sizeof(filp));
 366         if (!suser())
 367                 return -EPERM;
 368         p = swap_info;
 369         for (type = 0 ; type < nr_swapfiles ; type++,p++)
 370                 if (!(p->flags & SWP_USED))
 371                         break;
 372         if (type >= MAX_SWAPFILES)
 373                 return -EPERM;
 374         if (type >= nr_swapfiles)
 375                 nr_swapfiles = type+1;
 376         p->flags = SWP_USED;
 377         p->swap_file = NULL;
 378         p->swap_device = 0;
 379         p->swap_map = NULL;
 380         p->swap_lockmap = NULL;
 381         p->lowest_bit = 0;
 382         p->highest_bit = 0;
 383         p->max = 1;
 384         p->next = -1;
 385         if (swap_flags & SWAP_FLAG_PREFER) {
 386                 p->prio =
 387                   (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
 388         } else {
 389                 p->prio = --least_priority;
 390         }
 391         error = namei(specialfile,&swap_inode);
 392         if (error)
 393                 goto bad_swap_2;
 394         p->swap_file = swap_inode;
 395         error = -EBUSY;
 396         if (swap_inode->i_count != 1)
 397                 goto bad_swap_2;
 398         error = -EINVAL;
 399 
 400         if (S_ISBLK(swap_inode->i_mode)) {
 401                 p->swap_device = swap_inode->i_rdev;
 402 
 403                 filp.f_inode = swap_inode;
 404                 filp.f_mode = 3; /* read write */
 405                 error = blkdev_open(swap_inode, &filp);
 406                 p->swap_file = NULL;
 407                 iput(swap_inode);
 408                 if(error)
 409                         goto bad_swap_2;
 410                 error = -ENODEV;
 411                 if (!p->swap_device)
 412                         goto bad_swap;
 413                 error = -EBUSY;
 414                 for (i = 0 ; i < nr_swapfiles ; i++) {
 415                         if (i == type)
 416                                 continue;
 417                         if (p->swap_device == swap_info[i].swap_device)
 418                                 goto bad_swap;
 419                 }
 420         } else if (!S_ISREG(swap_inode->i_mode))
 421                 goto bad_swap;
 422         p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
 423         if (!p->swap_lockmap) {
 424                 printk("Unable to start swapping: out of memory :-)\n");
 425                 error = -ENOMEM;
 426                 goto bad_swap;
 427         }
 428         read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
 429         if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
 430                 printk("Unable to find swap-space signature\n");
 431                 error = -EINVAL;
 432                 goto bad_swap;
 433         }
 434         memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
 435         j = 0;
 436         p->lowest_bit = 0;
 437         p->highest_bit = 0;
 438         for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
 439                 if (test_bit(i,p->swap_lockmap)) {
 440                         if (!p->lowest_bit)
 441                                 p->lowest_bit = i;
 442                         p->highest_bit = i;
 443                         p->max = i+1;
 444                         j++;
 445                 }
 446         }
 447         if (!j) {
 448                 printk("Empty swap-file\n");
 449                 error = -EINVAL;
 450                 goto bad_swap;
 451         }
 452         p->swap_map = (unsigned char *) vmalloc(p->max);
 453         if (!p->swap_map) {
 454                 error = -ENOMEM;
 455                 goto bad_swap;
 456         }
 457         for (i = 1 ; i < p->max ; i++) {
 458                 if (test_bit(i,p->swap_lockmap))
 459                         p->swap_map[i] = 0;
 460                 else
 461                         p->swap_map[i] = 0x80;
 462         }
 463         p->swap_map[0] = 0x80;
 464         memset(p->swap_lockmap,0,PAGE_SIZE);
 465         p->flags = SWP_WRITEOK;
 466         p->pages = j;
 467         nr_swap_pages += j;
 468         printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
 469 
 470         /* insert swap space into swap_list: */
 471         prev = -1;
 472         for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
 473                 if (p->prio >= swap_info[i].prio) {
 474                         break;
 475                 }
 476                 prev = i;
 477         }
 478         p->next = i;
 479         if (prev < 0) {
 480                 swap_list.head = swap_list.next = p - swap_info;
 481         } else {
 482                 swap_info[prev].next = p - swap_info;
 483         }
 484         return 0;
 485 bad_swap:
 486         if(filp.f_op && filp.f_op->release)
 487                 filp.f_op->release(filp.f_inode,&filp);
 488 bad_swap_2:
 489         free_page((long) p->swap_lockmap);
 490         vfree(p->swap_map);
 491         iput(p->swap_file);
 492         p->swap_device = 0;
 493         p->swap_file = NULL;
 494         p->swap_map = NULL;
 495         p->swap_lockmap = NULL;
 496         p->flags = 0;
 497         return error;
 498 }
 499 
 500 void si_swapinfo(struct sysinfo *val)
     /* [previous][next][first][last][top][bottom][index][help] */
 501 {
 502         unsigned int i, j;
 503 
 504         val->freeswap = val->totalswap = 0;
 505         for (i = 0; i < nr_swapfiles; i++) {
 506                 if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
 507                         continue;
 508                 for (j = 0; j < swap_info[i].max; ++j)
 509                         switch (swap_info[i].swap_map[j]) {
 510                                 case 128:
 511                                         continue;
 512                                 case 0:
 513                                         ++val->freeswap;
 514                                 default:
 515                                         ++val->totalswap;
 516                         }
 517         }
 518         val->freeswap <<= PAGE_SHIFT;
 519         val->totalswap <<= PAGE_SHIFT;
 520         return;
 521 }
 522 

/* [previous][next][first][last][top][bottom][index][help] */