root/lib/malloc.c


DEFINITIONS

This source file includes the following definitions:
  1. init_bucket_desc
  2. kmalloc
  3. kfree_s

/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: the maximum size of memory we can allocate using this
 *      routine is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When kmalloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocates a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: kmalloc() and kfree() both call get_free_page() and free_page()
 *      in sections of code where interrupts are turned off, to allow
 *      kmalloc() and kfree() to be safely called from an interrupt routine.
 *      (We will probably need this functionality when networking code,
 *      particularly things like NFS, is added to Linux.)  However, this
 *      presumes that get_free_page() and free_page() are interrupt-level
 *      safe, which they may not be once paging is added.  If this is the
 *      case, we will need to modify kmalloc() to keep a few unused pages
 *      "pre-allocated" so that it can safely draw upon those pages if
 *      it is called from an interrupt routine.
 *
 *      Another concern is that get_free_page() should not sleep; if it
 *      does, the code is carefully ordered so as to avoid any race
 *      conditions.  The catch is that if kmalloc() is called re-entrantly,
 *      there is a chance that unnecessary pages will be grabbed from the
 *      system.  Except for the pages for the bucket descriptor page, the
 *      extra pages will eventually get released back to the system, though,
 *      so it isn't all that bad.
 */
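
/*
 * A minimal usage sketch (illustrative only, not part of this file).
 * It assumes GFP_KERNEL is defined alongside GFP_ATOMIC in <linux/mm.h>
 * as a priority that is allowed to sleep; the buffer itself is made up.
 */
#if 0
        char *buf;

        buf = kmalloc(100, GFP_KERNEL); /* may sleep; lands in the 128-byte bucket */
        if (!buf)
                return NULL;            /* out of memory */
        /* ... use buf ... */
        kfree_s(buf, 100);              /* the size hint narrows the bucket search */

        buf = kmalloc(512, GFP_ATOMIC); /* interrupt-safe; NULL if no page is free */
#endif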

/* I'm going to modify it to keep some free pages around.  get_free_page()
   can sleep, and tcp/ip needs to call kmalloc at interrupt time (or keep
   big buffers around for itself.)  I guess I'll have the return from
   syscall fill up the free page descriptors. -RAB */

/* Since the advent of GFP_ATOMIC, I've changed the kmalloc code to
   use it and return NULL if it can't get a page. -RAB */
/* (mostly just undid the previous changes -RAB) */

/* I've added the priority argument to kmalloc so routines can
   sleep on memory if they want. -RAB */

/* I've also got to make sure that kmalloc is reentrant now. */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>

#include <asm/system.h>

struct bucket_desc {    /* 16 bytes */
        void                    *page;
        struct bucket_desc      *next;
        void                    *freeptr;
        unsigned short          refcnt;
        unsigned short          bucket_size;
};

struct _bucket_dir {    /* 8 bytes */
        int                     size;
        struct bucket_desc      *chain;
};

/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
struct _bucket_dir bucket_dir[] = {
        { 16,   (struct bucket_desc *) 0},
        { 32,   (struct bucket_desc *) 0},
        { 64,   (struct bucket_desc *) 0},
        { 128,  (struct bucket_desc *) 0},
        { 256,  (struct bucket_desc *) 0},
        { 512,  (struct bucket_desc *) 0},
        { 1024, (struct bucket_desc *) 0},
        { 2048, (struct bucket_desc *) 0},
        { 4096, (struct bucket_desc *) 0},
        { 0,    (struct bucket_desc *) 0}};   /* End of list marker */
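
/*
 * Worked example (illustrative): kmalloc(100, ...) scans this table and
 * stops at the first size >= 100, i.e. the 128-byte bucket, so one 4k
 * page in that bucket holds PAGE_SIZE/128 = 32 objects.  This is also
 * why the table *must* stay sorted by size.
 */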

/*
 * This contains a linked list of free bucket descriptor blocks
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;

/*
 * This routine initializes a bucket descriptor page.
 */

/* It assumes it is called with interrupts on, and will
   return that way.  It can also sleep if priority != GFP_ATOMIC. */

static inline int init_bucket_desc(int priority)
{
        struct bucket_desc *bdesc, *first;
        int i;

        /* This turns interrupts on, so we should be careful. */
        first = bdesc = (struct bucket_desc *) get_free_page(priority);
        if (!bdesc)
                return 1;

        /* At this point it is possible that we have slept and
           free has been called etc.  So we might not actually need
           this page anymore. */

        /* Chain together all but the last descriptor on the new page. */
        for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) {
                bdesc->next = bdesc+1;
                bdesc++;
        }
        /*
         * This is done last, to avoid race conditions in case
         * get_free_page() sleeps and this routine gets called again....
         */
        cli();
        bdesc->next = free_bucket_desc;
        free_bucket_desc = first;
        sti();
        return 0;
}
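
/*
 * Layout sketch (illustrative): with 16-byte descriptors and 4k pages,
 * init_bucket_desc() carves a page into PAGE_SIZE/16 = 256 descriptors,
 * links descriptor i to descriptor i+1 for the first 255, and only then,
 * with interrupts off, splices the last one onto the old free list:
 *
 *      page: [d0] -> [d1] -> ... -> [d255] -> (old free_bucket_desc)
 */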

void *
kmalloc(unsigned int len, int priority)
{
        struct _bucket_dir      *bdir;
        struct bucket_desc      *bdesc;
        void                    *retval;

        /*
         * First we search the bucket_dir to find the right bucket chain
         * for this request.
         */

        /* The sizes are static so there is no reentry problem here. */
        for (bdir = bucket_dir; bdir->size; bdir++)
                if (bdir->size >= len)
                        break;

        if (!bdir->size) {
                /* This should be changed for sizes > 1 page. */
                printk("kmalloc called with impossibly large argument (%u)\n", len);
                return NULL;
        }

        /*
         * Now we search for a bucket descriptor which has free space
         */
        cli();  /* Avoid race conditions */
        for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
                if (bdesc->freeptr != NULL || bdesc->page == NULL)
                        break;
        /*
         * If we didn't find a bucket with free space, then we'll
         * allocate a new one.
         */
        if (!bdesc) {
                char *cp;
                int i;

                /* This must be a while loop because it is possible
                   to get interrupted after init_bucket_desc()
                   and before cli().  The interrupt could steal
                   our free descriptor. */
                while (!free_bucket_desc) {
                        sti();  /* This might happen anyway, so we
                                   might as well make it explicit. */
                        if (init_bucket_desc(priority))
                                return NULL;
                        cli();  /* Turn them back off. */
                }

                bdesc = free_bucket_desc;
                free_bucket_desc = bdesc->next;

                /* get_free_page() will turn interrupts back
                   on.  So we might as well do it
                   ourselves. */
                sti();
                bdesc->refcnt = 0;
                bdesc->bucket_size = bdir->size;
                cp = (char *) get_free_page(priority);
                bdesc->page = bdesc->freeptr = (void *) cp;

                if (!cp) {
                        /* Put bdesc back on the free list. */
                        cli();
                        bdesc->next = free_bucket_desc;
                        free_bucket_desc = bdesc;
                        sti();
                        return NULL;
                }

                /* Set up the chain of free objects */
                for (i = PAGE_SIZE/bdir->size; i > 1; i--) {
                        *((char **) cp) = cp + bdir->size;
                        cp += bdir->size;
                }
                *((char **) cp) = 0;

                /* Turn interrupts back off for putting the
                   thing onto the chain. */
                cli();
                /* Remember bdir is not changed. */
                bdesc->next = bdir->chain;      /* OK, link it in! */
                bdir->chain = bdesc;
        }

        retval = (void *) bdesc->freeptr;
        bdesc->freeptr = *((void **) retval);
        bdesc->refcnt++;
        sti();  /* OK, we're safe again */
        return retval;
}
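
/*
 * Free-chain sketch (illustrative): for the 1024-byte bucket, the setup
 * loop above threads the PAGE_SIZE/1024 = 4 objects of a fresh page
 * through their first word, and allocation just pops the head:
 *
 *      freeptr -> obj0 -> obj1 -> obj2 -> obj3 -> NULL
 *
 * where each "->" is the pointer stored in an object's first bytes.
 */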

/*
 * Here is the kfree routine.  If you know the size of the object that you
 * are freeing, then kfree_s() will use that information to speed up the
 * search for the bucket descriptor.
 *
 * We will #define a macro so that "kfree(x)" becomes "kfree_s(x, 0)"
 */
void kfree_s(void *obj, int size)
{
        void                    *page;
        struct _bucket_dir      *bdir;
        struct bucket_desc      *bdesc, *prev;

        /* Calculate what page this object lives in */
        page = (void *) ((unsigned long) obj & 0xfffff000);

        /* Now search the buckets looking for that page */
        for (bdir = bucket_dir; bdir->size; bdir++) {
                prev = 0;
                /* If size is zero then this conditional is always true */
                if (bdir->size >= size) {
                        /* We have to turn off interrupts here because
                           we are descending the chain.  If something
                           changes it in the middle we could suddenly
                           find ourselves descending the free list.
                           I think this would only cause a memory
                           leak, but better safe than sorry. */
                        cli();  /* To avoid race conditions */
                        for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
                                if (bdesc->page == page)
                                        goto found;
                                prev = bdesc;
                        }
                }
        }

        printk("Bad address passed to kernel kfree_s(%X, %d)\n", obj, size);
        sti();
        return;
found:
        /* Interrupts are off here. */
        *((void **) obj) = bdesc->freeptr;
        bdesc->freeptr = obj;
        bdesc->refcnt--;
        if (bdesc->refcnt == 0) {
                /*
                 * We need to make sure that prev is still accurate.  It
                 * may not be, if someone rudely interrupted us....
                 */
                if ((prev && (prev->next != bdesc)) ||
                    (!prev && (bdir->chain != bdesc)))
                        for (prev = bdir->chain; prev; prev = prev->next)
                                if (prev->next == bdesc)
                                        break;
                if (prev)
                        prev->next = bdesc->next;
                else {
                        if (bdir->chain != bdesc)
                                panic("kmalloc bucket chains corrupted");
                        bdir->chain = bdesc->next;
                }
                /* Return the descriptor to the free list and give
                   the page back to the system. */
                bdesc->next = free_bucket_desc;
                free_bucket_desc = bdesc;
                free_page((unsigned long) bdesc->page);
        }
        sti();
        return;
}
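
/*
 * The kfree() macro promised in the comment above is not defined in this
 * file; it is expected to live in a header (presumably <linux/mm.h>) as
 * something like:
 *
 *      #define kfree(x) kfree_s((x), 0)
 */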
