/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 * is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When malloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocates a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: malloc() and free() both call get_free_page() and free_page()
 *	in sections of code where interrupts are turned off, to allow
 *	malloc() and free() to be safely called from an interrupt routine.
 *	(We will probably need this functionality when networking code,
 *	particularly things like NFS, is added to Linux.)  However, this
 *	presumes that get_free_page() and free_page() are interrupt-level
 *	safe, which they may not be once paging is added.  If this is the
 *	case, we will need to modify malloc() to keep a few unused pages
 *	"pre-allocated" so that it can safely draw upon those pages if
 *	it is called from an interrupt routine.
 *
 *	Another concern is that get_free_page() should not sleep; if it
 *	does, the code is carefully ordered so as to avoid any race
 *	conditions.  The catch is that if malloc() is called re-entrantly,
 *	there is a chance that unnecessary pages will be grabbed from the
 *	system.  Except for the pages for the bucket descriptor page, the
 *	extra pages will eventually get released back to the system, though,
 *	so it isn't all that bad.
 */

/* I'm going to modify it to keep some free pages around.  Get free page
   can sleep, and tcp/ip needs to call malloc at interrupt time (Or keep
   big buffers around for itself.)  I guess I'll have return from
   syscall fill up the free page descriptors. -RAB */

/* since the advent of GFP_ATOMIC, I've changed the malloc code to
   use it and return NULL if it can't get a page. -RAB */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>

#include <asm/system.h>

/*
 * Per-page control block: records which page a bucket occupies, the
 * head of the page's internal free-object list, how many objects on
 * the page are currently allocated, and the object size it serves.
 */
struct bucket_desc {	/* 16 bytes */
	void			*page;		/* page holding this bucket's objects */
	struct bucket_desc	*next;		/* next bucket serving the same size */
	void			*freeptr;	/* head of free-object list within page */
	unsigned short		refcnt;		/* number of objects currently in use */
	unsigned short		bucket_size;	/* object size (bytes) this bucket serves */
};

/*
 * Directory entry: maps one object size to the chain of bucket
 * descriptors serving that size.
 */
struct _bucket_dir {	/* 8 bytes */
	int			size;		/* object size for this chain */
	struct bucket_desc	*chain;		/* first bucket descriptor of this size */
};

/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
86 */ 87 struct _bucket_dir bucket_dir[] = { 88 { 16, (struct bucket_desc *) 0}, 89 { 32, (struct bucket_desc *) 0}, 90 { 64, (struct bucket_desc *) 0}, 91 { 128, (struct bucket_desc *) 0}, 92 { 256, (struct bucket_desc *) 0}, 93 { 512, (struct bucket_desc *) 0}, 94 { 1024, (struct bucket_desc *) 0}, 95 { 2048, (struct bucket_desc *) 0}, 96 { 4096, (struct bucket_desc *) 0}, 97 { 0, (struct bucket_desc *) 0}}; /* End of list marker */ 98 99 /* 100 * This contains a linked list of free bucket descriptor blocks 101 */ 102 static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0; 103 104 /* 105 * This routine initializes a bucket description page. 106 */ 107 static inline int init_bucket_desc() /* */ 108 { 109 struct bucket_desc *bdesc, *first; 110 int i; 111 /* this turns interrupt on, so we should be carefull. */ 112 first = bdesc = (struct bucket_desc *) get_free_page(GFP_ATOMIC); 113 if (!bdesc) 114 return 1; 115 for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) { 116 bdesc->next = bdesc+1; 117 bdesc++; 118 } 119 /* 120 * This is done last, to avoid race conditions in case 121 * get_free_page() sleeps and this routine gets called again.... 122 */ 123 /* Get free page will not sleep because of the GFP_ATOMIC */ 124 bdesc->next = free_bucket_desc; 125 free_bucket_desc = first; 126 return (0); 127 } 128 129 void *malloc(unsigned int len) /* */ 130 { 131 struct _bucket_dir *bdir; 132 struct bucket_desc *bdesc; 133 void *retval; 134 135 /* 136 * First we search the bucket_dir to find the right bucket change 137 * for this request. 
138 */ 139 for (bdir = bucket_dir; bdir->size; bdir++) 140 if (bdir->size >= len) 141 break; 142 143 if (!bdir->size) { 144 printk("malloc called with impossibly large argument (%d)\n", len); 145 return NULL; 146 } 147 len = bdir->size; 148 /* 149 * Now we search for a bucket descriptor which has free space 150 */ 151 cli(); /* Avoid race conditions */ 152 for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) 153 if (bdesc->freeptr) 154 break; 155 /* 156 * If we didn't find a bucket with free space, then we'll 157 * allocate a new one. 158 */ 159 if (!bdesc) { 160 char *cp; 161 int i; 162 163 if (!free_bucket_desc) 164 if (init_bucket_desc()) { 165 sti(); 166 return NULL; 167 } 168 bdesc = free_bucket_desc; 169 free_bucket_desc = bdesc->next; 170 bdesc->refcnt = 0; 171 bdesc->bucket_size = len; 172 bdesc->page = bdesc->freeptr = 173 (void *) cp = get_free_page(GFP_ATOMIC); 174 if (!cp) { 175 sti(); 176 return NULL; 177 } 178 /* Set up the chain of free objects */ 179 for (i=PAGE_SIZE/len; i > 1; i--) { 180 *((char **) cp) = cp + len; 181 cp += len; 182 } 183 *((char **) cp) = 0; 184 bdesc->next = bdir->chain; /* OK, link it in! */ 185 bdir->chain = bdesc; 186 } 187 retval = (void *) bdesc->freeptr; 188 bdesc->freeptr = *((void **) retval); 189 bdesc->refcnt++; 190 sti(); /* OK, we're safe again */ 191 memset(retval, 0, len); 192 return retval; 193 } 194 195 /* 196 * Here is the free routine. If you know the size of the object that you 197 * are freeing, then free_s() will use that information to speed up the 198 * search for the bucket descriptor. 
199 * 200 * We will #define a macro so that "free(x)" is becomes "free_s(x, 0)" 201 */ 202 void free_s(void *obj, int size) /* */ 203 { 204 void *page; 205 struct _bucket_dir *bdir; 206 struct bucket_desc *bdesc, *prev; 207 208 /* Calculate what page this object lives in */ 209 page = (void *) ((unsigned long) obj & 0xfffff000); 210 /* Now search the buckets looking for that page */ 211 for (bdir = bucket_dir; bdir->size; bdir++) { 212 prev = 0; 213 /* If size is zero then this conditional is always false */ 214 if (bdir->size < size) 215 continue; 216 for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) { 217 if (bdesc->page == page) 218 goto found; 219 prev = bdesc; 220 } 221 } 222 printk("Bad address passed to kernel free_s()"); 223 return; 224 found: 225 cli(); /* To avoid race conditions */ 226 *((void **)obj) = bdesc->freeptr; 227 bdesc->freeptr = obj; 228 bdesc->refcnt--; 229 if (bdesc->refcnt == 0) { 230 /* 231 * We need to make sure that prev is still accurate. It 232 * may not be, if someone rudely interrupted us.... 233 */ 234 if ((prev && (prev->next != bdesc)) || 235 (!prev && (bdir->chain != bdesc))) 236 for (prev = bdir->chain; prev; prev = prev->next) 237 if (prev->next == bdesc) 238 break; 239 if (prev) 240 prev->next = bdesc->next; 241 else { 242 if (bdir->chain != bdesc) 243 panic("malloc bucket chains corrupted"); 244 bdir->chain = bdesc->next; 245 } 246 free_page((unsigned long) bdesc->page); 247 bdesc->next = free_bucket_desc; 248 free_bucket_desc = bdesc; 249 } 250 sti(); 251 return; 252 }