/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 *	is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When kmalloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocate a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: kmalloc() and kfree() both call get_free_page() and free_page()
 *	in sections of code where interrupts are turned off, to allow
 *	kmalloc() and kfree() to be safely called from an interrupt routine.
 *	(We will probably need this functionality when networking code,
 *	particularly things like NFS, is added to Linux.)  However, this
 *	presumes that get_free_page() and free_page() are interrupt-level
 *	safe, which they may not be once paging is added.  If this is the
 *	case, we will need to modify kmalloc() to keep a few unused pages
 *	"pre-allocated" so that it can safely draw upon those pages if
 *	it is called from an interrupt routine.
 *
 *	Another concern is that get_free_page() should not sleep; if it
 *	does, the code is carefully ordered so as to avoid any race
 *	conditions.  The catch is that if kmalloc() is called re-entrantly,
 *	there is a chance that unnecessary pages will be grabbed from the
 *	system.  Except for the pages for the bucket descriptor page, the
 *	extra pages will eventually get released back to the system, though,
 *	so it isn't all that bad.
 */
/* I'm going to modify it to keep some free pages around.  Get free page
   can sleep, and tcp/ip needs to call kmalloc at interrupt time (Or keep
   big buffers around for itself.)  I guess I'll have return from
   syscall fill up the free page descriptors. -RAB */

/* since the advent of GFP_ATOMIC, I've changed the kmalloc code to
   use it and return NULL if it can't get a page. -RAB */
/* (mostly just undid the previous changes -RAB) */

/* I've added the priority argument to kmalloc so routines can
   sleep on memory if they want. - RAB */

/* I've also got to make sure that kmalloc is reentrant now. */

/* Debugging support: add file/line info, add beginning+end markers. -M.U- */
64 #include <linux/kernel.h>
65 #include <linux/mm.h>
66 #include <linux/string.h>
67
68 #include <asm/system.h>
69
struct bucket_desc {	/* 16 bytes */
	void *page;			/* page of objects this descriptor manages */
	struct bucket_desc *next;	/* next descriptor on bdir chain, or on free_bucket_desc list */
	void *freeptr;			/* head of the page's free-object list (NULL when full) */
	unsigned short refcnt;		/* number of objects currently allocated from the page */
	unsigned short bucket_size;	/* object size this bucket serves */
};
77
struct _bucket_dir {	/* 8 bytes */
	unsigned int size;		/* object size served by this directory entry */
	struct bucket_desc *chain;	/* first bucket descriptor holding objects of this size */
};
82
#ifdef CONFIG_DEBUG_MALLOC

/*
 * Debug header prepended to each object: records where the block was
 * allocated, the last place it was verified OK, its payload size, and
 * a magic word identifying its current state (see DEB_MAGIC_* below).
 */
struct hdr_start {
	const char *file;	/* allocation (or free) site: file */
	const char *ok_file;	/* last site that checked the block OK */
	unsigned short line;	/* allocation (or free) site: line */
	unsigned short ok_line;
	unsigned short size;	/* payload size, excluding both headers */
	int magic;
};
/* Trailer placed after the payload to catch overruns. */
struct hdr_end {
	int magic;		/* DEB_MAGIC_END while intact */
};

#define DEB_MAGIC_FREE	0x13579BDF	/* free block */
#define DEB_MAGIC_ALLOC	0x2468ACE0	/* allocated block */
#define DEB_MAGIC_USED	0x147AD036	/* allocated but bad */
#define DEB_MAGIC_FREED	0x258BE169	/* free but abused */

#define DEB_MAGIC_END	0x369CF258	/* end marker */

#endif
/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
struct _bucket_dir bucket_dir[] = {
#ifndef CONFIG_DEBUG_MALLOC	/* Debug headers have too much overhead */
	{ 16,	(struct bucket_desc *) 0},
#endif
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048,	(struct bucket_desc *) 0},
	{ 4096,	(struct bucket_desc *) 0},
	{ 0,	(struct bucket_desc *) 0}};	/* End of list marker */

/*
 * This contains a linked list of free bucket descriptor blocks
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;
/*
 * This routine initializes a bucket descriptor page: it chains every
 * descriptor on the page together and prepends the whole page to the
 * free_bucket_desc list.
 */

/* NOTE(review): despite the old comment claiming otherwise, this routine
   does a cli() before linking the page in and returns WITHOUT restoring
   interrupts -- kmalloc() relies on that (it takes a descriptor off
   free_bucket_desc immediately after the call, still under cli). */

static inline void init_bucket_desc(unsigned long page)
{
	struct bucket_desc *bdesc;
	int i;

	bdesc = (struct bucket_desc *) page;
	/* Link each descriptor to its successor; the loop stops one short,
	   leaving bdesc pointing at the last descriptor on the page. */
	for (i = PAGE_SIZE/sizeof(struct bucket_desc); --i > 0; bdesc++ )
		bdesc->next = bdesc+1;
	/*
	 * This is done last, to avoid race conditions in case
	 * get_free_page() sleeps and this routine gets called again....
	 */
	cli();
	bdesc->next = free_bucket_desc;
	free_bucket_desc = (struct bucket_desc *) page;
}
/*
 * Re-organized some code to give cleaner assembly output for easier
 * verification.. LBT
 */
/*
 * Allocate 'len' bytes (at most PAGE_SIZE, less debug headers when
 * CONFIG_DEBUG_MALLOC is on) from the smallest bucket that fits.
 * 'priority' is passed through to __get_free_page(); with GFP_ATOMIC
 * the allocation never sleeps.  Returns NULL on failure or if 'len'
 * is too large for any bucket.  Safe to call from interrupt context:
 * all list manipulation happens under cli().
 */
#ifdef CONFIG_DEBUG_MALLOC
void *
deb_kmalloc(const char *deb_file, unsigned short deb_line,
	unsigned int len, int priority)
#else
void *
kmalloc(unsigned int len, int priority)
#endif
{
	int i;
	unsigned long flags;
	unsigned long page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;
	void *retval;

#ifdef CONFIG_DEBUG_MALLOC
	/* Reserve room for the debug header and end marker around the payload. */
	len += sizeof(struct hdr_start)+sizeof(struct hdr_end);
#endif
	/*
	 * First we search the bucket_dir to find the right bucket chain
	 * for this request.
	 */

	/* The sizes are static so there is no reentry problem here. */
	bdir = bucket_dir;
	for (bdir = bucket_dir ; bdir->size < len ; bdir++) {
		if (!bdir->size)
			goto too_large;
	}

	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	save_flags(flags);
	cli();			/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
		if (bdesc->freeptr)
			goto found_bdesc;
	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */

	/*
	 * Note that init_bucket_desc() does its
	 * own cli() before returning, and guarantees that
	 * there is a bucket desc in the page.
	 */
	if (!free_bucket_desc) {
		/* Interrupts are re-enabled here: __get_free_page() may sleep
		   (unless priority is GFP_ATOMIC).  A re-entrant kmalloc() in
		   this window may grab extra pages -- see file header. */
		restore_flags(flags);
		if (!(page = __get_free_page(priority)))
			return NULL;
		init_bucket_desc(page);
	}

	/* Interrupts are off here, either from the cli() above or from
	   init_bucket_desc(), so taking a descriptor is safe. */
	bdesc = free_bucket_desc;
	free_bucket_desc = bdesc->next;
	restore_flags(flags);

	if (!(page = __get_free_page(priority))) {
		/*
		 * Out of memory? Put the bucket descriptor back on the free list
		 */
		cli();
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		restore_flags(flags);
		return NULL;
	}

	bdesc->refcnt = 0;
	bdesc->bucket_size = bdir->size;
	bdesc->page = bdesc->freeptr = (void *) page;

	/* Set up the chain of free objects: the first word of each free
	   object points at the next one; the last points at NULL. */
	for (i = PAGE_SIZE/bdir->size; i > 0 ; i--) {
#ifdef CONFIG_DEBUG_MALLOC
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) page;
		he = (struct hdr_end *)(page+(bdir->size-sizeof(struct hdr_end)));
		hd->magic = DEB_MAGIC_FREE;
		hd->file = hd->ok_file = "(expand)";
		hd->line = hd->ok_line = 0;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he->magic = DEB_MAGIC_END;

		/* Poison the free payload so use-after-free is detectable. */
		memset(hd+1, 0xF8, hd->size);

		/* Free-list link lives just past the debug header. */
		*((void **) (hd+1)) = (i==1) ? NULL : (void *)(page + bdir->size);
#else
		*((void **) page) = (i==1) ? NULL : (void *)(page + bdir->size);
#endif
		page += bdir->size;
	}

	/* turn interrupts back off for putting the
	   thing onto the chain. */
	cli();
	/* remember bdir is not changed. */
	bdesc->next = bdir->chain;	/* OK, link it in! */
	bdir->chain = bdesc;

found_bdesc:
	/* Interrupts are off here; pop the first free object. */
	retval = (void *) bdesc->freeptr;
#ifdef CONFIG_DEBUG_MALLOC
	bdesc->freeptr = *((void **) (((char *)retval)+sizeof(struct hdr_start)));
#else
	bdesc->freeptr = *((void **) retval);
#endif
	bdesc->refcnt++;
	restore_flags(flags);	/* OK, we're safe again */
#ifdef CONFIG_DEBUG_MALLOC
	{
		struct hdr_start *hd;
		struct hdr_end *he;

		hd = (struct hdr_start *) retval;
		retval = hd+1;	/* caller sees the payload, not the header */
		len -= sizeof(struct hdr_start)+sizeof(struct hdr_end);
		/* The block we popped must have been marked free. */
		if (hd->magic != DEB_MAGIC_FREE && hd->magic != DEB_MAGIC_FREED) {
			printk("DEB_MALLOC allocating %s block 0x%x (head 0x%x) from %s:%d, magic %x\n",
				(hd->magic == DEB_MAGIC_ALLOC) ? "nonfree" : "trashed",
				retval, hd, deb_file, deb_line, hd->magic);
			return NULL;
		}
		if (len > hd->size || len > bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end)) {
			printk("DEB_MALLOC got %x:%x-byte block, wanted %x, from %s:%d, last %s:%d\n",
				hd->size, bdir->size, len, hd->file, hd->line, deb_file, deb_line);
			return NULL;
		}
		/* Verify the 0xF8 free-poison is intact (skipping the first
		   4 bytes, which held the free-list link). */
		{
			unsigned char *x = (unsigned char *) retval;
			unsigned short pos = 4;
			x += pos;
			while (pos < hd->size) {
				if (*x++ != 0xF8) {
					printk("DEB_MALLOC used 0x%x:%x(%x) while free, from %s:%d\n",
						retval, pos, hd->size, hd->file, hd->line);
					return NULL;
				}
				pos++;
			}
		}
		he = (struct hdr_end *)(((char *)retval)+hd->size);
		if (he->magic != DEB_MAGIC_END) {
			printk("DEB_MALLOC overran 0x%x:%d while free, from %s:%d\n", retval, hd->size, hd->file, hd->line);
		}
		/* Mark the block allocated: fresh 0xF0 fill, new end marker at
		   the (possibly smaller) requested length, record the caller. */
		memset(retval, 0xf0, len);
		he = (struct hdr_end *)(((char *)retval)+len);
		hd->file = hd->ok_file = deb_file;
		hd->line = hd->ok_line = deb_line;
		hd->size = len;
		hd->magic = DEB_MAGIC_ALLOC;
		he->magic = DEB_MAGIC_END;
	}
#endif
	return retval;

too_large:
	/* This should be changed for sizes > 1 page. */
	printk("kmalloc called with impossibly large argument (%d)\n", len);
	return NULL;
}
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Sanity-check an allocated block's debug headers without freeing it:
 * verifies the magic word, the stored size against 'size' (pass 0 to
 * skip the size check), and the end marker; on success records the
 * caller as the last site where the block was seen OK.
 */
void deb_kcheck_s(const char *deb_file, unsigned short deb_line,
	void *obj, int size)
{
	struct hdr_start *hd;
	struct hdr_end *he;

	if (!obj)
		return;
	hd = (struct hdr_start *) obj;
	hd--;	/* debug header sits just before the payload */

	if (hd->magic != DEB_MAGIC_ALLOC) {
		if (hd->magic == DEB_MAGIC_FREE) {
			printk("DEB_MALLOC Using free block of 0x%x at %s:%d, by %s:%d, wasOK %s:%d\n",
				obj, deb_file, deb_line, hd->file, hd->line, hd->ok_file, hd->ok_line);
			/* For any other condition it is either superfluous or dangerous to print something. */
			hd->magic = DEB_MAGIC_FREED;
		}
		return;
	}
	if (hd->size != size) {
		if (size != 0) {
			printk("DEB_MALLOC size for 0x%x given as %d, stored %d, at %s:%d, wasOK %s:%d\n",
				obj, size, hd->size, deb_file, deb_line, hd->ok_file, hd->ok_line);
		}
		size = hd->size;	/* trust the stored size from here on */
	}
	he = (struct hdr_end *)(((char *)obj)+size);
	if (he->magic != DEB_MAGIC_END) {
		printk("DEB_MALLOC overran block 0x%x:%d, at %s:%d, wasOK %s:%d\n",
			obj, hd->size, deb_file, deb_line, hd->ok_file, hd->ok_line);
		hd->magic = DEB_MAGIC_USED;	/* allocated but known-bad */
		return;
	}
	hd->ok_file = deb_file;
	hd->ok_line = deb_line;
}
#endif
/*
 * Here is the kfree routine.  If you know the size of the object that you
 * are freeing, then kfree_s() will use that information to speed up the
 * search for the bucket descriptor.
 *
 * We will #define a macro so that "kfree(x)" becomes "kfree_s(x, 0)"
 */
#ifdef CONFIG_DEBUG_MALLOC
void deb_kfree_s(const char *deb_file, unsigned short deb_line,
	void *obj, int size)
#else
void kfree_s(void *obj, int size)
#endif
{
	unsigned long flags;
	void *page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc, *prev;

	if (!obj)
		return;
#ifdef CONFIG_DEBUG_MALLOC
	/* Validate the debug headers before touching any free lists:
	   double frees and overruns are reported but the free proceeds. */
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;

		if (hd->magic == DEB_MAGIC_FREE) {
			printk("DEB_MALLOC dup free of 0x%x at %s:%d by %s:%d, wasOK %s:%d\n",
				obj, deb_file, deb_line, hd->file, hd->line, hd->ok_file, hd->ok_line);
			return;
		}
		if (hd->size != size) {
			if (size != 0) {
				if (hd->magic != DEB_MAGIC_USED)
					printk("DEB_MALLOC size for 0x%x given as %d, stored %d, at %s:%d, wasOK %s:%d\n",
						obj, size, hd->size, deb_file, deb_line, hd->ok_file, hd->ok_line);
			}
			size = hd->size;
		}
		he = (struct hdr_end *)(((char *)obj)+size);
		if (he->magic != DEB_MAGIC_END) {
			if (hd->magic != DEB_MAGIC_USED)
				printk("DEB_MALLOC overran block 0x%x:%d, at %s:%d, from %s:%d, wasOK %s:%d\n",
					obj, hd->size, deb_file, deb_line, hd->file, hd->line, hd->ok_file, hd->ok_line);
		}
		/* From here on 'size' is the full bucket-object size. */
		size += sizeof(struct hdr_start)+sizeof(struct hdr_end);
	}
#endif
	save_flags(flags);
	/* Calculate what page this object lives in */
	page = (void *) ((unsigned long) obj & PAGE_MASK);

	/* Now search the buckets looking for that page */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		prev = 0;
		/* If size is zero then this conditional is always true */
		if (bdir->size >= size) {
			/* We have to turn off interrupts here because
			   we are descending the chain.  If something
			   changes it in the middle we could suddenly
			   find ourselves descending the free list.
			   I think this would only cause a memory
			   leak, but better safe than sorry. */
			cli();	/* To avoid race conditions */
			for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
				if (bdesc->page == page)
					goto found;
				prev = bdesc;
			}
		}
	}

	restore_flags(flags);
	printk("Bad address passed to kernel kfree_s(%p, %d)\n", obj, size);
#ifdef CONFIG_DEBUG_MALLOC
	printk("Offending code: %s:%d\n", deb_file, deb_line);
#else
	/* NOTE(review): peeks at the return address on the stack just
	   above the first argument -- i386 cdecl specific. */
	printk("Offending eip: %08x\n", ((unsigned long *) &obj)[-1]);
#endif
	return;

found:
	/* interrupts are off here. */
#ifdef CONFIG_DEBUG_MALLOC

	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;

		/* Re-mark the block free: record the freeing site, restore the
		   full payload size, poison with 0xF8, then plant the free-list
		   link in the payload's first word before rewinding obj to the
		   header (the real start of the bucket object). */
		hd->file = deb_file;
		hd->line = deb_line;
		hd->magic = DEB_MAGIC_FREE;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he = (struct hdr_end *)(((char *)obj)+hd->size);
		memset(obj, 0xf8, hd->size);
		he->magic = DEB_MAGIC_END;
		*((void **)obj) = bdesc->freeptr;
		obj = hd;
	}
#else
	*((void **)obj) = bdesc->freeptr;
#endif

	bdesc->freeptr = obj;
	bdesc->refcnt--;
	if (bdesc->refcnt == 0) {
		/*
		 * We need to make sure that prev is still accurate.  It
		 * may not be, if someone rudely interrupted us....
		 */
		if ((prev && (prev->next != bdesc)) ||
		    (!prev && (bdir->chain != bdesc)))
			for (prev = bdir->chain; prev; prev = prev->next)
				if (prev->next == bdesc)
					break;
		if (prev)
			prev->next = bdesc->next;
		else {
			if (bdir->chain != bdesc)
				panic("kmalloc bucket chains corrupted");
			bdir->chain = bdesc->next;
		}
		/* Return the descriptor to the free list and the page to
		   the system. */
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		free_page((unsigned long) bdesc->page);
	}
	restore_flags(flags);
	return;
}
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Dump a report of every currently-allocated block into 'buffer'
 * (one "address:size alloc_file:line ok_file:line" row per block),
 * walking all bucket chains under cli().  Output is cut off with
 * "...\n" as it approaches one page.  Returns the number of
 * characters written.
 */
int get_malloc(char *buffer)
{
	int len = 0;
	int i;
	unsigned long flags;
	void *page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;

	save_flags(flags);
	cli();	/* To avoid race conditions */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
			page = bdesc->page;
			/* Scan every object slot on the bucket's page. */
			for (i = PAGE_SIZE/bdir->size; i > 0 ; i--) {
				struct hdr_start *hd;
				hd = (struct hdr_start *)page;
				if (hd->magic == DEB_MAGIC_ALLOC) {
					/* Leave room for one more row plus "...". */
					if (len > PAGE_SIZE-80) {
						restore_flags(flags);
						len += sprintf(buffer+len, "...\n");
						return len;
					}
					len += sprintf(buffer+len, "%08x:%03x %s:%d %s:%d\n",
						(long)(page+sizeof(struct hdr_start)), hd->size, hd->file, hd->line, hd->ok_file, hd->ok_line);
				}
				/* NOTE(review): byte arithmetic on a void* -- a GCC
				   extension this kernel code relies on. */
				page += bdir->size;
			}
		}
	}

	restore_flags(flags);
	return len;
}
#endif