/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 * is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size. When all of the objects on a page are released,
 * the page can be returned to the general free pool. When kmalloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocates a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page. Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page(). However, unlike buckets,
 * pages devoted to bucket descriptor pages are never released back to the
 * system. Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.) If the kernel is using
 * that much allocated memory, it's probably doing something wrong. :-)
 *
 * Note: kmalloc() and kfree() both call get_free_page() and free_page()
 * in sections of code where interrupts are turned off, to allow
 * kmalloc() and kfree() to be safely called from an interrupt routine.
 * (We will probably need this functionality when networking code,
 * particularly things like NFS, is added to Linux.) However, this
 * presumes that get_free_page() and free_page() are interrupt-level
 * safe, which they may not be once paging is added.
If this is the
 * case, we will need to modify kmalloc() to keep a few unused pages
 * "pre-allocated" so that it can safely draw upon those pages if
 * it is called from an interrupt routine.
 *
 * Another concern is that get_free_page() should not sleep; if it
 * does, the code is carefully ordered so as to avoid any race
 * conditions. The catch is that if kmalloc() is called re-entrantly,
 * there is a chance that unnecessary pages will be grabbed from the
 * system. Except for the pages for the bucket descriptor page, the
 * extra pages will eventually get released back to the system, though,
 * so it isn't all that bad.
 */
48 /* I'm going to modify it to keep some free pages around. Get free page 49 can sleep, and tcp/ip needs to call kmalloc at interrupt time (Or keep 50 big buffers around for itself.) I guess I'll have return from 51 syscall fill up the free page descriptors. -RAB */ 52
53 /* since the advent of GFP_ATOMIC, I've changed the kmalloc code to 54 use it and return NULL if it can't get a page. -RAB */ 55 /* (mostly just undid the previous changes -RAB) */ 56
57 /* I've added the priority argument to kmalloc so routines can 58 sleep on memory if they want. - RAB */ 59
60 /* I've also got to make sure that kmalloc is reentrant now. */ 61
62 /* Debugging support: add file/line info, add beginning+end markers. -M.U- */ 63
64 #include <linux/kernel.h>
65 #include <linux/mm.h>
66 #include <linux/string.h>
67 #include <linux/malloc.h>
68
69 #include <asm/system.h>
70
/*
 * Per-page control block for one bucket page.  Descriptors live on
 * dedicated descriptor pages (see init_bucket_desc()), never on the
 * bucket page they describe.
 */
struct bucket_desc {	/* 16 bytes */
	void			*page;		/* the bucket page this descriptor manages */
	struct bucket_desc	*next;		/* next in bdir->chain, or in free_bucket_desc list */
	void			*freeptr;	/* head of the free-object list on the page */
	unsigned short		refcnt;		/* objects currently handed out from this page */
	unsigned short		bucket_size;	/* object size this page serves (bdir->size) */
};
78
/*
 * One entry of the size-ordered bucket directory: an object size and
 * the chain of bucket descriptors currently serving that size.
 */
struct _bucket_dir {	/* 8 bytes */
	unsigned int		size;		/* object size; 0 marks the end of the table */
	struct bucket_desc	*chain;		/* descriptors for pages holding this size */
};
83
#ifdef CONFIG_DEBUG_MALLOC

/*
 * Debugging header placed in front of every object.  Records the call
 * site that last allocated or freed the block (file/line), the call
 * site where the block last checked out intact (ok_file/ok_line), the
 * usable payload size, and a state magic.
 */
struct hdr_start {
	const char *file;	/* last alloc/free call site */
	const char *ok_file;	/* last call site where the block was verified OK */
	unsigned short line;
	unsigned short ok_line;
	unsigned short size;	/* usable bytes between header and trailer */
	int magic;		/* one of the DEB_MAGIC_* states below */
};
/* Trailer placed just past the payload, to catch overruns. */
struct hdr_end {
	int magic;		/* DEB_MAGIC_END while the block is intact */
};

#define DEB_MAGIC_FREE	0x13579BDF	/* free block */
#define DEB_MAGIC_ALLOC	0x2468ACE0	/* allocated block */
#define DEB_MAGIC_USED	0x147AD036	/* allocated but bad */
#define DEB_MAGIC_FREED	0x258BE169	/* free but abused */

#define DEB_MAGIC_END	0x369CF258	/* end marker */
#endif
/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
struct _bucket_dir bucket_dir[] = {
#ifndef CONFIG_DEBUG_MALLOC	/* Debug headers have too much overhead */
	{ 16,	(struct bucket_desc *) 0},
#endif
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048,	(struct bucket_desc *) 0},
	{ 4096,	(struct bucket_desc *) 0},
	{ 0,	(struct bucket_desc *) 0}};	/* End of list marker */
/*
 * This contains a linked list of free bucket descriptor blocks.
 * It is refilled a page at a time by init_bucket_desc(); per the file
 * header, descriptor pages are never given back to the system.
 * Accessed only with interrupts off (cli()).
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;
/*
 * This routine initializes a bucket description page: it carves 'page'
 * into bucket descriptors, links them into a list, and pushes the whole
 * batch onto free_bucket_desc.
 */

/* NOTE(review): an earlier comment here claimed this returns with
   interrupts on.  It does not: the cli() below has no matching
   restore, and kmalloc() explicitly relies on interrupts still being
   off when this returns (it pops free_bucket_desc right after the
   call).  The caller is responsible for restoring the flags. */

static inline void init_bucket_desc(unsigned long page)
{
	struct bucket_desc *bdesc;
	int i;

	bdesc = (struct bucket_desc *) page;
	/* Link each descriptor to the next.  The loop stops one short:
	   the final descriptor's 'next' is patched below under cli(). */
	for (i = PAGE_SIZE/sizeof(struct bucket_desc); --i > 0; bdesc++ )
		bdesc->next = bdesc+1;
	/*
	 * This is done last, to avoid race conditions in case
	 * get_free_page() sleeps and this routine gets called again....
	 */
	cli();
	bdesc->next = free_bucket_desc;		/* splice batch ahead of old list */
	free_bucket_desc = (struct bucket_desc *) page;
}
/*
 * Re-organized some code to give cleaner assembly output for easier
 * verification.. LBT
 */

/*
 * Allocate a block of at least 'len' bytes from the bucket pools
 * (requests larger than the biggest bucket fail).  'priority' is
 * passed to __get_free_page() when a fresh page is needed; returns
 * NULL if no page could be obtained, or (debug build) if the chosen
 * block's guard data turns out to be corrupted.
 */
#ifdef CONFIG_DEBUG_MALLOC
void *
deb_kmalloc(const char *deb_file, unsigned short deb_line,
	 unsigned int len, int priority)
#else
void *
kmalloc(unsigned int len, int priority)
#endif
{
	int i;
	unsigned long flags;
	unsigned long page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;
	void *retval;

#ifdef CONFIG_DEBUG_MALLOC
	/* Grow the request so the guard header and trailer fit around it. */
	len += sizeof(struct hdr_start)+sizeof(struct hdr_end);
#endif
	/*
	 * First we search the bucket_dir to find the right bucket change
	 * for this request.
	 */

	/* The sizes are static so there is no reentry problem here. */
	bdir = bucket_dir;
	for (bdir = bucket_dir ; bdir->size < len ; bdir++) {
		if (!bdir->size)
			goto too_large;		/* hit the 0-size end marker */
	}

	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	save_flags(flags);
	cli();			/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
		if (bdesc->freeptr)
			goto found_bdesc;
	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */

	/*
	 * Note that init_bucket_descriptor() does its
	 * own cli() before returning, and guarantees that
	 * there is a bucket desc in the page.
	 */
	if (!free_bucket_desc) {
		/* Interrupts must be on across __get_free_page(), which
		   may sleep unless priority is GFP_ATOMIC.
		   NOTE(review): a re-entrant kmalloc() in this window can
		   also allocate a descriptor page; per the file header
		   that extra page is merely wasted, not corrupted. */
		restore_flags(flags);
		if(!(page=__get_free_page(priority)))
			return NULL;
		init_bucket_desc(page);
	}

	/* Interrupts are off here -- either from our cli() above, or from
	   the one init_bucket_desc() leaves in force. */
	bdesc = free_bucket_desc;
	free_bucket_desc = bdesc->next;
	restore_flags(flags);

	if(!(page=__get_free_page(priority))) {
		/*
		 * Out of memory? Put the bucket descriptor back on the free list
		 */
		cli();
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		restore_flags(flags);
		return NULL;
	}

	/* bdesc is private to us until linked into bdir->chain below, so
	   it is safe to initialize it with interrupts enabled. */
	bdesc->refcnt = 0;
	bdesc->bucket_size = bdir->size;
	bdesc->page = bdesc->freeptr = (void *) page;

	/* Set up the chain of free objects */
	for (i=PAGE_SIZE/bdir->size; i > 0 ; i--) {
#ifdef CONFIG_DEBUG_MALLOC
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) page;
		he = (struct hdr_end *)(page+(bdir->size-sizeof(struct hdr_end)));
		hd->magic = DEB_MAGIC_FREE;
		hd->file = hd->ok_file = "(expand)";
		hd->line = hd->ok_line = 0;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he->magic = DEB_MAGIC_END;

		/* Poison the payload so use-while-free is detectable. */
		memset(hd+1,0xF8,hd->size);

		/* The free-list link lives in the first bytes of the payload. */
		*((void **) (hd+1)) = (i==1) ? NULL : (void *)(page + bdir->size);
#else
		*((void **) page) = (i==1) ? NULL : (void *)(page + bdir->size);
#endif
		page += bdir->size;
	}

	/* turn interrupts back off for putting the
	   thing onto the chain. */
	cli();
	/* remember bdir is not changed. */
	bdesc->next = bdir->chain;	/* OK, link it in! */
	bdir->chain = bdesc;

found_bdesc:
	/* Interrupts are off.  Pop the first free object off the page. */
	retval = (void *) bdesc->freeptr;
#ifdef CONFIG_DEBUG_MALLOC
	bdesc->freeptr = *((void **) (((char *)retval)+sizeof(struct hdr_start)));
#else
	bdesc->freeptr = *((void **) retval);
#endif
	bdesc->refcnt++;
	restore_flags(flags);	/* OK, we're safe again */
#ifdef CONFIG_DEBUG_MALLOC
	{
		struct hdr_start *hd;
		struct hdr_end *he;

		hd = (struct hdr_start *) retval;
		retval = hd+1;
		len -= sizeof(struct hdr_start)+sizeof(struct hdr_end);
		/* The block must still carry a "free" magic; anything else
		   means the free list was trampled. */
		if(hd->magic != DEB_MAGIC_FREE && hd->magic != DEB_MAGIC_FREED) {
			printk("DEB_MALLOC allocating %s block 0x%x (head 0x%x) from %s:%d, magic %x\n",
				(hd->magic == DEB_MAGIC_ALLOC) ? "nonfree" : "trashed",
				retval,hd,deb_file,deb_line,hd->magic);
			return NULL;
		}
		if(len > hd->size || len > bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end)) {
			printk("DEB_MALLOC got %x:%x-byte block, wanted %x, from %s:%d, last %s:%d\n",
				hd->size,bdir->size,len,hd->file,hd->line,deb_file,deb_line);
			return NULL;
		}
		{
			/* Verify the 0xF8 free poison.  The scan starts at
			   offset 4, skipping the free-list link stored at the
			   payload start (assumes 32-bit pointers -- i386;
			   TODO confirm for other targets). */
			unsigned char *x = (unsigned char *) retval;
			unsigned short pos = 4;
			x += pos;
			while(pos < hd->size) {
				if(*x++ != 0xF8) {
					printk("DEB_MALLOC used 0x%x:%x(%x) while free, from %s:%d\n",
						retval,pos,hd->size,hd->file,hd->line);
					return NULL;
				}
				pos++;
			}
		}
		he = (struct hdr_end *)(((char *)retval)+hd->size);
		if(he->magic != DEB_MAGIC_END) {
			printk("DEB_MALLOC overran 0x%x:%d while free, from %s:%d\n",retval,hd->size,hd->file,hd->line);
		}
		/* Hand the block out poisoned with 0xf0 and move the end
		   marker to just past the requested length. */
		memset(retval, 0xf0, len);
		he = (struct hdr_end *)(((char *)retval)+len);
		hd->file = hd->ok_file = deb_file;
		hd->line = hd->ok_line = deb_line;
		hd->size = len;
		hd->magic = DEB_MAGIC_ALLOC;
		he->magic = DEB_MAGIC_END;
	}
#endif
	return retval;

too_large:
	/* This should be changed for sizes > 1 page. */
	printk("kmalloc called with impossibly large argument (%d)\n", len);
	return NULL;
}
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Sanity-check an allocated block without freeing it: verify the
 * leading magic, the caller-supplied size, and the end marker, and
 * record the call site as the last place the block was seen intact.
 */
void deb_kcheck_s(const char *deb_file, unsigned short deb_line,
		void *obj, int size)
{
	struct hdr_start *head;
	struct hdr_end *tail;

	if (obj == NULL)
		return;

	head = ((struct hdr_start *) obj) - 1;

	if (head->magic != DEB_MAGIC_ALLOC) {
		/* Only a clean "free" magic is worth reporting; anything
		   else is either redundant or unsafe to dereference. */
		if (head->magic == DEB_MAGIC_FREE) {
			printk("DEB_MALLOC Using free block of 0x%x at %s:%d, by %s:%d, wasOK %s:%d\n",
				obj,deb_file,deb_line,head->file,head->line,head->ok_file,head->ok_line);
			/* For any other condition it is either superfluous or dangerous to print something. */
			head->magic = DEB_MAGIC_FREED;
		}
		return;
	}

	if (size != head->size) {
		if (size)
			printk("DEB_MALLOC size for 0x%x given as %d, stored %d, at %s:%d, wasOK %s:%d\n",
				obj,size,head->size,deb_file,deb_line,head->ok_file,head->ok_line);
		size = head->size;	/* trust the stored size */
	}

	tail = (struct hdr_end *) (((char *) obj) + size);
	if (tail->magic != DEB_MAGIC_END) {
		printk("DEB_MALLOC overran block 0x%x:%d, at %s:%d, wasOK %s:%d\n",
			obj,head->size,deb_file,deb_line,head->ok_file,head->ok_line);
		head->magic = DEB_MAGIC_USED;
		return;
	}

	/* Block looks good -- remember who last verified it. */
	head->ok_file = deb_file;
	head->ok_line = deb_line;
}
#endif
/*
 * Here is the kfree routine.  If you know the size of the object that you
 * are freeing, then kfree_s() will use that information to speed up the
 * search for the bucket descriptor.
 *
 * We will #define a macro so that "kfree(x)" becomes "kfree_s(x, 0)"
 */
#ifdef CONFIG_DEBUG_MALLOC
void deb_kfree_s(const char *deb_file, unsigned short deb_line,
	 void *obj, int size)
#else
void kfree_s(void *obj, int size)
#endif
{
	unsigned long flags;
	void *page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc, *prev;

	if (!obj)
		return;		/* freeing NULL is a no-op */
#ifdef CONFIG_DEBUG_MALLOC
	/* Validate the guard data before touching any free list. */
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;

		if(hd->magic == DEB_MAGIC_FREE) {
			/* Double free: report it and leave the block alone. */
			printk("DEB_MALLOC dup free of 0x%x at %s:%d by %s:%d, wasOK %s:%d\n",
				obj,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
			return;
		}
		if(hd->size != size) {
			if(size != 0) {
				if(hd->magic != DEB_MAGIC_USED)
					printk("DEB_MALLOC size for 0x%x given as %d, stored %d, at %s:%d, wasOK %s:%d\n",
						obj,size,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
			}
			size = hd->size;	/* trust the stored size */
		}
		he = (struct hdr_end *)(((char *)obj)+size);
		if(he->magic != DEB_MAGIC_END) {
			if(hd->magic != DEB_MAGIC_USED)
				printk("DEB_MALLOC overran block 0x%x:%d, at %s:%d, from %s:%d, wasOK %s:%d\n",
					obj,hd->size,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
		}
		/* From here on 'size' is the whole slot, guards included,
		   so the bucket-size comparison below still works. */
		size += sizeof(struct hdr_start)+sizeof(struct hdr_end);
	}
#endif
	save_flags(flags);
	/* Calculate what page this object lives in */
	page = (void *) ((unsigned long) obj & PAGE_MASK);

	/* Now search the buckets looking for that page */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		prev = 0;
		/* If size is zero then this conditional is always true */
		if (bdir->size >= size) {
			/* We have to turn off interrupts here because
			   we are descending the chain.  If something
			   changes it in the middle we could suddenly
			   find ourselves descending the free list.
			   I think this would only cause a memory
			   leak, but better safe than sorry. */
			cli();	/* To avoid race conditions */
			for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
				if (bdesc->page == page)
					goto found;
				prev = bdesc;
			}
		}
	}

	restore_flags(flags);
	printk("Bad address passed to kernel kfree_s(%p, %d)\n",obj, size);
#ifdef CONFIG_DEBUG_MALLOC
	printk("Offending code: %s:%d\n",deb_file,deb_line);
#else
	/* NOTE(review): digs the caller's return address out of the stack
	   word below the first argument -- an i386 calling-convention
	   trick, not portable. */
	printk("Offending eip: %08x\n",((unsigned long *) &obj)[-1]);
#endif
	return;

found:
	/* interrupts are off here. */
#ifdef CONFIG_DEBUG_MALLOC
	/* Re-poison the payload and restore the "free" guards.  The
	   free-list link is stored at the payload start (where kmalloc's
	   debug path reads it), while freeptr below gets the header
	   address. */
	{
		struct hdr_start *hd;
		struct hdr_end *he;
		hd = (struct hdr_start *) obj;
		hd--;

		hd->file = deb_file;
		hd->line = deb_line;
		hd->magic = DEB_MAGIC_FREE;
		hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
		he = (struct hdr_end *)(((char *)obj)+hd->size);
		memset(obj, 0xf8, hd->size);
		he->magic = DEB_MAGIC_END;
		*((void **)obj) = bdesc->freeptr;
		obj = hd;
	}
#else
	*((void **)obj) = bdesc->freeptr;
#endif

	bdesc->freeptr = obj;	/* push the object on the page's free list */
	bdesc->refcnt--;
	if (bdesc->refcnt == 0) {
		/*
		 * We need to make sure that prev is still accurate.  It
		 * may not be, if someone rudely interrupted us....
		 */
		if ((prev && (prev->next != bdesc)) ||
		    (!prev && (bdir->chain != bdesc)))
			for (prev = bdir->chain; prev; prev = prev->next)
				if (prev->next == bdesc)
					break;
		if (prev)
			prev->next = bdesc->next;
		else {
			if (bdir->chain != bdesc)
				panic("kmalloc bucket chains corrupted");
			bdir->chain = bdesc->next;
		}
		/* Last object gone: recycle the descriptor and give the
		   now-empty bucket page back to the system. */
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		free_page((unsigned long) bdesc->page);
	}
	restore_flags(flags);
	return;
}
#ifdef CONFIG_DEBUG_MALLOC
/*
 * Dump a one-line record for every allocated object into 'buffer':
 * address and size, then the allocating and last-known-good call
 * sites.  Output is capped near a page; returns bytes written.
 */
int get_malloc(char *buffer)
{
	int used = 0;
	int objs;
	unsigned long flags;
	void *cursor;
	struct _bucket_dir *dir;
	struct bucket_desc *desc;

	save_flags(flags);
	cli();	/* keep the chains stable while we walk them */

	for (dir = bucket_dir; dir->size; dir++) {
		for (desc = dir->chain; desc; desc = desc->next) {
			cursor = desc->page;
			for (objs = PAGE_SIZE/dir->size; objs > 0; objs--) {
				struct hdr_start *hdr = (struct hdr_start *) cursor;

				if (hdr->magic == DEB_MAGIC_ALLOC) {
					if (used > PAGE_SIZE-80) {
						/* Out of room: mark the truncation and bail. */
						restore_flags(flags);
						used += sprintf(buffer+used,"...\n");
						return used;
					}
					used += sprintf(buffer+used,"%08x:%03x %s:%d %s:%d\n",
						(long)(cursor+sizeof(struct hdr_start)),hdr->size,hdr->file,hdr->line,hdr->ok_file,hdr->ok_line);
				}
				cursor += dir->size;
			}
		}
	}

	restore_flags(flags);
	return used;
}
#endif