1 /*
2 * malloc.c --- a general purpose kernel memory allocator for Linux.
3 *
4 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
5 *
6 * This routine is written to be as fast as possible, so that it
7 * can be called from the interrupt level.
8 *
9 * Limitations: maximum size of memory we can allocate using this routine
10 * is 4k, the size of a page in Linux.
11 *
12 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
14 * the page can be returned to the general free pool. When kmalloc() is
15 * called, it looks for the smallest bucket size which will fulfill its
16 * request, and allocate a piece of memory from that bucket pool.
17 *
18 * Each bucket has as its control block a bucket descriptor which keeps
19 * track of how many objects are in use on that page, and the free list
20 * for that page. Like the buckets themselves, bucket descriptors are
21 * stored on pages requested from get_free_page(). However, unlike buckets,
22 * pages devoted to bucket descriptor pages are never released back to the
23 * system. Fortunately, a system should probably only need 1 or 2 bucket
24 * descriptor pages, since a page can hold 256 bucket descriptors (which
25 * corresponds to 1 megabyte worth of bucket pages.) If the kernel is using
26 * that much allocated memory, it's probably doing something wrong. :-)
27 *
28 * Note: kmalloc() and kfree() both call get_free_page() and free_page()
29 * in sections of code where interrupts are turned off, to allow
30 * kmalloc() and kfree() to be safely called from an interrupt routine.
31 * (We will probably need this functionality when networking code,
 * particularly things like NFS, is added to Linux.)  However, this
33 * presumes that get_free_page() and free_page() are interrupt-level
34 * safe, which they may not be once paging is added. If this is the
35 * case, we will need to modify kmalloc() to keep a few unused pages
36 * "pre-allocated" so that it can safely draw upon those pages if
37 * it is called from an interrupt routine.
38 *
39 * Another concern is that get_free_page() should not sleep; if it
40 * does, the code is carefully ordered so as to avoid any race
41 * conditions. The catch is that if kmalloc() is called re-entrantly,
 * there is a chance that unnecessary pages will be grabbed from the
43 * system. Except for the pages for the bucket descriptor page, the
44 * extra pages will eventually get released back to the system, though,
45 * so it isn't all that bad.
46 */
47
48 /* I'm going to modify it to keep some free pages around. Get free page
49 can sleep, and tcp/ip needs to call kmalloc at interrupt time (Or keep
50 big buffers around for itself.) I guess I'll have return from
51 syscall fill up the free page descriptors. -RAB */
52
53 /* since the advent of GFP_ATOMIC, I've changed the kmalloc code to
54 use it and return NULL if it can't get a page. -RAB */
55 /* (mostly just undid the previous changes -RAB) */
56
57 /* I've added the priority argument to kmalloc so routines can
58 sleep on memory if they want. - RAB */
59
60 /* I've also got to make sure that kmalloc is reentrant now. */
61
62 /* Debugging support: add file/line info, add beginning+end markers. -M.U- */
63
64 #include <linux/kernel.h>
65 #include <linux/mm.h>
66 #include <linux/string.h>
67 #include <linux/malloc.h>
68
69 #include <asm/system.h>
70
/*
 * Control block for one bucket page: remembers the page it manages, the
 * head of that page's free-object list, and how many objects are in use.
 * Descriptors are chained through ->next, either on a bucket_dir chain
 * (while in use) or on the global free_bucket_desc list (while free).
 */
struct bucket_desc {	/* 16 bytes */
	void			*page;		/* the data page this bucket manages */
	struct bucket_desc	*next;		/* next in dir chain, or next free descriptor */
	void			*freeptr;	/* first free object on the page (NULL = page full) */
	unsigned short		refcnt;		/* objects currently allocated from this page */
	unsigned short		bucket_size;	/* object size this bucket serves */
};
78
/*
 * One entry of the bucket directory: an object size and the chain of
 * bucket pages currently serving allocations of that size.
 */
struct _bucket_dir {	/* 8 bytes */
	unsigned int		size;	/* object size; 0 terminates the directory */
	struct bucket_desc	*chain;	/* bucket pages dedicated to this size */
};
83
#ifdef CONFIG_DEBUG_MALLOC

/*
 * Debugging wrappers placed around every allocation: an hdr_start just
 * before the user data and an hdr_end just after it.  file/line record
 * the last site that allocated or freed the block; ok_file/ok_line the
 * last site that verified it intact (see deb_kcheck_s()).
 */
struct hdr_start {
	const char *file;	/* last allocator/freer source file */
	const char *ok_file;	/* last verified-OK source file */
	unsigned short line;	/* last allocator/freer source line */
	unsigned short ok_line;	/* last verified-OK source line */
	unsigned short size;	/* usable payload size in bytes */
	int magic;		/* block state: one of the DEB_MAGIC_* values below */
};
struct hdr_end {
	int magic;		/* DEB_MAGIC_END unless the block was overrun */
};

#define DEB_MAGIC_FREE	0x13579BDF	/* free block */
#define DEB_MAGIC_ALLOC	0x2468ACE0	/* allocated block */
#define DEB_MAGIC_USED	0x147AD036	/* allocated but bad */
#define DEB_MAGIC_FREED	0x258BE169	/* free but abused */

#define DEB_MAGIC_END	0x369CF258	/* end marker */

#endif
/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order, smallest size first:
 * kmalloc() scans it linearly and takes the first size >= the request,
 * and relies on the 0-size entry as the end-of-list sentinel.
 */
struct _bucket_dir bucket_dir[] = {
#ifndef CONFIG_DEBUG_MALLOC	/* Debug headers have too much overhead for 16-byte objects */
	{ 16,	(struct bucket_desc *) 0},
#endif
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048, (struct bucket_desc *) 0},
	{ 4096, (struct bucket_desc *) 0},
	{ 0,    (struct bucket_desc *) 0}};   /* End of list marker */
131
/*
 * This contains a linked list of free bucket descriptor blocks,
 * threaded through their ->next fields.  It is shared state: every
 * access is done with interrupts disabled (cli()).
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;
136
137 /*
138 * This routine initializes a bucket description page.
139 */
140
141 /* It assumes it is called with interrupts on. and will
142 return that way. It also can sleep if priority != GFP_ATOMIC. */
143
144 static inline void init_bucket_desc(unsigned long page)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
145 {
146 struct bucket_desc *bdesc;
147 int i;
148
149 bdesc = (struct bucket_desc *) page;
150 for (i = PAGE_SIZE/sizeof(struct bucket_desc); --i > 0; bdesc++ )
151 bdesc->next = bdesc+1;
152 /*
153 * This is done last, to avoid race conditions in case
154 * get_free_page() sleeps and this routine gets called again....
155 */
156 cli();
157 bdesc->next = free_bucket_desc;
158 free_bucket_desc = (struct bucket_desc *) page;
159 }
160
161 /*
162 * Re-organized some code to give cleaner assembly output for easier
163 * verification.. LBT
164 */
165 #ifdef CONFIG_DEBUG_MALLOC
166 void *
167 deb_kmalloc(const char *deb_file, unsigned short deb_line,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
168 unsigned int len, int priority)
169 #else
170 void *
171 kmalloc(unsigned int len, int priority)
172 #endif
173 {
174 int i;
175 unsigned long flags;
176 unsigned long page;
177 struct _bucket_dir *bdir;
178 struct bucket_desc *bdesc;
179 void *retval;
180
181 #ifdef CONFIG_DEBUG_MALLOC
182 len += sizeof(struct hdr_start)+sizeof(struct hdr_end);
183 #endif
184 /*
185 * First we search the bucket_dir to find the right bucket change
186 * for this request.
187 */
188
189 /* The sizes are static so there is no reentry problem here. */
190 bdir = bucket_dir;
191 for (bdir = bucket_dir ; bdir->size < len ; bdir++) {
192 if (!bdir->size)
193 goto too_large;
194 }
195
196 /*
197 * Now we search for a bucket descriptor which has free space
198 */
199 save_flags(flags);
200 cli(); /* Avoid race conditions */
201 for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
202 if (bdesc->freeptr)
203 goto found_bdesc;
204 /*
205 * If we didn't find a bucket with free space, then we'll
206 * allocate a new one.
207 */
208
209 /*
210 * Note that init_bucket_descriptor() does its
211 * own cli() before returning, and guarantees that
212 * there is a bucket desc in the page.
213 */
214 if (!free_bucket_desc) {
215 restore_flags(flags);
216 if(!(page=__get_free_page(priority)))
217 return NULL;
218 init_bucket_desc(page);
219 }
220
221 bdesc = free_bucket_desc;
222 free_bucket_desc = bdesc->next;
223 restore_flags(flags);
224
225 if(!(page=__get_free_page(priority))) {
226 /*
227 * Out of memory? Put the bucket descriptor back on the free list
228 */
229 cli();
230 bdesc->next = free_bucket_desc;
231 free_bucket_desc = bdesc;
232 restore_flags(flags);
233 return NULL;
234 }
235
236 bdesc->refcnt = 0;
237 bdesc->bucket_size = bdir->size;
238 bdesc->page = bdesc->freeptr = (void *) page;
239
240 /* Set up the chain of free objects */
241 for (i=PAGE_SIZE/bdir->size; i > 0 ; i--) {
242 #ifdef CONFIG_DEBUG_MALLOC
243 struct hdr_start *hd;
244 struct hdr_end *he;
245 hd = (struct hdr_start *) page;
246 he = (struct hdr_end *)(page+(bdir->size-sizeof(struct hdr_end)));
247 hd->magic = DEB_MAGIC_FREE;
248 hd->file = hd->ok_file = "(expand)";
249 hd->line = hd->ok_line = 0;
250 hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
251 he->magic = DEB_MAGIC_END;
252
253 memset(hd+1,0xF8,hd->size);
254
255 *((void **) (hd+1)) = (i==1) ? NULL : (void *)(page + bdir->size);
256 #else
257 *((void **) page) = (i==1) ? NULL : (void *)(page + bdir->size);
258 #endif
259 page += bdir->size;
260 }
261
262 /* turn interrupts back off for putting the
263 thing onto the chain. */
264 cli();
265 /* remember bdir is not changed. */
266 bdesc->next = bdir->chain; /* OK, link it in! */
267 bdir->chain = bdesc;
268
269 found_bdesc:
270 retval = (void *) bdesc->freeptr;
271 #ifdef CONFIG_DEBUG_MALLOC
272 bdesc->freeptr = *((void **) (((char *)retval)+sizeof(struct hdr_start)));
273 #else
274 bdesc->freeptr = *((void **) retval);
275 #endif
276 bdesc->refcnt++;
277 restore_flags(flags); /* OK, we're safe again */
278 #ifdef CONFIG_DEBUG_MALLOC
279 {
280 struct hdr_start *hd;
281 struct hdr_end *he;
282
283 hd = (struct hdr_start *) retval;
284 retval = hd+1;
285 len -= sizeof(struct hdr_start)+sizeof(struct hdr_end);
286 if(hd->magic != DEB_MAGIC_FREE && hd->magic != DEB_MAGIC_FREED) {
287 printk("DEB_MALLOC allocating %s block 0x%x (head 0x%x) from %s:%d, magic %x\n",
288 (hd->magic == DEB_MAGIC_ALLOC) ? "nonfree" : "trashed",
289 retval,hd,deb_file,deb_line,hd->magic);
290 return NULL;
291 }
292 if(len > hd->size || len > bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end)) {
293 printk("DEB_MALLOC got %x:%x-byte block, wanted %x, from %s:%d, last %s:%d\n",
294 hd->size,bdir->size,len,hd->file,hd->line,deb_file,deb_line);
295 return NULL;
296 }
297 {
298 unsigned char *x = (unsigned char *) retval;
299 unsigned short pos = 4;
300 x += pos;
301 while(pos < hd->size) {
302 if(*x++ != 0xF8) {
303 printk("DEB_MALLOC used 0x%x:%x(%x) while free, from %s:%d\n",
304 retval,pos,hd->size,hd->file,hd->line);
305 return NULL;
306 }
307 pos++;
308 }
309 }
310 he = (struct hdr_end *)(((char *)retval)+hd->size);
311 if(he->magic != DEB_MAGIC_END) {
312 printk("DEB_MALLOC overran 0x%x:%d while free, from %s:%d\n",retval,hd->size,hd->file,hd->line);
313 }
314 memset(retval, 0xf0, len);
315 he = (struct hdr_end *)(((char *)retval)+len);
316 hd->file = hd->ok_file = deb_file;
317 hd->line = hd->ok_line = deb_line;
318 hd->size = len;
319 hd->magic = DEB_MAGIC_ALLOC;
320 he->magic = DEB_MAGIC_END;
321 }
322 #endif
323 return retval;
324
325 too_large:
326 /* This should be changed for sizes > 1 page. */
327 printk("kmalloc called with impossibly large argument (%d)\n", len);
328 return NULL;
329 }
330
#ifdef CONFIG_DEBUG_MALLOC
/*
 * deb_kcheck_s() -- verify the integrity of an allocated block.
 *
 * Checks the head magic, the recorded size against the caller-supplied
 * one (size 0 means "unknown"), and the end marker.  On success the
 * caller's file/line are recorded as the last known-good checkpoint;
 * on damage the block's magic is downgraded so later reports stay quiet.
 */
void deb_kcheck_s(const char *deb_file, unsigned short deb_line,
		  void *obj, int size)
{
	struct hdr_start *head;
	struct hdr_end *tail;

	if (!obj)
		return;
	head = ((struct hdr_start *) obj) - 1;

	if (head->magic != DEB_MAGIC_ALLOC) {
		if (head->magic == DEB_MAGIC_FREE) {
			printk("DEB_MALLOC Using free block of 0x%x at %s:%d, by %s:%d, wasOK %s:%d\n",
				obj,deb_file,deb_line,head->file,head->line,head->ok_file,head->ok_line);
			/* For any other condition it is either superfluous or dangerous to print something. */
			head->magic = DEB_MAGIC_FREED;
		}
		return;
	}
	if (head->size != size) {
		if (size != 0) {
			printk("DEB_MALLOC size for 0x%x given as %d, stored %d, at %s:%d, wasOK %s:%d\n",
				obj,size,head->size,deb_file,deb_line,head->ok_file,head->ok_line);
		}
		size = head->size;	/* trust the stored size from here on */
	}
	tail = (struct hdr_end *)(((char *)obj)+size);
	if (tail->magic != DEB_MAGIC_END) {
		printk("DEB_MALLOC overran block 0x%x:%d, at %s:%d, wasOK %s:%d\n",
			obj,head->size,deb_file,deb_line,head->ok_file,head->ok_line);
		head->magic = DEB_MAGIC_USED;
		return;
	}
	/* Block looks intact -- remember this call site as the last good one. */
	head->ok_file = deb_file;
	head->ok_line = deb_line;
}
#endif
370
371 /*
372 * Here is the kfree routine. If you know the size of the object that you
373 * are freeing, then kfree_s() will use that information to speed up the
374 * search for the bucket descriptor.
375 *
376 * We will #define a macro so that "kfree(x)" is becomes "kfree_s(x, 0)"
377 */
378 #ifdef CONFIG_DEBUG_MALLOC
379 void deb_kfree_s(const char *deb_file, unsigned short deb_line,
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
380 void *obj, int size)
381 #else
382 void kfree_s(void *obj, int size)
383 #endif
384 {
385 unsigned long flags;
386 void *page;
387 struct _bucket_dir *bdir;
388 struct bucket_desc *bdesc, *prev;
389
390 if (!obj)
391 return;
392 #ifdef CONFIG_DEBUG_MALLOC
393 {
394 struct hdr_start *hd;
395 struct hdr_end *he;
396 hd = (struct hdr_start *) obj;
397 hd--;
398
399 if(hd->magic == DEB_MAGIC_FREE) {
400 printk("DEB_MALLOC dup free of 0x%x at %s:%d by %s:%d, wasOK %s:%d\n",
401 obj,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
402 return;
403 }
404 if(hd->size != size) {
405 if(size != 0) {
406 if(hd->magic != DEB_MAGIC_USED)
407 printk("DEB_MALLOC size for 0x%x given as %d, stored %d, at %s:%d, wasOK %s:%d\n",
408 obj,size,hd->size,deb_file,deb_line,hd->ok_file,hd->ok_line);
409 }
410 size = hd->size;
411 }
412 he = (struct hdr_end *)(((char *)obj)+size);
413 if(he->magic != DEB_MAGIC_END) {
414 if(hd->magic != DEB_MAGIC_USED)
415 printk("DEB_MALLOC overran block 0x%x:%d, at %s:%d, from %s:%d, wasOK %s:%d\n",
416 obj,hd->size,deb_file,deb_line,hd->file,hd->line,hd->ok_file,hd->ok_line);
417 }
418 size += sizeof(struct hdr_start)+sizeof(struct hdr_end);
419 }
420 #endif
421 save_flags(flags);
422 /* Calculate what page this object lives in */
423 page = (void *) ((unsigned long) obj & PAGE_MASK);
424
425 /* Now search the buckets looking for that page */
426 for (bdir = bucket_dir; bdir->size; bdir++) {
427 prev = 0;
428 /* If size is zero then this conditional is always true */
429 if (bdir->size >= size) {
430 /* We have to turn off interrupts here because
431 we are descending the chain. If something
432 changes it in the middle we could suddenly
433 find ourselves descending the free list.
434 I think this would only cause a memory
435 leak, but better safe than sorry. */
436 cli(); /* To avoid race conditions */
437 for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
438 if (bdesc->page == page)
439 goto found;
440 prev = bdesc;
441 }
442 }
443 }
444
445 restore_flags(flags);
446 printk("Bad address passed to kernel kfree_s(%p, %d)\n",obj, size);
447 #ifdef CONFIG_DEBUG_MALLOC
448 printk("Offending code: %s:%d\n",deb_file,deb_line);
449 #else
450 printk("Offending eip: %08x\n",((unsigned long *) &obj)[-1]);
451 #endif
452 return;
453
454 found:
455 /* interrupts are off here. */
456 #ifdef CONFIG_DEBUG_MALLOC
457
458 {
459 struct hdr_start *hd;
460 struct hdr_end *he;
461 hd = (struct hdr_start *) obj;
462 hd--;
463
464 hd->file = deb_file;
465 hd->line = deb_line;
466 hd->magic = DEB_MAGIC_FREE;
467 hd->size = bdir->size-sizeof(struct hdr_start)-sizeof(struct hdr_end);
468 he = (struct hdr_end *)(((char *)obj)+hd->size);
469 memset(obj, 0xf8, hd->size);
470 he->magic = DEB_MAGIC_END;
471 *((void **)obj) = bdesc->freeptr;
472 obj = hd;
473 }
474 #else
475 *((void **)obj) = bdesc->freeptr;
476 #endif
477
478 bdesc->freeptr = obj;
479 bdesc->refcnt--;
480 if (bdesc->refcnt == 0) {
481 /*
482 * We need to make sure that prev is still accurate. It
483 * may not be, if someone rudely interrupted us....
484 */
485 if ((prev && (prev->next != bdesc)) ||
486 (!prev && (bdir->chain != bdesc)))
487 for (prev = bdir->chain; prev; prev = prev->next)
488 if (prev->next == bdesc)
489 break;
490 if (prev)
491 prev->next = bdesc->next;
492 else {
493 if (bdir->chain != bdesc)
494 panic("kmalloc bucket chains corrupted");
495 bdir->chain = bdesc->next;
496 }
497 bdesc->next = free_bucket_desc;
498 free_bucket_desc = bdesc;
499 free_page((unsigned long) bdesc->page);
500 }
501 restore_flags(flags);
502 return;
503 }
504
#ifdef CONFIG_DEBUG_MALLOC
/*
 * get_malloc() -- dump every currently allocated debug-tracked block
 * into 'buffer', one line per block: address:size, allocation site,
 * last verified-OK site.  Returns the number of characters written.
 * Output is capped near one page and truncated with "...\n".
 *
 * NOTE(review): 'buffer' is assumed to be at least PAGE_SIZE bytes
 * (the len > PAGE_SIZE-80 guard presumes that) -- confirm at callers.
 */
int get_malloc(char *buffer)
{
	int len = 0;
	int i;
	unsigned long flags;
	void *page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;

	save_flags(flags);
	cli(); /* To avoid race conditions */
	/* Walk every bucket chain, then every object slot on each page. */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
			page = bdesc->page;
			for (i=PAGE_SIZE/bdir->size; i > 0 ; i--) {
				struct hdr_start *hd;
				hd = (struct hdr_start *)page;
				if(hd->magic == DEB_MAGIC_ALLOC) {
					if(len > PAGE_SIZE-80) {
						/* Buffer nearly full: stop early. */
						restore_flags(flags);
						len += sprintf(buffer+len,"...\n");
						return len;
					}
					len += sprintf(buffer+len,"%08x:%03x %s:%d %s:%d\n",
						(long)(page+sizeof(struct hdr_start)),hd->size,hd->file,hd->line,hd->ok_file,hd->ok_line);
				}
				page += bdir->size;	/* GNU void* arithmetic */
			}
		}
	}

	restore_flags(flags);
	return len;
}
#endif