/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: maximum size of memory we can allocate using this routine
 *	is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When kmalloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocates a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptors are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages.)  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: kmalloc() and kfree() both call get_free_page() and free_page()
 *	in sections of code where interrupts are turned off, to allow
 *	kmalloc() and kfree() to be safely called from an interrupt routine.
 *	(We will probably need this functionality when networking code,
 *	particularly things like NFS, is added to Linux.)  However, this
 *	presumes that get_free_page() and free_page() are interrupt-level
 *	safe, which they may not be once paging is added.  If this is the
 *	case, we will need to modify kmalloc() to keep a few unused pages
 *	"pre-allocated" so that it can safely draw upon those pages if
 *	it is called from an interrupt routine.
 *
 * Another concern is that get_free_page() should not sleep; if it
 *	does, the code is carefully ordered so as to avoid any race
 *	conditions.  The catch is that if kmalloc() is called re-entrantly,
 *	there is a chance that unnecessary pages will be grabbed from the
 *	system.  Except for the pages for the bucket descriptor page, the
 *	extra pages will eventually get released back to the system, though,
 *	so it isn't all that bad.
 */

/* I'm going to modify it to keep some free pages around.  get_free_page()
   can sleep, and tcp/ip needs to call kmalloc at interrupt time (or keep
   big buffers around for itself.)  I guess I'll have the return-from-syscall
   path fill up the free page descriptors. -RAB */

/* Since the advent of GFP_ATOMIC, I've changed the kmalloc code to
   use it and return NULL if it can't get a page. -RAB */
/* (mostly just undid the previous changes -RAB) */

/* I've added the priority argument to kmalloc so routines can
   sleep on memory if they want. -RAB */

/* I've also got to make sure that kmalloc is reentrant now. */
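
/* A quick illustration of the calling conventions described above (not
   part of the allocator; the 512-byte size is made up).  With GFP_ATOMIC
   the allocator must not sleep and can return NULL; with any other
   priority it may sleep in get_free_page() until memory turns up:

	char *buf = kmalloc(512, GFP_ATOMIC);
	if (buf == NULL)
		return;			-- caller must handle failure
	...
	kfree_s(buf, 512);		-- size hint speeds up the free
*/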

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>

#include <asm/system.h>

struct bucket_desc {	/* 16 bytes */
	void			*page;		/* the page this bucket manages */
	struct bucket_desc	*next;		/* next desc in chain / free list */
	void			*freeptr;	/* first free object on the page */
	unsigned short		refcnt;		/* objects currently in use */
	unsigned short		bucket_size;	/* object size for this bucket */
};

struct _bucket_dir {	/* 8 bytes */
	int			size;		/* object size this entry serves */
	struct bucket_desc	*chain;		/* buckets holding that size */
};
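
/*
 * How the pieces fit together (exposition only): each bucket_dir entry
 * anchors a chain of bucket_desc structures for one object size; each
 * descriptor owns one 4k page carved into equal-sized objects, with
 * freeptr pointing at the first free object and each free object's
 * first word pointing at the next free one.
 */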

/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
struct _bucket_dir bucket_dir[] = {
	{ 16,	(struct bucket_desc *) 0},
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048,	(struct bucket_desc *) 0},
	{ 4096,	(struct bucket_desc *) 0},
	{ 0,	(struct bucket_desc *) 0}};	/* End of list marker */
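
/* For example: kmalloc(100, ...) is served from the 128-byte bucket,
   so each such page holds 4096/128 = 32 objects and the request wastes
   28 bytes.  A dedicated 100-byte entry would cut that waste, at the
   cost of dedicating whole pages to one more size. */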

/*
 * This contains a linked list of free bucket descriptor blocks
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;

/*
 * This routine initializes a bucket descriptor page.
 */

/* It assumes it is called with interrupts on, and will return that
   way.  It also can sleep if priority != GFP_ATOMIC. */

static inline int init_bucket_desc(int priority)
{
	struct bucket_desc *bdesc, *first;
	int i;

	/* get_free_page() turns interrupts back on, so we should be careful. */
	first = bdesc = (struct bucket_desc *) get_free_page(priority);
	if (!bdesc)
		return 1;

	/* At this point it is possible that we have slept and
	   kfree() has been called, etc., so we might not actually
	   need this page anymore. */

	/* Link all but the last descriptor on the page into a chain. */
	for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) {
		bdesc->next = bdesc+1;
		bdesc++;
	}
	/*
	 * This is done last, to avoid race conditions in case
	 * get_free_page() sleeps and this routine gets called again....
	 */
	cli();
	bdesc->next = free_bucket_desc;
	free_bucket_desc = first;
	sti();
	return (0);
}
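
/* Note: with 4k pages and 16-byte descriptors, each successful call
   above links PAGE_SIZE/sizeof(struct bucket_desc) = 4096/16 = 256
   fresh descriptors onto the free list, matching the "256 bucket
   descriptors per page" figure in the header comment. */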

void *
kmalloc(unsigned int len, int priority)
{
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc;
	void *retval;

	/*
	 * First we search the bucket_dir to find the right bucket
	 * descriptor chain for this request.
	 */

	/* The sizes are static, so there is no reentry problem here. */
	for (bdir = bucket_dir; bdir->size; bdir++)
		if (bdir->size >= len)
			break;

	if (!bdir->size) {
		/* This should be changed for sizes > 1 page. */
		printk("kmalloc called with impossibly large argument (%d)\n", len);
		return NULL;
	}

	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	cli();	/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc != NULL; bdesc = bdesc->next)
		if (bdesc->freeptr != NULL || bdesc->page == NULL)
			break;
	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */
	if (!bdesc) {
		char *cp;
		int i;

		/* This must be a while loop, because it is possible
		   to be interrupted after init_bucket_desc() and
		   before cli().  The interrupt could steal our
		   free descriptor. */
		while (!free_bucket_desc) {
			sti();	/* This might happen anyway, so we
				   might as well make it explicit. */
			if (init_bucket_desc(priority))
				return NULL;
			cli();	/* Turn them back off. */
		}

		bdesc = free_bucket_desc;
		free_bucket_desc = bdesc->next;

		/* get_free_page() will turn interrupts back on,
		   so we might as well do it ourselves. */
		sti();
		bdesc->refcnt = 0;
		bdesc->bucket_size = bdir->size;
		cp = (char *) get_free_page(priority);
		bdesc->page = bdesc->freeptr = (void *) cp;

		if (!cp) {
			/* Out of memory: put bdesc back on the free list. */
			cli();
			bdesc->next = free_bucket_desc;
			free_bucket_desc = bdesc;
			sti();
			return NULL;
		}

		/* Set up the chain of free objects */
		for (i = PAGE_SIZE/bdir->size; i > 1; i--) {
			*((char **) cp) = cp + bdir->size;
			cp += bdir->size;
		}
		*((char **) cp) = 0;

		/* Turn interrupts back off for putting the
		   thing onto the chain. */
		cli();
		/* Remember, bdir has not changed. */
		bdesc->next = bdir->chain;	/* OK, link it in! */
		bdir->chain = bdesc;
	}

	retval = (void *) bdesc->freeptr;
	bdesc->freeptr = *((void **) retval);
	bdesc->refcnt++;
	sti();	/* OK, we're safe again */
	return retval;
}
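
/* Because bucket_dir is searched smallest-first, a request always lands
   in the tightest bucket that fits: kmalloc(300, priority) comes out of
   the 512 bucket, never the 1024 one.  The worst case is a size just
   over a power of two, e.g. 2049 bytes occupying a 4096-byte object. */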

/*
 * Here is the kfree routine.  If you know the size of the object that you
 * are freeing, then kfree_s() will use that information to speed up the
 * search for the bucket descriptor.
 *
 * We will #define a macro so that "kfree(x)" becomes "kfree_s(x, 0)"
 */
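
/* The macro itself lives in a kernel header rather than here;
   presumably it is just (shown only for illustration):

	#define kfree(x) kfree_s((x), 0)
*/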
void kfree_s(void *obj, int size)
{
	void *page;
	struct _bucket_dir *bdir;
	struct bucket_desc *bdesc, *prev;

	/* Calculate what page this object lives in */
	page = (void *) ((unsigned long) obj & 0xfffff000);

	/* Now search the buckets looking for that page */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		prev = 0;
		/* If size is zero then this conditional is always true */
		if (bdir->size >= size) {
			/* We have to turn off interrupts here because
			   we are descending the chain.  If something
			   changes it in the middle, we could suddenly
			   find ourselves descending the free list.
			   I think this would only cause a memory
			   leak, but better safe than sorry. */
			cli();	/* To avoid race conditions */
			for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
				if (bdesc->page == page)
					goto found;
				prev = bdesc;
			}
		}
	}

	printk("Bad address passed to kernel kfree_s(%X, %d)\n", obj, size);
	sti();
	return;

found:
	/* Interrupts are off here. */
	*((void **) obj) = bdesc->freeptr;
	bdesc->freeptr = obj;
	bdesc->refcnt--;
	if (bdesc->refcnt == 0) {
		/*
		 * We need to make sure that prev is still accurate.  It
		 * may not be, if someone rudely interrupted us....
		 */
		if ((prev && (prev->next != bdesc)) ||
		    (!prev && (bdir->chain != bdesc)))
			for (prev = bdir->chain; prev; prev = prev->next)
				if (prev->next == bdesc)
					break;
		if (prev)
			prev->next = bdesc->next;
		else {
			if (bdir->chain != bdesc)
				panic("kmalloc bucket chains corrupted");
			bdir->chain = bdesc->next;
		}
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
		free_page((unsigned long) bdesc->page);
	}
	sti();
	return;
}
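
/* A minimal usage sketch (not part of the allocator; the structure and
   function names are made up for illustration).  It is compiled out
   with #if 0 so it costs nothing: */
#if 0
static void kmalloc_example(void)
{
	struct foo { int a, b; } *p;	/* hypothetical object */

	/* GFP_ATOMIC: safe at interrupt time, but may return NULL. */
	p = (struct foo *) kmalloc(sizeof(*p), GFP_ATOMIC);
	if (!p)
		return;			/* caller must handle failure */
	p->a = p->b = 0;
	/* Passing the size lets kfree_s() skip the smaller buckets. */
	kfree_s(p, sizeof(*p));
}
#endif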