#ifndef _LINUX_MM_H
#define _LINUX_MM_H

#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/kernel.h>

#ifdef __KERNEL__

#include <linux/string.h>

extern unsigned long high_memory;

#include <asm/page.h>
#include <asm/atomic.h>

#define VERIFY_READ 0
#define VERIFY_WRITE 1

extern int verify_area(int, const void *, unsigned long);

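/*
 * Illustrative sketch (not part of this header): before copying data to or
 * from a user-supplied pointer, callers are expected to check the range
 * first, e.g. in a hypothetical ioctl handler:
 *
 *	int err = verify_area(VERIFY_WRITE, (void *) arg, sizeof(long));
 *	if (err)
 *		return err;
 *
 * verify_area() returns 0 on success or a negative errno (typically -EFAULT).
 */
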
/*
 * Linux kernel virtual memory manager primitives.
 * The idea being to have a "virtual" mm in the same way
 * we have a virtual fs - giving a cleaner interface to the
 * mm details, and allowing different kinds of memory mappings
 * (from shared memory to executable loading to arbitrary
 * mmap() functions).
 */

/*
 * This struct defines a VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	struct mm_struct * vm_mm;	/* VM area parameters */
	unsigned long vm_start;
	unsigned long vm_end;
	pgprot_t vm_page_prot;
	unsigned short vm_flags;
/* AVL tree of VM areas per task, sorted by address */
	short vm_avl_height;
	struct vm_area_struct * vm_avl_left;
	struct vm_area_struct * vm_avl_right;
/* linked list of VM areas per task, sorted by address */
	struct vm_area_struct * vm_next;
/* for areas with inode, the circular list inode->i_mmap */
/* for shm areas, the circular list of attaches */
/* otherwise unused */
	struct vm_area_struct * vm_next_share;
	struct vm_area_struct * vm_prev_share;
/* more */
	struct vm_operations_struct * vm_ops;
	unsigned long vm_offset;
	struct inode * vm_inode;
	unsigned long vm_pte;		/* shared mem */
};
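
/*
 * Illustrative sketch (not part of this header): the per-task VM areas hang
 * off mm->mmap as a singly linked list sorted by address, so a simple scan
 * over a task's mappings looks like:
 *
 *	struct vm_area_struct * vma;
 *
 *	for (vma = tsk->mm->mmap; vma; vma = vma->vm_next)
 *		printk("%08lx-%08lx\n", vma->vm_start, vma->vm_end);
 *
 * "tsk" is assumed to be a struct task_struct pointer; the mm_struct with
 * its mmap list is declared in <linux/sched.h>, not here.
 */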

/*
 * vm_flags..
 */
#define VM_READ		0x0001	/* currently active flags */
#define VM_WRITE	0x0002
#define VM_EXEC		0x0004
#define VM_SHARED	0x0008

#define VM_MAYREAD	0x0010	/* limits for mprotect() etc */
#define VM_MAYWRITE	0x0020
#define VM_MAYEXEC	0x0040
#define VM_MAYSHARE	0x0080

#define VM_GROWSDOWN	0x0100	/* general info on the segment */
#define VM_GROWSUP	0x0200
#define VM_SHM		0x0400	/* shared memory area, don't swap out */
#define VM_DENYWRITE	0x0800	/* ETXTBSY on write attempts.. */

#define VM_EXECUTABLE	0x1000
#define VM_LOCKED	0x2000

#define VM_STACK_FLAGS	0x0177

/*
 * mapping from the currently active vm_flags protection bits (the
 * low four bits) to a page protection mask..
 */
extern pgprot_t protection_map[16];

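/*
 * Illustrative sketch (not part of this header): the architecture code fills
 * protection_map[] so that the four "active" bits of vm_flags index straight
 * into it, e.g. when setting up a mapping:
 *
 *	vma->vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
 *	vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
 *
 * The 0x0f mask keeps only VM_READ/VM_WRITE/VM_EXEC/VM_SHARED, matching the
 * 16 entries of the table.
 */
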
/*
 * These are the virtual MM functions - opening of an area, closing and
 * unmapping it (needed to keep files on disk up-to-date etc), pointer
 * to the functions called when a no-page or a wp-page exception occurs.
 */
struct vm_operations_struct {
	void (*open)(struct vm_area_struct * area);
	void (*close)(struct vm_area_struct * area);
	void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);
	void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
	int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
	void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
	unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
	unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
		unsigned long page);
	int (*swapout)(struct vm_area_struct *, unsigned long, pte_t *);
	pte_t (*swapin)(struct vm_area_struct *, unsigned long, unsigned long);
};

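/*
 * Illustrative sketch (not part of this header): a file system or driver
 * normally fills in only the operations it needs and leaves the rest NULL
 * (callers check for NULL before invoking them).  With hypothetical
 * my_open/my_close/my_nopage helpers, and using the positional order
 * open, close, unmap, protect, sync, advise, nopage, wppage, swapout,
 * swapin from the struct above:
 *
 *	static struct vm_operations_struct my_vm_ops = {
 *		my_open, my_close, NULL, NULL, NULL, NULL,
 *		my_nopage, NULL, NULL, NULL
 *	};
 *
 * A device mmap() handler would then set vma->vm_ops = &my_vm_ops, after
 * which page faults in the area reach my_nopage() via do_no_page().
 */
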
/*
 * Try to keep the most commonly accessed fields in single cache lines
 * here (16 bytes or greater).  This ordering should be particularly
 * beneficial on 32-bit processors.
 *
 * The first line is data used in linear searches (eg. clock algorithm
 * scans).  The second line is data used in page searches through the
 * page-cache.  -- sct
 */
typedef struct page {
	atomic_t count;
	unsigned dirty:16,
		 age:8;
	unsigned flags;	/* atomic flags, some possibly updated asynchronously */
	struct wait_queue *wait;
	struct page *next;
	struct page *next_hash;
	unsigned long offset;
	struct inode *inode;
	struct page *prev;
	struct page *prev_hash;
	struct buffer_head * buffers;
	unsigned long swap_unlock_entry;
	unsigned long map_nr;	/* page->map_nr == page - mem_map */
} mem_map_t;

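/*
 * Illustrative sketch (not part of this header): mem_map (declared further
 * below) is a flat array of mem_map_t with one entry per physical page frame,
 * so page structures and frame numbers convert by plain pointer arithmetic.
 * Assuming "nr" holds a valid frame number:
 *
 *	mem_map_t * page = mem_map + nr;
 *	unsigned long back = page->map_nr;
 *
 * "back" ends up equal to nr, per the invariant page->map_nr == page - mem_map
 * noted above.  The MAP_NR() macro from <asm/page.h> is the usual way to turn
 * a kernel address into such a frame number.
 */
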
/* Page flag bit values */
#define PG_locked		 0
#define PG_error		 1
#define PG_referenced		 2
#define PG_uptodate		 3
#define PG_free_after		 4
#define PG_decr_after		 5
#define PG_swap_unlock_after	 6
#define PG_DMA			 7
#define PG_reserved		31

/* Make it prettier to test the above... */
#define PageLocked(page)	(test_bit(PG_locked, &(page)->flags))
#define PageError(page)		(test_bit(PG_error, &(page)->flags))
#define PageReferenced(page)	(test_bit(PG_referenced, &(page)->flags))
#define PageDirty(page)		(test_bit(PG_dirty, &(page)->flags))
#define PageUptodate(page)	(test_bit(PG_uptodate, &(page)->flags))
#define PageFreeAfter(page)	(test_bit(PG_free_after, &(page)->flags))
#define PageDecrAfter(page)	(test_bit(PG_decr_after, &(page)->flags))
#define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
#define PageDMA(page)		(test_bit(PG_DMA, &(page)->flags))
#define PageReserved(page)	(test_bit(PG_reserved, &(page)->flags))

/*
 * page->reserved denotes a page which must never be accessed (which
 * may not even be present).
 *
 * page->dma is set for those pages which lie in the range of
 * physical addresses capable of carrying DMA transfers.
 *
 * Multiple processes may "see" the same page. E.g. for untouched
 * mappings of /dev/null, all processes see the same page full of
 * zeroes, and text pages of executables and shared libraries have
 * only one copy in memory, at most, normally.
 *
 * For the non-reserved pages, page->count denotes a reference count.
 * page->count == 0 means the page is free.
 * page->count == 1 means the page is used for exactly one purpose
 * (e.g. a private data page of one process).
 *
 * A page may be used for kmalloc() or anyone else who does a
 * get_free_page(). In this case the page->count is at least 1, and
 * all other fields are unused but should be 0 or NULL. The
 * management of this page is the responsibility of the one who uses
 * it.
 *
 * The other pages (we may call them "process pages") are completely
 * managed by the Linux memory manager: I/O, buffers, swapping etc.
 * The following discussion applies only to them.
 *
 * A page may belong to an inode's memory mapping. In this case,
 * page->inode is the inode, and page->offset is the file offset
 * of the page (not necessarily a multiple of PAGE_SIZE).
 *
 * A page may have buffers allocated to it. In this case,
 * page->buffers is a circular list of these buffer heads. Else,
 * page->buffers == NULL.
 *
 * For pages belonging to inodes, the page->count is the number of
 * attaches, plus 1 if buffers are allocated to the page.
 *
 * All pages belonging to an inode make up a doubly linked list
 * inode->i_pages, using the fields page->next and page->prev. (These
 * fields are also used for freelist management when page->count==0.)
 * There is also a hash table mapping (inode,offset) to the page
 * in memory if present. The lists for this hash table use the fields
 * page->next_hash and page->prev_hash.
 *
 * All process pages can do I/O:
 * - inode pages may need to be read from disk,
 * - inode pages which have been modified and are MAP_SHARED may need
 *   to be written to disk,
 * - private pages which have been modified may need to be swapped out
 *   to swap space and (later) to be read back into memory.
 * During disk I/O, page->locked is true. This bit is set before I/O
 * and reset when I/O completes. page->wait is a wait queue of all
 * tasks waiting for the I/O on this page to complete.
 * page->uptodate tells whether the page's contents is valid.
 * When a read completes, the page becomes uptodate, unless a disk I/O
 * error happened.
 * When a write completes, and page->free_after is true, the page is
 * freed without any further delay.
 *
 * For choosing which pages to swap out, inode pages carry a
 * page->referenced bit, which is set any time the system accesses
 * that page through the (inode,offset) hash table.
 * There is also the page->age counter, which implements a linear
 * decay (why not an exponential decay?), see swapctl.h.
 */

extern mem_map_t * mem_map;

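/*
 * Illustrative sketch (not part of this header): code that takes an extra
 * reference to a process page and then inspects its state could look like
 * this, with "page" a valid non-reserved mem_map entry and
 * handle_busy_page() a hypothetical helper:
 *
 *	atomic_inc(&page->count);
 *	if (PageLocked(page) || !PageUptodate(page))
 *		handle_busy_page(page);
 *
 * A locked page has I/O in flight (waiters sleep on page->wait until
 * PG_locked is cleared), and a page that is not uptodate has not yet been
 * read in successfully.  atomic_inc() comes from <asm/atomic.h>, included
 * above.
 */
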
/*
 * This is timing-critical - most of the time in getting a new page
 * goes to clearing the page. If you want a page without the clearing
 * overhead, just use __get_free_page() directly..
 */
#define __get_free_page(priority) __get_free_pages((priority),0,0)
#define __get_dma_pages(priority, order) __get_free_pages((priority),(order),1)
extern unsigned long __get_free_pages(int priority, unsigned long gfporder, int dma);

extern inline unsigned long get_free_page(int priority)
{
	unsigned long page;

	page = __get_free_page(priority);
	if (page)
		memset((void *) page, 0, PAGE_SIZE);
	return page;
}

/* memory.c & swap.c */

#define free_page(addr) free_pages((addr),0)
extern void free_pages(unsigned long addr, unsigned long order);

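/*
 * Illustrative sketch (not part of this header): grabbing and releasing a
 * zeroed page, and a 4-page (order 2) DMA-capable block, from kernel context:
 *
 *	unsigned long page = get_free_page(GFP_KERNEL);
 *	unsigned long dma_buf = __get_dma_pages(GFP_KERNEL, 2);
 *
 *	if (page)
 *		free_page(page);
 *	if (dma_buf)
 *		free_pages(dma_buf, 2);
 *
 * The priority argument is one of the GFP_* values defined later in this
 * file; the order argument gives 2^order contiguous pages.
 */
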
extern void show_free_areas(void);
extern unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page,
	unsigned long address);

extern void free_page_tables(struct mm_struct * mm);
extern void clear_page_tables(struct task_struct * tsk);
extern int new_page_tables(struct task_struct * tsk);
extern int copy_page_tables(struct task_struct * to);

extern int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);

extern void vmtruncate(struct inode * inode, unsigned long offset);
extern void handle_mm_fault(struct vm_area_struct *vma, unsigned long address, int write_access);
extern void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access);
extern void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access);

extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
extern void mem_init(unsigned long start_mem, unsigned long end_mem);
extern void show_mem(void);
extern void oom(struct task_struct * tsk);
extern void si_meminfo(struct sysinfo * val);

/* vmalloc.c */

extern void * vmalloc(unsigned long size);
extern void * vremap(unsigned long offset, unsigned long size);
extern void vfree(void * addr);
extern int vread(char *buf, char *addr, int count);

/* mmap.c */
extern unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long off);
extern void merge_segments(struct task_struct *, unsigned long, unsigned long);
extern void insert_vm_struct(struct task_struct *, struct vm_area_struct *);
extern void remove_shared_vm_struct(struct vm_area_struct *);
extern void build_mmap_avl(struct mm_struct *);
extern void exit_mmap(struct mm_struct *);
extern int do_munmap(unsigned long, size_t);
extern unsigned long get_unmapped_area(unsigned long, unsigned long);

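/*
 * Illustrative sketch (not part of this header): a binary-format loader
 * running in the context of the current task could map part of an already
 * opened file and later undo the mapping roughly like this (error handling
 * omitted; PROT_* and MAP_* come from the mman definitions):
 *
 *	unsigned long addr;
 *
 *	addr = do_mmap(file, 0, len, PROT_READ | PROT_EXEC,
 *		       MAP_PRIVATE, offset);
 *	...
 *	do_munmap(addr, len);
 *
 * Real callers check the returned address for an error value before using it.
 */
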
/* filemap.c */
extern unsigned long page_unuse(unsigned long);
extern int shrink_mmap(int, int);
extern void truncate_inode_pages(struct inode *, unsigned long);

#define GFP_BUFFER	0x00
#define GFP_ATOMIC	0x01
#define GFP_USER	0x02
#define GFP_KERNEL	0x03
#define GFP_NOBUFFER	0x04
#define GFP_NFS		0x05

/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
   platforms, used as appropriate on others */

#define GFP_DMA		0x80

#define GFP_LEVEL_MASK	0xf

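/*
 * Illustrative sketch (not part of this header): GFP_DMA is or'ed into the
 * priority by callers that need DMA-capable memory, e.g.:
 *
 *	char * buf = kmalloc(1024, GFP_KERNEL | GFP_DMA);
 *
 * and inside an allocator, (priority & GFP_LEVEL_MASK) recovers the plain
 * GFP_* level from such a combined value.  kmalloc() itself is declared in
 * <linux/malloc.h>, not here.
 */
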
/* vma is the first one with  address < vma->vm_end,
 * and even  address < vma->vm_start. Have to extend vma. */
static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
{
	unsigned long grow;

	address &= PAGE_MASK;
	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur)
		return -ENOMEM;
	grow = vma->vm_start - address;
	vma->vm_start = address;
	vma->vm_offset -= grow;
	vma->vm_mm->total_vm += grow >> PAGE_SHIFT;
	if (vma->vm_flags & VM_LOCKED)
		vma->vm_mm->locked_vm += grow >> PAGE_SHIFT;
	return 0;
}

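/*
 * Illustrative sketch (not part of this header): the page-fault path uses
 * this when a fault lands just below a stack segment.  Roughly:
 *
 *	if (address < vma->vm_start) {
 *		if (!(vma->vm_flags & VM_GROWSDOWN))
 *			goto bad_area;
 *		if (expand_stack(vma, address))
 *			goto bad_area;
 *	}
 *
 * i.e. only VM_GROWSDOWN areas may be extended downwards, and expand_stack()
 * returning -ENOMEM means the stack rlimit would be exceeded.
 */
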
#define avl_empty	(struct vm_area_struct *) NULL

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
static inline struct vm_area_struct * find_vma (struct task_struct * task, unsigned long addr)
{
	struct vm_area_struct * result = NULL;

	if (task->mm) {
		struct vm_area_struct * tree = task->mm->mmap_avl;
		for (;;) {
			if (tree == avl_empty)
				break;
			if (tree->vm_end > addr) {
				result = tree;
				if (tree->vm_start <= addr)
					break;
				tree = tree->vm_avl_left;
			} else
				tree = tree->vm_avl_right;
		}
	}
	return result;
}

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   NULL if none.  Assume start_addr < end_addr. */
static inline struct vm_area_struct * find_vma_intersection (struct task_struct * task, unsigned long start_addr, unsigned long end_addr)
{
	struct vm_area_struct * vma;

	vma = find_vma(task, start_addr);
	if (!vma || end_addr <= vma->vm_start)
		return NULL;
	return vma;
}

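/*
 * Illustrative sketch (not part of this header): find_vma() returns the first
 * area ending above the address, which may still start above it, so callers
 * usually check vm_start themselves.  A hypothetical helper that asks "does
 * any mapping cover this address?" would be:
 *
 *	static inline int task_addr_mapped(struct task_struct * tsk, unsigned long addr)
 *	{
 *		struct vm_area_struct * vma = find_vma(tsk, addr);
 *		return vma && vma->vm_start <= addr;
 *	}
 *
 * find_vma_intersection() is the range variant: a non-NULL result means some
 * mapping overlaps [start_addr, end_addr), which is useful for checking that
 * a region is free before placing a new mapping there.
 */
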
#endif /* __KERNEL__ */

#endif