#ifndef _LINUX_MM_H
#define _LINUX_MM_H

#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>

extern unsigned long high_memory;

#include <asm/page.h>
#include <asm/atomic.h>

#ifdef __KERNEL__

#define VERIFY_READ 0
#define VERIFY_WRITE 1

extern int verify_area(int, const void *, unsigned long);

/*
 * Linux kernel virtual memory manager primitives.
 * The idea being to have a "virtual" mm in the same way
 * we have a virtual fs - giving a cleaner interface to the
 * mm details, and allowing different kinds of memory mappings
 * (from shared memory to executable loading to arbitrary
 * mmap() functions).
 */

/*
 * This struct defines a VMM memory area. There is one of these
 * per VM-area/task. A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
	struct mm_struct * vm_mm;	/* VM area parameters */
	unsigned long vm_start;
	unsigned long vm_end;
	pgprot_t vm_page_prot;
	unsigned short vm_flags;
/* AVL tree of VM areas per task, sorted by address */
	short vm_avl_height;
	struct vm_area_struct * vm_avl_left;
	struct vm_area_struct * vm_avl_right;
/* linked list of VM areas per task, sorted by address */
	struct vm_area_struct * vm_next;
/* for areas with inode, the circular list inode->i_mmap */
/* for shm areas, the circular list of attaches */
/* otherwise unused */
	struct vm_area_struct * vm_next_share;
	struct vm_area_struct * vm_prev_share;
/* more */
	struct vm_operations_struct * vm_ops;
	unsigned long vm_offset;
	struct inode * vm_inode;
	unsigned long vm_pte;	/* shared mem */
};
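
/*
 * Example (sketch): for a file-backed area, vm_offset is the file
 * position of vm_start, so the byte at user address "addr" inside the
 * area lives at file offset vm_offset + (addr - vm_start).  The helper
 * below is hypothetical, not a declaration from this file.
 */
#if 0	/* illustrative only */
static unsigned long file_pos_of(struct vm_area_struct * vma, unsigned long addr)
{
	/* valid only for vm_start <= addr < vm_end */
	return vma->vm_offset + (addr - vma->vm_start);
}
#endif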

/*
 * vm_flags..
 */
#define VM_READ		0x0001	/* currently active flags */
#define VM_WRITE	0x0002
#define VM_EXEC		0x0004
#define VM_SHARED	0x0008

#define VM_MAYREAD	0x0010	/* limits for mprotect() etc */
#define VM_MAYWRITE	0x0020
#define VM_MAYEXEC	0x0040
#define VM_MAYSHARE	0x0080

#define VM_GROWSDOWN	0x0100	/* general info on the segment */
#define VM_GROWSUP	0x0200
#define VM_SHM		0x0400	/* shared memory area, don't swap out */
#define VM_DENYWRITE	0x0800	/* ETXTBSY on write attempts.. */

#define VM_EXECUTABLE	0x1000
#define VM_LOCKED	0x2000

#define VM_STACK_FLAGS	0x0177
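
/*
 * 0x0177 above is the OR of the flags a stack segment needs:
 * VM_READ | VM_WRITE | VM_EXEC (0x0007), VM_MAYREAD | VM_MAYWRITE |
 * VM_MAYEXEC (0x0070), and VM_GROWSDOWN (0x0100).
 */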

/*
 * mapping from the currently active vm_flags protection bits (the
 * low four bits) to a page protection mask..
 */
extern pgprot_t protection_map[16];
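
/*
 * Example (sketch): an area's page protection is derived by indexing
 * protection_map with the four active protection bits
 * (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED).  "prot_for" is a
 * hypothetical helper, not a declaration from this file.
 */
#if 0	/* illustrative only */
static pgprot_t prot_for(struct vm_area_struct * vma)
{
	return protection_map[vma->vm_flags & 0x0f];
}
#endif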

/*
 * These are the virtual MM functions - opening of an area, closing and
 * unmapping it (needed to keep files on disk up-to-date etc), pointer
 * to the functions called when a no-page or a wp-page exception occurs.
 */
struct vm_operations_struct {
	void (*open)(struct vm_area_struct * area);
	void (*close)(struct vm_area_struct * area);
	void (*unmap)(struct vm_area_struct *area, unsigned long, size_t);
	void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
	int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
	void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
	unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
	unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
		unsigned long page);
	int (*swapout)(struct vm_area_struct *, unsigned long, pte_t *);
	pte_t (*swapin)(struct vm_area_struct *, unsigned long, unsigned long);
};
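
/*
 * Example (sketch): a driver or filesystem that only needs demand
 * paging can leave every hook but nopage NULL; callers typically
 * check for NULL hooks before calling.  "example_nopage" and
 * "example_vm_ops" are hypothetical names.
 */
#if 0	/* illustrative only */
static unsigned long example_nopage(struct vm_area_struct * area,
	unsigned long address, int write_access);

static struct vm_operations_struct example_vm_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* unmap */
	NULL,			/* protect */
	NULL,			/* sync */
	NULL,			/* advise */
	example_nopage,		/* nopage */
	NULL,			/* wppage */
	NULL,			/* swapout */
	NULL,			/* swapin */
};
#endif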

/*
 * Try to keep the most commonly accessed fields in single cache lines
 * here (16 bytes or greater). This ordering should be particularly
 * beneficial on 32-bit processors.
 *
 * The first line is data used in linear searches (eg. clock algorithm
 * scans). The second line is data used in page searches through the
 * page-cache. -- sct
 */
typedef struct page {
	atomic_t count;
	unsigned dirty:16,
		 age:8;
	unsigned flags;	/* atomic flags, some possibly updated asynchronously */
	struct wait_queue *wait;
	struct page *next;
	struct page *next_hash;
	unsigned long offset;
	struct inode *inode;
	struct page *prev;
	struct page *prev_hash;
	struct buffer_head * buffers;
	unsigned long swap_unlock_entry;
	unsigned long map_nr;	/* page->map_nr == page - mem_map */
} mem_map_t;
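
/*
 * Example (sketch): map_nr ties a descriptor back to its physical
 * frame.  The helper below is hypothetical and assumes mem_map[0]
 * (declared further down) describes physical address 0.
 */
#if 0	/* illustrative only */
static unsigned long phys_addr_of(struct page * p)
{
	return p->map_nr << PAGE_SHIFT;	/* p->map_nr == p - mem_map */
}
#endif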

/* Page flag bit values */
#define PG_locked		 0
#define PG_error		 1
#define PG_referenced		 2
#define PG_uptodate		 3
#define PG_free_after		 4
#define PG_decr_after		 5
#define PG_swap_unlock_after	 6
#define PG_DMA			 7
#define PG_reserved		31

/* Make it prettier to test the above... */
#define PageLocked(page)	(test_bit(PG_locked, &(page)->flags))
#define PageError(page)		(test_bit(PG_error, &(page)->flags))
#define PageReferenced(page)	(test_bit(PG_referenced, &(page)->flags))
/* Note: PG_dirty is not defined above, so PageDirty() is unusable as is. */
#define PageDirty(page)		(test_bit(PG_dirty, &(page)->flags))
#define PageUptodate(page)	(test_bit(PG_uptodate, &(page)->flags))
#define PageFreeAfter(page)	(test_bit(PG_free_after, &(page)->flags))
#define PageDecrAfter(page)	(test_bit(PG_decr_after, &(page)->flags))
#define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
#define PageDMA(page)		(test_bit(PG_DMA, &(page)->flags))
#define PageReserved(page)	(test_bit(PG_reserved, &(page)->flags))
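
/*
 * Example (sketch): the macros above keep flag tests readable, e.g.
 * a read path deciding whether a page-cache page still needs I/O.
 * "page_needs_read" is hypothetical.
 */
#if 0	/* illustrative only */
static int page_needs_read(struct page * p)
{
	if (PageUptodate(p))
		return 0;	/* contents already valid */
	return !PageLocked(p);	/* locked means I/O is already in flight */
}
#endif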

/*
 * page->reserved denotes a page which must never be accessed (which
 * may not even be present).
 *
 * page->dma is set for those pages which lie in the range of
 * physical addresses capable of carrying DMA transfers.
 *
 * Multiple processes may "see" the same page. E.g. for untouched
 * mappings of /dev/null, all processes see the same page full of
 * zeroes, and text pages of executables and shared libraries have
 * only one copy in memory, at most, normally.
 *
 * For the non-reserved pages, page->count denotes a reference count.
 *   page->count == 0 means the page is free.
 *   page->count == 1 means the page is used for exactly one purpose
 *   (e.g. a private data page of one process).
 *
 * A page may be used for kmalloc() or anyone else who does a
 * get_free_page(). In this case the page->count is at least 1, and
 * all other fields are unused but should be 0 or NULL. The
 * management of this page is the responsibility of the one who uses
 * it.
 *
 * The other pages (we may call them "process pages") are completely
 * managed by the Linux memory manager: I/O, buffers, swapping etc.
 * The following discussion applies only to them.
 *
 * A page may belong to an inode's memory mapping. In this case,
 * page->inode is the inode, and page->offset is the file offset
 * of the page (not necessarily a multiple of PAGE_SIZE).
 *
 * A page may have buffers allocated to it. In this case,
 * page->buffers is a circular list of these buffer heads. Else,
 * page->buffers == NULL.
 *
 * For pages belonging to inodes, the page->count is the number of
 * attaches, plus 1 if buffers are allocated to the page.
 *
 * All pages belonging to an inode make up a doubly linked list
 * inode->i_pages, using the fields page->next and page->prev. (These
 * fields are also used for freelist management when page->count==0.)
 * There is also a hash table mapping (inode,offset) to the page
 * in memory if present. The lists for this hash table use the fields
 * page->next_hash and page->prev_hash.
 *
 * All process pages can do I/O:
 * - inode pages may need to be read from disk,
 * - inode pages which have been modified and are MAP_SHARED may need
 *   to be written to disk,
 * - private pages which have been modified may need to be swapped out
 *   to swap space and (later) to be read back into memory.
 * During disk I/O, page->locked is true. This bit is set before I/O
 * and reset when I/O completes. page->wait is a wait queue of all
 * tasks waiting for the I/O on this page to complete.
 * page->uptodate tells whether the page's contents are valid.
 * When a read completes, the page becomes uptodate, unless a disk I/O
 * error happened.
 * When a write completes, and page->free_after is true, the page is
 * freed without any further delay.
 *
 * For choosing which pages to swap out, inode pages carry a
 * page->referenced bit, which is set any time the system accesses
 * that page through the (inode,offset) hash table.
 * There is also the page->age counter, which implements a linear
 * decay (why not an exponential decay?), see swapctl.h.
 */

extern mem_map_t * mem_map;
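
/*
 * Example (sketch): every physical page has a mem_map entry, so
 * global statistics are a linear scan.  "nr_entries" stands in for
 * the number of mem_map entries; the count test relies on the rule
 * above that page->count == 0 means free, and assumes atomic_t is
 * directly comparable here.
 */
#if 0	/* illustrative only */
static unsigned long count_free_pages(unsigned long nr_entries)
{
	unsigned long i, free = 0;

	for (i = 0; i < nr_entries; i++)
		if (!PageReserved(mem_map + i) && !mem_map[i].count)
			free++;
	return free;
}
#endif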

/*
 * This is timing-critical - most of the time in getting a new page
 * goes to clearing the page. If you want a page without the clearing
 * overhead, just use __get_free_page() directly..
 */
#define __get_free_page(priority) __get_free_pages((priority),0,0)
#define __get_dma_pages(priority, order) __get_free_pages((priority),(order),1)
extern unsigned long __get_free_pages(int priority, unsigned long gfporder, int dma);

extern inline unsigned long get_free_page(int priority)
{
	unsigned long page;

	page = __get_free_page(priority);
	if (page)
		memset((void *) page, 0, PAGE_SIZE);
	return page;
}

/* memory.c & swap.c */

#define free_page(addr) free_pages((addr),0)
extern void free_pages(unsigned long addr, unsigned long order);
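
/*
 * Example (sketch): allocations and frees pair up by order.  A single
 * zeroed page comes from get_free_page(); a 2^order run of pages from
 * __get_free_pages() must go back through free_pages() with the same
 * order.  "example_alloc" is hypothetical; GFP_KERNEL is defined below.
 */
#if 0	/* illustrative only */
static void example_alloc(void)
{
	unsigned long page = get_free_page(GFP_KERNEL);		/* zeroed */
	unsigned long run = __get_free_pages(GFP_KERNEL, 2, 0);	/* 4 pages */

	if (page)
		free_page(page);
	if (run)
		free_pages(run, 2);
}
#endif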

extern void show_free_areas(void);
extern unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page,
	unsigned long address);

extern void free_page_tables(struct task_struct * tsk);
extern void clear_page_tables(struct task_struct * tsk);
extern int new_page_tables(struct task_struct * tsk);
extern int copy_page_tables(struct task_struct * to);

extern int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);

extern void vmtruncate(struct inode * inode, unsigned long offset);
extern void handle_mm_fault(struct vm_area_struct *vma, unsigned long address, int write_access);
extern void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access);
extern void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access);

extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
extern void mem_init(unsigned long start_mem, unsigned long end_mem);
extern void show_mem(void);
extern void oom(struct task_struct * tsk);
extern void si_meminfo(struct sysinfo * val);

/* vmalloc.c */

extern void * vmalloc(unsigned long size);
extern void * vremap(unsigned long offset, unsigned long size);
extern void vfree(void * addr);
extern int vread(char *buf, char *addr, int count);
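
/*
 * Example (sketch): vmalloc() memory is virtually contiguous but not
 * necessarily physically contiguous, which suits large buffers only
 * ever touched through the pointer.  "example_big_buffer" is
 * hypothetical.
 */
#if 0	/* illustrative only */
static void * example_big_buffer(void)
{
	void * buf = vmalloc(64 * 1024);

	if (buf)
		memset(buf, 0, 64 * 1024);
	return buf;	/* caller releases with vfree(buf) */
}
#endif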

/* mmap.c */
extern unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long off);
extern void merge_segments(struct task_struct *, unsigned long, unsigned long);
extern void insert_vm_struct(struct task_struct *, struct vm_area_struct *);
extern void remove_shared_vm_struct(struct vm_area_struct *);
extern void build_mmap_avl(struct mm_struct *);
extern void exit_mmap(struct mm_struct *);
extern int do_munmap(unsigned long, size_t);
extern unsigned long get_unmapped_area(unsigned long, unsigned long);

/* filemap.c */
extern unsigned long page_unuse(unsigned long);
extern int shrink_mmap(int, int);
extern void truncate_inode_pages(struct inode *, unsigned long);

#define GFP_BUFFER	0x00
#define GFP_ATOMIC	0x01
#define GFP_USER	0x02
#define GFP_KERNEL	0x03
#define GFP_NOBUFFER	0x04
#define GFP_NFS		0x05

/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
   platforms, used as appropriate on others */

#define GFP_DMA		0x80

#define GFP_LEVEL_MASK	0xf
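
/*
 * Example (sketch): GFP_DMA is OR-ed into a priority, and
 * GFP_LEVEL_MASK strips it off again to recover the base level.
 * "example_gfp_flags" is hypothetical.
 */
#if 0	/* illustrative only */
static void example_gfp_flags(void)
{
	int priority = GFP_KERNEL | GFP_DMA;	/* DMA-capable, may sleep */
	int level = priority & GFP_LEVEL_MASK;	/* back to GFP_KERNEL */
}
#endif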

/* vma is the first one with address < vma->vm_end,
 * and even address < vma->vm_start. Have to extend vma. */
static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
{
	unsigned long grow;

	address &= PAGE_MASK;
	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur)
		return -ENOMEM;
	grow = vma->vm_start - address;
	vma->vm_start = address;
	vma->vm_offset -= grow;
	vma->vm_mm->total_vm += grow >> PAGE_SHIFT;
	if (vma->vm_flags & VM_LOCKED)
		vma->vm_mm->locked_vm += grow >> PAGE_SHIFT;
	return 0;
}

#define avl_empty	(struct vm_area_struct *) NULL

/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
static inline struct vm_area_struct * find_vma(struct task_struct * task, unsigned long addr)
{
	struct vm_area_struct * result = NULL;

	if (task->mm) {
		struct vm_area_struct * tree = task->mm->mmap_avl;
		for (;;) {
			if (tree == avl_empty)
				break;
			if (tree->vm_end > addr) {
				result = tree;
				if (tree->vm_start <= addr)
					break;
				tree = tree->vm_avl_left;
			} else
				tree = tree->vm_avl_right;
		}
	}
	return result;
}
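
/*
 * Example (sketch): roughly how an architecture's page-fault handler
 * strings find_vma(), expand_stack() and handle_mm_fault() together
 * (permission checks omitted).  "tsk", "address", "write_access" and
 * "bad_area" are hypothetical.
 */
#if 0	/* illustrative only */
	struct vm_area_struct * vma = find_vma(tsk, address);

	if (!vma)
		goto bad_area;
	if (address < vma->vm_start) {
		/* below the vma: only a growing stack may be extended */
		if (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, address))
			goto bad_area;
	}
	handle_mm_fault(vma, address, write_access);
#endif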

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   NULL if none. Assume start_addr < end_addr. */
static inline struct vm_area_struct * find_vma_intersection(struct task_struct * task, unsigned long start_addr, unsigned long end_addr)
{
	struct vm_area_struct * vma;

	vma = find_vma(task, start_addr);
	if (!vma || end_addr <= vma->vm_start)
		return NULL;
	return vma;
}
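
/*
 * Example (sketch): rejecting a fixed mapping request whose range
 * would overlap an existing area.  "tsk", "addr" and "len" are
 * hypothetical locals.
 */
#if 0	/* illustrative only */
	if (find_vma_intersection(tsk, addr, addr + len))
		return -EINVAL;	/* range already (partly) mapped */
#endif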

#endif /* __KERNEL__ */

#endif