/* The paper

     Duncan, Roy
     Design goals and implementation of the new High Performance File System
     Microsoft Systems Journal  Sept 1989  v4 n5 p1(13)

   describes what HPFS looked like when it was new, and it is the source
   of most of the information given here.  The rest is conjecture.

   For definitive information on the Duncan paper, see it, not this file.
   For definitive information on HPFS, ask somebody else -- this is guesswork.
   There are certain to be many mistakes. */

/* Notation */

/* Every structure below mirrors an on-disk layout, so each multi-byte
   field is declared with a type of the width it has on disk.  Plain
   `unsigned' is used as the 32-bit word throughout; timestamps are
   declared the same way (not as time_t, whose width varies by host)
   so that the structures keep their sector-sized layout. */

typedef unsigned secno;			/* sector number, partition relative */

typedef secno dnode_secno;		/* sector number of a dnode */
typedef secno fnode_secno;		/* sector number of an fnode */
typedef secno anode_secno;		/* sector number of an anode */

/* sector 0 */

/* The boot block is very like a FAT boot block, except that the
   29h signature byte is 28h instead, and the ID string is "HPFS". */

struct hpfs_boot_block
{
  unsigned char jmp[3];
  unsigned char oem_id[8];
  unsigned char bytes_per_sector[2];	/* 512 */
  unsigned char sectors_per_cluster;
  unsigned char n_reserved_sectors[2];
  unsigned char n_fats;
  unsigned char n_rootdir_entries[2];
  unsigned char n_sectors_s[2];
  unsigned char media_byte;
  unsigned short sectors_per_fat;
  unsigned short sectors_per_track;
  unsigned short heads_per_cyl;
  unsigned int n_hidden_sectors;
  unsigned int n_sectors_l;		/* size of partition */
  unsigned char drive_number;
  unsigned char mbz;
  unsigned char sig_28h;		/* 28h */
  unsigned char vol_serno[4];
  unsigned char vol_label[11];
  unsigned char sig_hpfs[8];		/* "HPFS " (ID string; presumably
					   space-padded to 8 bytes) */
  unsigned char pad[448];
  unsigned short magic;			/* aa55 */
};


/* sector 16 */

/* The super block has the pointer to the root directory. */

#define SB_MAGIC 0xf995e849

struct hpfs_super_block
{
  unsigned magic;			/* f995 e849 */
  unsigned magic1;			/* fa53 e9c5, more magic? */
  unsigned huh202;			/* ?? 202 = N. of B. in 1.00390625 S.*/
  fnode_secno root;			/* fnode of root directory */
  secno n_sectors;			/* size of filesystem */
  unsigned n_badblocks;			/* number of bad blocks */
  secno bitmaps;			/* pointers to free space bit maps */
  unsigned zero1;			/* 0 */
  secno badblocks;			/* bad block list */
  unsigned zero3;			/* 0 */
  unsigned last_chkdsk;			/* date last checked, 0 if never
					   (one 32-bit word on disk) */
  unsigned zero4;			/* 0 */
  secno n_dir_band;			/* number of sectors in dir band */
  secno dir_band_start;			/* first sector in dir band */
  secno dir_band_end;			/* last sector in dir band */
  secno dir_band_bitmap;		/* free space map, 1 dnode per bit */
  unsigned zero5[8];			/* 0 */
  secno scratch_dnodes;			/* ?? 8 preallocated sectors near dir
					   band, 4-aligned. */
  unsigned zero6[103];			/* 0 */
};


/* sector 17 */

/* The spare block has pointers to spare sectors. */

#define SP_MAGIC 0xf9911849

struct hpfs_spare_block
{
  unsigned magic;			/* f991 1849 */
  unsigned magic1;			/* fa52 29c5, more magic? */
  unsigned dirty;			/* 0 clean, 1 "improperly stopped" */

  secno hotfix_map;			/* info about remapped bad sectors */
  unsigned n_spares_used;		/* number of hotfixes */
  unsigned n_spares;			/* number of spares in hotfix map */
  unsigned n_dnode_spares_free;		/* spare dnodes unused */
  unsigned n_dnode_spares;		/* length of spare_dnodes[] list,
					   follows in this block*/
  secno code_page_dir;			/* code page directory block */
  unsigned n_code_pages;		/* number of code pages */
  unsigned large_numbers[2];		/* ?? */
  unsigned zero1[15];
  dnode_secno spare_dnodes[20];		/* emergency free dnode list */
  unsigned zero2[81];			/* room for more? */
};

/* The bad block list is 4 sectors long.  The first word must be zero,
   the remaining words give n_badblocks bad block numbers.
   I bet you can see it coming... */

#define BAD_MAGIC 0

/* The hotfix map is 4 sectors long.  It looks like

       secno from[n_spares];
       secno to[n_spares];

   The to[] list is initialized to point to n_spares preallocated empty
   sectors.  The from[] list contains the sector numbers of bad blocks
   which have been remapped to corresponding sectors in the to[] list.
   n_spares_used gives the length of the from[] list. */


/* Sectors 18 and 19 are preallocated and unused.
   Maybe they're spares for 16 and 17, but simple substitution fails. */


/* The code page info pointed to by the spare block consists of an index
   block and blocks containing character maps.  The following is pretty
   sketchy, but Linux is Latin-1 so it doesn't matter. */

/* block pointed to by spareblock->code_page_dir */

#define CP_DIR_MAGIC 0x494521f7

struct code_page_directory
{
  unsigned magic;			/* 4945 21f7 */
  unsigned n_code_pages;		/* number of pointers following */
  unsigned zero1[2];
  struct {
    unsigned short ix;			/* index */
    unsigned short code_page_number;	/* code page number */
    unsigned bounds;			/* matches corresponding word
					   in data block */
    secno code_page_data;		/* sector number of a code_page_data
					   containing c.p. array */
    unsigned index;			/* index in c.p. array in that sector*/
  } array[31];				/* unknown length */
};

/* blocks pointed to by code_page_directory */

#define CP_DATA_MAGIC 0x894521f7

struct code_page_data
{
  unsigned magic;			/* 8945 21f7 */
  unsigned n_used;			/* # elements used in c_p_data[] */
  unsigned bounds[3];			/* looks a bit like
					     (beg1,end1), (beg2,end2)
					   one byte each */
  unsigned short offs[3];		/* offsets from start of sector
					   to start of c_p_data[ix] */
  struct {
    unsigned short ix;			/* index */
    unsigned short code_page_number;	/* code page number */
    unsigned short zero1;
    unsigned char map[128];		/* map for chars 80..ff */
    unsigned short zero2;
  } code_page[3];
  unsigned char incognita[78];
};


/* Free space bitmaps are 4 sectors long, which is 16384 bits.
   16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap.
   Bit order in the maps is little-endian.  0 means taken, 1 means free.

   Bit map sectors are marked allocated in the bit maps, and so are sectors
   off the end of the partition.

   Band 0 is sectors 0-3fff, its map is in sectors 18-1b.
   Band 1 is 4000-7fff, its map is in 7ffc-7fff.
   Band 2 is 8000-ffff, its map is in 8000-8003.
   The remaining bands have maps in their first (even) or last (odd) 4 sectors
     -- if the last, partial, band is odd its map is in its last 4 sectors.

   The bitmap locations are given in a table pointed to by the super block.
   No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is
   just where they usually are.

   The "directory band" is a bunch of sectors preallocated for dnodes.
   It has a 4-sector free space bitmap of its own.  Each bit in the map
   corresponds to one 4-sector dnode, bit 0 of the map corresponding to
   the first 4 sectors of the directory band.  The entire band is marked
   allocated in the main bitmap.   The super block gives the locations
   of the directory band and its bitmap.  ("band" doesn't mean it is
   8 meg long; it isn't.)  */


/* dnode: directory.  4 sectors long */

/* A directory is a tree of dnodes.  The fnode for a directory
   contains one pointer, to the root dnode of the tree.  The fnode
   never moves, the dnodes do the B-tree thing, splitting and merging
   as files are added and removed.  */

#define DNODE_MAGIC 0x77e40aae

struct dnode {
  unsigned magic;			/* 77e4 0aae */
  unsigned first_free;			/* offset from start of dnode to
					   first free dir entry */
  unsigned increment_me;		/* some kind of activity counter?
					   Neither HPFS.IFS nor CHKDSK cares
					   if you change this word */
  secno up;				/* (root dnode) directory's fnode
					   (nonroot) parent dnode */
  dnode_secno self;			/* pointer to this dnode */
  unsigned char dirent[2028];		/* one or more dirents */
};

struct hpfs_dirent {
  unsigned short length;		/* offset to next dirent */
  unsigned first: 1;			/* set on phony ^A^A (".") entry */
  unsigned flag1: 1;
  unsigned down: 1;			/* down pointer present (after name) */
  unsigned last: 1;			/* set on phony \377 entry */
  unsigned flag4: 1;
  unsigned flag5: 1;
  unsigned flag6: 1;
  unsigned has_needea: 1;		/* ?? some EA has NEEDEA set
					   I have no idea why this is
					   interesting in a dir entry */
  unsigned read_only: 1;		/* dos attrib */
  unsigned hidden: 1;			/* dos attrib */
  unsigned system: 1;			/* dos attrib */
  unsigned flag11: 1;			/* would be volume label dos attrib */
  unsigned directory: 1;		/* dos attrib */
  unsigned archive: 1;			/* dos attrib */
  unsigned not_8x3: 1;			/* name is not 8.3 */
  unsigned flag15: 1;
  fnode_secno fnode;			/* fnode giving allocation info */
  unsigned write_date;			/* mtime (32-bit word on disk) */
  unsigned file_size;			/* file length, bytes */
  unsigned read_date;			/* atime (32-bit word on disk) */
  unsigned creation_date;		/* ctime (32-bit word on disk) */
  unsigned ea_size;			/* total EA length, bytes */
  unsigned char zero1;
  unsigned char locality;		/* 0=unk 1=seq 2=random 3=both */
  unsigned char namelen, name[1];	/* file name */
  /* dnode_secno down;	  btree down pointer, if present,
     			  follows name on next word boundary, or maybe it's
			  precedes next dirent, which is on a word boundary. */
};

/* Fetch one word from P without assuming P is suitably aligned for
   `unsigned'.  The bytes are copied in memory order, so the result is
   exactly what a direct word load from an aligned P would have given
   on this host. */

static inline unsigned hpfs_load_unaligned (const void *p)
{
  const unsigned char *src = p;
  unsigned word;
  unsigned char *dst = (unsigned char *) &word;
  unsigned i;

  for (i = 0; i < sizeof word; i++)
    dst[i] = src[i];
  return word;
}

/* The b-tree down pointer from a dir entry.
   Returns the word stored in the last 4 bytes of the entry, i.e. at
   offset de->length - 4.  The caller is responsible for checking
   de->down first; this function does not. */

static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
{
  const unsigned char *entry = (const unsigned char *) de;

  return hpfs_load_unaligned (entry + de->length - 4);
}

/* The first dir entry in a dnode: the start of dnode->dirent[]. */

static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
{
  return (void *) dnode->dirent;
}

/* The end+1 of the dir entries: dnode->first_free bytes past the
   start of the dnode. */

static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
{
  return (void *) ((char *) dnode + dnode->first_free);
}

/* The dir entry after dir entry de: de->length bytes past de. */

static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
{
  return (void *) ((char *) de + de->length);
}


/* B+ tree: allocation info in fnodes and anodes */

/* dnodes point to fnodes which are responsible for listing the sectors
   assigned to the file.  This is done with trees of (length,address)
   pairs.  (Actually triples, of (length, file-address, disk-address)
   which can represent holes.  Find out if HPFS does that.)
   At any rate, fnodes contain a small tree; if subtrees are needed
   they occupy essentially a full block in anodes.  A leaf-level tree node
   has 3-word entries giving sector runs, a non-leaf node has 2-word
   entries giving subtree pointers.  A flag in the header says which. */

struct bplus_leaf_node
{
  unsigned file_secno;			/* first file sector in extent */
  unsigned length;			/* length, sectors */
  secno disk_secno;			/* first corresponding disk sector */
};

struct bplus_internal_node
{
  unsigned file_secno;			/* subtree maps sectors < this  */
  anode_secno down;			/* pointer to subtree */
};

struct bplus_header
{
  unsigned flag0: 1;
  unsigned flag1: 1;
  unsigned flag2: 1;
  unsigned flag3: 1;
  unsigned flag4: 1;
  unsigned fnode_parent: 1;		/* ? we're pointed to by an fnode,
					   the data btree or some ea or the
					   main ea bootage pointer ea_secno */
					/* also can get set in fnodes, which
					   may be a chkdsk glitch or may mean
					   this bit is irrelevant in fnodes,
					   or this interpretation is all wet */
  unsigned flag6: 1;
  unsigned internal: 1;			/* 1 -> (internal) tree of anodes
					   0 -> (leaf) list of extents */
  unsigned char fill[3];
  unsigned char n_free_nodes;		/* free nodes in following array */
  unsigned char n_used_nodes;		/* used nodes in following array */
  unsigned short first_free;		/* offset from start of header to
					   first free node in array */
  /* Zero-length arrays (a GNU extension) name the node array that
     follows the header without adding to the header's size; the
     storage itself is the union `u' that follows the header inside
     struct fnode and struct anode. */
  union {
    struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
					       subtree pointers */
    struct bplus_leaf_node external[0];	    /* (external) 3-word entries giving
					       sector runs */
  } u;
};

/* fnode: root of allocation b+ tree, and EA's */

/* Every file and every directory has one fnode, pointed to by the directory
   entry and pointing to the file's sectors or directory's root dnode.  EA's
   are also stored here, and there are said to be ACL's somewhere here too. */

#define FNODE_MAGIC 0xf7e40aae

struct fnode
{
  unsigned magic;			/* f7e4 0aae */
  unsigned zero1[2];
  unsigned char len, name[15];		/* true length, truncated name */
  fnode_secno up;			/* pointer to file's directory fnode */
  unsigned zero2[3];
  unsigned ea_size_l;			/* length of disk-resident ea's */
  secno ea_secno;			/* first sector of disk-resident ea's*/
  unsigned short ea_size_s;		/* length of fnode-resident ea's */

  unsigned flag0: 1;
  unsigned ea_anode: 1;			/* 1 -> ea_secno is an anode */
  unsigned flag2: 1;
  unsigned flag3: 1;
  unsigned flag4: 1;
  unsigned flag5: 1;
  unsigned flag6: 1;
  unsigned flag7: 1;
  unsigned dirflag: 1;			/* 1 -> directory.  first & only extent
					   points to dnode. */
  unsigned flag9: 1;
  unsigned flag10: 1;
  unsigned flag11: 1;
  unsigned flag12: 1;
  unsigned flag13: 1;
  unsigned flag14: 1;
  unsigned flag15: 1;

  struct bplus_header btree;		/* b+ tree, 8 extents or 12 subtrees */
  union {
    struct bplus_leaf_node external[8];
    struct bplus_internal_node internal[12];
  } u;

  unsigned file_size;			/* file length, bytes */
  unsigned n_needea;			/* number of EA's with NEEDEA set */
  unsigned zero4[4];
  unsigned ea_offs;			/* offset from start of fnode
					   to first fnode-resident ea */
  unsigned zero5[2];
  unsigned char ea[316];		/* zero or more EA's, packed together
					   with no alignment padding.
					   (Do not use this name, get here
					   via fnode + ea_offs. I think.) */
};


/* anode: 99.44% pure allocation tree */

#define ANODE_MAGIC 0x37e40aae

struct anode
{
  unsigned magic;			/* 37e4 0aae */
  anode_secno self;			/* pointer to this anode */
  secno up;				/* parent anode or fnode */

  struct bplus_header btree;		/* b+tree, 40 extents or 60 subtrees */
  union {
    struct bplus_leaf_node external[40];
    struct bplus_internal_node internal[60];
  } u;

  unsigned fill[3];			/* unused */
};


/* extended attributes.

   A file's EA info is stored as a list of (name,value) pairs.  It is
   usually in the fnode, but (if it's large) it is moved to a single
   sector run outside the fnode, or to multiple runs with an anode tree
   that points to them.

   The value of a single EA is stored along with the name, or (if large)
   it is moved to a single sector run, or multiple runs pointed to by an
   anode tree, pointed to by the value field of the (name,value) pair.

   Flags in the EA tell whether the value is immediate, in a single sector
   run, or in multiple runs.  Flags in the fnode tell whether the EA list
   is immediate, in a single run, or in multiple runs. */

struct extended_attribute
{
  unsigned indirect: 1;			/* 1 -> value gives sector number
					   where real value starts */
  unsigned anode: 1;			/* 1 -> sector is an anode
					   that points to fragmented value */
  unsigned flag2: 1;
  unsigned flag3: 1;
  unsigned flag4: 1;
  unsigned flag5: 1;
  unsigned flag6: 1;
  unsigned needea: 1;			/* required ea */
  unsigned char namelen;		/* length of name, bytes */
  unsigned short valuelen;		/* length of value, bytes */
  /*
    unsigned char name[namelen];	ascii attrib name
    unsigned char nul;			terminating '\0', not counted
    unsigned char value[valuelen];	value, arbitrary
  if this.indirect, valuelen is 8 and the value is
    unsigned length;			real length of value, bytes
    secno secno;			sector address where it starts
  if this.anode, the above sector number is the root of an anode tree
    which points to the value.
  */
};

/* Start of the attribute name: the byte right after the fixed header. */

static inline unsigned char *ea_name (struct extended_attribute *ea)
{
  return (unsigned char *) ea + sizeof *ea;
}

/* Start of the attribute value: past the header, the name
   (ea->namelen bytes) and the name's terminating '\0'. */

static inline unsigned char *ea_value (struct extended_attribute *ea)
{
  return ea_name (ea) + ea->namelen + 1;
}

/* The attribute following EA in a packed list: past EA's header,
   name, terminating '\0' and ea->valuelen bytes of value. */

static inline struct extended_attribute *
ea_next_ea (struct extended_attribute *ea)
{
  return (void *) (ea_value (ea) + ea->valuelen);
}

/* First word of the value.  For an indirect EA this is the real
   length of the value in bytes.  The value may start at any byte
   offset, so it is fetched without assuming alignment.  The caller
   is responsible for checking ea->indirect. */

static inline unsigned ea_indirect_length (struct extended_attribute *ea)
{
  return hpfs_load_unaligned (ea_value (ea));
}

/* Second word of the value.  For an indirect EA this is the sector
   address where the real value starts (or, if ea->anode, the root of
   the anode tree pointing to it).  Fetched without assuming
   alignment.  The caller is responsible for checking ea->indirect. */

static inline secno ea_indirect_secno (struct extended_attribute *ea)
{
  return hpfs_load_unaligned (ea_value (ea) + sizeof (unsigned));
}

/*
   Local Variables:
   comment-column: 40
   End:
*/