1 /* The paper 2 3 Duncan, Roy 4 Design goals and implementation of the new High Performance File System 5 Microsoft Systems Journal Sept 1989 v4 n5 p1(13) 6 7 describes what HPFS looked like when it was new, and it is the source 8 of most of the information given here. The rest is conjecture. 9 10 For definitive information on the Duncan paper, see it, not this file. 11 For definitive information on HPFS, ask somebody else -- this is guesswork. 12 There are certain to be many mistakes. */ 13 14 /* Notation */ 15 16 typedef unsigned secno; /* sector number, partition relative */ 17 18 typedef secno dnode_secno; /* sector number of a dnode */ 19 typedef secno fnode_secno; /* sector number of an fnode */ 20 typedef secno anode_secno; /* sector number of an anode */ 21 22 /* sector 0 */ 23 24 /* The boot block is very like a FAT boot block, except that the 25 29h signature byte is 28h instead, and the ID string is "HPFS". */ 26 27 struct hpfs_boot_block 28 { 29 unsigned char jmp[3]; 30 unsigned char oem_id[8]; 31 unsigned char bytes_per_sector[2]; /* 512 */ 32 unsigned char sectors_per_cluster; 33 unsigned char n_reserved_sectors[2]; 34 unsigned char n_fats; 35 unsigned char n_rootdir_entries[2]; 36 unsigned char n_sectors_s[2]; 37 unsigned char media_byte; 38 unsigned short sectors_per_fat; 39 unsigned short sectors_per_track; 40 unsigned short heads_per_cyl; 41 unsigned int n_hidden_sectors; 42 unsigned int n_sectors_l; /* size of partition */ 43 unsigned char drive_number; 44 unsigned char mbz; 45 unsigned char sig_28h; /* 28h */ 46 unsigned char vol_serno[4]; 47 unsigned char vol_label[11]; 48 unsigned char sig_hpfs[8]; /* "HPFS " */ 49 unsigned char pad[448]; 50 unsigned short magic; /* aa55 */ 51 }; 52 53 54 /* sector 16 */ 55 56 /* The super block has the pointer to the root directory. */ 57 58 #define SB_MAGIC 0xf995e849 59 60 struct hpfs_super_block 61 { 62 unsigned magic; /* f995 e849 */ 63 unsigned magic1; /* fa53 e9c5, more magic? */ 64 unsigned huh202; /* ?? 202 = N. of B. in 1.00390625 S.*/ 65 fnode_secno root; /* fnode of root directory */ 66 secno n_sectors; /* size of filesystem */ 67 unsigned n_badblocks; /* number of bad blocks */ 68 secno bitmaps; /* pointers to free space bit maps */ 69 unsigned zero1; /* 0 */ 70 secno badblocks; /* bad block list */ 71 unsigned zero3; /* 0 */ 72 time_t last_chkdsk; /* date last checked, 0 if never */ 73 unsigned zero4; /* 0 */ 74 secno n_dir_band; /* number of sectors in dir band */ 75 secno dir_band_start; /* first sector in dir band */ 76 secno dir_band_end; /* last sector in dir band */ 77 secno dir_band_bitmap; /* free space map, 1 dnode per bit */ 78 unsigned zero5[8]; /* 0 */ 79 secno scratch_dnodes; /* ?? 8 preallocated sectors near dir 80 band, 4-aligned. */ 81 unsigned zero6[103]; /* 0 */ 82 }; 83 84 85 /* sector 17 */ 86 87 /* The spare block has pointers to spare sectors. */ 88 89 #define SP_MAGIC 0xf9911849 90 91 struct hpfs_spare_block 92 { 93 unsigned magic; /* f991 1849 */ 94 unsigned magic1; /* fa52 29c5, more magic? */ 95 96 unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */ 97 unsigned flag1234: 4; /* unknown flags */ 98 unsigned fast: 1; /* partition was fast formatted */ 99 unsigned flag6to31: 26; /* unknown flags */ 100 101 secno hotfix_map; /* info about remapped bad sectors */ 102 unsigned n_spares_used; /* number of hotfixes */ 103 unsigned n_spares; /* number of spares in hotfix map */ 104 unsigned n_dnode_spares_free; /* spare dnodes unused */ 105 unsigned n_dnode_spares; /* length of spare_dnodes[] list, 106 follows in this block*/ 107 secno code_page_dir; /* code page directory block */ 108 unsigned n_code_pages; /* number of code pages */ 109 unsigned large_numbers[2]; /* ?? */ 110 unsigned zero1[15]; 111 dnode_secno spare_dnodes[20]; /* emergency free dnode list */ 112 unsigned zero2[81]; /* room for more? */ 113 }; 114 115 /* The bad block list is 4 sectors long. The first word must be zero, 116 the remaining words give n_badblocks bad block numbers. 117 I bet you can see it coming... */ 118 119 #define BAD_MAGIC 0 120 121 /* The hotfix map is 4 sectors long. It looks like 122 123 secno from[n_spares]; 124 secno to[n_spares]; 125 126 The to[] list is initialized to point to n_spares preallocated empty 127 sectors. The from[] list contains the sector numbers of bad blocks 128 which have been remapped to corresponding sectors in the to[] list. 129 n_spares_used gives the length of the from[] list. */ 130 131 132 /* Sectors 18 and 19 are preallocated and unused. 133 Maybe they're spares for 16 and 17, but simple substitution fails. */ 134 135 136 /* The code page info pointed to by the spare block consists of an index 137 block and blocks containing uppercasing tables. I don't know what 138 these are for (CHKDSK, maybe?) -- OS/2 does not seem to use them 139 itself. Linux doesn't use them either. */ 140 141 /* block pointed to by spareblock->code_page_dir */ 142 143 #define CP_DIR_MAGIC 0x494521f7 144 145 struct code_page_directory 146 { 147 unsigned magic; /* 4945 21f7 */ 148 unsigned n_code_pages; /* number of pointers following */ 149 unsigned zero1[2]; 150 struct { 151 unsigned short ix; /* index */ 152 unsigned short code_page_number; /* code page number */ 153 unsigned bounds; /* matches corresponding word 154 in data block */ 155 secno code_page_data; /* sector number of a code_page_data 156 containing c.p. array */ 157 unsigned index; /* index in c.p. array in that sector*/ 158 } array[31]; /* unknown length */ 159 }; 160 161 /* blocks pointed to by code_page_directory */ 162 163 #define CP_DATA_MAGIC 0x894521f7 164 165 struct code_page_data 166 { 167 unsigned magic; /* 8945 21f7 */ 168 unsigned n_used; /* # elements used in c_p_data[] */ 169 unsigned bounds[3]; /* looks a bit like 170 (beg1,end1), (beg2,end2) 171 one byte each */ 172 unsigned short offs[3]; /* offsets from start of sector 173 to start of c_p_data[ix] */ 174 struct { 175 unsigned short ix; /* index */ 176 unsigned short code_page_number; /* code page number */ 177 unsigned short zero1; 178 unsigned char map[128]; /* upcase table for chars 80..ff */ 179 unsigned short zero2; 180 } code_page[3]; 181 unsigned char incognita[78]; 182 }; 183 184 185 /* Free space bitmaps are 4 sectors long, which is 16384 bits. 186 16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap. 187 Bit order in the maps is little-endian. 0 means taken, 1 means free. 188 189 Bit map sectors are marked allocated in the bit maps, and so are sectors 190 off the end of the partition. 191 192 Band 0 is sectors 0-3fff, its map is in sectors 18-1b. 193 Band 1 is 4000-7fff, its map is in 7ffc-7fff. 194 Band 2 is 8000-ffff, its map is in 8000-8003. 195 The remaining bands have maps in their first (even) or last (odd) 4 sectors 196 -- if the last, partial, band is odd its map is in its last 4 sectors. 197 198 The bitmap locations are given in a table pointed to by the super block. 199 No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is 200 just where they usually are. 201 202 The "directory band" is a bunch of sectors preallocated for dnodes. 203 It has a 4-sector free space bitmap of its own. Each bit in the map 204 corresponds to one 4-sector dnode, bit 0 of the map corresponding to 205 the first 4 sectors of the directory band. The entire band is marked 206 allocated in the main bitmap. The super block gives the locations 207 of the directory band and its bitmap. ("band" doesn't mean it is 208 8 meg long; it isn't.) */ 209 210 211 /* dnode: directory. 4 sectors long */ 212 213 /* A directory is a tree of dnodes. The fnode for a directory 214 contains one pointer, to the root dnode of the tree. The fnode 215 never moves, the dnodes do the B-tree thing, splitting and merging 216 as files are added and removed. */ 217 218 #define DNODE_MAGIC 0x77e40aae 219 220 struct dnode { 221 unsigned magic; /* 77e4 0aae */ 222 unsigned first_free; /* offset from start of dnode to 223 first free dir entry */ 224 unsigned increment_me; /* some kind of activity counter? 225 Neither HPFS.IFS nor CHKDSK cares 226 if you change this word */ 227 secno up; /* (root dnode) directory's fnode 228 (nonroot) parent dnode */ 229 dnode_secno self; /* pointer to this dnode */ 230 unsigned char dirent[2028]; /* one or more dirents */ 231 }; 232 233 struct hpfs_dirent { 234 unsigned short length; /* offset to next dirent */ 235 unsigned first: 1; /* set on phony ^A^A (".") entry */ 236 unsigned flag1: 1; 237 unsigned down: 1; /* down pointer present (after name) */ 238 unsigned last: 1; /* set on phony \377 entry */ 239 unsigned flag4: 1; 240 unsigned flag5: 1; 241 unsigned flag6: 1; 242 unsigned has_needea: 1; /* ?? some EA has NEEDEA set 243 I have no idea why this is 244 interesting in a dir entry */ 245 unsigned read_only: 1; /* dos attrib */ 246 unsigned hidden: 1; /* dos attrib */ 247 unsigned system: 1; /* dos attrib */ 248 unsigned flag11: 1; /* would be volume label dos attrib */ 249 unsigned directory: 1; /* dos attrib */ 250 unsigned archive: 1; /* dos attrib */ 251 unsigned not_8x3: 1; /* name is not 8.3 */ 252 unsigned flag15: 1; 253 fnode_secno fnode; /* fnode giving allocation info */ 254 time_t write_date; /* mtime */ 255 unsigned file_size; /* file length, bytes */ 256 time_t read_date; /* atime */ 257 time_t creation_date; /* ctime */ 258 unsigned ea_size; /* total EA length, bytes */ 259 unsigned char zero1; 260 unsigned char ix; /* code page index (of filename), see 261 struct code_page_data */ 262 unsigned char namelen, name[1]; /* file name */ 263 /* dnode_secno down; btree down pointer, if present, 264 follows name on next word boundary, or maybe it 265 precedes next dirent, which is on a word boundary. */ 266 }; 267 268 /* The b-tree down pointer from a dir entry */ 269 270 static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) /* */ 271 { 272 return *(dnode_secno *) ((void *) de + de->length - 4); 273 } 274 275 /* The first dir entry in a dnode */ 276 277 static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode) /* */ 278 { 279 return (void *) dnode->dirent; 280 } 281 282 /* The end+1 of the dir entries */ 283 284 static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode) /* */ 285 { 286 return (void *) dnode + dnode->first_free; 287 } 288 289 /* The dir entry after dir entry de */ 290 291 static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de) /* */ 292 { 293 return (void *) de + de->length; 294 } 295 296 297 /* B+ tree: allocation info in fnodes and anodes */ 298 299 /* dnodes point to fnodes which are responsible for listing the sectors 300 assigned to the file. This is done with trees of (length,address) 301 pairs. (Actually triples, of (length, file-address, disk-address) 302 which can represent holes. Find out if HPFS does that.) 303 At any rate, fnodes contain a small tree; if subtrees are needed 304 they occupy essentially a full block in anodes. A leaf-level tree node 305 has 3-word entries giving sector runs, a non-leaf node has 2-word 306 entries giving subtree pointers. A flag in the header says which. */ 307 308 struct bplus_leaf_node 309 { 310 unsigned file_secno; /* first file sector in extent */ 311 unsigned length; /* length, sectors */ 312 secno disk_secno; /* first corresponding disk sector */ 313 }; 314 315 struct bplus_internal_node 316 { 317 unsigned file_secno; /* subtree maps sectors < this */ 318 anode_secno down; /* pointer to subtree */ 319 }; 320 321 struct bplus_header 322 { 323 unsigned flag0: 1; 324 unsigned flag1: 1; 325 unsigned flag2: 1; 326 unsigned flag3: 1; 327 unsigned flag4: 1; 328 unsigned fnode_parent: 1; /* ? we're pointed to by an fnode, 329 the data btree or some ea or the 330 main ea bootage pointer ea_secno */ 331 /* also can get set in fnodes, which 332 may be a chkdsk glitch or may mean 333 this bit is irrelevant in fnodes, 334 or this interpretation is all wet */ 335 unsigned flag6: 1; 336 unsigned internal: 1; /* 1 -> (internal) tree of anodes 337 0 -> (leaf) list of extents */ 338 unsigned char fill[3]; 339 unsigned char n_free_nodes; /* free nodes in following array */ 340 unsigned char n_used_nodes; /* used nodes in following array */ 341 unsigned short first_free; /* offset from start of header to 342 first free node in array */ 343 union { 344 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving 345 subtree pointers */ 346 struct bplus_leaf_node external[0]; /* (external) 3-word entries giving 347 sector runs */ 348 } u; 349 }; 350 351 /* fnode: root of allocation b+ tree, and EA's */ 352 353 /* Every file and every directory has one fnode, pointed to by the directory 354 entry and pointing to the file's sectors or directory's root dnode. EA's 355 are also stored here, and there are said to be ACL's somewhere here too. */ 356 357 #define FNODE_MAGIC 0xf7e40aae 358 359 struct fnode 360 { 361 unsigned magic; /* f7e4 0aae */ 362 unsigned zero1[2]; 363 unsigned char len, name[15]; /* true length, truncated name */ 364 fnode_secno up; /* pointer to file's directory fnode */ 365 unsigned zero2[3]; 366 unsigned ea_size_l; /* length of disk-resident ea's */ 367 secno ea_secno; /* first sector of disk-resident ea's*/ 368 unsigned short ea_size_s; /* length of fnode-resident ea's */ 369 370 unsigned flag0: 1; 371 unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */ 372 unsigned flag2: 1; 373 unsigned flag3: 1; 374 unsigned flag4: 1; 375 unsigned flag5: 1; 376 unsigned flag6: 1; 377 unsigned flag7: 1; 378 unsigned dirflag: 1; /* 1 -> directory. first & only extent 379 points to dnode. */ 380 unsigned flag9: 1; 381 unsigned flag10: 1; 382 unsigned flag11: 1; 383 unsigned flag12: 1; 384 unsigned flag13: 1; 385 unsigned flag14: 1; 386 unsigned flag15: 1; 387 388 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ 389 union { 390 struct bplus_leaf_node external[8]; 391 struct bplus_internal_node internal[12]; 392 } u; 393 394 unsigned file_size; /* file length, bytes */ 395 unsigned n_needea; /* number of EA's with NEEDEA set */ 396 unsigned zero4[4]; 397 unsigned ea_offs; /* offset from start of fnode 398 to first fnode-resident ea */ 399 unsigned zero5[2]; 400 unsigned char ea[316]; /* zero or more EA's, packed together 401 with no alignment padding. 402 (Do not use this name, get here 403 via fnode + ea_offs. I think.) */ 404 }; 405 406 407 /* anode: 99.44% pure allocation tree */ 408 409 #define ANODE_MAGIC 0x37e40aae 410 411 struct anode 412 { 413 unsigned magic; /* 37e4 0aae */ 414 anode_secno self; /* pointer to this anode */ 415 secno up; /* parent anode or fnode */ 416 417 struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ 418 union { 419 struct bplus_leaf_node external[40]; 420 struct bplus_internal_node internal[60]; 421 } u; 422 423 unsigned fill[3]; /* unused */ 424 }; 425 426 427 /* extended attributes. 428 429 A file's EA info is stored as a list of (name,value) pairs. It is 430 usually in the fnode, but (if it's large) it is moved to a single 431 sector run outside the fnode, or to multiple runs with an anode tree 432 that points to them. 433 434 The value of a single EA is stored along with the name, or (if large) 435 it is moved to a single sector run, or multiple runs pointed to by an 436 anode tree, pointed to by the value field of the (name,value) pair. 437 438 Flags in the EA tell whether the value is immediate, in a single sector 439 run, or in multiple runs. Flags in the fnode tell whether the EA list 440 is immediate, in a single run, or in multiple runs. */ 441 442 struct extended_attribute 443 { 444 unsigned indirect: 1; /* 1 -> value gives sector number 445 where real value starts */ 446 unsigned anode: 1; /* 1 -> sector is an anode 447 that points to fragmented value */ 448 unsigned flag2: 1; 449 unsigned flag3: 1; 450 unsigned flag4: 1; 451 unsigned flag5: 1; 452 unsigned flag6: 1; 453 unsigned needea: 1; /* required ea */ 454 unsigned char namelen; /* length of name, bytes */ 455 unsigned short valuelen; /* length of value, bytes */ 456 /* 457 unsigned char name[namelen]; ascii attrib name 458 unsigned char nul; terminating '\0', not counted 459 unsigned char value[valuelen]; value, arbitrary 460 if this.indirect, valuelen is 8 and the value is 461 unsigned length; real length of value, bytes 462 secno secno; sector address where it starts 463 if this.anode, the above sector number is the root of an anode tree 464 which points to the value. 465 */ 466 }; 467 468 static inline unsigned char *ea_name (struct extended_attribute *ea) /* */ 469 { 470 return (void *) ea + sizeof *ea; 471 } 472 473 static inline unsigned char *ea_value (struct extended_attribute *ea) /* */ 474 { 475 return (void *) ea + sizeof *ea + ea->namelen + 1; 476 } 477 478 static inline struct extended_attribute * 479 ea_next_ea (struct extended_attribute *ea) /* */ 480 { 481 return (void *) ea + sizeof *ea + ea->namelen + 1 + ea->valuelen; 482 } 483 484 static inline unsigned ea_indirect_length (struct extended_attribute *ea) /* */ 485 { 486 unsigned *v = (void *) ea_value (ea); 487 return v[0]; 488 } 489 490 static inline secno ea_indirect_secno (struct extended_attribute *ea) /* */ 491 { 492 unsigned *v = (void *) ea_value (ea); 493 return v[1]; 494 } 495 496 /* 497 Local Variables: 498 comment-column: 40 499 End: 500 */