1 /* The paper 2 3 Duncan, Roy 4 Design goals and implementation of the new High Performance File System 5 Microsoft Systems Journal Sept 1989 v4 n5 p1(13) 6 7 describes what HPFS looked like when it was new, and it is the source 8 of most of the information given here. The rest is conjecture. 9 10 For definitive information on the Duncan paper, see it, not this file. 11 For definitive information on HPFS, ask somebody else -- this is guesswork. 12 There are certain to be many mistakes. */ 13 14 /* Notation */ 15 16 typedef unsigned secno; /* sector number, partition relative */ 17 18 typedef secno dnode_secno; /* sector number of a dnode */ 19 typedef secno fnode_secno; /* sector number of an fnode */ 20 typedef secno anode_secno; /* sector number of an anode */ 21 22 /* sector 0 */ 23 24 /* The boot block is very like a FAT boot block, except that the 25 29h signature byte is 28h instead, and the ID string is "HPFS". */ 26 27 struct hpfs_boot_block 28 { 29 unsigned char jmp[3]; 30 unsigned char oem_id[8]; 31 unsigned char bytes_per_sector[2]; /* 512 */ 32 unsigned char sectors_per_cluster; 33 unsigned char n_reserved_sectors[2]; 34 unsigned char n_fats; 35 unsigned char n_rootdir_entries[2]; 36 unsigned char n_sectors_s[2]; 37 unsigned char media_byte; 38 unsigned short sectors_per_fat; 39 unsigned short sectors_per_track; 40 unsigned short heads_per_cyl; 41 unsigned int n_hidden_sectors; 42 unsigned int n_sectors_l; /* size of partition */ 43 unsigned char drive_number; 44 unsigned char mbz; 45 unsigned char sig_28h; /* 28h */ 46 unsigned char vol_serno[4]; 47 unsigned char vol_label[11]; 48 unsigned char sig_hpfs[8]; /* "HPFS " */ 49 unsigned char pad[448]; 50 unsigned short magic; /* aa55 */ 51 }; 52 53 54 /* sector 16 */ 55 56 /* The super block has the pointer to the root directory. */ 57 58 #define SB_MAGIC 0xf995e849 59 60 struct hpfs_super_block 61 { 62 unsigned magic; /* f995 e849 */ 63 unsigned magic1; /* fa53 e9c5, more magic? */ 64 unsigned huh202; /* ?? 202 = N. of B. in 1.00390625 S.*/ 65 fnode_secno root; /* fnode of root directory */ 66 secno n_sectors; /* size of filesystem */ 67 unsigned n_badblocks; /* number of bad blocks */ 68 secno bitmaps; /* pointers to free space bit maps */ 69 unsigned zero1; /* 0 */ 70 secno badblocks; /* bad block list */ 71 unsigned zero3; /* 0 */ 72 time_t last_chkdsk; /* date last checked, 0 if never */ 73 unsigned zero4; /* 0 */ 74 secno n_dir_band; /* number of sectors in dir band */ 75 secno dir_band_start; /* first sector in dir band */ 76 secno dir_band_end; /* last sector in dir band */ 77 secno dir_band_bitmap; /* free space map, 1 dnode per bit */ 78 unsigned zero5[8]; /* 0 */ 79 secno scratch_dnodes; /* ?? 8 preallocated sectors near dir 80 band, 4-aligned. */ 81 unsigned zero6[103]; /* 0 */ 82 }; 83 84 85 /* sector 17 */ 86 87 /* The spare block has pointers to spare sectors. */ 88 89 #define SP_MAGIC 0xf9911849 90 91 struct hpfs_spare_block 92 { 93 unsigned magic; /* f991 1849 */ 94 unsigned magic1; /* fa52 29c5, more magic? */ 95 96 unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */ 97 unsigned flag1234: 4; /* unknown flags */ 98 unsigned fast: 1; /* partition was fast formatted */ 99 unsigned flag6to31: 26; /* unknown flags */ 100 101 secno hotfix_map; /* info about remapped bad sectors */ 102 unsigned n_spares_used; /* number of hotfixes */ 103 unsigned n_spares; /* number of spares in hotfix map */ 104 unsigned n_dnode_spares_free; /* spare dnodes unused */ 105 unsigned n_dnode_spares; /* length of spare_dnodes[] list, 106 follows in this block*/ 107 secno code_page_dir; /* code page directory block */ 108 unsigned n_code_pages; /* number of code pages */ 109 unsigned large_numbers[2]; /* ?? */ 110 unsigned zero1[15]; 111 dnode_secno spare_dnodes[20]; /* emergency free dnode list */ 112 unsigned zero2[81]; /* room for more? */ 113 }; 114 115 /* The bad block list is 4 sectors long. The first word must be zero, 116 the remaining words give n_badblocks bad block numbers. 117 I bet you can see it coming... */ 118 119 #define BAD_MAGIC 0 120 121 /* The hotfix map is 4 sectors long. It looks like 122 123 secno from[n_spares]; 124 secno to[n_spares]; 125 126 The to[] list is initialized to point to n_spares preallocated empty 127 sectors. The from[] list contains the sector numbers of bad blocks 128 which have been remapped to corresponding sectors in the to[] list. 129 n_spares_used gives the length of the from[] list. */ 130 131 132 /* Sectors 18 and 19 are preallocated and unused. 133 Maybe they're spares for 16 and 17, but simple substitution fails. */ 134 135 136 /* The code page info pointed to by the spare block consists of an index 137 block and blocks containing character maps. The following is pretty 138 sketchy, but Linux doesn't use code pages so it doesn't matter. */ 139 140 /* block pointed to by spareblock->code_page_dir */ 141 142 #define CP_DIR_MAGIC 0x494521f7 143 144 struct code_page_directory 145 { 146 unsigned magic; /* 4945 21f7 */ 147 unsigned n_code_pages; /* number of pointers following */ 148 unsigned zero1[2]; 149 struct { 150 unsigned short ix; /* index */ 151 unsigned short code_page_number; /* code page number */ 152 unsigned bounds; /* matches corresponding word 153 in data block */ 154 secno code_page_data; /* sector number of a code_page_data 155 containing c.p. array */ 156 unsigned index; /* index in c.p. array in that sector*/ 157 } array[31]; /* unknown length */ 158 }; 159 160 /* blocks pointed to by code_page_directory */ 161 162 #define CP_DATA_MAGIC 0x894521f7 163 164 struct code_page_data 165 { 166 unsigned magic; /* 8945 21f7 */ 167 unsigned n_used; /* # elements used in c_p_data[] */ 168 unsigned bounds[3]; /* looks a bit like 169 (beg1,end1), (beg2,end2) 170 one byte each */ 171 unsigned short offs[3]; /* offsets from start of sector 172 to start of c_p_data[ix] */ 173 struct { 174 unsigned short ix; /* index */ 175 unsigned short code_page_number; /* code page number */ 176 unsigned short zero1; 177 unsigned char map[128]; /* map for chars 80..ff */ 178 unsigned short zero2; 179 } code_page[3]; 180 unsigned char incognita[78]; 181 }; 182 183 184 /* Free space bitmaps are 4 sectors long, which is 16384 bits. 185 16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap. 186 Bit order in the maps is little-endian. 0 means taken, 1 means free. 187 188 Bit map sectors are marked allocated in the bit maps, and so are sectors 189 off the end of the partition. 190 191 Band 0 is sectors 0-3fff, its map is in sectors 18-1b. 192 Band 1 is 4000-7fff, its map is in 7ffc-7fff. 193 Band 2 is 8000-ffff, its map is in 8000-8003. 194 The remaining bands have maps in their first (even) or last (odd) 4 sectors 195 -- if the last, partial, band is odd its map is in its last 4 sectors. 196 197 The bitmap locations are given in a table pointed to by the super block. 198 No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is 199 just where they usually are. 200 201 The "directory band" is a bunch of sectors preallocated for dnodes. 202 It has a 4-sector free space bitmap of its own. Each bit in the map 203 corresponds to one 4-sector dnode, bit 0 of the map corresponding to 204 the first 4 sectors of the directory band. The entire band is marked 205 allocated in the main bitmap. The super block gives the locations 206 of the directory band and its bitmap. ("band" doesn't mean it is 207 8 meg long; it isn't.) */ 208 209 210 /* dnode: directory. 4 sectors long */ 211 212 /* A directory is a tree of dnodes. The fnode for a directory 213 contains one pointer, to the root dnode of the tree. The fnode 214 never moves, the dnodes do the B-tree thing, splitting and merging 215 as files are added and removed. */ 216 217 #define DNODE_MAGIC 0x77e40aae 218 219 struct dnode { 220 unsigned magic; /* 77e4 0aae */ 221 unsigned first_free; /* offset from start of dnode to 222 first free dir entry */ 223 unsigned increment_me; /* some kind of activity counter? 224 Neither HPFS.IFS nor CHKDSK cares 225 if you change this word */ 226 secno up; /* (root dnode) directory's fnode 227 (nonroot) parent dnode */ 228 dnode_secno self; /* pointer to this dnode */ 229 unsigned char dirent[2028]; /* one or more dirents */ 230 }; 231 232 struct hpfs_dirent { 233 unsigned short length; /* offset to next dirent */ 234 unsigned first: 1; /* set on phony ^A^A (".") entry */ 235 unsigned flag1: 1; 236 unsigned down: 1; /* down pointer present (after name) */ 237 unsigned last: 1; /* set on phony \377 entry */ 238 unsigned flag4: 1; 239 unsigned flag5: 1; 240 unsigned flag6: 1; 241 unsigned has_needea: 1; /* ?? some EA has NEEDEA set 242 I have no idea why this is 243 interesting in a dir entry */ 244 unsigned read_only: 1; /* dos attrib */ 245 unsigned hidden: 1; /* dos attrib */ 246 unsigned system: 1; /* dos attrib */ 247 unsigned flag11: 1; /* would be volume label dos attrib */ 248 unsigned directory: 1; /* dos attrib */ 249 unsigned archive: 1; /* dos attrib */ 250 unsigned not_8x3: 1; /* name is not 8.3 */ 251 unsigned flag15: 1; 252 fnode_secno fnode; /* fnode giving allocation info */ 253 time_t write_date; /* mtime */ 254 unsigned file_size; /* file length, bytes */ 255 time_t read_date; /* atime */ 256 time_t creation_date; /* ctime */ 257 unsigned ea_size; /* total EA length, bytes */ 258 unsigned char zero1; 259 unsigned char locality; /* 0=unk 1=seq 2=random 3=both */ 260 unsigned char namelen, name[1]; /* file name */ 261 /* dnode_secno down; btree down pointer, if present, 262 follows name on next word boundary, or maybe it's 263 precedes next dirent, which is on a word boundary. */ 264 }; 265 266 /* The b-tree down pointer from a dir entry */ 267 268 static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) /* */ 269 { 270 return *(dnode_secno *) ((void *) de + de->length - 4); 271 } 272 273 /* The first dir entry in a dnode */ 274 275 static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode) /* */ 276 { 277 return (void *) dnode->dirent; 278 } 279 280 /* The end+1 of the dir entries */ 281 282 static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode) /* */ 283 { 284 return (void *) dnode + dnode->first_free; 285 } 286 287 /* The dir entry after dir entry de */ 288 289 static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de) /* */ 290 { 291 return (void *) de + de->length; 292 } 293 294 295 /* B+ tree: allocation info in fnodes and anodes */ 296 297 /* dnodes point to fnodes which are responsible for listing the sectors 298 assigned to the file. This is done with trees of (length,address) 299 pairs. (Actually triples, of (length, file-address, disk-address) 300 which can represent holes. Find out if HPFS does that.) 301 At any rate, fnodes contain a small tree; if subtrees are needed 302 they occupy essentially a full block in anodes. A leaf-level tree node 303 has 3-word entries giving sector runs, a non-leaf node has 2-word 304 entries giving subtree pointers. A flag in the header says which. */ 305 306 struct bplus_leaf_node 307 { 308 unsigned file_secno; /* first file sector in extent */ 309 unsigned length; /* length, sectors */ 310 secno disk_secno; /* first corresponding disk sector */ 311 }; 312 313 struct bplus_internal_node 314 { 315 unsigned file_secno; /* subtree maps sectors < this */ 316 anode_secno down; /* pointer to subtree */ 317 }; 318 319 struct bplus_header 320 { 321 unsigned flag0: 1; 322 unsigned flag1: 1; 323 unsigned flag2: 1; 324 unsigned flag3: 1; 325 unsigned flag4: 1; 326 unsigned fnode_parent: 1; /* ? we're pointed to by an fnode, 327 the data btree or some ea or the 328 main ea bootage pointer ea_secno */ 329 /* also can get set in fnodes, which 330 may be a chkdsk glitch or may mean 331 this bit is irrelevant in fnodes, 332 or this interpretation is all wet */ 333 unsigned flag6: 1; 334 unsigned internal: 1; /* 1 -> (internal) tree of anodes 335 0 -> (leaf) list of extents */ 336 unsigned char fill[3]; 337 unsigned char n_free_nodes; /* free nodes in following array */ 338 unsigned char n_used_nodes; /* used nodes in following array */ 339 unsigned short first_free; /* offset from start of header to 340 first free node in array */ 341 union { 342 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving 343 subtree pointers */ 344 struct bplus_leaf_node external[0]; /* (external) 3-word entries giving 345 sector runs */ 346 } u; 347 }; 348 349 /* fnode: root of allocation b+ tree, and EA's */ 350 351 /* Every file and every directory has one fnode, pointed to by the directory 352 entry and pointing to the file's sectors or directory's root dnode. EA's 353 are also stored here, and there are said to be ACL's somewhere here too. */ 354 355 #define FNODE_MAGIC 0xf7e40aae 356 357 struct fnode 358 { 359 unsigned magic; /* f7e4 0aae */ 360 unsigned zero1[2]; 361 unsigned char len, name[15]; /* true length, truncated name */ 362 fnode_secno up; /* pointer to file's directory fnode */ 363 unsigned zero2[3]; 364 unsigned ea_size_l; /* length of disk-resident ea's */ 365 secno ea_secno; /* first sector of disk-resident ea's*/ 366 unsigned short ea_size_s; /* length of fnode-resident ea's */ 367 368 unsigned flag0: 1; 369 unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */ 370 unsigned flag2: 1; 371 unsigned flag3: 1; 372 unsigned flag4: 1; 373 unsigned flag5: 1; 374 unsigned flag6: 1; 375 unsigned flag7: 1; 376 unsigned dirflag: 1; /* 1 -> directory. first & only extent 377 points to dnode. */ 378 unsigned flag9: 1; 379 unsigned flag10: 1; 380 unsigned flag11: 1; 381 unsigned flag12: 1; 382 unsigned flag13: 1; 383 unsigned flag14: 1; 384 unsigned flag15: 1; 385 386 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ 387 union { 388 struct bplus_leaf_node external[8]; 389 struct bplus_internal_node internal[12]; 390 } u; 391 392 unsigned file_size; /* file length, bytes */ 393 unsigned n_needea; /* number of EA's with NEEDEA set */ 394 unsigned zero4[4]; 395 unsigned ea_offs; /* offset from start of fnode 396 to first fnode-resident ea */ 397 unsigned zero5[2]; 398 unsigned char ea[316]; /* zero or more EA's, packed together 399 with no alignment padding. 400 (Do not use this name, get here 401 via fnode + ea_offs. I think.) */ 402 }; 403 404 405 /* anode: 99.44% pure allocation tree */ 406 407 #define ANODE_MAGIC 0x37e40aae 408 409 struct anode 410 { 411 unsigned magic; /* 37e4 0aae */ 412 anode_secno self; /* pointer to this anode */ 413 secno up; /* parent anode or fnode */ 414 415 struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ 416 union { 417 struct bplus_leaf_node external[40]; 418 struct bplus_internal_node internal[60]; 419 } u; 420 421 unsigned fill[3]; /* unused */ 422 }; 423 424 425 /* extended attributes. 426 427 A file's EA info is stored as a list of (name,value) pairs. It is 428 usually in the fnode, but (if it's large) it is moved to a single 429 sector run outside the fnode, or to multiple runs with an anode tree 430 that points to them. 431 432 The value of a single EA is stored along with the name, or (if large) 433 it is moved to a single sector run, or multiple runs pointed to by an 434 anode tree, pointed to by the value field of the (name,value) pair. 435 436 Flags in the EA tell whether the value is immediate, in a single sector 437 run, or in multiple runs. Flags in the fnode tell whether the EA list 438 is immediate, in a single run, or in multiple runs. */ 439 440 struct extended_attribute 441 { 442 unsigned indirect: 1; /* 1 -> value gives sector number 443 where real value starts */ 444 unsigned anode: 1; /* 1 -> sector is an anode 445 that points to fragmented value */ 446 unsigned flag2: 1; 447 unsigned flag3: 1; 448 unsigned flag4: 1; 449 unsigned flag5: 1; 450 unsigned flag6: 1; 451 unsigned needea: 1; /* required ea */ 452 unsigned char namelen; /* length of name, bytes */ 453 unsigned short valuelen; /* length of value, bytes */ 454 /* 455 unsigned char name[namelen]; ascii attrib name 456 unsigned char nul; terminating '\0', not counted 457 unsigned char value[valuelen]; value, arbitrary 458 if this.indirect, valuelen is 8 and the value is 459 unsigned length; real length of value, bytes 460 secno secno; sector address where it starts 461 if this.anode, the above sector number is the root of an anode tree 462 which points to the value. 463 */ 464 }; 465 466 static inline unsigned char *ea_name (struct extended_attribute *ea) /* */ 467 { 468 return (void *) ea + sizeof *ea; 469 } 470 471 static inline unsigned char *ea_value (struct extended_attribute *ea) /* */ 472 { 473 return (void *) ea + sizeof *ea + ea->namelen + 1; 474 } 475 476 static inline struct extended_attribute * 477 ea_next_ea (struct extended_attribute *ea) /* */ 478 { 479 return (void *) ea + sizeof *ea + ea->namelen + 1 + ea->valuelen; 480 } 481 482 static inline unsigned ea_indirect_length (struct extended_attribute *ea) /* */ 483 { 484 unsigned *v = (void *) ea_value (ea); 485 return v[0]; 486 } 487 488 static inline secno ea_indirect_secno (struct extended_attribute *ea) /* */ 489 { 490 unsigned *v = (void *) ea_value (ea); 491 return v[1]; 492 } 493 494 /* 495 Local Variables: 496 comment-column: 40 497 End: 498 */