1 /* The paper
2
3 Duncan, Roy
4 Design goals and implementation of the new High Performance File System
5 Microsoft Systems Journal Sept 1989 v4 n5 p1(13)
6
7 describes what HPFS looked like when it was new, and it is the source
8 of most of the information given here. The rest is conjecture.
9
10 For definitive information on the Duncan paper, see it, not this file.
11 For definitive information on HPFS, ask somebody else -- this is guesswork.
12 There are certain to be many mistakes. */
13
14 /* Notation */
15
16 typedef unsigned secno; /* sector number, partition relative */
17
18 typedef secno dnode_secno; /* sector number of a dnode */
19 typedef secno fnode_secno; /* sector number of an fnode */
20 typedef secno anode_secno; /* sector number of an anode */
21
22 /* sector 0 */
23
24 /* The boot block is very like a FAT boot block, except that the
25 29h signature byte is 28h instead, and the ID string is "HPFS". */
26
27 struct hpfs_boot_block
28 {
29 unsigned char jmp[3];
30 unsigned char oem_id[8];
31 unsigned char bytes_per_sector[2]; /* 512 */
32 unsigned char sectors_per_cluster;
33 unsigned char n_reserved_sectors[2];
34 unsigned char n_fats;
35 unsigned char n_rootdir_entries[2];
36 unsigned char n_sectors_s[2];
37 unsigned char media_byte;
38 unsigned short sectors_per_fat;
39 unsigned short sectors_per_track;
40 unsigned short heads_per_cyl;
41 unsigned int n_hidden_sectors;
42 unsigned int n_sectors_l; /* size of partition */
43 unsigned char drive_number;
44 unsigned char mbz;
45 unsigned char sig_28h; /* 28h */
46 unsigned char vol_serno[4];
47 unsigned char vol_label[11];
48 unsigned char sig_hpfs[8]; /* "HPFS " */
49 unsigned char pad[448];
50 unsigned short magic; /* aa55 */
51 };
52
53
54 /* sector 16 */
55
56 /* The super block has the pointer to the root directory. */
57
58 #define SB_MAGIC 0xf995e849
59
60 struct hpfs_super_block
61 {
62 unsigned magic; /* f995 e849 */
63 unsigned magic1; /* fa53 e9c5, more magic? */
64 unsigned huh202; /* ?? 202 = N. of B. in 1.00390625 S.*/
65 fnode_secno root; /* fnode of root directory */
66 secno n_sectors; /* size of filesystem */
67 unsigned n_badblocks; /* number of bad blocks */
68 secno bitmaps; /* pointers to free space bit maps */
69 unsigned zero1; /* 0 */
70 secno badblocks; /* bad block list */
71 unsigned zero3; /* 0 */
72 time_t last_chkdsk; /* date last checked, 0 if never */
73 unsigned zero4; /* 0 */
74 secno n_dir_band; /* number of sectors in dir band */
75 secno dir_band_start; /* first sector in dir band */
76 secno dir_band_end; /* last sector in dir band */
77 secno dir_band_bitmap; /* free space map, 1 dnode per bit */
78 unsigned zero5[8]; /* 0 */
79 secno scratch_dnodes; /* ?? 8 preallocated sectors near dir
80 band, 4-aligned. */
81 unsigned zero6[103]; /* 0 */
82 };
83
84
85 /* sector 17 */
86
87 /* The spare block has pointers to spare sectors. */
88
89 #define SP_MAGIC 0xf9911849
90
91 struct hpfs_spare_block
92 {
93 unsigned magic; /* f991 1849 */
94 unsigned magic1; /* fa52 29c5, more magic? */
95
96 unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */
97 unsigned flag1234: 4; /* unknown flags */
98 unsigned fast: 1; /* partition was fast formatted */
99 unsigned flag6to31: 26; /* unknown flags */
100
101 secno hotfix_map; /* info about remapped bad sectors */
102 unsigned n_spares_used; /* number of hotfixes */
103 unsigned n_spares; /* number of spares in hotfix map */
104 unsigned n_dnode_spares_free; /* spare dnodes unused */
105 unsigned n_dnode_spares; /* length of spare_dnodes[] list,
106 follows in this block*/
107 secno code_page_dir; /* code page directory block */
108 unsigned n_code_pages; /* number of code pages */
109 unsigned large_numbers[2]; /* ?? */
110 unsigned zero1[15];
111 dnode_secno spare_dnodes[20]; /* emergency free dnode list */
112 unsigned zero2[81]; /* room for more? */
113 };
114
115 /* The bad block list is 4 sectors long. The first word must be zero,
116 the remaining words give n_badblocks bad block numbers.
117 I bet you can see it coming... */
118
119 #define BAD_MAGIC 0
120
121 /* The hotfix map is 4 sectors long. It looks like
122
123 secno from[n_spares];
124 secno to[n_spares];
125
126 The to[] list is initialized to point to n_spares preallocated empty
127 sectors. The from[] list contains the sector numbers of bad blocks
128 which have been remapped to corresponding sectors in the to[] list.
129 n_spares_used gives the length of the from[] list. */
130
131
132 /* Sectors 18 and 19 are preallocated and unused.
133 Maybe they're spares for 16 and 17, but simple substitution fails. */
134
135
136 /* The code page info pointed to by the spare block consists of an index
137 block and blocks containing uppercasing tables. I don't know what
138 these are for (CHKDSK, maybe?) -- OS/2 does not seem to use them
139 itself. Linux doesn't use them either. */
140
141 /* block pointed to by spareblock->code_page_dir */
142
143 #define CP_DIR_MAGIC 0x494521f7
144
145 struct code_page_directory
146 {
147 unsigned magic; /* 4945 21f7 */
148 unsigned n_code_pages; /* number of pointers following */
149 unsigned zero1[2];
150 struct {
151 unsigned short ix; /* index */
152 unsigned short code_page_number; /* code page number */
153 unsigned bounds; /* matches corresponding word
154 in data block */
155 secno code_page_data; /* sector number of a code_page_data
156 containing c.p. array */
157 unsigned index; /* index in c.p. array in that sector*/
158 } array[31]; /* unknown length */
159 };
160
161 /* blocks pointed to by code_page_directory */
162
163 #define CP_DATA_MAGIC 0x894521f7
164
165 struct code_page_data
166 {
167 unsigned magic; /* 8945 21f7 */
168 unsigned n_used; /* # elements used in c_p_data[] */
169 unsigned bounds[3]; /* looks a bit like
170 (beg1,end1), (beg2,end2)
171 one byte each */
172 unsigned short offs[3]; /* offsets from start of sector
173 to start of c_p_data[ix] */
174 struct {
175 unsigned short ix; /* index */
176 unsigned short code_page_number; /* code page number */
177 unsigned short zero1;
178 unsigned char map[128]; /* upcase table for chars 80..ff */
179 unsigned short zero2;
180 } code_page[3];
181 unsigned char incognita[78];
182 };
183
184
185 /* Free space bitmaps are 4 sectors long, which is 16384 bits.
186 16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap.
187 Bit order in the maps is little-endian. 0 means taken, 1 means free.
188
189 Bit map sectors are marked allocated in the bit maps, and so are sectors
190 off the end of the partition.
191
192 Band 0 is sectors 0-3fff, its map is in sectors 18-1b.
193 Band 1 is 4000-7fff, its map is in 7ffc-7fff.
194 Band 2 is 8000-ffff, its map is in 8000-8003.
195 The remaining bands have maps in their first (even) or last (odd) 4 sectors
196 -- if the last, partial, band is odd its map is in its last 4 sectors.
197
198 The bitmap locations are given in a table pointed to by the super block.
199 No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is
200 just where they usually are.
201
202 The "directory band" is a bunch of sectors preallocated for dnodes.
203 It has a 4-sector free space bitmap of its own. Each bit in the map
204 corresponds to one 4-sector dnode, bit 0 of the map corresponding to
205 the first 4 sectors of the directory band. The entire band is marked
206 allocated in the main bitmap. The super block gives the locations
207 of the directory band and its bitmap. ("band" doesn't mean it is
208 8 meg long; it isn't.) */
209
210
211 /* dnode: directory. 4 sectors long */
212
213 /* A directory is a tree of dnodes. The fnode for a directory
214 contains one pointer, to the root dnode of the tree. The fnode
215 never moves, the dnodes do the B-tree thing, splitting and merging
216 as files are added and removed. */
217
218 #define DNODE_MAGIC 0x77e40aae
219
220 struct dnode {
221 unsigned magic; /* 77e4 0aae */
222 unsigned first_free; /* offset from start of dnode to
223 first free dir entry */
224 unsigned increment_me; /* some kind of activity counter?
225 Neither HPFS.IFS nor CHKDSK cares
226 if you change this word */
227 secno up; /* (root dnode) directory's fnode
228 (nonroot) parent dnode */
229 dnode_secno self; /* pointer to this dnode */
230 unsigned char dirent[2028]; /* one or more dirents */
231 };
232
233 struct hpfs_dirent {
234 unsigned short length; /* offset to next dirent */
235 unsigned first: 1; /* set on phony ^A^A (".") entry */
236 unsigned flag1: 1;
237 unsigned down: 1; /* down pointer present (after name) */
238 unsigned last: 1; /* set on phony \377 entry */
239 unsigned flag4: 1;
240 unsigned flag5: 1;
241 unsigned flag6: 1;
242 unsigned has_needea: 1; /* ?? some EA has NEEDEA set
243 I have no idea why this is
244 interesting in a dir entry */
245 unsigned read_only: 1; /* dos attrib */
246 unsigned hidden: 1; /* dos attrib */
247 unsigned system: 1; /* dos attrib */
248 unsigned flag11: 1; /* would be volume label dos attrib */
249 unsigned directory: 1; /* dos attrib */
250 unsigned archive: 1; /* dos attrib */
251 unsigned not_8x3: 1; /* name is not 8.3 */
252 unsigned flag15: 1;
253 fnode_secno fnode; /* fnode giving allocation info */
254 time_t write_date; /* mtime */
255 unsigned file_size; /* file length, bytes */
256 time_t read_date; /* atime */
257 time_t creation_date; /* ctime */
258 unsigned ea_size; /* total EA length, bytes */
259 unsigned char zero1;
260 unsigned char ix; /* code page index (of filename), see
261 struct code_page_data */
262 unsigned char namelen, name[1]; /* file name */
263 /* dnode_secno down; btree down pointer, if present,
264 follows name on next word boundary, or maybe it
265 precedes next dirent, which is on a word boundary. */
266 };
267
268 /* The b-tree down pointer from a dir entry */
269
270 static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
271 {
272 return *(dnode_secno *) ((void *) de + de->length - 4);
273 }
274
275 /* The first dir entry in a dnode */
276
277 static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
278 {
279 return (void *) dnode->dirent;
280 }
281
282 /* The end+1 of the dir entries */
283
284 static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
285 {
286 return (void *) dnode + dnode->first_free;
287 }
288
289 /* The dir entry after dir entry de */
290
291 static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
292 {
293 return (void *) de + de->length;
294 }
295
296
297 /* B+ tree: allocation info in fnodes and anodes */
298
299 /* dnodes point to fnodes which are responsible for listing the sectors
300 assigned to the file. This is done with trees of (length,address)
301 pairs. (Actually triples, of (length, file-address, disk-address)
302 which can represent holes. Find out if HPFS does that.)
303 At any rate, fnodes contain a small tree; if subtrees are needed
304 they occupy essentially a full block in anodes. A leaf-level tree node
305 has 3-word entries giving sector runs, a non-leaf node has 2-word
306 entries giving subtree pointers. A flag in the header says which. */
307
308 struct bplus_leaf_node
309 {
310 unsigned file_secno; /* first file sector in extent */
311 unsigned length; /* length, sectors */
312 secno disk_secno; /* first corresponding disk sector */
313 };
314
315 struct bplus_internal_node
316 {
317 unsigned file_secno; /* subtree maps sectors < this */
318 anode_secno down; /* pointer to subtree */
319 };
320
321 struct bplus_header
322 {
323 unsigned flag0: 1;
324 unsigned flag1: 1;
325 unsigned flag2: 1;
326 unsigned flag3: 1;
327 unsigned flag4: 1;
328 unsigned fnode_parent: 1; /* ? we're pointed to by an fnode,
329 the data btree or some ea or the
330 main ea bootage pointer ea_secno */
331 /* also can get set in fnodes, which
332 may be a chkdsk glitch or may mean
333 this bit is irrelevant in fnodes,
334 or this interpretation is all wet */
335 unsigned flag6: 1;
336 unsigned internal: 1; /* 1 -> (internal) tree of anodes
337 0 -> (leaf) list of extents */
338 unsigned char fill[3];
339 unsigned char n_free_nodes; /* free nodes in following array */
340 unsigned char n_used_nodes; /* used nodes in following array */
341 unsigned short first_free; /* offset from start of header to
342 first free node in array */
343 union {
344 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
345 subtree pointers */
346 struct bplus_leaf_node external[0]; /* (external) 3-word entries giving
347 sector runs */
348 } u;
349 };
350
351 /* fnode: root of allocation b+ tree, and EA's */
352
353 /* Every file and every directory has one fnode, pointed to by the directory
354 entry and pointing to the file's sectors or directory's root dnode. EA's
355 are also stored here, and there are said to be ACL's somewhere here too. */
356
357 #define FNODE_MAGIC 0xf7e40aae
358
359 struct fnode
360 {
361 unsigned magic; /* f7e4 0aae */
362 unsigned zero1[2];
363 unsigned char len, name[15]; /* true length, truncated name */
364 fnode_secno up; /* pointer to file's directory fnode */
365 unsigned zero2[3];
366 unsigned ea_size_l; /* length of disk-resident ea's */
367 secno ea_secno; /* first sector of disk-resident ea's*/
368 unsigned short ea_size_s; /* length of fnode-resident ea's */
369
370 unsigned flag0: 1;
371 unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */
372 unsigned flag2: 1;
373 unsigned flag3: 1;
374 unsigned flag4: 1;
375 unsigned flag5: 1;
376 unsigned flag6: 1;
377 unsigned flag7: 1;
378 unsigned dirflag: 1; /* 1 -> directory. first & only extent
379 points to dnode. */
380 unsigned flag9: 1;
381 unsigned flag10: 1;
382 unsigned flag11: 1;
383 unsigned flag12: 1;
384 unsigned flag13: 1;
385 unsigned flag14: 1;
386 unsigned flag15: 1;
387
388 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */
389 union {
390 struct bplus_leaf_node external[8];
391 struct bplus_internal_node internal[12];
392 } u;
393
394 unsigned file_size; /* file length, bytes */
395 unsigned n_needea; /* number of EA's with NEEDEA set */
396 unsigned zero4[4];
397 unsigned ea_offs; /* offset from start of fnode
398 to first fnode-resident ea */
399 unsigned zero5[2];
400 unsigned char ea[316]; /* zero or more EA's, packed together
401 with no alignment padding.
402 (Do not use this name, get here
403 via fnode + ea_offs. I think.) */
404 };
405
406
407 /* anode: 99.44% pure allocation tree */
408
409 #define ANODE_MAGIC 0x37e40aae
410
411 struct anode
412 {
413 unsigned magic; /* 37e4 0aae */
414 anode_secno self; /* pointer to this anode */
415 secno up; /* parent anode or fnode */
416
417 struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */
418 union {
419 struct bplus_leaf_node external[40];
420 struct bplus_internal_node internal[60];
421 } u;
422
423 unsigned fill[3]; /* unused */
424 };
425
426
427 /* extended attributes.
428
429 A file's EA info is stored as a list of (name,value) pairs. It is
430 usually in the fnode, but (if it's large) it is moved to a single
431 sector run outside the fnode, or to multiple runs with an anode tree
432 that points to them.
433
434 The value of a single EA is stored along with the name, or (if large)
435 it is moved to a single sector run, or multiple runs pointed to by an
436 anode tree, pointed to by the value field of the (name,value) pair.
437
438 Flags in the EA tell whether the value is immediate, in a single sector
439 run, or in multiple runs. Flags in the fnode tell whether the EA list
440 is immediate, in a single run, or in multiple runs. */
441
442 struct extended_attribute
443 {
444 unsigned indirect: 1; /* 1 -> value gives sector number
445 where real value starts */
446 unsigned anode: 1; /* 1 -> sector is an anode
447 that points to fragmented value */
448 unsigned flag2: 1;
449 unsigned flag3: 1;
450 unsigned flag4: 1;
451 unsigned flag5: 1;
452 unsigned flag6: 1;
453 unsigned needea: 1; /* required ea */
454 unsigned char namelen; /* length of name, bytes */
455 unsigned short valuelen; /* length of value, bytes */
456 /*
457 unsigned char name[namelen]; ascii attrib name
458 unsigned char nul; terminating '\0', not counted
459 unsigned char value[valuelen]; value, arbitrary
460 if this.indirect, valuelen is 8 and the value is
461 unsigned length; real length of value, bytes
462 secno secno; sector address where it starts
463 if this.anode, the above sector number is the root of an anode tree
464 which points to the value.
465 */
466 };
467
468 static inline unsigned char *ea_name (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
469 {
470 return (void *) ea + sizeof *ea;
471 }
472
473 static inline unsigned char *ea_value (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
474 {
475 return (void *) ea + sizeof *ea + ea->namelen + 1;
476 }
477
478 static inline struct extended_attribute *
479 ea_next_ea (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
480 {
481 return (void *) ea + sizeof *ea + ea->namelen + 1 + ea->valuelen;
482 }
483
484 static inline unsigned ea_indirect_length (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
485 {
486 unsigned *v = (void *) ea_value (ea);
487 return v[0];
488 }
489
490 static inline secno ea_indirect_secno (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
491 {
492 unsigned *v = (void *) ea_value (ea);
493 return v[1];
494 }
495
496 /*
497 Local Variables:
498 comment-column: 40
499 End:
500 */