1 /* The paper
2
3 Duncan, Roy
4 Design goals and implementation of the new High Performance File System
5 Microsoft Systems Journal Sept 1989 v4 n5 p1(13)
6
7 describes what HPFS looked like when it was new, and it is the source
8 of most of the information given here. The rest is conjecture.
9
10 For definitive information on the Duncan paper, see it, not this file.
11 For definitive information on HPFS, ask somebody else -- this is guesswork.
12 There are certain to be many mistakes. */
13
14 /* Notation */
15
16 typedef unsigned secno; /* sector number, partition relative */
17
18 typedef secno dnode_secno; /* sector number of a dnode */
19 typedef secno fnode_secno; /* sector number of an fnode */
20 typedef secno anode_secno; /* sector number of an anode */
21
22 /* sector 0 */
23
24 /* The boot block is very like a FAT boot block, except that the
25 29h signature byte is 28h instead, and the ID string is "HPFS". */
26
27 struct hpfs_boot_block
28 {
29 unsigned char jmp[3];
30 unsigned char oem_id[8];
31 unsigned char bytes_per_sector[2]; /* 512 */
32 unsigned char sectors_per_cluster;
33 unsigned char n_reserved_sectors[2];
34 unsigned char n_fats;
35 unsigned char n_rootdir_entries[2];
36 unsigned char n_sectors_s[2];
37 unsigned char media_byte;
38 unsigned short sectors_per_fat;
39 unsigned short sectors_per_track;
40 unsigned short heads_per_cyl;
41 unsigned int n_hidden_sectors;
42 unsigned int n_sectors_l; /* size of partition */
43 unsigned char drive_number;
44 unsigned char mbz;
45 unsigned char sig_28h; /* 28h */
46 unsigned char vol_serno[4];
47 unsigned char vol_label[11];
48 unsigned char sig_hpfs[8]; /* "HPFS " */
49 unsigned char pad[448];
50 unsigned short magic; /* aa55 */
51 };
52
53
54 /* sector 16 */
55
56 /* The super block has the pointer to the root directory. */
57
58 #define SB_MAGIC 0xf995e849
59
60 struct hpfs_super_block
61 {
62 unsigned magic; /* f995 e849 */
63 unsigned magic1; /* fa53 e9c5, more magic? */
64 unsigned huh202; /* ?? 202 = N. of B. in 1.00390625 S.*/
65 fnode_secno root; /* fnode of root directory */
66 secno n_sectors; /* size of filesystem */
67 unsigned n_badblocks; /* number of bad blocks */
68 secno bitmaps; /* pointers to free space bit maps */
69 unsigned zero1; /* 0 */
70 secno badblocks; /* bad block list */
71 unsigned zero3; /* 0 */
72 time_t last_chkdsk; /* date last checked, 0 if never */
73 unsigned zero4; /* 0 */
74 secno n_dir_band; /* number of sectors in dir band */
75 secno dir_band_start; /* first sector in dir band */
76 secno dir_band_end; /* last sector in dir band */
77 secno dir_band_bitmap; /* free space map, 1 dnode per bit */
78 unsigned zero5[8]; /* 0 */
79 secno scratch_dnodes; /* ?? 8 preallocated sectors near dir
80 band, 4-aligned. */
81 unsigned zero6[103]; /* 0 */
82 };
83
84
85 /* sector 17 */
86
87 /* The spare block has pointers to spare sectors. */
88
89 #define SP_MAGIC 0xf9911849
90
91 struct hpfs_spare_block
92 {
93 unsigned magic; /* f991 1849 */
94 unsigned magic1; /* fa52 29c5, more magic? */
95
96 unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */
97 unsigned flag1234: 4; /* unknown flags */
98 unsigned fast: 1; /* partition was fast formatted */
99 unsigned flag6to31: 26; /* unknown flags */
100
101 secno hotfix_map; /* info about remapped bad sectors */
102 unsigned n_spares_used; /* number of hotfixes */
103 unsigned n_spares; /* number of spares in hotfix map */
104 unsigned n_dnode_spares_free; /* spare dnodes unused */
105 unsigned n_dnode_spares; /* length of spare_dnodes[] list,
106 follows in this block*/
107 secno code_page_dir; /* code page directory block */
108 unsigned n_code_pages; /* number of code pages */
109 unsigned large_numbers[2]; /* ?? */
110 unsigned zero1[15];
111 dnode_secno spare_dnodes[20]; /* emergency free dnode list */
112 unsigned zero2[81]; /* room for more? */
113 };
114
115 /* The bad block list is 4 sectors long. The first word must be zero,
116 the remaining words give n_badblocks bad block numbers.
117 I bet you can see it coming... */
118
119 #define BAD_MAGIC 0
120
121 /* The hotfix map is 4 sectors long. It looks like
122
123 secno from[n_spares];
124 secno to[n_spares];
125
126 The to[] list is initialized to point to n_spares preallocated empty
127 sectors. The from[] list contains the sector numbers of bad blocks
128 which have been remapped to corresponding sectors in the to[] list.
129 n_spares_used gives the length of the from[] list. */
130
131
132 /* Sectors 18 and 19 are preallocated and unused.
133 Maybe they're spares for 16 and 17, but simple substitution fails. */
134
135
136 /* The code page info pointed to by the spare block consists of an index
137 block and blocks containing character maps. The following is pretty
138 sketchy, but Linux doesn't use code pages so it doesn't matter. */
139
140 /* block pointed to by spareblock->code_page_dir */
141
142 #define CP_DIR_MAGIC 0x494521f7
143
144 struct code_page_directory
145 {
146 unsigned magic; /* 4945 21f7 */
147 unsigned n_code_pages; /* number of pointers following */
148 unsigned zero1[2];
149 struct {
150 unsigned short ix; /* index */
151 unsigned short code_page_number; /* code page number */
152 unsigned bounds; /* matches corresponding word
153 in data block */
154 secno code_page_data; /* sector number of a code_page_data
155 containing c.p. array */
156 unsigned index; /* index in c.p. array in that sector*/
157 } array[31]; /* unknown length */
158 };
159
160 /* blocks pointed to by code_page_directory */
161
162 #define CP_DATA_MAGIC 0x894521f7
163
164 struct code_page_data
165 {
166 unsigned magic; /* 8945 21f7 */
167 unsigned n_used; /* # elements used in c_p_data[] */
168 unsigned bounds[3]; /* looks a bit like
169 (beg1,end1), (beg2,end2)
170 one byte each */
171 unsigned short offs[3]; /* offsets from start of sector
172 to start of c_p_data[ix] */
173 struct {
174 unsigned short ix; /* index */
175 unsigned short code_page_number; /* code page number */
176 unsigned short zero1;
177 unsigned char map[128]; /* map for chars 80..ff */
178 unsigned short zero2;
179 } code_page[3];
180 unsigned char incognita[78];
181 };
182
183
184 /* Free space bitmaps are 4 sectors long, which is 16384 bits.
185 16384 sectors is 8 meg, and each 8 meg band has a 4-sector bitmap.
186 Bit order in the maps is little-endian. 0 means taken, 1 means free.
187
188 Bit map sectors are marked allocated in the bit maps, and so are sectors
189 off the end of the partition.
190
191 Band 0 is sectors 0-3fff, its map is in sectors 18-1b.
192 Band 1 is 4000-7fff, its map is in 7ffc-7fff.
193 Band 2 is 8000-ffff, its map is in 8000-8003.
194 The remaining bands have maps in their first (even) or last (odd) 4 sectors
195 -- if the last, partial, band is odd its map is in its last 4 sectors.
196
197 The bitmap locations are given in a table pointed to by the super block.
198 No doubt they aren't constrained to be at 18, 7ffc, 8000, ...; that is
199 just where they usually are.
200
201 The "directory band" is a bunch of sectors preallocated for dnodes.
202 It has a 4-sector free space bitmap of its own. Each bit in the map
203 corresponds to one 4-sector dnode, bit 0 of the map corresponding to
204 the first 4 sectors of the directory band. The entire band is marked
205 allocated in the main bitmap. The super block gives the locations
206 of the directory band and its bitmap. ("band" doesn't mean it is
207 8 meg long; it isn't.) */
208
209
210 /* dnode: directory. 4 sectors long */
211
212 /* A directory is a tree of dnodes. The fnode for a directory
213 contains one pointer, to the root dnode of the tree. The fnode
214 never moves, the dnodes do the B-tree thing, splitting and merging
215 as files are added and removed. */
216
217 #define DNODE_MAGIC 0x77e40aae
218
219 struct dnode {
220 unsigned magic; /* 77e4 0aae */
221 unsigned first_free; /* offset from start of dnode to
222 first free dir entry */
223 unsigned increment_me; /* some kind of activity counter?
224 Neither HPFS.IFS nor CHKDSK cares
225 if you change this word */
226 secno up; /* (root dnode) directory's fnode
227 (nonroot) parent dnode */
228 dnode_secno self; /* pointer to this dnode */
229 unsigned char dirent[2028]; /* one or more dirents */
230 };
231
232 struct hpfs_dirent {
233 unsigned short length; /* offset to next dirent */
234 unsigned first: 1; /* set on phony ^A^A (".") entry */
235 unsigned flag1: 1;
236 unsigned down: 1; /* down pointer present (after name) */
237 unsigned last: 1; /* set on phony \377 entry */
238 unsigned flag4: 1;
239 unsigned flag5: 1;
240 unsigned flag6: 1;
241 unsigned has_needea: 1; /* ?? some EA has NEEDEA set
242 I have no idea why this is
243 interesting in a dir entry */
244 unsigned read_only: 1; /* dos attrib */
245 unsigned hidden: 1; /* dos attrib */
246 unsigned system: 1; /* dos attrib */
247 unsigned flag11: 1; /* would be volume label dos attrib */
248 unsigned directory: 1; /* dos attrib */
249 unsigned archive: 1; /* dos attrib */
250 unsigned not_8x3: 1; /* name is not 8.3 */
251 unsigned flag15: 1;
252 fnode_secno fnode; /* fnode giving allocation info */
253 time_t write_date; /* mtime */
254 unsigned file_size; /* file length, bytes */
255 time_t read_date; /* atime */
256 time_t creation_date; /* ctime */
257 unsigned ea_size; /* total EA length, bytes */
258 unsigned char zero1;
259 unsigned char locality; /* 0=unk 1=seq 2=random 3=both */
260 unsigned char namelen, name[1]; /* file name */
261 /* dnode_secno down; btree down pointer, if present,
262 follows name on next word boundary, or maybe it's
263 precedes next dirent, which is on a word boundary. */
264 };
265
266 /* The b-tree down pointer from a dir entry */
267
268 static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
269 {
270 return *(dnode_secno *) ((void *) de + de->length - 4);
271 }
272
273 /* The first dir entry in a dnode */
274
275 static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
276 {
277 return (void *) dnode->dirent;
278 }
279
280 /* The end+1 of the dir entries */
281
282 static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
283 {
284 return (void *) dnode + dnode->first_free;
285 }
286
287 /* The dir entry after dir entry de */
288
289 static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
290 {
291 return (void *) de + de->length;
292 }
293
294
295 /* B+ tree: allocation info in fnodes and anodes */
296
297 /* dnodes point to fnodes which are responsible for listing the sectors
298 assigned to the file. This is done with trees of (length,address)
299 pairs. (Actually triples, of (length, file-address, disk-address)
300 which can represent holes. Find out if HPFS does that.)
301 At any rate, fnodes contain a small tree; if subtrees are needed
302 they occupy essentially a full block in anodes. A leaf-level tree node
303 has 3-word entries giving sector runs, a non-leaf node has 2-word
304 entries giving subtree pointers. A flag in the header says which. */
305
306 struct bplus_leaf_node
307 {
308 unsigned file_secno; /* first file sector in extent */
309 unsigned length; /* length, sectors */
310 secno disk_secno; /* first corresponding disk sector */
311 };
312
313 struct bplus_internal_node
314 {
315 unsigned file_secno; /* subtree maps sectors < this */
316 anode_secno down; /* pointer to subtree */
317 };
318
319 struct bplus_header
320 {
321 unsigned flag0: 1;
322 unsigned flag1: 1;
323 unsigned flag2: 1;
324 unsigned flag3: 1;
325 unsigned flag4: 1;
326 unsigned fnode_parent: 1; /* ? we're pointed to by an fnode,
327 the data btree or some ea or the
328 main ea bootage pointer ea_secno */
329 /* also can get set in fnodes, which
330 may be a chkdsk glitch or may mean
331 this bit is irrelevant in fnodes,
332 or this interpretation is all wet */
333 unsigned flag6: 1;
334 unsigned internal: 1; /* 1 -> (internal) tree of anodes
335 0 -> (leaf) list of extents */
336 unsigned char fill[3];
337 unsigned char n_free_nodes; /* free nodes in following array */
338 unsigned char n_used_nodes; /* used nodes in following array */
339 unsigned short first_free; /* offset from start of header to
340 first free node in array */
341 union {
342 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
343 subtree pointers */
344 struct bplus_leaf_node external[0]; /* (external) 3-word entries giving
345 sector runs */
346 } u;
347 };
348
349 /* fnode: root of allocation b+ tree, and EA's */
350
351 /* Every file and every directory has one fnode, pointed to by the directory
352 entry and pointing to the file's sectors or directory's root dnode. EA's
353 are also stored here, and there are said to be ACL's somewhere here too. */
354
355 #define FNODE_MAGIC 0xf7e40aae
356
357 struct fnode
358 {
359 unsigned magic; /* f7e4 0aae */
360 unsigned zero1[2];
361 unsigned char len, name[15]; /* true length, truncated name */
362 fnode_secno up; /* pointer to file's directory fnode */
363 unsigned zero2[3];
364 unsigned ea_size_l; /* length of disk-resident ea's */
365 secno ea_secno; /* first sector of disk-resident ea's*/
366 unsigned short ea_size_s; /* length of fnode-resident ea's */
367
368 unsigned flag0: 1;
369 unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */
370 unsigned flag2: 1;
371 unsigned flag3: 1;
372 unsigned flag4: 1;
373 unsigned flag5: 1;
374 unsigned flag6: 1;
375 unsigned flag7: 1;
376 unsigned dirflag: 1; /* 1 -> directory. first & only extent
377 points to dnode. */
378 unsigned flag9: 1;
379 unsigned flag10: 1;
380 unsigned flag11: 1;
381 unsigned flag12: 1;
382 unsigned flag13: 1;
383 unsigned flag14: 1;
384 unsigned flag15: 1;
385
386 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */
387 union {
388 struct bplus_leaf_node external[8];
389 struct bplus_internal_node internal[12];
390 } u;
391
392 unsigned file_size; /* file length, bytes */
393 unsigned n_needea; /* number of EA's with NEEDEA set */
394 unsigned zero4[4];
395 unsigned ea_offs; /* offset from start of fnode
396 to first fnode-resident ea */
397 unsigned zero5[2];
398 unsigned char ea[316]; /* zero or more EA's, packed together
399 with no alignment padding.
400 (Do not use this name, get here
401 via fnode + ea_offs. I think.) */
402 };
403
404
405 /* anode: 99.44% pure allocation tree */
406
407 #define ANODE_MAGIC 0x37e40aae
408
409 struct anode
410 {
411 unsigned magic; /* 37e4 0aae */
412 anode_secno self; /* pointer to this anode */
413 secno up; /* parent anode or fnode */
414
415 struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */
416 union {
417 struct bplus_leaf_node external[40];
418 struct bplus_internal_node internal[60];
419 } u;
420
421 unsigned fill[3]; /* unused */
422 };
423
424
425 /* extended attributes.
426
427 A file's EA info is stored as a list of (name,value) pairs. It is
428 usually in the fnode, but (if it's large) it is moved to a single
429 sector run outside the fnode, or to multiple runs with an anode tree
430 that points to them.
431
432 The value of a single EA is stored along with the name, or (if large)
433 it is moved to a single sector run, or multiple runs pointed to by an
434 anode tree, pointed to by the value field of the (name,value) pair.
435
436 Flags in the EA tell whether the value is immediate, in a single sector
437 run, or in multiple runs. Flags in the fnode tell whether the EA list
438 is immediate, in a single run, or in multiple runs. */
439
440 struct extended_attribute
441 {
442 unsigned indirect: 1; /* 1 -> value gives sector number
443 where real value starts */
444 unsigned anode: 1; /* 1 -> sector is an anode
445 that points to fragmented value */
446 unsigned flag2: 1;
447 unsigned flag3: 1;
448 unsigned flag4: 1;
449 unsigned flag5: 1;
450 unsigned flag6: 1;
451 unsigned needea: 1; /* required ea */
452 unsigned char namelen; /* length of name, bytes */
453 unsigned short valuelen; /* length of value, bytes */
454 /*
455 unsigned char name[namelen]; ascii attrib name
456 unsigned char nul; terminating '\0', not counted
457 unsigned char value[valuelen]; value, arbitrary
458 if this.indirect, valuelen is 8 and the value is
459 unsigned length; real length of value, bytes
460 secno secno; sector address where it starts
461 if this.anode, the above sector number is the root of an anode tree
462 which points to the value.
463 */
464 };
465
466 static inline unsigned char *ea_name (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
467 {
468 return (void *) ea + sizeof *ea;
469 }
470
471 static inline unsigned char *ea_value (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
472 {
473 return (void *) ea + sizeof *ea + ea->namelen + 1;
474 }
475
476 static inline struct extended_attribute *
477 ea_next_ea (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
478 {
479 return (void *) ea + sizeof *ea + ea->namelen + 1 + ea->valuelen;
480 }
481
482 static inline unsigned ea_indirect_length (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
483 {
484 unsigned *v = (void *) ea_value (ea);
485 return v[0];
486 }
487
488 static inline secno ea_indirect_secno (struct extended_attribute *ea)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
489 {
490 unsigned *v = (void *) ea_value (ea);
491 return v[1];
492 }
493
494 /*
495 Local Variables:
496 comment-column: 40
497 End:
498 */