/* Capitalization rules for HPFS */

/* In OS/2, HPFS filenames preserve upper and lower case letter distinctions
   but filename matching ignores case.  That is, creating a file "Foo"
   actually creates a file named "Foo" which can be looked up as "Foo",
   "foo", or "FOO", among other possibilities.

   Also, HPFS is internationalized -- a table giving the uppercase
   equivalent of every character is stored in the filesystem, so that
   any national character set may be used.  If several different
   national character sets are in use, several tables are stored
   in the filesystem.

   It would be perfectly reasonable for Linux HPFS to act as a Unix
   filesystem and match "Foo" only if asked for "Foo" exactly.  But
   the sort order of HPFS directories is case-insensitive, so Linux
   still has to know the capitalization rules used by OS/2.  Because
   of this, it turns out to be more natural for us to be case-insensitive
   than not.

   Currently the standard character set used by Linux is Latin-1.
   Work is underway to permit people to use UTF-8 instead; therefore
   all code that depends on the character set is segregated here.

   (It would be wonderful if Linux HPFS could be independent of what
   character set is in use on the Linux side, but because of the
   necessary case folding this is impossible.)

   There is a map from Latin-1 into code page 850 for every printing
   character in Latin-1.  The NLS documentation of OS/2 shows that
   everybody has 850 available unless they don't have Western Latin
   chars available at all (so fitting them to Linux without Unicode
   is a doomed exercise).

   It is not clear exactly how HPFS.IFS handles the situation when
   multiple code pages are in use.  Experiments show that

   - tables on the disk give uppercasing rules for the installed code pages

   - each directory entry is tagged with what code page was current
     when that name was created

   - doing just CHCP, without changing what's on the disk in any way,
     can change what DIR reports, and what name a case-folded match
     will match.

   This means, I think, that HPFS.IFS operates in the current code
   page, without regard to the uppercasing information recorded in
   the tables on the disk.  It does record the uppercasing rules
   it used, perhaps for CHKDSK, but it does not appear to use them
   itself.

   So: Linux, a Latin-1 system, will operate in code page 850.  We
   recode between 850 and Latin-1 when dealing with the names actually
   on the disk.  We don't use the uppercasing tables either.

   In a hypothetical UTF-8 implementation, one reasonable way to
   proceed that matches OS/2 (for least surprise) is: do case
   translation in UTF-8, and recode to/from one of the code pages
   available on the mounted filesystem.  Reject as invalid any name
   containing chars that can't be represented on disk by one of the
   code pages OS/2 is using.  Recoding from on-disk names to UTF-8
   could use the code page tags, though this is not what OS/2 does.  */

/* When MODULE is defined, include the kernel module headers; otherwise
   define the use-count macros as empty so that any use of them expands
   to nothing.  */
#ifdef MODULE
#include <linux/module.h>
#include <linux/version.h>
#else
#define MOD_INC_USE_COUNT
#define MOD_DEC_USE_COUNT
#endif


/* Code page 850 to Latin-1 translation for the upper half of the byte
   range: entry I holds the Latin-1 value for code page 850 byte I + 128.
   Bytes below 128 are not in the table (cp850_to_latin1 passes them
   through unchanged).  */
static const unsigned char tb_cp850_to_latin1[128] =
{
  199, 252, 233, 226, 228, 224, 229, 231,
  234, 235, 232, 239, 238, 236, 196, 197,
  201, 230, 198, 244, 246, 242, 251, 249,
  255, 214, 220, 248, 163, 216, 215, 159,
  225, 237, 243, 250, 241, 209, 170, 186,
  191, 174, 172, 189, 188, 161, 171, 187,
  155, 156, 157, 144, 151, 193, 194, 192,
  169, 135, 128, 131, 133, 162, 165, 147,
  148, 153, 152, 150, 145, 154, 227, 195,
  132, 130, 137, 136, 134, 129, 138, 164,
  240, 208, 202, 203, 200, 158, 205, 206,
  207, 149, 146, 141, 140, 166, 204, 139,
  211, 223, 212, 210, 245, 213, 181, 254,
  222, 218, 219, 217, 253, 221, 175, 180,
  173, 177, 143, 190, 182, 167, 247, 184,
  176, 168, 183, 185, 179, 178, 142, 160,
};

#if 0
/* Latin-1 to code page 850 translation for the upper half of the byte
   range (the reverse direction of tb_cp850_to_latin1): entry I holds
   the code page 850 value for Latin-1 byte I + 128.  Compiled out.  */
static const unsigned char tb_latin1_to_cp850[128] =
{
  186, 205, 201, 187, 200, 188, 204, 185,
  203, 202, 206, 223, 220, 219, 254, 242,
  179, 196, 218, 191, 192, 217, 195, 180,
  194, 193, 197, 176, 177, 178, 213, 159,
  255, 173, 189, 156, 207, 190, 221, 245,
  249, 184, 166, 174, 170, 240, 169, 238,
  248, 241, 253, 252, 239, 230, 244, 250,
  247, 251, 167, 175, 172, 171, 243, 168,
  183, 181, 182, 199, 142, 143, 146, 128,
  212, 144, 210, 211, 222, 214, 215, 216,
  209, 165, 227, 224, 226, 229, 153, 158,
  157, 235, 233, 234, 154, 237, 232, 225,
  133, 160, 131, 198, 132, 134, 145, 135,
  138, 130, 136, 137, 141, 161, 140, 139,
  208, 164, 149, 162, 147, 228, 148, 246,
  155, 151, 163, 150, 129, 236, 231, 152,
};
#endif

/* Latin-1 code points (written in octal) that bound the accented-letter
   ranges used by the case conversions below.  Upper case letters run
   from A_GRAVE to THORN with MULTIPLY excluded; lower case letters run
   from a_grave to thorn with divide excluded.  */
#define A_GRAVE 0300
#define THORN 0336
#define MULTIPLY 0327
#define a_grave 0340
#define thorn 0376
#define divide 0367

/* Return the upper case form of Latin-1 character C.  Characters that
   are not in 'a'..'z' or a_grave..thorn (and the divide sign, which sits
   inside that range) are returned unchanged.  */
static inline unsigned latin1_upcase (unsigned c)
{
  /* C is unsigned, so subtracting the range start wraps to a huge value
     for anything below it; one comparison checks both range ends.  */
  if (c - 'a' <= (unsigned) ('z' - 'a')
      || (c != divide
	  && c - a_grave <= (unsigned) (thorn - a_grave)))
    return c - ('a' - 'A');

  return c;
}

/* Return the lower case form of Latin-1 character C.  Characters that
   are not in 'A'..'Z' or A_GRAVE..THORN (and the multiply sign, which
   sits inside that range) are returned unchanged.  */
static inline unsigned latin1_downcase (unsigned c)
{
  /* Same unsigned wrap-around range test as in latin1_upcase.  */
  if (c - 'A' <= (unsigned) ('Z' - 'A')
      || (c != MULTIPLY
	  && c - A_GRAVE <= (unsigned) (THORN - A_GRAVE)))
    return c + ('a' - 'A');

  return c;
}

#if 0
/* Translate Latin-1 character C to code page 850.  Values 128..255 are
   looked up in tb_latin1_to_cp850; everything else is returned
   unchanged, so a value above 255 can never index past the end of the
   128-entry table.  Compiled out.  */
static inline unsigned latin1_to_cp850 (unsigned c)
{
  if (c >= 128 && c < 256)
    return tb_latin1_to_cp850[c - 128];

  return c;
}
#endif

153 staticinlineunsignedcp850_to_latin1 (unsignedc)
/* */ 154 { 155 if ((signed) c - 128 >= 0)
156 returntb_cp850_to_latin1[c - 128];
157 else 158 returnc;
159 } 160
/* Translate code page 850 character C to Latin-1 and return the upper
   case form of the result.  */
unsigned hpfs_char_to_upper_linux (unsigned c)
{
  return latin1_upcase (cp850_to_latin1 (c));
}

/* Return the upper case form of Latin-1 character C; no code page
   translation is applied.  */
unsigned linux_char_to_upper_linux (unsigned c)
{
  return latin1_upcase (c);
}

/* Translate code page 850 character C to Latin-1 and return the lower
   case form of the result.  */
unsigned hpfs_char_to_lower_linux (unsigned c)
{
  return latin1_downcase (cp850_to_latin1 (c));
}

/* Translate code page 850 character C to Latin-1 without changing its
   case.  */
unsigned hpfs_char_to_linux (unsigned c)
{
  return cp850_to_latin1 (c);
}