1 /*
2 * linux/fs/umsdos/mangle.c
3 *
4 * Written 1993 by Jacques Gelinas
5 *
6 * Control the mangling of file name to fit msdos name space.
7 * Many optimisation by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID)
8 */
9 #include <linux/errno.h>
10 #include <linux/ctype.h>
11 #include <linux/string.h>
12 #include <linux/kernel.h>
13 #include <linux/umsdos_fs.h>
14
15 /*
16 Complete the mangling of the MSDOS fake name
17 based on the position of the entry in the EMD file.
18
19 Simply complete the job of umsdos_parse; fill the extension.
20
21 Beware that info->f_pos must be set.
22 */
23 void umsdos_manglename (struct umsdos_info *info)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
24 {
25 if (info->msdos_reject){
26 /* #Specification: file name / non MSDOS conforming / mangling
27 Each non MSDOS conforming file has a special extension
28 build from the entry position in the EMD file.
29
30 This number is then transform in a base 32 number, where
31 each digit is expressed like hexadecimal number, using
32 digit and letter, except it uses 22 letters from 'a' to 'v'.
33 The number 32 comes from 2**5. It is faster to split a binary
34 number using a base which is a power of two. And I was 32
35 when I started this project. Pick your answer :-) .
36
37 If the result is '0', it is replace with '_', simply
38 to make it odd.
39
40 This is true for the first two character of the extension.
41 The last one is taken from a list of odd character, which
42 are:
43
44 { } ( ) ! ` ^ & @
45
46 With this scheme, we can produce 9216 ( 9* 32 * 32)
47 different extensions which should not clash with any useful
48 extension already popular or meaningful. Since most directory
49 have much less than 32 * 32 files in it, the first character
50 of the extension of any mangle name will be {.
51
52 Here are the reason to do this (this kind of mangling).
53
54 -The mangling is deterministic. Just by the extension, we
55 are able to locate the entry in the EMD file.
56
57 -By keeping to beginning of the file name almost unchanged,
58 we are helping the MSDOS user.
59
60 -The mangling produces names not too ugly, so an msdos user
61 may live with it (remember it, type it, etc...).
62
63 -The mangling produces names ugly enough so no one will
64 ever think of using such a name in real life. This is not
65 fool proof. I don't think there is a total solution to this.
66 */
67 union {
68 int entry_num;
69 struct {
70 unsigned num1:5,num2:5,num3:5;
71 }num;
72 } u;
73 char *pt = info->fake.fname + info->fake.len;
74 /* lookup for encoding the last character of the extension */
75 /* It contain valid character after the ugly one to make sure */
76 /* even if someone overflow the 32 * 32 * 9 limit, it still do */
77 /* something */
78 #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
79 static char lookup3[]={
80 SPECIAL_MANGLING,
81 /* This is the start of lookup12 */
82 '_','1','2','3','4','5','6','7','8','9',
83 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
84 'p','q','r','s','t','u','v'
85 };
86 #define lookup12 (lookup3+9)
87 u.entry_num = info->f_pos / UMSDOS_REC_SIZE;
88 if (u.entry_num > (9* 32 * 32)){
89 printk ("UMSDOS: More than 9216 file in a directory.\n"
90 "This may break the mangling strategy.\n"
91 "Not a killer problem. See doc.\n");
92 }
93 *pt++ = '.';
94 *pt++ = lookup3 [u.num.num3];
95 *pt++ = lookup12[u.num.num2];
96 *pt++ = lookup12[u.num.num1];
97 *pt = '\0'; /* help doing printk */
98 info->fake.len += 4;
99 info->msdos_reject = 0; /* Avoid mangling twice */
100 }
101 }
102
103 /*
104 Evaluate the record size needed to store of name of len character.
105 The value returned is a multiple of UMSDOS_REC_SIZE.
106 */
107 int umsdos_evalrecsize (int len)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
108 {
109 struct umsdos_dirent dirent;
110 int nbrec = 1+((len-1+(dirent.name-(char*)&dirent))
111 / UMSDOS_REC_SIZE);
112 return nbrec * UMSDOS_REC_SIZE;
113 /*
114 GLU This should be inlined or something to speed it up to the max.
115 GLU nbrec is absolutely not needed to return the value.
116 */
117 }
118 #ifdef TEST
119 int umsdos_evalrecsize_old (int len)
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
120 {
121 struct umsdos_dirent dirent;
122 int size = len + (dirent.name-(char*)&dirent);
123 int nbrec = size / UMSDOS_REC_SIZE;
124 int extra = size % UMSDOS_REC_SIZE;
125 if (extra > 0) nbrec++;
126 return nbrec * UMSDOS_REC_SIZE;
127 }
128 #endif
129 /*
130 Fill the struct info with the full and msdos name of a file
131 Return 0 if all is ok, a negative error code otherwise.
132 */
133 int umsdos_parse (
/* ![[previous]](../icons/left.png)
![[next]](../icons/right.png)
![[first]](../icons/first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
134 const char *fname,
135 int len,
136 struct umsdos_info *info)
137 {
138 int ret = -ENAMETOOLONG;
139 /* #Specification: file name / too long
140 If a file name exceed UMSDOS maxima, the file name is silently
141 truncated. This makes it conformant with the other file system
142 of Linux (minix and ext2 at least).
143 */
144 if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME;
145 {
146 const char *firstpt=NULL; /* First place we saw a . in fname */
147 /* #Specification: file name / non MSDOS conforming / base length 0
148 file name beginning with a period '.' are invalid for MsDOS.
149 It needs absolutely a base name. So the file name is mangled
150 */
151 int ivldchar = fname[0] == '.';/* At least one invalid character */
152 int msdos_len = len;
153 int base_len;
154 /*
155 cardinal_per_size tells if there exist at least one
156 DOS pseudo devices on length n. See the test below.
157 */
158 static const char cardinal_per_size[9]={
159 0, 0, 0, 1, 1, 0, 1, 0, 1
160 };
161 /*
162 lkp translate all character to acceptable character (for DOS).
163 When lkp[n] == n, it means also it is an acceptable one.
164 So it serve both as a flag and as a translator.
165 */
166 static char lkp[256];
167 static char is_init=0;
168 if (!is_init){
169 /*
170 Initialisation of the array is easier and less error prone
171 like this.
172 */
173 int i;
174 static char *spc = "\"*+,/:;<=>?[\\]|~";
175 is_init = 1;
176 for (i=0; i<=32; i++) lkp[i] = '#';
177 for (i=33; i<'A'; i++) lkp[i] = (char)i;
178 for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A'));
179 for (i='Z'+1; i<127; i++) lkp[i] = (char)i;
180 for (i=128; i<256; i++) lkp[i] = '#';
181
182 lkp['.'] = '_';
183 while (*spc != '\0') lkp[(unsigned char)(*spc++)] = '#';
184 }
185 /* GLU
186 file name which are longer than 8+'.'+3 are invalid for MsDOS.
187 So the file name is to be mangled no more test needed.
188 This Speed Up for long and very long name.
189 The position of the last point is no more necessary anyway.
190 */
191 if (len<=(8+1+3)){
192 const char *pt = fname;
193 const char *endpt = fname + len;
194 while (pt < endpt){
195 if (*pt == '.'){
196 if (firstpt != NULL){
197 /* 2 . in a file name. Reject */
198 ivldchar = 1;
199 break;
200 }else{
201 int extlen = (int)(endpt - pt);
202 firstpt = pt;
203 if (firstpt - fname > 8){
204 /* base name longer than 8: reject */
205 ivldchar = 1;
206 break;
207 }else if (extlen > 4){
208 /* Extension longer than 4 (including .): reject */
209 ivldchar = 1;
210 break;
211 }else if (extlen == 1){
212 /* #Specification: file name / non MSDOS conforming / last char == .
213 If the last character of a file name is
214 a period, mangling is applied. MsDOS do
215 not support those file name.
216 */
217 ivldchar = 1;
218 break;
219 }else if (extlen == 4){
220 /* #Specification: file name / non MSDOS conforming / mangling clash
221 To avoid clash with the umsdos mangling, any file
222 with a special character as the first character
223 of the extension will be mangled. This solve the
224 following problem:
225
226 touch FILE
227 # FILE is invalid for DOS, so mangling is applied
228 # file.{_1 is created in the DOS directory
229 touch file.{_1
230 # To UMSDOS file point to a single DOS entry.
231 # So file.{_1 has to be mangled.
232 */
233 static char special[]={
234 SPECIAL_MANGLING,'\0'
235 };
236 if (strchr(special,firstpt[1])!= NULL){
237 ivldchar = 1;
238 break;
239 }
240 }
241 }
242 }else if (lkp[(unsigned char)(*pt)] != *pt){
243 ivldchar = 1;
244 break;
245 }
246 pt++;
247 }
248 }else{
249 ivldchar = 1;
250 }
251 if (ivldchar
252 || (firstpt == NULL && len > 8)
253 || (len == UMSDOS_EMD_NAMELEN
254 && memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){
255 /* #Specification: file name / --linux-.---
256 The name of the EMD file --linux-.--- is map to a mangled
257 name. So UMSDOS does not restrict its use.
258 */
259 /* #Specification: file name / non MSDOS conforming / mangling
260 Non MSDOS conforming file name must use some alias to fit
261 in the MSDOS name space.
262
263 The strategy is simple. The name is simply truncated to
264 8 char. points are replace with underscore and a
265 number is given as an extension. This number correspond
266 to the entry number in the EMD file. The EMD file
267 only need to carry the real name.
268
269 Upper case is also convert to lower case.
270 Control character are converted to #.
271 Space are converted to #.
272 The following character are also converted to #.
273 " * + , / : ; < = > ? [ \ ] | ~
274
275 Sometime, the problem is not in MsDOS itself but in
276 command.com.
277 */
278 int i;
279 char *pt = info->fake.fname;
280 base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len;
281 /*
282 There is no '.' any more so we know for a fact that
283 the base length is the length.
284 */
285 memcpy (info->fake.fname,fname,msdos_len);
286 for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsigned char)(*pt)];
287 *pt = '\0'; /* GLU C'est sur on a un 0 a la fin */
288 info->msdos_reject = 1;
289 /*
290 The numeric extension is added only when we know
291 the position in the EMD file, in umsdos_newentry(),
292 umsdos_delentry(), and umsdos_findentry().
293 See umsdos_manglename().
294 */
295 }else{
296 /* Conforming MSDOS file name */
297 strcpy (info->fake.fname,fname); /* GLU C'est sur on a un 0 a la fin */
298 info->msdos_reject = 0;
299 base_len = firstpt != NULL ? (int)(firstpt - fname) : len;
300 }
301 if (cardinal_per_size[base_len]){
302 /* #Specification: file name / MSDOS devices / mangling
303 To avoid unreachable file from MsDOS, any MsDOS conforming
304 file with a basename equal to one of the MsDOS pseudo
305 devices will be mangled.
306
307 If a file such as "prn" was created, it would be unreachable
308 under MsDOS because prn is assumed to be the printer, even
309 if the file does have an extension.
310
311 Since the extension is unimportant to MsDOS, we must patch
312 the basename also. We simply insert a minus '-'. To avoid
313 conflict with valid file with a minus in front (such as
314 "-prn"), we add an mangled extension like any other
315 mangled file name.
316
317 Here is the list of DOS pseudo devices:
318
319 "prn","con","aux","nul",
320 "lpt1","lpt2","lpt3","lpt4",
321 "com1","com2","com3","com4",
322 "clock$"
323
324 and some standard ones for common DOS programs
325
326 "emmxxxx0","xmsxxxx0","setverxx"
327
328 (Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK>
329 for pointing these to me).
330
331 Is there one missing ?
332 */
333 /* This table must be ordered by length */
334 static const char *tbdev[]={
335 "prn","con","aux","nul",
336 "lpt1","lpt2","lpt3","lpt4",
337 "com1","com2","com3","com4",
338 "clock$",
339 "emmxxxx0","xmsxxxx0","setverxx"
340 };
341 /* Tell where to find in tbdev[], the first name of */
342 /* a certain length */
343 static const char start_ind_dev[9]={
344 0, 0, 0, 4, 12, 12, 13, 13, 16
345 };
346 char basen[9];
347 int i;
348 for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){
349 if (memcmp(info->fake.fname,tbdev[i],base_len)==0){
350 memcpy (basen,info->fake.fname,base_len);
351 basen[base_len] = '\0'; /* GLU C'est sur on a un 0 a la fin */
352 /*
353 GLU On ne fait cela que si necessaire, on essaye d'etre le
354 GLU simple dans le cas general (le plus frequent).
355 */
356 info->fake.fname[0] = '-';
357 strcpy (info->fake.fname+1,basen); /* GLU C'est sur on a un 0 a la fin */
358 msdos_len = (base_len==8) ? 8 : base_len + 1;
359 info->msdos_reject = 1;
360 break;
361 }
362 }
363 }
364 info->fake.fname[msdos_len] = '\0'; /* Help doing printk */
365 /* GLU Ce zero devrais deja y etre ! (invariant ?) */
366 info->fake.len = msdos_len;
367 /* Pourquoi ne pas utiliser info->fake.len partout ??? plus long ?*/
368 memcpy (info->entry.name,fname,len);
369 info->entry.name_len = len;
370 ret = 0;
371 }
372 /*
373 Evaluate how many record are needed to store this entry.
374 */
375 info->recsize = umsdos_evalrecsize (len);
376 return ret;
377 }
378
379 #ifdef TEST
380
/*
	One test case for umsdos_parse(): a name and the result expected
	for it. The strings are string literals, hence const.
*/
struct MANG_TEST{
	const char *fname;	/* Name to validate */
	int msdos_reject;	/* Expected msdos_reject flag */
	const char *msname;	/* Expected msdos name */
};
386
387 struct MANG_TEST tb[]={
388 "hello", 0, "hello",
389 "hello.1", 0, "hello.1",
390 "hello.1_", 0, "hello.1_",
391 "prm", 0, "prm",
392
393 #ifdef PROPOSITION
394 "HELLO", 1, "hello",
395 "Hello.1", 1, "hello.1",
396 "Hello.c", 1, "hello.c",
397 #elseif
398 /*
399 Je trouve les trois exemples ci-dessous tres "malheureux".
400 Je propose de mettre en minuscule dans un passe preliminaire,
401 et de tester apres si il y a d'autres caracters "mechants".
402 Bon, je ne l'ai pas fait, parceque ce n'est pas si facilement
403 modifiable que ca. Mais c'est pour le principe.
404 Evidemment cela augmente les chances de "Collision",
405 par exemple: entre "HELLO" et "Hello", mais ces problemes
406 peuvent etre traiter ailleur avec les autres collisions.
407 */
408 "HELLO", 1, "hello",
409 "Hello.1", 1, "hello_1",
410 "Hello.c", 1, "hello_c",
411 #endif
412
413 "hello.{_1", 1, "hello_{_",
414 "hello\t", 1, "hello#",
415 "hello.1.1", 1, "hello_1_",
416 "hel,lo", 1, "hel#lo",
417 "Salut.Tu.vas.bien?", 1, "salut_tu",
418 ".profile", 1, "_profile",
419 ".xv", 1, "_xv",
420 "toto.", 1, "toto_",
421 "clock$.x", 1, "-clock$",
422 "emmxxxx0", 1, "-emmxxxx",
423 "emmxxxx0.abcd", 1, "-emmxxxx",
424 "aux", 1, "-aux",
425 "prn", 1, "-prn",
426 "prn.abc", 1, "-prn",
427 "PRN", 1, "-prn",
428 /*
429 GLU ATTENTION : Le resultat de ceux-ci sont differents avec ma version
430 GLU du mangle par rapport au mangle originale.
431 GLU CAUSE: La maniere de calculer la variable baselen.
432 GLU Pour toi c'est toujours 3
433 GLU Pour moi c'est respectivement 7, 8 et 8
434 */
435 "PRN.abc", 1, "prn_abc",
436 "Prn.abcd", 1, "prn_abcd",
437 "prn.abcd", 1, "prn_abcd",
438 "Prn.abcdefghij", 1, "prn_abcd"
439 };
440
441 int main (int argc, char *argv[])
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
442 {
443 int i,rold,rnew;
444 printf ("Testing the umsdos_parse.\n");
445 for (i=0; i<sizeof(tb)/sizeof(tb[0]); i++){
446 struct MANG_TEST *pttb = tb+i;
447 struct umsdos_info info;
448 int ok = umsdos_parse (pttb->fname,strlen(pttb->fname),&info);
449 if (strcmp(info.fake.fname,pttb->msname)!=0){
450 printf ("**** %s -> ",pttb->fname);
451 printf ("%s <> %s\n",info.fake.fname,pttb->msname);
452 }else if (info.msdos_reject != pttb->msdos_reject){
453 printf ("**** %s -> %s ",pttb->fname,pttb->msname);
454 printf ("%d <> %d\n",info.msdos_reject,pttb->msdos_reject);
455 }else{
456 printf (" %s -> %s %d\n",pttb->fname,pttb->msname
457 ,pttb->msdos_reject);
458 }
459 }
460 printf ("Testing the new umsdos_evalrecsize.");
461 for (i=0; i<UMSDOS_MAXNAME ; i++){
462 rnew=umsdos_evalrecsize (i);
463 rold=umsdos_evalrecsize_old (i);
464 if (!(i%UMSDOS_REC_SIZE)){
465 printf ("\n%d:\t",i);
466 }
467 if (rnew!=rold){
468 printf ("**** %d newres: %d != %d \n", i, rnew, rold);
469 }else{
470 printf(".");
471 }
472 }
473 printf ("\nEnd of Testing.\n");
474
475 return 0;
476 }
477
478 #endif