/*
 *  linux/fs/umsdos/mangle.c
 *
 *      Written 1993 by Jacques Gelinas
 *
 *  Control the mangling of file names to fit the MSDOS name space.
 *  Many optimisations by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID)
 */

10 #include <linux/errno.h>
11 #include <linux/string.h>
12 #include <linux/kernel.h>
13 #include <linux/umsdos_fs.h>
14
15 /* 16 Complete the mangling of the MSDOS fake name 17 based on the position of the entry in the EMD file. 18
19 Simply complete the job of umsdos_parse; fill the extension. 20
21 Beware that info->f_pos must be set. 22 */ 23 voidumsdos_manglename (structumsdos_info *info)
/* */ 24 { 25 if (info->msdos_reject){ 26 /* #Specification: file name / non MSDOS conforming / mangling 27 Each non MSDOS conforming file has a special extension 28 build from the entry position in the EMD file. 29
30 This number is then transform in a base 32 number, where 31 each digit is expressed like hexadecimal number, using 32 digit and letter, except it uses 22 letters from 'a' to 'v'. 33 The number 32 comes from 2**5. It is faster to split a binary 34 number using a base which is a power of two. And I was 32 35 when I started this project. Pick your answer :-) . 36
37 If the result is '0', it is replace with '_', simply 38 to make it odd. 39
40 This is true for the first two character of the extension. 41 The last one is taken from a list of odd character, which 42 are: 43
44 { } ( ) ! ` ^ & @ 45
46 With this scheme, we can produce 9216 ( 9* 32 * 32) 47 different extensions which should not clash with any useful 48 extension already popular or meaningful. Since most directory 49 have much less than 32 * 32 files in it, the first character 50 of the extension of any mangle name will be {. 51
52 Here are the reason to do this (this kind of mangling). 53
54 -The mangling is deterministic. Just by the extension, we 55 are able to locate the entry in the EMD file. 56
57 -By keeping to beginning of the file name almost unchanged, 58 we are helping the MSDOS user. 59
60 -The mangling produces names not too ugly, so an msdos user 61 may live with it (remember it, type it, etc...). 62
63 -The mangling produces names ugly enough so no one will 64 ever think of using such a name in real life. This is not 65 fool proof. I don't think there is a total solution to this. 66 */ 67 union{ 68 intentry_num;
69 struct{ 70 unsignednum1:5,num2:5,num3:5;
71 }num;
72 }u;
73 char *pt = info->fake.fname + info->fake.len;
74 /* lookup for encoding the last character of the extension */ 75 /* It contain valid character after the ugly one to make sure */ 76 /* even if someone overflow the 32 * 32 * 9 limit, it still do */ 77 /* something */ 78 #defineSPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
79 staticcharlookup3[]={ 80 SPECIAL_MANGLING,
81 /* This is the start of lookup12 */ 82 '_','1','2','3','4','5','6','7','8','9',
83 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
84 'p','q','r','s','t','u','v'
85 };
86 #definelookup12 (lookup3+9)
87 u.entry_num = info->f_pos / UMSDOS_REC_SIZE;
88 if (u.entry_num > (9* 32 * 32)){ 89 printk ("UMSDOS: More than 9216 file in a directory.\n"
90 "This may break the mangling strategy.\n"
91 "Not a killer problem. See doc.\n");
92 } 93 *pt++ = '.';
94 *pt++ = lookup3 [u.num.num3];
95 *pt++ = lookup12[u.num.num2];
96 *pt++ = lookup12[u.num.num1];
97 *pt = '\0'; /* help doing printk */ 98 info->fake.len += 4;
99 info->msdos_reject = 0; /* Avoid mangling twice */ 100 } 101 } 102
103 /* 104 Evaluate the record size needed to store of name of len character. 105 The value returned is a multiple of UMSDOS_REC_SIZE. 106 */ 107 intumsdos_evalrecsize (intlen)
/* */ 108 { 109 structumsdos_direntdirent;
110 intnbrec = 1+((len-1+(dirent.name-(char*)&dirent))
111 / UMSDOS_REC_SIZE);
112 returnnbrec * UMSDOS_REC_SIZE;
113 /* 114 GLU This should be inlined or something to speed it up to the max. 115 GLU nbrec is absolutely not needed to return the value. 116 */ 117 } 118 #ifdefTEST 119 intumsdos_evalrecsize_old (intlen)
/* */ 120 { 121 structumsdos_direntdirent;
122 intsize = len + (dirent.name-(char*)&dirent);
123 intnbrec = size / UMSDOS_REC_SIZE;
124 intextra = size % UMSDOS_REC_SIZE;
125 if (extra > 0) nbrec++;
126 returnnbrec * UMSDOS_REC_SIZE;
127 } 128 #endif 129 /* 130 Fill the struct info with the full and msdos name of a file 131 Return 0 if all is ok, a negative error code otherwise. 132 */ 133 intumsdos_parse (
/* */ 134 constchar *fname,
135 intlen,
136 structumsdos_info *info)
137 { 138 intret = -ENAMETOOLONG;
139 /* #Specification: file name / too long 140 If a file name exceed UMSDOS maxima, the file name is silently 141 truncated. This makes it conformant with the other file system 142 of Linux (minix and ext2 at least). 143 */ 144 if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME;
145 { 146 constchar *firstpt=NULL; /* First place we saw a . in fname */ 147 /* #Specification: file name / non MSDOS conforming / base length 0 148 file name beginning with a period '.' are invalid for MsDOS. 149 It needs absolutely a base name. So the file name is mangled 150 */ 151 intivldchar = fname[0] == '.';/* At least one invalid character */ 152 intmsdos_len = len;
153 intbase_len;
154 /* 155 cardinal_per_size tells if there exist at least one 156 DOS pseudo devices on length n. See the test below. 157 */ 158 staticconstcharcardinal_per_size[9]={ 159 0, 0, 0, 1, 1, 0, 1, 0, 1
160 };
161 /* 162 lkp translate all character to acceptable character (for DOS). 163 When lkp[n] == n, it means also it is an acceptable one. 164 So it serve both as a flag and as a translator. 165 */ 166 staticcharlkp[256];
167 staticcharis_init=0;
168 if (!is_init){ 169 /* 170 Initialisation of the array is easier and less error prone 171 like this. 172 */ 173 inti;
174 staticconstchar *spc = "\"*+,/:;<=>?[\\]|~";
175 is_init = 1;
176 for (i=0; i<=32; i++) lkp[i] = '#';
177 for (i=33; i<'A'; i++) lkp[i] = (char)i;
178 for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A'));
179 for (i='Z'+1; i<127; i++) lkp[i] = (char)i;
180 for (i=128; i<256; i++) lkp[i] = '#';
181
182 lkp['.'] = '_';
183 while (*spc != '\0') lkp[(unsignedchar)(*spc++)] = '#';
184 } 185 /* GLU 186 file name which are longer than 8+'.'+3 are invalid for MsDOS. 187 So the file name is to be mangled no more test needed. 188 This Speed Up for long and very long name. 189 The position of the last point is no more necessary anyway. 190 */ 191 if (len<=(8+1+3)){ 192 constchar *pt = fname;
193 constchar *endpt = fname + len;
194 while (pt < endpt){ 195 if (*pt == '.'){ 196 if (firstpt != NULL){ 197 /* 2 . in a file name. Reject */ 198 ivldchar = 1;
199 break;
200 }else{ 201 intextlen = (int)(endpt - pt);
202 firstpt = pt;
203 if (firstpt - fname > 8){ 204 /* base name longer than 8: reject */ 205 ivldchar = 1;
206 break;
207 }elseif (extlen > 4){ 208 /* Extension longer than 4 (including .): reject */ 209 ivldchar = 1;
210 break;
211 }elseif (extlen == 1){ 212 /* #Specification: file name / non MSDOS conforming / last char == . 213 If the last character of a file name is 214 a period, mangling is applied. MsDOS do 215 not support those file name. 216 */ 217 ivldchar = 1;
218 break;
219 }elseif (extlen == 4){ 220 /* #Specification: file name / non MSDOS conforming / mangling clash 221 To avoid clash with the umsdos mangling, any file 222 with a special character as the first character 223 of the extension will be mangled. This solve the 224 following problem: 225
226 # 227 touch FILE 228 # FILE is invalid for DOS, so mangling is applied 229 # file.{_1 is created in the DOS directory 230 touch file.{_1 231 # To UMSDOS file point to a single DOS entry. 232 # So file.{_1 has to be mangled. 233 # 234 */ 235 staticcharspecial[]={ 236 SPECIAL_MANGLING,'\0'
237 };
238 if (strchr(special,firstpt[1])!= NULL){ 239 ivldchar = 1;
240 break;
241 } 242 } 243 } 244 }elseif (lkp[(unsignedchar)(*pt)] != *pt){ 245 ivldchar = 1;
246 break;
247 } 248 pt++;
249 } 250 }else{ 251 ivldchar = 1;
252 } 253 if (ivldchar 254 || (firstpt == NULL && len > 8)
255 || (len == UMSDOS_EMD_NAMELEN 256 && memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){ 257 /* #Specification: file name / --linux-.--- 258 The name of the EMD file --linux-.--- is map to a mangled 259 name. So UMSDOS does not restrict its use. 260 */ 261 /* #Specification: file name / non MSDOS conforming / mangling 262 Non MSDOS conforming file name must use some alias to fit 263 in the MSDOS name space. 264
265 The strategy is simple. The name is simply truncated to 266 8 char. points are replace with underscore and a 267 number is given as an extension. This number correspond 268 to the entry number in the EMD file. The EMD file 269 only need to carry the real name. 270
271 Upper case is also convert to lower case. 272 Control character are converted to #. 273 Space are converted to #. 274 The following character are also converted to #. 275 # 276 " * + , / : ; < = > ? [ \ ] | ~ 277 # 278
279 Sometime, the problem is not in MsDOS itself but in 280 command.com. 281 */ 282 inti;
283 char *pt = info->fake.fname;
284 base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len;
285 /* 286 There is no '.' any more so we know for a fact that 287 the base length is the length. 288 */ 289 memcpy (info->fake.fname,fname,msdos_len);
290 for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsignedchar)(*pt)];
291 *pt = '\0'; /* GLU C'est sur on a un 0 a la fin */ 292 info->msdos_reject = 1;
293 /* 294 The numeric extension is added only when we know 295 the position in the EMD file, in umsdos_newentry(), 296 umsdos_delentry(), and umsdos_findentry(). 297 See umsdos_manglename(). 298 */ 299 }else{ 300 /* Conforming MSDOS file name */ 301 strncpy (info->fake.fname,fname,len);
302 info->msdos_reject = 0;
303 base_len = firstpt != NULL ? (int)(firstpt - fname) : len;
304 } 305 if (cardinal_per_size[base_len]){ 306 /* #Specification: file name / MSDOS devices / mangling 307 To avoid unreachable file from MsDOS, any MsDOS conforming 308 file with a basename equal to one of the MsDOS pseudo 309 devices will be mangled. 310
311 If a file such as "prn" was created, it would be unreachable 312 under MsDOS because prn is assumed to be the printer, even 313 if the file does have an extension. 314
315 Since the extension is unimportant to MsDOS, we must patch 316 the basename also. We simply insert a minus '-'. To avoid 317 conflict with valid file with a minus in front (such as 318 "-prn"), we add an mangled extension like any other 319 mangled file name. 320
321 Here is the list of DOS pseudo devices: 322
323 # 324 "prn","con","aux","nul", 325 "lpt1","lpt2","lpt3","lpt4", 326 "com1","com2","com3","com4", 327 "clock$" 328 # 329
330 and some standard ones for common DOS programs 331
332 "emmxxxx0","xmsxxxx0","setverxx" 333
334 (Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK> 335 for pointing these to me). 336
337 Is there one missing ? 338 */ 339 /* This table must be ordered by length */ 340 staticconstchar *tbdev[]={ 341 "prn","con","aux","nul",
342 "lpt1","lpt2","lpt3","lpt4",
343 "com1","com2","com3","com4",
344 "clock$",
345 "emmxxxx0","xmsxxxx0","setverxx"
346 };
347 /* Tell where to find in tbdev[], the first name of */ 348 /* a certain length */ 349 staticconstcharstart_ind_dev[9]={ 350 0, 0, 0, 4, 12, 12, 13, 13, 16
351 };
352 charbasen[9];
353 inti;
354 for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){ 355 if (memcmp(info->fake.fname,tbdev[i],base_len)==0){ 356 memcpy (basen,info->fake.fname,base_len);
357 basen[base_len] = '\0'; /* GLU C'est sur on a un 0 a la fin */ 358 /* 359 GLU On ne fait cela que si necessaire, on essaye d'etre le 360 GLU simple dans le cas general (le plus frequent). 361 */ 362 info->fake.fname[0] = '-';
363 strcpy (info->fake.fname+1,basen); /* GLU C'est sur on a un 0 a la fin */ 364 msdos_len = (base_len==8) ? 8 : base_len + 1;
365 info->msdos_reject = 1;
366 break;
367 } 368 } 369 } 370 info->fake.fname[msdos_len] = '\0'; /* Help doing printk */ 371 /* GLU Ce zero devrais deja y etre ! (invariant ?) */ 372 info->fake.len = msdos_len;
373 /* Pourquoi ne pas utiliser info->fake.len partout ??? plus long ?*/ 374 memcpy (info->entry.name,fname,len);
375 info->entry.name_len = len;
376 ret = 0;
377 } 378 /* 379 Evaluate how many record are needed to store this entry. 380 */ 381 info->recsize = umsdos_evalrecsize (len);
382 returnret;
383 } 384
385 #ifdefTEST 386
/* One umsdos_parse() test case: a name and the result expected for it. */
/* The strings are only read, so they are declared const. */
struct MANG_TEST{
	const char *fname;	/* Name to validate */
	int msdos_reject;	/* Expected msdos_reject flag */
	const char *msname;	/* Expected msdos name */
};
392
393 structMANG_TESTtb[]={ 394 "hello", 0, "hello",
395 "hello.1", 0, "hello.1",
396 "hello.1_", 0, "hello.1_",
397 "prm", 0, "prm",
398
399 #ifdef PROPOSITION
400 "HELLO", 1, "hello",
401 "Hello.1", 1, "hello.1",
402 "Hello.c", 1, "hello.c",
403 #elseif
404 /* 405 Je trouve les trois exemples ci-dessous tres "malheureux". 406 Je propose de mettre en minuscule dans un passe preliminaire, 407 et de tester apres si il y a d'autres caracters "mechants". 408 Bon, je ne l'ai pas fait, parceque ce n'est pas si facilement 409 modifiable que ca. Mais c'est pour le principe. 410 Evidemment cela augmente les chances de "Collision", 411 par exemple: entre "HELLO" et "Hello", mais ces problemes 412 peuvent etre traiter ailleur avec les autres collisions. 413 */ 414 "HELLO", 1, "hello",
415 "Hello.1", 1, "hello_1",
416 "Hello.c", 1, "hello_c",
417 #endif 418
419 "hello.{_1", 1, "hello_{_",
420 "hello\t", 1, "hello#",
421 "hello.1.1", 1, "hello_1_",
422 "hel,lo", 1, "hel#lo",
423 "Salut.Tu.vas.bien?", 1, "salut_tu",
424 ".profile", 1, "_profile",
425 ".xv", 1, "_xv",
426 "toto.", 1, "toto_",
427 "clock$.x", 1, "-clock$",
428 "emmxxxx0", 1, "-emmxxxx",
429 "emmxxxx0.abcd", 1, "-emmxxxx",
430 "aux", 1, "-aux",
431 "prn", 1, "-prn",
432 "prn.abc", 1, "-prn",
433 "PRN", 1, "-prn",
434 /* 435 GLU ATTENTION : Le resultat de ceux-ci sont differents avec ma version 436 GLU du mangle par rapport au mangle originale. 437 GLU CAUSE: La maniere de calculer la variable baselen. 438 GLU Pour toi c'est toujours 3 439 GLU Pour moi c'est respectivement 7, 8 et 8 440 */ 441 "PRN.abc", 1, "prn_abc",
442 "Prn.abcd", 1, "prn_abcd",
443 "prn.abcd", 1, "prn_abcd",
444 "Prn.abcdefghij", 1, "prn_abcd"
445 };
446
447 int main (intargc, char *argv[])
/* */ 448 { 449 inti,rold,rnew;
450 printf ("Testing the umsdos_parse.\n");
451 for (i=0; i<sizeof(tb)/sizeof(tb[0]); i++){ 452 structMANG_TEST *pttb = tb+i;
453 structumsdos_infoinfo;
454 intok = umsdos_parse (pttb->fname,strlen(pttb->fname),&info);
455 if (strcmp(info.fake.fname,pttb->msname)!=0){ 456 printf ("**** %s -> ",pttb->fname);
457 printf ("%s <> %s\n",info.fake.fname,pttb->msname);
458 }elseif (info.msdos_reject != pttb->msdos_reject){ 459 printf ("**** %s -> %s ",pttb->fname,pttb->msname);
460 printf ("%d <> %d\n",info.msdos_reject,pttb->msdos_reject);
461 }else{ 462 printf (" %s -> %s %d\n",pttb->fname,pttb->msname 463 ,pttb->msdos_reject);
464 } 465 } 466 printf ("Testing the new umsdos_evalrecsize.");
467 for (i=0; i<UMSDOS_MAXNAME ; i++){ 468 rnew=umsdos_evalrecsize (i);
469 rold=umsdos_evalrecsize_old (i);
470 if (!(i%UMSDOS_REC_SIZE)){ 471 printf ("\n%d:\t",i);
472 } 473 if (rnew!=rold){ 474 printf ("**** %d newres: %d != %d \n", i, rnew, rold);
475 }else{ 476 printf(".");
477 } 478 } 479 printf ("\nEnd of Testing.\n");
480
481 return 0;
482 } 483
484 #endif