1 /* 2 * linux/fs/umsdos/mangle.c 3 * 4 * Written 1993 by Jacques Gelinas 5 * 6 * Control the mangling of file name to fit msdos name space. 7 * Many optimisation by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID) 8 */ 9 #include <linux/errno.h>
10 #include <linux/ctype.h>
11 #include <linux/string.h>
12 #include <linux/kernel.h>
13 #include <linux/umsdos_fs.h>
14
15 /* 16 Complete the mangling of the MSDOS fake name 17 based on the position of the entry in the EMD file. 18
19 Simply complete the job of umsdos_parse; fill the extension. 20
21 Beware that info->f_pos must be set. 22 */ 23 voidumsdos_manglename (structumsdos_info *info)
/* */ 24 { 25 if (info->msdos_reject){ 26 /* #Specification: file name / non MSDOS conforming / mangling 27 Each non MSDOS conforming file has a special extension 28 build from the entry position in the EMD file. 29
30 This number is then transform in a base 32 number, where 31 each digit is expressed like hexadecimal number, using 32 digit and letter, except it uses 22 letters from 'a' to 'v'. 33 The number 32 comes from 2**5. It is faster to split a binary 34 number using a base which is a power of two. And I was 32 35 when I started this project. Pick your answer :-) . 36
37 If the result is '0', it is replace with '_', simply 38 to make it odd. 39
40 This is true for the first two character of the extension. 41 The last one is taken from a list of odd character, which 42 are: 43
44 { } ( ) ! ` ^ & @ 45
46 With this scheme, we can produce 9216 ( 9* 32 * 32) 47 different extensions which should not clash with any useful 48 extension already popular or meaningful. Since most directory 49 have much less than 32 * 32 files in it, the first character 50 of the extension of any mangle name will be {. 51
52 Here are the reason to do this (this kind of mangling). 53
54 -The mangling is deterministic. Just by the extension, we 55 are able to locate the entry in the EMD file. 56
57 -By keeping to beginning of the file name almost unchanged, 58 we are helping the MSDOS user. 59
60 -The mangling produces names not too ugly, so an msdos user 61 may live with it (remember it, type it, etc...). 62
63 -The mangling produces names ugly enough so no one will 64 ever think of using such a name in real life. This is not 65 fool proof. I don't think there is a total solution to this. 66 */ 67 union{ 68 intentry_num;
69 struct{ 70 unsignednum1:5,num2:5,num3:5;
71 }num;
72 }u;
73 char *pt = info->fake.fname + info->fake.len;
74 /* lookup for encoding the last character of the extension */ 75 /* It contain valid character after the ugly one to make sure */ 76 /* even if someone overflow the 32 * 32 * 9 limit, it still do */ 77 /* something */ 78 #defineSPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
79 staticcharlookup3[]={ 80 SPECIAL_MANGLING,
81 /* This is the start of lookup12 */ 82 '_','1','2','3','4','5','6','7','8','9',
83 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
84 'p','q','r','s','t','u','v'
85 };
86 #definelookup12 (lookup3+9)
87 u.entry_num = info->f_pos / UMSDOS_REC_SIZE;
88 if (u.entry_num > (9* 32 * 32)){ 89 printk ("UMSDOS: More than 9216 file in a directory.\n"
90 "This may break the mangling strategy.\n"
91 "Not a killer problem. See doc.\n");
92 } 93 *pt++ = '.';
94 *pt++ = lookup3 [u.num.num3];
95 *pt++ = lookup12[u.num.num2];
96 *pt++ = lookup12[u.num.num1];
97 *pt = '\0'; /* help doing printk */ 98 info->fake.len += 4;
99 info->msdos_reject = 0; /* Avoid mangling twice */ 100 } 101 } 102
103 /* 104 Evaluate the record size needed to store of name of len character. 105 The value returned is a multiple of UMSDOS_REC_SIZE. 106 */ 107 intumsdos_evalrecsize (intlen)
/* */ 108 { 109 structumsdos_direntdirent;
110 intnbrec = 1+((len-1+(dirent.name-(char*)&dirent))
111 / UMSDOS_REC_SIZE);
112 returnnbrec * UMSDOS_REC_SIZE;
113 /* 114 GLU This should be inlined or something to speed it up to the max. 115 GLU nbrec is absolutely not needed to return the value. 116 */ 117 } 118 #ifdefTEST 119 intumsdos_evalrecsize_old (intlen)
/* */ 120 { 121 structumsdos_direntdirent;
122 intsize = len + (dirent.name-(char*)&dirent);
123 intnbrec = size / UMSDOS_REC_SIZE;
124 intextra = size % UMSDOS_REC_SIZE;
125 if (extra > 0) nbrec++;
126 returnnbrec * UMSDOS_REC_SIZE;
127 } 128 #endif 129 /* 130 Fill the struct info with the full and msdos name of a file 131 Return 0 if all is ok, a negative error code otherwise. 132 */ 133 intumsdos_parse (
/* */ 134 constchar *fname,
135 intlen,
136 structumsdos_info *info)
137 { 138 intret = -ENAMETOOLONG;
139 /* #Specification: file name / too long 140 If a file name exceed UMSDOS maxima, the file name is silently 141 truncated. This makes it conformant with the other file system 142 of Linux (minix and ext2 at least). 143 */ 144 if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME;
145 { 146 constchar *firstpt=NULL; /* First place we saw a . in fname */ 147 /* #Specification: file name / non MSDOS conforming / base length 0 148 file name beginning with a period '.' are invalid for MsDOS. 149 It needs absolutely a base name. So the file name is mangled 150 */ 151 intivldchar = fname[0] == '.';/* At least one invalid character */ 152 intmsdos_len = len;
153 intbase_len;
154 /* 155 cardinal_per_size tells if there exist at least one 156 DOS pseudo devices on length n. See the test below. 157 */ 158 staticconstcharcardinal_per_size[9]={ 159 0, 0, 0, 1, 1, 0, 1, 0, 1
160 };
161 /* 162 lkp translate all character to acceptable character (for DOS). 163 When lkp[n] == n, it means also it is an acceptable one. 164 So it serve both as a flag and as a translator. 165 */ 166 staticcharlkp[256];
167 staticcharis_init=0;
168 if (!is_init){ 169 /* 170 Initialisation of the array is easier and less error prone 171 like this. 172 */ 173 inti;
174 staticchar *spc = "\"*+,/:;<=>?[\\]|~";
175 is_init = 1;
176 for (i=0; i<=32; i++) lkp[i] = '#';
177 for (i=33; i<'A'; i++) lkp[i] = (char)i;
178 for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A'));
179 for (i='Z'+1; i<127; i++) lkp[i] = (char)i;
180 for (i=128; i<256; i++) lkp[i] = '#';
181
182 lkp['.'] = '_';
183 while (*spc != '\0') lkp[(unsignedchar)(*spc++)] = '#';
184 } 185 /* GLU 186 file name which are longer than 8+'.'+3 are invalid for MsDOS. 187 So the file name is to be mangled no more test needed. 188 This Speed Up for long and very long name. 189 The position of the last point is no more necessary anyway. 190 */ 191 if (len<=(8+1+3)){ 192 constchar *pt = fname;
193 constchar *endpt = fname + len;
194 while (pt < endpt){ 195 if (*pt == '.'){ 196 if (firstpt != NULL){ 197 /* 2 . in a file name. Reject */ 198 ivldchar = 1;
199 break;
200 }else{ 201 intextlen = (int)(endpt - pt);
202 firstpt = pt;
203 if (firstpt - fname > 8){ 204 /* base name longer than 8: reject */ 205 ivldchar = 1;
206 break;
207 }elseif (extlen > 4){ 208 /* Extension longer than 4 (including .): reject */ 209 ivldchar = 1;
210 break;
211 }elseif (extlen == 1){ 212 /* #Specification: file name / non MSDOS conforming / last char == . 213 If the last character of a file name is 214 a period, mangling is applied. MsDOS do 215 not support those file name. 216 */ 217 ivldchar = 1;
218 break;
219 }elseif (extlen == 4){ 220 /* #Specification: file name / non MSDOS conforming / mangling clash 221 To avoid clash with the umsdos mangling, any file 222 with a special character as the first character 223 of the extension will be mangled. This solve the 224 following problem: 225 226 touch FILE 227 # FILE is invalid for DOS, so mangling is applied 228 # file.{_1 is created in the DOS directory 229 touch file.{_1 230 # To UMSDOS file point to a single DOS entry. 231 # So file.{_1 has to be mangled. 232 */ 233 staticcharspecial[]={ 234 SPECIAL_MANGLING,'\0'
235 };
236 if (strchr(special,firstpt[1])!= NULL){ 237 ivldchar = 1;
238 break;
239 } 240 } 241 } 242 }elseif (lkp[(unsignedchar)(*pt)] != *pt){ 243 ivldchar = 1;
244 break;
245 } 246 pt++;
247 } 248 }else{ 249 ivldchar = 1;
250 } 251 if (ivldchar 252 || (firstpt == NULL && len > 8)
253 || (len == UMSDOS_EMD_NAMELEN 254 && memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){ 255 /* #Specification: file name / --linux-.--- 256 The name of the EMD file --linux-.--- is map to a mangled 257 name. So UMSDOS does not restrict its use. 258 */ 259 /* #Specification: file name / non MSDOS conforming / mangling 260 Non MSDOS conforming file name must use some alias to fit 261 in the MSDOS name space. 262
263 The strategy is simple. The name is simply truncated to 264 8 char. points are replace with underscore and a 265 number is given as an extension. This number correspond 266 to the entry number in the EMD file. The EMD file 267 only need to carry the real name. 268
269 Upper case is also convert to lower case. 270 Control character are converted to #. 271 Space are converted to #. 272 The following character are also converted to #. 273 " * + , / : ; < = > ? [ \ ] | ~ 274
275 Sometime, the problem is not in MsDOS itself but in 276 command.com. 277 */ 278 inti;
279 char *pt = info->fake.fname;
280 base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len;
281 /* 282 There is no '.' any more so we know for a fact that 283 the base length is the length. 284 */ 285 memcpy (info->fake.fname,fname,msdos_len);
286 for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsignedchar)(*pt)];
287 *pt = '\0'; /* GLU C'est sur on a un 0 a la fin */ 288 info->msdos_reject = 1;
289 /* 290 The numeric extension is added only when we know 291 the position in the EMD file, in umsdos_newentry(), 292 umsdos_delentry(), and umsdos_findentry(). 293 See umsdos_manglename(). 294 */ 295 }else{ 296 /* Conforming MSDOS file name */ 297 strcpy (info->fake.fname,fname); /* GLU C'est sur on a un 0 a la fin */ 298 info->msdos_reject = 0;
299 base_len = firstpt != NULL ? (int)(firstpt - fname) : len;
300 } 301 if (cardinal_per_size[base_len]){ 302 /* #Specification: file name / MSDOS devices / mangling 303 To avoid unreachable file from MsDOS, any MsDOS conforming 304 file with a basename equal to one of the MsDOS pseudo 305 devices will be mangled. 306
307 If a file such as "prn" was created, it would be unreachable 308 under MsDOS because prn is assumed to be the printer, even 309 if the file does have an extension. 310
311 Since the extension is unimportant to MsDOS, we must patch 312 the basename also. We simply insert a minus '-'. To avoid 313 conflict with valid file with a minus in front (such as 314 "-prn"), we add an mangled extension like any other 315 mangled file name. 316
317 Here is the list of DOS pseudo devices: 318
319 "prn","con","aux","nul", 320 "lpt1","lpt2","lpt3","lpt4", 321 "com1","com2","com3","com4", 322 "clock$" 323
324 and some standard ones for common DOS programs 325
326 "emmxxxx0","xmsxxxx0","setverxx" 327
328 (Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK> 329 for pointing these to me). 330
331 Is there one missing ? 332 */ 333 /* This table must be ordered by length */ 334 staticconstchar *tbdev[]={ 335 "prn","con","aux","nul",
336 "lpt1","lpt2","lpt3","lpt4",
337 "com1","com2","com3","com4",
338 "clock$",
339 "emmxxxx0","xmsxxxx0","setverxx"
340 };
341 /* Tell where to find in tbdev[], the first name of */ 342 /* a certain length */ 343 staticconstcharstart_ind_dev[9]={ 344 0, 0, 0, 4, 12, 12, 13, 13, 16
345 };
346 charbasen[9];
347 inti;
348 for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){ 349 if (memcmp(info->fake.fname,tbdev[i],base_len)==0){ 350 memcpy (basen,info->fake.fname,base_len);
351 basen[base_len] = '\0'; /* GLU C'est sur on a un 0 a la fin */ 352 /* 353 GLU On ne fait cela que si necessaire, on essaye d'etre le 354 GLU simple dans le cas general (le plus frequent). 355 */ 356 info->fake.fname[0] = '-';
357 strcpy (info->fake.fname+1,basen); /* GLU C'est sur on a un 0 a la fin */ 358 msdos_len = (base_len==8) ? 8 : base_len + 1;
359 info->msdos_reject = 1;
360 break;
361 } 362 } 363 } 364 info->fake.fname[msdos_len] = '\0'; /* Help doing printk */ 365 /* GLU Ce zero devrais deja y etre ! (invariant ?) */ 366 info->fake.len = msdos_len;
367 /* Pourquoi ne pas utiliser info->fake.len partout ??? plus long ?*/ 368 memcpy (info->entry.name,fname,len);
369 info->entry.name_len = len;
370 ret = 0;
371 } 372 /* 373 Evaluate how many record are needed to store this entry. 374 */ 375 info->recsize = umsdos_evalrecsize (len);
376 returnret;
377 } 378
#ifdef TEST

/*
	Stand alone test program for umsdos_parse() and umsdos_evalrecsize().
	Only compiled when TEST is defined.
*/

/* One test case for umsdos_parse() */
struct MANG_TEST{
	const char *fname;	/* Name to validate */
	int msdos_reject;	/* Expected msdos_reject flag */
	const char *msname;	/* Expected msdos name */
};

struct MANG_TEST tb[]={
	{"hello",		0,	"hello"},
	{"hello.1",		0,	"hello.1"},
	{"hello.1_",	0,	"hello.1_"},
	{"prm",			0,	"prm"},

#ifdef PROPOSITION
	{"HELLO",		1,	"hello"},
	{"Hello.1",		1,	"hello.1"},
	{"Hello.c",		1,	"hello.c"},
#else
/*
	(This used to be "#elseif", which is not a preprocessor directive,
	so the entries below were never compiled.)

	I find the three examples below very "unfortunate".
	I propose to convert to lower case in a preliminary pass,
	and to test afterwards whether there are other "nasty" characters.
	Well, I did not do it, because it is not that easy to
	modify. But it is for the principle.
	Obviously this increases the chances of "collision",
	for instance between "HELLO" and "Hello", but these problems
	can be handled elsewhere with the other collisions.
*/
	{"HELLO",		1,	"hello"},
	{"Hello.1",		1,	"hello_1"},
	{"Hello.c",		1,	"hello_c"},
#endif

	{"hello.{_1",	1,	"hello_{_"},
	{"hello\t",		1,	"hello#"},
	{"hello.1.1",	1,	"hello_1_"},
	{"hel,lo",		1,	"hel#lo"},
	{"Salut.Tu.vas.bien?", 1, "salut_tu"},
	{".profile",	1,	"_profile"},
	{".xv",			1,	"_xv"},
	{"toto.",		1,	"toto_"},
	{"clock$.x",	1,	"-clock$"},
	{"emmxxxx0",	1,	"-emmxxxx"},
	{"emmxxxx0.abcd", 1, "-emmxxxx"},
	{"aux",			1,	"-aux"},
	{"prn",			1,	"-prn"},
	{"prn.abc",		1,	"-prn"},
	{"PRN",			1,	"-prn"},
/*
GLU	WARNING: the results of these differ between my version
GLU	of mangle and the original mangle.
GLU	CAUSE: the way the variable baselen is computed.
GLU		For you it is always 3
GLU		For me it is respectively 7, 8 and 8
*/
	{"PRN.abc",		1,	"prn_abc"},
	{"Prn.abcd",	1,	"prn_abcd"},
	{"prn.abcd",	1,	"prn_abcd"},
	{"Prn.abcdefghij", 1, "prn_abcd"}
};

/*
	Run every entry of tb[] through umsdos_parse() and print a line
	per entry; mismatches are flagged with "****". Then compare
	umsdos_evalrecsize() with umsdos_evalrecsize_old() for every
	length below UMSDOS_MAXNAME.
	Always returns 0.
*/
int main (int argc, char *argv[])
{
	int i,rold,rnew;
	int nbtest = (int)(sizeof(tb)/sizeof(tb[0]));
	printf ("Testing the umsdos_parse.\n");
	for (i=0; i<nbtest; i++){
		struct MANG_TEST *pttb = tb+i;
		struct umsdos_info info;
		/* Only the content of info is checked, not the return value */
		(void) umsdos_parse (pttb->fname,strlen(pttb->fname),&info);
		if (strcmp(info.fake.fname,pttb->msname)!=0){
			printf ("**** %s -> ",pttb->fname);
			printf ("%s <> %s\n",info.fake.fname,pttb->msname);
		}else if (info.msdos_reject != pttb->msdos_reject){
			printf ("**** %s -> %s ",pttb->fname,pttb->msname);
			printf ("%d <> %d\n",info.msdos_reject,pttb->msdos_reject);
		}else{
			printf (" %s -> %s %d\n",pttb->fname,pttb->msname
				,pttb->msdos_reject);
		}
	}
	printf ("Testing the new umsdos_evalrecsize.");
	for (i=0; i<UMSDOS_MAXNAME ; i++){
		rnew=umsdos_evalrecsize (i);
		rold=umsdos_evalrecsize_old (i);
		if (!(i%UMSDOS_REC_SIZE)){
			printf ("\n%d:\t",i);
		}
		if (rnew!=rold){
			printf ("**** %d newres: %d != %d \n", i, rnew, rold);
		}else{
			printf(".");
		}
	}
	printf ("\nEnd of Testing.\n");

	return 0;
}

#endif