root/fs/hpfs/hpfs_caps.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. latin1_upcase
  2. latin1_downcase
  3. latin1_to_cp850
  4. cp850_to_latin1
  5. hpfs_char_to_upper_linux
  6. linux_char_to_upper_linux
  7. hpfs_char_to_lower_linux
  8. hpfs_char_to_linux

   1 /* Capitalization rules for HPFS */
   2 
   3 /* In OS/2, HPFS filenames preserve upper and lower case letter distinctions
   4    but filename matching ignores case.  That is, creating a file "Foo"
   5    actually creates a file named "Foo" which can be looked up as "Foo",
   6    "foo", or "FOO", among other possibilities.
   7 
   8    Also, HPFS is internationalized -- a table giving the uppercase
   9    equivalent of every character is stored in the filesystem, so that
  10    any national character set may be used.  If several different
  11    national character sets are in use, several tables are stored
  12    in the filesystem.
  13 
  14    It would be perfectly reasonable for Linux HPFS to act as a Unix
  15    filesystem and match "Foo" only if asked for "Foo" exactly.  But
  16    the sort order of HPFS directories is case-insensitive, so Linux
  17    still has to know the capitalization rules used by OS/2.  Because
  18    of this, it turns out to be more natural for us to be case-insensitive
  19    than not.
  20 
  21    Currently the standard character set used by Linux is Latin-1.
  22    Work is underway to permit people to use UTF-8 instead, therefore
  23    all code that depends on the character set is segregated here.
  24 
  25    (It would be wonderful if Linux HPFS could be independent of what
  26    character set is in use on the Linux side, but because of the
  27    necessary case folding this is impossible.)
  28 
  29    There is a map from Latin-1 into code page 850 for every printing
  30    character in Latin-1.  The NLS documentation of OS/2 shows that
  31    everybody has 850 available unless they don't have Western Latin
  32    chars available at all (so fitting them to Linux without Unicode
  33    is a doomed exercise).
  34 
  35    It is not clear exactly how HPFS.IFS handles the situation when
  36    multiple code pages are in use.  Experiments show that
  37 
  38    - tables on the disk give uppercasing rules for the installed code pages
  39 
  40    - each directory entry is tagged with what code page was current
  41      when that name was created
  42 
  43    - doing just CHCP, without changing what's on the disk in any way,
  44      can change what DIR reports, and what name a case-folded match
  45      will match.
  46 
  47    This means, I think, that HPFS.IFS operates in the current code
  48    page, without regard to the uppercasing information recorded in
  49    the tables on the disk.  It does record the uppercasing rules
  50    it used, perhaps for CHKDSK, but it does not appear to use them
  51    itself.
  52 
  53    So: Linux, a Latin-1 system, will operate in code page 850.  We
  54    recode between 850 and Latin-1 when dealing with the names actually
  55    on the disk.  We don't use the uppercasing tables either.
  56 
  57    In a hypothetical UTF-8 implementation, one reasonable way to
  58    proceed that matches OS/2 (for least surprise) is: do case
  59    translation in UTF-8, and recode to/from one of the code pages
  60    available on the mounted filesystem.  Reject as invalid any name
  61    containing chars that can't be represented on disk by one of the
  62    code pages OS/2 is using.  Recoding from on-disk names to UTF-8
  63    could use the code page tags, though this is not what OS/2 does. */
  64 
  65 #ifdef MODULE
  66 #include <linux/module.h>
  67 #include <linux/version.h>
  68 #else
  69 #define MOD_INC_USE_COUNT
  70 #define MOD_DEC_USE_COUNT
  71 #endif
  72 
  73 
  74 static const unsigned char tb_cp850_to_latin1[128] =
  75 {
  76   199, 252, 233, 226, 228, 224, 229, 231,
  77   234, 235, 232, 239, 238, 236, 196, 197,
  78   201, 230, 198, 244, 246, 242, 251, 249,
  79   255, 214, 220, 248, 163, 216, 215, 159,
  80   225, 237, 243, 250, 241, 209, 170, 186,
  81   191, 174, 172, 189, 188, 161, 171, 187,
  82   155, 156, 157, 144, 151, 193, 194, 192,
  83   169, 135, 128, 131, 133, 162, 165, 147,
  84   148, 153, 152, 150, 145, 154, 227, 195,
  85   132, 130, 137, 136, 134, 129, 138, 164,
  86   240, 208, 202, 203, 200, 158, 205, 206,
  87   207, 149, 146, 141, 140, 166, 204, 139,
  88   211, 223, 212, 210, 245, 213, 181, 254,
  89   222, 218, 219, 217, 253, 221, 175, 180,
  90   173, 177, 143, 190, 182, 167, 247, 184,
  91   176, 168, 183, 185, 179, 178, 142, 160,
  92 };
  93 
  94 #if 0
  95 static const unsigned char tb_latin1_to_cp850[128] =
  96 {
  97   186, 205, 201, 187, 200, 188, 204, 185,
  98   203, 202, 206, 223, 220, 219, 254, 242,
  99   179, 196, 218, 191, 192, 217, 195, 180,
 100   194, 193, 197, 176, 177, 178, 213, 159,
 101   255, 173, 189, 156, 207, 190, 221, 245,
 102   249, 184, 166, 174, 170, 240, 169, 238,
 103   248, 241, 253, 252, 239, 230, 244, 250,
 104   247, 251, 167, 175, 172, 171, 243, 168,
 105   183, 181, 182, 199, 142, 143, 146, 128,
 106   212, 144, 210, 211, 222, 214, 215, 216,
 107   209, 165, 227, 224, 226, 229, 153, 158,
 108   157, 235, 233, 234, 154, 237, 232, 225,
 109   133, 160, 131, 198, 132, 134, 145, 135,
 110   138, 130, 136, 137, 141, 161, 140, 139,
 111   208, 164, 149, 162, 147, 228, 148, 246,
 112   155, 151, 163, 150, 129, 236, 231, 152,
 113 };
 114 #endif
 115 
 116 #define A_GRAVE 0300
 117 #define THORN   0336   
 118 #define MULTIPLY 0327
 119 #define a_grave 0340
 120 #define thorn   0376
 121 #define divide  0367
 122 
 123 static inline unsigned latin1_upcase (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 124 {
 125   if (c - 'a' <= 'z' - 'a'
 126       || (c - a_grave <= thorn - a_grave
 127           && c != divide))
 128     return c - 'a' + 'A';
 129   else
 130     return c;
 131 }
 132 
 133 static inline unsigned latin1_downcase (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 134 {
 135   if (c - 'A' <= 'Z' - 'A'
 136       || (c - A_GRAVE <= THORN - A_GRAVE
 137           && c != MULTIPLY))
 138     return c + 'a' - 'A';
 139   else
 140     return c;
 141 }
 142 
 143 #if 0
 144 static inline unsigned latin1_to_cp850 (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 145 {
 146   if ((signed) c - 128 >= 0)
 147     return tb_latin1_to_cp850[c - 128];
 148   else
 149     return c;
 150 }
 151 #endif
 152 
 153 static inline unsigned cp850_to_latin1 (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155   if ((signed) c - 128 >= 0)
 156     return tb_cp850_to_latin1[c - 128];
 157   else
 158     return c;
 159 }
 160 
 161 unsigned hpfs_char_to_upper_linux (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 162 {
 163   return latin1_upcase (cp850_to_latin1 (c));
 164 }
 165 
 166 unsigned linux_char_to_upper_linux (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 167 {
 168   return latin1_upcase (c);
 169 }
 170 
 171 unsigned hpfs_char_to_lower_linux (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 172 {
 173   return latin1_downcase (cp850_to_latin1 (c));
 174 }
 175 
 176 unsigned hpfs_char_to_linux (unsigned c)
     /* [previous][next][first][last][top][bottom][index][help] */
 177 {
 178   return cp850_to_latin1 (c);
 179 }

/* [previous][next][first][last][top][bottom][index][help] */