modules/charset_macosxfs.c

説明を見る。
00001 /* 
00002    Unix SMB/CIFS implementation.
00003    Samba charset module for Mac OS X/Darwin
00004    Copyright (C) Benjamin Riefenstahl 2003
00005    
00006    This program is free software; you can redistribute it and/or modify
00007    it under the terms of the GNU General Public License as published by
00008    the Free Software Foundation; either version 2 of the License, or
00009    (at your option) any later version.
00010    
00011    This program is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014    GNU General Public License for more details.
00015    
00016    You should have received a copy of the GNU General Public License
00017    along with this program; if not, write to the Free Software
00018    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019 */
00020 
00021 /*
00022  * modules/charset_macosxfs.c
00023  *
00024  * A Samba charset module to use on Mac OS X/Darwin as the filesystem
00025  * and display encoding.
00026  *
00027  * Actually two implementations are provided here.  The default
00028  * implementation is based on the official CFString API.  The other is
00029  * based on internal CFString APIs as defined in the OpenDarwin
00030  * source.
00031  */
00032 
00033 #include "includes.h"
00034 
00035 /*
00036  * Include OS frameworks.  These are only needed in this module.
00037  */
00038 #include <CoreFoundation/CFString.h>
00039 
00040 /*
00041  * See if autoconf has found us the internal headers in some form.
00042  */
00043 #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
00044 #       include <Corefoundation/CFStringEncodingConverter.h>
00045 #       include <Corefoundation/CFUnicodePrecomposition.h>
00046 #       define USE_INTERNAL_API 1
00047 #elif HAVE_CFSTRINGENCODINGCONVERTER_H
00048 #       include <CFStringEncodingConverter.h>
00049 #       include <CFUnicodePrecomposition.h>
00050 #       define USE_INTERNAL_API 1
00051 #endif
00052 
00053 /*
00054  * Compile time configuration: Do we want debug output?
00055  */
00056 /* #define DEBUG_STRINGS 1 */
00057 
00058 /*
00059  * A simple, but efficient memory provider for our buffers.
00060  */
/*
 * Grow a reusable buffer so that it can hold at least `newsize` bytes.
 *
 *   buffer   current allocation (NULL on first use)
 *   size     in/out: capacity of `buffer` in bytes
 *   newsize  number of bytes the caller needs
 *
 * The buffer is never shrunk.  When it has to grow, 128 bytes of slack
 * are added so that slightly larger follow-up requests do not
 * reallocate again.
 *
 * Returns the (possibly moved) buffer, or NULL if the allocation
 * failed.  On failure *size is reset to 0 so that the next call
 * allocates again instead of handing out NULL as a valid buffer.
 *
 * NOTE(review): assumes SMB_REALLOC releases the old block on failure
 * (the original code already overwrote the only pointer to it) --
 * confirm against the SMB_REALLOC definition.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
        if (newsize > *size) {
                size_t capacity = newsize + 128;

                buffer = SMB_REALLOC(buffer, capacity);
                *size = (NULL != buffer) ? capacity : 0;
        }
        return buffer;
}
00069 
00070 /*
00071  * While there is a version of OpenDarwin for intel, the usual case is
00072  * big-endian PPC.  So we need byte swapping to handle the
00073  * little-endian byte order of the network protocol.  We also need an
00074  * additional dynamic buffer to do this work for incoming data blocks,
00075  * because we have to consider the original data as constant.
00076  *
00077  * We abstract the differences away by providing a simple facade with
00078  * these functions/macros:
00079  *
00080  *      le_to_native(dst,src,len)
00081  *      native_to_le(cp,len)
00082  *      set_ucbuffer_with_le(buffer,bufsize,data,size)
00083  *      set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
00084  */
00085 #ifdef WORDS_BIGENDIAN
00086 
/*
 * Copy 16-bit units from `src` to `dst`, exchanging the two bytes of
 * every unit.
 *
 *   dst  destination, must not overlap `src`
 *   src  source bytes
 *   len  length of the source in bytes
 *
 * Only complete 2-byte units are processed.  A trailing odd byte is
 * ignored, so neither buffer is accessed beyond `len` bytes.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
        const char *srcend = src + (len & ~(size_t)1);
        while (src < srcend) {
                dst[0] = src[1];
                dst[1] = src[0];
                dst += 2;
                src += 2;
        }
}
/*
 * Exchange the two bytes of every 16-bit unit in `cp`, in place.
 *
 *   cp   buffer to convert
 *   len  length of the buffer in bytes
 *
 * Only complete 2-byte units are processed.  A trailing odd byte is
 * left untouched, so the buffer is never accessed beyond `len` bytes.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
        char temp;
        char *end = cp + (len & ~(size_t)1);
        while (cp < end) {
                temp = cp[1];
                cp[1] = cp[0];
                cp[0] = temp;
                cp += 2;
        }
}
00108 
/*
 * Big-endian host: UTF-16-LE data has to be byte-swapped in both
 * directions, and incoming data always goes through a swapped copy
 * (with no extra reserve).
 */
#define le_to_native(dst,src,len)       swap_bytes((dst),(src),(len))
#define native_to_le(cp,len)            swap_bytes_inplace((cp),(len))
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
        set_ucbuffer_with_le_copy((buffer),(bufsize),(data),(size),0)
00113 
00114 #else   /* ! WORDS_BIGENDIAN */
00115 
/*
 * Little-endian host: the wire format already matches the native
 * byte order.  Copies are plain memcpy, the in-place conversion is a
 * no-op, and incoming data is used directly instead of being copied
 * (only `bufsize` is evaluated, `buffer` and `size` are ignored).
 */
#define le_to_native(dst,src,len)       memcpy((dst),(src),(len))
#define native_to_le(cp,len)            /* nothing */
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
        ((void)(bufsize), (UniChar *)(data))
00120 
00121 #endif
00122 
00123 static inline UniChar *set_ucbuffer_with_le_copy (
00124         UniChar *buffer, size_t *bufsize,
00125         const void *data, size_t size, size_t reserve)
00126 {
00127         buffer = resize_buffer(buffer, bufsize, size+reserve);
00128         le_to_native((char*)buffer,data,size);
00129         return buffer;
00130 }
00131 
00132 
00133 /*
00134  * A simple hexdump function for debugging error conditions.
00135  */
/*
 * Emit the string `s` verbatim at debug level 0.
 *
 * The text is passed as the argument of a "%s" format rather than as
 * the format itself, so a '%' inside `s` (for example in dumped
 * string data) is printed literally instead of being interpreted as
 * a conversion specification.
 *
 * NOTE(review): assumes DEBUG forwards its parenthesised second
 * argument to a printf-style function -- confirm against debug.h.
 */
#define debug_out(s)    DEBUG(0,("%s",(s)))
00137 
#ifdef DEBUG_STRINGS

/*
 * Write a hex dump of `len` bytes starting at `s` to the debug log
 * (level 0), framed by "<<<<<<<" / ">>>>>>>" marker lines and headed
 * by `label`.
 *
 * Every output line shows the offset in hex, up to 16 bytes as hex in
 * two groups of eight, and the same bytes as printable ASCII with '.'
 * standing in for everything else.
 *
 * The label and the formatted lines are logged through an explicit
 * "%s" format: the ASCII column is derived from the dumped data and
 * may contain '%', which must not be interpreted as a format.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
        size_t restlen = len;
        debug_out("<<<<<<<\n");
        DEBUG(0,("%s",label));
        debug_out("\n");
        while (restlen > 0) {
                /*
                 * Worst case per line: 8 offset digits + 1 space,
                 * 3 separators, 16*3 hex columns, 16 ASCII columns,
                 * newline and NUL = 78 bytes.
                 */
                char line[100];
                size_t i, j;
                char * d = line;
#undef sprintf
                d += sprintf(d, "%04X ", (unsigned)(len-restlen));
                *d++ = ' ';
                /* First group of eight hex columns, padded if short. */
                for( i = 0; i<restlen && i<8; ++i ) {
                        d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
                }
                for( j = i; j<8; ++j ) {
                        d += sprintf(d, "   ");
                }
                *d++ = ' ';
                /* Second group of eight hex columns, padded if short. */
                for( i = 8; i<restlen && i<16; ++i ) {
                        d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
                }
                for( j = i; j<16; ++j ) {
                        d += sprintf(d, "   ");
                }
                *d++ = ' ';
                /* ASCII column; i ends up as the number of bytes shown. */
                for( i = 0; i<restlen && i<16; ++i ) {
                        unsigned char c = (unsigned char)s[i];
                        if(c < ' ' || c >= 0x7F || !isprint(c))
                                *d++ = '.';
                        else
                                *d++ = (char)c;
                }
                *d++ = '\n';
                *d = 0;
                restlen -= i;
                s += i;
                DEBUG(0,("%s",line));
        }
        debug_out(">>>>>>>\n");
}

#else   /* !DEBUG_STRINGS */

/* Without DEBUG_STRINGS the dump calls compile to nothing. */
#define hexdump(label,s,len) /* nothing */

#endif
00187 
00188 
00189 #if !USE_INTERNAL_API
00190 
00191 /*
00192  * An implementation based on documented Mac OS X APIs.
00193  *
00194  * This does a certain amount of memory management, creating and
00195  * manipulating CFString objects.  We try to minimize the impact by
00196  * keeping those objects around and re-using them.  We also use
00197  * external backing store for the CFStrings where this is possible and
00198  * beneficial.
00199  *
00200  * The Unicode normalizations forms available at this level are
00201  * generic, not specifically for the file system.  So they may not be
00202  * perfect fits.
00203  */
00204 static size_t macosxfs_encoding_pull(
00205         void *cd,                               /* Encoder handle */
00206         char **inbuf, size_t *inbytesleft,      /* Script string */
00207         char **outbuf, size_t *outbytesleft)    /* UTF-16-LE string */
00208 {
00209         static const int script_code = kCFStringEncodingUTF8;
00210         static CFMutableStringRef cfstring = NULL;
00211         size_t outsize;
00212         CFRange range;
00213 
00214         (void) cd; /* UNUSED */
00215 
00216         if (0 == *inbytesleft) {
00217                 return 0;
00218         }
00219 
00220         if (NULL == cfstring) {
00221                 /*
00222                  * A version with an external backing store as in the
00223                  * push function should have been more efficient, but
00224                  * testing shows, that it is actually slower (!).
00225                  * Maybe kCFAllocatorDefault gets shortcut evaluation
00226                  * internally, while kCFAllocatorNull doesn't.
00227                  */
00228                 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
00229         }
00230 
00231         /*
00232          * Three methods of appending to a CFString, choose the most
00233          * efficient.
00234          */
00235         if (0 == (*inbuf)[*inbytesleft-1]) {
00236                 CFStringAppendCString(cfstring, *inbuf, script_code);
00237         } else if (*inbytesleft <= 255) {
00238                 Str255 buffer;
00239                 buffer[0] = *inbytesleft;
00240                 memcpy(buffer+1, *inbuf, buffer[0]);
00241                 CFStringAppendPascalString(cfstring, buffer, script_code);
00242         } else {
00243                 /*
00244                  * We would like to use a fixed buffer and a loop
00245                  * here, but than we can't garantee that the input is
00246                  * well-formed UTF-8, as we are supposed to do.
00247                  */
00248                 static char *buffer = NULL;
00249                 static size_t buflen = 0;
00250                 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
00251                 memcpy(buffer, *inbuf, *inbytesleft);
00252                 buffer[*inbytesleft] = 0;
00253                 CFStringAppendCString(cfstring, *inbuf, script_code);
00254         }
00255 
00256         /*
00257          * Compose characters, using the non-canonical composition
00258          * form.
00259          */
00260         CFStringNormalize(cfstring, kCFStringNormalizationFormC);
00261 
00262         outsize = CFStringGetLength(cfstring);
00263         range = CFRangeMake(0,outsize);
00264 
00265         if (outsize == 0) {
00266                 /*
00267                  * HACK: smbd/mangle_hash2.c:is_legal_name() expects
00268                  * errors here.  That function will always pass 2
00269                  * characters.  smbd/open.c:check_for_pipe() cuts a
00270                  * patchname to 10 characters blindly.  Suppress the
00271                  * debug output in those cases.
00272                  */
00273                 if(2 != *inbytesleft && 10 != *inbytesleft) {
00274                         debug_out("String conversion: "
00275                                   "An unknown error occurred\n");
00276                         hexdump("UTF8->UTF16LE (old) input",
00277                                 *inbuf, *inbytesleft);
00278                 }
00279                 errno = EILSEQ; /* Not sure, but this is what we have
00280                                  * actually seen. */
00281                 return -1;
00282         }
00283         if (outsize*2 > *outbytesleft) {
00284                 CFStringDelete(cfstring, range);
00285                 debug_out("String conversion: "
00286                           "Output buffer too small\n");
00287                 hexdump("UTF8->UTF16LE (old) input",
00288                         *inbuf, *inbytesleft);
00289                 errno = E2BIG;
00290                 return -1;
00291         }
00292 
00293         CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
00294         CFStringDelete(cfstring, range);
00295 
00296         native_to_le(*outbuf, outsize*2);
00297 
00298         /*
00299          * Add a converted null byte, if the CFString conversions
00300          * prevented that until now.
00301          */
00302         if (0 == (*inbuf)[*inbytesleft-1] && 
00303             (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
00304 
00305                 if ((outsize*2+2) > *outbytesleft) {
00306                         debug_out("String conversion: "
00307                                   "Output buffer too small\n");
00308                         hexdump("UTF8->UTF16LE (old) input",
00309                                 *inbuf, *inbytesleft);
00310                         errno = E2BIG;
00311                         return -1;
00312                 }
00313 
00314                 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
00315                 outsize += 2;
00316         }
00317 
00318         *inbuf += *inbytesleft;
00319         *inbytesleft = 0;
00320         *outbuf += outsize*2;
00321         *outbytesleft -= outsize*2;
00322 
00323         return 0;
00324 }
00325 
00326 static size_t macosxfs_encoding_push(
00327         void *cd,                               /* Encoder handle */
00328         char **inbuf, size_t *inbytesleft,      /* UTF-16-LE string */
00329         char **outbuf, size_t *outbytesleft)    /* Script string */
00330 {
00331         static const int script_code = kCFStringEncodingUTF8;
00332         static CFMutableStringRef cfstring = NULL;
00333         static UniChar *buffer = NULL;
00334         static size_t buflen = 0;
00335         CFIndex outsize, cfsize, charsconverted;
00336 
00337         (void) cd; /* UNUSED */
00338 
00339         if (0 == *inbytesleft) {
00340                 return 0;
00341         }
00342 
00343         /*
00344          * We need a buffer that can hold 4 times the original data,
00345          * because that is the theoretical maximum that decomposition
00346          * can create currently (in Unicode 4.0).
00347          */
00348         buffer = set_ucbuffer_with_le_copy(
00349                 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
00350 
00351         if (NULL == cfstring) {
00352                 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
00353                         kCFAllocatorDefault,
00354                         buffer, *inbytesleft/2, buflen/2,
00355                         kCFAllocatorNull);
00356         } else {
00357                 CFStringSetExternalCharactersNoCopy(
00358                         cfstring,
00359                         buffer, *inbytesleft/2, buflen/2);
00360         }
00361 
00362         /*
00363          * Decompose characters, using the non-canonical decomposition
00364          * form.
00365          *
00366          * NB: This isn't exactly what HFS+ wants (see note on
00367          * kCFStringEncodingUseHFSPlusCanonical in
00368          * CFStringEncodingConverter.h), but AFAIK it's the best that
00369          * the official API can do.
00370          */
00371         CFStringNormalize(cfstring, kCFStringNormalizationFormD);
00372 
00373         cfsize = CFStringGetLength(cfstring);
00374         charsconverted = CFStringGetBytes(
00375                 cfstring, CFRangeMake(0,cfsize),
00376                 script_code, 0, False,
00377                 *outbuf, *outbytesleft, &outsize);
00378 
00379         if (0 == charsconverted) {
00380                 debug_out("String conversion: "
00381                           "Buffer too small or not convertable\n");
00382                 hexdump("UTF16LE->UTF8 (old) input",
00383                         *inbuf, *inbytesleft);
00384                 errno = EILSEQ; /* Probably more likely. */
00385                 return -1;
00386         }
00387 
00388         /*
00389          * Add a converted null byte, if the CFString conversions
00390          * prevented that until now.
00391          */
00392         if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
00393             (0 != (*outbuf)[outsize-1])) {
00394 
00395                 if (((size_t)outsize+1) > *outbytesleft) {
00396                         debug_out("String conversion: "
00397                                   "Output buffer too small\n");
00398                         hexdump("UTF16LE->UTF8 (old) input",
00399                                 *inbuf, *inbytesleft);
00400                         errno = E2BIG;
00401                         return -1;
00402                 }
00403 
00404                 (*outbuf)[outsize] = 0;
00405                 ++outsize;
00406         }
00407 
00408         *inbuf += *inbytesleft;
00409         *inbytesleft = 0;
00410         *outbuf += outsize;
00411         *outbytesleft -= outsize;
00412 
00413         return 0;
00414 }
00415 
00416 #else /* USE_INTERNAL_API */
00417 
00418 /*
00419  * An implementation based on internal code as known from the
00420  * OpenDarwin CVS.
00421  *
00422  * This code doesn't need much memory management because it uses
00423  * functions that operate on the raw memory directly.
00424  *
00425  * The push routine here is faster and more compatible with HFS+ than
00426  * the other implementation above.  The pull routine is only faster
00427  * for some strings, slightly slower for others.  The pull routine
00428  * loses because it has to iterate over the data twice, once to
00429  * decode UTF-8 and then to do the character composition required by
00430  * Windows.
00431  */
00432 static size_t macosxfs_encoding_pull(
00433         void *cd,                               /* Encoder handle */
00434         char **inbuf, size_t *inbytesleft,      /* Script string */
00435         char **outbuf, size_t *outbytesleft)    /* UTF-16-LE string */
00436 {
00437         static const int script_code = kCFStringEncodingUTF8;
00438         UInt32 srcCharsUsed = 0;
00439         UInt32 dstCharsUsed = 0;
00440         UInt32 result;
00441         uint32_t dstDecomposedUsed = 0;
00442         uint32_t dstPrecomposedUsed = 0;
00443 
00444         (void) cd; /* UNUSED */
00445 
00446         if (0 == *inbytesleft) {
00447                 return 0;
00448         }
00449 
00450         result = CFStringEncodingBytesToUnicode(
00451                 script_code, kCFStringEncodingComposeCombinings,
00452                 *inbuf, *inbytesleft, &srcCharsUsed,
00453                 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
00454 
00455         switch(result) {
00456         case kCFStringEncodingConversionSuccess:
00457                 if (*inbytesleft == srcCharsUsed)
00458                         break;
00459                 else
00460                         ; /*fall through*/
00461         case kCFStringEncodingInsufficientOutputBufferLength:
00462                 debug_out("String conversion: "
00463                           "Output buffer too small\n");
00464                 hexdump("UTF8->UTF16LE (new) input",
00465                         *inbuf, *inbytesleft);
00466                 errno = E2BIG;
00467                 return -1;
00468         case kCFStringEncodingInvalidInputStream:
00469                 /*
00470                  * HACK: smbd/mangle_hash2.c:is_legal_name() expects
00471                  * errors here.  That function will always pass 2
00472                  * characters.  smbd/open.c:check_for_pipe() cuts a
00473                  * patchname to 10 characters blindly.  Suppress the
00474                  * debug output in those cases.
00475                  */
00476                 if(2 != *inbytesleft && 10 != *inbytesleft) {
00477                         debug_out("String conversion: "
00478                                   "Invalid input sequence\n");
00479                         hexdump("UTF8->UTF16LE (new) input",
00480                                 *inbuf, *inbytesleft);
00481                 }
00482                 errno = EILSEQ;
00483                 return -1;
00484         case kCFStringEncodingConverterUnavailable:
00485                 debug_out("String conversion: "
00486                           "Unknown encoding\n");
00487                 hexdump("UTF8->UTF16LE (new) input",
00488                         *inbuf, *inbytesleft);
00489                 errno = EINVAL;
00490                 return -1;
00491         }
00492 
00493         /*
00494          * It doesn't look like CFStringEncodingBytesToUnicode() can
00495          * produce precomposed characters (flags=ComposeCombinings
00496          * doesn't do it), so we need another pass over the data here.
00497          * We can do this in-place, as the string can only get
00498          * shorter.
00499          *
00500          * (Actually in theory there should be an internal
00501          * decomposition and reordering before the actual composition
00502          * step.  But we should be able to rely on that we always get
00503          * fully decomposed strings for input, so this can't create
00504          * problems in reality.)
00505          */
00506         CFUniCharPrecompose(
00507                 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
00508                 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
00509 
00510         native_to_le(*outbuf, dstPrecomposedUsed*2);
00511 
00512         *inbuf += srcCharsUsed;
00513         *inbytesleft -= srcCharsUsed;
00514         *outbuf += dstPrecomposedUsed*2;
00515         *outbytesleft -= dstPrecomposedUsed*2;
00516 
00517         return 0;
00518 }
00519 
00520 static size_t macosxfs_encoding_push(
00521         void *cd,                               /* Encoder handle */
00522         char **inbuf, size_t *inbytesleft,      /* UTF-16-LE string */
00523         char **outbuf, size_t *outbytesleft)    /* Script string */
00524 {
00525         static const int script_code = kCFStringEncodingUTF8;
00526         static UniChar *buffer = NULL;
00527         static size_t buflen = 0;
00528         UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
00529 
00530         (void) cd; /* UNUSED */
00531 
00532         if (0 == *inbytesleft) {
00533                 return 0;
00534         }
00535 
00536         buffer = set_ucbuffer_with_le(
00537                 buffer, &buflen, *inbuf, *inbytesleft);
00538 
00539         result = CFStringEncodingUnicodeToBytes(
00540                 script_code, kCFStringEncodingUseHFSPlusCanonical,
00541                 buffer, *inbytesleft/2, &srcCharsUsed,
00542                 *outbuf, *outbytesleft, &dstCharsUsed);
00543 
00544         switch(result) {
00545         case kCFStringEncodingConversionSuccess:
00546                 if (*inbytesleft/2 == srcCharsUsed)
00547                         break;
00548                 else
00549                         ; /*fall through*/
00550         case kCFStringEncodingInsufficientOutputBufferLength:
00551                 debug_out("String conversion: "
00552                           "Output buffer too small\n");
00553                 hexdump("UTF16LE->UTF8 (new) input",
00554                         *inbuf, *inbytesleft);
00555                 errno = E2BIG;
00556                 return -1;
00557         case kCFStringEncodingInvalidInputStream:
00558                 /*
00559                  * HACK: smbd/open.c:check_for_pipe():is_legal_name()
00560                  * cuts a pathname to 10 characters blindly.  Suppress
00561                  * the debug output in those cases.
00562                  */
00563                 if(10 != *inbytesleft) {
00564                         debug_out("String conversion: "
00565                                   "Invalid input sequence\n");
00566                         hexdump("UTF16LE->UTF8 (new) input",
00567                                 *inbuf, *inbytesleft);
00568                 }
00569                 errno = EILSEQ;
00570                 return -1;
00571         case kCFStringEncodingConverterUnavailable:
00572                 debug_out("String conversion: "
00573                           "Unknown encoding\n");
00574                 hexdump("UTF16LE->UTF8 (new) input",
00575                         *inbuf, *inbytesleft);
00576                 errno = EINVAL;
00577                 return -1;
00578         }
00579 
00580         *inbuf += srcCharsUsed*2;
00581         *inbytesleft -= srcCharsUsed*2;
00582         *outbuf += dstCharsUsed;
00583         *outbytesleft -= dstCharsUsed;
00584 
00585         return 0;
00586 }
00587 
00588 #endif /* USE_INTERNAL_API */
00589 
00590 /*
00591  * For initialization, actually install the encoding as "macosxfs".
00592  */
00593 static struct charset_functions macosxfs_encoding_functions = {
00594         "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
00595 };
00596 
00597 NTSTATUS charset_macosxfs_init(void)
00598 {
00599         return smb_register_charset(&macosxfs_encoding_functions);
00600 }
00601 
00602 /* eof */

Sambaに対してSat Aug 29 21:23:06 2009に生成されました。  doxygen 1.4.7