Samba: lib/charcnv.c ソースファイル

00001 /* 
00002    Unix SMB/CIFS implementation.
00003    Character set conversion Extensions
00004    Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
00005    Copyright (C) Andrew Tridgell 2001
00006    Copyright (C) Simo Sorce 2001
00007    Copyright (C) Martin Pool 2003
00008    
00009    This program is free software; you can redistribute it and/or modify
00010    it under the terms of the GNU General Public License as published by
00011    the Free Software Foundation; either version 2 of the License, or
00012    (at your option) any later version.
00013    
00014    This program is distributed in the hope that it will be useful,
00015    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017    GNU General Public License for more details.
00018    
00019    You should have received a copy of the GNU General Public License
00020    along with this program; if not, write to the Free Software
00021    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00022 
00023 */
00024 #include "includes.h"
00025 
/**
 * Return the placeholder character written in place of input that
 * cannot be converted.
 *
 * The value is fixed at '_'. We can parameterize this if someone
 * complains.... JRA.
 **/
char lp_failed_convert_char(void)
{
        static const char fail_char = '_';

        return fail_char;
}
00032 
00033 /**
00034  * @file
00035  *
00036  * @brief Character-set conversion routines built on our iconv.
00037  * 
00038  * @note Samba's internal character set (at least in the 3.0 series)
00039  * is always the same as the one for the Unix filesystem.  It is
00040  * <b>not</b> necessarily UTF-8 and may be different on machines that
00041  * need i18n filenames to be compatible with Unix software.  It does
00042  * have to be a superset of ASCII.  All multibyte sequences must start
00043  * with a byte with the high bit set.
00044  *
00045  * @sa lib/iconv.c
00046  */
00047 
00048 
00049 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS]; /* iconv descriptors, indexed as [from][to]; filled in by init_iconv() */
00050 static BOOL conv_silent; /* When True, conversion failures are NOT reported via DEBUG; init_iconv() sets it while rebuilding its tables. */
00051 
00052 /**
00053  * Return the name of a charset to give to iconv().
00054  **/
00055 static const char *charset_name(charset_t ch)
00056 {
00057         const char *ret = NULL;
00058 
00059         if (ch == CH_UTF16LE) ret = "UTF-16LE";
00060         else if (ch == CH_UTF16BE) ret = "UTF-16BE";
00061         else if (ch == CH_UNIX) ret = lp_unix_charset();
00062         else if (ch == CH_DOS) ret = lp_dos_charset();
00063         else if (ch == CH_DISPLAY) ret = lp_display_charset();
00064         else if (ch == CH_UTF8) ret = "UTF8";
00065 
00066 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
00067         if (ret && !strcmp(ret, "LOCALE")) {
00068                 const char *ln = NULL;
00069 
00070 #ifdef HAVE_SETLOCALE
00071                 setlocale(LC_ALL, "");
00072 #endif
00073                 ln = nl_langinfo(CODESET);
00074                 if (ln) {
00075                         /* Check whether the charset name is supported
00076                            by iconv */
00077                         smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
00078                         if (handle == (smb_iconv_t) -1) {
00079                                 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
00080                                 ln = NULL;
00081                         } else {
00082                                 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
00083                                 smb_iconv_close(handle);
00084                         }
00085                 }
00086                 ret = ln;
00087         }
00088 #endif
00089 
00090         if (!ret || !*ret) ret = "ASCII";
00091         return ret;
00092 }
00093 
00094 void lazy_initialize_conv(void)
00095 {
00096         static int initialized = False;
00097 
00098         if (!initialized) {
00099                 initialized = True;
00100                 load_case_tables();
00101                 init_iconv();
00102         }
00103 }
00104 
00105 /**
00106  * Destroy global objects allocated by init_iconv()
00107  **/
00108 void gfree_charcnv(void)
00109 {
00110         int c1, c2;
00111 
00112         for (c1=0;c1<NUM_CHARSETS;c1++) {
00113                 for (c2=0;c2<NUM_CHARSETS;c2++) {
00114                         if ( conv_handles[c1][c2] ) {
00115                                 smb_iconv_close( conv_handles[c1][c2] );
00116                                 conv_handles[c1][c2] = 0;
00117                         }
00118                 }
00119         }
00120 }
00121 
00122 /**
00123  * Initialize iconv conversion descriptors.
00124  *
00125  * This is called the first time it is needed, and also called again
00126  * every time the configuration is reloaded, because the charset or
00127  * codepage might have changed.
00128  **/
00129 void init_iconv(void)
00130 {
00131         int c1, c2;
00132         BOOL did_reload = False;
00133 
00134         /* so that charset_name() works we need to get the UNIX<->UCS2 going
00135            first */
00136         if (!conv_handles[CH_UNIX][CH_UTF16LE])
00137                 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
00138 
00139         if (!conv_handles[CH_UTF16LE][CH_UNIX])
00140                 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
00141 
00142         for (c1=0;c1<NUM_CHARSETS;c1++) {
00143                 for (c2=0;c2<NUM_CHARSETS;c2++) {
00144                         const char *n1 = charset_name((charset_t)c1);
00145                         const char *n2 = charset_name((charset_t)c2);
00146                         if (conv_handles[c1][c2] &&
00147                             strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
00148                             strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
00149                                 continue;
00150 
00151                         did_reload = True;
00152 
00153                         if (conv_handles[c1][c2])
00154                                 smb_iconv_close(conv_handles[c1][c2]);
00155 
00156                         conv_handles[c1][c2] = smb_iconv_open(n2,n1);
00157                         if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
00158                                 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
00159                                          charset_name((charset_t)c1), charset_name((charset_t)c2)));
00160                                 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
00161                                         n1 = "ASCII";
00162                                 }
00163                                 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
00164                                         n2 = "ASCII";
00165                                 }
00166                                 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
00167                                         n1, n2 ));
00168                                 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
00169                                 if (!conv_handles[c1][c2]) {
00170                                         DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
00171                                         smb_panic("init_iconv: conv_handle initialization failed.");
00172                                 }
00173                         }
00174                 }
00175         }
00176 
00177         if (did_reload) {
00178                 /* XXX: Does this really get called every time the dos
00179                  * codepage changes? */
00180                 /* XXX: Is the did_reload test too strict? */
00181                 conv_silent = True;
00182                 init_doschar_table();
00183                 init_valid_table();
00184                 conv_silent = False;
00185         }
00186 }
00187 
00188 /**
00189  * Convert string from one encoding to another, making error checking etc
00190  * Slow path version - uses (slow) iconv.
00191  *
00192  * @param src pointer to source string (multibyte or singlebyte)
00193  * @param srclen length of the source string in bytes
00194  * @param dest pointer to destination string (multibyte or singlebyte)
00195  * @param destlen maximal length allowed for string
00196  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
00197  * @returns the number of bytes occupied in the destination
00198  *
00199  * Ensure the srclen contains the terminating zero.
00200  *
00201  **/
00202 
00203 static size_t convert_string_internal(charset_t from, charset_t to,
00204                       void const *src, size_t srclen, 
00205                       void *dest, size_t destlen, BOOL allow_bad_conv)
00206 {
00207         size_t i_len, o_len;
00208         size_t retval;
00209         const char* inbuf = (const char*)src;
00210         char* outbuf = (char*)dest;
00211         smb_iconv_t descriptor;
00212 
00213         lazy_initialize_conv();
00214 
00215         descriptor = conv_handles[from][to];
00216 
00217         if (srclen == (size_t)-1) {
00218                 if (from == CH_UTF16LE || from == CH_UTF16BE) {
00219                         srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
00220                 } else {
00221                         srclen = strlen((const char *)src)+1;
00222                 }
00223         }
00224 
00225 
00226         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
00227                 if (!conv_silent)
00228                         DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
00229                 return (size_t)-1;
00230         }
00231 
00232         i_len=srclen;
00233         o_len=destlen;
00234 
00235  again:
00236 
00237         retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
00238         if(retval==(size_t)-1) {
00239                 const char *reason="unknown error";
00240                 switch(errno) {
00241                         case EINVAL:
00242                                 reason="Incomplete multibyte sequence";
00243                                 if (!conv_silent)
00244                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
00245                                 if (allow_bad_conv)
00246                                         goto use_as_is;
00247                                 break;
00248                         case E2BIG:
00249                                 reason="No more room"; 
00250                                 if (!conv_silent) {
00251                                         if (from == CH_UNIX) {
00252                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
00253                                                         charset_name(from), charset_name(to),
00254                                                         (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
00255                                         } else {
00256                                                 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
00257                                                         charset_name(from), charset_name(to),
00258                                                         (unsigned int)srclen, (unsigned int)destlen));
00259                                         }
00260                                 }
00261                                 break;
00262                         case EILSEQ:
00263                                 reason="Illegal multibyte sequence";
00264                                 if (!conv_silent)
00265                                         DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
00266                                 if (allow_bad_conv)
00267                                         goto use_as_is;
00268                                 break;
00269                         default:
00270                                 if (!conv_silent)
00271                                         DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
00272                                 break;
00273                 }
00274                 /* smb_panic(reason); */
00275         }
00276         return destlen-o_len;
00277 
00278  use_as_is:
00279 
00280         /* 
00281          * Conversion not supported. This is actually an error, but there are so
00282          * many misconfigured iconv systems and smb.conf's out there we can't just
00283          * fail. Do a very bad conversion instead.... JRA.
00284          */
00285 
00286         {
00287                 if (o_len == 0 || i_len == 0)
00288                         return destlen - o_len;
00289 
00290                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
00291                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
00292                         /* Can't convert from utf16 any endian to multibyte.
00293                            Replace with the default fail char.
00294                         */
00295                         if (i_len < 2)
00296                                 return destlen - o_len;
00297                         if (i_len >= 2) {
00298                                 *outbuf = lp_failed_convert_char();
00299 
00300                                 outbuf++;
00301                                 o_len--;
00302 
00303                                 inbuf += 2;
00304                                 i_len -= 2;
00305                         }
00306 
00307                         if (o_len == 0 || i_len == 0)
00308                                 return destlen - o_len;
00309 
00310                         /* Keep trying with the next char... */
00311                         goto again;
00312 
00313                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
00314                         /* Can't convert to UTF16LE - just widen by adding the
00315                            default fail char then zero.
00316                         */
00317                         if (o_len < 2)
00318                                 return destlen - o_len;
00319 
00320                         outbuf[0] = lp_failed_convert_char();
00321                         outbuf[1] = '\0';
00322 
00323                         inbuf++;
00324                         i_len--;
00325 
00326                         outbuf += 2;
00327                         o_len -= 2;
00328 
00329                         if (o_len == 0 || i_len == 0)
00330                                 return destlen - o_len;
00331 
00332                         /* Keep trying with the next char... */
00333                         goto again;
00334 
00335                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
00336                                 to != CH_UTF16LE && to != CH_UTF16BE) {
00337                         /* Failed multibyte to multibyte. Just copy the default fail char and
00338                                 try again. */
00339                         outbuf[0] = lp_failed_convert_char();
00340 
00341                         inbuf++;
00342                         i_len--;
00343 
00344                         outbuf++;
00345                         o_len--;
00346 
00347                         if (o_len == 0 || i_len == 0)
00348                                 return destlen - o_len;
00349 
00350                         /* Keep trying with the next char... */
00351                         goto again;
00352 
00353                 } else {
00354                         /* Keep compiler happy.... */
00355                         return destlen - o_len;
00356                 }
00357         }
00358 }
00359 
00360 /**
00361  * Convert string from one encoding to another, making error checking etc
00362  * Fast path version - handles ASCII first.
00363  *
00364  * @param src pointer to source string (multibyte or singlebyte)
00365  * @param srclen length of the source string in bytes, or -1 for nul terminated.
00366  * @param dest pointer to destination string (multibyte or singlebyte)
00367  * @param destlen maximal length allowed for string - *NEVER* -1.
00368  * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
00369  * @returns the number of bytes occupied in the destination
00370  *
00371  * Ensure the srclen contains the terminating zero.
00372  *
00373  * This function has been hand-tuned to provide a fast path.
00374  * Don't change unless you really know what you are doing. JRA.
00375  **/
00376 
00377 size_t convert_string(charset_t from, charset_t to,
00378                       void const *src, size_t srclen, 
00379                       void *dest, size_t destlen, BOOL allow_bad_conv)
00380 {
00381         /*
00382          * NB. We deliberately don't do a strlen here if srclen == -1.
00383          * This is very expensive over millions of calls and is taken
00384          * care of in the slow path in convert_string_internal. JRA.
00385          */
00386 
00387 #ifdef DEVELOPER
00388         SMB_ASSERT(destlen != (size_t)-1);
00389 #endif
00390 
00391         if (srclen == 0)
00392                 return 0;
00393 
00394         if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
00395                 const unsigned char *p = (const unsigned char *)src;
00396                 unsigned char *q = (unsigned char *)dest;
00397                 size_t slen = srclen;
00398                 size_t dlen = destlen;
00399                 unsigned char lastp = '\0';
00400                 size_t retval = 0;
00401 
00402                 /* If all characters are ascii, fast path here. */
00403                 while (slen && dlen) {
00404                         if ((lastp = *p) <= 0x7f) {
00405                                 *q++ = *p++;
00406                                 if (slen != (size_t)-1) {
00407                                         slen--;
00408                                 }
00409                                 dlen--;
00410                                 retval++;
00411                                 if (!lastp)
00412                                         break;
00413                         } else {
00414 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
00415                                 goto general_case;
00416 #else
00417                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
00418 #endif
00419                         }
00420                 }
00421                 if (!dlen) {
00422                         /* Even if we fast path we should note if we ran out of room. */
00423                         if (((slen != (size_t)-1) && slen) ||
00424                                         ((slen == (size_t)-1) && lastp)) {
00425                                 errno = E2BIG;
00426                         }
00427                 }
00428                 return retval;
00429         } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
00430                 const unsigned char *p = (const unsigned char *)src;
00431                 unsigned char *q = (unsigned char *)dest;
00432                 size_t retval = 0;
00433                 size_t slen = srclen;
00434                 size_t dlen = destlen;
00435                 unsigned char lastp = '\0';
00436 
00437                 /* If all characters are ascii, fast path here. */
00438                 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
00439                         if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
00440                                 *q++ = *p;
00441                                 if (slen != (size_t)-1) {
00442                                         slen -= 2;
00443                                 }
00444                                 p += 2;
00445                                 dlen--;
00446                                 retval++;
00447                                 if (!lastp)
00448                                         break;
00449                         } else {
00450 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
00451                                 goto general_case;
00452 #else
00453                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
00454 #endif
00455                         }
00456                 }
00457                 if (!dlen) {
00458                         /* Even if we fast path we should note if we ran out of room. */
00459                         if (((slen != (size_t)-1) && slen) ||
00460                                         ((slen == (size_t)-1) && lastp)) {
00461                                 errno = E2BIG;
00462                         }
00463                 }
00464                 return retval;
00465         } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
00466                 const unsigned char *p = (const unsigned char *)src;
00467                 unsigned char *q = (unsigned char *)dest;
00468                 size_t retval = 0;
00469                 size_t slen = srclen;
00470                 size_t dlen = destlen;
00471                 unsigned char lastp = '\0';
00472 
00473                 /* If all characters are ascii, fast path here. */
00474                 while (slen && (dlen >= 2)) {
00475                         if ((lastp = *p) <= 0x7F) {
00476                                 *q++ = *p++;
00477                                 *q++ = '\0';
00478                                 if (slen != (size_t)-1) {
00479                                         slen--;
00480                                 }
00481                                 dlen -= 2;
00482                                 retval += 2;
00483                                 if (!lastp)
00484                                         break;
00485                         } else {
00486 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
00487                                 goto general_case;
00488 #else
00489                                 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
00490 #endif
00491                         }
00492                 }
00493                 if (!dlen) {
00494                         /* Even if we fast path we should note if we ran out of room. */
00495                         if (((slen != (size_t)-1) && slen) ||
00496                                         ((slen == (size_t)-1) && lastp)) {
00497                                 errno = E2BIG;
00498                         }
00499                 }
00500                 return retval;
00501         }
00502 
00503 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
00504   general_case:
00505 #endif
00506         return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
00507 }
00508 
00509 /**
00510  * Convert between character sets, allocating a new buffer for the result.
00511  *
00512  * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
00513  * @param srclen length of source buffer.
00514  * @param dest always set at least to NULL
00515  * @note -1 is not accepted for srclen.
00516  *
00517  * @returns Size in bytes of the converted string; or -1 in case of error.
00518  *
00519  * Ensure the srclen contains the terminating zero.
00520  * 
00521  * I hate the goto's in this function. It's embarressing.....
00522  * There has to be a cleaner way to do this. JRA.
00523  **/
00524 
00525 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
00526                                void const *src, size_t srclen, void *dst, BOOL allow_bad_conv)
00527 {
00528         size_t i_len, o_len, destlen = (srclen * 3) / 2;
00529         size_t retval;
00530         const char *inbuf = (const char *)src;
00531         char *outbuf = NULL, *ob = NULL;
00532         smb_iconv_t descriptor;
00533         void **dest = (void **)dst;
00534 
00535         *dest = NULL;
00536 
00537         if (src == NULL || srclen == (size_t)-1)
00538                 return (size_t)-1;
00539         if (srclen == 0)
00540                 return 0;
00541 
00542         lazy_initialize_conv();
00543 
00544         descriptor = conv_handles[from][to];
00545 
00546         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
00547                 if (!conv_silent)
00548                         DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
00549                 return (size_t)-1;
00550         }
00551 
00552   convert:
00553 
00554         /* +2 is for ucs2 null termination. */
00555         if ((destlen*2)+2 < destlen) {
00556                 /* wrapped ! abort. */
00557                 if (!conv_silent)
00558                         DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
00559                 if (!ctx)
00560                         SAFE_FREE(outbuf);
00561                 return (size_t)-1;
00562         } else {
00563                 destlen = destlen * 2;
00564         }
00565 
00566         /* +2 is for ucs2 null termination. */
00567         if (ctx) {
00568                 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
00569         } else {
00570                 ob = (char *)SMB_REALLOC(ob, destlen + 2);
00571         }
00572 
00573         if (!ob) {
00574                 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
00575                 return (size_t)-1;
00576         }
00577         outbuf = ob;
00578         i_len = srclen;
00579         o_len = destlen;
00580 
00581  again:
00582 
00583         retval = smb_iconv(descriptor,
00584                            &inbuf, &i_len,
00585                            &outbuf, &o_len);
00586         if(retval == (size_t)-1)                {
00587                 const char *reason="unknown error";
00588                 switch(errno) {
00589                         case EINVAL:
00590                                 reason="Incomplete multibyte sequence";
00591                                 if (!conv_silent)
00592                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
00593                                 if (allow_bad_conv)
00594                                         goto use_as_is;
00595                                 break;
00596                         case E2BIG:
00597                                 goto convert;           
00598                         case EILSEQ:
00599                                 reason="Illegal multibyte sequence";
00600                                 if (!conv_silent)
00601                                         DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
00602                                 if (allow_bad_conv)
00603                                         goto use_as_is;
00604                                 break;
00605                 }
00606                 if (!conv_silent)
00607                         DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
00608                 /* smb_panic(reason); */
00609                 return (size_t)-1;
00610         }
00611 
00612   out:
00613 
00614         destlen = destlen - o_len;
00615         if (ctx) {
00616                 /* We're shrinking here so we know the +2 is safe from wrap. */
00617                 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
00618         } else {
00619                 ob = (char *)SMB_REALLOC(ob,destlen + 2);
00620         }
00621 
00622         if (destlen && !ob) {
00623                 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
00624                 return (size_t)-1;
00625         }
00626 
00627         *dest = ob;
00628 
00629         /* Must ucs2 null terminate in the extra space we allocated. */
00630         ob[destlen] = '\0';
00631         ob[destlen+1] = '\0';
00632 
00633         return destlen;
00634 
00635  use_as_is:
00636 
00637         /* 
00638          * Conversion not supported. This is actually an error, but there are so
00639          * many misconfigured iconv systems and smb.conf's out there we can't just
00640          * fail. Do a very bad conversion instead.... JRA.
00641          */
00642 
00643         {
00644                 if (o_len == 0 || i_len == 0)
00645                         goto out;
00646 
00647                 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
00648                                 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
00649                         /* Can't convert from utf16 any endian to multibyte.
00650                            Replace with the default fail char.
00651                         */
00652 
00653                         if (i_len < 2)
00654                                 goto out;
00655 
00656                         if (i_len >= 2) {
00657                                 *outbuf = lp_failed_convert_char();
00658 
00659                                 outbuf++;
00660                                 o_len--;
00661 
00662                                 inbuf += 2;
00663                                 i_len -= 2;
00664                         }
00665 
00666                         if (o_len == 0 || i_len == 0)
00667                                 goto out;
00668 
00669                         /* Keep trying with the next char... */
00670                         goto again;
00671 
00672                 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
00673                         /* Can't convert to UTF16LE - just widen by adding the
00674                            default fail char then zero.
00675                         */
00676                         if (o_len < 2)
00677                                 goto out;
00678 
00679                         outbuf[0] = lp_failed_convert_char();
00680                         outbuf[1] = '\0';
00681 
00682                         inbuf++;
00683                         i_len--;
00684 
00685                         outbuf += 2;
00686                         o_len -= 2;
00687 
00688                         if (o_len == 0 || i_len == 0)
00689                                 goto out;
00690 
00691                         /* Keep trying with the next char... */
00692                         goto again;
00693 
00694                 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
00695                                 to != CH_UTF16LE && to != CH_UTF16BE) {
00696                         /* Failed multibyte to multibyte. Just copy the default fail char and
00697                            try again. */
00698                         outbuf[0] = lp_failed_convert_char();
00699 
00700                         inbuf++;
00701                         i_len--;
00702 
00703                         outbuf++;
00704                         o_len--;
00705 
00706                         if (o_len == 0 || i_len == 0)
00707                                 goto out;
00708 
00709                         /* Keep trying with the next char... */
00710                         goto again;
00711 
00712                 } else {
00713                         /* Keep compiler happy.... */
00714                         goto out;
00715                 }
00716         }
00717 }
00718 
00719 /**
00720  * Convert between character sets, allocating a new buffer using talloc for the result.
00721  *
00722  * @param srclen length of source buffer.
00723  * @param dest always set at least to NULL 
00724  * @note -1 is not accepted for srclen.
00725  *
00726  * @returns Size in bytes of the converted string; or -1 in case of error.
00727  **/
00728 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
00729                              void const *src, size_t srclen, void *dst,
00730                              BOOL allow_bad_conv)
00731 {
00732         void **dest = (void **)dst;
00733         size_t dest_len;
00734 
00735         *dest = NULL;
00736         dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
00737         if (dest_len == (size_t)-1)
00738                 return (size_t)-1;
00739         if (*dest == NULL)
00740                 return (size_t)-1;
00741         return dest_len;
00742 }
00743 
00744 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
00745 {
00746         size_t size;
00747         smb_ucs2_t *buffer;
00748         
00749         size = push_ucs2_allocate(&buffer, src);
00750         if (size == (size_t)-1) {
00751                 smb_panic("failed to create UCS2 buffer");
00752         }
00753         if (!strupper_w(buffer) && (dest == src)) {
00754                 free(buffer);
00755                 return srclen;
00756         }
00757         
00758         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
00759         free(buffer);
00760         return size;
00761 }
00762 
00763 /**
00764  strdup() a unix string to upper case.
00765  Max size is pstring.
00766 **/
00767 
00768 char *strdup_upper(const char *s)
00769 {
00770         pstring out_buffer;
00771         const unsigned char *p = (const unsigned char *)s;
00772         unsigned char *q = (unsigned char *)out_buffer;
00773 
00774         /* this is quite a common operation, so we want it to be
00775            fast. We optimise for the ascii case, knowing that all our
00776            supported multi-byte character sets are ascii-compatible
00777            (ie. they match for the first 128 chars) */
00778 
00779         while (1) {
00780                 if (*p & 0x80)
00781                         break;
00782                 *q++ = toupper_ascii(*p);
00783                 if (!*p)
00784                         break;
00785                 p++;
00786                 if (p - ( const unsigned char *)s >= sizeof(pstring))
00787                         break;
00788         }
00789 
00790         if (*p) {
00791                 /* MB case. */
00792                 size_t size;
00793                 wpstring buffer;
00794                 size = convert_string(CH_UNIX, CH_UTF16LE, s, -1, buffer, sizeof(buffer), True);
00795                 if (size == (size_t)-1) {
00796                         return NULL;
00797                 }
00798 
00799                 strupper_w(buffer);
00800         
00801                 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
00802                 if (size == (size_t)-1) {
00803                         return NULL;
00804                 }
00805         }
00806 
00807         return SMB_STRDUP(out_buffer);
00808 }
00809 
00810 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
00811 {
00812         size_t size;
00813         smb_ucs2_t *buffer = NULL;
00814         
00815         size = convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
00816                                        (void **)(void *)&buffer, True);
00817         if (size == (size_t)-1 || !buffer) {
00818                 smb_panic("failed to create UCS2 buffer");
00819         }
00820         if (!strlower_w(buffer) && (dest == src)) {
00821                 SAFE_FREE(buffer);
00822                 return srclen;
00823         }
00824         size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
00825         SAFE_FREE(buffer);
00826         return size;
00827 }
00828 
00829 /**
00830  strdup() a unix string to lower case.
00831 **/
00832 
00833 char *strdup_lower(const char *s)
00834 {
00835         size_t size;
00836         smb_ucs2_t *buffer = NULL;
00837         char *out_buffer;
00838         
00839         size = push_ucs2_allocate(&buffer, s);
00840         if (size == -1 || !buffer) {
00841                 return NULL;
00842         }
00843 
00844         strlower_w(buffer);
00845         
00846         size = pull_ucs2_allocate(&out_buffer, buffer);
00847         SAFE_FREE(buffer);
00848 
00849         if (size == (size_t)-1) {
00850                 return NULL;
00851         }
00852         
00853         return out_buffer;
00854 }
00855 
00856 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
00857 {
00858         if (flags & (STR_NOALIGN|STR_ASCII))
00859                 return 0;
00860         return PTR_DIFF(p, base_ptr) & 1;
00861 }
00862 
00863 
00864 /**
00865  * Copy a string from a char* unix src to a dos codepage string destination.
00866  *
00867  * @return the number of bytes occupied by the string in the destination.
00868  *
00869  * @param flags can include
00870  * <dl>
00871  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
00872  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
00873  * </dl>
00874  *
00875  * @param dest_len the maximum length in bytes allowed in the
00876  * destination.  If @p dest_len is -1 then no maximum is used.
00877  **/
00878 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
00879 {
00880         size_t src_len = strlen(src);
00881         pstring tmpbuf;
00882         size_t ret;
00883 
00884         /* No longer allow a length of -1 */
00885         if (dest_len == (size_t)-1)
00886                 smb_panic("push_ascii - dest_len == -1");
00887 
00888         if (flags & STR_UPPER) {
00889                 pstrcpy(tmpbuf, src);
00890                 strupper_m(tmpbuf);
00891                 src = tmpbuf;
00892         }
00893 
00894         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
00895                 src_len++;
00896 
00897         ret =convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
00898         if (ret == (size_t)-1 &&
00899                         (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
00900                         && dest_len > 0) {
00901                 ((char *)dest)[0] = '\0';
00902         }
00903         return ret;
00904 }
00905 
00906 size_t push_ascii_fstring(void *dest, const char *src)
00907 {
00908         return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
00909 }
00910 
00911 size_t push_ascii_pstring(void *dest, const char *src)
00912 {
00913         return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
00914 }
00915 
00916 /********************************************************************
00917  Push an nstring - ensure null terminated. Written by
00918  moriyama@miraclelinux.com (MORIYAMA Masayuki).
00919 ********************************************************************/
00920 
00921 size_t push_ascii_nstring(void *dest, const char *src)
00922 {
00923         size_t i, buffer_len, dest_len;
00924         smb_ucs2_t *buffer;
00925 
00926         conv_silent = True;
00927         buffer_len = push_ucs2_allocate(&buffer, src);
00928         if (buffer_len == (size_t)-1) {
00929                 smb_panic("failed to create UCS2 buffer");
00930         }
00931 
00932         /* We're using buffer_len below to count ucs2 characters, not bytes. */
00933         buffer_len /= sizeof(smb_ucs2_t);
00934 
00935         dest_len = 0;
00936         for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
00937                 unsigned char mb[10];
00938                 /* Convert one smb_ucs2_t character at a time. */
00939                 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
00940                 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
00941                         memcpy((char *)dest + dest_len, mb, mb_len);
00942                         dest_len += mb_len;
00943                 } else {
00944                         errno = E2BIG;
00945                         break;
00946                 }
00947         }
00948         ((char *)dest)[dest_len] = '\0';
00949 
00950         SAFE_FREE(buffer);
00951         conv_silent = False;
00952         return dest_len;
00953 }
00954 
00955 /**
00956  * Copy a string from a dos codepage source to a unix char* destination.
00957  *
00958  * The resulting string in "dest" is always null terminated.
00959  *
00960  * @param flags can have:
00961  * <dl>
00962  * <dt>STR_TERMINATE</dt>
00963  * <dd>STR_TERMINATE means the string in @p src
00964  * is null terminated, and src_len is ignored.</dd>
00965  * </dl>
00966  *
00967  * @param src_len is the length of the source area in bytes.
00968  * @returns the number of bytes occupied by the string in @p src.
00969  **/
00970 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
00971 {
00972         size_t ret;
00973 
00974         if (dest_len == (size_t)-1)
00975                 dest_len = sizeof(pstring);
00976 
00977         if (flags & STR_TERMINATE) {
00978                 if (src_len == (size_t)-1) {
00979                         src_len = strlen((const char *)src) + 1;
00980                 } else {
00981                         size_t len = strnlen((const char *)src, src_len);
00982                         if (len < src_len)
00983                                 len++;
00984                         src_len = len;
00985                 }
00986         }
00987 
00988         ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
00989         if (ret == (size_t)-1) {
00990                 ret = 0;
00991                 dest_len = 0;
00992         }
00993 
00994         if (dest_len && ret) {
00995                 /* Did we already process the terminating zero ? */
00996                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
00997                         dest[MIN(ret, dest_len-1)] = 0;
00998                 }
00999         } else  {
01000                 dest[0] = 0;
01001         }
01002 
01003         return src_len;
01004 }
01005 
01006 size_t pull_ascii_pstring(char *dest, const void *src)
01007 {
01008         return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
01009 }
01010 
01011 size_t pull_ascii_fstring(char *dest, const void *src)
01012 {
01013         return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
01014 }
01015 
01016 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
01017 
01018 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
01019 {
01020         return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
01021 }
01022 
01023 /**
01024  * Copy a string from a char* src to a unicode destination.
01025  *
01026  * @returns the number of bytes occupied by the string in the destination.
01027  *
01028  * @param flags can have:
01029  *
01030  * <dl>
01031  * <dt>STR_TERMINATE <dd>means include the null termination.
01032  * <dt>STR_UPPER     <dd>means uppercase in the destination.
01033  * <dt>STR_NOALIGN   <dd>means don't do alignment.
01034  * </dl>
01035  *
01036  * @param dest_len is the maximum length allowed in the
01037  * destination. If dest_len is -1 then no maxiumum is used.
01038  **/
01039 
01040 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
01041 {
01042         size_t len=0;
01043         size_t src_len;
01044         size_t ret;
01045 
01046         /* treat a pstring as "unlimited" length */
01047         if (dest_len == (size_t)-1)
01048                 dest_len = sizeof(pstring);
01049 
01050         if (flags & STR_TERMINATE)
01051                 src_len = (size_t)-1;
01052         else
01053                 src_len = strlen(src);
01054 
01055         if (ucs2_align(base_ptr, dest, flags)) {
01056                 *(char *)dest = 0;
01057                 dest = (void *)((char *)dest + 1);
01058                 if (dest_len)
01059                         dest_len--;
01060                 len++;
01061         }
01062 
01063         /* ucs2 is always a multiple of 2 bytes */
01064         dest_len &= ~1;
01065 
01066         ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
01067         if (ret == (size_t)-1) {
01068                 return 0;
01069         }
01070 
01071         len += ret;
01072 
01073         if (flags & STR_UPPER) {
01074                 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
01075                 size_t i;
01076 
01077                 /* We check for i < (ret / 2) below as the dest string isn't null
01078                    terminated if STR_TERMINATE isn't set. */
01079 
01080                 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
01081                         smb_ucs2_t v = toupper_w(dest_ucs2[i]);
01082                         if (v != dest_ucs2[i]) {
01083                                 dest_ucs2[i] = v;
01084                         }
01085                 }
01086         }
01087 
01088         return len;
01089 }
01090 
01091 
01092 /**
01093  * Copy a string from a unix char* src to a UCS2 destination,
01094  * allocating a buffer using talloc().
01095  *
01096  * @param dest always set at least to NULL 
01097  *
01098  * @returns The number of bytes occupied by the string in the destination
01099  *         or -1 in case of error.
01100  **/
01101 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
01102 {
01103         size_t src_len = strlen(src)+1;
01104 
01105         *dest = NULL;
01106         return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
01107 }
01108 
01109 
01110 /**
01111  * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
01112  *
01113  * @param dest always set at least to NULL 
01114  *
01115  * @returns The number of bytes occupied by the string in the destination
01116  *         or -1 in case of error.
01117  **/
01118 
01119 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
01120 {
01121         size_t src_len = strlen(src)+1;
01122 
01123         *dest = NULL;
01124         return convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
01125 }
01126 
01127 /**
01128  Copy a string from a char* src to a UTF-8 destination.
01129  Return the number of bytes occupied by the string in the destination
01130  Flags can have:
01131   STR_TERMINATE means include the null termination
01132   STR_UPPER     means uppercase in the destination
01133  dest_len is the maximum length allowed in the destination. If dest_len
01134  is -1 then no maxiumum is used.
01135 **/
01136 
01137 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
01138 {
01139         size_t src_len = strlen(src);
01140         pstring tmpbuf;
01141 
01142         /* treat a pstring as "unlimited" length */
01143         if (dest_len == (size_t)-1)
01144                 dest_len = sizeof(pstring);
01145 
01146         if (flags & STR_UPPER) {
01147                 pstrcpy(tmpbuf, src);
01148                 strupper_m(tmpbuf);
01149                 src = tmpbuf;
01150         }
01151 
01152         if (flags & STR_TERMINATE)
01153                 src_len++;
01154 
01155         return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
01156 }
01157 
01158 size_t push_utf8_fstring(void *dest, const char *src)
01159 {
01160         return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
01161 }
01162 
01163 /**
01164  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
01165  *
01166  * @param dest always set at least to NULL 
01167  *
01168  * @returns The number of bytes occupied by the string in the destination
01169  **/
01170 
01171 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
01172 {
01173         size_t src_len = strlen(src)+1;
01174 
01175         *dest = NULL;
01176         return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
01177 }
01178 
01179 /**
01180  * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
01181  *
01182  * @param dest always set at least to NULL 
01183  *
01184  * @returns The number of bytes occupied by the string in the destination
01185  **/
01186 
01187 size_t push_utf8_allocate(char **dest, const char *src)
01188 {
01189         size_t src_len = strlen(src)+1;
01190 
01191         *dest = NULL;
01192         return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);      
01193 }
01194 
01195 /**
01196  Copy a string from a ucs2 source to a unix char* destination.
01197  Flags can have:
01198   STR_TERMINATE means the string in src is null terminated.
01199   STR_NOALIGN   means don't try to align.
01200  if STR_TERMINATE is set then src_len is ignored if it is -1.
01201  src_len is the length of the source area in bytes
01202  Return the number of bytes occupied by the string in src.
01203  The resulting string in "dest" is always null terminated.
01204 **/
01205 
01206 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
01207 {
01208         size_t ret;
01209 
01210         if (dest_len == (size_t)-1)
01211                 dest_len = sizeof(pstring);
01212 
01213         if (ucs2_align(base_ptr, src, flags)) {
01214                 src = (const void *)((const char *)src + 1);
01215                 if (src_len != (size_t)-1)
01216                         src_len--;
01217         }
01218 
01219         if (flags & STR_TERMINATE) {
01220                 /* src_len -1 is the default for null terminated strings. */
01221                 if (src_len != (size_t)-1) {
01222                         size_t len = strnlen_w((const smb_ucs2_t *)src,
01223                                                 src_len/2);
01224                         if (len < src_len/2)
01225                                 len++;
01226                         src_len = len*2;
01227                 }
01228         }
01229 
01230         /* ucs2 is always a multiple of 2 bytes */
01231         if (src_len != (size_t)-1)
01232                 src_len &= ~1;
01233         
01234         ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
01235         if (ret == (size_t)-1) {
01236                 return 0;
01237         }
01238 
01239         if (src_len == (size_t)-1)
01240                 src_len = ret*2;
01241                 
01242         if (dest_len && ret) {
01243                 /* Did we already process the terminating zero ? */
01244                 if (dest[MIN(ret-1, dest_len-1)] != 0) {
01245                         dest[MIN(ret, dest_len-1)] = 0;
01246                 }
01247         } else {
01248                 dest[0] = 0;
01249         }
01250 
01251         return src_len;
01252 }
01253 
01254 size_t pull_ucs2_pstring(char *dest, const void *src)
01255 {
01256         return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
01257 }
01258 
01259 size_t pull_ucs2_fstring(char *dest, const void *src)
01260 {
01261         return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
01262 }
01263 
01264 /**
01265  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
01266  *
01267  * @param dest always set at least to NULL 
01268  *
01269  * @returns The number of bytes occupied by the string in the destination
01270  **/
01271 
01272 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
01273 {
01274         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
01275         *dest = NULL;
01276         return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
01277 }
01278 
01279 /**
01280  * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
01281  *
01282  * @param dest always set at least to NULL 
01283  *
01284  * @returns The number of bytes occupied by the string in the destination
01285  **/
01286 
01287 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
01288 {
01289         size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
01290         *dest = NULL;
01291         return convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
01292 }
01293 
01294 /**
01295  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
01296  *
01297  * @param dest always set at least to NULL 
01298  *
01299  * @returns The number of bytes occupied by the string in the destination
01300  **/
01301 
01302 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
01303 {
01304         size_t src_len = strlen(src)+1;
01305         *dest = NULL;
01306         return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
01307 }
01308 
01309 /**
01310  * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
01311  *
01312  * @param dest always set at least to NULL 
01313  *
01314  * @returns The number of bytes occupied by the string in the destination
01315  **/
01316 
01317 size_t pull_utf8_allocate(char **dest, const char *src)
01318 {
01319         size_t src_len = strlen(src)+1;
01320         *dest = NULL;
01321         return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
01322 }
01323  
01324 /**
01325  * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
01326  *
01327  * @param dest always set at least to NULL 
01328  *
01329  * @returns The number of bytes occupied by the string in the destination
01330  **/
01331 
01332 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
01333 {
01334         size_t src_len = strlen(src)+1;
01335         *dest = NULL;
01336         return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
01337 }
01338 
01339 /**
01340  Copy a string from a char* src to a unicode or ascii
01341  dos codepage destination choosing unicode or ascii based on the 
01342  flags in the SMB buffer starting at base_ptr.
01343  Return the number of bytes occupied by the string in the destination.
01344  flags can have:
01345   STR_TERMINATE means include the null termination.
01346   STR_UPPER     means uppercase in the destination.
01347   STR_ASCII     use ascii even with unicode packet.
01348   STR_NOALIGN   means don't do alignment.
01349  dest_len is the maximum length allowed in the destination. If dest_len
01350  is -1 then no maxiumum is used.
01351 **/
01352 
01353 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
01354 {
01355 #ifdef DEVELOPER
01356         /* We really need to zero fill here, not clobber
01357          * region, as we want to ensure that valgrind thinks
01358          * all of the outgoing buffer has been written to
01359          * so a send() or write() won't trap an error.
01360          * JRA.
01361          */
01362 #if 0
01363         if (dest_len != (size_t)-1)
01364                 clobber_region(function, line, dest, dest_len);
01365 #else
01366         if (dest_len != (size_t)-1)
01367                 memset(dest, '\0', dest_len);
01368 #endif
01369 #endif
01370 
01371         if (!(flags & STR_ASCII) && \
01372             ((flags & STR_UNICODE || \
01373               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
01374                 return push_ucs2(base_ptr, dest, src, dest_len, flags);
01375         }
01376         return push_ascii(dest, src, dest_len, flags);
01377 }
01378 
01379 
01380 /**
01381  Copy a string from a unicode or ascii source (depending on
01382  the packet flags) to a char* destination.
01383  Flags can have:
01384   STR_TERMINATE means the string in src is null terminated.
01385   STR_UNICODE   means to force as unicode.
01386   STR_ASCII     use ascii even with unicode packet.
01387   STR_NOALIGN   means don't do alignment.
01388  if STR_TERMINATE is set then src_len is ignored is it is -1
01389  src_len is the length of the source area in bytes.
01390  Return the number of bytes occupied by the string in src.
01391  The resulting string in "dest" is always null terminated.
01392 **/
01393 
01394 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
01395 {
01396 #ifdef DEVELOPER
01397         if (dest_len != (size_t)-1)
01398                 clobber_region(function, line, dest, dest_len);
01399 #endif
01400 
01401         if (!(flags & STR_ASCII) && \
01402             ((flags & STR_UNICODE || \
01403               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
01404                 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
01405         }
01406         return pull_ascii(dest, src, dest_len, src_len, flags);
01407 }
01408 
01409 size_t align_string(const void *base_ptr, const char *p, int flags)
01410 {
01411         if (!(flags & STR_ASCII) && \
01412             ((flags & STR_UNICODE || \
01413               (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
01414                 return ucs2_align(base_ptr, p, flags);
01415         }
01416         return 0;
01417 }
01418 
01419 /*
01420   Return the unicode codepoint for the next multi-byte CH_UNIX character
01421   in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
01422 
01423   Also return the number of bytes consumed (which tells the caller
01424   how many bytes to skip to get to the next CH_UNIX character).
01425 
01426   Return INVALID_CODEPOINT if the next character cannot be converted.
01427 */
01428 
01429 codepoint_t next_codepoint(const char *str, size_t *size)
01430 {
01431         /* It cannot occupy more than 4 bytes in UTF16 format */
01432         uint8_t buf[4];
01433         smb_iconv_t descriptor;
01434         size_t ilen_orig;
01435         size_t ilen;
01436         size_t olen;
01437         char *outbuf;
01438 
01439         if ((str[0] & 0x80) == 0) {
01440                 *size = 1;
01441                 return (codepoint_t)str[0];
01442         }
01443 
01444         /* We assume that no multi-byte character can take
01445            more than 5 bytes. This is OK as we only
01446            support codepoints up to 1M */
01447 
01448         ilen_orig = strnlen(str, 5);
01449         ilen = ilen_orig;
01450 
01451         lazy_initialize_conv();
01452 
01453         descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
01454         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
01455                 *size = 1;
01456                 return INVALID_CODEPOINT;
01457         }
01458 
01459         /* This looks a little strange, but it is needed to cope
01460            with codepoints above 64k which are encoded as per RFC2781. */
01461         olen = 2;
01462         outbuf = (char *)buf;
01463         smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
01464         if (olen == 2) {
01465                 /* We failed to convert to a 2 byte character.
01466                    See if we can convert to a 4 UTF16-LE byte char encoding.
01467                 */
01468                 olen = 4;
01469                 outbuf = (char *)buf;
01470                 smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
01471                 if (olen == 4) {
01472                         /* We didn't convert any bytes */
01473                         *size = 1;
01474                         return INVALID_CODEPOINT;
01475                 }
01476                 olen = 4 - olen;
01477         } else {
01478                 olen = 2 - olen;
01479         }
01480 
01481         *size = ilen_orig - ilen;
01482 
01483         if (olen == 2) {
01484                 /* 2 byte, UTF16-LE encoded value. */
01485                 return (codepoint_t)SVAL(buf, 0);
01486         }
01487         if (olen == 4) {
01488                 /* Decode a 4 byte UTF16-LE character manually.
01489                    See RFC2871 for the encoding machanism.
01490                 */
01491                 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
01492                 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
01493 
01494                 return (codepoint_t)0x10000 +
01495                                 (w1 << 10) + w2;
01496         }
01497 
01498         /* no other length is valid */
01499         return INVALID_CODEPOINT;
01500 }