00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "includes.h"
00023
00024
00025
00026
00027
00028
00029 #undef strcasecmp
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
/*
 * Project macro; presumably declares the init hooks of statically linked
 * charset modules (it is paired with static_init_charset further down).
 * NOTE(review): confirm against the build-generated headers.
 */
static_decl_charset;

/*
 * Forward declarations of the built-in converters.
 *
 * All of them share the same iconv()-like calling convention:
 *   cd            opaque per-conversion state (unused by the built-ins)
 *   inbuf         in/out: current read position, advanced past consumed bytes
 *   inbytesleft   in/out: number of unread input bytes
 *   outbuf        in/out: current write position, advanced past produced bytes
 *   outbytesleft  in/out: free space remaining in the output buffer
 * They return (size_t)-1 with errno set on failure.
 *
 * "pull" converters turn the named charset into 2-byte little-endian
 * units; "push" converters go the other way.
 */
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
			  char **outbuf, size_t *outbytesleft);
static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft);
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
00065
00066 static struct charset_functions builtin_functions[] = {
00067
00068 {"UCS-2LE", iconv_copy, iconv_copy},
00069 {"UTF-16LE", iconv_copy, iconv_copy},
00070 {"UCS-2BE", iconv_swab, iconv_swab},
00071 {"UTF-16BE", iconv_swab, iconv_swab},
00072
00073
00074 {"UTF8", utf8_pull, utf8_push},
00075 {"UTF-8", utf8_pull, utf8_push},
00076 {"ASCII", ascii_pull, ascii_push},
00077 {"646", ascii_pull, ascii_push},
00078 {"ISO-8859-1", ascii_pull, latin1_push},
00079 {"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
00080 {NULL, NULL, NULL}
00081 };
00082
00083 static struct charset_functions *charsets = NULL;
00084
00085 static struct charset_functions *find_charset_functions(const char *name)
00086 {
00087 struct charset_functions *c = charsets;
00088
00089 while(c) {
00090 if (strcasecmp(name, c->name) == 0) {
00091 return c;
00092 }
00093 c = c->next;
00094 }
00095
00096 return NULL;
00097 }
00098
00099 NTSTATUS smb_register_charset(struct charset_functions *funcs)
00100 {
00101 if (!funcs) {
00102 return NT_STATUS_INVALID_PARAMETER;
00103 }
00104
00105 DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
00106
00107 if (find_charset_functions(funcs->name)) {
00108 DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
00109 return NT_STATUS_OBJECT_NAME_COLLISION;
00110 }
00111
00112 funcs->next = funcs->prev = NULL;
00113 DEBUG(5, ("Registered charset %s\n", funcs->name));
00114 DLIST_ADD(charsets, funcs);
00115 return NT_STATUS_OK;
00116 }
00117
00118 static void lazy_initialize_iconv(void)
00119 {
00120 static BOOL initialized;
00121 int i;
00122
00123 if (!initialized) {
00124 initialized = True;
00125 for(i = 0; builtin_functions[i].name; i++)
00126 smb_register_charset(&builtin_functions[i]);
00127 static_init_charset;
00128 }
00129 }
00130
00131 #ifdef HAVE_NATIVE_ICONV
00132
00133
00134
/*
 * Adapter that gives the system iconv() the same signature as the
 * built-in converters.
 *
 * cd is the iconv_t descriptor passed as an opaque pointer; the other
 * arguments are forwarded to iconv() unchanged (inbuf loses its const
 * qualifier because iconv() takes char **).
 *
 * Returns whatever iconv() returned.  On failure the descriptor is
 * reset to its initial shift state and the errno of the failed
 * conversion is preserved across that reset call.
 */
static size_t sys_iconv(void *cd,
			const char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft)
{
	iconv_t handle = (iconv_t)cd;
	size_t converted;
	int saved_errno;

	converted = iconv(handle, (char **)inbuf, inbytesleft,
			  outbuf, outbytesleft);
	if (converted != (size_t)-1) {
		return converted;
	}

	saved_errno = errno;
	iconv(handle, NULL, NULL, NULL, NULL);
	errno = saved_errno;

	return converted;
}
00149 #endif
00150
00151
00152
00153
00154
00155
00156
00157 size_t smb_iconv(smb_iconv_t cd,
00158 const char **inbuf, size_t *inbytesleft,
00159 char **outbuf, size_t *outbytesleft)
00160 {
00161 char cvtbuf[2048];
00162 char *bufp = cvtbuf;
00163 size_t bufsize;
00164
00165
00166 if (cd->direct) {
00167 return cd->direct(cd->cd_direct,
00168 inbuf, inbytesleft, outbuf, outbytesleft);
00169 }
00170
00171
00172
00173 while (*inbytesleft > 0) {
00174 bufp = cvtbuf;
00175 bufsize = sizeof(cvtbuf);
00176
00177 if (cd->pull(cd->cd_pull,
00178 inbuf, inbytesleft, &bufp, &bufsize) == -1
00179 && errno != E2BIG) return -1;
00180
00181 bufp = cvtbuf;
00182 bufsize = sizeof(cvtbuf) - bufsize;
00183
00184 if (cd->push(cd->cd_push,
00185 (const char **)&bufp, &bufsize,
00186 outbuf, outbytesleft) == -1) return -1;
00187 }
00188
00189 return 0;
00190 }
00191
00192
00193 static BOOL is_utf16(const char *name)
00194 {
00195 return strcasecmp(name, "UCS-2LE") == 0 ||
00196 strcasecmp(name, "UTF-16LE") == 0;
00197 }
00198
00199
00200
00201
00202 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode)
00203 {
00204 smb_iconv_t ret;
00205 struct charset_functions *from, *to;
00206
00207 lazy_initialize_iconv();
00208 from = charsets;
00209 to = charsets;
00210
00211 ret = SMB_MALLOC_P(struct _smb_iconv_t);
00212 if (!ret) {
00213 errno = ENOMEM;
00214 return (smb_iconv_t)-1;
00215 }
00216 memset(ret, 0, sizeof(struct _smb_iconv_t));
00217
00218 ret->from_name = SMB_STRDUP(fromcode);
00219 ret->to_name = SMB_STRDUP(tocode);
00220
00221
00222 if (strcasecmp(fromcode, tocode) == 0) {
00223 ret->direct = iconv_copy;
00224 return ret;
00225 }
00226
00227
00228 from = find_charset_functions(fromcode);
00229 if(from)ret->pull = from->pull;
00230
00231 to = find_charset_functions(tocode);
00232 if(to)ret->push = to->push;
00233
00234
00235 #ifdef HAVE_NATIVE_ICONV
00236 if (!ret->pull) {
00237 ret->cd_pull = iconv_open("UTF-16LE", fromcode);
00238 if (ret->cd_pull == (iconv_t)-1)
00239 ret->cd_pull = iconv_open("UCS-2LE", fromcode);
00240 if (ret->cd_pull != (iconv_t)-1)
00241 ret->pull = sys_iconv;
00242 }
00243
00244 if (!ret->push) {
00245 ret->cd_push = iconv_open(tocode, "UTF-16LE");
00246 if (ret->cd_push == (iconv_t)-1)
00247 ret->cd_push = iconv_open(tocode, "UCS-2LE");
00248 if (ret->cd_push != (iconv_t)-1)
00249 ret->push = sys_iconv;
00250 }
00251 #endif
00252
00253
00254 if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
00255 if(!(from = find_charset_functions(fromcode)))
00256 DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
00257 else
00258 ret->pull = from->pull;
00259 }
00260
00261 if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
00262 if(!(to = find_charset_functions(tocode)))
00263 DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
00264 else
00265 ret->push = to->push;
00266 }
00267
00268 if (!ret->push || !ret->pull) {
00269 SAFE_FREE(ret->from_name);
00270 SAFE_FREE(ret->to_name);
00271 SAFE_FREE(ret);
00272 errno = EINVAL;
00273 return (smb_iconv_t)-1;
00274 }
00275
00276
00277 if (is_utf16(fromcode) && to) {
00278 ret->direct = to->push;
00279 ret->push = ret->pull = NULL;
00280 return ret;
00281 }
00282
00283 if (is_utf16(tocode) && from) {
00284 ret->direct = from->pull;
00285 ret->push = ret->pull = NULL;
00286 return ret;
00287 }
00288
00289
00290 #ifdef HAVE_NATIVE_ICONV
00291 if (is_utf16(fromcode)) {
00292 ret->direct = sys_iconv;
00293 ret->cd_direct = ret->cd_push;
00294 ret->cd_push = NULL;
00295 return ret;
00296 }
00297 if (is_utf16(tocode)) {
00298 ret->direct = sys_iconv;
00299 ret->cd_direct = ret->cd_pull;
00300 ret->cd_pull = NULL;
00301 return ret;
00302 }
00303 #endif
00304
00305 return ret;
00306 }
00307
00308
00309
00310
00311 int smb_iconv_close (smb_iconv_t cd)
00312 {
00313 #ifdef HAVE_NATIVE_ICONV
00314 if (cd->cd_direct) iconv_close((iconv_t)cd->cd_direct);
00315 if (cd->cd_pull) iconv_close((iconv_t)cd->cd_pull);
00316 if (cd->cd_push) iconv_close((iconv_t)cd->cd_push);
00317 #endif
00318
00319 SAFE_FREE(cd->from_name);
00320 SAFE_FREE(cd->to_name);
00321
00322 memset(cd, 0, sizeof(*cd));
00323 SAFE_FREE(cd);
00324 return 0;
00325 }
00326
00327
00328
00329
00330
00331
00332
00333
/*
 * Widen single bytes to 2-byte little-endian units: each input byte is
 * copied to the low byte and the high byte is set to zero.  No check is
 * made on the value of the input byte.
 *
 * Returns 0 when all input was consumed, or (size_t)-1 with
 * errno == E2BIG when input remains because the output is full.
 * The in/out arguments are updated in both cases.
 */
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t in_left = *inbytesleft;
	size_t out_left = *outbytesleft;

	for (; in_left >= 1 && out_left >= 2; in_left -= 1, out_left -= 2) {
		*dst++ = *src++;
		*dst++ = 0;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = in_left;
	*outbytesleft = out_left;

	if (in_left > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
00353
/*
 * Narrow 2-byte little-endian units to 7-bit bytes: the low byte is
 * masked with 0x7F and written out, the high byte is dropped.
 *
 * Returns the number of units whose high byte was non-zero, or
 * (size_t)-1 with errno set:
 *   EINVAL  a single trailing input byte is left over
 *   E2BIG   whole units remain because the output is full
 * The in/out arguments are updated in every case.
 */
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t in_left = *inbytesleft;
	size_t out_left = *outbytesleft;
	int lossy = 0;

	for (; in_left >= 2 && out_left >= 1; in_left -= 2, out_left -= 1) {
		char low = src[0];
		char high = src[1];

		*dst++ = low & 0x7F;
		if (high) {
			lossy++;
		}
		src += 2;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = in_left;
	*outbytesleft = out_left;

	if (in_left == 1) {
		errno = EINVAL;
		return -1;
	}
	if (in_left > 1) {
		errno = E2BIG;
		return -1;
	}

	return lossy;
}
00380
/*
 * Narrow 2-byte little-endian units to single bytes: the low byte is
 * written out unchanged, the high byte is dropped.
 *
 * Returns the number of units whose high byte was non-zero, or
 * (size_t)-1 with errno set:
 *   EINVAL  a single trailing input byte is left over
 *   E2BIG   whole units remain because the output is full
 * The in/out arguments are updated in every case.
 */
static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
			  char **outbuf, size_t *outbytesleft)
{
	const char *src = *inbuf;
	char *dst = *outbuf;
	size_t in_left = *inbytesleft;
	size_t out_left = *outbytesleft;
	int lossy = 0;

	for (; in_left >= 2 && out_left >= 1; in_left -= 2, out_left -= 1) {
		char low = src[0];
		char high = src[1];

		*dst++ = low;
		if (high) {
			lossy++;
		}
		src += 2;
	}

	*inbuf = src;
	*outbuf = dst;
	*inbytesleft = in_left;
	*outbytesleft = out_left;

	if (in_left == 1) {
		errno = EINVAL;
		return -1;
	}
	if (in_left > 1) {
		errno = E2BIG;
		return -1;
	}

	return lossy;
}
00407
/* Value of one hexadecimal digit, or -1 if ch is not a hex digit. */
static int ucs2hex_digit(char ch)
{
	if (ch >= '0' && ch <= '9') {
		return ch - '0';
	}
	if (ch >= 'a' && ch <= 'f') {
		return ch - 'a' + 10;
	}
	if (ch >= 'A' && ch <= 'F') {
		return ch - 'A' + 10;
	}
	return -1;
}

/*
 * Decode the "UCS2-HEX" form into 2-byte little-endian units.
 *
 * Any byte other than '@' is widened as is (high byte zero).  An '@'
 * introduces a 16-bit value written as exactly four hexadecimal digits.
 *
 * The digits are parsed by hand rather than with sscanf(): the input is
 * a counted buffer that need not be NUL-terminated, and "%04x" would
 * also accept leading whitespace, a sign or fewer than four digits
 * while five bytes are consumed regardless.
 *
 * Returns 0 when all input was consumed, or (size_t)-1 with errno set:
 *   EINVAL  an '@' is followed by fewer than four bytes
 *   EILSEQ  an '@' is not followed by four hexadecimal digits
 *   E2BIG   input remains because the output is full
 */
static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 1 && *outbytesleft >= 2) {
		unsigned v = 0;
		int i;

		if ((*inbuf)[0] != '@') {
			/* ordinary byte, copied through */
			(*outbuf)[0] = (*inbuf)[0];
			(*outbuf)[1] = 0;
			(*inbytesleft)  -= 1;
			(*outbytesleft) -= 2;
			(*inbuf)  += 1;
			(*outbuf) += 2;
			continue;
		}

		if (*inbytesleft < 5) {
			errno = EINVAL;
			return -1;
		}

		for (i = 1; i <= 4; i++) {
			int digit = ucs2hex_digit((*inbuf)[i]);

			if (digit < 0) {
				errno = EILSEQ;
				return -1;
			}
			v = (v << 4) | (unsigned)digit;
		}

		(*outbuf)[0] = v & 0xff;
		(*outbuf)[1] = v >> 8;
		(*inbytesleft)  -= 5;
		(*outbytesleft) -= 2;
		(*inbuf)  += 5;
		(*outbuf) += 2;
	}

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
00450
/*
 * Encode 2-byte little-endian units in the "UCS2-HEX" form.
 *
 * A unit whose high byte is zero and whose low byte is below 0x80 and
 * is not '@' is written as that single byte.  Every other unit is
 * written as '@' followed by four lowercase hexadecimal digits of the
 * 16-bit value read with SVAL().
 *
 * Returns 0 when all input was consumed, or (size_t)-1 with errno set:
 *   E2BIG   the output cannot take the next unit
 *   EINVAL  a single trailing input byte is left over
 */
static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft)
{
	while (*inbytesleft >= 2 && *outbytesleft >= 1) {
		char buf[6];
		const char *unit = *inbuf;
		size_t produced;

		if (unit[1] == 0 && (unit[0] & 0x80) == 0 && unit[0] != '@') {
			/* plain 7-bit character that needs no escaping */
			(*outbuf)[0] = unit[0];
			produced = 1;
		} else {
			if (*outbytesleft < 5) {
				errno = E2BIG;
				return -1;
			}
			/* format into a scratch buffer so the NUL is not copied out */
			snprintf(buf, 6, "@%04x", SVAL(*inbuf, 0));
			memcpy(*outbuf, buf, 5);
			produced = 5;
		}

		(*inbytesleft)  -= 2;
		(*outbytesleft) -= produced;
		(*inbuf)  += 2;
		(*outbuf) += produced;
	}

	if (*inbytesleft == 1) {
		errno = EINVAL;
		return -1;
	}

	if (*inbytesleft > 1) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
00491
/*
 * Copy input to output while swapping the two bytes of every 16-bit
 * unit (conversion between big- and little-endian).
 *
 * The number of bytes handled is the smaller of *inbytesleft and
 * *outbytesleft.  When that number is odd, the unpaired input byte is
 * consumed and a zero byte is written in its place.
 *
 * The count is kept as size_t and the swap is done with an explicit
 * loop: storing the length in an int truncates large buffers, and
 * swab() takes a signed count and does not allow overlapping buffers.
 *
 * Returns 0 when all input was consumed, or (size_t)-1 with
 * errno == E2BIG when input remains because the output is full.
 */
static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;
	size_t paired = n & ~(size_t)1;
	size_t i;

	for (i = 0; i < paired; i += 2) {
		/* read both bytes first so in-place conversion works */
		char first = (*inbuf)[i];
		char second = (*inbuf)[i + 1];

		(*outbuf)[i] = second;
		(*outbuf)[i + 1] = first;
	}
	if (n & 1) {
		(*outbuf)[n - 1] = 0;
	}

	(*inbytesleft)  -= n;
	(*outbytesleft) -= n;
	(*inbuf)  += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
00516
/*
 * Copy input to output unchanged.
 *
 * The number of bytes copied is the smaller of *inbytesleft and
 * *outbytesleft; it is kept as size_t because storing it in an int
 * truncates large buffer lengths.  memmove() is used, so the buffers
 * may overlap.
 *
 * Returns 0 when all input was consumed, or (size_t)-1 with
 * errno == E2BIG when input remains because the output is full.
 */
static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft)
{
	size_t n = (*inbytesleft < *outbytesleft) ? *inbytesleft : *outbytesleft;

	memmove(*outbuf, *inbuf, n);

	(*inbytesleft)  -= n;
	(*outbytesleft) -= n;
	(*inbuf)  += n;
	(*outbuf) += n;

	if (*inbytesleft > 0) {
		errno = E2BIG;
		return -1;
	}

	return 0;
}
00538
00539 static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
00540 char **outbuf, size_t *outbytesleft)
00541 {
00542 size_t in_left=*inbytesleft, out_left=*outbytesleft;
00543 const uint8 *c = (const uint8 *)*inbuf;
00544 uint8 *uc = (uint8 *)*outbuf;
00545
00546 while (in_left >= 1 && out_left >= 2) {
00547 unsigned int codepoint;
00548
00549 if ((c[0] & 0x80) == 0) {
00550 uc[0] = c[0];
00551 uc[1] = 0;
00552 c += 1;
00553 in_left -= 1;
00554 out_left -= 2;
00555 uc += 2;
00556 continue;
00557 }
00558
00559 if ((c[0] & 0xe0) == 0xc0) {
00560 if (in_left < 2 ||
00561 (c[1] & 0xc0) != 0x80) {
00562 errno = EILSEQ;
00563 goto error;
00564 }
00565 codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
00566 if (codepoint < 0x80) {
00567
00568 errno = EILSEQ;
00569 goto error;
00570 }
00571 uc[1] = codepoint >> 8;
00572 uc[0] = codepoint & 0xff;
00573 c += 2;
00574 in_left -= 2;
00575 out_left -= 2;
00576 uc += 2;
00577 continue;
00578 }
00579
00580 if ((c[0] & 0xf0) == 0xe0) {
00581 if (in_left < 3 ||
00582 (c[1] & 0xc0) != 0x80 ||
00583 (c[2] & 0xc0) != 0x80) {
00584 errno = EILSEQ;
00585 goto error;
00586 }
00587 codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
00588 if (codepoint < 0x800) {
00589
00590 errno = EILSEQ;
00591 goto error;
00592 }
00593 uc[1] = codepoint >> 8;
00594 uc[0] = codepoint & 0xff;
00595 c += 3;
00596 in_left -= 3;
00597 out_left -= 2;
00598 uc += 2;
00599 continue;
00600 }
00601
00602 if ((c[0] & 0xf8) == 0xf0) {
00603 if (in_left < 4 ||
00604 (c[1] & 0xc0) != 0x80 ||
00605 (c[2] & 0xc0) != 0x80 ||
00606 (c[3] & 0xc0) != 0x80) {
00607 errno = EILSEQ;
00608 goto error;
00609 }
00610 codepoint =
00611 (c[3]&0x3f) |
00612 ((c[2]&0x3f)<<6) |
00613 ((c[1]&0x3f)<<12) |
00614 ((c[0]&0x7)<<18);
00615 if (codepoint < 0x10000 || codepoint > 0x10ffff) {
00616
00617 errno = EILSEQ;
00618 goto error;
00619 }
00620
00621 codepoint -= 0x10000;
00622
00623 if (out_left < 4) {
00624 errno = E2BIG;
00625 goto error;
00626 }
00627
00628 uc[0] = (codepoint>>10) & 0xFF;
00629 uc[1] = (codepoint>>18) | 0xd8;
00630 uc[2] = codepoint & 0xFF;
00631 uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
00632 c += 4;
00633 in_left -= 4;
00634 out_left -= 4;
00635 uc += 4;
00636 continue;
00637 }
00638
00639
00640 errno = EINVAL;
00641 goto error;
00642 }
00643
00644 if (in_left > 0) {
00645 errno = E2BIG;
00646 goto error;
00647 }
00648
00649 *inbytesleft = in_left;
00650 *outbytesleft = out_left;
00651 *inbuf = (char *)c;
00652 *outbuf = (char *)uc;
00653 return 0;
00654
00655 error:
00656 *inbytesleft = in_left;
00657 *outbytesleft = out_left;
00658 *inbuf = (char *)c;
00659 *outbuf = (char *)uc;
00660 return -1;
00661 }
00662
00663 static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
00664 char **outbuf, size_t *outbytesleft)
00665 {
00666 size_t in_left=*inbytesleft, out_left=*outbytesleft;
00667 uint8 *c = (uint8 *)*outbuf;
00668 const uint8 *uc = (const uint8 *)*inbuf;
00669
00670 while (in_left >= 2 && out_left >= 1) {
00671 unsigned int codepoint;
00672
00673 if (uc[1] == 0 && !(uc[0] & 0x80)) {
00674
00675 c[0] = uc[0];
00676 in_left -= 2;
00677 out_left -= 1;
00678 uc += 2;
00679 c += 1;
00680 continue;
00681 }
00682
00683 if ((uc[1]&0xf8) == 0) {
00684
00685 if (out_left < 2) {
00686 errno = E2BIG;
00687 goto error;
00688 }
00689 c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
00690 c[1] = 0x80 | (uc[0] & 0x3f);
00691 in_left -= 2;
00692 out_left -= 2;
00693 uc += 2;
00694 c += 2;
00695 continue;
00696 }
00697
00698 if ((uc[1] & 0xfc) == 0xdc) {
00699
00700 if (in_left < 4) {
00701 errno = EINVAL;
00702 } else {
00703 errno = EILSEQ;
00704 }
00705 goto error;
00706 }
00707
00708 if ((uc[1] & 0xfc) != 0xd8) {
00709 codepoint = uc[0] | (uc[1]<<8);
00710 if (out_left < 3) {
00711 errno = E2BIG;
00712 goto error;
00713 }
00714 c[0] = 0xe0 | (codepoint >> 12);
00715 c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
00716 c[2] = 0x80 | (codepoint & 0x3f);
00717
00718 in_left -= 2;
00719 out_left -= 3;
00720 uc += 2;
00721 c += 3;
00722 continue;
00723 }
00724
00725
00726 if (in_left < 4) {
00727 errno = EINVAL;
00728 goto error;
00729 }
00730 if ((uc[3] & 0xfc) != 0xdc) {
00731 errno = EILSEQ;
00732 goto error;
00733 }
00734 codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
00735 (uc[0]<<10) | ((uc[1] & 0x3)<<18));
00736
00737 if (out_left < 4) {
00738 errno = E2BIG;
00739 goto error;
00740 }
00741 c[0] = 0xf0 | (codepoint >> 18);
00742 c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
00743 c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
00744 c[3] = 0x80 | (codepoint & 0x3f);
00745
00746 in_left -= 4;
00747 out_left -= 4;
00748 uc += 4;
00749 c += 4;
00750 }
00751
00752 if (in_left == 1) {
00753 errno = EINVAL;
00754 goto error;
00755 }
00756
00757 if (in_left > 1) {
00758 errno = E2BIG;
00759 goto error;
00760 }
00761
00762 *inbytesleft = in_left;
00763 *outbytesleft = out_left;
00764 *inbuf = (char *)uc;
00765 *outbuf = (char *)c;
00766
00767 return 0;
00768
00769 error:
00770 *inbytesleft = in_left;
00771 *outbytesleft = out_left;
00772 *inbuf = (char *)uc;
00773 *outbuf = (char *)c;
00774 return -1;
00775 }
00776