00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "Python.h"
00027
00028
00029
00030
00031 #ifdef HAVE_FSTAT
00032 #undef HAVE_FSTAT
00033 #endif
00034
00035
00036
00037
00038 #include "include/config.h"
00039
/*
 * FUNCTION_MACRO is the location tag placed at the front of several error
 * messages in this file.  It expands to __FUNCTION__ when HAVE_FUNCTION_MACRO
 * is defined and to __FILE__ otherwise.
 */
#if defined(HAVE_FUNCTION_MACRO)
#  define FUNCTION_MACRO (__FUNCTION__)
#else
#  define FUNCTION_MACRO (__FILE__)
#endif
00045
00046 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
00047 static PyObject * pytdbpack_str(char ch,
00048 PyObject *val_iter, PyObject *packed_list,
00049 const char *encoding);
00050 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
00051
00052 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
00053
00054 static PyObject *pytdbpack_data(const char *format_str,
00055 PyObject *val_seq,
00056 PyObject *val_list);
00057
00058 static PyObject *
00059 pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
00060
00061 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
00062
00063
00064 static PyObject *pytdbpack_bad_type(char ch,
00065 const char *expected,
00066 PyObject *val_obj);
00067
/*
 * Module-level docstring, passed to Py_InitModule3() by inittdbpack().
 *
 * The "Pointer" entry is listed under lowercase 'p', which is the character
 * the pack and unpack code in this file actually dispatches on; uppercase 'P'
 * is handled as a string code, like 'f'.
 */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format. It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
" 'f': NUL-terminated string in codepage iso8859-1\n"
" \n"
" 'P': same as 'f'\n"
"\n"
" 'F': NUL-terminated string in iso-8859-1\n"
"\n"
" 'd': 4 byte little-endian unsigned number\n"
"\n"
" 'w': 2 byte little-endian unsigned number\n"
"\n"
" 'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
" really just an \"exists\" or \"does not exist\" flag. The boolean\n"
" value of the Python object is used.\n"
" \n"
" 'B': 4-byte LE length, followed by that many bytes of binary data.\n"
" Corresponds to a Python integer giving the length, followed by a byte\n"
" string of the appropriate length.\n"
"\n"
" '$': Special flag indicating that the preceding format code should be\n"
" repeated while data remains. This is only supported for unpacking.\n"
"\n"
" Every code corresponds to a single Python object, except 'B' which\n"
" corresponds to two values (length and contents), and '$', which produces\n"
" however many make sense.\n";
00112
/* Docstring for the "pack" entry in the module's method table. */
static const char pytdbpack_doc[] =
	"pack(format, values) -> buffer\n"
	"Pack Python objects into Samba binary format according to format string.\n"
	"\n"
	"arguments:\n"
	" format -- string of tdbpack format characters\n"
	" values -- sequence of value objects corresponding 1:1 to format characters\n"
	"\n"
	"returns:\n"
	" buffer -- string containing packed data\n"
	"\n"
	"raises:\n"
	" IndexError -- if there are too few values for the format\n"
	" ValueError -- if any of the format characters is illegal\n"
	" TypeError -- if the format is not a string, or values is not a sequence,\n"
	" or any of the values is of the wrong type for the corresponding\n"
	" format character\n"
	"\n"
	"notes:\n"
	" For historical reasons, it is not an error to pass more values than are consumed\n"
	" by the format.\n";
00134
00135
/* Docstring for the "unpack" entry in the module's method table. */
static const char pytdbunpack_doc[] =
	"unpack(format, buffer) -> (values, rest)\n"
	"Unpack Samba binary data according to format string.\n"
	"\n"
	"arguments:\n"
	" format -- string of tdbpack characters\n"
	" buffer -- string of packed binary data\n"
	"\n"
	"returns:\n"
	" 2-tuple of:\n"
	" values -- sequence of values corresponding 1:1 to format characters\n"
	" rest -- string containing data that was not decoded, or '' if the\n"
	" whole string was consumed\n"
	"\n"
	"raises:\n"
	" IndexError -- if there is insufficient data in the buffer for the\n"
	" format (or if the data is corrupt and contains a variable-length\n"
	" field extending past the end)\n"
	" ValueError -- if any of the format characters is illegal\n"
	"\n"
	"notes:\n"
	" Because unconsumed data is returned, you can feed it back in to the\n"
	" unpacker to extract further fields. Alternatively, if you wish to modify\n"
	" some fields near the start of the data, you may be able to save time by\n"
	" only unpacking and repacking the necessary part.\n";
00161
00162
/*
 * Name of the codec for the DOS codepage.  Nothing in the visible part of
 * this file reads it; it has external linkage, so other translation units
 * may.
 */
const char *pytdb_dos_encoding = "cp850";

/*
 * Codec name used for the 'f' and 'P' string formats when packing and
 * unpacking.  NULL is passed straight through to the Python codec calls; the
 * module docstring describes this as the default Python encoding.
 */
const char *pytdb_unix_encoding = NULL;
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177 static PyObject *
00178 pytdbpack(PyObject *self,
00179 PyObject *args)
00180 {
00181 char *format_str;
00182 PyObject *val_seq, *val_iter = NULL,
00183 *packed_list = NULL, *packed_str = NULL,
00184 *empty_str = NULL;
00185
00186
00187 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
00188 return NULL;
00189
00190 if (!(val_iter = PyObject_GetIter(val_seq)))
00191 goto out;
00192
00193
00194 if (!(packed_list = PyList_New(0)))
00195 goto out;
00196
00197 if (!pytdbpack_data(format_str, val_iter, packed_list))
00198 goto out;
00199
00200
00201 if (!(empty_str = PyString_InternFromString("")))
00202 goto out;
00203
00204 packed_str = _PyString_Join(empty_str, packed_list);
00205
00206 out:
00207 Py_XDECREF(empty_str);
00208 Py_XDECREF(val_iter);
00209 Py_XDECREF(packed_list);
00210
00211 return packed_str;
00212 }
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227 PyObject *
00228 pytdbpack_data(const char *format_str,
00229 PyObject *val_iter,
00230 PyObject *packed_list)
00231 {
00232 int format_i, val_i = 0;
00233
00234 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
00235 char ch = format_str[format_i];
00236
00237 switch (ch) {
00238
00239
00240
00241 case 'w':
00242 case 'd':
00243 case 'p':
00244 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
00245 return NULL;
00246 break;
00247
00248 case 'f':
00249 case 'P':
00250 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
00251 return NULL;
00252 break;
00253
00254 case 'B':
00255 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
00256 return NULL;
00257 break;
00258
00259 default:
00260 PyErr_Format(PyExc_ValueError,
00261 "%s: format character '%c' is not supported",
00262 FUNCTION_MACRO, ch);
00263 return NULL;
00264 }
00265 }
00266
00267 return packed_list;
00268 }
00269
00270
00271 static PyObject *
00272 pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
00273 {
00274 unsigned long val_long;
00275 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
00276 PyObject *new_list = NULL;
00277 unsigned char pack_buf[4];
00278
00279 if (!(val_obj = PyIter_Next(val_iter)))
00280 goto out;
00281
00282 if (!(long_obj = PyNumber_Long(val_obj))) {
00283 pytdbpack_bad_type(ch, "Number", val_obj);
00284 goto out;
00285 }
00286
00287 val_long = PyLong_AsUnsignedLong(long_obj);
00288 pack_le_uint32(val_long, pack_buf);
00289
00290
00291
00292
00293 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
00294 goto out;
00295
00296 if (PyList_Append(packed_list, result_obj) != -1)
00297 new_list = packed_list;
00298
00299 out:
00300 Py_XDECREF(val_obj);
00301 Py_XDECREF(long_obj);
00302 Py_XDECREF(result_obj);
00303
00304 return new_list;
00305 }
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325 static PyObject *
00326 pytdbpack_str(char ch,
00327 PyObject *val_iter, PyObject *packed_list, const char *encoding)
00328 {
00329 PyObject *val_obj = NULL;
00330 PyObject *unicode_obj = NULL;
00331 PyObject *coded_str = NULL;
00332 PyObject *nul_str = NULL;
00333 PyObject *new_list = NULL;
00334
00335 if (!(val_obj = PyIter_Next(val_iter)))
00336 goto out;
00337
00338 if (PyUnicode_Check(val_obj)) {
00339 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
00340 goto out;
00341 }
00342 else if (PyString_Check(val_obj) && !encoding) {
00343
00344
00345
00346 coded_str = val_obj;
00347 Py_INCREF(coded_str);
00348 }
00349 else if (PyString_Check(val_obj)) {
00350
00351 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
00352 goto out;
00353 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
00354 goto out;
00355 }
00356 else {
00357 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
00358 goto out;
00359 }
00360
00361 if (!nul_str)
00362
00363 if (!(nul_str = PyString_FromStringAndSize("", 1)))
00364 goto out;
00365
00366 if ((PyList_Append(packed_list, coded_str) != -1)
00367 && (PyList_Append(packed_list, nul_str) != -1))
00368 new_list = packed_list;
00369
00370 out:
00371 Py_XDECREF(val_obj);
00372 Py_XDECREF(unicode_obj);
00373 Py_XDECREF(coded_str);
00374
00375 return new_list;
00376 }
00377
00378
00379
00380
00381
00382
00383
00384
00385 static PyObject *
00386 pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
00387 {
00388 PyObject *val_obj;
00389 PyObject *new_list = NULL;
00390
00391
00392 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
00393 return NULL;
00394
00395
00396
00397 if (!(val_obj = PyIter_Next(val_iter)))
00398 return NULL;
00399
00400 if (!PyString_Check(val_obj)) {
00401 pytdbpack_bad_type('B', "String", val_obj);
00402 goto out;
00403 }
00404
00405 if (PyList_Append(packed_list, val_obj) != -1)
00406 new_list = packed_list;
00407
00408 out:
00409 Py_XDECREF(val_obj);
00410 return new_list;
00411 }
00412
00413
00414 static PyObject *pytdbpack_bad_type(char ch,
00415 const char *expected,
00416 PyObject *val_obj)
00417 {
00418 PyObject *r = PyObject_Repr(val_obj);
00419 if (!r)
00420 return NULL;
00421 PyErr_Format(PyExc_TypeError,
00422 "tdbpack: format '%c' requires %s, not %s",
00423 ch, expected, PyString_AS_STRING(r));
00424 Py_DECREF(r);
00425 return val_obj;
00426 }
00427
00428
00429
00430
00431
00432
00433
00434
/*
 * Write the low 32 bits of val_long into pbuf[0..3], least significant byte
 * first.  Any higher bits of val_long are discarded.
 */
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
	int byte_i;

	for (byte_i = 0; byte_i < 4; byte_i++)
		pbuf[byte_i] = (val_long >> (8 * byte_i)) & 0xff;
}
00442
00443
#if 0
/*
 * Compiled out.  Copies len bytes from `from` to the position *pbuf and then
 * advances *pbuf past the copied bytes.
 */
static void pack_bytes(long len, const char *from,
		       unsigned char **pbuf)
{
	unsigned char *dest = *pbuf;

	memcpy(dest, from, len);
	*pbuf = dest + len;
}
#endif
00452
00453
00454 static PyObject *
00455 pytdbunpack(PyObject *self,
00456 PyObject *args)
00457 {
00458 char *format_str, *packed_str, *ppacked;
00459 PyObject *val_list = NULL, *ret_tuple = NULL;
00460 PyObject *rest_string = NULL;
00461 int format_len, packed_len;
00462 char last_format = '#';
00463 int i;
00464
00465
00466 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
00467 return NULL;
00468
00469 format_len = strlen(format_str);
00470
00471
00472
00473 val_list = PyList_New(0);
00474 if (!val_list)
00475 goto failed;
00476 ret_tuple = PyTuple_New(2);
00477 if (!ret_tuple)
00478 goto failed;
00479
00480
00481 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
00482 last_format = format_str[i];
00483
00484 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
00485 goto failed;
00486 }
00487
00488
00489 if (format_str[i] == '$') {
00490 if (i == 0) {
00491 PyErr_Format(PyExc_ValueError,
00492 "%s: '$' may not be first character in format",
00493 FUNCTION_MACRO);
00494 return NULL;
00495 }
00496 while (packed_len > 0)
00497 if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
00498 goto failed;
00499 }
00500
00501
00502 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
00503 if (!rest_string)
00504 goto failed;
00505
00506
00507 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
00508 val_list = NULL;
00509 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
00510 val_list = NULL;
00511 return ret_tuple;
00512
00513 failed:
00514
00515
00516 Py_XDECREF(val_list);
00517 Py_XDECREF(ret_tuple);
00518 Py_XDECREF(rest_string);
00519 return NULL;
00520 }
00521
00522
00523 static void
00524 pytdbunpack_err_too_short(void)
00525 {
00526 PyErr_Format(PyExc_IndexError,
00527 "%s: data too short for unpack format", FUNCTION_MACRO);
00528 }
00529
00530
00531 static PyObject *
00532 pytdbunpack_uint32(char **pbuf, int *plen)
00533 {
00534 unsigned long v;
00535 unsigned char *b;
00536
00537 if (*plen < 4) {
00538 pytdbunpack_err_too_short();
00539 return NULL;
00540 }
00541
00542 b = *pbuf;
00543 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
00544
00545 (*pbuf) += 4;
00546 (*plen) -= 4;
00547
00548 return PyLong_FromUnsignedLong(v);
00549 }
00550
00551
00552 static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
00553 {
00554 long v;
00555 unsigned char *b;
00556
00557 if (*plen < 2) {
00558 pytdbunpack_err_too_short();
00559 return NULL;
00560 }
00561
00562 b = *pbuf;
00563 v = b[0] | b[1]<<8;
00564
00565 (*pbuf) += 2;
00566 (*plen) -= 2;
00567
00568 return PyInt_FromLong(v);
00569 }
00570
00571
00572 static PyObject *
00573 pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
00574 {
00575 int len;
00576 char *nul_ptr, *start;
00577
00578 start = *pbuf;
00579
00580 nul_ptr = memchr(start, '\0', *plen);
00581 if (!nul_ptr) {
00582 pytdbunpack_err_too_short();
00583 return NULL;
00584 }
00585
00586 len = nul_ptr - start;
00587
00588 *pbuf += len + 1;
00589 *plen -= len + 1;
00590
00591 return PyString_Decode(start, len, encoding, NULL);
00592 }
00593
00594
00595 static PyObject *
00596 pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
00597 {
00598
00599 long slen;
00600 unsigned char *b;
00601 unsigned char *start;
00602 PyObject *str_obj = NULL, *len_obj = NULL;
00603
00604 if (*plen < 4) {
00605 pytdbunpack_err_too_short();
00606 return NULL;
00607 }
00608
00609 b = *pbuf;
00610 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
00611
00612 if (slen < 0) {
00613 PyErr_Format(PyExc_ValueError,
00614 "%s: buffer seems to have negative length", FUNCTION_MACRO);
00615 return NULL;
00616 }
00617
00618 (*pbuf) += 4;
00619 (*plen) -= 4;
00620 start = *pbuf;
00621
00622 if (*plen < slen) {
00623 PyErr_Format(PyExc_IndexError,
00624 "%s: not enough data to unpack buffer: "
00625 "need %d bytes, have %d", FUNCTION_MACRO,
00626 (int) slen, *plen);
00627 return NULL;
00628 }
00629
00630 (*pbuf) += slen;
00631 (*plen) -= slen;
00632
00633 if (!(len_obj = PyInt_FromLong(slen)))
00634 goto failed;
00635
00636 if (PyList_Append(val_list, len_obj) == -1)
00637 goto failed;
00638
00639 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
00640 goto failed;
00641
00642 if (PyList_Append(val_list, str_obj) == -1)
00643 goto failed;
00644
00645 Py_DECREF(len_obj);
00646 Py_DECREF(str_obj);
00647
00648 return val_list;
00649
00650 failed:
00651 Py_XDECREF(len_obj);
00652 Py_XDECREF(str_obj);
00653 return NULL;
00654 }
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665 static PyObject *pytdbunpack_item(char ch,
00666 char **pbuf,
00667 int *plen,
00668 PyObject *val_list)
00669 {
00670 PyObject *unpacked;
00671
00672 if (ch == 'w') {
00673 unpacked = pytdbunpack_int16(pbuf, plen);
00674 }
00675 else if (ch == 'd' || ch == 'p') {
00676
00677 unpacked = pytdbunpack_uint32(pbuf, plen);
00678 }
00679 else if (ch == 'f' || ch == 'P') {
00680 unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
00681 }
00682 else if (ch == 'B') {
00683 return pytdbunpack_buffer(pbuf, plen, val_list);
00684 }
00685 else {
00686 PyErr_Format(PyExc_ValueError,
00687 "%s: format character '%c' is not supported",
00688 FUNCTION_MACRO, ch);
00689
00690 return NULL;
00691 }
00692
00693
00694 if (!unpacked)
00695 return NULL;
00696
00697 if (PyList_Append(val_list, unpacked) == -1)
00698 val_list = NULL;
00699
00700
00701
00702 Py_DECREF(unpacked);
00703
00704 return val_list;
00705 }
00706
00707
00708
00709
00710
00711
00712 static PyMethodDef pytdbpack_methods[] = {
00713 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
00714 { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
00715 };
00716
00717 DL_EXPORT(void)
00718 inittdbpack(void)
00719 {
00720 Py_InitModule3("tdbpack", pytdbpack_methods,
00721 (char *) pytdbpack_docstring);
00722 }