tdb/common/io.c

説明を見る。
00001  /* 
00002    Unix SMB/CIFS implementation.
00003 
00004    trivial database library
00005 
00006    Copyright (C) Andrew Tridgell              1999-2005
00007    Copyright (C) Paul `Rusty' Russell              2000
00008    Copyright (C) Jeremy Allison                    2000-2003
00009    
00010      ** NOTE! The following LGPL license applies to the tdb
00011      ** library. This does NOT imply that all of Samba is released
00012      ** under the LGPL
00013    
00014    This library is free software; you can redistribute it and/or
00015    modify it under the terms of the GNU Lesser General Public
00016    License as published by the Free Software Foundation; either
00017    version 2 of the License, or (at your option) any later version.
00018 
00019    This library is distributed in the hope that it will be useful,
00020    but WITHOUT ANY WARRANTY; without even the implied warranty of
00021    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00022    Lesser General Public License for more details.
00023 
00024    You should have received a copy of the GNU Lesser General Public
00025    License along with this library; if not, write to the Free Software
00026    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00027 */
00028 
00029 
00030 #include "tdb_private.h"
00031 
00032 /* check for an out of bounds access - if it is out of bounds then
00033    see if the database has been expanded by someone else and expand
00034    if necessary 
00035    note that "len" is the minimum length needed for the db
00036 */
00037 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
00038 {
00039         struct stat st;
00040         if (len <= tdb->map_size)
00041                 return 0;
00042         if (tdb->flags & TDB_INTERNAL) {
00043                 if (!probe) {
00044                         /* Ensure ecode is set for log fn. */
00045                         tdb->ecode = TDB_ERR_IO;
00046                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
00047                                  (int)len, (int)tdb->map_size));
00048                 }
00049                 return TDB_ERRCODE(TDB_ERR_IO, -1);
00050         }
00051 
00052         if (fstat(tdb->fd, &st) == -1) {
00053                 return TDB_ERRCODE(TDB_ERR_IO, -1);
00054         }
00055 
00056         if (st.st_size < (size_t)len) {
00057                 if (!probe) {
00058                         /* Ensure ecode is set for log fn. */
00059                         tdb->ecode = TDB_ERR_IO;
00060                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
00061                                  (int)len, (int)st.st_size));
00062                 }
00063                 return TDB_ERRCODE(TDB_ERR_IO, -1);
00064         }
00065 
00066         /* Unmap, update size, remap */
00067         if (tdb_munmap(tdb) == -1)
00068                 return TDB_ERRCODE(TDB_ERR_IO, -1);
00069         tdb->map_size = st.st_size;
00070         tdb_mmap(tdb);
00071         return 0;
00072 }
00073 
00074 /* write a lump of data at a specified offset */
00075 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
00076                      const void *buf, tdb_len_t len)
00077 {
00078         if (len == 0) {
00079                 return 0;
00080         }
00081 
00082         if (tdb->read_only || tdb->traverse_read) {
00083                 tdb->ecode = TDB_ERR_RDONLY;
00084                 return -1;
00085         }
00086 
00087         if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
00088                 return -1;
00089 
00090         if (tdb->map_ptr) {
00091                 memcpy(off + (char *)tdb->map_ptr, buf, len);
00092         } else {
00093                 ssize_t written = pwrite(tdb->fd, buf, len, off);
00094                 if ((written != (ssize_t)len) && (written != -1)) {
00095                         /* try once more */
00096                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
00097                                  "%d of %d bytes at %d, trying once more\n",
00098                                  (uint32_t)written, len, off));
00099                         errno = ENOSPC;
00100                         written = pwrite(tdb->fd, (void *)((char *)buf+written),
00101                                          len-written,
00102                                          off+written);
00103                 }
00104                 if (written == -1) {
00105                         /* Ensure ecode is set for log fn. */
00106                         tdb->ecode = TDB_ERR_IO;
00107                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
00108                                  "len=%d (%s)\n", off, len, strerror(errno)));
00109                         return TDB_ERRCODE(TDB_ERR_IO, -1);
00110                 } else if (written != (ssize_t)len) {
00111                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
00112                                  "write %d bytes at %d in two attempts\n",
00113                                  len, off));
00114                         errno = ENOSPC;
00115                         return TDB_ERRCODE(TDB_ERR_IO, -1);
00116                 }
00117         }
00118         return 0;
00119 }
00120 
00121 /* Endian conversion: we only ever deal with 4 byte quantities */
00122 void *tdb_convert(void *buf, u32 size)
00123 {
00124         u32 i, *p = (u32 *)buf;
00125         for (i = 0; i < size / 4; i++)
00126                 p[i] = TDB_BYTEREV(p[i]);
00127         return buf;
00128 }
00129 
00130 
00131 /* read a lump of data at a specified offset, maybe convert */
00132 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 
00133                     tdb_len_t len, int cv)
00134 {
00135         if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
00136                 return -1;
00137         }
00138 
00139         if (tdb->map_ptr) {
00140                 memcpy(buf, off + (char *)tdb->map_ptr, len);
00141         } else {
00142                 ssize_t ret = pread(tdb->fd, buf, len, off);
00143                 if (ret != (ssize_t)len) {
00144                         /* Ensure ecode is set for log fn. */
00145                         tdb->ecode = TDB_ERR_IO;
00146                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
00147                                  "len=%d ret=%d (%s) map_size=%d\n",
00148                                  (int)off, (int)len, (int)ret, strerror(errno),
00149                                  (int)tdb->map_size));
00150                         return TDB_ERRCODE(TDB_ERR_IO, -1);
00151                 }
00152         }
00153         if (cv) {
00154                 tdb_convert(buf, len);
00155         }
00156         return 0;
00157 }
00158 
00159 
00160 
00161 /*
00162   do an unlocked scan of the hash table heads to find the next non-zero head. The value
00163   will then be confirmed with the lock held
00164 */              
00165 static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
00166 {
00167         u32 h = *chain;
00168         if (tdb->map_ptr) {
00169                 for (;h < tdb->header.hash_size;h++) {
00170                         if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
00171                                 break;
00172                         }
00173                 }
00174         } else {
00175                 u32 off=0;
00176                 for (;h < tdb->header.hash_size;h++) {
00177                         if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
00178                                 break;
00179                         }
00180                 }
00181         }
00182         (*chain) = h;
00183 }
00184 
00185 
00186 int tdb_munmap(struct tdb_context *tdb)
00187 {
00188         if (tdb->flags & TDB_INTERNAL)
00189                 return 0;
00190 
00191 #ifdef HAVE_MMAP
00192         if (tdb->map_ptr) {
00193                 int ret = munmap(tdb->map_ptr, tdb->map_size);
00194                 if (ret != 0)
00195                         return ret;
00196         }
00197 #endif
00198         tdb->map_ptr = NULL;
00199         return 0;
00200 }
00201 
00202 void tdb_mmap(struct tdb_context *tdb)
00203 {
00204         if (tdb->flags & TDB_INTERNAL)
00205                 return;
00206 
00207 #ifdef HAVE_MMAP
00208         if (!(tdb->flags & TDB_NOMMAP)) {
00209                 tdb->map_ptr = mmap(NULL, tdb->map_size, 
00210                                     PROT_READ|(tdb->read_only? 0:PROT_WRITE), 
00211                                     MAP_SHARED|MAP_FILE, tdb->fd, 0);
00212 
00213                 /*
00214                  * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
00215                  */
00216 
00217                 if (tdb->map_ptr == MAP_FAILED) {
00218                         tdb->map_ptr = NULL;
00219                         TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n", 
00220                                  tdb->map_size, strerror(errno)));
00221                 }
00222         } else {
00223                 tdb->map_ptr = NULL;
00224         }
00225 #else
00226         tdb->map_ptr = NULL;
00227 #endif
00228 }
00229 
00230 /* expand a file.  we prefer to use ftruncate, as that is what posix
00231   says to use for mmap expansion */
00232 static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
00233 {
00234         char buf[1024];
00235 
00236         if (tdb->read_only || tdb->traverse_read) {
00237                 tdb->ecode = TDB_ERR_RDONLY;
00238                 return -1;
00239         }
00240 
00241         if (ftruncate(tdb->fd, size+addition) == -1) {
00242                 char b = 0;
00243                 ssize_t written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
00244                 if (written == 0) {
00245                         /* try once more, potentially revealing errno */
00246                         written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
00247                 }
00248                 if (written == 0) {
00249                         /* again - give up, guessing errno */
00250                         errno = ENOSPC;
00251                 }
00252                 if (written != 1) {
00253                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", 
00254                                  size+addition, strerror(errno)));
00255                         return -1;
00256                 }
00257         }
00258 
00259         /* now fill the file with something. This ensures that the
00260            file isn't sparse, which would be very bad if we ran out of
00261            disk. This must be done with write, not via mmap */
00262         memset(buf, TDB_PAD_BYTE, sizeof(buf));
00263         while (addition) {
00264                 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
00265                 ssize_t written = pwrite(tdb->fd, buf, n, size);
00266                 if (written == 0) {
00267                         /* prevent infinite loops: try _once_ more */
00268                         written = pwrite(tdb->fd, buf, n, size);
00269                 }
00270                 if (written == 0) {
00271                         /* give up, trying to provide a useful errno */
00272                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
00273                                 "returned 0 twice: giving up!\n"));
00274                         errno = ENOSPC;
00275                         return -1;
00276                 } else if (written == -1) {
00277                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
00278                                 "%d bytes failed (%s)\n", (uint32_t)n, strerror(errno)));
00279                         return -1;
00280                 } else if (written != n) {
00281                         TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
00282                                 "only %d of %d bytes - retrying\n",
00283                                 (uint32_t)written, (uint32_t)n));
00284                 }
00285                 addition -= written;
00286                 size += written;
00287         }
00288         return 0;
00289 }
00290 
00291 
00292 /* expand the database at least size bytes by expanding the underlying
00293    file and doing the mmap again if necessary */
00294 int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
00295 {
00296         struct list_struct rec;
00297         tdb_off_t offset;
00298 
00299         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
00300                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
00301                 return -1;
00302         }
00303 
00304         /* must know about any previous expansions by another process */
00305         tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
00306 
00307         /* always make room for at least 10 more records, and round
00308            the database up to a multiple of the page size */
00309         size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;
00310 
00311         if (!(tdb->flags & TDB_INTERNAL))
00312                 tdb_munmap(tdb);
00313 
00314         /*
00315          * We must ensure the file is unmapped before doing this
00316          * to ensure consistency with systems like OpenBSD where
00317          * writes and mmaps are not consistent.
00318          */
00319 
00320         /* expand the file itself */
00321         if (!(tdb->flags & TDB_INTERNAL)) {
00322                 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
00323                         goto fail;
00324         }
00325 
00326         tdb->map_size += size;
00327 
00328         if (tdb->flags & TDB_INTERNAL) {
00329                 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
00330                                                     tdb->map_size);
00331                 if (!new_map_ptr) {
00332                         tdb->map_size -= size;
00333                         goto fail;
00334                 }
00335                 tdb->map_ptr = new_map_ptr;
00336         } else {
00337                 /*
00338                  * We must ensure the file is remapped before adding the space
00339                  * to ensure consistency with systems like OpenBSD where
00340                  * writes and mmaps are not consistent.
00341                  */
00342 
00343                 /* We're ok if the mmap fails as we'll fallback to read/write */
00344                 tdb_mmap(tdb);
00345         }
00346 
00347         /* form a new freelist record */
00348         memset(&rec,'\0',sizeof(rec));
00349         rec.rec_len = size - sizeof(rec);
00350 
00351         /* link it into the free list */
00352         offset = tdb->map_size - size;
00353         if (tdb_free(tdb, offset, &rec) == -1)
00354                 goto fail;
00355 
00356         tdb_unlock(tdb, -1, F_WRLCK);
00357         return 0;
00358  fail:
00359         tdb_unlock(tdb, -1, F_WRLCK);
00360         return -1;
00361 }
00362 
00363 /* read/write a tdb_off_t */
00364 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
00365 {
00366         return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
00367 }
00368 
00369 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
00370 {
00371         tdb_off_t off = *d;
00372         return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
00373 }
00374 
00375 
00376 /* read a lump of data, allocating the space for it */
00377 char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
00378 {
00379         char *buf;
00380 
00381         /* some systems don't like zero length malloc */
00382         if (len == 0) {
00383                 len = 1;
00384         }
00385 
00386         if (!(buf = (char *)malloc(len))) {
00387                 /* Ensure ecode is set for log fn. */
00388                 tdb->ecode = TDB_ERR_OOM;
00389                 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
00390                            len, strerror(errno)));
00391                 return TDB_ERRCODE(TDB_ERR_OOM, buf);
00392         }
00393         if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
00394                 SAFE_FREE(buf);
00395                 return NULL;
00396         }
00397         return buf;
00398 }
00399 
00400 /* Give a piece of tdb data to a parser */
00401 
00402 int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
00403                    tdb_off_t offset, tdb_len_t len,
00404                    int (*parser)(TDB_DATA key, TDB_DATA data,
00405                                  void *private_data),
00406                    void *private_data)
00407 {
00408         TDB_DATA data;
00409         int result;
00410 
00411         data.dsize = len;
00412 
00413         if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
00414                 /*
00415                  * Optimize by avoiding the malloc/memcpy/free, point the
00416                  * parser directly at the mmap area.
00417                  */
00418                 if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
00419                         return -1;
00420                 }
00421                 data.dptr = offset + (char *)tdb->map_ptr;
00422                 return parser(key, data, private_data);
00423         }
00424 
00425         if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
00426                 return -1;
00427         }
00428 
00429         result = parser(key, data, private_data);
00430         free(data.dptr);
00431         return result;
00432 }
00433 
00434 /* read/write a record */
00435 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
00436 {
00437         if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
00438                 return -1;
00439         if (TDB_BAD_MAGIC(rec)) {
00440                 /* Ensure ecode is set for log fn. */
00441                 tdb->ecode = TDB_ERR_CORRUPT;
00442                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
00443                 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
00444         }
00445         return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
00446 }
00447 
00448 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
00449 {
00450         struct list_struct r = *rec;
00451         return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
00452 }
00453 
00454 static const struct tdb_methods io_methods = {
00455         tdb_read,
00456         tdb_write,
00457         tdb_next_hash_chain,
00458         tdb_oob,
00459         tdb_expand_file,
00460         tdb_brlock
00461 };
00462 
00463 /*
00464   initialise the default methods table
00465 */
00466 void tdb_io_init(struct tdb_context *tdb)
00467 {
00468         tdb->methods = &io_methods;
00469 }

Sambaに対してSat Aug 29 21:23:26 2009に生成されました。  doxygen 1.4.7