match.c

ソースコードを見る。


関数

static void build_hash_table (struct sum_struct *s)
static void matched (int f, struct sum_struct *s, struct map_struct *buf, OFF_T offset, int32 i)
 Transmit a literal and/or match token.
static void hash_search (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
void match_sums (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
 Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data.
void match_report (void)

変数

int verbose
int do_progress
int checksum_seed
int append_mode
int updating_basis_file
static int false_alarms
static int hash_hits
static int matches
static int64 data_transfer
static int total_false_alarms
static int total_hash_hits
static int total_matches
stats stats
static int32 * hash_table
static OFF_T last_match

関数

static void build_hash_table ( struct sum_struct s  )  [static]

match.c47 行で定義されています。

参照先 sum_buf::chainsum_struct::counthash_tableout_of_memory()sum_buf::sum1sum_struct::sums.

参照元 match_sums().

00048 {
00049         int32 i;
00050 
00051         if (!hash_table) {
00052                 hash_table = new_array(int32, TABLESIZE);
00053                 if (!hash_table)
00054                         out_of_memory("build_hash_table");
00055         }
00056 
00057         memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
00058 
00059         for (i = 0; i < s->count; i++) {
00060                 uint32 t = SUM2HASH(s->sums[i].sum1);
00061                 s->sums[i].chain = hash_table[t];
00062                 hash_table[t] = i;
00063         }
00064 }

static void matched ( int  f,
struct sum_struct s,
struct map_struct buf,
OFF_T  offset,
int32  i 
) [static]

Transmit a literal and/or match token.

This delightfully-named function is called either when we find a match and need to transmit all the unmatched data leading up to it, or when we get bored of accumulating literal data and just need to transmit it. As a result of this second case, it is called even if we have not matched at all!

引数:
i If >0, the number of a matched token. If 0, indicates we have only literal data.

match.c82 行で定義されています。

参照先 bufdata_transferdo_progressFINFOlast_matchsum_buf::lenmap_ptr()stats::matched_datarprintf()send_token()show_progress()statssum_update()sum_struct::sumsverbose.

参照元 dowild()hash_search()run_test().

00084 {
00085         int32 n = offset - last_match; /* max value: block_size (int32) */
00086         int32 j;
00087 
00088         if (verbose > 2 && i >= 0) {
00089                 rprintf(FINFO,
00090                         "match at %.0f last_match=%.0f j=%d len=%ld n=%ld\n",
00091                         (double)offset, (double)last_match, i,
00092                         (long)s->sums[i].len, (long)n);
00093         }
00094 
00095         send_token(f, i, buf, last_match, n, i < 0 ? 0 : s->sums[i].len);
00096         data_transfer += n;
00097 
00098         if (i >= 0) {
00099                 stats.matched_data += s->sums[i].len;
00100                 n += s->sums[i].len;
00101         }
00102 
00103         for (j = 0; j < n; j += CHUNK_SIZE) {
00104                 int32 n1 = MIN(CHUNK_SIZE, n - j);
00105                 sum_update(map_ptr(buf, last_match + j, n1), n1);
00106         }
00107 
00108         if (i >= 0)
00109                 last_match = offset + s->sums[i].len;
00110         else
00111                 last_match = offset;
00112 
00113         if (buf && do_progress)
00114                 show_progress(last_match, buf->file_size);
00115 }

static void hash_search ( int  f,
struct sum_struct s,
struct map_struct buf,
OFF_T  len 
) [static]

match.c118 行で定義されています。

参照先 sum_struct::blengthbufsum_buf::chainsum_struct::countfalse_alarmsFINFOsum_buf::flagsget_checksum1()get_checksum2()hash_hitshash_tablelast_matchsum_buf::lenmap_ptr()matched()matchessum_buf::offsetrprintf()sum_struct::s2lengthsum_buf::sum1sum_buf::sum2sum_struct::sumsupdating_basis_fileverbose.

参照元 match_sums().

00120 {
00121         OFF_T offset, end, backup;
00122         int32 k, want_i;
00123         char sum2[SUM_LENGTH];
00124         uint32 s1, s2, sum;
00125         int more;
00126         schar *map;
00127 
00128         /* want_i is used to encourage adjacent matches, allowing the RLL
00129          * coding of the output to work more efficiently. */
00130         want_i = 0;
00131 
00132         if (verbose > 2) {
00133                 rprintf(FINFO, "hash search b=%ld len=%.0f\n",
00134                         (long)s->blength, (double)len);
00135         }
00136 
00137         k = (int32)MIN(len, (OFF_T)s->blength);
00138 
00139         map = (schar *)map_ptr(buf, 0, k);
00140 
00141         sum = get_checksum1((char *)map, k);
00142         s1 = sum & 0xFFFF;
00143         s2 = sum >> 16;
00144         if (verbose > 3)
00145                 rprintf(FINFO, "sum=%.8x k=%ld\n", sum, (long)k);
00146 
00147         offset = 0;
00148 
00149         end = len + 1 - s->sums[s->count-1].len;
00150 
00151         if (verbose > 3) {
00152                 rprintf(FINFO, "hash search s->blength=%ld len=%.0f count=%.0f\n",
00153                         (long)s->blength, (double)len, (double)s->count);
00154         }
00155 
00156         do {
00157                 int done_csum2 = 0;
00158                 int32 i;
00159 
00160                 if (verbose > 4) {
00161                         rprintf(FINFO, "offset=%.0f sum=%04x%04x\n",
00162                                 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
00163                 }
00164 
00165                 i = hash_table[SUM2HASH2(s1,s2)];
00166                 if (i < 0)
00167                         goto null_hash;
00168 
00169                 sum = (s1 & 0xffff) | (s2 << 16);
00170                 hash_hits++;
00171                 do {
00172                         int32 l;
00173 
00174                         if (sum != s->sums[i].sum1)
00175                                 continue;
00176 
00177                         /* also make sure the two blocks are the same length */
00178                         l = (int32)MIN((OFF_T)s->blength, len-offset);
00179                         if (l != s->sums[i].len)
00180                                 continue;
00181 
00182                         /* in-place: ensure chunk's offset is either >= our
00183                          * offset or that the data didn't move. */
00184                         if (updating_basis_file && s->sums[i].offset < offset
00185                             && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
00186                                 continue;
00187 
00188                         if (verbose > 3) {
00189                                 rprintf(FINFO,
00190                                         "potential match at %.0f i=%ld sum=%08x\n",
00191                                         (double)offset, (long)i, sum);
00192                         }
00193 
00194                         if (!done_csum2) {
00195                                 map = (schar *)map_ptr(buf,offset,l);
00196                                 get_checksum2((char *)map,l,sum2);
00197                                 done_csum2 = 1;
00198                         }
00199 
00200                         if (memcmp(sum2,s->sums[i].sum2,s->s2length) != 0) {
00201                                 false_alarms++;
00202                                 continue;
00203                         }
00204 
00205                         /* When updating in-place, the best possible match is
00206                          * one with an identical offset, so we prefer that over
00207                          * the following want_i optimization. */
00208                         if (updating_basis_file) {
00209                                 int32 i2;
00210                                 for (i2 = i; i2 >= 0; i2 = s->sums[i2].chain) {
00211                                         if (s->sums[i2].offset != offset)
00212                                                 continue;
00213                                         if (i2 != i) {
00214                                                 if (sum != s->sums[i2].sum1)
00215                                                         break;
00216                                                 if (memcmp(sum2, s->sums[i2].sum2,
00217                                                            s->s2length) != 0)
00218                                                         break;
00219                                                 i = i2;
00220                                         }
00221                                         /* This chunk was at the same offset on
00222                                          * both the sender and the receiver. */
00223                                         s->sums[i].flags |= SUMFLG_SAME_OFFSET;
00224                                         goto set_want_i;
00225                                 }
00226                         }
00227 
00228                         /* we've found a match, but now check to see
00229                          * if want_i can hint at a better match. */
00230                         if (i != want_i && want_i < s->count
00231                             && (!updating_basis_file || s->sums[want_i].offset >= offset
00232                              || s->sums[want_i].flags & SUMFLG_SAME_OFFSET)
00233                             && sum == s->sums[want_i].sum1
00234                             && memcmp(sum2, s->sums[want_i].sum2, s->s2length) == 0) {
00235                                 /* we've found an adjacent match - the RLL coder
00236                                  * will be happy */
00237                                 i = want_i;
00238                         }
00239                     set_want_i:
00240                         want_i = i + 1;
00241 
00242                         matched(f,s,buf,offset,i);
00243                         offset += s->sums[i].len - 1;
00244                         k = (int32)MIN((OFF_T)s->blength, len-offset);
00245                         map = (schar *)map_ptr(buf, offset, k);
00246                         sum = get_checksum1((char *)map, k);
00247                         s1 = sum & 0xFFFF;
00248                         s2 = sum >> 16;
00249                         matches++;
00250                         break;
00251                 } while ((i = s->sums[i].chain) >= 0);
00252 
00253           null_hash:
00254                 backup = offset - last_match;
00255                 /* We sometimes read 1 byte prior to last_match... */
00256                 if (backup < 0)
00257                         backup = 0;
00258 
00259                 /* Trim off the first byte from the checksum */
00260                 more = offset + k < len;
00261                 map = (schar *)map_ptr(buf, offset - backup, k + more + backup)
00262                     + backup;
00263                 s1 -= map[0] + CHAR_OFFSET;
00264                 s2 -= k * (map[0]+CHAR_OFFSET);
00265 
00266                 /* Add on the next byte (if there is one) to the checksum */
00267                 if (more) {
00268                         s1 += map[k] + CHAR_OFFSET;
00269                         s2 += s1;
00270                 } else
00271                         --k;
00272 
00273                 /* By matching early we avoid re-reading the
00274                    data 3 times in the case where a token
00275                    match comes a long way after last
00276                    match. The 3 reads are caused by the
00277                    running match, the checksum update and the
00278                    literal send. */
00279                 if (backup >= s->blength+CHUNK_SIZE && end-offset > CHUNK_SIZE)
00280                         matched(f, s, buf, offset - s->blength, -2);
00281         } while (++offset < end);
00282 
00283         matched(f, s, buf, len, -1);
00284         map_ptr(buf, len-1, 1);
00285 }

void match_sums ( int  f,
struct sum_struct s,
struct map_struct buf,
OFF_T  len 
)

Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data.

Also calculates the MD4 checksum of the whole file, using the md accumulator. This is transmitted with the file as protection against corruption on the wire.

引数:
s Checksums received from the generator. If s->count == 0, then there are actually no checksums for this file.
len Length of the file to send.

match.c302 行で定義されています。

参照先 append_modebufbuild_hash_table()checksum_seedsum_struct::countdata_transferdo_progressfalse_alarmsFINFOsum_struct::flengthhash_hitshash_search()last_matchmap_ptr()matchesrprintf()show_progress()sum_init()sum_update()verbose.

参照元 send_files().

00303 {
00304         char file_sum[MD4_SUM_LENGTH];
00305 
00306         last_match = 0;
00307         false_alarms = 0;
00308         hash_hits = 0;
00309         matches = 0;
00310         data_transfer = 0;
00311 
00312         sum_init(checksum_seed);
00313 
00314         if (append_mode) {
00315                 OFF_T j = 0;
00316                 for (j = CHUNK_SIZE; j < s->flength; j += CHUNK_SIZE) {
00317                         if (buf && do_progress)
00318                                 show_progress(last_match, buf->file_size);
00319                         sum_update(map_ptr(buf, last_match, CHUNK_SIZE),
00320                                    CHUNK_SIZE);
00321                         last_match = j;
00322                 }
00323                 if (last_match < s->flength) {
00324                         int32 len = s->flength - last_match;
00325                         if (buf && do_progress)
00326                                 show_progress(last_match, buf->file_size);
00327                         sum_update(map_ptr(buf, last_match, len), len);
00328                         last_match = s->flength;
00329                 }
00330                 s->count = 0;
00331         }
00332 
00333         if (len > 0 && s->count > 0) {
00334                 build_hash_table(s);
00335 
00336                 if (verbose > 2)
00337                         rprintf(FINFO,"built hash table\n");
00338 
00339                 hash_search(f,s,buf,len);
00340 
00341                 if (verbose > 2)
00342                         rprintf(FINFO,"done hash search\n");
00343         } else {
00344                 OFF_T j;
00345                 /* by doing this in pieces we avoid too many seeks */
00346                 for (j = last_match + CHUNK_SIZE; j < len; j += CHUNK_SIZE)
00347                         matched(f, s, buf, j, -2);
00348                 matched(f, s, buf, len, -1);
00349         }
00350 
00351         sum_end(file_sum);
00352         /* If we had a read error, send a bad checksum. */
00353         if (buf && buf->status != 0)
00354                 file_sum[0]++;
00355 
00356         if (verbose > 2)
00357                 rprintf(FINFO,"sending file_sum\n");
00358         write_buf(f,file_sum,MD4_SUM_LENGTH);
00359 
00360         if (verbose > 2)
00361                 rprintf(FINFO, "false_alarms=%d hash_hits=%d matches=%d\n",
00362                         false_alarms, hash_hits, matches);
00363 
00364         total_hash_hits += hash_hits;
00365         total_false_alarms += false_alarms;
00366         total_matches += matches;
00367         stats.literal_data += data_transfer;
00368 }

void match_report ( void   ) 

match.c370 行で定義されています。

参照先 FINFOstats::literal_datarprintf()statstotal_false_alarmstotal_hash_hitstotal_matchesverbose.

参照元 send_files().

00371 {
00372         if (verbose <= 1)
00373                 return;
00374 
00375         rprintf(FINFO,
00376                 "total: matches=%d  hash_hits=%d  false_alarms=%d data=%.0f\n",
00377                 total_matches, total_hash_hits, total_false_alarms,
00378                 (double)stats.literal_data);
00379 }


変数

int verbose

options.c163 行で定義されています。

int do_progress

options.c97 行で定義されています。

int checksum_seed

options.c115 行で定義されています。

int append_mode

options.c42 行で定義されています。

int updating_basis_file

match.c27 行で定義されています。

参照元 hash_search()send_files().

int false_alarms [static]

match.c29 行で定義されています。

参照元 hash_search()match_sums().

int hash_hits [static]

match.c30 行で定義されています。

参照元 hash_search()match_sums().

int matches [static]

match.c31 行で定義されています。

参照元 hash_search()match_sums().

int64 data_transfer [static]

match.c32 行で定義されています。

参照元 match_sums()matched().

int total_false_alarms [static]

match.c34 行で定義されています。

参照元 match_report().

int total_hash_hits [static]

match.c35 行で定義されています。

参照元 match_report().

int total_matches [static]

match.c36 行で定義されています。

参照元 match_report().

struct stats stats

log.c59 行で定義されています。

int32* hash_table [static]

match.c42 行で定義されています。

参照元 build_hash_table()hash_search().

OFF_T last_match [static]

match.c67 行で定義されています。

参照元 hash_search()match_sums()matched().


rsyncに対してSat Dec 5 19:45:44 2009に生成されました。  doxygen 1.4.7