00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "rsync.h"
00021
00022 extern int verbose;
00023 extern int do_progress;
00024 extern int checksum_seed;
00025 extern int append_mode;
00026
00027 int updating_basis_file;
00028
00029 static int false_alarms;
00030 static int hash_hits;
00031 static int matches;
00032 static int64 data_transfer;
00033
00034 static int total_false_alarms;
00035 static int total_hash_hits;
00036 static int total_matches;
00037
00038 extern struct stats stats;
00039
00040 #define TABLESIZE (1<<16)
00041
00042 static int32 *hash_table;
00043
00044 #define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF)
00045 #define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16)
00046
00047 static void build_hash_table(struct sum_struct *s)
00048 {
00049 int32 i;
00050
00051 if (!hash_table) {
00052 hash_table = new_array(int32, TABLESIZE);
00053 if (!hash_table)
00054 out_of_memory("build_hash_table");
00055 }
00056
00057 memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
00058
00059 for (i = 0; i < s->count; i++) {
00060 uint32 t = SUM2HASH(s->sums[i].sum1);
00061 s->sums[i].chain = hash_table[t];
00062 hash_table[t] = i;
00063 }
00064 }
00065
00066
00067 static OFF_T last_match;
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082 static void matched(int f, struct sum_struct *s, struct map_struct *buf,
00083 OFF_T offset, int32 i)
00084 {
00085 int32 n = offset - last_match;
00086 int32 j;
00087
00088 if (verbose > 2 && i >= 0) {
00089 rprintf(FINFO,
00090 "match at %.0f last_match=%.0f j=%d len=%ld n=%ld\n",
00091 (double)offset, (double)last_match, i,
00092 (long)s->sums[i].len, (long)n);
00093 }
00094
00095 send_token(f, i, buf, last_match, n, i < 0 ? 0 : s->sums[i].len);
00096 data_transfer += n;
00097
00098 if (i >= 0) {
00099 stats.matched_data += s->sums[i].len;
00100 n += s->sums[i].len;
00101 }
00102
00103 for (j = 0; j < n; j += CHUNK_SIZE) {
00104 int32 n1 = MIN(CHUNK_SIZE, n - j);
00105 sum_update(map_ptr(buf, last_match + j, n1), n1);
00106 }
00107
00108 if (i >= 0)
00109 last_match = offset + s->sums[i].len;
00110 else
00111 last_match = offset;
00112
00113 if (buf && do_progress)
00114 show_progress(last_match, buf->file_size);
00115 }
00116
00117
00118 static void hash_search(int f,struct sum_struct *s,
00119 struct map_struct *buf, OFF_T len)
00120 {
00121 OFF_T offset, end, backup;
00122 int32 k, want_i;
00123 char sum2[SUM_LENGTH];
00124 uint32 s1, s2, sum;
00125 int more;
00126 schar *map;
00127
00128
00129
00130 want_i = 0;
00131
00132 if (verbose > 2) {
00133 rprintf(FINFO, "hash search b=%ld len=%.0f\n",
00134 (long)s->blength, (double)len);
00135 }
00136
00137 k = (int32)MIN(len, (OFF_T)s->blength);
00138
00139 map = (schar *)map_ptr(buf, 0, k);
00140
00141 sum = get_checksum1((char *)map, k);
00142 s1 = sum & 0xFFFF;
00143 s2 = sum >> 16;
00144 if (verbose > 3)
00145 rprintf(FINFO, "sum=%.8x k=%ld\n", sum, (long)k);
00146
00147 offset = 0;
00148
00149 end = len + 1 - s->sums[s->count-1].len;
00150
00151 if (verbose > 3) {
00152 rprintf(FINFO, "hash search s->blength=%ld len=%.0f count=%.0f\n",
00153 (long)s->blength, (double)len, (double)s->count);
00154 }
00155
00156 do {
00157 int done_csum2 = 0;
00158 int32 i;
00159
00160 if (verbose > 4) {
00161 rprintf(FINFO, "offset=%.0f sum=%04x%04x\n",
00162 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
00163 }
00164
00165 i = hash_table[SUM2HASH2(s1,s2)];
00166 if (i < 0)
00167 goto null_hash;
00168
00169 sum = (s1 & 0xffff) | (s2 << 16);
00170 hash_hits++;
00171 do {
00172 int32 l;
00173
00174 if (sum != s->sums[i].sum1)
00175 continue;
00176
00177
00178 l = (int32)MIN((OFF_T)s->blength, len-offset);
00179 if (l != s->sums[i].len)
00180 continue;
00181
00182
00183
00184 if (updating_basis_file && s->sums[i].offset < offset
00185 && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
00186 continue;
00187
00188 if (verbose > 3) {
00189 rprintf(FINFO,
00190 "potential match at %.0f i=%ld sum=%08x\n",
00191 (double)offset, (long)i, sum);
00192 }
00193
00194 if (!done_csum2) {
00195 map = (schar *)map_ptr(buf,offset,l);
00196 get_checksum2((char *)map,l,sum2);
00197 done_csum2 = 1;
00198 }
00199
00200 if (memcmp(sum2,s->sums[i].sum2,s->s2length) != 0) {
00201 false_alarms++;
00202 continue;
00203 }
00204
00205
00206
00207
00208 if (updating_basis_file) {
00209 int32 i2;
00210 for (i2 = i; i2 >= 0; i2 = s->sums[i2].chain) {
00211 if (s->sums[i2].offset != offset)
00212 continue;
00213 if (i2 != i) {
00214 if (sum != s->sums[i2].sum1)
00215 break;
00216 if (memcmp(sum2, s->sums[i2].sum2,
00217 s->s2length) != 0)
00218 break;
00219 i = i2;
00220 }
00221
00222
00223 s->sums[i].flags |= SUMFLG_SAME_OFFSET;
00224 goto set_want_i;
00225 }
00226 }
00227
00228
00229
00230 if (i != want_i && want_i < s->count
00231 && (!updating_basis_file || s->sums[want_i].offset >= offset
00232 || s->sums[want_i].flags & SUMFLG_SAME_OFFSET)
00233 && sum == s->sums[want_i].sum1
00234 && memcmp(sum2, s->sums[want_i].sum2, s->s2length) == 0) {
00235
00236
00237 i = want_i;
00238 }
00239 set_want_i:
00240 want_i = i + 1;
00241
00242 matched(f,s,buf,offset,i);
00243 offset += s->sums[i].len - 1;
00244 k = (int32)MIN((OFF_T)s->blength, len-offset);
00245 map = (schar *)map_ptr(buf, offset, k);
00246 sum = get_checksum1((char *)map, k);
00247 s1 = sum & 0xFFFF;
00248 s2 = sum >> 16;
00249 matches++;
00250 break;
00251 } while ((i = s->sums[i].chain) >= 0);
00252
00253 null_hash:
00254 backup = offset - last_match;
00255
00256 if (backup < 0)
00257 backup = 0;
00258
00259
00260 more = offset + k < len;
00261 map = (schar *)map_ptr(buf, offset - backup, k + more + backup)
00262 + backup;
00263 s1 -= map[0] + CHAR_OFFSET;
00264 s2 -= k * (map[0]+CHAR_OFFSET);
00265
00266
00267 if (more) {
00268 s1 += map[k] + CHAR_OFFSET;
00269 s2 += s1;
00270 } else
00271 --k;
00272
00273
00274
00275
00276
00277
00278
00279 if (backup >= s->blength+CHUNK_SIZE && end-offset > CHUNK_SIZE)
00280 matched(f, s, buf, offset - s->blength, -2);
00281 } while (++offset < end);
00282
00283 matched(f, s, buf, len, -1);
00284 map_ptr(buf, len-1, 1);
00285 }
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302 void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
00303 {
00304 char file_sum[MD4_SUM_LENGTH];
00305
00306 last_match = 0;
00307 false_alarms = 0;
00308 hash_hits = 0;
00309 matches = 0;
00310 data_transfer = 0;
00311
00312 sum_init(checksum_seed);
00313
00314 if (append_mode) {
00315 OFF_T j = 0;
00316 for (j = CHUNK_SIZE; j < s->flength; j += CHUNK_SIZE) {
00317 if (buf && do_progress)
00318 show_progress(last_match, buf->file_size);
00319 sum_update(map_ptr(buf, last_match, CHUNK_SIZE),
00320 CHUNK_SIZE);
00321 last_match = j;
00322 }
00323 if (last_match < s->flength) {
00324 int32 len = s->flength - last_match;
00325 if (buf && do_progress)
00326 show_progress(last_match, buf->file_size);
00327 sum_update(map_ptr(buf, last_match, len), len);
00328 last_match = s->flength;
00329 }
00330 s->count = 0;
00331 }
00332
00333 if (len > 0 && s->count > 0) {
00334 build_hash_table(s);
00335
00336 if (verbose > 2)
00337 rprintf(FINFO,"built hash table\n");
00338
00339 hash_search(f,s,buf,len);
00340
00341 if (verbose > 2)
00342 rprintf(FINFO,"done hash search\n");
00343 } else {
00344 OFF_T j;
00345
00346 for (j = last_match + CHUNK_SIZE; j < len; j += CHUNK_SIZE)
00347 matched(f, s, buf, j, -2);
00348 matched(f, s, buf, len, -1);
00349 }
00350
00351 sum_end(file_sum);
00352
00353 if (buf && buf->status != 0)
00354 file_sum[0]++;
00355
00356 if (verbose > 2)
00357 rprintf(FINFO,"sending file_sum\n");
00358 write_buf(f,file_sum,MD4_SUM_LENGTH);
00359
00360 if (verbose > 2)
00361 rprintf(FINFO, "false_alarms=%d hash_hits=%d matches=%d\n",
00362 false_alarms, hash_hits, matches);
00363
00364 total_hash_hits += hash_hits;
00365 total_false_alarms += false_alarms;
00366 total_matches += matches;
00367 stats.literal_data += data_transfer;
00368 }
00369
00370 void match_report(void)
00371 {
00372 if (verbose <= 1)
00373 return;
00374
00375 rprintf(FINFO,
00376 "total: matches=%d hash_hits=%d false_alarms=%d data=%.0f\n",
00377 total_matches, total_hash_hits, total_false_alarms,
00378 (double)stats.literal_data);
00379 }