関数 | |
static void | build_hash_table (struct sum_struct *s) |
static void | matched (int f, struct sum_struct *s, struct map_struct *buf, OFF_T offset, int32 i) |
Transmit a literal and/or match token. | |
static void | hash_search (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
void | match_sums (int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data. | |
void | match_report (void) |
変数 | |
int | verbose |
int | do_progress |
int | checksum_seed |
int | append_mode |
int | updating_basis_file |
static int | false_alarms |
static int | hash_hits |
static int | matches |
static int64 | data_transfer |
static int | total_false_alarms |
static int | total_hash_hits |
static int | total_matches |
stats | stats |
static int32 * | hash_table |
static OFF_T | last_match |
static void build_hash_table | ( | struct sum_struct * | s | ) | [static] |
参照先 sum_buf::chain・sum_struct::count・hash_table・out_of_memory()・sum_buf::sum1・sum_struct::sums.
参照元 match_sums().
00048 { 00049 int32 i; 00050 00051 if (!hash_table) { 00052 hash_table = new_array(int32, TABLESIZE); 00053 if (!hash_table) 00054 out_of_memory("build_hash_table"); 00055 } 00056 00057 memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]); 00058 00059 for (i = 0; i < s->count; i++) { 00060 uint32 t = SUM2HASH(s->sums[i].sum1); 00061 s->sums[i].chain = hash_table[t]; 00062 hash_table[t] = i; 00063 } 00064 }
static void matched | ( | int | f, | |
struct sum_struct * | s, | |||
struct map_struct * | buf, | |||
OFF_T | offset, | |||
int32 | i | |||
) | [static] |
Transmit a literal and/or match token.
This delightfully-named function is called either when we find a match and need to transmit all the unmatched data leading up to it, or when we get bored of accumulating literal data and just need to transmit it. As a result of this second case, it is called even if we have not matched at all!
i | If >0, the number of a matched token. If 0, indicates we have only literal data. |
参照先 buf・data_transfer・do_progress・FINFO・last_match・sum_buf::len・map_ptr()・stats::matched_data・rprintf()・send_token()・show_progress()・stats・sum_update()・sum_struct::sums・verbose.
参照元 dowild()・hash_search()・run_test().
00084 { 00085 int32 n = offset - last_match; /* max value: block_size (int32) */ 00086 int32 j; 00087 00088 if (verbose > 2 && i >= 0) { 00089 rprintf(FINFO, 00090 "match at %.0f last_match=%.0f j=%d len=%ld n=%ld\n", 00091 (double)offset, (double)last_match, i, 00092 (long)s->sums[i].len, (long)n); 00093 } 00094 00095 send_token(f, i, buf, last_match, n, i < 0 ? 0 : s->sums[i].len); 00096 data_transfer += n; 00097 00098 if (i >= 0) { 00099 stats.matched_data += s->sums[i].len; 00100 n += s->sums[i].len; 00101 } 00102 00103 for (j = 0; j < n; j += CHUNK_SIZE) { 00104 int32 n1 = MIN(CHUNK_SIZE, n - j); 00105 sum_update(map_ptr(buf, last_match + j, n1), n1); 00106 } 00107 00108 if (i >= 0) 00109 last_match = offset + s->sums[i].len; 00110 else 00111 last_match = offset; 00112 00113 if (buf && do_progress) 00114 show_progress(last_match, buf->file_size); 00115 }
static void hash_search | ( | int | f, | |
struct sum_struct * | s, | |||
struct map_struct * | buf, | |||
OFF_T | len | |||
) | [static] |
参照先 sum_struct::blength・buf・sum_buf::chain・sum_struct::count・false_alarms・FINFO・sum_buf::flags・get_checksum1()・get_checksum2()・hash_hits・hash_table・last_match・sum_buf::len・map_ptr()・matched()・matches・sum_buf::offset・rprintf()・sum_struct::s2length・sum_buf::sum1・sum_buf::sum2・sum_struct::sums・updating_basis_file・verbose.
参照元 match_sums().
00120 { 00121 OFF_T offset, end, backup; 00122 int32 k, want_i; 00123 char sum2[SUM_LENGTH]; 00124 uint32 s1, s2, sum; 00125 int more; 00126 schar *map; 00127 00128 /* want_i is used to encourage adjacent matches, allowing the RLL 00129 * coding of the output to work more efficiently. */ 00130 want_i = 0; 00131 00132 if (verbose > 2) { 00133 rprintf(FINFO, "hash search b=%ld len=%.0f\n", 00134 (long)s->blength, (double)len); 00135 } 00136 00137 k = (int32)MIN(len, (OFF_T)s->blength); 00138 00139 map = (schar *)map_ptr(buf, 0, k); 00140 00141 sum = get_checksum1((char *)map, k); 00142 s1 = sum & 0xFFFF; 00143 s2 = sum >> 16; 00144 if (verbose > 3) 00145 rprintf(FINFO, "sum=%.8x k=%ld\n", sum, (long)k); 00146 00147 offset = 0; 00148 00149 end = len + 1 - s->sums[s->count-1].len; 00150 00151 if (verbose > 3) { 00152 rprintf(FINFO, "hash search s->blength=%ld len=%.0f count=%.0f\n", 00153 (long)s->blength, (double)len, (double)s->count); 00154 } 00155 00156 do { 00157 int done_csum2 = 0; 00158 int32 i; 00159 00160 if (verbose > 4) { 00161 rprintf(FINFO, "offset=%.0f sum=%04x%04x\n", 00162 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF); 00163 } 00164 00165 i = hash_table[SUM2HASH2(s1,s2)]; 00166 if (i < 0) 00167 goto null_hash; 00168 00169 sum = (s1 & 0xffff) | (s2 << 16); 00170 hash_hits++; 00171 do { 00172 int32 l; 00173 00174 if (sum != s->sums[i].sum1) 00175 continue; 00176 00177 /* also make sure the two blocks are the same length */ 00178 l = (int32)MIN((OFF_T)s->blength, len-offset); 00179 if (l != s->sums[i].len) 00180 continue; 00181 00182 /* in-place: ensure chunk's offset is either >= our 00183 * offset or that the data didn't move. */ 00184 if (updating_basis_file && s->sums[i].offset < offset 00185 && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) 00186 continue; 00187 00188 if (verbose > 3) { 00189 rprintf(FINFO, 00190 "potential match at %.0f i=%ld sum=%08x\n", 00191 (double)offset, (long)i, sum); 00192 } 00193 00194 if (!done_csum2) { 00195 map = (schar *)map_ptr(buf,offset,l); 00196 get_checksum2((char *)map,l,sum2); 00197 done_csum2 = 1; 00198 } 00199 00200 if (memcmp(sum2,s->sums[i].sum2,s->s2length) != 0) { 00201 false_alarms++; 00202 continue; 00203 } 00204 00205 /* When updating in-place, the best possible match is 00206 * one with an identical offset, so we prefer that over 00207 * the following want_i optimization. */ 00208 if (updating_basis_file) { 00209 int32 i2; 00210 for (i2 = i; i2 >= 0; i2 = s->sums[i2].chain) { 00211 if (s->sums[i2].offset != offset) 00212 continue; 00213 if (i2 != i) { 00214 if (sum != s->sums[i2].sum1) 00215 break; 00216 if (memcmp(sum2, s->sums[i2].sum2, 00217 s->s2length) != 0) 00218 break; 00219 i = i2; 00220 } 00221 /* This chunk was at the same offset on 00222 * both the sender and the receiver. */ 00223 s->sums[i].flags |= SUMFLG_SAME_OFFSET; 00224 goto set_want_i; 00225 } 00226 } 00227 00228 /* we've found a match, but now check to see 00229 * if want_i can hint at a better match. */ 00230 if (i != want_i && want_i < s->count 00231 && (!updating_basis_file || s->sums[want_i].offset >= offset 00232 || s->sums[want_i].flags & SUMFLG_SAME_OFFSET) 00233 && sum == s->sums[want_i].sum1 00234 && memcmp(sum2, s->sums[want_i].sum2, s->s2length) == 0) { 00235 /* we've found an adjacent match - the RLL coder 00236 * will be happy */ 00237 i = want_i; 00238 } 00239 set_want_i: 00240 want_i = i + 1; 00241 00242 matched(f,s,buf,offset,i); 00243 offset += s->sums[i].len - 1; 00244 k = (int32)MIN((OFF_T)s->blength, len-offset); 00245 map = (schar *)map_ptr(buf, offset, k); 00246 sum = get_checksum1((char *)map, k); 00247 s1 = sum & 0xFFFF; 00248 s2 = sum >> 16; 00249 matches++; 00250 break; 00251 } while ((i = s->sums[i].chain) >= 0); 00252 00253 null_hash: 00254 backup = offset - last_match; 00255 /* We sometimes read 1 byte prior to last_match... */ 00256 if (backup < 0) 00257 backup = 0; 00258 00259 /* Trim off the first byte from the checksum */ 00260 more = offset + k < len; 00261 map = (schar *)map_ptr(buf, offset - backup, k + more + backup) 00262 + backup; 00263 s1 -= map[0] + CHAR_OFFSET; 00264 s2 -= k * (map[0]+CHAR_OFFSET); 00265 00266 /* Add on the next byte (if there is one) to the checksum */ 00267 if (more) { 00268 s1 += map[k] + CHAR_OFFSET; 00269 s2 += s1; 00270 } else 00271 --k; 00272 00273 /* By matching early we avoid re-reading the 00274 data 3 times in the case where a token 00275 match comes a long way after last 00276 match. The 3 reads are caused by the 00277 running match, the checksum update and the 00278 literal send. */ 00279 if (backup >= s->blength+CHUNK_SIZE && end-offset > CHUNK_SIZE) 00280 matched(f, s, buf, offset - s->blength, -2); 00281 } while (++offset < end); 00282 00283 matched(f, s, buf, len, -1); 00284 map_ptr(buf, len-1, 1); 00285 }
void match_sums | ( | int | f, | |
struct sum_struct * | s, | |||
struct map_struct * | buf, | |||
OFF_T | len | |||
) |
Scan through a origin file, looking for sections that match checksums from the generator, and transmit either literal or token data.
Also calculates the MD4 checksum of the whole file, using the md accumulator. This is transmitted with the file as protection against corruption on the wire.
s | Checksums received from the generator. If s->count == 0 , then there are actually no checksums for this file. | |
len | Length of the file to send. |
参照先 append_mode・buf・build_hash_table()・checksum_seed・sum_struct::count・data_transfer・do_progress・false_alarms・FINFO・sum_struct::flength・hash_hits・hash_search()・last_match・map_ptr()・matches・rprintf()・show_progress()・sum_init()・sum_update()・verbose.
参照元 send_files().
00303 { 00304 char file_sum[MD4_SUM_LENGTH]; 00305 00306 last_match = 0; 00307 false_alarms = 0; 00308 hash_hits = 0; 00309 matches = 0; 00310 data_transfer = 0; 00311 00312 sum_init(checksum_seed); 00313 00314 if (append_mode) { 00315 OFF_T j = 0; 00316 for (j = CHUNK_SIZE; j < s->flength; j += CHUNK_SIZE) { 00317 if (buf && do_progress) 00318 show_progress(last_match, buf->file_size); 00319 sum_update(map_ptr(buf, last_match, CHUNK_SIZE), 00320 CHUNK_SIZE); 00321 last_match = j; 00322 } 00323 if (last_match < s->flength) { 00324 int32 len = s->flength - last_match; 00325 if (buf && do_progress) 00326 show_progress(last_match, buf->file_size); 00327 sum_update(map_ptr(buf, last_match, len), len); 00328 last_match = s->flength; 00329 } 00330 s->count = 0; 00331 } 00332 00333 if (len > 0 && s->count > 0) { 00334 build_hash_table(s); 00335 00336 if (verbose > 2) 00337 rprintf(FINFO,"built hash table\n"); 00338 00339 hash_search(f,s,buf,len); 00340 00341 if (verbose > 2) 00342 rprintf(FINFO,"done hash search\n"); 00343 } else { 00344 OFF_T j; 00345 /* by doing this in pieces we avoid too many seeks */ 00346 for (j = last_match + CHUNK_SIZE; j < len; j += CHUNK_SIZE) 00347 matched(f, s, buf, j, -2); 00348 matched(f, s, buf, len, -1); 00349 } 00350 00351 sum_end(file_sum); 00352 /* If we had a read error, send a bad checksum. */ 00353 if (buf && buf->status != 0) 00354 file_sum[0]++; 00355 00356 if (verbose > 2) 00357 rprintf(FINFO,"sending file_sum\n"); 00358 write_buf(f,file_sum,MD4_SUM_LENGTH); 00359 00360 if (verbose > 2) 00361 rprintf(FINFO, "false_alarms=%d hash_hits=%d matches=%d\n", 00362 false_alarms, hash_hits, matches); 00363 00364 total_hash_hits += hash_hits; 00365 total_false_alarms += false_alarms; 00366 total_matches += matches; 00367 stats.literal_data += data_transfer; 00368 }
void match_report | ( | void | ) |
参照先 FINFO・stats::literal_data・rprintf()・stats・total_false_alarms・total_hash_hits・total_matches・verbose.
参照元 send_files().
00371 { 00372 if (verbose <= 1) 00373 return; 00374 00375 rprintf(FINFO, 00376 "total: matches=%d hash_hits=%d false_alarms=%d data=%.0f\n", 00377 total_matches, total_hash_hits, total_false_alarms, 00378 (double)stats.literal_data); 00379 }
int do_progress |
int checksum_seed |
int append_mode |
int false_alarms [static] |
int hash_hits [static] |
int matches [static] |
int64 data_transfer [static] |
int total_false_alarms [static] |
int total_hash_hits [static] |
int total_matches [static] |
int32* hash_table [static] |
OFF_T last_match [static] |