Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright 2020 Google LLC |
3 | | |
4 | | Use of this source code is governed by a BSD-style |
5 | | license that can be found in the LICENSE file or at |
6 | | https://developers.google.com/open-source/licenses/bsd |
7 | | */ |
8 | | |
9 | | #include "block.h" |
10 | | |
11 | | #include "system.h" |
12 | | |
13 | | #include "constants.h" |
14 | | #include "record.h" |
15 | | #include "reftable.h" |
16 | | #include "zlib.h" |
17 | | |
/* Returns the byte length of the file header for the given format
   version. Aborts on any version other than 1 or 2. */
int header_size(int version)
{
	if (version == 1)
		return 24;
	if (version == 2)
		return 28;
	abort();
}
28 | | |
/* Returns the byte length of the file footer for the given format
   version. Aborts on any version other than 1 or 2. */
int footer_size(int version)
{
	if (version == 1)
		return 68;
	if (version == 2)
		return 72;
	abort();
}
39 | | |
40 | | int block_writer_register_restart(struct block_writer *w, int n, bool restart, |
41 | | struct slice key); |
42 | | |
43 | | void block_writer_init(struct block_writer *bw, byte typ, byte *buf, |
44 | | uint32_t block_size, uint32_t header_off, int hash_size) |
45 | 454 | { |
46 | 454 | bw->buf = buf; |
47 | 454 | bw->hash_size = hash_size; |
48 | 454 | bw->block_size = block_size; |
49 | 454 | bw->header_off = header_off; |
50 | 454 | bw->buf[header_off] = typ; |
51 | 454 | bw->next = header_off + 4; |
52 | 454 | bw->restart_interval = 16; |
53 | 454 | bw->entries = 0; |
54 | 454 | bw->restart_len = 0; |
55 | 454 | bw->last_key.len = 0; |
56 | 454 | } |
57 | | |
58 | | byte block_writer_type(struct block_writer *bw) |
59 | 2.99k | { |
60 | 2.99k | return bw->buf[bw->header_off]; |
61 | 2.99k | } |
62 | | |
63 | | /* adds the record to the block. Returns -1 if it does not fit, 0 on |
64 | | success */ |
65 | | int block_writer_add(struct block_writer *w, struct record rec) |
66 | 1.91k | { |
67 | 1.91k | struct slice empty = { 0 }; |
68 | 1.91k | struct slice last = w->entries % w->restart_interval == 0 ? empty : |
69 | 1.91k | w->last_key; |
70 | 1.91k | struct slice out = { |
71 | 1.91k | .buf = w->buf + w->next, |
72 | 1.91k | .len = w->block_size - w->next, |
73 | 1.91k | }; |
74 | 1.91k | |
75 | 1.91k | struct slice start = out; |
76 | 1.91k | |
77 | 1.91k | bool restart = false; |
78 | 1.91k | struct slice key = { 0 }; |
79 | 1.91k | int n = 0; |
80 | 1.91k | |
81 | 1.91k | record_key(rec, &key); |
82 | 1.91k | n = encode_key(&restart, out, last, key, record_val_type(rec)); |
83 | 1.91k | if (n < 0) { |
84 | 43 | goto err; |
85 | 43 | } |
86 | 1.87k | slice_consume(&out, n); |
87 | 1.87k | |
88 | 1.87k | n = record_encode(rec, out, w->hash_size); |
89 | 1.87k | if (n < 0) { |
90 | 118 | goto err; |
91 | 118 | } |
92 | 1.75k | slice_consume(&out, n); |
93 | 1.75k | |
94 | 1.75k | if (block_writer_register_restart(w, start.len - out.len, restart, |
95 | 1.75k | key) < 0) { |
96 | 5 | goto err; |
97 | 5 | } |
98 | 1.75k | |
99 | 1.75k | slice_clear(&key); |
100 | 1.75k | return 0; |
101 | 166 | |
102 | 166 | err: |
103 | 166 | slice_clear(&key); |
104 | 166 | return -1; |
105 | 1.75k | } |
106 | | |
107 | | int block_writer_register_restart(struct block_writer *w, int n, bool restart, |
108 | | struct slice key) |
109 | 1.75k | { |
110 | 1.75k | int rlen = w->restart_len; |
111 | 1.75k | if (rlen >= MAX_RESTARTS) { |
112 | 0 | restart = false; |
113 | 0 | } |
114 | 1.75k | |
115 | 1.75k | if (restart) { |
116 | 702 | rlen++; |
117 | 702 | } |
118 | 1.75k | if (2 + 3 * rlen + n > w->block_size - w->next) { |
119 | 5 | return -1; |
120 | 5 | } |
121 | 1.75k | if (restart) { |
122 | 697 | if (w->restart_len == w->restart_cap) { |
123 | 260 | w->restart_cap = w->restart_cap * 2 + 1; |
124 | 260 | w->restarts = reftable_realloc( |
125 | 260 | w->restarts, sizeof(uint32_t) * w->restart_cap); |
126 | 260 | } |
127 | 697 | |
128 | 697 | w->restarts[w->restart_len++] = w->next; |
129 | 697 | } |
130 | 1.75k | |
131 | 1.75k | w->next += n; |
132 | 1.75k | slice_copy(&w->last_key, key); |
133 | 1.75k | w->entries++; |
134 | 1.75k | return 0; |
135 | 1.75k | } |
136 | | |
/* Finalizes the block: appends the restart offsets and their count,
   writes the block length into the header, and for log blocks deflates
   the payload in place. Returns the number of bytes used in the buffer,
   or REFTABLE_ZLIB_ERROR on compression failure. */
int block_writer_finish(struct block_writer *w)
{
	int i = 0;
	/* Restart offsets are stored as big-endian 24-bit values. */
	for (i = 0; i < w->restart_len; i++) {
		put_be24(w->buf + w->next, w->restarts[i]);
		w->next += 3;
	}

	/* The restart table is followed by its 2-byte entry count. */
	put_be16(w->buf + w->next, w->restart_len);
	w->next += 2;
	/* Block length goes into the 3 bytes after the type byte. */
	put_be24(w->buf + 1 + w->header_off, w->next);

	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		/* Log blocks are zlib-compressed; the 4-byte block header
		   stays uncompressed. */
		int block_header_skip = 4 + w->header_off;
		struct slice compressed = { 0 };
		int zresult = 0;
		uLongf src_len = w->next - block_header_skip;
		slice_resize(&compressed, src_len);

		while (1) {
			uLongf dest_len = compressed.len;

			/* level 9 = best compression. */
			zresult = compress2(compressed.buf, &dest_len,
					    w->buf + block_header_skip, src_len,
					    9);
			if (zresult == Z_BUF_ERROR) {
				/* Output buffer too small; double and retry. */
				slice_resize(&compressed, 2 * compressed.len);
				continue;
			}

			if (Z_OK != zresult) {
				slice_clear(&compressed);
				return REFTABLE_ZLIB_ERROR;
			}

			/* NOTE(review): for incompressible input dest_len can
			   exceed src_len, making this memcpy write past the
			   original payload extent in w->buf — confirm the
			   buffer is sized to allow this. */
			memcpy(w->buf + block_header_skip, compressed.buf,
			       dest_len);
			w->next = dest_len + block_header_skip;
			slice_clear(&compressed);
			break;
		}
	}
	return w->next;
}
181 | | |
182 | | byte block_reader_type(struct block_reader *r) |
183 | 2.34k | { |
184 | 2.34k | return r->block.data[r->header_off]; |
185 | 2.34k | } |
186 | | |
187 | | int block_reader_init(struct block_reader *br, struct reftable_block *block, |
188 | | uint32_t header_off, uint32_t table_block_size, |
189 | | int hash_size) |
190 | 1.59k | { |
191 | 1.59k | uint32_t full_block_size = table_block_size; |
192 | 1.59k | byte typ = block->data[header_off]; |
193 | 1.59k | uint32_t sz = get_be24(block->data + header_off + 1); |
194 | 1.59k | |
195 | 1.59k | if (!is_block_type(typ)) { |
196 | 0 | return REFTABLE_FORMAT_ERROR; |
197 | 0 | } |
198 | 1.59k | |
199 | 1.59k | if (typ == BLOCK_TYPE_LOG) { |
200 | 45 | struct slice uncompressed = { 0 }; |
201 | 45 | int block_header_skip = 4 + header_off; |
202 | 45 | uLongf dst_len = sz - block_header_skip; /* total size of dest |
203 | 45 | buffer. */ |
204 | 45 | uLongf src_len = block->len - block_header_skip; |
205 | 45 | |
206 | 45 | /* Log blocks specify the *uncompressed* size in their header. |
207 | 45 | */ |
208 | 45 | slice_resize(&uncompressed, sz); |
209 | 45 | |
210 | 45 | /* Copy over the block header verbatim. It's not compressed. */ |
211 | 45 | memcpy(uncompressed.buf, block->data, block_header_skip); |
212 | 45 | |
213 | 45 | /* Uncompress */ |
214 | 45 | if (Z_OK != uncompress_return_consumed( |
215 | 45 | uncompressed.buf + block_header_skip, |
216 | 45 | &dst_len, block->data + block_header_skip, |
217 | 45 | &src_len)) { |
218 | 0 | slice_clear(&uncompressed); |
219 | 0 | return REFTABLE_ZLIB_ERROR; |
220 | 0 | } |
221 | 45 | |
222 | 45 | if (dst_len + block_header_skip != sz) { |
223 | 0 | return REFTABLE_FORMAT_ERROR; |
224 | 0 | } |
225 | 45 | |
226 | 45 | /* We're done with the input data. */ |
227 | 45 | reftable_block_done(block); |
228 | 45 | block->data = uncompressed.buf; |
229 | 45 | block->len = sz; |
230 | 45 | block->source = malloc_block_source(); |
231 | 45 | full_block_size = src_len + block_header_skip; |
232 | 1.54k | } else if (full_block_size == 0) { |
233 | 0 | full_block_size = sz; |
234 | 1.54k | } else if (sz < full_block_size && sz < block->len && |
235 | 1.54k | block->data[sz] != 0) { |
236 | 3 | /* If the block is smaller than the full block size, it is |
237 | 3 | padded (data followed by '\0') or the next block is |
238 | 3 | unaligned. */ |
239 | 3 | full_block_size = sz; |
240 | 3 | } |
241 | 1.59k | |
242 | 1.59k | { |
243 | 1.59k | uint16_t restart_count = get_be16(block->data + sz - 2); |
244 | 1.59k | uint32_t restart_start = sz - 2 - 3 * restart_count; |
245 | 1.59k | |
246 | 1.59k | byte *restart_bytes = block->data + restart_start; |
247 | 1.59k | |
248 | 1.59k | /* transfer ownership. */ |
249 | 1.59k | br->block = *block; |
250 | 1.59k | block->data = NULL; |
251 | 1.59k | block->len = 0; |
252 | 1.59k | |
253 | 1.59k | br->hash_size = hash_size; |
254 | 1.59k | br->block_len = restart_start; |
255 | 1.59k | br->full_block_size = full_block_size; |
256 | 1.59k | br->header_off = header_off; |
257 | 1.59k | br->restart_count = restart_count; |
258 | 1.59k | br->restart_bytes = restart_bytes; |
259 | 1.59k | } |
260 | 1.59k | |
261 | 1.59k | return 0; |
262 | 1.59k | } |
263 | | |
264 | | static uint32_t block_reader_restart_offset(struct block_reader *br, int i) |
265 | 1.63k | { |
266 | 1.63k | return get_be24(br->restart_bytes + 3 * i); |
267 | 1.63k | } |
268 | | |
269 | | void block_reader_start(struct block_reader *br, struct block_iter *it) |
270 | 1.59k | { |
271 | 1.59k | it->br = br; |
272 | 1.59k | slice_resize(&it->last_key, 0); |
273 | 1.59k | it->next_off = br->header_off + 4; |
274 | 1.59k | } |
275 | | |
/* Arguments for the binary search over a block's restart points. */
struct restart_find_args {
	int error; /* set nonzero if a restart key failed to decode. */
	struct slice key; /* the key being searched for. */
	struct block_reader *r; /* the block being searched. */
};
281 | | |
282 | | static int restart_key_less(size_t idx, void *args) |
283 | 1.59k | { |
284 | 1.59k | struct restart_find_args *a = (struct restart_find_args *)args; |
285 | 1.59k | uint32_t off = block_reader_restart_offset(a->r, idx); |
286 | 1.59k | struct slice in = { |
287 | 1.59k | .buf = a->r->block.data + off, |
288 | 1.59k | .len = a->r->block_len - off, |
289 | 1.59k | }; |
290 | 1.59k | |
291 | 1.59k | /* the restart key is verbatim in the block, so this could avoid the |
292 | 1.59k | alloc for decoding the key */ |
293 | 1.59k | struct slice rkey = { 0 }; |
294 | 1.59k | struct slice last_key = { 0 }; |
295 | 1.59k | byte unused_extra; |
296 | 1.59k | int n = decode_key(&rkey, &unused_extra, last_key, in); |
297 | 1.59k | if (n < 0) { |
298 | 0 | a->error = 1; |
299 | 0 | return -1; |
300 | 0 | } |
301 | 1.59k | |
302 | 1.59k | { |
303 | 1.59k | int result = slice_compare(a->key, rkey); |
304 | 1.59k | slice_clear(&rkey); |
305 | 1.59k | return result; |
306 | 1.59k | } |
307 | 1.59k | } |
308 | | |
309 | | void block_iter_copy_from(struct block_iter *dest, struct block_iter *src) |
310 | 14.2k | { |
311 | 14.2k | dest->br = src->br; |
312 | 14.2k | dest->next_off = src->next_off; |
313 | 14.2k | slice_copy(&dest->last_key, src->last_key); |
314 | 14.2k | } |
315 | | |
316 | | int block_iter_next(struct block_iter *it, struct record rec) |
317 | 9.66k | { |
318 | 9.66k | struct slice in = { |
319 | 9.66k | .buf = it->br->block.data + it->next_off, |
320 | 9.66k | .len = it->br->block_len - it->next_off, |
321 | 9.66k | }; |
322 | 9.66k | struct slice start = in; |
323 | 9.66k | struct slice key = { 0 }; |
324 | 9.66k | byte extra = 0; |
325 | 9.66k | int n = 0; |
326 | 9.66k | |
327 | 9.66k | if (it->next_off >= it->br->block_len) { |
328 | 965 | return 1; |
329 | 965 | } |
330 | 8.70k | |
331 | 8.70k | n = decode_key(&key, &extra, it->last_key, in); |
332 | 8.70k | if (n < 0) { |
333 | 0 | return -1; |
334 | 0 | } |
335 | 8.70k | |
336 | 8.70k | slice_consume(&in, n); |
337 | 8.70k | n = record_decode(rec, key, extra, in, it->br->hash_size); |
338 | 8.70k | if (n < 0) { |
339 | 0 | return -1; |
340 | 0 | } |
341 | 8.70k | slice_consume(&in, n); |
342 | 8.70k | |
343 | 8.70k | slice_copy(&it->last_key, key); |
344 | 8.70k | it->next_off += start.len - in.len; |
345 | 8.70k | slice_clear(&key); |
346 | 8.70k | return 0; |
347 | 8.70k | } |
348 | | |
349 | | int block_reader_first_key(struct block_reader *br, struct slice *key) |
350 | 418 | { |
351 | 418 | struct slice empty = { 0 }; |
352 | 418 | int off = br->header_off + 4; |
353 | 418 | struct slice in = { |
354 | 418 | .buf = br->block.data + off, |
355 | 418 | .len = br->block_len - off, |
356 | 418 | }; |
357 | 418 | |
358 | 418 | byte extra = 0; |
359 | 418 | int n = decode_key(key, &extra, empty, in); |
360 | 418 | if (n < 0) { |
361 | 0 | return n; |
362 | 0 | } |
363 | 418 | return 0; |
364 | 418 | } |
365 | | |
366 | | int block_iter_seek(struct block_iter *it, struct slice want) |
367 | 1.14k | { |
368 | 1.14k | return block_reader_seek(it->br, it, want); |
369 | 1.14k | } |
370 | | |
371 | | void block_iter_close(struct block_iter *it) |
372 | 2.34k | { |
373 | 2.34k | slice_clear(&it->last_key); |
374 | 2.34k | } |
375 | | |
/* Positions it at the first record of br whose key is >= want.
   Returns 0 on success (possibly positioned at end-of-block) or a
   negative reftable error code. */
int block_reader_seek(struct block_reader *br, struct block_iter *it,
		      struct slice want)
{
	struct restart_find_args args = {
		.key = want,
		.r = br,
	};
	struct record rec = new_record(block_reader_type(br));
	struct slice key = { 0 };
	int err = 0;
	struct block_iter next = { 0 };

	/* Binary search over the restart points narrows the scan to one
	   restart interval. */
	int i = binsearch(br->restart_count, &restart_key_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto exit;
	}

	it->br = br;
	if (i > 0) {
		/* Scan from the last restart whose key is below want. */
		i--;
		it->next_off = block_reader_restart_offset(br, i);
	} else {
		/* All restart keys are >= want; scan from the block start. */
		it->next_off = br->header_off + 4;
	}

	/* We're looking for the last entry less/equal than the wanted key, so
	   we have to go one entry too far and then back up.
	*/
	while (true) {
		block_iter_copy_from(&next, it);
		err = block_iter_next(&next, rec);
		if (err < 0) {
			goto exit;
		}

		/* On err > 0 (end of block) rec was not overwritten; key
		   still holds the previous record's key. */
		record_key(rec, &key);
		if (err > 0 || slice_compare(key, want) >= 0) {
			/* it stays one record behind next, i.e. at the
			   first record >= want (or at end of block). */
			err = 0;
			goto exit;
		}

		block_iter_copy_from(it, &next);
	}

exit:
	slice_clear(&key);
	slice_clear(&next.last_key);
	record_destroy(&rec);

	return err;
}
428 | | |
429 | | void block_writer_clear(struct block_writer *bw) |
430 | 218 | { |
431 | 218 | FREE_AND_NULL(bw->restarts); |
432 | 218 | slice_clear(&bw->last_key); |
433 | 218 | /* the block is not owned. */ |
434 | 218 | } |