Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * basebackup_incremental.c
4 : : * code for incremental backup support
5 : : *
6 : : * This code isn't actually in charge of taking an incremental backup;
7 : : * the actual construction of the incremental backup happens in
8 : : * basebackup.c. Here, we're concerned with providing the necessary
9 : : * supports for that operation. In particular, we need to parse the
10 : : * backup manifest supplied by the user taking the incremental backup
11 : : * and extract the required information from it.
12 : : *
13 : : * Portions Copyright (c) 2010-2025, PostgreSQL Global Development Group
14 : : *
15 : : * IDENTIFICATION
16 : : * src/backend/backup/basebackup_incremental.c
17 : : *
18 : : *-------------------------------------------------------------------------
19 : : */
20 : : #include "postgres.h"
21 : :
22 : : #include "access/timeline.h"
23 : : #include "access/xlog.h"
24 : : #include "backup/basebackup_incremental.h"
25 : : #include "backup/walsummary.h"
26 : : #include "common/blkreftable.h"
27 : : #include "common/hashfn.h"
28 : : #include "common/int.h"
29 : : #include "common/parse_manifest.h"
30 : : #include "postmaster/walsummarizer.h"
31 : :
/* Capacity of the block-number array filled per read of a WAL summary. */
#define BLOCKS_PER_READ			512

/*
 * Manifest buffering thresholds, in bytes.
 *
 * The closing lines of the manifest, checksum included, are expected to fit
 * within its final MIN_CHUNK bytes, so that much is always held back from
 * intermediate parse steps. An incremental parse step is triggered when
 * appending more data would push the buffer past MAX_CHUNK bytes.
 */
#define MIN_CHUNK				1024
#define MAX_CHUNK				(128 * 1024)
41 : :
42 : : /*
43 : : * Details extracted from the WAL ranges present in the supplied backup manifest.
44 : : */
45 : : typedef struct
46 : : {
47 : : TimeLineID tli;
48 : : XLogRecPtr start_lsn;
49 : : XLogRecPtr end_lsn;
50 : : } backup_wal_range;
51 : :
52 : : /*
53 : : * Details extracted from the file list present in the supplied backup manifest.
54 : : */
55 : : typedef struct
56 : : {
57 : : uint32 status;
58 : : const char *path;
59 : : uint64 size;
60 : : } backup_file_entry;
61 : :
62 : : static uint32 hash_string_pointer(const char *s);
63 : : #define SH_PREFIX backup_file
64 : : #define SH_ELEMENT_TYPE backup_file_entry
65 : : #define SH_KEY_TYPE const char *
66 : : #define SH_KEY path
67 : : #define SH_HASH_KEY(tb, key) hash_string_pointer(key)
68 : : #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
69 : : #define SH_SCOPE static inline
70 : : #define SH_DECLARE
71 : : #define SH_DEFINE
72 : : #include "lib/simplehash.h"
73 : :
74 : : struct IncrementalBackupInfo
75 : : {
76 : : /* Memory context for this object and its subsidiary objects. */
77 : : MemoryContext mcxt;
78 : :
79 : : /* Temporary buffer for storing the manifest while parsing it. */
80 : : StringInfoData buf;
81 : :
82 : : /* WAL ranges extracted from the backup manifest. */
83 : : List *manifest_wal_ranges;
84 : :
85 : : /*
86 : : * Files extracted from the backup manifest.
87 : : *
88 : : * We don't really need this information, because we use WAL summaries to
89 : : * figure out what's changed. It would be unsafe to just rely on the list
90 : : * of files that existed before, because it's possible for a file to be
91 : : * removed and a new one created with the same name and different
92 : : * contents. In such cases, the whole file must still be sent. We can tell
93 : : * from the WAL summaries whether that happened, but not from the file
94 : : * list.
95 : : *
96 : : * Nonetheless, this data is useful for sanity checking. If a file that we
97 : : * think we shouldn't need to send is not present in the manifest for the
98 : : * prior backup, something has gone terribly wrong. We retain the file
99 : : * names and sizes, but not the checksums or last modified times, for
100 : : * which we have no use.
101 : : *
102 : : * One significant downside of storing this data is that it consumes
103 : : * memory. If that turns out to be a problem, we might have to decide not
104 : : * to retain this information, or to make it optional.
105 : : */
106 : : backup_file_hash *manifest_files;
107 : :
108 : : /*
109 : : * Block-reference table for the incremental backup.
110 : : *
111 : : * It's possible that storing the entire block-reference table in memory
112 : : * will be a problem for some users. The in-memory format that we're using
113 : : * here is pretty efficient, converging to little more than 1 bit per
114 : : * block for relation forks with large numbers of modified blocks. It's
115 : : * possible, however, that if you try to perform an incremental backup of
116 : : * a database with a sufficiently large number of relations on a
117 : : * sufficiently small machine, you could run out of memory here. If that
118 : : * turns out to be a problem in practice, we'll need to be more clever.
119 : : */
120 : : BlockRefTable *brtab;
121 : :
122 : : /*
123 : : * State object for incremental JSON parsing
124 : : */
125 : : JsonManifestParseIncrementalState *inc_state;
126 : : };
127 : :
128 : : static void manifest_process_version(JsonManifestParseContext *context,
129 : : int manifest_version);
130 : : static void manifest_process_system_identifier(JsonManifestParseContext *context,
131 : : uint64 manifest_system_identifier);
132 : : static void manifest_process_file(JsonManifestParseContext *context,
133 : : const char *pathname,
134 : : uint64 size,
135 : : pg_checksum_type checksum_type,
136 : : int checksum_length,
137 : : uint8 *checksum_payload);
138 : : static void manifest_process_wal_range(JsonManifestParseContext *context,
139 : : TimeLineID tli,
140 : : XLogRecPtr start_lsn,
141 : : XLogRecPtr end_lsn);
142 : : pg_noreturn static void manifest_report_error(JsonManifestParseContext *context,
143 : : const char *fmt,...)
144 : : pg_attribute_printf(2, 3);
145 : : static int compare_block_numbers(const void *a, const void *b);
146 : :
147 : : /*
148 : : * Create a new object for storing information extracted from the manifest
149 : : * supplied when creating an incremental backup.
150 : : */
151 : : IncrementalBackupInfo *
626 rhaas@postgresql.org 152 :CBC 11 : CreateIncrementalBackupInfo(MemoryContext mcxt)
153 : : {
154 : : IncrementalBackupInfo *ib;
155 : : MemoryContext oldcontext;
156 : : JsonManifestParseContext *context;
157 : :
158 : 11 : oldcontext = MemoryContextSwitchTo(mcxt);
159 : :
160 : 11 : ib = palloc0(sizeof(IncrementalBackupInfo));
161 : 11 : ib->mcxt = mcxt;
162 : 11 : initStringInfo(&ib->buf);
163 : :
164 : : /*
165 : : * It's hard to guess how many files a "typical" installation will have in
166 : : * the data directory, but a fresh initdb creates almost 1000 files as of
167 : : * this writing, so it seems to make sense for our estimate to
168 : : * substantially higher.
169 : : */
170 : 11 : ib->manifest_files = backup_file_create(mcxt, 10000, NULL);
171 : :
544 andrew@dunslane.net 172 : 11 : context = palloc0(sizeof(JsonManifestParseContext));
173 : : /* Parse the manifest. */
174 : 11 : context->private_data = ib;
175 : 11 : context->version_cb = manifest_process_version;
176 : 11 : context->system_identifier_cb = manifest_process_system_identifier;
177 : 11 : context->per_file_cb = manifest_process_file;
178 : 11 : context->per_wal_range_cb = manifest_process_wal_range;
179 : 11 : context->error_cb = manifest_report_error;
180 : :
181 : 11 : ib->inc_state = json_parse_manifest_incremental_init(context);
182 : :
626 rhaas@postgresql.org 183 : 11 : MemoryContextSwitchTo(oldcontext);
184 : :
185 : 11 : return ib;
186 : : }
187 : :
188 : : /*
189 : : * Before taking an incremental backup, the caller must supply the backup
190 : : * manifest from a prior backup. Each chunk of manifest data received
191 : : * from the client should be passed to this function.
192 : : */
193 : : void
194 : 33 : AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
195 : : int len)
196 : : {
197 : : MemoryContext oldcontext;
198 : :
199 : : /* Switch to our memory context. */
200 : 33 : oldcontext = MemoryContextSwitchTo(ib->mcxt);
201 : :
544 andrew@dunslane.net 202 [ + + + + ]: 33 : if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
203 : : {
204 : : /*
205 : : * time for an incremental parse. We'll do all but the last MIN_CHUNK
206 : : * so that we have enough left for the final piece.
207 : : */
284 alvherre@alvh.no-ip. 208 : 11 : json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
209 : 11 : ib->buf.len - MIN_CHUNK, false);
210 : : /* now remove what we just parsed */
544 andrew@dunslane.net 211 : 10 : memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
212 : : MIN_CHUNK + 1);
213 : 10 : ib->buf.len = MIN_CHUNK;
214 : : }
215 : :
626 rhaas@postgresql.org 216 : 32 : appendBinaryStringInfo(&ib->buf, data, len);
217 : :
218 : : /* Switch back to previous memory context. */
219 : 32 : MemoryContextSwitchTo(oldcontext);
220 : 32 : }
221 : :
222 : : /*
223 : : * Finalize an IncrementalBackupInfo object after all manifest data has
224 : : * been supplied via calls to AppendIncrementalManifestData.
225 : : */
226 : : void
227 : 10 : FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
228 : : {
229 : : MemoryContext oldcontext;
230 : :
231 : : /* Switch to our memory context. */
232 : 10 : oldcontext = MemoryContextSwitchTo(ib->mcxt);
233 : :
234 : : /* Parse the last chunk of the manifest */
284 alvherre@alvh.no-ip. 235 : 10 : json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
236 : 10 : ib->buf.len, true);
237 : :
238 : : /* Done with the buffer, so release memory. */
626 rhaas@postgresql.org 239 : 10 : pfree(ib->buf.data);
240 : 10 : ib->buf.data = NULL;
241 : :
242 : : /* Done with inc_state, so release that memory too */
515 andrew@dunslane.net 243 : 10 : json_parse_manifest_incremental_shutdown(ib->inc_state);
244 : :
245 : : /* Switch back to previous memory context. */
626 rhaas@postgresql.org 246 : 10 : MemoryContextSwitchTo(oldcontext);
247 : 10 : }
248 : :
249 : : /*
250 : : * Prepare to take an incremental backup.
251 : : *
252 : : * Before this function is called, AppendIncrementalManifestData and
253 : : * FinalizeIncrementalManifest should have already been called to pass all
254 : : * the manifest data to this object.
255 : : *
256 : : * This function performs sanity checks on the data extracted from the
257 : : * manifest and figures out for which WAL ranges we need summaries, and
258 : : * whether those summaries are available. Then, it reads and combines the
259 : : * data from those summary files. It also updates the backup_state with the
260 : : * reference TLI and LSN for the prior backup.
261 : : */
262 : : void
263 : 10 : PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
264 : : BackupState *backup_state)
265 : : {
266 : : MemoryContext oldcontext;
267 : : List *expectedTLEs;
268 : : List *all_wslist,
269 : 10 : *required_wslist = NIL;
270 : : ListCell *lc;
271 : : TimeLineHistoryEntry **tlep;
272 : : int num_wal_ranges;
273 : : int i;
274 : 10 : bool found_backup_start_tli = false;
275 : 10 : TimeLineID earliest_wal_range_tli = 0;
276 : 10 : XLogRecPtr earliest_wal_range_start_lsn = InvalidXLogRecPtr;
277 : 10 : TimeLineID latest_wal_range_tli = 0;
278 : :
279 [ - + ]: 10 : Assert(ib->buf.data == NULL);
280 : :
281 : : /* Switch to our memory context. */
282 : 10 : oldcontext = MemoryContextSwitchTo(ib->mcxt);
283 : :
284 : : /*
285 : : * A valid backup manifest must always contain at least one WAL range
286 : : * (usually exactly one, unless the backup spanned a timeline switch).
287 : : */
288 : 10 : num_wal_ranges = list_length(ib->manifest_wal_ranges);
289 [ - + ]: 10 : if (num_wal_ranges == 0)
626 rhaas@postgresql.org 290 [ # # ]:UBC 0 : ereport(ERROR,
291 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
292 : : errmsg("manifest contains no required WAL ranges")));
293 : :
294 : : /*
295 : : * Match up the TLIs that appear in the WAL ranges of the backup manifest
296 : : * with those that appear in this server's timeline history. We expect
297 : : * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
298 : : * not, that's an error.
299 : : *
300 : : * This loop also decides which of the WAL ranges is the manifest is most
301 : : * ancient and which one is the newest, according to the timeline history
302 : : * of this server, and stores TLIs of those WAL ranges into
303 : : * earliest_wal_range_tli and latest_wal_range_tli. It also updates
304 : : * earliest_wal_range_start_lsn to the start LSN of the WAL range for
305 : : * earliest_wal_range_tli.
306 : : *
307 : : * Note that the return value of readTimeLineHistory puts the latest
308 : : * timeline at the beginning of the list, not the end. Hence, the earliest
309 : : * TLI is the one that occurs nearest the end of the list returned by
310 : : * readTimeLineHistory, and the latest TLI is the one that occurs closest
311 : : * to the beginning.
312 : : */
626 rhaas@postgresql.org 313 :CBC 10 : expectedTLEs = readTimeLineHistory(backup_state->starttli);
314 : 10 : tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
315 [ + + ]: 20 : for (i = 0; i < num_wal_ranges; ++i)
316 : : {
317 : 10 : backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
318 : 10 : bool saw_earliest_wal_range_tli = false;
319 : 10 : bool saw_latest_wal_range_tli = false;
320 : :
321 : : /* Search this server's history for this WAL range's TLI. */
322 [ + - + - : 11 : foreach(lc, expectedTLEs)
+ - ]
323 : : {
324 : 11 : TimeLineHistoryEntry *tle = lfirst(lc);
325 : :
326 [ + + ]: 11 : if (tle->tli == range->tli)
327 : : {
328 : 10 : tlep[i] = tle;
329 : 10 : break;
330 : : }
331 : :
332 [ - + ]: 1 : if (tle->tli == earliest_wal_range_tli)
626 rhaas@postgresql.org 333 :UBC 0 : saw_earliest_wal_range_tli = true;
626 rhaas@postgresql.org 334 [ - + ]:CBC 1 : if (tle->tli == latest_wal_range_tli)
626 rhaas@postgresql.org 335 :UBC 0 : saw_latest_wal_range_tli = true;
336 : : }
337 : :
338 : : /*
339 : : * An incremental backup can only be taken relative to a backup that
340 : : * represents a previous state of this server. If the backup requires
341 : : * WAL from a timeline that's not in our history, that definitely
342 : : * isn't the case.
343 : : */
626 rhaas@postgresql.org 344 [ - + ]:CBC 10 : if (tlep[i] == NULL)
626 rhaas@postgresql.org 345 [ # # ]:UBC 0 : ereport(ERROR,
346 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
347 : : errmsg("timeline %u found in manifest, but not in this server's history",
348 : : range->tli)));
349 : :
350 : : /*
351 : : * If we found this TLI in the server's history before encountering
352 : : * the latest TLI seen so far in the server's history, then this TLI
353 : : * is the latest one seen so far.
354 : : *
355 : : * If on the other hand we saw the earliest TLI seen so far before
356 : : * finding this TLI, this TLI is earlier than the earliest one seen so
357 : : * far. And if this is the first TLI for which we've searched, it's
358 : : * also the earliest one seen so far.
359 : : *
360 : : * On the first loop iteration, both things should necessarily be
361 : : * true.
362 : : */
626 rhaas@postgresql.org 363 [ + - ]:CBC 10 : if (!saw_latest_wal_range_tli)
364 : 10 : latest_wal_range_tli = range->tli;
365 [ - + - - ]: 10 : if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
366 : : {
367 : 10 : earliest_wal_range_tli = range->tli;
368 : 10 : earliest_wal_range_start_lsn = range->start_lsn;
369 : : }
370 : : }
371 : :
372 : : /*
373 : : * Propagate information about the prior backup into the backup_label that
374 : : * will be generated for this backup.
375 : : */
376 : 10 : backup_state->istartpoint = earliest_wal_range_start_lsn;
377 : 10 : backup_state->istarttli = earliest_wal_range_tli;
378 : :
379 : : /*
380 : : * Sanity check start and end LSNs for the WAL ranges in the manifest.
381 : : *
382 : : * Commonly, there won't be any timeline switches during the prior backup
383 : : * at all, but if there are, they should happen at the same LSNs that this
384 : : * server switched timelines.
385 : : *
386 : : * Whether there are any timeline switches during the prior backup or not,
387 : : * the prior backup shouldn't require any WAL from a timeline prior to the
388 : : * start of that timeline. It also shouldn't require any WAL from later
389 : : * than the start of this backup.
390 : : *
391 : : * If any of these sanity checks fail, one possible explanation is that
392 : : * the user has generated WAL on the same timeline with the same LSNs more
393 : : * than once. For instance, if two standbys running on timeline 1 were
394 : : * both promoted and (due to a broken archiving setup) both selected new
395 : : * timeline ID 2, then it's possible that one of these checks might trip.
396 : : *
397 : : * Note that there are lots of ways for the user to do something very bad
398 : : * without tripping any of these checks, and they are not intended to be
399 : : * comprehensive. It's pretty hard to see how we could be certain of
400 : : * anything here. However, if there's a problem staring us right in the
401 : : * face, it's best to report it, so we do.
402 : : */
403 [ + + ]: 20 : for (i = 0; i < num_wal_ranges; ++i)
404 : : {
405 : 10 : backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
406 : :
407 [ + - ]: 10 : if (range->tli == earliest_wal_range_tli)
408 : : {
409 [ - + ]: 10 : if (range->start_lsn < tlep[i]->begin)
626 rhaas@postgresql.org 410 [ # # ]:UBC 0 : ereport(ERROR,
411 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
412 : : errmsg("manifest requires WAL from initial timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
413 : : range->tli,
414 : : LSN_FORMAT_ARGS(range->start_lsn),
415 : : LSN_FORMAT_ARGS(tlep[i]->begin))));
416 : : }
417 : : else
418 : : {
419 [ # # ]: 0 : if (range->start_lsn != tlep[i]->begin)
420 [ # # ]: 0 : ereport(ERROR,
421 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
422 : : errmsg("manifest requires WAL from continuation timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
423 : : range->tli,
424 : : LSN_FORMAT_ARGS(range->start_lsn),
425 : : LSN_FORMAT_ARGS(tlep[i]->begin))));
426 : : }
427 : :
626 rhaas@postgresql.org 428 [ + - ]:CBC 10 : if (range->tli == latest_wal_range_tli)
429 : : {
430 [ - + ]: 10 : if (range->end_lsn > backup_state->startpoint)
626 rhaas@postgresql.org 431 [ # # ]:UBC 0 : ereport(ERROR,
432 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
433 : : errmsg("manifest requires WAL from final timeline %u ending at %X/%08X, but this backup starts at %X/%08X",
434 : : range->tli,
435 : : LSN_FORMAT_ARGS(range->end_lsn),
436 : : LSN_FORMAT_ARGS(backup_state->startpoint)),
437 : : errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
438 : : }
439 : : else
440 : : {
441 [ # # ]: 0 : if (range->end_lsn != tlep[i]->end)
442 [ # # ]: 0 : ereport(ERROR,
443 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
444 : : errmsg("manifest requires WAL from non-final timeline %u ending at %X/%08X, but this server switched timelines at %X/%08X",
445 : : range->tli,
446 : : LSN_FORMAT_ARGS(range->end_lsn),
447 : : LSN_FORMAT_ARGS(tlep[i]->end))));
448 : : }
449 : :
450 : : }
451 : :
452 : : /*
453 : : * Wait for WAL summarization to catch up to the backup start LSN. This
454 : : * will throw an error if the WAL summarizer appears to be stuck. If WAL
455 : : * summarization gets disabled while we're waiting, this will return
456 : : * immediately, and we'll error out further down if the WAL summaries are
457 : : * incomplete.
458 : : */
407 rhaas@postgresql.org 459 :CBC 10 : WaitForWalSummarization(backup_state->startpoint);
460 : :
461 : : /*
462 : : * Retrieve a list of all WAL summaries on any timeline that overlap with
463 : : * the LSN range of interest. We could instead call GetWalSummaries() once
464 : : * per timeline in the loop that follows, but that would involve reading
465 : : * the directory multiple times. It should be mildly faster - and perhaps
466 : : * a bit safer - to do it just once.
467 : : */
626 468 : 10 : all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
469 : : backup_state->startpoint);
470 : :
471 : : /*
472 : : * We need WAL summaries for everything that happened during the prior
473 : : * backup and everything that happened afterward up until the point where
474 : : * the current backup started.
475 : : */
476 [ + - + - : 11 : foreach(lc, expectedTLEs)
+ - ]
477 : : {
478 : 11 : TimeLineHistoryEntry *tle = lfirst(lc);
479 : 11 : XLogRecPtr tli_start_lsn = tle->begin;
480 : 11 : XLogRecPtr tli_end_lsn = tle->end;
481 : 11 : XLogRecPtr tli_missing_lsn = InvalidXLogRecPtr;
482 : : List *tli_wslist;
483 : :
484 : : /*
485 : : * Working through the history of this server from the current
486 : : * timeline backwards, we skip everything until we find the timeline
487 : : * where this backup started. Most of the time, this means we won't
488 : : * skip anything at all, as it's unlikely that the timeline has
489 : : * changed since the beginning of the backup moments ago.
490 : : */
491 [ + + ]: 11 : if (tle->tli == backup_state->starttli)
492 : : {
493 : 10 : found_backup_start_tli = true;
494 : 10 : tli_end_lsn = backup_state->startpoint;
495 : : }
496 [ - + ]: 1 : else if (!found_backup_start_tli)
626 rhaas@postgresql.org 497 :UBC 0 : continue;
498 : :
499 : : /*
500 : : * Find the summaries that overlap the LSN range of interest for this
501 : : * timeline. If this is the earliest timeline involved, the range of
502 : : * interest begins with the start LSN of the prior backup; otherwise,
503 : : * it begins at the LSN at which this timeline came into existence. If
504 : : * this is the latest TLI involved, the range of interest ends at the
505 : : * start LSN of the current backup; otherwise, it ends at the point
506 : : * where we switched from this timeline to the next one.
507 : : */
626 rhaas@postgresql.org 508 [ + + ]:CBC 11 : if (tle->tli == earliest_wal_range_tli)
509 : 10 : tli_start_lsn = earliest_wal_range_start_lsn;
510 : 11 : tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
511 : : tli_start_lsn, tli_end_lsn);
512 : :
513 : : /*
514 : : * There is no guarantee that the WAL summaries we found cover the
515 : : * entire range of LSNs for which summaries are required, or indeed
516 : : * that we found any WAL summaries at all. Check whether we have a
517 : : * problem of that sort.
518 : : */
519 [ - + ]: 11 : if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
520 : : &tli_missing_lsn))
521 : : {
626 rhaas@postgresql.org 522 [ # # ]:UBC 0 : if (XLogRecPtrIsInvalid(tli_missing_lsn))
523 [ # # ]: 0 : ereport(ERROR,
524 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
525 : : errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but no summaries for that timeline and LSN range exist",
526 : : tle->tli,
527 : : LSN_FORMAT_ARGS(tli_start_lsn),
528 : : LSN_FORMAT_ARGS(tli_end_lsn))));
529 : : else
530 [ # # ]: 0 : ereport(ERROR,
531 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
532 : : errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but the summaries for that timeline and LSN range are incomplete",
533 : : tle->tli,
534 : : LSN_FORMAT_ARGS(tli_start_lsn),
535 : : LSN_FORMAT_ARGS(tli_end_lsn)),
536 : : errdetail("The first unsummarized LSN in this range is %X/%08X.",
537 : : LSN_FORMAT_ARGS(tli_missing_lsn))));
538 : : }
539 : :
540 : : /*
541 : : * Remember that we need to read these summaries.
542 : : *
543 : : * Technically, it's possible that this could read more files than
544 : : * required, since tli_wslist in theory could contain redundant
545 : : * summaries. For instance, if we have a summary from 0/10000000 to
546 : : * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
547 : : * latter subsumes the former and the former could be ignored.
548 : : *
549 : : * We ignore this possibility because the WAL summarizer only tries to
550 : : * generate summaries that do not overlap. If somehow they exist,
551 : : * we'll do a bit of extra work but the results should still be
552 : : * correct.
553 : : */
626 rhaas@postgresql.org 554 :CBC 11 : required_wslist = list_concat(required_wslist, tli_wslist);
555 : :
556 : : /*
557 : : * Timelines earlier than the one in which the prior backup began are
558 : : * not relevant.
559 : : */
560 [ + + ]: 11 : if (tle->tli == earliest_wal_range_tli)
561 : 10 : break;
562 : : }
563 : :
564 : : /*
565 : : * Read all of the required block reference table files and merge all of
566 : : * the data into a single in-memory block reference table.
567 : : *
568 : : * See the comments for struct IncrementalBackupInfo for some thoughts on
569 : : * memory usage.
570 : : */
571 : 10 : ib->brtab = CreateEmptyBlockRefTable();
572 [ + - + + : 28 : foreach(lc, required_wslist)
+ + ]
573 : : {
574 : 18 : WalSummaryFile *ws = lfirst(lc);
575 : : WalSummaryIO wsio;
576 : : BlockRefTableReader *reader;
577 : : RelFileLocator rlocator;
578 : : ForkNumber forknum;
579 : : BlockNumber limit_block;
580 : : BlockNumber blocks[BLOCKS_PER_READ];
581 : :
582 : 18 : wsio.file = OpenWalSummaryFile(ws, false);
583 : 18 : wsio.filepos = 0;
584 [ + + ]: 18 : ereport(DEBUG1,
585 : : (errmsg_internal("reading WAL summary file \"%s\"",
586 : : FilePathName(wsio.file))));
587 : 18 : reader = CreateBlockRefTableReader(ReadWalSummary, &wsio,
588 : : FilePathName(wsio.file),
589 : : ReportWalSummaryError, NULL);
590 [ + + ]: 369 : while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
591 : : &limit_block))
592 : : {
593 : 351 : BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
594 : : forknum, limit_block);
595 : :
596 : : while (1)
597 : 254 : {
598 : : unsigned nblocks;
599 : : unsigned i;
600 : :
601 : 605 : nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
602 : : BLOCKS_PER_READ);
603 [ + + ]: 605 : if (nblocks == 0)
604 : 351 : break;
605 : :
606 [ + + ]: 1198 : for (i = 0; i < nblocks; ++i)
607 : 944 : BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
608 : : forknum, blocks[i]);
609 : : }
610 : : }
611 : 18 : DestroyBlockRefTableReader(reader);
612 : 18 : FileClose(wsio.file);
613 : : }
614 : :
615 : : /* Switch back to previous memory context. */
616 : 10 : MemoryContextSwitchTo(oldcontext);
617 : 10 : }
618 : :
619 : : /*
620 : : * Get the pathname that should be used when a file is sent incrementally.
621 : : *
622 : : * The result is a palloc'd string.
623 : : */
624 : : char *
625 : 1605 : GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
626 : : ForkNumber forknum, unsigned segno)
627 : : {
628 : : RelPathStr path;
629 : : char *lastslash;
630 : : char *ipath;
631 : :
552 heikki.linnakangas@i 632 : 1605 : path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
633 : : forknum);
634 : :
193 andres@anarazel.de 635 : 1605 : lastslash = strrchr(path.str, '/');
626 rhaas@postgresql.org 636 [ - + ]: 1605 : Assert(lastslash != NULL);
637 : 1605 : *lastslash = '\0';
638 : :
639 [ - + ]: 1605 : if (segno > 0)
193 andres@anarazel.de 640 :UBC 0 : ipath = psprintf("%s/INCREMENTAL.%s.%u", path.str, lastslash + 1, segno);
641 : : else
193 andres@anarazel.de 642 :CBC 1605 : ipath = psprintf("%s/INCREMENTAL.%s", path.str, lastslash + 1);
643 : :
626 rhaas@postgresql.org 644 : 1605 : return ipath;
645 : : }
646 : :
647 : : /*
648 : : * How should we back up a particular file as part of an incremental backup?
649 : : *
650 : : * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
651 : : * file just as if this were not an incremental backup. The contents of the
652 : : * relative_block_numbers array are unspecified in this case.
653 : : *
654 : : * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
655 : : * an incremental file in the backup instead of the entire file. On return,
656 : : * *num_blocks_required will be set to the number of blocks that need to be
657 : : * sent, and the actual block numbers will have been stored in
658 : : * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
659 : : * In addition, *truncation_block_length will be set to the value that should
660 : : * be included in the incremental file.
661 : : */
662 : : FileBackupMethod
663 : 10142 : GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
664 : : Oid dboid, Oid spcoid,
665 : : RelFileNumber relfilenumber, ForkNumber forknum,
666 : : unsigned segno, size_t size,
667 : : unsigned *num_blocks_required,
668 : : BlockNumber *relative_block_numbers,
669 : : unsigned *truncation_block_length)
670 : : {
671 : : BlockNumber limit_block;
672 : : BlockNumber start_blkno;
673 : : BlockNumber stop_blkno;
674 : : RelFileLocator rlocator;
675 : : BlockRefTableEntry *brtentry;
676 : : unsigned i;
677 : : unsigned nblocks;
678 : :
679 : : /* Should only be called after PrepareForIncrementalBackup. */
680 [ - + ]: 10142 : Assert(ib->buf.data == NULL);
681 : :
682 : : /*
683 : : * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
684 : : * should have legal values.
685 : : */
686 [ - + ]: 10142 : Assert(OidIsValid(spcoid));
687 [ - + ]: 10142 : Assert(RelFileNumberIsValid(relfilenumber));
688 : :
689 : : /*
690 : : * If the file size is too large or not a multiple of BLCKSZ, then
691 : : * something weird is happening, so give up and send the whole file.
692 : : */
693 [ + - - + ]: 10142 : if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
626 rhaas@postgresql.org 694 :UBC 0 : return BACK_UP_FILE_FULLY;
695 : :
696 : : /*
697 : : * The free-space map fork is not properly WAL-logged, so we need to
698 : : * backup the entire file every time.
699 : : */
626 rhaas@postgresql.org 700 [ + + ]:CBC 10142 : if (forknum == FSM_FORKNUM)
701 : 1237 : return BACK_UP_FILE_FULLY;
702 : :
703 : : /*
704 : : * If this file was not part of the prior backup, back it up fully.
705 : : *
706 : : * If this file was created after the prior backup and before the start of
707 : : * the current backup, then the WAL summary information will tell us to
708 : : * back up the whole file. However, if this file was created after the
709 : : * start of the current backup, then the WAL summary won't know anything
710 : : * about it. Without this logic, we would erroneously conclude that it was
711 : : * OK to send it incrementally.
712 : : *
713 : : * Note that the file could have existed at the time of the prior backup,
714 : : * gotten deleted, and then a new file with the same name could have been
715 : : * created. In that case, this logic won't prevent the file from being
716 : : * backed up incrementally. But, if the deletion happened before the start
717 : : * of the current backup, the limit block will be 0, inducing a full
718 : : * backup. If the deletion happened after the start of the current backup,
719 : : * reconstruction will erroneously combine blocks from the current
720 : : * lifespan of the file with blocks from the previous lifespan -- but in
721 : : * this type of case, WAL replay to reach backup consistency should remove
722 : : * and recreate the file anyway, so the initial bogus contents should not
723 : : * matter.
724 : : */
725 [ + + ]: 8905 : if (backup_file_lookup(ib->manifest_files, path) == NULL)
726 : : {
727 : : char *ipath;
728 : :
729 : 1605 : ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
730 : : forknum, segno);
731 [ + + ]: 1605 : if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
732 : 274 : return BACK_UP_FILE_FULLY;
733 : : }
734 : :
735 : : /*
736 : : * Look up the special block reference table entry for the database as a
737 : : * whole.
738 : : */
739 : 8631 : rlocator.spcOid = spcoid;
740 : 8631 : rlocator.dbOid = dboid;
551 741 : 8631 : rlocator.relNumber = 0;
742 [ + + ]: 8631 : if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
743 : : &limit_block) != NULL)
744 : : {
745 : : /*
746 : : * According to the WAL summary, this database OID/tablespace OID
747 : : * pairing has been created since the previous backup. So, everything
748 : : * in it must be backed up fully.
749 : : */
750 : 261 : return BACK_UP_FILE_FULLY;
751 : : }
752 : :
753 : : /* Look up the block reference table entry for this relfilenode. */
626 754 : 8370 : rlocator.relNumber = relfilenumber;
755 : 8370 : brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
756 : : &limit_block);
757 : :
758 : : /*
759 : : * If there is no entry, then there have been no WAL-logged changes to the
760 : : * relation since the predecessor backup was taken, so we can back it up
761 : : * incrementally and need not include any modified blocks.
762 : : *
763 : : * However, if the file is zero-length, we should do a full backup,
764 : : * because an incremental file is always more than zero length, and it's
765 : : * silly to take an incremental backup when a full backup would be
766 : : * smaller.
767 : : */
768 [ + + ]: 8370 : if (brtentry == NULL)
769 : : {
770 [ + + ]: 8336 : if (size == 0)
771 : 1700 : return BACK_UP_FILE_FULLY;
772 : 6636 : *num_blocks_required = 0;
773 : 6636 : *truncation_block_length = size / BLCKSZ;
774 : 6636 : return BACK_UP_FILE_INCREMENTALLY;
775 : : }
776 : :
777 : : /*
778 : : * If the limit_block is less than or equal to the point where this
779 : : * segment starts, send the whole file.
780 : : */
781 [ - + ]: 34 : if (limit_block <= segno * RELSEG_SIZE)
626 rhaas@postgresql.org 782 :UBC 0 : return BACK_UP_FILE_FULLY;
783 : :
784 : : /*
785 : : * Get relevant entries from the block reference table entry.
786 : : *
787 : : * We shouldn't overflow computing the start or stop block numbers, but if
788 : : * it manages to happen somehow, detect it and throw an error.
789 : : */
626 rhaas@postgresql.org 790 :CBC 34 : start_blkno = segno * RELSEG_SIZE;
791 : 34 : stop_blkno = start_blkno + (size / BLCKSZ);
792 [ + - - + ]: 34 : if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
626 rhaas@postgresql.org 793 [ # # ]:UBC 0 : ereport(ERROR,
794 : : errcode(ERRCODE_INTERNAL_ERROR),
795 : : errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
796 : : segno, size));
797 : :
798 : : /*
799 : : * This will write *absolute* block numbers into the output array, but
800 : : * we'll transpose them below.
801 : : */
626 rhaas@postgresql.org 802 :CBC 34 : nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
803 : : relative_block_numbers, RELSEG_SIZE);
804 [ - + ]: 34 : Assert(nblocks <= RELSEG_SIZE);
805 : :
806 : : /*
807 : : * If we're going to have to send nearly all of the blocks, then just send
808 : : * the whole file, because that won't require much extra storage or
809 : : * transfer and will speed up and simplify backup restoration. It's not
810 : : * clear what threshold is most appropriate here and perhaps it ought to
811 : : * be configurable, but for now we're just going to say that if we'd need
812 : : * to send 90% of the blocks anyway, give up and send the whole file.
813 : : *
814 : : * NB: If you change the threshold here, at least make sure to back up the
815 : : * file fully when every single block must be sent, because there's
816 : : * nothing good about sending an incremental file in that case.
817 : : */
818 [ + + ]: 34 : if (nblocks * BLCKSZ > size * 0.9)
819 : 9 : return BACK_UP_FILE_FULLY;
820 : :
821 : : /*
822 : : * Looks like we can send an incremental file, so sort the block numbers
823 : : * and then transpose them from absolute block numbers to relative block
824 : : * numbers if necessary.
825 : : *
826 : : * NB: If the block reference table was using the bitmap representation
827 : : * for a given chunk, the block numbers in that chunk will already be
828 : : * sorted, but when the array-of-offsets representation is used, we can
829 : : * receive block numbers here out of order.
830 : : */
513 tmunro@postgresql.or 831 : 25 : qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
832 : : compare_block_numbers);
833 [ - + ]: 25 : if (start_blkno != 0)
834 : : {
513 tmunro@postgresql.or 835 [ # # ]:UBC 0 : for (i = 0; i < nblocks; ++i)
836 : 0 : relative_block_numbers[i] -= start_blkno;
837 : : }
626 rhaas@postgresql.org 838 :CBC 25 : *num_blocks_required = nblocks;
839 : :
840 : : /*
841 : : * The truncation block length is the minimum length of the reconstructed
842 : : * file. Any block numbers below this threshold that are not present in
843 : : * the backup need to be fetched from the prior backup. At or above this
844 : : * threshold, blocks should only be included in the result if they are
845 : : * present in the backup. (This may require inserting zero blocks if the
846 : : * blocks included in the backup are non-consecutive.)
847 : : */
848 : 25 : *truncation_block_length = size / BLCKSZ;
849 [ - + ]: 25 : if (BlockNumberIsValid(limit_block))
850 : : {
626 rhaas@postgresql.org 851 :UBC 0 : unsigned relative_limit = limit_block - segno * RELSEG_SIZE;
852 : :
853 [ # # ]: 0 : if (*truncation_block_length < relative_limit)
854 : 0 : *truncation_block_length = relative_limit;
855 : : }
856 : :
857 : : /* Send it incrementally. */
626 rhaas@postgresql.org 858 :CBC 25 : return BACK_UP_FILE_INCREMENTALLY;
859 : : }
860 : :
861 : : /*
862 : : * Compute the size for a header of an incremental file containing a given
863 : : * number of blocks. The header is rounded to a multiple of BLCKSZ, but
864 : : * only if the file will store some block data.
865 : : */
866 : : size_t
519 tomas.vondra@postgre 867 : 6661 : GetIncrementalHeaderSize(unsigned num_blocks_required)
868 : : {
869 : : size_t result;
870 : :
871 : : /* Make sure we're not going to overflow. */
872 [ - + ]: 6661 : Assert(num_blocks_required <= RELSEG_SIZE);
873 : :
874 : : /*
875 : : * Three four byte quantities (magic number, truncation block length,
876 : : * block count) followed by block numbers.
877 : : */
878 : 6661 : result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);
879 : :
880 : : /*
881 : : * Round the header size to a multiple of BLCKSZ - when not a multiple of
882 : : * BLCKSZ, add the missing fraction of a block. But do this only if the
883 : : * file will store data for some blocks, otherwise keep it small.
884 : : */
885 [ + + + - ]: 6661 : if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
886 : 25 : result += BLCKSZ - (result % BLCKSZ);
887 : :
888 : 6661 : return result;
889 : : }
890 : :
891 : : /*
892 : : * Compute the size for an incremental file containing a given number of blocks.
893 : : */
894 : : size_t
626 rhaas@postgresql.org 895 : 6661 : GetIncrementalFileSize(unsigned num_blocks_required)
896 : : {
897 : : size_t result;
898 : :
899 : : /* Make sure we're not going to overflow. */
900 [ - + ]: 6661 : Assert(num_blocks_required <= RELSEG_SIZE);
901 : :
902 : : /*
903 : : * Header with three four byte quantities (magic number, truncation block
904 : : * length, block count) followed by block numbers, rounded to a multiple
905 : : * of BLCKSZ (for files with block data), followed by block contents.
906 : : */
519 tomas.vondra@postgre 907 : 6661 : result = GetIncrementalHeaderSize(num_blocks_required);
908 : 6661 : result += BLCKSZ * num_blocks_required;
909 : :
626 rhaas@postgresql.org 910 : 6661 : return result;
911 : : }
912 : :
913 : : /*
914 : : * Helper function for filemap hash table.
915 : : */
916 : : static uint32
917 : 21168 : hash_string_pointer(const char *s)
918 : : {
919 : 21168 : unsigned char *ss = (unsigned char *) s;
920 : :
921 : 21168 : return hash_bytes(ss, strlen(s));
922 : : }
923 : :
924 : : /*
925 : : * This callback to validate the manifest version for incremental backup.
926 : : */
927 : : static void
542 928 : 11 : manifest_process_version(JsonManifestParseContext *context,
929 : : int manifest_version)
930 : : {
931 : : /* Incremental backups don't work with manifest version 1 */
932 [ - + ]: 11 : if (manifest_version == 1)
542 rhaas@postgresql.org 933 :UBC 0 : context->error_cb(context,
934 : : "backup manifest version 1 does not support incremental backup");
542 rhaas@postgresql.org 935 :CBC 11 : }
936 : :
937 : : /*
938 : : * This callback to validate the manifest system identifier against the current
939 : : * database server.
940 : : */
941 : : static void
942 : 11 : manifest_process_system_identifier(JsonManifestParseContext *context,
943 : : uint64 manifest_system_identifier)
944 : : {
945 : : uint64 system_identifier;
946 : :
947 : : /* Get system identifier of current system */
948 : 11 : system_identifier = GetSystemIdentifier();
949 : :
950 [ + + ]: 11 : if (manifest_system_identifier != system_identifier)
951 : 1 : context->error_cb(context,
952 : : "system identifier in backup manifest is %" PRIu64 ", but database system identifier is %" PRIu64,
953 : : manifest_system_identifier,
954 : : system_identifier);
955 : 10 : }
956 : :
957 : : /*
958 : : * This callback is invoked for each file mentioned in the backup manifest.
959 : : *
960 : : * We store the path to each file and the size of each file for sanity-checking
961 : : * purposes. For further details, see comments for IncrementalBackupInfo.
962 : : */
963 : : static void
626 964 : 10325 : manifest_process_file(JsonManifestParseContext *context,
965 : : const char *pathname, uint64 size,
966 : : pg_checksum_type checksum_type,
967 : : int checksum_length,
968 : : uint8 *checksum_payload)
969 : : {
970 : 10325 : IncrementalBackupInfo *ib = context->private_data;
971 : : backup_file_entry *entry;
972 : : bool found;
973 : :
974 : 10325 : entry = backup_file_insert(ib->manifest_files, pathname, &found);
975 [ + - ]: 10325 : if (!found)
976 : : {
977 : 10325 : entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
978 : : pathname);
979 : 10325 : entry->size = size;
980 : : }
981 : 10325 : }
982 : :
983 : : /*
984 : : * This callback is invoked for each WAL range mentioned in the backup
985 : : * manifest.
986 : : *
987 : : * We're just interested in learning the oldest LSN and the corresponding TLI
988 : : * that appear in any WAL range.
989 : : */
990 : : static void
991 : 10 : manifest_process_wal_range(JsonManifestParseContext *context,
992 : : TimeLineID tli, XLogRecPtr start_lsn,
993 : : XLogRecPtr end_lsn)
994 : : {
995 : 10 : IncrementalBackupInfo *ib = context->private_data;
996 : 10 : backup_wal_range *range = palloc(sizeof(backup_wal_range));
997 : :
998 : 10 : range->tli = tli;
999 : 10 : range->start_lsn = start_lsn;
1000 : 10 : range->end_lsn = end_lsn;
1001 : 10 : ib->manifest_wal_ranges = lappend(ib->manifest_wal_ranges, range);
1002 : 10 : }
1003 : :
1004 : : /*
1005 : : * This callback is invoked if an error occurs while parsing the backup
1006 : : * manifest.
1007 : : */
1008 : : static void
1009 : 1 : manifest_report_error(JsonManifestParseContext *context, const char *fmt,...)
1010 : : {
1011 : : StringInfoData errbuf;
1012 : :
1013 : 1 : initStringInfo(&errbuf);
1014 : :
1015 : : for (;;)
626 rhaas@postgresql.org 1016 :UBC 0 : {
1017 : : va_list ap;
1018 : : int needed;
1019 : :
626 rhaas@postgresql.org 1020 :CBC 1 : va_start(ap, fmt);
1021 : 1 : needed = appendStringInfoVA(&errbuf, fmt, ap);
1022 : 1 : va_end(ap);
1023 [ + - ]: 1 : if (needed == 0)
1024 : 1 : break;
626 rhaas@postgresql.org 1025 :UBC 0 : enlargeStringInfo(&errbuf, needed);
1026 : : }
1027 : :
626 rhaas@postgresql.org 1028 [ + - ]:CBC 1 : ereport(ERROR,
1029 : : errmsg_internal("%s", errbuf.data));
1030 : : }
1031 : :
1032 : : /*
1033 : : * Quicksort comparator for block numbers.
1034 : : */
1035 : : static int
1036 : 21 : compare_block_numbers(const void *a, const void *b)
1037 : : {
1038 : 21 : BlockNumber aa = *(BlockNumber *) a;
1039 : 21 : BlockNumber bb = *(BlockNumber *) b;
1040 : :
568 nathan@postgresql.or 1041 : 21 : return pg_cmp_u32(aa, bb);
1042 : : }
|