Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * walsummarizer.c
4 : : *
5 : : * Background process to perform WAL summarization, if it is enabled.
6 : : * It continuously scans the write-ahead log and periodically emits a
7 : : * summary file which indicates which blocks in which relation forks
8 : : * were modified by WAL records in the LSN range covered by the summary
9 : : * file. See walsummary.c and blkreftable.c for more details on the
10 : : * naming and contents of WAL summary files.
11 : : *
12 : : * If configured to do so, this background process will also remove WAL
13 : : * summary files when the file timestamp is older than a configurable
14 : : * threshold (but only if the WAL has been removed first).
15 : : *
16 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
17 : : *
18 : : * IDENTIFICATION
19 : : * src/backend/postmaster/walsummarizer.c
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "postgres.h"
24 : :
25 : : #include "access/timeline.h"
26 : : #include "access/visibilitymap.h"
27 : : #include "access/xlog.h"
28 : : #include "access/xlog_internal.h"
29 : : #include "access/xlogrecovery.h"
30 : : #include "access/xlogutils.h"
31 : : #include "backup/walsummary.h"
32 : : #include "catalog/storage_xlog.h"
33 : : #include "commands/dbcommands_xlog.h"
34 : : #include "common/blkreftable.h"
35 : : #include "libpq/pqsignal.h"
36 : : #include "miscadmin.h"
37 : : #include "pgstat.h"
38 : : #include "postmaster/auxprocess.h"
39 : : #include "postmaster/interrupt.h"
40 : : #include "postmaster/walsummarizer.h"
41 : : #include "replication/walreceiver.h"
42 : : #include "storage/aio_subsys.h"
43 : : #include "storage/fd.h"
44 : : #include "storage/ipc.h"
45 : : #include "storage/latch.h"
46 : : #include "storage/lwlock.h"
47 : : #include "storage/proc.h"
48 : : #include "storage/procsignal.h"
49 : : #include "storage/shmem.h"
50 : : #include "storage/subsystems.h"
51 : : #include "utils/guc.h"
52 : : #include "utils/memutils.h"
53 : : #include "utils/wait_event.h"
54 : :
55 : : /*
56 : : * Data in shared memory related to WAL summarization.
57 : : */
58 : : typedef struct
59 : : {
60 : : /*
61 : : * These fields are protected by WALSummarizerLock.
62 : : *
63 : : * Until we've discovered what summary files already exist on disk and
64 : : * stored that information in shared memory, initialized is false and the
65 : : * other fields here contain no meaningful information. After that has
66 : : * been done, initialized is true.
67 : : *
68 : : * summarized_tli and summarized_lsn indicate the TLI and LSN at
69 : : * which the next summary file will start. Normally, these are the LSN and
70 : : * TLI at which the last file ended; in such case, lsn_is_exact is true.
71 : : * If, however, the LSN is just an approximation, then lsn_is_exact is
72 : : * false. This can happen if, for example, there are no existing WAL
73 : : * summary files at startup. In that case, we have to derive the position
74 : : * at which to start summarizing from the WAL files that exist on disk,
75 : : * and so the LSN might point to the start of the next file even though
76 : : * that might happen to be in the middle of a WAL record.
77 : : *
78 : : * summarizer_pgprocno is the proc number of the summarizer process, if
79 : : * one is running, or else INVALID_PROC_NUMBER.
80 : : *
81 : : * pending_lsn is used by the summarizer to advertise the ending LSN of a
82 : : * record it has recently read. It shouldn't ever be less than
83 : : * summarized_lsn, but might be greater, because the summarizer buffers
84 : : * data for a range of LSNs in memory before writing out a new file.
85 : : */
86 : : bool initialized;
87 : : TimeLineID summarized_tli;
88 : : XLogRecPtr summarized_lsn;
89 : : bool lsn_is_exact;
90 : : ProcNumber summarizer_pgprocno;
91 : : XLogRecPtr pending_lsn;
92 : :
93 : : /*
94 : : * This field handles its own synchronization.
95 : : */
96 : : ConditionVariable summary_file_cv;
97 : : } WalSummarizerData;
98 : :
99 : : /*
100 : : * Private data for our xlogreader's page read callback.
101 : : */
102 : : typedef struct
103 : : {
104 : : TimeLineID tli;
105 : : bool historic;
106 : : XLogRecPtr read_upto;
107 : : bool end_of_wal;
108 : : } SummarizerReadLocalXLogPrivate;
109 : :
110 : : /* Pointer to shared memory state. */
111 : : static WalSummarizerData *WalSummarizerCtl;
112 : :
113 : : static void WalSummarizerShmemRequest(void *arg);
114 : : static void WalSummarizerShmemInit(void *arg);
115 : :
116 : : const ShmemCallbacks WalSummarizerShmemCallbacks = {
117 : : .request_fn = WalSummarizerShmemRequest,
118 : : .init_fn = WalSummarizerShmemInit,
119 : : };
120 : :
121 : : /*
122 : : * When we reach end of WAL and need to read more, we sleep for a number of
123 : : * milliseconds that is an integer multiple of MS_PER_SLEEP_QUANTUM. This is
124 : : * the multiplier. It should vary between 1 and MAX_SLEEP_QUANTA, depending
125 : : * on system activity. See summarizer_wait_for_wal() for how we adjust this.
126 : : */
127 : : static long sleep_quanta = 1;
128 : :
129 : : /*
130 : : * The sleep time will always be a multiple of 200ms and will not exceed
131 : : * thirty seconds (150 * 200 = 30 * 1000). Note that the timeout here needs
132 : : * to be substantially less than the maximum amount of time for which an
133 : : * incremental backup will wait for this process to catch up. Otherwise, an
134 : : * incremental backup might time out on an idle system just because we sleep
135 : : * for too long.
136 : : */
137 : : #define MAX_SLEEP_QUANTA 150
138 : : #define MS_PER_SLEEP_QUANTUM 200
139 : :
140 : : /*
141 : : * This is a count of the number of pages of WAL that we've read since the
142 : : * last time we waited for more WAL to appear.
143 : : */
144 : : static long pages_read_since_last_sleep = 0;
145 : :
146 : : /*
147 : : * Most recent RedoRecPtr value observed by MaybeRemoveOldWalSummaries.
148 : : */
149 : : static XLogRecPtr redo_pointer_at_last_summary_removal = InvalidXLogRecPtr;
150 : :
151 : : /*
152 : : * GUC parameters
153 : : */
154 : : bool summarize_wal = false;
155 : : int wal_summary_keep_time = 10 * HOURS_PER_DAY * MINS_PER_HOUR;
156 : :
157 : : static void WalSummarizerShutdown(int code, Datum arg);
158 : : static XLogRecPtr GetLatestLSN(TimeLineID *tli);
159 : : static void ProcessWalSummarizerInterrupts(void);
160 : : static XLogRecPtr SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn,
161 : : bool exact, XLogRecPtr switch_lsn,
162 : : XLogRecPtr maximum_lsn);
163 : : static void SummarizeDbaseRecord(XLogReaderState *xlogreader,
164 : : BlockRefTable *brtab);
165 : : static void SummarizeSmgrRecord(XLogReaderState *xlogreader,
166 : : BlockRefTable *brtab);
167 : : static void SummarizeXactRecord(XLogReaderState *xlogreader,
168 : : BlockRefTable *brtab);
169 : : static bool SummarizeXlogRecord(XLogReaderState *xlogreader,
170 : : bool *new_fast_forward);
171 : : static int summarizer_read_local_xlog_page(XLogReaderState *state,
172 : : XLogRecPtr targetPagePtr,
173 : : int reqLen,
174 : : XLogRecPtr targetRecPtr,
175 : : char *cur_page);
176 : : static void summarizer_wait_for_wal(void);
177 : : static void MaybeRemoveOldWalSummaries(void);
178 : :
179 : : /*
180 : : * Register shared memory space needed by this module.
181 : : */
182 : : static void
29 heikki.linnakangas@i 183 :GNC 1244 : WalSummarizerShmemRequest(void *arg)
184 : : {
185 : 1244 : ShmemRequestStruct(.name = "Wal Summarizer Ctl",
186 : : .size = sizeof(WalSummarizerData),
187 : : .ptr = (void **) &WalSummarizerCtl,
188 : : );
867 rhaas@postgresql.org 189 :GIC 1244 : }
190 : :
191 : : /*
192 : : * Initialize shared memory for this module.
193 : : */
194 : : static void
29 heikki.linnakangas@i 195 :GNC 1241 : WalSummarizerShmemInit(void *arg)
196 : : {
197 : : /*
198 : : * We're just filling in dummy values here -- the real initialization will
199 : : * happen when GetOldestUnsummarizedLSN() is called for the first time.
200 : : */
201 : 1241 : WalSummarizerCtl->initialized = false;
202 : 1241 : WalSummarizerCtl->summarized_tli = 0;
203 : 1241 : WalSummarizerCtl->summarized_lsn = InvalidXLogRecPtr;
204 : 1241 : WalSummarizerCtl->lsn_is_exact = false;
205 : 1241 : WalSummarizerCtl->summarizer_pgprocno = INVALID_PROC_NUMBER;
206 : 1241 : WalSummarizerCtl->pending_lsn = InvalidXLogRecPtr;
207 : 1241 : ConditionVariableInit(&WalSummarizerCtl->summary_file_cv);
867 rhaas@postgresql.org 208 :CBC 1241 : }
209 : :
210 : : /*
211 : : * Entry point for walsummarizer process.
212 : : */
213 : : void
438 peter@eisentraut.org 214 : 3 : WalSummarizerMain(const void *startup_data, size_t startup_data_len)
215 : : {
216 : : sigjmp_buf local_sigjmp_buf;
217 : : MemoryContext context;
218 : :
219 : : /*
220 : : * Within this function, 'current_lsn' and 'current_tli' refer to the
221 : : * point from which the next WAL summary file should start. 'exact' is
222 : : * true if 'current_lsn' is known to be the start of a WAL record or WAL
223 : : * segment, and false if it might be in the middle of a record someplace.
224 : : *
225 : : * 'switch_lsn' and 'switch_tli', if set, are the LSN at which we need to
226 : : * switch to a new timeline and the timeline to which we need to switch.
227 : : * If not set, we either haven't figured out the answers yet or we're
228 : : * already on the latest timeline.
229 : : */
230 : : XLogRecPtr current_lsn;
231 : : TimeLineID current_tli;
232 : : bool exact;
867 rhaas@postgresql.org 233 : 3 : XLogRecPtr switch_lsn = InvalidXLogRecPtr;
234 : 3 : TimeLineID switch_tli = 0;
235 : :
778 heikki.linnakangas@i 236 [ - + ]: 3 : Assert(startup_data_len == 0);
237 : :
238 : 3 : AuxiliaryProcessMainCommon();
239 : :
867 rhaas@postgresql.org 240 [ - + ]: 3 : ereport(DEBUG1,
241 : : (errmsg_internal("WAL summarizer started")));
242 : :
243 : : /*
244 : : * Properly accept or ignore signals the postmaster might send us
245 : : */
246 : 3 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
21 andrew@dunslane.net 247 :GNC 3 : pqsignal(SIGINT, PG_SIG_IGN); /* no query to cancel */
867 rhaas@postgresql.org 248 :CBC 3 : pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
249 : : /* SIGQUIT handler was already set up by InitPostmasterChild */
21 andrew@dunslane.net 250 :GNC 3 : pqsignal(SIGALRM, PG_SIG_IGN);
251 : 3 : pqsignal(SIGPIPE, PG_SIG_IGN);
867 rhaas@postgresql.org 252 :CBC 3 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
21 andrew@dunslane.net 253 :GNC 3 : pqsignal(SIGUSR2, PG_SIG_IGN); /* not used */
254 : :
255 : : /* Advertise ourselves. */
845 rhaas@postgresql.org 256 :CBC 3 : on_shmem_exit(WalSummarizerShutdown, (Datum) 0);
867 257 : 3 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
803 heikki.linnakangas@i 258 : 3 : WalSummarizerCtl->summarizer_pgprocno = MyProcNumber;
867 rhaas@postgresql.org 259 : 3 : LWLockRelease(WALSummarizerLock);
260 : :
261 : : /* Create and switch to a memory context that we can reset on error. */
262 : 3 : context = AllocSetContextCreate(TopMemoryContext,
263 : : "Wal Summarizer",
264 : : ALLOCSET_DEFAULT_SIZES);
265 : 3 : MemoryContextSwitchTo(context);
266 : :
267 : : /*
268 : : * Reset some signals that are accepted by postmaster but not here
269 : : */
21 andrew@dunslane.net 270 :GNC 3 : pqsignal(SIGCHLD, PG_SIG_DFL);
271 : :
272 : : /*
273 : : * If an exception is encountered, processing resumes here.
274 : : */
867 rhaas@postgresql.org 275 [ - + ]:CBC 3 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
276 : : {
277 : : /* Since not using PG_TRY, must reset error stack by hand */
867 rhaas@postgresql.org 278 :UBC 0 : error_context_stack = NULL;
279 : :
280 : : /* Prevent interrupts while cleaning up */
281 : 0 : HOLD_INTERRUPTS();
282 : :
283 : : /* Report the error to the server log */
284 : 0 : EmitErrorReport();
285 : :
286 : : /* Release resources we might have acquired. */
287 : 0 : LWLockReleaseAll();
288 : 0 : ConditionVariableCancelSleep();
289 : 0 : pgstat_report_wait_end();
414 andres@anarazel.de 290 : 0 : pgaio_error_cleanup();
867 rhaas@postgresql.org 291 : 0 : ReleaseAuxProcessResources(false);
292 : 0 : AtEOXact_Files(false);
293 : 0 : AtEOXact_HashTables(false);
294 : :
295 : : /*
296 : : * Now return to normal top-level context and clear ErrorContext for
297 : : * next time.
298 : : */
299 : 0 : MemoryContextSwitchTo(context);
300 : 0 : FlushErrorState();
301 : :
302 : : /* Flush any leaked data in the top-level context */
303 : 0 : MemoryContextReset(context);
304 : :
305 : : /* Now we can allow interrupts again */
306 [ # # ]: 0 : RESUME_INTERRUPTS();
307 : :
308 : : /*
309 : : * Sleep for 10 seconds before attempting to resume operations in
310 : : * order to avoid excessive logging.
311 : : *
312 : : * Many of the likely error conditions are things that will repeat
313 : : * every time. For example, if the WAL can't be read or the summary
314 : : * can't be written, only administrator action will cure the problem.
315 : : * So a really fast retry time doesn't seem to be especially
316 : : * beneficial, and it will clutter the logs.
317 : : */
577 heikki.linnakangas@i 318 : 0 : (void) WaitLatch(NULL,
319 : : WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
320 : : 10000,
321 : : WAIT_EVENT_WAL_SUMMARIZER_ERROR);
322 : : }
323 : :
324 : : /* We can now handle ereport(ERROR) */
867 rhaas@postgresql.org 325 :CBC 3 : PG_exception_stack = &local_sigjmp_buf;
326 : :
327 : : /*
328 : : * Unblock signals (they were blocked when the postmaster forked us)
329 : : */
330 : 3 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
331 : :
332 : : /*
333 : : * Fetch information about previous progress from shared memory, and ask
334 : : * GetOldestUnsummarizedLSN to reset pending_lsn to summarized_lsn. We
335 : : * might be recovering from an error, and if so, pending_lsn might have
336 : : * advanced past summarized_lsn, but any WAL we read previously has been
337 : : * lost and will need to be reread.
338 : : *
339 : : * If we discover that WAL summarization is not enabled, just exit.
340 : : */
679 341 : 3 : current_lsn = GetOldestUnsummarizedLSN(&current_tli, &exact);
180 alvherre@kurilemu.de 342 [ + - ]:GNC 3 : if (!XLogRecPtrIsValid(current_lsn))
867 rhaas@postgresql.org 343 :UBC 0 : proc_exit(0);
344 : :
345 : : /*
346 : : * Loop forever
347 : : */
348 : : for (;;)
867 rhaas@postgresql.org 349 :CBC 18 : {
350 : : XLogRecPtr latest_lsn;
351 : : TimeLineID latest_tli;
352 : : XLogRecPtr end_of_summary_lsn;
353 : :
354 : : /* Flush any leaked data in the top-level context */
355 : 21 : MemoryContextReset(context);
356 : :
357 : : /* Process any signals received recently. */
426 heikki.linnakangas@i 358 : 21 : ProcessWalSummarizerInterrupts();
359 : :
360 : : /* If it's time to remove any old WAL summaries, do that now. */
867 rhaas@postgresql.org 361 : 21 : MaybeRemoveOldWalSummaries();
362 : :
363 : : /* Find the LSN and TLI up to which we can safely summarize. */
364 : 21 : latest_lsn = GetLatestLSN(&latest_tli);
365 : :
366 : : /*
367 : : * If we're summarizing a historic timeline and we haven't yet
368 : : * computed the point at which to switch to the next timeline, do that
369 : : * now.
370 : : *
371 : : * Note that if this is a standby, what was previously the current
372 : : * timeline could become historic at any time.
373 : : *
374 : : * We could try to make this more efficient by caching the results of
375 : : * readTimeLineHistory when latest_tli has not changed, but since we
376 : : * only have to do this once per timeline switch, we probably wouldn't
377 : : * save any significant amount of work in practice.
378 : : */
180 alvherre@kurilemu.de 379 [ - + - - ]:GNC 21 : if (current_tli != latest_tli && !XLogRecPtrIsValid(switch_lsn))
380 : : {
867 rhaas@postgresql.org 381 :UBC 0 : List *tles = readTimeLineHistory(latest_tli);
382 : :
383 : 0 : switch_lsn = tliSwitchPoint(current_tli, tles, &switch_tli);
384 [ # # ]: 0 : ereport(DEBUG1,
385 : : errmsg_internal("switch point from TLI %u to TLI %u is at %X/%08X",
386 : : current_tli, switch_tli, LSN_FORMAT_ARGS(switch_lsn)));
387 : : }
388 : :
389 : : /*
390 : : * If we've reached the switch LSN, we can't summarize anything else
391 : : * on this timeline. Switch to the next timeline and go around again,
392 : : * backing up to the exact switch point if we passed it.
393 : : */
180 alvherre@kurilemu.de 394 [ - + - - ]:GNC 21 : if (XLogRecPtrIsValid(switch_lsn) && current_lsn >= switch_lsn)
395 : : {
396 : : /* Restart summarization from switch point. */
867 rhaas@postgresql.org 397 :UBC 0 : current_tli = switch_tli;
648 398 : 0 : current_lsn = switch_lsn;
399 : :
400 : : /* Next timeline and switch point, if any, not yet known. */
867 401 : 0 : switch_lsn = InvalidXLogRecPtr;
402 : 0 : switch_tli = 0;
403 : :
404 : : /* Update (really, rewind, if needed) state in shared memory. */
648 405 : 0 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
406 : 0 : WalSummarizerCtl->summarized_lsn = current_lsn;
407 : 0 : WalSummarizerCtl->summarized_tli = current_tli;
408 : 0 : WalSummarizerCtl->lsn_is_exact = true;
409 : 0 : WalSummarizerCtl->pending_lsn = current_lsn;
410 : 0 : LWLockRelease(WALSummarizerLock);
411 : :
867 412 : 0 : continue;
413 : : }
414 : :
415 : : /* Summarize WAL. */
867 rhaas@postgresql.org 416 :CBC 21 : end_of_summary_lsn = SummarizeWAL(current_tli,
417 : : current_lsn, exact,
418 : : switch_lsn, latest_lsn);
180 alvherre@kurilemu.de 419 [ - + ]:GNC 18 : Assert(XLogRecPtrIsValid(end_of_summary_lsn));
867 rhaas@postgresql.org 420 [ - + ]:CBC 18 : Assert(end_of_summary_lsn >= current_lsn);
421 : :
422 : : /*
423 : : * Update state for next loop iteration.
424 : : *
425 : : * Next summary file should start from exactly where this one ended.
426 : : */
427 : 18 : current_lsn = end_of_summary_lsn;
428 : 18 : exact = true;
429 : :
430 : : /* Update state in shared memory. */
431 : 18 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
432 : 18 : WalSummarizerCtl->summarized_lsn = end_of_summary_lsn;
433 : 18 : WalSummarizerCtl->summarized_tli = current_tli;
434 : 18 : WalSummarizerCtl->lsn_is_exact = true;
435 : 18 : WalSummarizerCtl->pending_lsn = end_of_summary_lsn;
436 : 18 : LWLockRelease(WALSummarizerLock);
437 : :
438 : : /* Wake up anyone waiting for more summary files to be written. */
439 : 18 : ConditionVariableBroadcast(&WalSummarizerCtl->summary_file_cv);
440 : : }
441 : : }
442 : :
443 : : /*
444 : : * Get information about the state of the WAL summarizer.
445 : : */
446 : : void
845 rhaas@postgresql.org 447 :UBC 0 : GetWalSummarizerState(TimeLineID *summarized_tli, XLogRecPtr *summarized_lsn,
448 : : XLogRecPtr *pending_lsn, int *summarizer_pid)
449 : : {
450 : 0 : LWLockAcquire(WALSummarizerLock, LW_SHARED);
451 [ # # ]: 0 : if (!WalSummarizerCtl->initialized)
452 : : {
453 : : /*
454 : : * If initialized is false, the rest of the structure contents are
455 : : * undefined.
456 : : */
457 : 0 : *summarized_tli = 0;
458 : 0 : *summarized_lsn = InvalidXLogRecPtr;
459 : 0 : *pending_lsn = InvalidXLogRecPtr;
460 : 0 : *summarizer_pid = -1;
461 : : }
462 : : else
463 : : {
464 : 0 : int summarizer_pgprocno = WalSummarizerCtl->summarizer_pgprocno;
465 : :
466 : 0 : *summarized_tli = WalSummarizerCtl->summarized_tli;
467 : 0 : *summarized_lsn = WalSummarizerCtl->summarized_lsn;
793 heikki.linnakangas@i 468 [ # # ]: 0 : if (summarizer_pgprocno == INVALID_PROC_NUMBER)
469 : : {
470 : : /*
471 : : * If the summarizer has exited, the fact that it had processed
472 : : * beyond summarized_lsn is irrelevant now.
473 : : */
845 rhaas@postgresql.org 474 : 0 : *pending_lsn = WalSummarizerCtl->summarized_lsn;
475 : 0 : *summarizer_pid = -1;
476 : : }
477 : : else
478 : : {
479 : 0 : *pending_lsn = WalSummarizerCtl->pending_lsn;
480 : :
481 : : /*
482 : : * We're not fussed about inexact answers here, since they could
483 : : * become stale instantly, so we don't bother taking the lock, but
484 : : * make sure that invalid PID values are normalized to -1.
485 : : */
486 : 0 : *summarizer_pid = GetPGProcByNumber(summarizer_pgprocno)->pid;
487 [ # # ]: 0 : if (*summarizer_pid <= 0)
488 : 0 : *summarizer_pid = -1;
489 : : }
490 : : }
491 : 0 : LWLockRelease(WALSummarizerLock);
492 : 0 : }
493 : :
494 : : /*
495 : : * Get the oldest LSN in this server's timeline history that has not yet been
496 : : * summarized, and update shared memory state as appropriate.
497 : : *
498 : : * If tli != NULL, *tli will be set to the TLI for the LSN that is returned.
499 : : *
500 : : * If lsn_is_exact != NULL, *lsn_is_exact will be set to true if the returned LSN is
501 : : * necessarily the start of a WAL record and false if it's just the beginning
502 : : * of a WAL segment.
503 : : */
504 : : XLogRecPtr
679 rhaas@postgresql.org 505 :CBC 2559 : GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
506 : : {
507 : : TimeLineID latest_tli;
508 : : int n;
509 : : List *tles;
860 510 : 2559 : XLogRecPtr unsummarized_lsn = InvalidXLogRecPtr;
867 511 : 2559 : TimeLineID unsummarized_tli = 0;
512 : 2559 : bool should_make_exact = false;
513 : : List *existing_summaries;
514 : : ListCell *lc;
679 515 : 2559 : bool am_wal_summarizer = AmWalSummarizerProcess();
516 : :
517 : : /* If not summarizing WAL, do nothing. */
867 518 [ + + ]: 2559 : if (!summarize_wal)
519 : 2549 : return InvalidXLogRecPtr;
520 : :
521 : : /*
522 : : * If we are not the WAL summarizer process, then we normally just want to
523 : : * read the values from shared memory. However, as an exception, if shared
524 : : * memory hasn't been initialized yet, then we need to do that so that we
525 : : * can read legal values and not remove any WAL too early.
526 : : */
679 527 [ + + ]: 10 : if (!am_wal_summarizer)
528 : : {
529 : 7 : LWLockAcquire(WALSummarizerLock, LW_SHARED);
530 : :
867 531 [ + + ]: 7 : if (WalSummarizerCtl->initialized)
532 : : {
533 : 6 : unsummarized_lsn = WalSummarizerCtl->summarized_lsn;
534 [ - + ]: 6 : if (tli != NULL)
867 rhaas@postgresql.org 535 :UBC 0 : *tli = WalSummarizerCtl->summarized_tli;
867 rhaas@postgresql.org 536 [ - + ]:CBC 6 : if (lsn_is_exact != NULL)
867 rhaas@postgresql.org 537 :UBC 0 : *lsn_is_exact = WalSummarizerCtl->lsn_is_exact;
867 rhaas@postgresql.org 538 :CBC 6 : LWLockRelease(WALSummarizerLock);
539 : 6 : return unsummarized_lsn;
540 : : }
541 : :
542 : 1 : LWLockRelease(WALSummarizerLock);
543 : : }
544 : :
545 : : /*
546 : : * Find the oldest timeline on which WAL still exists, and the earliest
547 : : * segment for which it exists.
548 : : *
549 : : * Note that we do this every time the WAL summarizer process restarts or
550 : : * recovers from an error, in case the contents of pg_wal have changed
551 : : * under us e.g. if some files were removed, either manually - which
552 : : * shouldn't really happen, but might - or by postgres itself, if
553 : : * summarize_wal was turned off and then back on again.
554 : : */
555 : 4 : (void) GetLatestLSN(&latest_tli);
556 : 4 : tles = readTimeLineHistory(latest_tli);
557 [ + - ]: 4 : for (n = list_length(tles) - 1; n >= 0; --n)
558 : : {
559 : 4 : TimeLineHistoryEntry *tle = list_nth(tles, n);
560 : : XLogSegNo oldest_segno;
561 : :
562 : 4 : oldest_segno = XLogGetOldestSegno(tle->tli);
563 [ + - ]: 4 : if (oldest_segno != 0)
564 : : {
565 : : /* Compute oldest LSN that still exists on disk. */
566 : 4 : XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size,
567 : : unsummarized_lsn);
568 : :
569 : 4 : unsummarized_tli = tle->tli;
570 : 4 : break;
571 : : }
572 : : }
573 : :
574 : : /*
575 : : * Don't try to summarize anything older than the end LSN of the newest
576 : : * summary file that exists for this timeline.
577 : : */
578 : : existing_summaries =
579 : 4 : GetWalSummaries(unsummarized_tli,
580 : : InvalidXLogRecPtr, InvalidXLogRecPtr);
581 [ - + - - : 4 : foreach(lc, existing_summaries)
- + ]
582 : : {
867 rhaas@postgresql.org 583 :UBC 0 : WalSummaryFile *ws = lfirst(lc);
584 : :
585 [ # # ]: 0 : if (ws->end_lsn > unsummarized_lsn)
586 : : {
587 : 0 : unsummarized_lsn = ws->end_lsn;
588 : 0 : should_make_exact = true;
589 : : }
590 : : }
591 : :
592 : : /* It really should not be possible for us to find no WAL. */
679 rhaas@postgresql.org 593 [ - + ]:CBC 4 : if (unsummarized_tli == 0)
679 rhaas@postgresql.org 594 [ # # ]:UBC 0 : ereport(ERROR,
595 : : errcode(ERRCODE_INTERNAL_ERROR),
596 : : errmsg_internal("no WAL found on timeline %u", latest_tli));
597 : :
598 : : /*
599 : : * If we're the WAL summarizer, we always want to store the values we just
600 : : * computed into shared memory, because those are the values we're going
601 : : * to use to drive our operation, and so they are the authoritative
602 : : * values. Otherwise, we only store values into shared memory if shared
603 : : * memory is uninitialized. Our values are not canonical in such a case,
604 : : * but it's better to have something than nothing, to guide WAL retention.
605 : : */
679 rhaas@postgresql.org 606 :CBC 4 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
676 607 [ + + + - ]: 4 : if (am_wal_summarizer || !WalSummarizerCtl->initialized)
608 : : {
679 609 : 4 : WalSummarizerCtl->initialized = true;
610 : 4 : WalSummarizerCtl->summarized_lsn = unsummarized_lsn;
611 : 4 : WalSummarizerCtl->summarized_tli = unsummarized_tli;
612 : 4 : WalSummarizerCtl->lsn_is_exact = should_make_exact;
613 : 4 : WalSummarizerCtl->pending_lsn = unsummarized_lsn;
614 : : }
615 : : else
679 rhaas@postgresql.org 616 :UBC 0 : unsummarized_lsn = WalSummarizerCtl->summarized_lsn;
617 : :
618 : : /* Also return the values to the caller as required. */
867 rhaas@postgresql.org 619 [ + + ]:CBC 4 : if (tli != NULL)
620 : 3 : *tli = WalSummarizerCtl->summarized_tli;
621 [ + + ]: 4 : if (lsn_is_exact != NULL)
622 : 3 : *lsn_is_exact = WalSummarizerCtl->lsn_is_exact;
623 : 4 : LWLockRelease(WALSummarizerLock);
624 : :
625 : 4 : return unsummarized_lsn;
626 : : }
627 : :
628 : : /*
629 : : * Wake up the WAL summarizer process.
630 : : *
631 : : * This might not work, because there's no guarantee that the WAL summarizer
632 : : * process was successfully started, and it also might have started but
633 : : * subsequently terminated. So, under normal circumstances, this will get the
634 : : * latch set, but there's no guarantee.
635 : : */
636 : : void
550 heikki.linnakangas@i 637 : 1731 : WakeupWalSummarizer(void)
638 : : {
639 : : ProcNumber pgprocno;
640 : :
867 rhaas@postgresql.org 641 [ - + ]: 1731 : if (WalSummarizerCtl == NULL)
867 rhaas@postgresql.org 642 :UBC 0 : return;
643 : :
250 msawada@postgresql.o 644 :CBC 1731 : LWLockAcquire(WALSummarizerLock, LW_SHARED);
867 rhaas@postgresql.org 645 : 1731 : pgprocno = WalSummarizerCtl->summarizer_pgprocno;
646 : 1731 : LWLockRelease(WALSummarizerLock);
647 : :
793 heikki.linnakangas@i 648 [ + + ]: 1731 : if (pgprocno != INVALID_PROC_NUMBER)
120 drowley@postgresql.o 649 :GNC 3 : SetLatch(&GetPGProcByNumber(pgprocno)->procLatch);
650 : : }
651 : :
652 : : /*
653 : : * Wait until WAL summarization reaches the given LSN, but time out with an
654 : : * error if the summarizer seems to be stuck.
655 : : *
656 : : * Returns immediately if summarize_wal is turned off while we wait. Caller
657 : : * is expected to handle this case, if necessary.
658 : : */
659 : : void
648 rhaas@postgresql.org 660 :CBC 12 : WaitForWalSummarization(XLogRecPtr lsn)
661 : : {
662 : : TimestampTz initial_time,
663 : : cycle_time,
664 : : current_time;
665 : 12 : XLogRecPtr prior_pending_lsn = InvalidXLogRecPtr;
666 : 12 : int deadcycles = 0;
667 : :
668 : 12 : initial_time = cycle_time = GetCurrentTimestamp();
669 : :
670 : : while (1)
867 671 : 12 : {
648 672 : 24 : long timeout_in_ms = 10000;
673 : : XLogRecPtr summarized_lsn;
674 : : XLogRecPtr pending_lsn;
675 : :
676 [ - + ]: 24 : CHECK_FOR_INTERRUPTS();
677 : :
678 : : /* If WAL summarization is disabled while we're waiting, give up. */
679 [ - + ]: 24 : if (!summarize_wal)
648 rhaas@postgresql.org 680 :UBC 0 : return;
681 : :
682 : : /*
683 : : * If the LSN summarized on disk has reached the target value, stop.
684 : : */
250 msawada@postgresql.o 685 :CBC 24 : LWLockAcquire(WALSummarizerLock, LW_SHARED);
867 rhaas@postgresql.org 686 : 24 : summarized_lsn = WalSummarizerCtl->summarized_lsn;
648 687 : 24 : pending_lsn = WalSummarizerCtl->pending_lsn;
867 688 : 24 : LWLockRelease(WALSummarizerLock);
689 : :
690 : : /* If WAL summarization has progressed sufficiently, stop waiting. */
691 [ + + ]: 24 : if (summarized_lsn >= lsn)
692 : 12 : break;
693 : :
694 : : /* Recheck current time. */
648 695 : 12 : current_time = GetCurrentTimestamp();
696 : :
697 : : /* Have we finished the current cycle of waiting? */
698 [ - + ]: 12 : if (TimestampDifferenceMilliseconds(cycle_time,
699 : : current_time) >= timeout_in_ms)
700 : : {
701 : : long elapsed_seconds;
702 : :
703 : : /* Begin new wait cycle. */
648 rhaas@postgresql.org 704 :UBC 0 : cycle_time = TimestampTzPlusMilliseconds(cycle_time,
705 : : timeout_in_ms);
706 : :
707 : : /*
708 : : * Keep track of the number of cycles during which there has been
709 : : * no progression of pending_lsn. If pending_lsn is not advancing,
710 : : * that means that not only are no new files appearing on disk,
711 : : * but we're not even incorporating new records into the in-memory
712 : : * state.
713 : : */
714 [ # # ]: 0 : if (pending_lsn > prior_pending_lsn)
715 : : {
716 : 0 : prior_pending_lsn = pending_lsn;
717 : 0 : deadcycles = 0;
718 : : }
719 : : else
720 : 0 : ++deadcycles;
721 : :
722 : : /*
723 : : * If we've managed to wait for an entire minute without the WAL
724 : : * summarizer absorbing a single WAL record, error out; probably
725 : : * something is wrong.
726 : : *
727 : : * We could consider also erroring out if the summarizer is taking
728 : : * too long to catch up, but it's not clear what rate of progress
729 : : * would be acceptable and what would be too slow. So instead, we
730 : : * just try to error out in the case where there's no progress at
731 : : * all. That seems likely to catch a reasonable number of the
732 : : * things that can go wrong in practice (e.g. the summarizer
733 : : * process is completely hung, say because somebody hooked up a
734 : : * debugger to it or something) without giving up too quickly when
735 : : * the system is just slow.
736 : : */
737 [ # # ]: 0 : if (deadcycles >= 6)
738 [ # # ]: 0 : ereport(ERROR,
739 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
740 : : errmsg("WAL summarization is not progressing"),
741 : : errdetail("Summarization is needed through %X/%08X, but is stuck at %X/%08X on disk and %X/%08X in memory.",
742 : : LSN_FORMAT_ARGS(lsn),
743 : : LSN_FORMAT_ARGS(summarized_lsn),
744 : : LSN_FORMAT_ARGS(pending_lsn))));
745 : :
746 : :
747 : : /*
748 : : * Otherwise, just let the user know what's happening.
749 : : */
750 : 0 : elapsed_seconds =
751 : 0 : TimestampDifferenceMilliseconds(initial_time,
752 : : current_time) / 1000;
753 [ # # ]: 0 : ereport(WARNING,
754 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
755 : : errmsg_plural("still waiting for WAL summarization through %X/%08X after %ld second",
756 : : "still waiting for WAL summarization through %X/%08X after %ld seconds",
757 : : elapsed_seconds,
758 : : LSN_FORMAT_ARGS(lsn),
759 : : elapsed_seconds),
760 : : errdetail("Summarization has reached %X/%08X on disk and %X/%08X in memory.",
761 : : LSN_FORMAT_ARGS(summarized_lsn),
762 : : LSN_FORMAT_ARGS(pending_lsn))));
763 : : }
764 : :
765 : : /*
766 : : * Align the wait time to prevent drift. This doesn't really matter,
767 : : * but we'd like the warnings about how long we've been waiting to say
768 : : * 10 seconds, 20 seconds, 30 seconds, 40 seconds ... without ever
769 : : * drifting to something that is not a multiple of ten.
770 : : */
648 rhaas@postgresql.org 771 :CBC 12 : timeout_in_ms -=
772 : 12 : TimestampDifferenceMilliseconds(cycle_time, current_time);
773 : :
774 : : /* Wait and see. */
867 775 : 12 : ConditionVariableTimedSleep(&WalSummarizerCtl->summary_file_cv,
776 : : timeout_in_ms,
777 : : WAIT_EVENT_WAL_SUMMARY_READY);
778 : : }
779 : :
657 780 : 12 : ConditionVariableCancelSleep();
781 : : }
782 : :
783 : : /*
784 : : * On exit, update shared memory to make it clear that we're no longer
785 : : * running.
786 : : */
787 : : static void
845 788 : 3 : WalSummarizerShutdown(int code, Datum arg)
789 : : {
790 : 3 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
793 heikki.linnakangas@i 791 : 3 : WalSummarizerCtl->summarizer_pgprocno = INVALID_PROC_NUMBER;
845 rhaas@postgresql.org 792 : 3 : LWLockRelease(WALSummarizerLock);
793 : 3 : }
794 : :
795 : : /*
796 : : * Get the latest LSN that is eligible to be summarized, and set *tli to the
797 : : * corresponding timeline.
798 : : */
799 : : static XLogRecPtr
867 800 : 39 : GetLatestLSN(TimeLineID *tli)
801 : : {
802 [ + + ]: 39 : if (!RecoveryInProgress())
803 : : {
804 : : /* Don't summarize WAL before it's flushed. */
805 : 38 : return GetFlushRecPtr(tli);
806 : : }
807 : : else
808 : : {
809 : : XLogRecPtr flush_lsn;
810 : : TimeLineID flush_tli;
811 : : XLogRecPtr replay_lsn;
812 : : TimeLineID replay_tli;
813 : : TimeLineID insert_tli;
814 : :
815 : : /*
816 : : * After the insert TLI has been set and before the control file has
817 : : * been updated to show the DB in production, RecoveryInProgress()
818 : : * will return true, because it's not yet safe for all backends to
819 : : * begin writing WAL. However, replay has already ceased, so from our
820 : : * point of view, recovery is already over. We should summarize up to
821 : : * where replay stopped and then prepare to resume at the start of the
822 : : * insert timeline.
823 : : */
648 824 [ + - ]: 1 : if ((insert_tli = GetWALInsertionTimeLineIfSet()) != 0)
825 : : {
826 : 1 : *tli = insert_tli;
827 : 1 : return GetXLogReplayRecPtr(NULL);
828 : : }
829 : :
830 : : /*
831 : : * What we really want to know is how much WAL has been flushed to
832 : : * disk, but the only flush position available is the one provided by
833 : : * the walreceiver, which may not be running, because this could be
834 : : * crash recovery or recovery via restore_command. So use either the
835 : : * WAL receiver's flush position or the replay position, whichever is
836 : : * further ahead, on the theory that if the WAL has been replayed then
837 : : * it must also have been flushed to disk.
838 : : */
867 rhaas@postgresql.org 839 :UBC 0 : flush_lsn = GetWalRcvFlushRecPtr(NULL, &flush_tli);
840 : 0 : replay_lsn = GetXLogReplayRecPtr(&replay_tli);
841 [ # # ]: 0 : if (flush_lsn > replay_lsn)
842 : : {
843 : 0 : *tli = flush_tli;
844 : 0 : return flush_lsn;
845 : : }
846 : : else
847 : : {
848 : 0 : *tli = replay_tli;
849 : 0 : return replay_lsn;
850 : : }
851 : : }
852 : : }
853 : :
854 : : /*
855 : : * Interrupt handler for main loop of WAL summarizer process.
856 : : */
857 : : static void
426 heikki.linnakangas@i 858 :CBC 57236 : ProcessWalSummarizerInterrupts(void)
859 : : {
867 rhaas@postgresql.org 860 [ - + ]: 57236 : if (ProcSignalBarrierPending)
867 rhaas@postgresql.org 861 :UBC 0 : ProcessProcSignalBarrier();
862 : :
867 rhaas@postgresql.org 863 [ - + ]:CBC 57236 : if (ConfigReloadPending)
864 : : {
867 rhaas@postgresql.org 865 :UBC 0 : ConfigReloadPending = false;
866 : 0 : ProcessConfigFile(PGC_SIGHUP);
867 : : }
868 : :
867 rhaas@postgresql.org 869 [ + + - + ]:CBC 57236 : if (ShutdownRequestPending || !summarize_wal)
870 : : {
871 [ - + ]: 3 : ereport(DEBUG1,
872 : : errmsg_internal("WAL summarizer shutting down"));
873 : 3 : proc_exit(0);
874 : : }
875 : :
876 : : /* Perform logging of memory contexts of this process */
877 [ - + ]: 57233 : if (LogMemoryContextPending)
867 rhaas@postgresql.org 878 :UBC 0 : ProcessLogMemoryContextInterrupt();
867 rhaas@postgresql.org 879 :CBC 57233 : }
880 : :
881 : : /*
882 : : * Summarize a range of WAL records on a single timeline.
883 : : *
884 : : * 'tli' is the timeline to be summarized.
885 : : *
886 : : * 'start_lsn' is the point at which we should start summarizing. If this
887 : : * value comes from the end LSN of the previous record as returned by the
888 : : * xlogreader machinery, 'exact' should be true; otherwise, 'exact' should
889 : : * be false, and this function will search forward for the start of a valid
890 : : * WAL record.
891 : : *
892 : : * 'switch_lsn' is the point at which we should switch to a later timeline,
893 : : * if we're summarizing a historic timeline.
894 : : *
895 : : * 'maximum_lsn' identifies the point beyond which we can't count on being
896 : : * able to read any more WAL. It should be the switch point when reading a
897 : : * historic timeline, or the most-recently-measured end of WAL when reading
898 : : * the current timeline.
899 : : *
900 : : * The return value is the LSN at which the WAL summary actually ends. Most
901 : : * often, a summary file ends because we notice that a checkpoint has
902 : : * occurred and reach the redo pointer of that checkpoint, but sometimes
903 : : * we stop for other reasons, such as a timeline switch.
 *
 * Side effects: after each record that is folded into the summary range,
 * pending_lsn in shared memory is set to that record's end LSN. A summary
 * file is written only if the resulting range is non-empty and we did not
 * end up in fast-forward mode; it is first written to a temporary file and
 * then durably renamed to a name built from the TLI and the start and end
 * LSNs. Errors are reported with ereport(ERROR), and the interrupt check at
 * the top of the main loop can exit the process.
904 : : */
905 : : static XLogRecPtr
906 : 21 : SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
907 : : XLogRecPtr switch_lsn, XLogRecPtr maximum_lsn)
908 : : {
909 : : SummarizerReadLocalXLogPrivate *private_data;
910 : : XLogReaderState *xlogreader;
911 : : XLogRecPtr summary_start_lsn;
912 : 21 : XLogRecPtr summary_end_lsn = switch_lsn;
913 : : char temp_path[MAXPGPATH];
914 : : char final_path[MAXPGPATH];
915 : : WalSummaryIO io;
916 : 21 : BlockRefTable *brtab = CreateEmptyBlockRefTable();
/*
 * We start out in fast-forward mode. The flag is only reassigned below when
 * SummarizeXlogRecord() returns true for a record that does not start after
 * summary_start_lsn, i.e. the first record of the range.
 */
656 917 : 21 : bool fast_forward = true;
918 : : char *errormsg;
919 : :
920 : : /* Initialize private data for xlogreader. */
146 michael@paquier.xyz 921 :GNC 21 : private_data = palloc0_object(SummarizerReadLocalXLogPrivate);
867 rhaas@postgresql.org 922 :CBC 21 : private_data->tli = tli;
180 alvherre@kurilemu.de 923 :GNC 21 : private_data->historic = XLogRecPtrIsValid(switch_lsn);
867 rhaas@postgresql.org 924 :CBC 21 : private_data->read_upto = maximum_lsn;
925 : :
926 : : /* Create xlogreader. */
927 : 21 : xlogreader = XLogReaderAllocate(wal_segment_size, NULL,
928 : 21 : XL_ROUTINE(.page_read = &summarizer_read_local_xlog_page,
929 : : .segment_open = &wal_segment_open,
930 : : .segment_close = &wal_segment_close),
931 : : private_data);
932 [ - + ]: 21 : if (xlogreader == NULL)
867 rhaas@postgresql.org 933 [ # # ]:UBC 0 : ereport(ERROR,
934 : : (errcode(ERRCODE_OUT_OF_MEMORY),
935 : : errmsg("out of memory"),
936 : : errdetail("Failed while allocating a WAL reading processor.")));
937 : :
938 : : /*
939 : : * When exact = false, we're starting from an arbitrary point in the WAL
940 : : * and must search forward for the start of the next record.
941 : : *
942 : : * When exact = true, start_lsn should be either the LSN where a record
943 : : * begins, or the LSN of a page where the page header is immediately
944 : : * followed by the start of a new record. XLogBeginRead should tolerate
945 : : * either case.
946 : : *
947 : : * We need to allow for both cases because the behavior of xlogreader
948 : : * varies. When a record spans two or more xlog pages, the ending LSN
949 : : * reported by xlogreader will be the starting LSN of the following
950 : : * record, but when an xlog page boundary falls between two records, the
951 : : * end LSN for the first will be reported as the first byte of the
952 : : * following page. We can't know until we read that page how large the
953 : : * header will be, but we'll have to skip over it to find the next record.
954 : : */
867 rhaas@postgresql.org 955 [ + + ]:CBC 21 : if (exact)
956 : : {
957 : : /*
958 : : * Even if start_lsn is the beginning of a page rather than the
959 : : * beginning of the first record on that page, we should still use it
960 : : * as the start LSN for the summary file. That's because we detect
961 : : * missing summary files by looking for cases where the end LSN of one
962 : : * file is less than the start LSN of the next file. When only a page
963 : : * header is skipped, nothing has been missed.
964 : : */
965 : 18 : XLogBeginRead(xlogreader, start_lsn);
966 : 18 : summary_start_lsn = start_lsn;
967 : : }
968 : : else
969 : : {
42 fujii@postgresql.org 970 :GNC 3 : summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn, &errormsg);
180 alvherre@kurilemu.de 971 [ - + ]: 3 : if (!XLogRecPtrIsValid(summary_start_lsn))
972 : : {
973 : : /*
974 : : * If we hit end-of-WAL while trying to find the next valid
975 : : * record, we must be on a historic timeline that has no valid
976 : : * records that begin after start_lsn and before end of WAL.
977 : : */
867 rhaas@postgresql.org 978 [ # # ]:UBC 0 : if (private_data->end_of_wal)
979 : : {
980 [ # # ]: 0 : ereport(DEBUG1,
981 : : errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X",
982 : : tli,
983 : : LSN_FORMAT_ARGS(start_lsn),
984 : : LSN_FORMAT_ARGS(private_data->read_upto)));
985 : :
986 : : /*
987 : : * The timeline ends at or after start_lsn, without containing
988 : : * any records. Thus, we must make sure the main loop does not
989 : : * iterate. If start_lsn is the end of the timeline, then we
990 : : * won't actually emit an empty summary file, but otherwise,
991 : : * we must, to capture the fact that the LSN range in question
992 : : * contains no interesting WAL records.
993 : : */
994 : 0 : summary_start_lsn = start_lsn;
995 : 0 : summary_end_lsn = private_data->read_upto;
996 : 0 : switch_lsn = xlogreader->EndRecPtr;
997 : : }
998 : : else
999 : : {
42 fujii@postgresql.org 1000 [ # # ]:UNC 0 : if (errormsg)
1001 [ # # ]: 0 : ereport(ERROR,
1002 : : errmsg("could not find a valid record after %X/%08X: %s",
1003 : : LSN_FORMAT_ARGS(start_lsn), errormsg));
1004 : : else
1005 [ # # ]: 0 : ereport(ERROR,
1006 : : errmsg("could not find a valid record after %X/%08X",
1007 : : LSN_FORMAT_ARGS(start_lsn)));
1008 : : }
1009 : : }
1010 : :
1011 : : /* We shouldn't go backward. */
867 rhaas@postgresql.org 1012 [ + - ]:CBC 3 : Assert(summary_start_lsn >= start_lsn);
1013 : : }
1014 : :
1015 : : /*
1016 : : * Main loop: read xlog records one by one.
1017 : : */
1018 : : while (1)
1019 : 55083 : {
1020 : : int block_id;
1021 : : XLogRecord *record;
1022 : : uint8 rmid;
1023 : :
/*
 * This call does not return if a shutdown has been requested or
 * summarize_wal has been turned off; it exits the process instead.
 */
426 heikki.linnakangas@i 1024 : 55104 : ProcessWalSummarizerInterrupts();
1025 : :
1026 : : /* We shouldn't go backward. */
867 rhaas@postgresql.org 1027 [ - + ]: 55103 : Assert(summary_start_lsn <= xlogreader->EndRecPtr);
1028 : :
1029 : : /* Now read the next record. */
1030 : 55103 : record = XLogReadRecord(xlogreader, &errormsg);
1031 [ - + ]: 55101 : if (record == NULL)
1032 : : {
867 rhaas@postgresql.org 1033 [ # # ]:UBC 0 : if (private_data->end_of_wal)
1034 : : {
1035 : : /*
1036 : : * This timeline must be historic and must end before we were
1037 : : * able to read a complete record.
1038 : : */
1039 [ # # ]: 0 : ereport(DEBUG1,
1040 : : errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X",
1041 : : tli,
1042 : : LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
1043 : : LSN_FORMAT_ARGS(private_data->read_upto)));
1044 : : /* Summary ends at end of WAL. */
1045 : 0 : summary_end_lsn = private_data->read_upto;
1046 : 0 : break;
1047 : : }
1048 [ # # ]: 0 : if (errormsg)
1049 [ # # ]: 0 : ereport(ERROR,
1050 : : (errcode_for_file_access(),
1051 : : errmsg("could not read WAL from timeline %u at %X/%08X: %s",
1052 : : tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
1053 : : errormsg)));
1054 : : else
1055 [ # # ]: 0 : ereport(ERROR,
1056 : : (errcode_for_file_access(),
1057 : : errmsg("could not read WAL from timeline %u at %X/%08X",
1058 : : tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr))));
1059 : : }
1060 : :
1061 : : /* We shouldn't go backward. */
867 rhaas@postgresql.org 1062 [ - + ]:CBC 55101 : Assert(summary_start_lsn <= xlogreader->EndRecPtr);
1063 : :
180 alvherre@kurilemu.de 1064 [ - + ]:GNC 55101 : if (XLogRecPtrIsValid(switch_lsn) &&
867 rhaas@postgresql.org 1065 [ # # ]:UBC 0 : xlogreader->ReadRecPtr >= switch_lsn)
1066 : : {
1067 : : /*
1068 : : * Whoops! We've read a record that *starts* after the switch LSN,
1069 : : * contrary to our goal of reading only until we hit the first
1070 : : * record that ends at or after the switch LSN. Pretend we didn't
1071 : : * read it after all by bailing out of this loop right here,
1072 : : * before we do anything with this record.
1073 : : *
1074 : : * This can happen because the last record before the switch LSN
1075 : : * might be continued across multiple pages, and then we might
1076 : : * come to a page with XLP_FIRST_IS_OVERWRITE_CONTRECORD set. In
1077 : : * that case, the record that was continued across multiple pages
1078 : : * is incomplete and will be disregarded, and the read will
1079 : : * restart from the beginning of the page that is flagged
1080 : : * XLP_FIRST_IS_OVERWRITE_CONTRECORD.
1081 : : *
1082 : : * If this case occurs, we can fairly say that the current summary
1083 : : * file ends at the switch LSN exactly. The first record on the
1084 : : * page marked XLP_FIRST_IS_OVERWRITE_CONTRECORD will be
1085 : : * discovered when generating the next summary file.
1086 : : */
1087 : 0 : summary_end_lsn = switch_lsn;
1088 : 0 : break;
1089 : : }
1090 : :
1091 : : /*
1092 : : * Certain types of records require special handling. Redo points and
1093 : : * shutdown checkpoints trigger creation of new summary files and can
1094 : : * also cause us to enter or exit "fast forward" mode. Other types of
1095 : : * records can require special updates to the block reference table.
1096 : : */
656 rhaas@postgresql.org 1097 :CBC 55101 : rmid = XLogRecGetRmid(xlogreader);
1098 [ + + ]: 55101 : if (rmid == RM_XLOG_ID)
1099 : : {
1100 : : bool new_fast_forward;
1101 : :
1102 : : /*
1103 : : * If we've already processed some WAL records when we hit a redo
1104 : : * point or shutdown checkpoint, then we stop summarization before
1105 : : * including this record in the current file, so that it will be
1106 : : * the first record in the next file.
1107 : : *
1108 : : * When we hit one of those record types as the first record in a
1109 : : * file, we adjust our notion of whether we're fast-forwarding.
1110 : : * Any WAL generated with wal_level=minimal must be skipped
1111 : : * without actually generating any summary file, because an
1112 : : * incremental backup that crosses such WAL would be unsafe.
1113 : : */
1114 [ + + ]: 718 : if (SummarizeXlogRecord(xlogreader, &new_fast_forward))
1115 : : {
1116 [ + + ]: 39 : if (xlogreader->ReadRecPtr > summary_start_lsn)
1117 : : {
1118 : 18 : summary_end_lsn = xlogreader->ReadRecPtr;
1119 : 18 : break;
1120 : : }
1121 : : else
1122 : 21 : fast_forward = new_fast_forward;
1123 : : }
1124 : : }
1125 [ + - ]: 54383 : else if (!fast_forward)
1126 : : {
1127 : : /*
1128 : : * This switch handles record types that require extra updates to
1129 : : * the contents of the block reference table.
1130 : : */
1131 [ + + + + ]: 54383 : switch (rmid)
1132 : : {
1133 : 4 : case RM_DBASE_ID:
1134 : 4 : SummarizeDbaseRecord(xlogreader, brtab);
1135 : 4 : break;
1136 : 29 : case RM_SMGR_ID:
1137 : 29 : SummarizeSmgrRecord(xlogreader, brtab);
1138 : 29 : break;
1139 : 1384 : case RM_XACT_ID:
1140 : 1384 : SummarizeXactRecord(xlogreader, brtab);
1141 : 1384 : break;
/* No default: other resource managers get no extra handling here. */
1142 : : }
1143 : : }
1144 : :
1145 : : /*
1146 : : * If we're in fast-forward mode, we don't really need to do anything.
1147 : : * Otherwise, feed block references from xlog record to block
1148 : : * reference table.
1149 : : */
1150 [ + - ]: 55083 : if (!fast_forward)
1151 : : {
1152 [ + + ]: 109909 : for (block_id = 0; block_id <= XLogRecMaxBlockId(xlogreader);
1153 : 54826 : block_id++)
1154 : : {
1155 : : RelFileLocator rlocator;
1156 : : ForkNumber forknum;
1157 : : BlockNumber blocknum;
1158 : :
1159 [ + + ]: 54826 : if (!XLogRecGetBlockTagExtended(xlogreader, block_id, &rlocator,
1160 : : &forknum, &blocknum, NULL))
1161 : 32 : continue;
1162 : :
1163 : : /*
1164 : : * As we do elsewhere, ignore the FSM fork, because it's not
1165 : : * fully WAL-logged.
1166 : : */
1167 [ + + ]: 54794 : if (forknum != FSM_FORKNUM)
1168 : 54506 : BlockRefTableMarkBlockModified(brtab, &rlocator, forknum,
1169 : : blocknum);
1170 : : }
1171 : : }
1172 : :
1173 : : /* Update our notion of where this summary file ends. */
867 1174 : 55083 : summary_end_lsn = xlogreader->EndRecPtr;
1175 : :
1176 : : /* Also update shared memory. */
1177 : 55083 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
1178 [ - + ]: 55083 : Assert(summary_end_lsn >= WalSummarizerCtl->summarized_lsn);
1179 : 55083 : WalSummarizerCtl->pending_lsn = summary_end_lsn;
1180 : 55083 : LWLockRelease(WALSummarizerLock);
1181 : :
1182 : : /*
1183 : : * If we have a switch LSN and have reached it, stop before reading
1184 : : * the next record.
1185 : : */
180 alvherre@kurilemu.de 1186 [ - + ]:GNC 55083 : if (XLogRecPtrIsValid(switch_lsn) &&
867 rhaas@postgresql.org 1187 [ # # ]:UBC 0 : xlogreader->EndRecPtr >= switch_lsn)
1188 : 0 : break;
1189 : : }
1190 : :
1191 : : /* Destroy xlogreader. */
/*
 * NOTE(review): xlogreader->private_data is presumably the private_data
 * block allocated above and handed to XLogReaderAllocate() -- confirm.
 */
867 rhaas@postgresql.org 1192 :CBC 18 : pfree(xlogreader->private_data);
1193 : 18 : XLogReaderFree(xlogreader);
1194 : :
1195 : : /*
1196 : : * If a timeline switch occurs, we may fail to make any progress at all
1197 : : * before exiting the loop above. If that happens, we don't write a WAL
1198 : : * summary file at all. We can also skip writing a file if we're in
1199 : : * fast-forward mode.
1200 : : */
656 1201 [ + - + - ]: 18 : if (summary_end_lsn > summary_start_lsn && !fast_forward)
1202 : : {
1203 : : /* Generate temporary and final path name. */
867 1204 : 18 : snprintf(temp_path, MAXPGPATH,
1205 : : XLOGDIR "/summaries/temp.summary");
1206 : 18 : snprintf(final_path, MAXPGPATH,
1207 : : XLOGDIR "/summaries/%08X%08X%08X%08X%08X.summary",
1208 : : tli,
1209 : 18 : LSN_FORMAT_ARGS(summary_start_lsn),
1210 : 18 : LSN_FORMAT_ARGS(summary_end_lsn));
1211 : :
1212 : : /* Open the temporary file for writing. */
1213 : 18 : io.filepos = 0;
1214 : 18 : io.file = PathNameOpenFile(temp_path, O_WRONLY | O_CREAT | O_TRUNC);
1215 [ - + ]: 18 : if (io.file < 0)
867 rhaas@postgresql.org 1216 [ # # ]:UBC 0 : ereport(ERROR,
1217 : : (errcode_for_file_access(),
1218 : : errmsg("could not create file \"%s\": %m", temp_path)));
1219 : :
1220 : : /* Write the data. */
867 rhaas@postgresql.org 1221 :CBC 18 : WriteBlockRefTable(brtab, WriteWalSummary, &io);
1222 : :
1223 : : /* Close the temporary file; the xlogreader was already destroyed above. */
1224 : 18 : FileClose(io.file);
1225 : :
1226 : : /* Tell the user what we did. */
1227 [ - + ]: 18 : ereport(DEBUG1,
1228 : : errmsg_internal("summarized WAL on TLI %u from %X/%08X to %X/%08X",
1229 : : tli,
1230 : : LSN_FORMAT_ARGS(summary_start_lsn),
1231 : : LSN_FORMAT_ARGS(summary_end_lsn)));
1232 : :
1233 : : /* Durably rename the new summary into place. */
1234 : 18 : durable_rename(temp_path, final_path, ERROR);
1235 : : }
1236 : :
1237 : : /* If we skipped a non-zero amount of WAL, log a debug message. */
656 1238 [ + - - + ]: 18 : if (summary_end_lsn > summary_start_lsn && fast_forward)
656 rhaas@postgresql.org 1239 [ # # ]:UBC 0 : ereport(DEBUG1,
1240 : : errmsg_internal("skipped summarizing WAL on TLI %u from %X/%08X to %X/%08X",
1241 : : tli,
1242 : : LSN_FORMAT_ARGS(summary_start_lsn),
1243 : : LSN_FORMAT_ARGS(summary_end_lsn)));
1244 : :
867 rhaas@postgresql.org 1245 :CBC 18 : return summary_end_lsn;
1246 : : }
1247 : :
1248 : : /*
1249 : : * Special handling for WAL records with RM_DBASE_ID.
1250 : : */
1251 : : static void
792 1252 : 4 : SummarizeDbaseRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1253 : : {
1254 : 4 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1255 : :
1256 : : /*
1257 : : * We use relfilenode zero for a given database OID and tablespace OID to
1258 : : * indicate that all relations with that pair of IDs have been recreated
1259 : : * if they exist at all. Effectively, we're setting a limit block of 0 for
1260 : : * all such relfilenodes.
1261 : : *
1262 : : * Technically, this special handling is only needed in the case of
1263 : : * XLOG_DBASE_CREATE_FILE_COPY, because that can create a whole bunch of
1264 : : * relation files in a directory without logging anything specific to each
1265 : : * one. If we didn't mark the whole DB OID/TS OID combination in some way,
1266 : : * then a tablespace that was dropped after the reference backup and
1267 : : * recreated using the FILE_COPY method prior to the incremental backup
1268 : : * would look just like one that was never touched at all, which would be
1269 : : * catastrophic.
1270 : : *
1271 : : * But it seems best to adopt this treatment for all records that drop or
1272 : : * create a DB OID/TS OID combination. That's similar to how we treat the
1273 : : * limit block for individual relations, and it's an extra layer of safety
1274 : : * here. We can never lose data by marking more stuff as needing to be
1275 : : * backed up in full.
1276 : : */
1277 [ + - ]: 4 : if (info == XLOG_DBASE_CREATE_FILE_COPY)
1278 : : {
1279 : : xl_dbase_create_file_copy_rec *xlrec;
1280 : : RelFileLocator rlocator;
1281 : :
1282 : 4 : xlrec =
1283 : 4 : (xl_dbase_create_file_copy_rec *) XLogRecGetData(xlogreader);
1284 : 4 : rlocator.spcOid = xlrec->tablespace_id;
1285 : 4 : rlocator.dbOid = xlrec->db_id;
1286 : 4 : rlocator.relNumber = 0;
1287 : 4 : BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1288 : : }
792 rhaas@postgresql.org 1289 [ # # ]:UBC 0 : else if (info == XLOG_DBASE_CREATE_WAL_LOG)
1290 : : {
1291 : : xl_dbase_create_wal_log_rec *xlrec;
1292 : : RelFileLocator rlocator;
1293 : :
1294 : 0 : xlrec = (xl_dbase_create_wal_log_rec *) XLogRecGetData(xlogreader);
1295 : 0 : rlocator.spcOid = xlrec->tablespace_id;
1296 : 0 : rlocator.dbOid = xlrec->db_id;
1297 : 0 : rlocator.relNumber = 0;
1298 : 0 : BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1299 : : }
1300 [ # # ]: 0 : else if (info == XLOG_DBASE_DROP)
1301 : : {
1302 : : xl_dbase_drop_rec *xlrec;
1303 : : RelFileLocator rlocator;
1304 : : int i;
1305 : :
1306 : 0 : xlrec = (xl_dbase_drop_rec *) XLogRecGetData(xlogreader);
1307 : 0 : rlocator.dbOid = xlrec->db_id;
1308 : 0 : rlocator.relNumber = 0;
1309 [ # # ]: 0 : for (i = 0; i < xlrec->ntablespaces; ++i)
1310 : : {
1311 : 0 : rlocator.spcOid = xlrec->tablespace_ids[i];
1312 : 0 : BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1313 : : }
1314 : : }
792 rhaas@postgresql.org 1315 :CBC 4 : }
1316 : :
1317 : : /*
1318 : : * Special handling for WAL records with RM_SMGR_ID.
1319 : : */
1320 : : static void
867 1321 : 29 : SummarizeSmgrRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1322 : : {
1323 : 29 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1324 : :
1325 [ + + ]: 29 : if (info == XLOG_SMGR_CREATE)
1326 : : {
1327 : : xl_smgr_create *xlrec;
1328 : :
1329 : : /*
1330 : : * If a new relation fork is created on disk, there is no point
1331 : : * tracking anything about which blocks have been modified, because
1332 : : * the whole thing will be new. Hence, set the limit block for this
1333 : : * fork to 0.
1334 : : *
1335 : : * Ignore the FSM fork, which is not fully WAL-logged.
1336 : : */
1337 : 28 : xlrec = (xl_smgr_create *) XLogRecGetData(xlogreader);
1338 : :
1339 [ + - ]: 28 : if (xlrec->forkNum != FSM_FORKNUM)
1340 : 28 : BlockRefTableSetLimitBlock(brtab, &xlrec->rlocator,
1341 : : xlrec->forkNum, 0);
1342 : : }
1343 [ + - ]: 1 : else if (info == XLOG_SMGR_TRUNCATE)
1344 : : {
1345 : : xl_smgr_truncate *xlrec;
1346 : :
1347 : 1 : xlrec = (xl_smgr_truncate *) XLogRecGetData(xlogreader);
1348 : :
1349 : : /*
1350 : : * If a relation fork is truncated on disk, there is no point in
1351 : : * tracking anything about block modifications beyond the truncation
1352 : : * point.
1353 : : *
1354 : : * We ignore SMGR_TRUNCATE_FSM here because the FSM isn't fully
1355 : : * WAL-logged and thus we can't track modified blocks for it anyway.
1356 : : */
1357 [ + - ]: 1 : if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
1358 : 1 : BlockRefTableSetLimitBlock(brtab, &xlrec->rlocator,
1359 : : MAIN_FORKNUM, xlrec->blkno);
1360 [ + - ]: 1 : if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0)
1361 : 1 : BlockRefTableSetLimitBlock(brtab, &xlrec->rlocator,
1362 : : VISIBILITYMAP_FORKNUM,
1363 : : visibilitymap_truncation_length(xlrec->blkno));
1364 : : }
1365 : 29 : }
1366 : :
1367 : : /*
1368 : : * Special handling for WAL records with RM_XACT_ID.
1369 : : */
1370 : : static void
1371 : 1384 : SummarizeXactRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1372 : : {
1373 : 1384 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1374 : 1384 : uint8 xact_info = info & XLOG_XACT_OPMASK;
1375 : :
1376 [ - + - - ]: 1384 : if (xact_info == XLOG_XACT_COMMIT ||
1377 : : xact_info == XLOG_XACT_COMMIT_PREPARED)
1378 : 1384 : {
1379 : 1384 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(xlogreader);
1380 : : xl_xact_parsed_commit parsed;
1381 : : int i;
1382 : :
1383 : : /*
1384 : : * Don't track modified blocks for any relations that were removed on
1385 : : * commit.
1386 : : */
1387 : 1384 : ParseCommitRecord(XLogRecGetInfo(xlogreader), xlrec, &parsed);
1388 [ - + ]: 1384 : for (i = 0; i < parsed.nrels; ++i)
1389 : : {
1390 : : ForkNumber forknum;
1391 : :
867 rhaas@postgresql.org 1392 [ # # ]:UBC 0 : for (forknum = 0; forknum <= MAX_FORKNUM; ++forknum)
1393 [ # # ]: 0 : if (forknum != FSM_FORKNUM)
1394 : 0 : BlockRefTableSetLimitBlock(brtab, &parsed.xlocators[i],
1395 : : forknum, 0);
1396 : : }
1397 : : }
1398 [ # # # # ]: 0 : else if (xact_info == XLOG_XACT_ABORT ||
1399 : : xact_info == XLOG_XACT_ABORT_PREPARED)
1400 : : {
1401 : 0 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(xlogreader);
1402 : : xl_xact_parsed_abort parsed;
1403 : : int i;
1404 : :
1405 : : /*
1406 : : * Don't track modified blocks for any relations that were removed on
1407 : : * abort.
1408 : : */
1409 : 0 : ParseAbortRecord(XLogRecGetInfo(xlogreader), xlrec, &parsed);
1410 [ # # ]: 0 : for (i = 0; i < parsed.nrels; ++i)
1411 : : {
1412 : : ForkNumber forknum;
1413 : :
1414 [ # # ]: 0 : for (forknum = 0; forknum <= MAX_FORKNUM; ++forknum)
1415 [ # # ]: 0 : if (forknum != FSM_FORKNUM)
1416 : 0 : BlockRefTableSetLimitBlock(brtab, &parsed.xlocators[i],
1417 : : forknum, 0);
1418 : : }
1419 : : }
867 rhaas@postgresql.org 1420 :CBC 1384 : }
1421 : :
1422 : : /*
1423 : : * Special handling for WAL records with RM_XLOG_ID.
1424 : : *
1425 : : * The return value is true if WAL summarization should stop before this
1426 : : * record and false otherwise. When the return value is true,
1427 : : * *new_fast_forward indicates whether future processing should be done
1428 : : * in fast forward mode (i.e. read WAL without emitting summaries) or not.
1429 : : */
1430 : : static bool
656 1431 : 718 : SummarizeXlogRecord(XLogReaderState *xlogreader, bool *new_fast_forward)
1432 : : {
867 1433 : 718 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1434 : : int record_wal_level;
1435 : :
656 1436 [ + + ]: 718 : if (info == XLOG_CHECKPOINT_REDO)
1437 : : {
1438 : : xl_checkpoint_redo xlrec;
1439 : :
1440 : : /* Payload is wal_level at the time record was written. */
35 dgustafsson@postgres 1441 :GNC 23 : memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_checkpoint_redo));
1442 : 23 : record_wal_level = xlrec.wal_level;
1443 : : }
656 rhaas@postgresql.org 1444 [ + + ]:CBC 695 : else if (info == XLOG_CHECKPOINT_SHUTDOWN)
1445 : : {
1446 : : CheckPoint rec_ckpt;
1447 : :
1448 : : /* Extract wal_level at time record was written from payload. */
1449 : 12 : memcpy(&rec_ckpt, XLogRecGetData(xlogreader), sizeof(CheckPoint));
1450 : 12 : record_wal_level = rec_ckpt.wal_level;
1451 : : }
1452 [ + + ]: 683 : else if (info == XLOG_PARAMETER_CHANGE)
1453 : : {
1454 : : xl_parameter_change xlrec;
1455 : :
1456 : : /* Extract wal_level at time record was written from payload. */
1457 : 4 : memcpy(&xlrec, XLogRecGetData(xlogreader),
1458 : : sizeof(xl_parameter_change));
1459 : 4 : record_wal_level = xlrec.wal_level;
1460 : : }
1461 [ - + ]: 679 : else if (info == XLOG_END_OF_RECOVERY)
1462 : : {
1463 : : xl_end_of_recovery xlrec;
1464 : :
1465 : : /* Extract wal_level at time record was written from payload. */
656 rhaas@postgresql.org 1466 :UBC 0 : memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
1467 : 0 : record_wal_level = xlrec.wal_level;
1468 : : }
1469 : : else
1470 : : {
1471 : : /* No special handling required. Return false. */
656 rhaas@postgresql.org 1472 :CBC 679 : return false;
1473 : : }
1474 : :
1475 : : /*
1476 : : * Redo can only begin at an XLOG_CHECKPOINT_REDO or
1477 : : * XLOG_CHECKPOINT_SHUTDOWN record, so we want WAL summarization to begin
1478 : : * at those points. Hence, when those records are encountered, return
1479 : : * true, so that we stop just before summarizing either of those records.
1480 : : *
1481 : : * We also reach here if we just saw XLOG_END_OF_RECOVERY or
1482 : : * XLOG_PARAMETER_CHANGE. These are not places where recovery can start,
1483 : : * but they're still relevant here. A new timeline can begin with
1484 : : * XLOG_END_OF_RECOVERY, so we need to confirm the WAL level at that
1485 : : * point; and a restart can provoke XLOG_PARAMETER_CHANGE after an
1486 : : * intervening change to postgresql.conf, which might force us to stop
1487 : : * summarizing.
1488 : : */
1489 : 39 : *new_fast_forward = (record_wal_level == WAL_LEVEL_MINIMAL);
1490 : 39 : return true;
1491 : : }
1492 : :
1493 : : /*
1494 : : * Similar to read_local_xlog_page, but limited to read from one particular
1495 : : * timeline. If the end of WAL is reached, it will wait for more if reading
1496 : : * from the current timeline, or give up if reading from a historic timeline.
1497 : : * In the latter case, it will also set private_data->end_of_wal = true.
1498 : : *
1499 : : * Caller must set private_data->tli to the TLI of interest,
1500 : : * private_data->read_upto to the lowest LSN that is not known to be safe
1501 : : * to read on that timeline, and private_data->historic to true if and only
1502 : : * if the timeline is not the current timeline. This function will update
1503 : : * private_data->read_upto and private_data->historic if more WAL appears
1504 : : * on the current timeline or if the current timeline becomes historic.
1505 : : */
1506 : : static int
867 1507 : 2067 : summarizer_read_local_xlog_page(XLogReaderState *state,
1508 : : XLogRecPtr targetPagePtr, int reqLen,
1509 : : XLogRecPtr targetRecPtr, char *cur_page)
1510 : : {
1511 : : int count;
1512 : : WALReadError errinfo;
1513 : : SummarizerReadLocalXLogPrivate *private_data;
1514 : :
426 heikki.linnakangas@i 1515 : 2067 : ProcessWalSummarizerInterrupts();
1516 : :
867 rhaas@postgresql.org 1517 : 2067 : private_data = (SummarizerReadLocalXLogPrivate *)
1518 : : state->private_data;
1519 : :
1520 : : while (1)
1521 : : {
1522 [ + + ]: 2081 : if (targetPagePtr + XLOG_BLCKSZ <= private_data->read_upto)
1523 : : {
1524 : : /*
1525 : : * more than one block available; read only that block, have
1526 : : * caller come back if they need more.
1527 : : */
1528 : 2045 : count = XLOG_BLCKSZ;
1529 : 2045 : break;
1530 : : }
1531 [ + + ]: 36 : else if (targetPagePtr + reqLen > private_data->read_upto)
1532 : : {
1533 : : /* We don't seem to have enough data. */
1534 [ - + ]: 16 : if (private_data->historic)
1535 : : {
1536 : : /*
1537 : : * This is a historic timeline, so there will never be any
1538 : : * more data than we have currently.
1539 : : */
867 rhaas@postgresql.org 1540 :UBC 0 : private_data->end_of_wal = true;
1541 : 0 : return -1;
1542 : : }
1543 : : else
1544 : : {
1545 : : XLogRecPtr latest_lsn;
1546 : : TimeLineID latest_tli;
1547 : :
1548 : : /*
1549 : : * This is - or at least was up until very recently - the
1550 : : * current timeline, so more data might show up. Delay here
1551 : : * so we don't tight-loop.
1552 : : */
426 heikki.linnakangas@i 1553 :CBC 16 : ProcessWalSummarizerInterrupts();
867 rhaas@postgresql.org 1554 : 14 : summarizer_wait_for_wal();
1555 : :
1556 : : /* Recheck end-of-WAL. */
1557 : 14 : latest_lsn = GetLatestLSN(&latest_tli);
1558 [ + - ]: 14 : if (private_data->tli == latest_tli)
1559 : : {
1560 : : /* Still the current timeline, update max LSN. */
1561 [ - + ]: 14 : Assert(latest_lsn >= private_data->read_upto);
1562 : 14 : private_data->read_upto = latest_lsn;
1563 : : }
1564 : : else
1565 : : {
867 rhaas@postgresql.org 1566 :UBC 0 : List *tles = readTimeLineHistory(latest_tli);
1567 : : XLogRecPtr switchpoint;
1568 : :
1569 : : /*
1570 : : * The timeline we're scanning is no longer the latest
1571 : : * one. Figure out when it ended.
1572 : : */
1573 : 0 : private_data->historic = true;
1574 : 0 : switchpoint = tliSwitchPoint(private_data->tli, tles,
1575 : : NULL);
1576 : :
1577 : : /*
1578 : : * Allow reads up to exactly the switch point.
1579 : : *
1580 : : * It's possible that this will cause read_upto to move
1581 : : * backwards, because we might have been promoted before
1582 : : * reaching the end of the previous timeline. In that
1583 : : * case, the next loop iteration will likely conclude that
1584 : : * we've reached end of WAL.
1585 : : */
1586 : 0 : private_data->read_upto = switchpoint;
1587 : :
1588 : : /* Debugging output. */
1589 [ # # ]: 0 : ereport(DEBUG1,
1590 : : errmsg_internal("timeline %u became historic, can read up to %X/%08X",
1591 : : private_data->tli, LSN_FORMAT_ARGS(private_data->read_upto)));
1592 : : }
1593 : :
1594 : : /* Go around and try again. */
1595 : : }
1596 : : }
1597 : : else
1598 : : {
1599 : : /* enough bytes available to satisfy the request */
867 rhaas@postgresql.org 1600 :CBC 20 : count = private_data->read_upto - targetPagePtr;
1601 : 20 : break;
1602 : : }
1603 : : }
1604 : :
809 jdavis@postgresql.or 1605 [ - + ]: 2065 : if (!WALRead(state, cur_page, targetPagePtr, count,
1606 : : private_data->tli, &errinfo))
867 rhaas@postgresql.org 1607 :UBC 0 : WALReadRaiseError(&errinfo);
1608 : :
1609 : : /* Track that we read a page, for sleep time calculation. */
867 rhaas@postgresql.org 1610 :CBC 2065 : ++pages_read_since_last_sleep;
1611 : :
1612 : : /* number of valid bytes in the buffer */
1613 : 2065 : return count;
1614 : : }
1615 : :
1616 : : /*
1617 : : * Sleep for long enough that we believe it's likely that more WAL will
1618 : : * be available afterwards.
1619 : : */
1620 : : static void
1621 : 14 : summarizer_wait_for_wal(void)
1622 : : {
1623 [ + + ]: 14 : if (pages_read_since_last_sleep == 0)
1624 : : {
1625 : : /*
1626 : : * No pages were read since the last sleep, so double the sleep time,
1627 : : * but not beyond the maximum allowable value.
1628 : : */
1629 : 5 : sleep_quanta = Min(sleep_quanta * 2, MAX_SLEEP_QUANTA);
1630 : : }
1631 [ + + ]: 9 : else if (pages_read_since_last_sleep > 1)
1632 : : {
1633 : : /*
1634 : : * Multiple pages were read since the last sleep, so reduce the sleep
1635 : : * time.
1636 : : *
1637 : : * A large burst of activity should be able to quickly reduce the
1638 : : * sleep time to the minimum, but we don't want a handful of extra WAL
1639 : : * records to provoke a strong reaction. We choose to reduce the sleep
1640 : : * time by 1 quantum for each page read beyond the first, which is a
1641 : : * fairly arbitrary way of trying to be reactive without overreacting.
1642 : : */
1643 [ + - ]: 7 : if (pages_read_since_last_sleep > sleep_quanta - 1)
1644 : 7 : sleep_quanta = 1;
1645 : : else
867 rhaas@postgresql.org 1646 :UBC 0 : sleep_quanta -= pages_read_since_last_sleep;
1647 : : }
1648 : :
1649 : : /* Report pending statistics to the cumulative stats system. */
426 michael@paquier.xyz 1650 :CBC 14 : pgstat_report_wal(false);
1651 : :
1652 : : /* OK, now sleep. */
867 rhaas@postgresql.org 1653 : 14 : (void) WaitLatch(MyLatch,
1654 : : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1655 : : sleep_quanta * MS_PER_SLEEP_QUANTUM,
1656 : : WAIT_EVENT_WAL_SUMMARIZER_WAL);
1657 : 14 : ResetLatch(MyLatch);
1658 : :
1659 : : /* Reset count of pages read. */
1660 : 14 : pages_read_since_last_sleep = 0;
1661 : 14 : }
1662 : :
1663 : : /*
1664 : : * Remove WAL summaries whose mtimes are older than wal_summary_keep_time.
1665 : : */
1666 : : static void
1667 : 21 : MaybeRemoveOldWalSummaries(void)
1668 : : {
1669 : 21 : XLogRecPtr redo_pointer = GetRedoRecPtr();
1670 : : List *wslist;
1671 : : time_t cutoff_time;
1672 : :
1673 : : /* If WAL summary removal is disabled, don't do anything. */
1674 [ - + ]: 21 : if (wal_summary_keep_time == 0)
867 rhaas@postgresql.org 1675 :UBC 0 : return;
1676 : :
1677 : : /*
1678 : : * If the redo pointer has not advanced, don't do anything.
1679 : : *
1680 : : * This has the effect that we only try to remove old WAL summary files
1681 : : * once per checkpoint cycle.
1682 : : */
867 rhaas@postgresql.org 1683 [ + + ]:CBC 21 : if (redo_pointer == redo_pointer_at_last_summary_removal)
1684 : 15 : return;
1685 : 6 : redo_pointer_at_last_summary_removal = redo_pointer;
1686 : :
1687 : : /*
1688 : : * Files should only be removed if the last modification time precedes the
1689 : : * cutoff time we compute here.
1690 : : */
776 nathan@postgresql.or 1691 : 6 : cutoff_time = time(NULL) - wal_summary_keep_time * SECS_PER_MINUTE;
1692 : :
1693 : : /* Get all the summaries that currently exist. */
867 rhaas@postgresql.org 1694 : 6 : wslist = GetWalSummaries(0, InvalidXLogRecPtr, InvalidXLogRecPtr);
1695 : :
1696 : : /* Loop until all summaries have been considered for removal. */
1697 [ + + ]: 9 : while (wslist != NIL)
1698 : : {
1699 : : ListCell *lc;
1700 : : XLogSegNo oldest_segno;
1701 : 3 : XLogRecPtr oldest_lsn = InvalidXLogRecPtr;
1702 : : TimeLineID selected_tli;
1703 : :
426 heikki.linnakangas@i 1704 : 3 : ProcessWalSummarizerInterrupts();
1705 : :
1706 : : /*
1707 : : * Pick a timeline for which some summary files still exist on disk,
1708 : : * and find the oldest LSN that still exists on disk for that
1709 : : * timeline.
1710 : : */
867 rhaas@postgresql.org 1711 : 3 : selected_tli = ((WalSummaryFile *) linitial(wslist))->tli;
1712 : 3 : oldest_segno = XLogGetOldestSegno(selected_tli);
1713 [ + - ]: 3 : if (oldest_segno != 0)
1714 : 3 : XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size,
1715 : : oldest_lsn);
1716 : :
1717 : :
1718 : : /* Consider each WAL file on the selected timeline in turn. */
1719 [ + + + - : 28 : foreach(lc, wslist)
+ + ]
1720 : : {
1721 : 25 : WalSummaryFile *ws = lfirst(lc);
1722 : :
426 heikki.linnakangas@i 1723 : 25 : ProcessWalSummarizerInterrupts();
1724 : :
1725 : : /* If it's not on this timeline, it's not time to consider it. */
867 rhaas@postgresql.org 1726 [ - + ]: 25 : if (selected_tli != ws->tli)
867 rhaas@postgresql.org 1727 :UBC 0 : continue;
1728 : :
1729 : : /*
1730 : : * If the WAL doesn't exist any more, we can remove it if the file
1731 : : * modification time is old enough.
1732 : : */
180 alvherre@kurilemu.de 1733 [ + - - + ]:GNC 25 : if (!XLogRecPtrIsValid(oldest_lsn) || ws->end_lsn <= oldest_lsn)
867 rhaas@postgresql.org 1734 :UBC 0 : RemoveWalSummaryIfOlderThan(ws, cutoff_time);
1735 : :
1736 : : /*
1737 : : * Whether we removed the file or not, we need not consider it
1738 : : * again.
1739 : : */
867 rhaas@postgresql.org 1740 :CBC 25 : wslist = foreach_delete_current(wslist, lc);
1741 : 25 : pfree(ws);
1742 : : }
1743 : : }
1744 : : }
|