Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * xlogrecovery.c
4 : : * Functions for WAL recovery, standby mode
5 : : *
6 : : * This source file contains functions controlling WAL recovery.
7 : : * InitWalRecovery() initializes the system for crash or archive recovery,
8 : : * or standby mode, depending on configuration options and the state of
9 : : * the control file and possible backup label file. PerformWalRecovery()
10 : : * performs the actual WAL replay, calling the rmgr-specific redo routines.
11 : : * FinishWalRecovery() performs end-of-recovery checks and cleanup actions,
12 : : * and prepares information needed to initialize the WAL for writes. In
13 : : * addition to these three main functions, there are a bunch of functions
14 : : * for interrogating recovery state and controlling the recovery process.
15 : : *
16 : : *
17 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
18 : : * Portions Copyright (c) 1994, Regents of the University of California
19 : : *
20 : : * src/backend/access/transam/xlogrecovery.c
21 : : *
22 : : *-------------------------------------------------------------------------
23 : : */
24 : :
25 : : #include "postgres.h"
26 : :
27 : : #include <ctype.h>
28 : : #include <math.h>
29 : : #include <time.h>
30 : : #include <sys/stat.h>
31 : : #include <sys/time.h>
32 : : #include <unistd.h>
33 : :
34 : : #include "access/timeline.h"
35 : : #include "access/transam.h"
36 : : #include "access/xact.h"
37 : : #include "access/xlog_internal.h"
38 : : #include "access/xlogarchive.h"
39 : : #include "access/xlogprefetcher.h"
40 : : #include "access/xlogreader.h"
41 : : #include "access/xlogrecovery.h"
42 : : #include "access/xlogutils.h"
43 : : #include "access/xlogwait.h"
44 : : #include "backup/basebackup.h"
45 : : #include "catalog/pg_control.h"
46 : : #include "commands/tablespace.h"
47 : : #include "common/file_utils.h"
48 : : #include "miscadmin.h"
49 : : #include "nodes/miscnodes.h"
50 : : #include "pgstat.h"
51 : : #include "postmaster/bgwriter.h"
52 : : #include "postmaster/startup.h"
53 : : #include "replication/slot.h"
54 : : #include "replication/slotsync.h"
55 : : #include "replication/walreceiver.h"
56 : : #include "storage/fd.h"
57 : : #include "storage/ipc.h"
58 : : #include "storage/latch.h"
59 : : #include "storage/pmsignal.h"
60 : : #include "storage/procarray.h"
61 : : #include "storage/spin.h"
62 : : #include "utils/datetime.h"
63 : : #include "utils/fmgrprotos.h"
64 : : #include "utils/guc_hooks.h"
65 : : #include "utils/pgstat_internal.h"
66 : : #include "utils/pg_lsn.h"
67 : : #include "utils/ps_status.h"
68 : : #include "utils/pg_rusage.h"
69 : :
70 : : /* Unsupported old recovery command file names (relative to $PGDATA) */
71 : : #define RECOVERY_COMMAND_FILE "recovery.conf"
72 : : #define RECOVERY_COMMAND_DONE "recovery.done"
73 : :
74 : : /*
75 : : * GUC support: option names for the recovery target action, each mapped to its RECOVERY_TARGET_ACTION_* value and followed by a {NULL, 0, false} sentinel entry.
76 : : */
77 : : const struct config_enum_entry recovery_target_action_options[] = {
78 : : {"pause", RECOVERY_TARGET_ACTION_PAUSE, false},
79 : : {"promote", RECOVERY_TARGET_ACTION_PROMOTE, false},
80 : : {"shutdown", RECOVERY_TARGET_ACTION_SHUTDOWN, false},
81 : : {NULL, 0, false}
82 : : };
83 : :
84 : : /* options formerly taken from recovery.conf for archive recovery */
85 : : char *recoveryRestoreCommand = NULL;
86 : : char *recoveryEndCommand = NULL;
87 : : char *archiveCleanupCommand = NULL;
88 : : RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
89 : : bool recoveryTargetInclusive = true;
90 : : int recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE;
91 : : TransactionId recoveryTargetXid;
92 : : char *recovery_target_time_string;
93 : : TimestampTz recoveryTargetTime;
94 : : const char *recoveryTargetName;
95 : : XLogRecPtr recoveryTargetLSN;
96 : : int recovery_min_apply_delay = 0;
97 : :
98 : : /* options formerly taken from recovery.conf for XLOG streaming */
99 : : char *PrimaryConnInfo = NULL;
100 : : char *PrimarySlotName = NULL;
101 : : bool wal_receiver_create_temp_slot = false;
102 : :
103 : : /*
104 : : * recoveryTargetTimeLineGoal: what the user requested, if any
105 : : *
106 : : * recoveryTargetTLIRequested: numeric value of requested timeline, if constant
107 : : *
108 : : * recoveryTargetTLI: the currently understood target timeline; may change during recovery
109 : : *
110 : : * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and
111 : : * the timelines of its known parents, newest first (so recoveryTargetTLI is
112 : : * always the first list member). Only these TLIs are expected to be seen in
113 : : * the WAL segments we read, and indeed only these TLIs will be considered as
114 : : * candidate WAL files to open at all.
115 : : *
116 : : * curFileTLI: the TLI appearing in the name of the current input WAL file.
117 : : * (This is not necessarily the same as the timeline from which we are
118 : : * replaying WAL, which StartupXLOG calls replayTLI, because we could be
119 : : * scanning data that was copied from an ancestor timeline when the current
120 : : * file was created.) During a sequential scan we do not allow this value
121 : : * to decrease.
122 : : */
123 : : RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal = RECOVERY_TARGET_TIMELINE_LATEST;
124 : : TimeLineID recoveryTargetTLIRequested = 0;
125 : : TimeLineID recoveryTargetTLI = 0;
126 : : static List *expectedTLEs;
127 : : static TimeLineID curFileTLI;
128 : :
129 : : /*
130 : : * When ArchiveRecoveryRequested is set, archive recovery was requested,
131 : : * i.e. signal files were present. When InArchiveRecovery is set, we are
132 : : * currently recovering using offline XLOG archives. These variables are only
133 : : * valid in the startup process.
134 : : *
135 : : * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
136 : : * currently performing crash recovery using only XLOG files in pg_wal, but
137 : : * will switch to using offline XLOG archives as soon as we reach the end of
138 : : * WAL in pg_wal.
139 : : */
140 : : bool ArchiveRecoveryRequested = false;
141 : : bool InArchiveRecovery = false;
142 : :
143 : : /*
144 : : * When StandbyModeRequested is set, standby mode was requested, i.e.
145 : : * standby.signal file was present. When StandbyMode is set, we are currently
146 : : * in standby mode. These variables are only valid in the startup process.
147 : : * They work similarly to ArchiveRecoveryRequested and InArchiveRecovery.
148 : : */
149 : : static bool StandbyModeRequested = false;
150 : : bool StandbyMode = false;
151 : :
152 : : /* was a signal file present at startup? */
153 : : static bool standby_signal_file_found = false;
154 : : static bool recovery_signal_file_found = false;
155 : :
156 : : /*
157 : : * CheckPointLoc is the position of the checkpoint record that determines
158 : : * where to start the replay. It comes from the backup label file or the
159 : : * control file.
160 : : *
161 : : * RedoStartLSN is the checkpoint's REDO location, also from the backup label
162 : : * file or the control file. In standby mode, XLOG streaming usually starts
163 : : * from the position where an invalid record was found. But if we fail to
164 : : * read even the initial checkpoint record, we use the REDO location instead
165 : : * of the checkpoint location as the start position of XLOG streaming.
166 : : * Otherwise we would have to jump backwards to the REDO location after
167 : : * reading the checkpoint record, because the REDO record can precede the
168 : : * checkpoint record.
169 : : */
170 : : static XLogRecPtr CheckPointLoc = InvalidXLogRecPtr;
171 : : static TimeLineID CheckPointTLI = 0;
172 : : static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
173 : : static TimeLineID RedoStartTLI = 0;
174 : :
175 : : /*
176 : : * Local copy of SharedHotStandbyActive variable. False actually means "not
177 : : * known, need to check the shared state".
178 : : */
179 : : static bool LocalHotStandbyActive = false;
180 : :
181 : : /*
182 : : * Local copy of SharedPromoteIsTriggered variable. False actually means "not
183 : : * known, need to check the shared state".
184 : : */
185 : : static bool LocalPromoteIsTriggered = false;
186 : :
187 : : /* Has the recovery code requested a walreceiver wakeup? */
188 : : static bool doRequestWalReceiverReply;
189 : :
190 : : /* XLogReader object used to parse the WAL records */
191 : : static XLogReaderState *xlogreader = NULL;
192 : :
193 : : /* XLogPrefetcher object used to consume WAL records with read-ahead */
194 : : static XLogPrefetcher *xlogprefetcher = NULL;
195 : :
196 : : /* Parameters passed down from ReadRecord to the XLogPageRead callback; InitWalRecovery() allocates one and hands it to XLogReaderAllocate() as private data. */
197 : : typedef struct XLogPageReadPrivate
198 : : {
199 : : int emode; /* message level; corresponds to ReadRecord's emode argument */
200 : : bool fetching_ckpt; /* are we fetching a checkpoint record? */
201 : : bool randAccess; /* NOTE(review): looks like a "non-sequential read" flag -- confirm */
202 : : TimeLineID replayTLI; /* corresponds to ReadRecord's replayTLI argument */
203 : : } XLogPageReadPrivate;
204 : :
205 : : /* flag to tell XLogPageRead that we have started replaying */
206 : : static bool InRedo = false;
207 : :
208 : : /*
209 : : * Codes indicating where we got a WAL file from during recovery, or where
210 : : * to attempt to get one. XLOG_FROM_ANY is explicitly zero.
211 : : */
212 : : typedef enum
213 : : {
214 : : XLOG_FROM_ANY = 0, /* request to read WAL from any source */
215 : : XLOG_FROM_ARCHIVE, /* restored using restore_command */
216 : : XLOG_FROM_PG_WAL, /* existing file in pg_wal */
217 : : XLOG_FROM_STREAM, /* streamed from primary */
218 : : } XLogSource;
219 : :
220 : : /* human-readable names for XLogSources, for debugging output; listed in the same order as the enum values above */
221 : : static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};
222 : :
223 : : /*
224 : : * readFile is -1 or a kernel FD for the log file segment that's currently
225 : : * open for reading. readSegNo identifies the segment. readOff is the offset
226 : : * of the page just read, readLen indicates how much of it has been read into
227 : : * readBuf, and readSource indicates where we got the currently open file from.
228 : : *
229 : : * Note: we could use Reserve/ReleaseExternalFD to track consumption of this
230 : : * FD too (like for openLogFile in xlog.c); but it doesn't currently seem
231 : : * worthwhile, since the XLOG is not read by general-purpose sessions.
232 : : */
233 : : static int readFile = -1;
234 : : static XLogSegNo readSegNo = 0;
235 : : static uint32 readOff = 0;
236 : : static uint32 readLen = 0;
237 : : static XLogSource readSource = XLOG_FROM_ANY;
238 : :
239 : : /*
240 : : * Keeps track of which source we're currently reading from. This is
241 : : * different from readSource in that this is always set, even when we don't
242 : : * currently have a WAL file open. If lastSourceFailed is set, our last
243 : : * attempt to read from currentSource failed, and we should try another source
244 : : * next.
245 : : *
246 : : * pendingWalRcvRestart is set when a config change occurs that requires a
247 : : * walreceiver restart. This is only valid in XLOG_FROM_STREAM state.
248 : : */
249 : : static XLogSource currentSource = XLOG_FROM_ANY;
250 : : static bool lastSourceFailed = false;
251 : : static bool pendingWalRcvRestart = false;
252 : :
253 : : /*
254 : : * These variables track when we last obtained some WAL data to process,
255 : : * and where we got it from. (XLogReceiptSource is initially the same as
256 : : * readSource, but readSource gets reset to zero when we don't have data
257 : : * to process right now. It is also different from currentSource, which
258 : : * also changes when we try to read from a source and fail, while
259 : : * XLogReceiptSource tracks where we last successfully read some WAL.)
260 : : */
261 : : static TimestampTz XLogReceiptTime = 0;
262 : : static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
263 : :
264 : : /* Local copy of WalRcv->flushedUpto */
265 : : static XLogRecPtr flushedUpto = 0;
266 : : static TimeLineID receiveTLI = 0;
267 : :
268 : : /*
269 : : * Copy of minRecoveryPoint and backupEndPoint from the control file.
270 : : *
271 : : * In order to reach consistency, we must replay the WAL up to
272 : : * minRecoveryPoint. If backupEndRequired is true, we must also reach
273 : : * backupEndPoint, or if it's invalid, an end-of-backup record corresponding
274 : : * to backupStartPoint.
275 : : *
276 : : * Note: In archive recovery, after consistency has been reached, the
277 : : * functions in xlog.c will start updating minRecoveryPoint in the control
278 : : * file. But this copy of minRecoveryPoint variable reflects the value at the
279 : : * beginning of recovery, and is *not* updated after consistency is reached.
280 : : */
281 : : static XLogRecPtr minRecoveryPoint;
282 : : static TimeLineID minRecoveryPointTLI;
283 : :
284 : : static XLogRecPtr backupStartPoint;
285 : : static XLogRecPtr backupEndPoint;
286 : : static bool backupEndRequired = false;
287 : :
288 : : /*
289 : : * Have we reached a consistent database state? In crash recovery, we have
290 : : * to replay all the WAL, so reachedConsistency is never set. During archive
291 : : * recovery, the database is consistent once minRecoveryPoint is reached.
292 : : *
293 : : * Consistent state means that the system is internally consistent, all
294 : : * the WAL has been replayed up to a certain point, and importantly, there
295 : : * is no trace of later actions on disk.
296 : : *
297 : : * This flag is used only by the startup process and postmaster. When
298 : : * minRecoveryPoint is reached, the startup process sets it to true and
299 : : * sends a PMSIGNAL_RECOVERY_CONSISTENT signal to the postmaster,
300 : : * which then sets it to true upon receiving the signal.
301 : : */
302 : : bool reachedConsistency = false;
303 : :
304 : : /* Buffers dedicated to consistency checks of size BLCKSZ */
305 : : static char *replay_image_masked = NULL;
306 : : static char *primary_image_masked = NULL;
307 : :
308 : :
309 : : /*
310 : : * Shared-memory state for WAL recovery. The instance is set up by XLogRecoveryShmemInit() and reached through the XLogRecoveryCtl pointer.
311 : : */
312 : : typedef struct XLogRecoveryCtlData
313 : : {
314 : : /*
315 : : * SharedHotStandbyActive indicates if we allow hot standby queries to be
316 : : * run. Protected by info_lck.
317 : : */
318 : : bool SharedHotStandbyActive;
319 : :
320 : : /*
321 : : * SharedPromoteIsTriggered indicates if a standby promotion has been
322 : : * triggered. Protected by info_lck.
323 : : */
324 : : bool SharedPromoteIsTriggered;
325 : :
326 : : /*
327 : : * recoveryWakeupLatch is used to wake up the startup process to continue
328 : : * WAL replay, if it is waiting for WAL to arrive or promotion to be
329 : : * requested.
330 : : *
331 : : * Note that the startup process also uses another latch, its procLatch,
332 : : * to wait for recovery conflict. We could get rid of recoveryWakeupLatch for
333 : : * signaling the startup process in favor of using its procLatch, which
334 : : * would comport better with possible generic signal handlers using that latch.
335 : : * But we should not do that because the startup process doesn't assume
336 : : * that it's woken up by the walreceiver process or SIGHUP signal handler
337 : : * while it's waiting for recovery conflict. The separate latches,
338 : : * recoveryWakeupLatch and procLatch, should be used for inter-process
339 : : * communication for WAL replay and recovery conflict, respectively.
340 : : */
341 : : Latch recoveryWakeupLatch;
342 : :
343 : : /*
344 : : * Last record successfully replayed.
345 : : */
346 : : XLogRecPtr lastReplayedReadRecPtr; /* start position */
347 : : XLogRecPtr lastReplayedEndRecPtr; /* end+1 position */
348 : : TimeLineID lastReplayedTLI; /* timeline */
349 : :
350 : : /*
351 : : * When we're currently replaying a record, i.e. in a redo function,
352 : : * replayEndRecPtr points to the end+1 of the record being replayed,
353 : : * otherwise it's equal to lastReplayedEndRecPtr.
354 : : */
355 : : XLogRecPtr replayEndRecPtr;
356 : : TimeLineID replayEndTLI;
357 : : /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
358 : : TimestampTz recoveryLastXTime;
359 : :
360 : : /*
361 : : * timestamp of when we started replaying the current chunk of WAL data,
362 : : * only relevant for replication or archive recovery
363 : : */
364 : : TimestampTz currentChunkStartTime;
365 : : /* Recovery pause state; recoveryNotPausedCV is presumably signaled when the pause ends -- TODO confirm */
366 : : RecoveryPauseState recoveryPauseState;
367 : : ConditionVariable recoveryNotPausedCV;
368 : :
369 : : slock_t info_lck; /* locks shared variables shown above */
370 : : } XLogRecoveryCtlData;
371 : :
372 : : static XLogRecoveryCtlData *XLogRecoveryCtl = NULL;
373 : :
374 : : /*
375 : : * abortedRecPtr is the start pointer of a broken record at end of WAL when
376 : : * recovery completes; missingContrecPtr is the location of the first
377 : : * contrecord that went missing. See CreateOverwriteContrecordRecord for
378 : : * details.
379 : : */
380 : : static XLogRecPtr abortedRecPtr;
381 : : static XLogRecPtr missingContrecPtr;
382 : :
383 : : /*
384 : : * If recoveryStopsBefore() or recoveryStopsAfter() returns true, it saves
385 : : * information about the stop point here.
386 : : */
387 : : static TransactionId recoveryStopXid;
388 : : static TimestampTz recoveryStopTime;
389 : : static XLogRecPtr recoveryStopLSN;
390 : : static char recoveryStopName[MAXFNAMELEN];
391 : : static bool recoveryStopAfter;
392 : :
393 : : /* prototypes for local functions */
394 : : static void ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI);
395 : :
396 : : static void EnableStandbyMode(void);
397 : : static void readRecoverySignalFile(void);
398 : : static void validateRecoveryParameters(void);
399 : : static bool read_backup_label(XLogRecPtr *checkPointLoc,
400 : : TimeLineID *backupLabelTLI,
401 : : bool *backupEndRequired, bool *backupFromStandby);
402 : : static bool read_tablespace_map(List **tablespaces);
403 : :
404 : : static void xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI);
405 : : static void CheckRecoveryConsistency(void);
406 : : static void rm_redo_error_callback(void *arg);
407 : : #ifdef WAL_DEBUG
408 : : static void xlog_outrec(StringInfo buf, XLogReaderState *record);
409 : : #endif
410 : : static void xlog_block_info(StringInfo buf, XLogReaderState *record);
411 : : static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
412 : : TimeLineID prevTLI, TimeLineID replayTLI);
413 : : static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime);
414 : : static void verifyBackupPageConsistency(XLogReaderState *record);
415 : :
416 : : static bool recoveryStopsBefore(XLogReaderState *record);
417 : : static bool recoveryStopsAfter(XLogReaderState *record);
418 : : static char *getRecoveryStopReason(void);
419 : : static void recoveryPausesHere(bool endOfRecovery);
420 : : static bool recoveryApplyDelay(XLogReaderState *record);
421 : : static void ConfirmRecoveryPaused(void);
422 : :
423 : : static XLogRecord *ReadRecord(XLogPrefetcher *xlogprefetcher,
424 : : int emode, bool fetching_ckpt,
425 : : TimeLineID replayTLI);
426 : :
427 : : static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
428 : : int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
429 : : static XLogPageReadResult WaitForWALToBecomeAvailable(XLogRecPtr RecPtr,
430 : : bool randAccess,
431 : : bool fetching_ckpt,
432 : : XLogRecPtr tliRecPtr,
433 : : TimeLineID replayTLI,
434 : : XLogRecPtr replayLSN,
435 : : bool nonblocking);
436 : : static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
437 : : static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
438 : : XLogRecPtr RecPtr, TimeLineID replayTLI);
439 : : static bool rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN);
440 : : static int XLogFileRead(XLogSegNo segno, TimeLineID tli,
441 : : XLogSource source, bool notfoundOk);
442 : : static int XLogFileReadAnyTLI(XLogSegNo segno, XLogSource source);
443 : :
444 : : static bool CheckForStandbyTrigger(void);
445 : : static void SetPromoteIsTriggered(void);
446 : : static bool HotStandbyActiveInReplay(void);
447 : :
448 : : static void SetCurrentChunkStartTime(TimestampTz xtime);
449 : : static void SetLatestXTime(TimestampTz xtime);
450 : :
451 : : /*
452 : : * Shared memory for WAL recovery: return the number of bytes needed.
453 : : */
454 : : Size
1401 heikki.linnakangas@i 455 :CBC 3061 : XLogRecoveryShmemSize(void)
456 : : {
457 : : Size size;
458 : :
459 : : /* XLogRecoveryCtl is the only structure accounted for here */
460 : 3061 : size = sizeof(XLogRecoveryCtlData);
461 : :
462 : 3061 : return size;
463 : : }
464 : : /* Set up the XLogRecoveryCtl shared struct: zero it and initialize its spinlock, latch and condition variable. */
465 : : void
466 : 1071 : XLogRecoveryShmemInit(void)
467 : : {
468 : : bool found;
469 : : /* Obtain the struct via ShmemInitStruct(); if it reports the struct as already found, leave the contents untouched. */
470 : 1071 : XLogRecoveryCtl = (XLogRecoveryCtlData *)
471 : 1071 : ShmemInitStruct("XLOG Recovery Ctl", XLogRecoveryShmemSize(), &found);
472 [ - + ]: 1071 : if (found)
1401 heikki.linnakangas@i 473 :UBC 0 : return;
1401 heikki.linnakangas@i 474 :CBC 1071 : memset(XLogRecoveryCtl, 0, sizeof(XLogRecoveryCtlData));
475 : : /* Initialize the synchronization members embedded in the struct. */
476 : 1071 : SpinLockInit(&XLogRecoveryCtl->info_lck);
477 : 1071 : InitSharedLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
478 : 1071 : ConditionVariableInit(&XLogRecoveryCtl->recoveryNotPausedCV);
479 : : }
480 : :
481 : : /*
482 : : * A thin wrapper to enable StandbyMode and do other preparatory work as
483 : : * needed; at present that is only disabling the startup progress timeout.
484 : : */
485 : : static void
1046 rhaas@postgresql.org 486 : 99 : EnableStandbyMode(void)
487 : : {
488 : 99 : StandbyMode = true;
489 : :
490 : : /*
491 : : * To avoid server log bloat, we don't report recovery progress in a
492 : : * standby as it will always be in recovery unless promoted. We disable
493 : : * startup progress timeout in standby mode to avoid calling
494 : : * startup_progress_timeout_handler() unnecessarily.
495 : : */
496 : 99 : disable_startup_progress_timeout();
497 : 99 : }
498 : :
499 : : /*
500 : : * Prepare the system for WAL recovery, if needed.
501 : : *
502 : : * This is called by StartupXLOG() which coordinates the server startup
503 : : * sequence. This function analyzes the control file and the backup label
504 : : * file, if any, and figures out whether we need to perform crash recovery or
505 : : * archive recovery, and how far we need to replay the WAL to reach a
506 : : * consistent state.
507 : : *
508 : : * This doesn't yet change the on-disk state, except for creating the symlinks
509 : : * from table space map file if any, and for fetching WAL files needed to find
510 : : * the checkpoint record. On entry, the caller has already read the control
511 : : * file into memory, and passes it as argument. This function updates it to
512 : : * reflect the recovery state, and the caller is expected to write it back to
513 : : * disk after initializing other subsystems, but before calling
514 : : * PerformWalRecovery().
515 : : *
516 : : * This initializes some global variables such as ArchiveRecoveryRequested,
517 : : * StandbyModeRequested and InRecovery.
518 : : */
519 : : void
1401 heikki.linnakangas@i 520 : 927 : InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
521 : : bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
522 : : {
523 : : XLogPageReadPrivate *private;
524 : : struct stat st;
525 : : bool wasShutdown;
526 : : XLogRecord *record;
527 : : DBState dbstate_at_startup;
528 : 927 : bool haveTblspcMap = false;
529 : 927 : bool haveBackupLabel = false;
530 : : CheckPoint checkPoint;
531 : 927 : bool backupFromStandby = false;
532 : :
533 : 927 : dbstate_at_startup = ControlFile->state;
534 : :
535 : : /*
536 : : * Initialize on the assumption we want to recover to the latest timeline
537 : : * that's active according to pg_control.
538 : : */
539 : 927 : if (ControlFile->minRecoveryPointTLI >
540 [ + + ]: 927 : ControlFile->checkPointCopy.ThisTimeLineID)
541 : 1 : recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
542 : : else
543 : 926 : recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
544 : :
545 : : /*
546 : : * Check for signal files, and if so set up state for offline recovery
547 : : */
548 : 927 : readRecoverySignalFile();
549 : 927 : validateRecoveryParameters();
550 : :
551 : : /*
552 : : * Take ownership of the wakeup latch if archive recovery was requested,
553 : : * as we may then need to sleep on it during recovery.
554 : : */
555 [ + + ]: 927 : if (ArchiveRecoveryRequested)
556 : 104 : OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
557 : :
558 : : /*
559 : : * Set the WAL reading processor now, as it will be needed when reading
560 : : * the checkpoint record required (backup_label or not).
561 : : */
8 michael@paquier.xyz 562 :GNC 927 : private = palloc0_object(XLogPageReadPrivate);
1401 heikki.linnakangas@i 563 :CBC 927 : xlogreader =
564 : 927 : XLogReaderAllocate(wal_segment_size, NULL,
565 : 927 : XL_ROUTINE(.page_read = &XLogPageRead,
566 : : .segment_open = NULL,
567 : : .segment_close = wal_segment_close),
568 : : private);
569 [ - + ]: 927 : if (!xlogreader)
1401 heikki.linnakangas@i 570 [ # # ]:UBC 0 : ereport(ERROR,
571 : : (errcode(ERRCODE_OUT_OF_MEMORY),
572 : : errmsg("out of memory"),
573 : : errdetail("Failed while allocating a WAL reading processor.")));
1401 heikki.linnakangas@i 574 :CBC 927 : xlogreader->system_identifier = ControlFile->system_identifier;
575 : :
576 : : /*
577 : : * Set the WAL decode buffer size. This limits how far ahead we can read
578 : : * in the WAL.
579 : : */
1351 tmunro@postgresql.or 580 : 927 : XLogReaderSetDecodeBuffer(xlogreader, NULL, wal_decode_buffer_size);
581 : :
582 : : /* Create a WAL prefetcher. */
583 : 927 : xlogprefetcher = XLogPrefetcherAllocate(xlogreader);
584 : :
585 : : /*
586 : : * Allocate two page buffers dedicated to WAL consistency checks. We do
587 : : * it this way, rather than just making static arrays, for two reasons:
588 : : * (1) no need to waste the storage in most instantiations of the backend;
589 : : * (2) a static char array isn't guaranteed to have any particular
590 : : * alignment, whereas palloc() will provide MAXALIGN'd storage.
591 : : */
1401 heikki.linnakangas@i 592 : 927 : replay_image_masked = (char *) palloc(BLCKSZ);
593 : 927 : primary_image_masked = (char *) palloc(BLCKSZ);
594 : :
595 : : /*
596 : : * Read the backup_label file. We want to run this part of the recovery
597 : : * process after checking for signal files and after performing validation
598 : : * of the recovery parameters.
599 : : */
600 [ + + ]: 927 : if (read_backup_label(&CheckPointLoc, &CheckPointTLI, &backupEndRequired,
601 : : &backupFromStandby))
602 : : {
603 : 71 : List *tablespaces = NIL;
604 : :
605 : : /*
606 : : * Archive recovery was requested, and thanks to the backup label
607 : : * file, we know how far we need to replay to reach consistency. Enter
608 : : * archive recovery directly.
609 : : */
610 : 71 : InArchiveRecovery = true;
611 [ + + ]: 71 : if (StandbyModeRequested)
1046 rhaas@postgresql.org 612 : 60 : EnableStandbyMode();
613 : :
614 : : /*
615 : : * Omitting backup_label when creating a new replica, PITR node etc.
616 : : * unfortunately is a common cause of corruption. Logging that
617 : : * backup_label was used makes it a bit easier to exclude that as the
618 : : * cause of observed corruption.
619 : : *
620 : : * Do so before we try to read the checkpoint record (which can fail),
621 : : * as otherwise it can be hard to understand why a checkpoint other
622 : : * than ControlFile->checkPoint is used.
623 : : */
693 michael@paquier.xyz 624 [ + - ]: 71 : ereport(LOG,
625 : : errmsg("starting backup recovery with redo LSN %X/%08X, checkpoint LSN %X/%08X, on timeline ID %u",
626 : : LSN_FORMAT_ARGS(RedoStartLSN),
627 : : LSN_FORMAT_ARGS(CheckPointLoc),
628 : : CheckPointTLI));
629 : :
630 : : /*
631 : : * When a backup_label file is present, we want to roll forward from
632 : : * the checkpoint it identifies, rather than using pg_control.
633 : : */
1247 fujii@postgresql.org 634 : 71 : record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
635 : : CheckPointTLI);
1401 heikki.linnakangas@i 636 [ + - ]: 71 : if (record != NULL)
637 : : {
638 : 71 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
639 : 71 : wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
640 [ + + ]: 71 : ereport(DEBUG1,
641 : : errmsg_internal("checkpoint record is at %X/%08X",
642 : : LSN_FORMAT_ARGS(CheckPointLoc)));
643 : 71 : InRecovery = true; /* force recovery even if SHUTDOWNED */
644 : :
645 : : /*
646 : : * Make sure that REDO location exists. This may not be the case
647 : : * if there was a crash during an online backup, which left a
648 : : * backup_label around that references a WAL segment that's
649 : : * already been archived.
650 : : */
651 [ + - ]: 71 : if (checkPoint.redo < CheckPointLoc)
652 : : {
1351 tmunro@postgresql.or 653 : 71 : XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo);
654 [ - + ]: 71 : if (!ReadRecord(xlogprefetcher, LOG, false,
655 : : checkPoint.ThisTimeLineID))
1401 heikki.linnakangas@i 656 [ # # ]:UBC 0 : ereport(FATAL,
657 : : errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X",
658 : : LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
659 : : errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
660 : : "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
661 : : "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
662 : : DataDir, DataDir, DataDir, DataDir));
663 : : }
664 : : }
665 : : else
666 : : {
667 [ # # ]: 0 : ereport(FATAL,
668 : : errmsg("could not locate required checkpoint record at %X/%08X",
669 : : LSN_FORMAT_ARGS(CheckPointLoc)),
670 : : errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
671 : : "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
672 : : "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
673 : : DataDir, DataDir, DataDir, DataDir));
674 : : wasShutdown = false; /* keep compiler quiet */
675 : : }
676 : :
677 : : /* Read the tablespace_map file if present and create symlinks. */
1401 heikki.linnakangas@i 678 [ + + ]:CBC 71 : if (read_tablespace_map(&tablespaces))
679 : : {
680 : : ListCell *lc;
681 : :
682 [ + - + + : 4 : foreach(lc, tablespaces)
+ + ]
683 : : {
684 : 2 : tablespaceinfo *ti = lfirst(lc);
685 : : char *linkloc;
686 : :
471 michael@paquier.xyz 687 : 2 : linkloc = psprintf("%s/%u", PG_TBLSPC_DIR, ti->oid);
688 : :
689 : : /*
690 : : * Remove the existing symlink if any and Create the symlink
691 : : * under PGDATA.
692 : : */
1401 heikki.linnakangas@i 693 : 2 : remove_tablespace_symlink(linkloc);
694 : :
695 [ - + ]: 2 : if (symlink(ti->path, linkloc) < 0)
1401 heikki.linnakangas@i 696 [ # # ]:UBC 0 : ereport(ERROR,
697 : : (errcode_for_file_access(),
698 : : errmsg("could not create symbolic link \"%s\": %m",
699 : : linkloc)));
700 : :
1401 heikki.linnakangas@i 701 :CBC 2 : pfree(ti->path);
702 : 2 : pfree(ti);
703 : : }
704 : :
705 : : /* tell the caller to delete it later */
706 : 2 : haveTblspcMap = true;
707 : : }
708 : :
709 : : /* tell the caller to delete it later */
710 : 71 : haveBackupLabel = true;
711 : : }
712 : : else
713 : : {
714 : : /* No backup_label file has been found if we are here. */
715 : :
716 : : /*
717 : : * If tablespace_map file is present without backup_label file, there
718 : : * is no use of such file. There is no harm in retaining it, but it
719 : : * is better to get rid of the map file so that we don't have any
720 : : * redundant file in data directory and it will avoid any sort of
721 : : * confusion. It seems prudent though to just rename the file out of
722 : : * the way rather than delete it completely, also we ignore any error
723 : : * that occurs in rename operation as even if map file is present
724 : : * without backup_label file, it is harmless.
725 : : */
726 [ + + ]: 856 : if (stat(TABLESPACE_MAP, &st) == 0)
727 : : {
728 : 1 : unlink(TABLESPACE_MAP_OLD);
729 [ + - ]: 1 : if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
730 [ + - ]: 1 : ereport(LOG,
731 : : (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
732 : : TABLESPACE_MAP, BACKUP_LABEL_FILE),
733 : : errdetail("File \"%s\" was renamed to \"%s\".",
734 : : TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
735 : : else
1401 heikki.linnakangas@i 736 [ # # ]:UBC 0 : ereport(LOG,
737 : : (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
738 : : TABLESPACE_MAP, BACKUP_LABEL_FILE),
739 : : errdetail("Could not rename file \"%s\" to \"%s\": %m.",
740 : : TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
741 : : }
742 : :
743 : : /*
744 : : * It's possible that archive recovery was requested, but we don't
745 : : * know how far we need to replay the WAL before we reach consistency.
746 : : * This can happen for example if a base backup is taken from a
747 : : * running server using an atomic filesystem snapshot, without calling
748 : : * pg_backup_start/stop. Or if you just kill a running primary server
749 : : * and put it into archive recovery by creating a recovery signal
750 : : * file.
751 : : *
752 : : * Our strategy in that case is to perform crash recovery first,
753 : : * replaying all the WAL present in pg_wal, and only enter archive
754 : : * recovery after that.
755 : : *
756 : : * But usually we already know how far we need to replay the WAL (up
757 : : * to minRecoveryPoint, up to backupEndPoint, or until we see an
758 : : * end-of-backup record), and we can enter archive recovery directly.
759 : : */
1401 heikki.linnakangas@i 760 [ + + ]:CBC 856 : if (ArchiveRecoveryRequested &&
42 alvherre@kurilemu.de 761 [ + + ]:GNC 39 : (XLogRecPtrIsValid(ControlFile->minRecoveryPoint) ||
1401 heikki.linnakangas@i 762 [ + - ]:CBC 9 : ControlFile->backupEndRequired ||
42 alvherre@kurilemu.de 763 [ + - ]:GNC 9 : XLogRecPtrIsValid(ControlFile->backupEndPoint) ||
1401 heikki.linnakangas@i 764 [ + + ]:CBC 9 : ControlFile->state == DB_SHUTDOWNED))
765 : : {
766 : 37 : InArchiveRecovery = true;
767 [ + - ]: 37 : if (StandbyModeRequested)
1046 rhaas@postgresql.org 768 : 37 : EnableStandbyMode();
769 : : }
770 : :
771 : : /*
772 : : * For the same reason as when starting up with backup_label present,
773 : : * emit a log message when we continue initializing from a base
774 : : * backup.
775 : : */
42 alvherre@kurilemu.de 776 [ - + ]:GNC 856 : if (XLogRecPtrIsValid(ControlFile->backupStartPoint))
693 michael@paquier.xyz 777 [ # # ]:UBC 0 : ereport(LOG,
778 : : errmsg("restarting backup recovery with redo LSN %X/%08X",
779 : : LSN_FORMAT_ARGS(ControlFile->backupStartPoint)));
780 : :
781 : : /* Get the last valid checkpoint record. */
1401 heikki.linnakangas@i 782 :CBC 856 : CheckPointLoc = ControlFile->checkPoint;
783 : 856 : CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
784 : 856 : RedoStartLSN = ControlFile->checkPointCopy.redo;
785 : 856 : RedoStartTLI = ControlFile->checkPointCopy.ThisTimeLineID;
1247 fujii@postgresql.org 786 : 856 : record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
787 : : CheckPointTLI);
1401 heikki.linnakangas@i 788 [ + - ]: 856 : if (record != NULL)
789 : : {
790 [ + + ]: 856 : ereport(DEBUG1,
791 : : errmsg_internal("checkpoint record is at %X/%08X",
792 : : LSN_FORMAT_ARGS(CheckPointLoc)));
793 : : }
794 : : else
795 : : {
796 : : /*
797 : : * We used to attempt to go back to a secondary checkpoint record
798 : : * here, but only when not in standby mode. We now just fail if we
799 : : * can't read the last checkpoint because this allows us to
800 : : * simplify processing around checkpoints.
801 : : */
1401 heikki.linnakangas@i 802 [ # # ]:UBC 0 : ereport(PANIC,
803 : : errmsg("could not locate a valid checkpoint record at %X/%08X",
804 : : LSN_FORMAT_ARGS(CheckPointLoc)));
805 : : }
1401 heikki.linnakangas@i 806 :CBC 856 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
807 : 856 : wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
808 : :
809 : : /* Make sure that REDO location exists. */
2 michael@paquier.xyz 810 [ + + ]: 856 : if (checkPoint.redo < CheckPointLoc)
811 : : {
812 : 35 : XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo);
813 [ - + ]: 35 : if (!ReadRecord(xlogprefetcher, LOG, false, checkPoint.ThisTimeLineID))
2 michael@paquier.xyz 814 [ # # ]:UNC 0 : ereport(FATAL,
815 : : errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X",
816 : : LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)));
817 : : }
818 : : }
819 : :
780 michael@paquier.xyz 820 [ + + ]:CBC 927 : if (ArchiveRecoveryRequested)
821 : : {
822 [ + + ]: 104 : if (StandbyModeRequested)
823 [ + - ]: 99 : ereport(LOG,
824 : : (errmsg("entering standby mode")));
825 [ - + ]: 5 : else if (recoveryTarget == RECOVERY_TARGET_XID)
780 michael@paquier.xyz 826 [ # # ]:UBC 0 : ereport(LOG,
827 : : (errmsg("starting point-in-time recovery to XID %u",
828 : : recoveryTargetXid)));
780 michael@paquier.xyz 829 [ - + ]:CBC 5 : else if (recoveryTarget == RECOVERY_TARGET_TIME)
780 michael@paquier.xyz 830 [ # # ]:UBC 0 : ereport(LOG,
831 : : (errmsg("starting point-in-time recovery to %s",
832 : : timestamptz_to_str(recoveryTargetTime))));
780 michael@paquier.xyz 833 [ + + ]:CBC 5 : else if (recoveryTarget == RECOVERY_TARGET_NAME)
834 [ + - ]: 3 : ereport(LOG,
835 : : (errmsg("starting point-in-time recovery to \"%s\"",
836 : : recoveryTargetName)));
837 [ - + ]: 2 : else if (recoveryTarget == RECOVERY_TARGET_LSN)
780 michael@paquier.xyz 838 [ # # ]:UBC 0 : ereport(LOG,
839 : : errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%08X\"",
840 : : LSN_FORMAT_ARGS(recoveryTargetLSN)));
780 michael@paquier.xyz 841 [ - + ]:CBC 2 : else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
780 michael@paquier.xyz 842 [ # # ]:UBC 0 : ereport(LOG,
843 : : (errmsg("starting point-in-time recovery to earliest consistent point")));
844 : : else
780 michael@paquier.xyz 845 [ + - ]:CBC 2 : ereport(LOG,
846 : : (errmsg("starting archive recovery")));
847 : : }
848 : :
849 : : /*
850 : : * If the location of the checkpoint record is not on the expected
851 : : * timeline in the history of the requested timeline, we cannot proceed:
852 : : * the backup is not part of the history of the requested timeline.
853 : : */
1401 heikki.linnakangas@i 854 [ - + ]: 927 : Assert(expectedTLEs); /* was initialized by reading checkpoint
855 : : * record */
856 [ - + ]: 927 : if (tliOfPointInHistory(CheckPointLoc, expectedTLEs) !=
857 : : CheckPointTLI)
858 : : {
859 : : XLogRecPtr switchpoint;
860 : :
861 : : /*
862 : : * tliSwitchPoint will throw an error if the checkpoint's timeline is
863 : : * not in expectedTLEs at all.
864 : : */
301 michael@paquier.xyz 865 :UBC 0 : switchpoint = tliSwitchPoint(CheckPointTLI, expectedTLEs, NULL);
1401 heikki.linnakangas@i 866 [ # # # # ]: 0 : ereport(FATAL,
867 : : (errmsg("requested timeline %u is not a child of this server's history",
868 : : recoveryTargetTLI),
869 : : /* translator: %s is a backup_label file or a pg_control file */
870 : : errdetail("Latest checkpoint in file \"%s\" is at %X/%08X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%08X.",
871 : : haveBackupLabel ? "backup_label" : "pg_control",
872 : : LSN_FORMAT_ARGS(CheckPointLoc),
873 : : CheckPointTLI,
874 : : LSN_FORMAT_ARGS(switchpoint))));
875 : : }
876 : :
877 : : /*
878 : : * The min recovery point should be part of the requested timeline's
879 : : * history, too.
880 : : */
42 alvherre@kurilemu.de 881 [ + + ]:GNC 927 : if (XLogRecPtrIsValid(ControlFile->minRecoveryPoint) &&
1401 heikki.linnakangas@i 882 :CBC 35 : tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
883 [ - + ]: 35 : ControlFile->minRecoveryPointTLI)
1401 heikki.linnakangas@i 884 [ # # ]:UBC 0 : ereport(FATAL,
885 : : errmsg("requested timeline %u does not contain minimum recovery point %X/%08X on timeline %u",
886 : : recoveryTargetTLI,
887 : : LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
888 : : ControlFile->minRecoveryPointTLI));
889 : :
1401 heikki.linnakangas@i 890 [ + + + + ]:CBC 927 : ereport(DEBUG1,
891 : : errmsg_internal("redo record is at %X/%08X; shutdown %s",
892 : : LSN_FORMAT_ARGS(checkPoint.redo),
893 : : wasShutdown ? "true" : "false"));
894 [ + + ]: 927 : ereport(DEBUG1,
895 : : (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
896 : : U64FromFullTransactionId(checkPoint.nextXid),
897 : : checkPoint.nextOid)));
898 [ + + ]: 927 : ereport(DEBUG1,
899 : : (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %" PRIu64,
900 : : checkPoint.nextMulti, checkPoint.nextMultiOffset)));
901 [ + + ]: 927 : ereport(DEBUG1,
902 : : (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
903 : : checkPoint.oldestXid, checkPoint.oldestXidDB)));
904 [ + + ]: 927 : ereport(DEBUG1,
905 : : (errmsg_internal("oldest MultiXactId: %u, in database %u",
906 : : checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
907 [ + + ]: 927 : ereport(DEBUG1,
908 : : (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
909 : : checkPoint.oldestCommitTsXid,
910 : : checkPoint.newestCommitTsXid)));
911 [ - + ]: 927 : if (!TransactionIdIsNormal(XidFromFullTransactionId(checkPoint.nextXid)))
1401 heikki.linnakangas@i 912 [ # # ]:UBC 0 : ereport(PANIC,
913 : : (errmsg("invalid next transaction ID")));
914 : :
915 : : /* sanity check */
1401 heikki.linnakangas@i 916 [ - + ]:CBC 927 : if (checkPoint.redo > CheckPointLoc)
1401 heikki.linnakangas@i 917 [ # # ]:UBC 0 : ereport(PANIC,
918 : : (errmsg("invalid redo in checkpoint record")));
919 : :
920 : : /*
921 : : * Check whether we need to force recovery from WAL. If it appears to
922 : : * have been a clean shutdown and we did not have a recovery signal file,
923 : : * then assume no recovery needed.
924 : : */
1401 heikki.linnakangas@i 925 [ + + ]:CBC 927 : if (checkPoint.redo < CheckPointLoc)
926 : : {
927 [ - + ]: 106 : if (wasShutdown)
1401 heikki.linnakangas@i 928 [ # # ]:UBC 0 : ereport(PANIC,
929 : : (errmsg("invalid redo record in shutdown checkpoint")));
1401 heikki.linnakangas@i 930 :CBC 106 : InRecovery = true;
931 : : }
932 [ + + ]: 821 : else if (ControlFile->state != DB_SHUTDOWNED)
933 : 94 : InRecovery = true;
934 [ + + ]: 727 : else if (ArchiveRecoveryRequested)
935 : : {
936 : : /* force recovery due to presence of recovery signal file */
937 : 7 : InRecovery = true;
938 : : }
939 : :
940 : : /*
941 : : * If recovery is needed, update our in-memory copy of pg_control to show
942 : : * that we are recovering and to show the selected checkpoint as the place
943 : : * we are starting from. We also mark pg_control with any minimum recovery
944 : : * stop point obtained from a backup history file.
945 : : *
946 : : * We don't write the changes to disk yet, though. Only do that after
947 : : * initializing various subsystems.
948 : : */
949 [ + + ]: 927 : if (InRecovery)
950 : : {
951 [ + + ]: 207 : if (InArchiveRecovery)
952 : : {
953 : 108 : ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
954 : : }
955 : : else
956 : : {
957 [ + - ]: 99 : ereport(LOG,
958 : : (errmsg("database system was not properly shut down; "
959 : : "automatic recovery in progress")));
960 [ + + ]: 99 : if (recoveryTargetTLI > ControlFile->checkPointCopy.ThisTimeLineID)
961 [ + - ]: 1 : ereport(LOG,
962 : : (errmsg("crash recovery starts in timeline %u "
963 : : "and has target timeline %u",
964 : : ControlFile->checkPointCopy.ThisTimeLineID,
965 : : recoveryTargetTLI)));
966 : 99 : ControlFile->state = DB_IN_CRASH_RECOVERY;
967 : : }
968 : 207 : ControlFile->checkPoint = CheckPointLoc;
969 : 207 : ControlFile->checkPointCopy = checkPoint;
970 [ + + ]: 207 : if (InArchiveRecovery)
971 : : {
972 : : /* initialize minRecoveryPoint if not set yet */
973 [ + + ]: 108 : if (ControlFile->minRecoveryPoint < checkPoint.redo)
974 : : {
975 : 74 : ControlFile->minRecoveryPoint = checkPoint.redo;
976 : 74 : ControlFile->minRecoveryPointTLI = checkPoint.ThisTimeLineID;
977 : : }
978 : : }
979 : :
980 : : /*
981 : : * Set backupStartPoint if we're starting recovery from a base backup.
982 : : *
983 : : * Also set backupEndPoint and use minRecoveryPoint as the backup end
984 : : * location if we're starting recovery from a base backup which was
985 : : * taken from a standby. In this case, the database system status in
986 : : * pg_control must indicate that the database was already in recovery.
987 : : * Usually that will be DB_IN_ARCHIVE_RECOVERY but also can be
988 : : * DB_SHUTDOWNED_IN_RECOVERY if recovery previously was interrupted
989 : : * before reaching this point; e.g. because restore_command or
990 : : * primary_conninfo were faulty.
991 : : *
992 : : * Any other state indicates that the backup somehow became corrupted
993 : : * and we can't sensibly continue with recovery.
994 : : */
995 [ + + ]: 207 : if (haveBackupLabel)
996 : : {
997 : 71 : ControlFile->backupStartPoint = checkPoint.redo;
998 : 71 : ControlFile->backupEndRequired = backupEndRequired;
999 : :
1000 [ + + ]: 71 : if (backupFromStandby)
1001 : : {
1002 [ - + - - ]: 4 : if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY &&
1003 : : dbstate_at_startup != DB_SHUTDOWNED_IN_RECOVERY)
1401 heikki.linnakangas@i 1004 [ # # ]:UBC 0 : ereport(FATAL,
1005 : : (errmsg("backup_label contains data inconsistent with control file"),
1006 : : errhint("This means that the backup is corrupted and you will "
1007 : : "have to use another backup for recovery.")));
1401 heikki.linnakangas@i 1008 :CBC 4 : ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
1009 : : }
1010 : : }
1011 : : }
1012 : :
1013 : : /* remember these, so that we know when we have reached consistency */
1014 : 927 : backupStartPoint = ControlFile->backupStartPoint;
1015 : 927 : backupEndRequired = ControlFile->backupEndRequired;
1016 : 927 : backupEndPoint = ControlFile->backupEndPoint;
1017 [ + + ]: 927 : if (InArchiveRecovery)
1018 : : {
1019 : 108 : minRecoveryPoint = ControlFile->minRecoveryPoint;
1020 : 108 : minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
1021 : : }
1022 : : else
1023 : : {
1024 : 819 : minRecoveryPoint = InvalidXLogRecPtr;
1025 : 819 : minRecoveryPointTLI = 0;
1026 : : }
1027 : :
1028 : : /*
1029 : : * Start recovery assuming that the final record isn't lost.
1030 : : */
1031 : 927 : abortedRecPtr = InvalidXLogRecPtr;
1032 : 927 : missingContrecPtr = InvalidXLogRecPtr;
1033 : :
1034 : 927 : *wasShutdown_ptr = wasShutdown;
1035 : 927 : *haveBackupLabel_ptr = haveBackupLabel;
1036 : 927 : *haveTblspcMap_ptr = haveTblspcMap;
1037 : 927 : }
1038 : :
1039 : : /*
1040 : : * See if there are any recovery signal files and if so, set state for
1041 : : * recovery.
1042 : : *
1043 : : * See if there is a recovery command file (recovery.conf), and if so
1044 : : * throw an ERROR since as of PG12 we no longer recognize that.
1045 : : */
1046 : : static void
1047 : 927 : readRecoverySignalFile(void)
1048 : : {
1049 : : struct stat stat_buf;
1050 : :
1051 [ + + ]: 927 : if (IsBootstrapProcessingMode())
1052 : 823 : return;
1053 : :
1054 : : /*
1055 : : * Check for old recovery API file: recovery.conf
1056 : : */
1057 [ - + ]: 876 : if (stat(RECOVERY_COMMAND_FILE, &stat_buf) == 0)
1401 heikki.linnakangas@i 1058 [ # # ]:UBC 0 : ereport(FATAL,
1059 : : (errcode_for_file_access(),
1060 : : errmsg("using recovery command file \"%s\" is not supported",
1061 : : RECOVERY_COMMAND_FILE)));
1062 : :
1063 : : /*
1064 : : * Remove unused .done file, if present. Ignore if absent.
1065 : : */
1401 heikki.linnakangas@i 1066 :CBC 876 : unlink(RECOVERY_COMMAND_DONE);
1067 : :
1068 : : /*
1069 : : * Check for recovery signal files and if found, fsync them since they
1070 : : * represent server state information. We don't sweat too much about the
1071 : : * possibility of fsync failure, however.
1072 : : *
1073 : : * If present, standby signal file takes precedence. If neither is present
1074 : : * then we won't enter archive recovery.
1075 : : */
1076 [ + + ]: 876 : if (stat(STANDBY_SIGNAL_FILE, &stat_buf) == 0)
1077 : : {
1078 : : int fd;
1079 : :
1080 : 99 : fd = BasicOpenFilePerm(STANDBY_SIGNAL_FILE, O_RDWR | PG_BINARY,
1081 : : S_IRUSR | S_IWUSR);
1082 [ + - ]: 99 : if (fd >= 0)
1083 : : {
1084 : 99 : (void) pg_fsync(fd);
1085 : 99 : close(fd);
1086 : : }
1087 : 99 : standby_signal_file_found = true;
1088 : : }
1089 [ + + ]: 777 : else if (stat(RECOVERY_SIGNAL_FILE, &stat_buf) == 0)
1090 : : {
1091 : : int fd;
1092 : :
1093 : 5 : fd = BasicOpenFilePerm(RECOVERY_SIGNAL_FILE, O_RDWR | PG_BINARY,
1094 : : S_IRUSR | S_IWUSR);
1095 [ + - ]: 5 : if (fd >= 0)
1096 : : {
1097 : 5 : (void) pg_fsync(fd);
1098 : 5 : close(fd);
1099 : : }
1100 : 5 : recovery_signal_file_found = true;
1101 : : }
1102 : :
1103 : 876 : StandbyModeRequested = false;
1104 : 876 : ArchiveRecoveryRequested = false;
1105 [ + + ]: 876 : if (standby_signal_file_found)
1106 : : {
1107 : 99 : StandbyModeRequested = true;
1108 : 99 : ArchiveRecoveryRequested = true;
1109 : : }
1110 [ + + ]: 777 : else if (recovery_signal_file_found)
1111 : : {
1112 : 5 : StandbyModeRequested = false;
1113 : 5 : ArchiveRecoveryRequested = true;
1114 : : }
1115 : : else
1116 : 772 : return;
1117 : :
1118 : : /*
1119 : : * We don't support standby mode in standalone backends; that requires
1120 : : * other processes such as the WAL receiver to be alive.
1121 : : */
1122 [ + + - + ]: 104 : if (StandbyModeRequested && !IsUnderPostmaster)
1401 heikki.linnakangas@i 1123 [ # # ]:UBC 0 : ereport(FATAL,
1124 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1125 : : errmsg("standby mode is not supported by single-user servers")));
1126 : : }
1127 : :
1128 : : static void
1401 heikki.linnakangas@i 1129 :CBC 927 : validateRecoveryParameters(void)
1130 : : {
1131 [ + + ]: 927 : if (!ArchiveRecoveryRequested)
1132 : 823 : return;
1133 : :
1134 : : /*
1135 : : * Check for compulsory parameters
1136 : : */
1137 [ + + ]: 104 : if (StandbyModeRequested)
1138 : : {
1139 [ + - + + ]: 99 : if ((PrimaryConnInfo == NULL || strcmp(PrimaryConnInfo, "") == 0) &&
1140 [ + - + + ]: 11 : (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0))
1141 [ + - ]: 2 : ereport(WARNING,
1142 : : (errmsg("specified neither \"primary_conninfo\" nor \"restore_command\""),
1143 : : errhint("The database server will regularly poll the pg_wal subdirectory to check for files placed there.")));
1144 : : }
1145 : : else
1146 : : {
1147 [ + - ]: 5 : if (recoveryRestoreCommand == NULL ||
1148 [ - + ]: 5 : strcmp(recoveryRestoreCommand, "") == 0)
1401 heikki.linnakangas@i 1149 [ # # ]:UBC 0 : ereport(FATAL,
1150 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1151 : : errmsg("must specify \"restore_command\" when standby mode is not enabled")));
1152 : : }
1153 : :
1154 : : /*
1155 : : * Override any inconsistent requests. Note that this is a change of
1156 : : * behaviour in 9.5; prior to this we simply ignored a request to pause if
1157 : : * hot_standby = off, which was surprising behaviour.
1158 : : */
1401 heikki.linnakangas@i 1159 [ + + ]:CBC 104 : if (recoveryTargetAction == RECOVERY_TARGET_ACTION_PAUSE &&
1160 [ + + ]: 97 : !EnableHotStandby)
1161 : 2 : recoveryTargetAction = RECOVERY_TARGET_ACTION_SHUTDOWN;
1162 : :
1163 : : /*
1164 : : * Final parsing of recovery_target_time string; see also
1165 : : * check_recovery_target_time().
1166 : : */
1167 [ - + ]: 104 : if (recoveryTarget == RECOVERY_TARGET_TIME)
1168 : : {
1401 heikki.linnakangas@i 1169 :UBC 0 : recoveryTargetTime = DatumGetTimestampTz(DirectFunctionCall3(timestamptz_in,
1170 : : CStringGetDatum(recovery_target_time_string),
1171 : : ObjectIdGetDatum(InvalidOid),
1172 : : Int32GetDatum(-1)));
1173 : : }
1174 : :
1175 : : /*
1176 : : * If user specified recovery_target_timeline, validate it or compute the
1177 : : * "latest" value. We can't do this until after we've gotten the restore
1178 : : * command and set InArchiveRecovery, because we need to fetch timeline
1179 : : * history files from the archive.
1180 : : */
1401 heikki.linnakangas@i 1181 [ - + ]:CBC 104 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
1182 : : {
1401 heikki.linnakangas@i 1183 :UBC 0 : TimeLineID rtli = recoveryTargetTLIRequested;
1184 : :
1185 : : /* Timeline 1 does not have a history file, all else should */
1186 [ # # # # ]: 0 : if (rtli != 1 && !existsTimeLineHistory(rtli))
1187 [ # # ]: 0 : ereport(FATAL,
1188 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1189 : : errmsg("recovery target timeline %u does not exist",
1190 : : rtli)));
1191 : 0 : recoveryTargetTLI = rtli;
1192 : : }
1401 heikki.linnakangas@i 1193 [ + - ]:CBC 104 : else if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
1194 : : {
1195 : : /* We start the "latest" search from pg_control's timeline */
1196 : 104 : recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
1197 : : }
1198 : : else
1199 : : {
1200 : : /*
1201 : : * else we just use the recoveryTargetTLI as already read from
1202 : : * ControlFile
1203 : : */
1401 heikki.linnakangas@i 1204 [ # # ]:UBC 0 : Assert(recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_CONTROLFILE);
1205 : : }
1206 : : }
1207 : :
1208 : : /*
1209 : : * read_backup_label: check to see if a backup_label file is present
1210 : : *
1211 : : * If we see a backup_label during recovery, we assume that we are recovering
1212 : : * from a backup dump file, and we therefore roll forward from the checkpoint
1213 : : * identified by the label file, NOT what pg_control says. This avoids the
1214 : : * problem that pg_control might have been archived one or more checkpoints
1215 : : * later than the start of the dump, and so if we rely on it as the start
1216 : : * point, we will fail to restore a consistent database state.
1217 : : *
1218 : : * Returns true if a backup_label was found (and fills the checkpoint
1219 : : * location and TLI into *checkPointLoc and *backupLabelTLI, respectively);
1220 : : * returns false if not. If this backup_label came from a streamed backup,
1221 : : * *backupEndRequired is set to true. If this backup_label was created during
1222 : : * recovery, *backupFromStandby is set to true.
1223 : : *
1224 : : * Also sets the global variables RedoStartLSN and RedoStartTLI with the LSN
1225 : : * and TLI read from the backup file.
1226 : : */
1227 : : static bool
1401 heikki.linnakangas@i 1228 :CBC 927 : read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
1229 : : bool *backupEndRequired, bool *backupFromStandby)
1230 : : {
1231 : : char startxlogfilename[MAXFNAMELEN];
1232 : : TimeLineID tli_from_walseg,
1233 : : tli_from_file;
1234 : : FILE *lfp;
1235 : : char ch;
1236 : : char backuptype[20];
1237 : : char backupfrom[20];
1238 : : char backuplabel[MAXPGPATH];
1239 : : char backuptime[128];
1240 : : uint32 hi,
1241 : : lo;
1242 : :
1243 : : /* suppress possible uninitialized-variable warnings */
1244 : 927 : *checkPointLoc = InvalidXLogRecPtr;
1245 : 927 : *backupLabelTLI = 0;
1246 : 927 : *backupEndRequired = false;
1247 : 927 : *backupFromStandby = false;
1248 : :
1249 : : /*
1250 : : * See if label file is present
1251 : : */
1252 : 927 : lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
1253 [ + + ]: 927 : if (!lfp)
1254 : : {
1255 [ - + ]: 856 : if (errno != ENOENT)
1401 heikki.linnakangas@i 1256 [ # # ]:UBC 0 : ereport(FATAL,
1257 : : (errcode_for_file_access(),
1258 : : errmsg("could not read file \"%s\": %m",
1259 : : BACKUP_LABEL_FILE)));
1401 heikki.linnakangas@i 1260 :CBC 856 : return false; /* it's not there, all is fine */
1261 : : }
1262 : :
1263 : : /*
1264 : : * Read and parse the START WAL LOCATION and CHECKPOINT lines (this code
1265 : : * is pretty crude, but we are not expecting any variability in the file
1266 : : * format).
1267 : : */
164 alvherre@kurilemu.de 1268 [ + - ]:GNC 71 : if (fscanf(lfp, "START WAL LOCATION: %X/%08X (file %08X%16s)%c",
1401 heikki.linnakangas@i 1269 [ - + ]:CBC 71 : &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
1401 heikki.linnakangas@i 1270 [ # # ]:UBC 0 : ereport(FATAL,
1271 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1272 : : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
1401 heikki.linnakangas@i 1273 :CBC 71 : RedoStartLSN = ((uint64) hi) << 32 | lo;
1274 : 71 : RedoStartTLI = tli_from_walseg;
164 alvherre@kurilemu.de 1275 [ + - ]:GNC 71 : if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%08X%c",
1401 heikki.linnakangas@i 1276 [ - + ]:CBC 71 : &hi, &lo, &ch) != 3 || ch != '\n')
1401 heikki.linnakangas@i 1277 [ # # ]:UBC 0 : ereport(FATAL,
1278 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1279 : : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
1401 heikki.linnakangas@i 1280 :CBC 71 : *checkPointLoc = ((uint64) hi) << 32 | lo;
1281 : 71 : *backupLabelTLI = tli_from_walseg;
1282 : :
1283 : : /*
1284 : : * BACKUP METHOD lets us know if this was a typical backup ("streamed",
1285 : : * which could mean either pg_basebackup or the pg_backup_start/stop
1286 : : * method was used) or if this label came from somewhere else (the only
1287 : : * other option today being from pg_rewind). If this was a streamed
1288 : : * backup then we know that we need to play through until we get to the
1289 : : * end of the WAL which was generated during the backup (at which point we
1290 : : * will have reached consistency and backupEndRequired will be reset to be
1291 : : * false).
1292 : : */
1293 [ + - ]: 71 : if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
1294 : : {
1295 [ + + ]: 71 : if (strcmp(backuptype, "streamed") == 0)
1296 : 70 : *backupEndRequired = true;
1297 : : }
1298 : :
1299 : : /*
1300 : : * BACKUP FROM lets us know if this was from a primary or a standby. If
1301 : : * it was from a standby, we'll double-check that the control file state
1302 : : * matches that of a standby.
1303 : : */
1304 [ + - ]: 71 : if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
1305 : : {
1306 [ + + ]: 71 : if (strcmp(backupfrom, "standby") == 0)
1307 : 4 : *backupFromStandby = true;
1308 : : }
1309 : :
1310 : : /*
1311 : : * Parse START TIME and LABEL. Those are not mandatory fields for recovery
1312 : : * but checking for their presence is useful for debugging and the next
1313 : : * sanity checks. Cope also with the fact that the result buffers have a
1314 : : * pre-allocated size, hence if the backup_label file has been generated
1315 : : * with strings longer than the maximum assumed here an incorrect parsing
1316 : : * happens. That's fine as only minor consistency checks are done
1317 : : * afterwards.
1318 : : */
1319 [ + - ]: 71 : if (fscanf(lfp, "START TIME: %127[^\n]\n", backuptime) == 1)
1320 [ + + ]: 71 : ereport(DEBUG1,
1321 : : (errmsg_internal("backup time %s in file \"%s\"",
1322 : : backuptime, BACKUP_LABEL_FILE)));
1323 : :
1324 [ + + ]: 71 : if (fscanf(lfp, "LABEL: %1023[^\n]\n", backuplabel) == 1)
1325 [ + + ]: 70 : ereport(DEBUG1,
1326 : : (errmsg_internal("backup label %s in file \"%s\"",
1327 : : backuplabel, BACKUP_LABEL_FILE)));
1328 : :
1329 : : /*
1330 : : * START TIMELINE is new as of 11. Its parsing is not mandatory, still use
1331 : : * it as a sanity check if present.
1332 : : */
1333 [ + + ]: 71 : if (fscanf(lfp, "START TIMELINE: %u\n", &tli_from_file) == 1)
1334 : : {
1335 [ - + ]: 70 : if (tli_from_walseg != tli_from_file)
1401 heikki.linnakangas@i 1336 [ # # ]:UBC 0 : ereport(FATAL,
1337 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1338 : : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE),
1339 : : errdetail("Timeline ID parsed is %u, but expected %u.",
1340 : : tli_from_file, tli_from_walseg)));
1341 : :
1401 heikki.linnakangas@i 1342 [ + + ]:CBC 70 : ereport(DEBUG1,
1343 : : (errmsg_internal("backup timeline %u in file \"%s\"",
1344 : : tli_from_file, BACKUP_LABEL_FILE)));
1345 : : }
1346 : :
164 alvherre@kurilemu.de 1347 [ - + ]:GNC 71 : if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%08X\n", &hi, &lo) > 0)
729 rhaas@postgresql.org 1348 [ # # ]:UBC 0 : ereport(FATAL,
1349 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1350 : : errmsg("this is an incremental backup, not a data directory"),
1351 : : errhint("Use pg_combinebackup to reconstruct a valid data directory.")));
1352 : :
1401 heikki.linnakangas@i 1353 [ + - - + ]:CBC 71 : if (ferror(lfp) || FreeFile(lfp))
1401 heikki.linnakangas@i 1354 [ # # ]:UBC 0 : ereport(FATAL,
1355 : : (errcode_for_file_access(),
1356 : : errmsg("could not read file \"%s\": %m",
1357 : : BACKUP_LABEL_FILE)));
1358 : :
1401 heikki.linnakangas@i 1359 :CBC 71 : return true;
1360 : : }
1361 : :
1362 : : /*
1363 : : * read_tablespace_map: check to see if a tablespace_map file is present
1364 : : *
1365 : : * If we see a tablespace_map file during recovery, we assume that we are
1366 : : * recovering from a backup dump file, and we therefore need to create symlinks
1367 : : * as per the information present in tablespace_map file.
1368 : : *
1369 : : * Returns true if a tablespace_map file was found (and fills *tablespaces
1370 : : * with a tablespaceinfo struct for each tablespace listed in the file);
1371 : : * returns false if not.
1372 : : */
1373 : : static bool
1374 : 71 : read_tablespace_map(List **tablespaces)
1375 : : {
1376 : : tablespaceinfo *ti;
1377 : : FILE *lfp;
1378 : : char str[MAXPGPATH];
1379 : : int ch,
1380 : : i,
1381 : : n;
1382 : : bool was_backslash;
1383 : :
1384 : : /*
1385 : : * See if tablespace_map file is present
1386 : : */
1387 : 71 : lfp = AllocateFile(TABLESPACE_MAP, "r");
1388 [ + + ]: 71 : if (!lfp)
1389 : : {
1390 [ - + ]: 69 : if (errno != ENOENT)
1401 heikki.linnakangas@i 1391 [ # # ]:UBC 0 : ereport(FATAL,
1392 : : (errcode_for_file_access(),
1393 : : errmsg("could not read file \"%s\": %m",
1394 : : TABLESPACE_MAP)));
1401 heikki.linnakangas@i 1395 :CBC 69 : return false; /* it's not there, all is fine */
1396 : : }
1397 : :
1398 : : /*
1399 : : * Read and parse the link name and path lines from tablespace_map file
1400 : : * (this code is pretty crude, but we are not expecting any variability in
1401 : : * the file format). De-escape any backslashes that were inserted.
1402 : : */
1403 : 2 : i = 0;
1404 : 2 : was_backslash = false;
1405 [ + + ]: 77 : while ((ch = fgetc(lfp)) != EOF)
1406 : : {
1407 [ + - + + : 75 : if (!was_backslash && (ch == '\n' || ch == '\r'))
- + ]
1408 : 2 : {
1409 : : char *endp;
1410 : :
1411 [ - + ]: 2 : if (i == 0)
1401 heikki.linnakangas@i 1412 :UBC 0 : continue; /* \r immediately followed by \n */
1413 : :
1414 : : /*
1415 : : * The de-escaped line should contain an OID followed by exactly
1416 : : * one space followed by a path. The path might start with
1417 : : * spaces, so don't be too liberal about parsing.
1418 : : */
1401 heikki.linnakangas@i 1419 :CBC 2 : str[i] = '\0';
1420 : 2 : n = 0;
1421 [ + - + + ]: 12 : while (str[n] && str[n] != ' ')
1422 : 10 : n++;
1423 [ + - - + ]: 2 : if (n < 1 || n >= i - 1)
1401 heikki.linnakangas@i 1424 [ # # ]:UBC 0 : ereport(FATAL,
1425 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1426 : : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
1401 heikki.linnakangas@i 1427 :CBC 2 : str[n++] = '\0';
1428 : :
8 michael@paquier.xyz 1429 :GNC 2 : ti = palloc0_object(tablespaceinfo);
787 rhaas@postgresql.org 1430 :CBC 2 : errno = 0;
1431 : 2 : ti->oid = strtoul(str, &endp, 10);
1432 [ + - + - : 2 : if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
- + ]
787 rhaas@postgresql.org 1433 [ # # ]:UBC 0 : ereport(FATAL,
1434 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1435 : : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
1401 heikki.linnakangas@i 1436 :CBC 2 : ti->path = pstrdup(str + n);
1437 : 2 : *tablespaces = lappend(*tablespaces, ti);
1438 : :
1439 : 2 : i = 0;
1440 : 2 : continue;
1441 : : }
1442 [ + - - + ]: 73 : else if (!was_backslash && ch == '\\')
1401 heikki.linnakangas@i 1443 :UBC 0 : was_backslash = true;
1444 : : else
1445 : : {
1401 heikki.linnakangas@i 1446 [ + - ]:CBC 73 : if (i < sizeof(str) - 1)
1447 : 73 : str[i++] = ch;
1448 : 73 : was_backslash = false;
1449 : : }
1450 : : }
1451 : :
1452 [ + - - + ]: 2 : if (i != 0 || was_backslash) /* last line not terminated? */
1401 heikki.linnakangas@i 1453 [ # # ]:UBC 0 : ereport(FATAL,
1454 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1455 : : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
1456 : :
1401 heikki.linnakangas@i 1457 [ + - - + ]:CBC 2 : if (ferror(lfp) || FreeFile(lfp))
1401 heikki.linnakangas@i 1458 [ # # ]:UBC 0 : ereport(FATAL,
1459 : : (errcode_for_file_access(),
1460 : : errmsg("could not read file \"%s\": %m",
1461 : : TABLESPACE_MAP)));
1462 : :
1401 heikki.linnakangas@i 1463 :CBC 2 : return true;
1464 : : }
1465 : :
1466 : : /*
1467 : : * Finish WAL recovery.
1468 : : *
1469 : : * This does not close the 'xlogreader' yet, because in some cases the caller
1470 : : * still wants to re-read the last checkpoint record by calling
1471 : : * ReadCheckpointRecord().
1472 : : *
1473 : : * Returns the position of the last valid or applied record, after which new
1474 : : * WAL should be appended, information about why recovery was ended, and some
1475 : : * other things. See the EndOfWalRecoveryInfo struct for details.
1476 : : */
1477 : : EndOfWalRecoveryInfo *
1478 : 872 : FinishWalRecovery(void)
1479 : : {
8 michael@paquier.xyz 1480 :GNC 872 : EndOfWalRecoveryInfo *result = palloc_object(EndOfWalRecoveryInfo);
1481 : : XLogRecPtr lastRec;
1482 : : TimeLineID lastRecTLI;
1483 : : XLogRecPtr endOfLog;
1484 : :
1485 : : /*
1486 : : * Kill WAL receiver, if it's still running, before we continue to write
1487 : : * the startup checkpoint and aborted-contrecord records. It will trump
1488 : : * over these records and subsequent ones if it's still alive when we
1489 : : * start writing WAL.
1490 : : */
1401 heikki.linnakangas@i 1491 :CBC 872 : XLogShutdownWalRcv();
1492 : :
1493 : : /*
1494 : : * Shutdown the slot sync worker to drop any temporary slots acquired by
1495 : : * it and to prevent it from keep trying to fetch the failover slots.
1496 : : *
1497 : : * We do not update the 'synced' column in 'pg_replication_slots' system
1498 : : * view from true to false here, as any failed update could leave 'synced'
1499 : : * column false for some slots. This could cause issues during slot sync
1500 : : * after restarting the server as a standby. While updating the 'synced'
1501 : : * column after switching to the new timeline is an option, it does not
1502 : : * simplify the handling for the 'synced' column. Therefore, we retain the
1503 : : * 'synced' column as true after promotion as it may provide useful
1504 : : * information about the slot origin.
1505 : : */
665 akapila@postgresql.o 1506 : 872 : ShutDownSlotSync();
1507 : :
1508 : : /*
1509 : : * We are now done reading the xlog from stream. Turn off streaming
1510 : : * recovery to force fetching the files (which would be required at end of
1511 : : * recovery, e.g., timeline history file) from archive or pg_wal.
1512 : : *
1513 : : * Note that standby mode must be turned off after killing WAL receiver,
1514 : : * i.e., calling XLogShutdownWalRcv().
1515 : : */
1401 heikki.linnakangas@i 1516 [ - + ]: 872 : Assert(!WalRcvStreaming());
1517 : 872 : StandbyMode = false;
1518 : :
1519 : : /*
1520 : : * Determine where to start writing WAL next.
1521 : : *
1522 : : * Re-fetch the last valid or last applied record, so we can identify the
1523 : : * exact endpoint of what we consider the valid portion of WAL. There may
1524 : : * be an incomplete continuation record after that, in which case
1525 : : * 'abortedRecPtr' and 'missingContrecPtr' are set and the caller will
1526 : : * write a special OVERWRITE_CONTRECORD message to mark that the rest of
1527 : : * it is intentionally missing. See CreateOverwriteContrecordRecord().
1528 : : *
1529 : : * An important side-effect of this is to load the last page into
1530 : : * xlogreader. The caller uses it to initialize the WAL for writing.
1531 : : */
1532 [ + + ]: 872 : if (!InRecovery)
1533 : : {
1534 : 720 : lastRec = CheckPointLoc;
1535 : 720 : lastRecTLI = CheckPointTLI;
1536 : : }
1537 : : else
1538 : : {
1539 : 152 : lastRec = XLogRecoveryCtl->lastReplayedReadRecPtr;
1540 : 152 : lastRecTLI = XLogRecoveryCtl->lastReplayedTLI;
1541 : : }
1351 tmunro@postgresql.or 1542 : 872 : XLogPrefetcherBeginRead(xlogprefetcher, lastRec);
1543 : 872 : (void) ReadRecord(xlogprefetcher, PANIC, false, lastRecTLI);
1401 heikki.linnakangas@i 1544 : 872 : endOfLog = xlogreader->EndRecPtr;
1545 : :
1546 : : /*
1547 : : * Remember the TLI in the filename of the XLOG segment containing the
1548 : : * end-of-log. It could be different from the timeline that endOfLog
1549 : : * nominally belongs to, if there was a timeline switch in that segment,
1550 : : * and we were reading the old WAL from a segment belonging to a higher
1551 : : * timeline.
1552 : : */
1553 : 872 : result->endOfLogTLI = xlogreader->seg.ws_tli;
1554 : :
1555 [ + + ]: 872 : if (ArchiveRecoveryRequested)
1556 : : {
1557 : : /*
1558 : : * We are no longer in archive recovery state.
1559 : : *
1560 : : * We are now done reading the old WAL. Turn off archive fetching if
1561 : : * it was active.
1562 : : */
1563 [ - + ]: 49 : Assert(InArchiveRecovery);
1564 : 49 : InArchiveRecovery = false;
1565 : :
1566 : : /*
1567 : : * If the ending log segment is still open, close it (to avoid
1568 : : * problems on Windows with trying to rename or delete an open file).
1569 : : */
1570 [ + - ]: 49 : if (readFile >= 0)
1571 : : {
1572 : 49 : close(readFile);
1573 : 49 : readFile = -1;
1574 : : }
1575 : : }
1576 : :
1577 : : /*
1578 : : * Copy the last partial block to the caller, for initializing the WAL
1579 : : * buffer for appending new WAL.
1580 : : */
1581 [ + + ]: 872 : if (endOfLog % XLOG_BLCKSZ != 0)
1582 : : {
1583 : : char *page;
1584 : : int len;
1585 : : XLogRecPtr pageBeginPtr;
1586 : :
1587 : 855 : pageBeginPtr = endOfLog - (endOfLog % XLOG_BLCKSZ);
1588 [ - + ]: 855 : Assert(readOff == XLogSegmentOffset(pageBeginPtr, wal_segment_size));
1589 : :
1590 : : /* Copy the valid part of the last block */
1591 : 855 : len = endOfLog % XLOG_BLCKSZ;
1592 : 855 : page = palloc(len);
1593 : 855 : memcpy(page, xlogreader->readBuf, len);
1594 : :
1595 : 855 : result->lastPageBeginPtr = pageBeginPtr;
1596 : 855 : result->lastPage = page;
1597 : : }
1598 : : else
1599 : : {
1600 : : /* There is no partial block to copy. */
1601 : 17 : result->lastPageBeginPtr = endOfLog;
1602 : 17 : result->lastPage = NULL;
1603 : : }
1604 : :
1605 : : /*
1606 : : * Create a comment for the history file to explain why and where timeline
1607 : : * changed.
1608 : : */
1609 : 872 : result->recoveryStopReason = getRecoveryStopReason();
1610 : :
1611 : 872 : result->lastRec = lastRec;
1612 : 872 : result->lastRecTLI = lastRecTLI;
1613 : 872 : result->endOfLog = endOfLog;
1614 : :
1615 : 872 : result->abortedRecPtr = abortedRecPtr;
1616 : 872 : result->missingContrecPtr = missingContrecPtr;
1617 : :
1618 : 872 : result->standby_signal_file_found = standby_signal_file_found;
1619 : 872 : result->recovery_signal_file_found = recovery_signal_file_found;
1620 : :
1621 : 872 : return result;
1622 : : }
1623 : :
1624 : : /*
1625 : : * Clean up the WAL reader and leftovers from restoring WAL from archive
1626 : : */
1627 : : void
1628 : 872 : ShutdownWalRecovery(void)
1629 : : {
1630 : : char recoveryPath[MAXPGPATH];
1631 : :
1632 : : /* Final update of pg_stat_recovery_prefetch. */
1351 tmunro@postgresql.or 1633 : 872 : XLogPrefetcherComputeStats(xlogprefetcher);
1634 : :
1635 : : /* Shut down xlogreader */
1401 heikki.linnakangas@i 1636 [ + + ]: 872 : if (readFile >= 0)
1637 : : {
1638 : 823 : close(readFile);
1639 : 823 : readFile = -1;
1640 : : }
138 tgl@sss.pgh.pa.us 1641 :GNC 872 : pfree(xlogreader->private_data);
1401 heikki.linnakangas@i 1642 :CBC 872 : XLogReaderFree(xlogreader);
1351 tmunro@postgresql.or 1643 : 872 : XLogPrefetcherFree(xlogprefetcher);
1644 : :
1401 heikki.linnakangas@i 1645 [ + + ]: 872 : if (ArchiveRecoveryRequested)
1646 : : {
1647 : : /*
1648 : : * Since there might be a partial WAL segment named RECOVERYXLOG, get
1649 : : * rid of it.
1650 : : */
1651 : 49 : snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG");
1652 : 49 : unlink(recoveryPath); /* ignore any error */
1653 : :
1654 : : /* Get rid of any remaining recovered timeline-history file, too */
1655 : 49 : snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYHISTORY");
1656 : 49 : unlink(recoveryPath); /* ignore any error */
1657 : : }
1658 : :
1659 : : /*
1660 : : * We don't need the latch anymore. It's not strictly necessary to disown
1661 : : * it, but let's do it for the sake of tidiness.
1662 : : */
1663 [ + + ]: 872 : if (ArchiveRecoveryRequested)
1664 : 49 : DisownLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
1665 : 872 : }
1666 : :
1667 : : /*
1668 : : * Perform WAL recovery.
1669 : : *
1670 : : * If the system was shut down cleanly, this is never called.
1671 : : */
1672 : : void
1673 : 207 : PerformWalRecovery(void)
1674 : : {
1675 : : XLogRecord *record;
1676 : 207 : bool reachedRecoveryTarget = false;
1677 : : TimeLineID replayTLI;
1678 : :
1679 : : /*
1680 : : * Initialize shared variables for tracking progress of WAL replay, as if
1681 : : * we had just replayed the record before the REDO location (or the
1682 : : * checkpoint record itself, if it's a shutdown checkpoint).
1683 : : */
1684 [ - + ]: 207 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
1685 [ + + ]: 207 : if (RedoStartLSN < CheckPointLoc)
1686 : : {
1687 : 106 : XLogRecoveryCtl->lastReplayedReadRecPtr = InvalidXLogRecPtr;
1688 : 106 : XLogRecoveryCtl->lastReplayedEndRecPtr = RedoStartLSN;
1689 : 106 : XLogRecoveryCtl->lastReplayedTLI = RedoStartTLI;
1690 : : }
1691 : : else
1692 : : {
1693 : 101 : XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
1694 : 101 : XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
1695 : 101 : XLogRecoveryCtl->lastReplayedTLI = CheckPointTLI;
1696 : : }
1697 : 207 : XLogRecoveryCtl->replayEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
1698 : 207 : XLogRecoveryCtl->replayEndTLI = XLogRecoveryCtl->lastReplayedTLI;
1699 : 207 : XLogRecoveryCtl->recoveryLastXTime = 0;
1700 : 207 : XLogRecoveryCtl->currentChunkStartTime = 0;
1701 : 207 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
1702 : 207 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
1703 : :
1704 : : /* Also ensure XLogReceiptTime has a sane value */
1705 : 207 : XLogReceiptTime = GetCurrentTimestamp();
1706 : :
1707 : : /*
1708 : : * Let postmaster know we've started redo now, so that it can launch the
1709 : : * archiver if necessary.
1710 : : */
1711 [ + + ]: 207 : if (IsUnderPostmaster)
1712 : 198 : SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
1713 : :
1714 : : /*
1715 : : * Allow read-only connections immediately if we're consistent already.
1716 : : */
1717 : 207 : CheckRecoveryConsistency();
1718 : :
1719 : : /*
1720 : : * Find the first record that logically follows the checkpoint --- it
1721 : : * might physically precede it, though.
1722 : : */
1723 [ + + ]: 207 : if (RedoStartLSN < CheckPointLoc)
1724 : : {
1725 : : /* back up to find the record */
1726 : 106 : replayTLI = RedoStartTLI;
1351 tmunro@postgresql.or 1727 : 106 : XLogPrefetcherBeginRead(xlogprefetcher, RedoStartLSN);
1728 : 106 : record = ReadRecord(xlogprefetcher, PANIC, false, replayTLI);
1729 : :
1730 : : /*
1731 : : * If a checkpoint record's redo pointer points back to an earlier
1732 : : * LSN, the record at that LSN should be an XLOG_CHECKPOINT_REDO
1733 : : * record.
1734 : : */
791 rhaas@postgresql.org 1735 [ + - ]: 106 : if (record->xl_rmid != RM_XLOG_ID ||
1736 [ - + ]: 106 : (record->xl_info & ~XLR_INFO_MASK) != XLOG_CHECKPOINT_REDO)
791 rhaas@postgresql.org 1737 [ # # ]:UBC 0 : ereport(FATAL,
1738 : : errmsg("unexpected record type found at redo point %X/%08X",
1739 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)));
1740 : : }
1741 : : else
1742 : : {
1743 : : /* just have to read next record after CheckPoint */
1401 heikki.linnakangas@i 1744 [ - + ]:CBC 101 : Assert(xlogreader->ReadRecPtr == CheckPointLoc);
1745 : 101 : replayTLI = CheckPointTLI;
1351 tmunro@postgresql.or 1746 : 101 : record = ReadRecord(xlogprefetcher, LOG, false, replayTLI);
1747 : : }
1748 : :
1401 heikki.linnakangas@i 1749 [ + + ]: 207 : if (record != NULL)
1750 : : {
1751 : : TimestampTz xtime;
1752 : : PGRUsage ru0;
1753 : :
1754 : 198 : pg_rusage_init(&ru0);
1755 : :
1756 : 198 : InRedo = true;
1757 : :
1352 jdavis@postgresql.or 1758 : 198 : RmgrStartup();
1759 : :
1401 heikki.linnakangas@i 1760 [ + - ]: 198 : ereport(LOG,
1761 : : errmsg("redo starts at %X/%08X",
1762 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)));
1763 : :
1764 : : /* Prepare to report progress of the redo phase. */
1765 [ + + ]: 198 : if (!StandbyMode)
1766 : 104 : begin_startup_progress_phase();
1767 : :
1768 : : /*
1769 : : * main redo apply loop
1770 : : */
1771 : : do
1772 : : {
1773 [ + + ]: 2782737 : if (!StandbyMode)
164 alvherre@kurilemu.de 1774 [ - + - - ]:GNC 262707 : ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%08X",
1775 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
1776 : :
1777 : : #ifdef WAL_DEBUG
1778 : : if (XLOG_DEBUG)
1779 : : {
1780 : : StringInfoData buf;
1781 : :
1782 : : initStringInfo(&buf);
1783 : : appendStringInfo(&buf, "REDO @ %X/%08X; LSN %X/%08X: ",
1784 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
1785 : : LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
1786 : : xlog_outrec(&buf, xlogreader);
1787 : : appendStringInfoString(&buf, " - ");
1788 : : xlog_outdesc(&buf, xlogreader);
1789 : : elog(LOG, "%s", buf.data);
1790 : : pfree(buf.data);
1791 : : }
1792 : : #endif
1793 : :
1794 : : /* Handle interrupt signals of startup process */
288 heikki.linnakangas@i 1795 :CBC 2782737 : ProcessStartupProcInterrupts();
1796 : :
1797 : : /*
1798 : : * Pause WAL replay, if requested by a hot-standby session via
1799 : : * SetRecoveryPause().
1800 : : *
1801 : : * Note that we intentionally don't take the info_lck spinlock
1802 : : * here. We might therefore read a slightly stale value of the
1803 : : * recoveryPause flag, but it can't be very stale (no worse than
1804 : : * the last spinlock we did acquire). Since a pause request is a
1805 : : * pretty asynchronous thing anyway, possibly responding to it one
1806 : : * WAL record later than we otherwise would is a minor issue, so
1807 : : * it doesn't seem worth adding another spinlock cycle to prevent
1808 : : * that.
1809 : : */
1401 1810 [ - + ]: 2782737 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
1811 : : RECOVERY_NOT_PAUSED)
1401 heikki.linnakangas@i 1812 :UBC 0 : recoveryPausesHere(false);
1813 : :
1814 : : /*
1815 : : * Have we reached our recovery target?
1816 : : */
1401 heikki.linnakangas@i 1817 [ + + ]:CBC 2782737 : if (recoveryStopsBefore(xlogreader))
1818 : : {
1819 : 2 : reachedRecoveryTarget = true;
1820 : 2 : break;
1821 : : }
1822 : :
1823 : : /*
1824 : : * If we've been asked to lag the primary, wait on latch until
1825 : : * enough time has passed.
1826 : : */
1827 [ + + ]: 2782735 : if (recoveryApplyDelay(xlogreader))
1828 : : {
1829 : : /*
1830 : : * We test for paused recovery again here. If user sets
1831 : : * delayed apply, it may be because they expect to pause
1832 : : * recovery in case of problems, so we must test again here
1833 : : * otherwise pausing during the delay-wait wouldn't work.
1834 : : */
1401 heikki.linnakangas@i 1835 [ - + ]:GBC 9 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
1836 : : RECOVERY_NOT_PAUSED)
1401 heikki.linnakangas@i 1837 :UBC 0 : recoveryPausesHere(false);
1838 : : }
1839 : :
1840 : : /*
1841 : : * Apply the record
1842 : : */
1401 heikki.linnakangas@i 1843 :CBC 2782735 : ApplyWalRecord(xlogreader, record, &replayTLI);
1844 : :
1845 : : /* Exit loop if we reached inclusive recovery target */
1846 [ + + ]: 2782733 : if (recoveryStopsAfter(xlogreader))
1847 : : {
1848 : 5 : reachedRecoveryTarget = true;
1849 : 5 : break;
1850 : : }
1851 : :
1852 : : /*
1853 : : * If we replayed an LSN that someone was waiting for then walk
1854 : : * over the shared memory array and set latches to notify the
1855 : : * waiters.
1856 : : */
43 akorotkov@postgresql 1857 [ + - + + ]:GNC 5565456 : if (waitLSNState &&
1858 : 2782728 : (XLogRecoveryCtl->lastReplayedEndRecPtr >=
1859 : 2782728 : pg_atomic_read_u64(&waitLSNState->minWaitedLSN[WAIT_LSN_TYPE_REPLAY])))
1860 : 7 : WaitLSNWakeup(WAIT_LSN_TYPE_REPLAY, XLogRecoveryCtl->lastReplayedEndRecPtr);
1861 : :
1862 : : /* Else, try to fetch the next WAL record */
1351 tmunro@postgresql.or 1863 :CBC 2782728 : record = ReadRecord(xlogprefetcher, LOG, false, replayTLI);
1401 heikki.linnakangas@i 1864 [ + + ]: 2782676 : } while (record != NULL);
1865 : :
1866 : : /*
1867 : : * end of main redo apply loop
1868 : : */
1869 : :
1870 [ + + ]: 144 : if (reachedRecoveryTarget)
1871 : : {
1872 [ - + ]: 7 : if (!reachedConsistency)
1401 heikki.linnakangas@i 1873 [ # # ]:UBC 0 : ereport(FATAL,
1874 : : (errmsg("requested recovery stop point is before consistent recovery point")));
1875 : :
1876 : : /*
1877 : : * This is the last point where we can restart recovery with a new
1878 : : * recovery target, if we shutdown and begin again. After this,
1879 : : * Resource Managers may choose to do permanent corrective actions
1880 : : * at end of recovery.
1881 : : */
1401 heikki.linnakangas@i 1882 [ - + + - ]:CBC 7 : switch (recoveryTargetAction)
1883 : : {
1401 heikki.linnakangas@i 1884 :UBC 0 : case RECOVERY_TARGET_ACTION_SHUTDOWN:
1885 : :
1886 : : /*
1887 : : * exit with special return code to request shutdown of
1888 : : * postmaster. Log messages issued from postmaster.
1889 : : */
1890 : 0 : proc_exit(3);
1891 : :
1401 heikki.linnakangas@i 1892 :CBC 1 : case RECOVERY_TARGET_ACTION_PAUSE:
1893 : 1 : SetRecoveryPause(true);
1894 : 1 : recoveryPausesHere(true);
1895 : :
1896 : : /* drop into promote */
1897 : :
1898 : 7 : case RECOVERY_TARGET_ACTION_PROMOTE:
1899 : 7 : break;
1900 : : }
1901 : : }
1902 : :
1352 jdavis@postgresql.or 1903 : 144 : RmgrCleanup();
1904 : :
1401 heikki.linnakangas@i 1905 [ + - ]: 144 : ereport(LOG,
1906 : : errmsg("redo done at %X/%08X system usage: %s",
1907 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
1908 : : pg_rusage_show(&ru0)));
1909 : 144 : xtime = GetLatestXTime();
1910 [ + + ]: 144 : if (xtime)
1911 [ + - ]: 35 : ereport(LOG,
1912 : : (errmsg("last completed transaction was at log time %s",
1913 : : timestamptz_to_str(xtime))));
1914 : :
1915 : 144 : InRedo = false;
1916 : : }
1917 : : else
1918 : : {
1919 : : /* there are no WAL records following the checkpoint */
1920 [ + - ]: 9 : ereport(LOG,
1921 : : (errmsg("redo is not required")));
1922 : : }
1923 : :
1924 : : /*
1925 : : * This check is intentionally after the above log messages that indicate
1926 : : * how far recovery went.
1927 : : */
1928 [ + + ]: 153 : if (ArchiveRecoveryRequested &&
1929 [ + + ]: 50 : recoveryTarget != RECOVERY_TARGET_UNSET &&
1930 [ + + ]: 8 : !reachedRecoveryTarget)
1931 [ + - ]: 1 : ereport(FATAL,
1932 : : (errcode(ERRCODE_CONFIG_FILE_ERROR),
1933 : : errmsg("recovery ended before configured recovery target was reached")));
1934 : 152 : }
1935 : :
1936 : : /*
1937 : : * Subroutine of PerformWalRecovery, to apply one WAL record.
1938 : : */
1939 : : static void
1940 : 2782735 : ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI)
1941 : : {
1942 : : ErrorContextCallback errcallback;
1943 : 2782735 : bool switchedTLI = false;
1944 : :
1945 : : /* Setup error traceback support for ereport() */
1946 : 2782735 : errcallback.callback = rm_redo_error_callback;
385 peter@eisentraut.org 1947 : 2782735 : errcallback.arg = xlogreader;
1401 heikki.linnakangas@i 1948 : 2782735 : errcallback.previous = error_context_stack;
1949 : 2782735 : error_context_stack = &errcallback;
1950 : :
1951 : : /*
1952 : : * TransamVariables->nextXid must be beyond record's xid.
1953 : : */
1954 : 2782735 : AdvanceNextFullTransactionIdPastXid(record->xl_xid);
1955 : :
1956 : : /*
1957 : : * Before replaying this record, check if this record causes the current
1958 : : * timeline to change. The record is already considered to be part of the
1959 : : * new timeline, so we update replayTLI before replaying it. That's
1960 : : * important so that replayEndTLI, which is recorded as the minimum
1961 : : * recovery point's TLI if recovery stops after this record, is set
1962 : : * correctly.
1963 : : */
1964 [ + + ]: 2782735 : if (record->xl_rmid == RM_XLOG_ID)
1965 : : {
1966 : 41488 : TimeLineID newReplayTLI = *replayTLI;
1967 : 41488 : TimeLineID prevReplayTLI = *replayTLI;
1968 : 41488 : uint8 info = record->xl_info & ~XLR_INFO_MASK;
1969 : :
1970 [ + + ]: 41488 : if (info == XLOG_CHECKPOINT_SHUTDOWN)
1971 : : {
1972 : : CheckPoint checkPoint;
1973 : :
1974 : 31 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
1975 : 31 : newReplayTLI = checkPoint.ThisTimeLineID;
1976 : 31 : prevReplayTLI = checkPoint.PrevTimeLineID;
1977 : : }
1978 [ + + ]: 41457 : else if (info == XLOG_END_OF_RECOVERY)
1979 : : {
1980 : : xl_end_of_recovery xlrec;
1981 : :
1982 : 9 : memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
1983 : 9 : newReplayTLI = xlrec.ThisTimeLineID;
1984 : 9 : prevReplayTLI = xlrec.PrevTimeLineID;
1985 : : }
1986 : :
1987 [ + + ]: 41488 : if (newReplayTLI != *replayTLI)
1988 : : {
1989 : : /* Check that it's OK to switch to this TLI */
1990 : 10 : checkTimeLineSwitch(xlogreader->EndRecPtr,
1991 : : newReplayTLI, prevReplayTLI, *replayTLI);
1992 : :
1993 : : /* Following WAL records should be run with new TLI */
1994 : 10 : *replayTLI = newReplayTLI;
1995 : 10 : switchedTLI = true;
1996 : : }
1997 : : }
1998 : :
1999 : : /*
2000 : : * Update shared replayEndRecPtr before replaying this record, so that
2001 : : * XLogFlush will update minRecoveryPoint correctly.
2002 : : */
2003 [ + + ]: 2782735 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
2004 : 2782735 : XLogRecoveryCtl->replayEndRecPtr = xlogreader->EndRecPtr;
2005 : 2782735 : XLogRecoveryCtl->replayEndTLI = *replayTLI;
2006 : 2782735 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
2007 : :
2008 : : /*
2009 : : * If we are attempting to enter Hot Standby mode, process XIDs we see
2010 : : */
2011 [ + + ]: 2782735 : if (standbyState >= STANDBY_INITIALIZED &&
2012 [ + + ]: 2539445 : TransactionIdIsValid(record->xl_xid))
2013 : 2486547 : RecordKnownAssignedTransactionIds(record->xl_xid);
2014 : :
2015 : : /*
2016 : : * Some XLOG record types that are related to recovery are processed
2017 : : * directly here, rather than in xlog_redo()
2018 : : */
2019 [ + + ]: 2782735 : if (record->xl_rmid == RM_XLOG_ID)
2020 : 41488 : xlogrecovery_redo(xlogreader, *replayTLI);
2021 : :
2022 : : /* Now apply the WAL record itself */
1352 jdavis@postgresql.or 2023 : 2782735 : GetRmgr(record->xl_rmid).rm_redo(xlogreader);
2024 : :
2025 : : /*
2026 : : * After redo, check whether the backup pages associated with the WAL
2027 : : * record are consistent with the existing pages. This check is done only
2028 : : * if consistency check is enabled for this record.
2029 : : */
1401 heikki.linnakangas@i 2030 [ + + ]: 2782733 : if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
2031 : 2206260 : verifyBackupPageConsistency(xlogreader);
2032 : :
2033 : : /* Pop the error context stack */
2034 : 2782733 : error_context_stack = errcallback.previous;
2035 : :
2036 : : /*
2037 : : * Update lastReplayedEndRecPtr after this record has been successfully
2038 : : * replayed.
2039 : : */
2040 [ + + ]: 2782733 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
2041 : 2782733 : XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
2042 : 2782733 : XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
2043 : 2782733 : XLogRecoveryCtl->lastReplayedTLI = *replayTLI;
2044 : 2782733 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
2045 : :
2046 : : /* ------
2047 : : * Wakeup walsenders:
2048 : : *
2049 : : * On the standby, the WAL is flushed first (which will only wake up
2050 : : * physical walsenders) and then applied, which will only wake up logical
2051 : : * walsenders.
2052 : : *
2053 : : * Indeed, logical walsenders on standby can't decode and send data until
2054 : : * it's been applied.
2055 : : *
2056 : : * Physical walsenders don't need to be woken up during replay unless
2057 : : * cascading replication is allowed and time line change occurred (so that
2058 : : * they can notice that they are on a new time line).
2059 : : *
2060 : : * That's why the wake up conditions are for:
2061 : : *
2062 : : * - physical walsenders in case of new time line and cascade
2063 : : * replication is allowed
2064 : : * - logical walsenders in case cascade replication is allowed (could not
2065 : : * be created otherwise)
2066 : : * ------
2067 : : */
985 andres@anarazel.de 2068 [ + + + + ]: 2782733 : if (AllowCascadeReplication())
2069 : 2593543 : WalSndWakeup(switchedTLI, true);
2070 : :
2071 : : /*
2072 : : * If rm_redo called XLogRequestWalReceiverReply, then we wake up the
2073 : : * receiver so that it notices the updated lastReplayedEndRecPtr and sends
2074 : : * a reply to the primary.
2075 : : */
1401 heikki.linnakangas@i 2076 [ + + ]: 2782733 : if (doRequestWalReceiverReply)
2077 : : {
2078 : 2 : doRequestWalReceiverReply = false;
2079 : 2 : WalRcvForceReply();
2080 : : }
2081 : :
2082 : : /* Allow read-only connections if we're consistent now */
2083 : 2782733 : CheckRecoveryConsistency();
2084 : :
2085 : : /* Is this a timeline switch? */
2086 [ + + ]: 2782733 : if (switchedTLI)
2087 : : {
2088 : : /*
2089 : : * Before we continue on the new timeline, clean up any (possibly
2090 : : * bogus) future WAL segments on the old timeline.
2091 : : */
2092 : 10 : RemoveNonParentXlogFiles(xlogreader->EndRecPtr, *replayTLI);
2093 : :
2094 : : /* Reset the prefetcher. */
1351 tmunro@postgresql.or 2095 : 10 : XLogPrefetchReconfigure();
2096 : : }
1401 heikki.linnakangas@i 2097 : 2782733 : }
2098 : :
2099 : : /*
2100 : : * Some XLOG RM record types that are directly related to WAL recovery are
2101 : : * handled here rather than in the xlog_redo()
2102 : : */
2103 : : static void
2104 : 41488 : xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
2105 : : {
2106 : 41488 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2107 : 41488 : XLogRecPtr lsn = record->EndRecPtr;
2108 : :
2109 [ - + ]: 41488 : Assert(XLogRecGetRmid(record) == RM_XLOG_ID);
2110 : :
2111 [ + + ]: 41488 : if (info == XLOG_OVERWRITE_CONTRECORD)
2112 : : {
2113 : : /* Verify the payload of a XLOG_OVERWRITE_CONTRECORD record. */
2114 : : xl_overwrite_contrecord xlrec;
2115 : :
2116 : 1 : memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord));
2117 [ - + ]: 1 : if (xlrec.overwritten_lsn != record->overwrittenRecPtr)
164 alvherre@kurilemu.de 2118 [ # # ]:UNC 0 : elog(FATAL, "mismatching overwritten LSN %X/%08X -> %X/%08X",
2119 : : LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
2120 : : LSN_FORMAT_ARGS(record->overwrittenRecPtr));
2121 : :
2122 : : /* We have safely skipped the aborted record */
1366 alvherre@alvh.no-ip. 2123 :CBC 1 : abortedRecPtr = InvalidXLogRecPtr;
2124 : 1 : missingContrecPtr = InvalidXLogRecPtr;
2125 : :
1401 heikki.linnakangas@i 2126 [ + - ]: 1 : ereport(LOG,
2127 : : errmsg("successfully skipped missing contrecord at %X/%08X, overwritten at %s",
2128 : : LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
2129 : : timestamptz_to_str(xlrec.overwrite_time)));
2130 : :
2131 : : /* Verifying the record should only happen once */
2132 : 1 : record->overwrittenRecPtr = InvalidXLogRecPtr;
2133 : : }
2134 [ + + ]: 41487 : else if (info == XLOG_BACKUP_END)
2135 : : {
2136 : : XLogRecPtr startpoint;
2137 : :
2138 : 83 : memcpy(&startpoint, XLogRecGetData(record), sizeof(startpoint));
2139 : :
2140 [ + + ]: 83 : if (backupStartPoint == startpoint)
2141 : : {
2142 : : /*
2143 : : * We have reached the end of base backup, the point where
2144 : : * pg_backup_stop() was done. The data on disk is now consistent
2145 : : * (assuming we have also reached minRecoveryPoint). Set
2146 : : * backupEndPoint to the current LSN, so that the next call to
2147 : : * CheckRecoveryConsistency() will notice it and do the
2148 : : * end-of-backup processing.
2149 : : */
2150 [ + + ]: 69 : elog(DEBUG1, "end of backup record reached");
2151 : :
2152 : 69 : backupEndPoint = lsn;
2153 : : }
2154 : : else
164 alvherre@kurilemu.de 2155 [ - + ]:GNC 14 : elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%08X, waiting for %X/%08X",
2156 : : LSN_FORMAT_ARGS(startpoint), LSN_FORMAT_ARGS(backupStartPoint));
2157 : : }
1401 heikki.linnakangas@i 2158 :CBC 41488 : }
2159 : :
2160 : : /*
2161 : : * Verify that, in non-test mode, ./pg_tblspc doesn't contain any real
2162 : : * directories.
2163 : : *
2164 : : * Replay of database creation XLOG records for databases that were later
2165 : : * dropped can create fake directories in pg_tblspc. By the time consistency
2166 : : * is reached these directories should have been removed; here we verify
2167 : : * that this did indeed happen. This is to be called at the point where
2168 : : * consistent state is reached.
2169 : : *
2170 : : * allow_in_place_tablespaces turns the PANIC into a WARNING, which is
2171 : : * useful for testing purposes, and also allows for an escape hatch in case
2172 : : * things go south.
2173 : : */
2174 : : static void
1239 alvherre@alvh.no-ip. 2175 : 110 : CheckTablespaceDirectory(void)
2176 : : {
2177 : : DIR *dir;
2178 : : struct dirent *de;
2179 : :
471 michael@paquier.xyz 2180 : 110 : dir = AllocateDir(PG_TBLSPC_DIR);
2181 [ + + ]: 337 : while ((de = ReadDir(dir, PG_TBLSPC_DIR)) != NULL)
2182 : : {
2183 : : char path[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
2184 : :
2185 : : /* Skip entries of non-oid names */
1239 alvherre@alvh.no-ip. 2186 [ + + ]: 227 : if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
2187 : 220 : continue;
2188 : :
471 michael@paquier.xyz 2189 : 7 : snprintf(path, sizeof(path), "%s/%s", PG_TBLSPC_DIR, de->d_name);
2190 : :
1239 alvherre@alvh.no-ip. 2191 [ + + ]: 7 : if (get_dirent_type(path, de, false, ERROR) != PGFILETYPE_LNK)
2192 [ + - + - ]: 4 : ereport(allow_in_place_tablespaces ? WARNING : PANIC,
2193 : : (errcode(ERRCODE_DATA_CORRUPTED),
2194 : : errmsg("unexpected directory entry \"%s\" found in %s",
2195 : : de->d_name, PG_TBLSPC_DIR),
2196 : : errdetail("All directory entries in %s/ should be symbolic links.",
2197 : : PG_TBLSPC_DIR),
2198 : : errhint("Remove those directories, or set \"allow_in_place_tablespaces\" to ON transiently to let recovery complete.")));
2199 : : }
2200 : 110 : }
2201 : :
2202 : : /*
2203 : : * Checks if recovery has reached a consistent state. When consistency is
2204 : : * reached and we have a valid starting standby snapshot, tell postmaster
2205 : : * that it can start accepting read-only connections.
2206 : : */
2207 : : static void
1401 heikki.linnakangas@i 2208 : 2782942 : CheckRecoveryConsistency(void)
2209 : : {
2210 : : XLogRecPtr lastReplayedEndRecPtr;
2211 : : TimeLineID lastReplayedTLI;
2212 : :
2213 : : /*
2214 : : * During crash recovery, we don't reach a consistent state until we've
2215 : : * replayed all the WAL.
2216 : : */
42 alvherre@kurilemu.de 2217 [ + + ]:GNC 2782942 : if (!XLogRecPtrIsValid(minRecoveryPoint))
1401 heikki.linnakangas@i 2218 :CBC 257593 : return;
2219 : :
2220 [ - + ]: 2525349 : Assert(InArchiveRecovery);
2221 : :
2222 : : /*
2223 : : * assume that we are called in the startup process, and hence don't need
2224 : : * a lock to read lastReplayedEndRecPtr
2225 : : */
2226 : 2525349 : lastReplayedEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
2227 : 2525349 : lastReplayedTLI = XLogRecoveryCtl->lastReplayedTLI;
2228 : :
2229 : : /*
2230 : : * Have we reached the point where our base backup was completed?
2231 : : */
42 alvherre@kurilemu.de 2232 [ + + ]:GNC 2525349 : if (XLogRecPtrIsValid(backupEndPoint) &&
1401 heikki.linnakangas@i 2233 [ + + ]:CBC 101 : backupEndPoint <= lastReplayedEndRecPtr)
2234 : : {
693 michael@paquier.xyz 2235 : 71 : XLogRecPtr saveBackupStartPoint = backupStartPoint;
2236 : 71 : XLogRecPtr saveBackupEndPoint = backupEndPoint;
2237 : :
1401 heikki.linnakangas@i 2238 [ + + ]: 71 : elog(DEBUG1, "end of backup reached");
2239 : :
2240 : : /*
2241 : : * We have reached the end of base backup, as indicated by pg_control.
2242 : : * Update the control file accordingly.
2243 : : */
2244 : 71 : ReachedEndOfBackup(lastReplayedEndRecPtr, lastReplayedTLI);
2245 : 71 : backupStartPoint = InvalidXLogRecPtr;
2246 : 71 : backupEndPoint = InvalidXLogRecPtr;
2247 : 71 : backupEndRequired = false;
2248 : :
693 michael@paquier.xyz 2249 [ + - ]: 71 : ereport(LOG,
2250 : : errmsg("completed backup recovery with redo LSN %X/%08X and end LSN %X/%08X",
2251 : : LSN_FORMAT_ARGS(saveBackupStartPoint),
2252 : : LSN_FORMAT_ARGS(saveBackupEndPoint)));
2253 : : }
2254 : :
2255 : : /*
2256 : : * Have we passed our safe starting point? Note that minRecoveryPoint is
2257 : : * known to be incorrectly set if recovering from a backup, until the
2258 : : * XLOG_BACKUP_END arrives to advise us of the correct minRecoveryPoint.
2259 : : * All we know prior to that is that we're not consistent yet.
2260 : : */
1401 heikki.linnakangas@i 2261 [ + + + + ]: 2525349 : if (!reachedConsistency && !backupEndRequired &&
2262 [ + + ]: 4960 : minRecoveryPoint <= lastReplayedEndRecPtr)
2263 : : {
2264 : : /*
2265 : : * Check to see if the XLOG sequence contained any unresolved
2266 : : * references to uninitialized pages.
2267 : : */
2268 : 110 : XLogCheckInvalidPages();
2269 : :
2270 : : /*
2271 : : * Check that pg_tblspc doesn't contain any real directories. Replay
2272 : : * of Database/CREATE_* records may have created fictitious tablespace
2273 : : * directories that should have been removed by the time consistency
2274 : : * was reached.
2275 : : */
1239 alvherre@alvh.no-ip. 2276 : 110 : CheckTablespaceDirectory();
2277 : :
1401 heikki.linnakangas@i 2278 : 110 : reachedConsistency = true;
260 fujii@postgresql.org 2279 : 110 : SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
1401 heikki.linnakangas@i 2280 [ + - ]: 110 : ereport(LOG,
2281 : : errmsg("consistent recovery state reached at %X/%08X",
2282 : : LSN_FORMAT_ARGS(lastReplayedEndRecPtr)));
2283 : : }
2284 : :
2285 : : /*
2286 : : * Have we got a valid starting snapshot that will allow queries to be
2287 : : * run? If so, we can tell postmaster that the database is consistent now,
2288 : : * enabling connections.
2289 : : */
2290 [ + + ]: 2525349 : if (standbyState == STANDBY_SNAPSHOT_READY &&
2291 [ + + + + ]: 2525134 : !LocalHotStandbyActive &&
2292 [ + - ]: 102 : reachedConsistency &&
2293 : : IsUnderPostmaster)
2294 : : {
2295 [ - + ]: 102 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
2296 : 102 : XLogRecoveryCtl->SharedHotStandbyActive = true;
2297 : 102 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
2298 : :
2299 : 102 : LocalHotStandbyActive = true;
2300 : :
2301 : 102 : SendPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY);
2302 : : }
2303 : : }
2304 : :
2305 : : /*
2306 : : * Error context callback for errors occurring during rm_redo().
2307 : : */
2308 : : static void
2309 : 83 : rm_redo_error_callback(void *arg)
2310 : : {
2311 : 83 : XLogReaderState *record = (XLogReaderState *) arg;
2312 : : StringInfoData buf;
2313 : :
2314 : 83 : initStringInfo(&buf);
2315 : 83 : xlog_outdesc(&buf, record);
2316 : 83 : xlog_block_info(&buf, record);
2317 : :
2318 : : /* translator: %s is a WAL record description */
164 alvherre@kurilemu.de 2319 :GNC 83 : errcontext("WAL redo at %X/%08X for %s",
1401 heikki.linnakangas@i 2320 :CBC 83 : LSN_FORMAT_ARGS(record->ReadRecPtr),
2321 : : buf.data);
2322 : :
2323 : 83 : pfree(buf.data);
2324 : 83 : }
2325 : :
2326 : : /*
2327 : : * Returns a string describing an XLogRecord, consisting of its identity
2328 : : * optionally followed by a colon, a space, and a further description.
2329 : : */
2330 : : void
2331 : 83 : xlog_outdesc(StringInfo buf, XLogReaderState *record)
2332 : : {
1352 jdavis@postgresql.or 2333 : 83 : RmgrData rmgr = GetRmgr(XLogRecGetRmid(record));
1401 heikki.linnakangas@i 2334 : 83 : uint8 info = XLogRecGetInfo(record);
2335 : : const char *id;
2336 : :
1352 jdavis@postgresql.or 2337 : 83 : appendStringInfoString(buf, rmgr.rm_name);
1401 heikki.linnakangas@i 2338 : 83 : appendStringInfoChar(buf, '/');
2339 : :
1352 jdavis@postgresql.or 2340 : 83 : id = rmgr.rm_identify(info);
1401 heikki.linnakangas@i 2341 [ - + ]: 83 : if (id == NULL)
1401 heikki.linnakangas@i 2342 :UBC 0 : appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
2343 : : else
1401 heikki.linnakangas@i 2344 :CBC 83 : appendStringInfo(buf, "%s: ", id);
2345 : :
1352 jdavis@postgresql.or 2346 : 83 : rmgr.rm_desc(buf, record);
1401 heikki.linnakangas@i 2347 : 83 : }
2348 : :
#ifdef WAL_DEBUG

/*
 * Append the record header fields (previous-record LSN, xid, data length)
 * and the block references of 'record' to 'buf'.  Only built when
 * WAL_DEBUG is defined.
 */
static void
xlog_outrec(StringInfo buf, XLogReaderState *record)
{
	XLogRecPtr	prev = XLogRecGetPrev(record);

	appendStringInfo(buf, "prev %X/%08X; xid %u",
					 LSN_FORMAT_ARGS(prev),
					 XLogRecGetXid(record));

	appendStringInfo(buf, "; len %u",
					 XLogRecGetDataLen(record));

	xlog_block_info(buf, record);
}
#endif							/* WAL_DEBUG */
2364 : :
2365 : : /*
2366 : : * Returns a string giving information about all the blocks in an
2367 : : * XLogRecord.
2368 : : */
2369 : : static void
2370 : 83 : xlog_block_info(StringInfo buf, XLogReaderState *record)
2371 : : {
2372 : : int block_id;
2373 : :
2374 : : /* decode block references */
1371 tmunro@postgresql.or 2375 [ + + ]: 113 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
2376 : : {
2377 : : RelFileLocator rlocator;
2378 : : ForkNumber forknum;
2379 : : BlockNumber blk;
2380 : :
1347 tgl@sss.pgh.pa.us 2381 [ - + ]: 30 : if (!XLogRecGetBlockTagExtended(record, block_id,
2382 : : &rlocator, &forknum, &blk, NULL))
1401 heikki.linnakangas@i 2383 :LBC (1) : continue;
2384 : :
1401 heikki.linnakangas@i 2385 [ + + ]:CBC 30 : if (forknum != MAIN_FORKNUM)
1177 rhaas@postgresql.org 2386 : 5 : appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u",
2387 : : block_id,
2388 : : rlocator.spcOid, rlocator.dbOid,
2389 : : rlocator.relNumber,
2390 : : forknum,
2391 : : blk);
2392 : : else
2393 : 25 : appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u",
2394 : : block_id,
2395 : : rlocator.spcOid, rlocator.dbOid,
2396 : : rlocator.relNumber,
2397 : : blk);
1401 heikki.linnakangas@i 2398 [ + + ]: 30 : if (XLogRecHasBlockImage(record, block_id))
2399 : 24 : appendStringInfoString(buf, " FPW");
2400 : : }
2401 : 83 : }
2402 : :
2403 : :
2404 : : /*
2405 : : * Check that it's OK to switch to new timeline during recovery.
2406 : : *
2407 : : * 'lsn' is the address of the shutdown checkpoint record we're about to
2408 : : * replay. (Currently, timeline can only change at a shutdown checkpoint).
2409 : : */
2410 : : static void
2411 : 10 : checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
2412 : : TimeLineID replayTLI)
2413 : : {
2414 : : /* Check that the record agrees on what the current (old) timeline is */
2415 [ - + ]: 10 : if (prevTLI != replayTLI)
1401 heikki.linnakangas@i 2416 [ # # ]:UBC 0 : ereport(PANIC,
2417 : : (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
2418 : : prevTLI, replayTLI)));
2419 : :
2420 : : /*
2421 : : * The new timeline better be in the list of timelines we expect to see,
2422 : : * according to the timeline history. It should also not decrease.
2423 : : */
1401 heikki.linnakangas@i 2424 [ + - - + ]:CBC 10 : if (newTLI < replayTLI || !tliInHistory(newTLI, expectedTLEs))
1401 heikki.linnakangas@i 2425 [ # # ]:UBC 0 : ereport(PANIC,
2426 : : (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
2427 : : newTLI, replayTLI)));
2428 : :
2429 : : /*
2430 : : * If we have not yet reached min recovery point, and we're about to
2431 : : * switch to a timeline greater than the timeline of the min recovery
2432 : : * point: trouble. After switching to the new timeline, we could not
2433 : : * possibly visit the min recovery point on the correct timeline anymore.
2434 : : * This can happen if there is a newer timeline in the archive that
2435 : : * branched before the timeline the min recovery point is on, and you
2436 : : * attempt to do PITR to the new timeline.
2437 : : */
42 alvherre@kurilemu.de 2438 [ + + ]:GNC 10 : if (XLogRecPtrIsValid(minRecoveryPoint) &&
1401 heikki.linnakangas@i 2439 [ + + ]:CBC 9 : lsn < minRecoveryPoint &&
2440 [ - + ]: 1 : newTLI > minRecoveryPointTLI)
1401 heikki.linnakangas@i 2441 [ # # ]:UBC 0 : ereport(PANIC,
2442 : : errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%08X on timeline %u",
2443 : : newTLI,
2444 : : LSN_FORMAT_ARGS(minRecoveryPoint),
2445 : : minRecoveryPointTLI));
2446 : :
2447 : : /* Looks good */
1401 heikki.linnakangas@i 2448 :CBC 10 : }
2449 : :
2450 : :
2451 : : /*
2452 : : * Extract timestamp from WAL record.
2453 : : *
2454 : : * If the record contains a timestamp, returns true, and saves the timestamp
2455 : : * in *recordXtime. If the record type has no timestamp, returns false.
2456 : : * Currently, only transaction commit/abort records and restore points contain
2457 : : * timestamps.
2458 : : */
2459 : : static bool
2460 : 44185 : getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
2461 : : {
2462 : 44185 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2463 : 44185 : uint8 xact_info = info & XLOG_XACT_OPMASK;
2464 : 44185 : uint8 rmid = XLogRecGetRmid(record);
2465 : :
2466 [ + + + - ]: 44185 : if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
2467 : : {
2468 : 2 : *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
2469 : 2 : return true;
2470 : : }
2471 [ + - + + : 44183 : if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_COMMIT ||
+ + ]
2472 : : xact_info == XLOG_XACT_COMMIT_PREPARED))
2473 : : {
2474 : 40463 : *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
2475 : 40463 : return true;
2476 : : }
2477 [ + - + + : 3720 : if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_ABORT ||
+ - ]
2478 : : xact_info == XLOG_XACT_ABORT_PREPARED))
2479 : : {
2480 : 3720 : *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
2481 : 3720 : return true;
2482 : : }
1401 heikki.linnakangas@i 2483 :UBC 0 : return false;
2484 : : }
2485 : :
2486 : : /*
2487 : : * Checks whether the current buffer page and backup page stored in the
2488 : : * WAL record are consistent or not. Before comparing the two pages, a
2489 : : * masking can be applied to the pages to ignore certain areas like hint bits,
2490 : : * unused space between pd_lower and pd_upper among other things. This
2491 : : * function should be called once WAL replay has been completed for a
2492 : : * given record.
2493 : : */
2494 : : static void
1401 heikki.linnakangas@i 2495 :CBC 2206260 : verifyBackupPageConsistency(XLogReaderState *record)
2496 : : {
1352 jdavis@postgresql.or 2497 : 2206260 : RmgrData rmgr = GetRmgr(XLogRecGetRmid(record));
2498 : : RelFileLocator rlocator;
2499 : : ForkNumber forknum;
2500 : : BlockNumber blkno;
2501 : : int block_id;
2502 : :
2503 : : /* Records with no backup blocks have no need for consistency checks. */
1401 heikki.linnakangas@i 2504 [ + + ]: 2206260 : if (!XLogRecHasAnyBlockRefs(record))
2505 : 65 : return;
2506 : :
2507 [ - + ]: 2206195 : Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
2508 : :
1371 tmunro@postgresql.or 2509 [ + + ]: 4581343 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
2510 : : {
2511 : : Buffer buf;
2512 : : Page page;
2513 : :
1347 tgl@sss.pgh.pa.us 2514 [ + + ]: 2375148 : if (!XLogRecGetBlockTagExtended(record, block_id,
2515 : : &rlocator, &forknum, &blkno, NULL))
2516 : : {
2517 : : /*
2518 : : * WAL record doesn't contain a block reference with the given id.
2519 : : * Do nothing.
2520 : : */
1401 heikki.linnakangas@i 2521 : 2030 : continue;
2522 : : }
2523 : :
2524 [ - + ]: 2373118 : Assert(XLogRecHasBlockImage(record, block_id));
2525 : :
2526 [ + + ]: 2373118 : if (XLogRecBlockImageApply(record, block_id))
2527 : : {
2528 : : /*
2529 : : * WAL record has already applied the page, so bypass the
2530 : : * consistency check as that would result in comparing the full
2531 : : * page stored in the record with itself.
2532 : : */
2533 : 26654 : continue;
2534 : : }
2535 : :
2536 : : /*
2537 : : * Read the contents from the current buffer and store it in a
2538 : : * temporary page.
2539 : : */
1261 rhaas@postgresql.org 2540 : 2346464 : buf = XLogReadBufferExtended(rlocator, forknum, blkno,
2541 : : RBM_NORMAL_NO_LOG,
2542 : : InvalidBuffer);
1401 heikki.linnakangas@i 2543 [ - + ]: 2346464 : if (!BufferIsValid(buf))
1401 heikki.linnakangas@i 2544 :UBC 0 : continue;
2545 : :
1401 heikki.linnakangas@i 2546 :CBC 2346464 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2547 : 2346464 : page = BufferGetPage(buf);
2548 : :
2549 : : /*
2550 : : * Take a copy of the local page where WAL has been applied to have a
2551 : : * comparison base before masking it...
2552 : : */
2553 : 2346464 : memcpy(replay_image_masked, page, BLCKSZ);
2554 : :
2555 : : /* No need for this page anymore now that a copy is in. */
2556 : 2346464 : UnlockReleaseBuffer(buf);
2557 : :
2558 : : /*
2559 : : * If the block LSN is already ahead of this WAL record, we can't
2560 : : * expect contents to match. This can happen if recovery is
2561 : : * restarted.
2562 : : */
2563 [ - + ]: 2346464 : if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
1401 heikki.linnakangas@i 2564 :UBC 0 : continue;
2565 : :
2566 : : /*
2567 : : * Read the contents from the backup copy, stored in WAL record and
2568 : : * store it in a temporary page. There is no need to allocate a new
2569 : : * page here, a local buffer is fine to hold its contents and a mask
2570 : : * can be directly applied on it.
2571 : : */
1401 heikki.linnakangas@i 2572 [ - + ]:CBC 2346464 : if (!RestoreBlockImage(record, block_id, primary_image_masked))
1196 michael@paquier.xyz 2573 [ # # ]:UBC 0 : ereport(ERROR,
2574 : : (errcode(ERRCODE_INTERNAL_ERROR),
2575 : : errmsg_internal("%s", record->errormsg_buf)));
2576 : :
2577 : : /*
2578 : : * If masking function is defined, mask both the primary and replay
2579 : : * images
2580 : : */
1352 jdavis@postgresql.or 2581 [ + - ]:CBC 2346464 : if (rmgr.rm_mask != NULL)
2582 : : {
2583 : 2346464 : rmgr.rm_mask(replay_image_masked, blkno);
2584 : 2346464 : rmgr.rm_mask(primary_image_masked, blkno);
2585 : : }
2586 : :
2587 : : /* Time to compare the primary and replay images. */
1401 heikki.linnakangas@i 2588 [ - + ]: 2346464 : if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
2589 : : {
1401 heikki.linnakangas@i 2590 [ # # ]:UBC 0 : elog(FATAL,
2591 : : "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
2592 : : rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
2593 : : forknum, blkno);
2594 : : }
2595 : : }
2596 : : }
2597 : :
2598 : : /*
2599 : : * For point-in-time recovery, this function decides whether we want to
2600 : : * stop applying the XLOG before the current record.
2601 : : *
2602 : : * Returns true if we are stopping, false otherwise. If stopping, some
2603 : : * information is saved in recoveryStopXid et al for use in annotating the
2604 : : * new timeline's history file.
2605 : : */
2606 : : static bool
1401 heikki.linnakangas@i 2607 :CBC 2782737 : recoveryStopsBefore(XLogReaderState *record)
2608 : : {
2609 : 2782737 : bool stopsHere = false;
2610 : : uint8 xact_info;
2611 : : bool isCommit;
2612 : 2782737 : TimestampTz recordXtime = 0;
2613 : : TransactionId recordXid;
2614 : :
2615 : : /*
2616 : : * Ignore recovery target settings when not in archive recovery (meaning
2617 : : * we are in crash recovery).
2618 : : */
2619 [ + + ]: 2782737 : if (!ArchiveRecoveryRequested)
2620 : 243276 : return false;
2621 : :
2622 : : /* Check if we should stop as soon as reaching consistency */
2623 [ - + - - ]: 2539461 : if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
2624 : : {
1401 heikki.linnakangas@i 2625 [ # # ]:UBC 0 : ereport(LOG,
2626 : : (errmsg("recovery stopping after reaching consistency")));
2627 : :
2628 : 0 : recoveryStopAfter = false;
2629 : 0 : recoveryStopXid = InvalidTransactionId;
2630 : 0 : recoveryStopLSN = InvalidXLogRecPtr;
2631 : 0 : recoveryStopTime = 0;
2632 : 0 : recoveryStopName[0] = '\0';
2633 : 0 : return true;
2634 : : }
2635 : :
2636 : : /* Check if target LSN has been reached */
1401 heikki.linnakangas@i 2637 [ + + ]:CBC 2539461 : if (recoveryTarget == RECOVERY_TARGET_LSN &&
2638 [ + + ]: 8549 : !recoveryTargetInclusive &&
2639 [ + + ]: 496 : record->ReadRecPtr >= recoveryTargetLSN)
2640 : : {
2641 : 2 : recoveryStopAfter = false;
2642 : 2 : recoveryStopXid = InvalidTransactionId;
2643 : 2 : recoveryStopLSN = record->ReadRecPtr;
2644 : 2 : recoveryStopTime = 0;
2645 : 2 : recoveryStopName[0] = '\0';
2646 [ + - ]: 2 : ereport(LOG,
2647 : : errmsg("recovery stopping before WAL location (LSN) \"%X/%08X\"",
2648 : : LSN_FORMAT_ARGS(recoveryStopLSN)));
2649 : 2 : return true;
2650 : : }
2651 : :
2652 : : /* Otherwise we only consider stopping before COMMIT or ABORT records. */
2653 [ + + ]: 2539459 : if (XLogRecGetRmid(record) != RM_XACT_ID)
2654 : 2517226 : return false;
2655 : :
2656 : 22233 : xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
2657 : :
2658 [ + + ]: 22233 : if (xact_info == XLOG_XACT_COMMIT)
2659 : : {
2660 : 20199 : isCommit = true;
2661 : 20199 : recordXid = XLogRecGetXid(record);
2662 : : }
2663 [ + + ]: 2034 : else if (xact_info == XLOG_XACT_COMMIT_PREPARED)
2664 : : {
2665 : 29 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
2666 : : xl_xact_parsed_commit parsed;
2667 : :
2668 : 29 : isCommit = true;
2669 : 29 : ParseCommitRecord(XLogRecGetInfo(record),
2670 : : xlrec,
2671 : : &parsed);
2672 : 29 : recordXid = parsed.twophase_xid;
2673 : : }
2674 [ + + ]: 2005 : else if (xact_info == XLOG_XACT_ABORT)
2675 : : {
2676 : 1847 : isCommit = false;
2677 : 1847 : recordXid = XLogRecGetXid(record);
2678 : : }
2679 [ + + ]: 158 : else if (xact_info == XLOG_XACT_ABORT_PREPARED)
2680 : : {
2681 : 13 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
2682 : : xl_xact_parsed_abort parsed;
2683 : :
2684 : 13 : isCommit = false;
2685 : 13 : ParseAbortRecord(XLogRecGetInfo(record),
2686 : : xlrec,
2687 : : &parsed);
2688 : 13 : recordXid = parsed.twophase_xid;
2689 : : }
2690 : : else
2691 : 145 : return false;
2692 : :
2693 [ - + - - ]: 22088 : if (recoveryTarget == RECOVERY_TARGET_XID && !recoveryTargetInclusive)
2694 : : {
2695 : : /*
2696 : : * There can be only one transaction end record with this exact
2697 : : * transactionid
2698 : : *
2699 : : * when testing for an xid, we MUST test for equality only, since
2700 : : * transactions are numbered in the order they start, not the order
2701 : : * they complete. A higher numbered xid will complete before you about
2702 : : * 50% of the time...
2703 : : */
1401 heikki.linnakangas@i 2704 :UBC 0 : stopsHere = (recordXid == recoveryTargetXid);
2705 : : }
2706 : :
2707 : : /*
2708 : : * Note: we must fetch recordXtime regardless of recoveryTarget setting.
2709 : : * We don't expect getRecordTimestamp ever to fail, since we already know
2710 : : * this is a commit or abort record; but test its result anyway.
2711 : : */
1064 tgl@sss.pgh.pa.us 2712 [ + - ]:CBC 22088 : if (getRecordTimestamp(record, &recordXtime) &&
2713 [ - + ]: 22088 : recoveryTarget == RECOVERY_TARGET_TIME)
2714 : : {
2715 : : /*
2716 : : * There can be many transactions that share the same commit time, so
2717 : : * we stop after the last one, if we are inclusive, or stop at the
2718 : : * first one if we are exclusive
2719 : : */
1401 heikki.linnakangas@i 2720 [ # # ]:UBC 0 : if (recoveryTargetInclusive)
2721 : 0 : stopsHere = (recordXtime > recoveryTargetTime);
2722 : : else
2723 : 0 : stopsHere = (recordXtime >= recoveryTargetTime);
2724 : : }
2725 : :
1401 heikki.linnakangas@i 2726 [ - + ]:CBC 22088 : if (stopsHere)
2727 : : {
1401 heikki.linnakangas@i 2728 :UBC 0 : recoveryStopAfter = false;
2729 : 0 : recoveryStopXid = recordXid;
2730 : 0 : recoveryStopTime = recordXtime;
2731 : 0 : recoveryStopLSN = InvalidXLogRecPtr;
2732 : 0 : recoveryStopName[0] = '\0';
2733 : :
2734 [ # # ]: 0 : if (isCommit)
2735 : : {
2736 [ # # ]: 0 : ereport(LOG,
2737 : : (errmsg("recovery stopping before commit of transaction %u, time %s",
2738 : : recoveryStopXid,
2739 : : timestamptz_to_str(recoveryStopTime))));
2740 : : }
2741 : : else
2742 : : {
2743 [ # # ]: 0 : ereport(LOG,
2744 : : (errmsg("recovery stopping before abort of transaction %u, time %s",
2745 : : recoveryStopXid,
2746 : : timestamptz_to_str(recoveryStopTime))));
2747 : : }
2748 : : }
2749 : :
1401 heikki.linnakangas@i 2750 :CBC 22088 : return stopsHere;
2751 : : }
2752 : :
2753 : : /*
2754 : : * Same as recoveryStopsBefore, but called after applying the record.
2755 : : *
2756 : : * We also track the timestamp of the latest applied COMMIT/ABORT
2757 : : * record in XLogRecoveryCtl->recoveryLastXTime.
2758 : : */
2759 : : static bool
2760 : 2782733 : recoveryStopsAfter(XLogReaderState *record)
2761 : : {
2762 : : uint8 info;
2763 : : uint8 xact_info;
2764 : : uint8 rmid;
926 2765 : 2782733 : TimestampTz recordXtime = 0;
2766 : :
2767 : : /*
2768 : : * Ignore recovery target settings when not in archive recovery (meaning
2769 : : * we are in crash recovery).
2770 : : */
1401 2771 [ + + ]: 2782733 : if (!ArchiveRecoveryRequested)
2772 : 243276 : return false;
2773 : :
2774 : 2539457 : info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2775 : 2539457 : rmid = XLogRecGetRmid(record);
2776 : :
2777 : : /*
2778 : : * There can be many restore points that share the same name; we stop at
2779 : : * the first one.
2780 : : */
2781 [ + + + + ]: 2539457 : if (recoveryTarget == RECOVERY_TARGET_NAME &&
2782 [ + + ]: 20 : rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
2783 : : {
2784 : : xl_restore_point *recordRestorePointData;
2785 : :
2786 : 3 : recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
2787 : :
2788 [ + + ]: 3 : if (strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0)
2789 : : {
2790 : 2 : recoveryStopAfter = true;
2791 : 2 : recoveryStopXid = InvalidTransactionId;
2792 : 2 : recoveryStopLSN = InvalidXLogRecPtr;
2793 : 2 : (void) getRecordTimestamp(record, &recoveryStopTime);
2794 : 2 : strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
2795 : :
2796 [ + - ]: 2 : ereport(LOG,
2797 : : (errmsg("recovery stopping at restore point \"%s\", time %s",
2798 : : recoveryStopName,
2799 : : timestamptz_to_str(recoveryStopTime))));
2800 : 2 : return true;
2801 : : }
2802 : : }
2803 : :
2804 : : /* Check if the target LSN has been reached */
2805 [ + + + + ]: 2539455 : if (recoveryTarget == RECOVERY_TARGET_LSN &&
2806 : 8053 : recoveryTargetInclusive &&
2807 [ + + ]: 8053 : record->ReadRecPtr >= recoveryTargetLSN)
2808 : : {
2809 : 3 : recoveryStopAfter = true;
2810 : 3 : recoveryStopXid = InvalidTransactionId;
2811 : 3 : recoveryStopLSN = record->ReadRecPtr;
2812 : 3 : recoveryStopTime = 0;
2813 : 3 : recoveryStopName[0] = '\0';
2814 [ + - ]: 3 : ereport(LOG,
2815 : : errmsg("recovery stopping after WAL location (LSN) \"%X/%08X\"",
2816 : : LSN_FORMAT_ARGS(recoveryStopLSN)));
2817 : 3 : return true;
2818 : : }
2819 : :
2820 [ + + ]: 2539452 : if (rmid != RM_XACT_ID)
2821 : 2517221 : return false;
2822 : :
2823 : 22231 : xact_info = info & XLOG_XACT_OPMASK;
2824 : :
2825 [ + + + + ]: 22231 : if (xact_info == XLOG_XACT_COMMIT ||
2826 [ + + ]: 2005 : xact_info == XLOG_XACT_COMMIT_PREPARED ||
2827 [ + + ]: 158 : xact_info == XLOG_XACT_ABORT ||
2828 : : xact_info == XLOG_XACT_ABORT_PREPARED)
2829 : : {
2830 : : TransactionId recordXid;
2831 : :
2832 : : /* Update the last applied transaction timestamp */
2833 [ + - ]: 22086 : if (getRecordTimestamp(record, &recordXtime))
2834 : 22086 : SetLatestXTime(recordXtime);
2835 : :
2836 : : /* Extract the XID of the committed/aborted transaction */
2837 [ + + ]: 22086 : if (xact_info == XLOG_XACT_COMMIT_PREPARED)
2838 : : {
2839 : 29 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
2840 : : xl_xact_parsed_commit parsed;
2841 : :
2842 : 29 : ParseCommitRecord(XLogRecGetInfo(record),
2843 : : xlrec,
2844 : : &parsed);
2845 : 29 : recordXid = parsed.twophase_xid;
2846 : : }
2847 [ + + ]: 22057 : else if (xact_info == XLOG_XACT_ABORT_PREPARED)
2848 : : {
2849 : 13 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
2850 : : xl_xact_parsed_abort parsed;
2851 : :
2852 : 13 : ParseAbortRecord(XLogRecGetInfo(record),
2853 : : xlrec,
2854 : : &parsed);
2855 : 13 : recordXid = parsed.twophase_xid;
2856 : : }
2857 : : else
2858 : 22044 : recordXid = XLogRecGetXid(record);
2859 : :
2860 : : /*
2861 : : * There can be only one transaction end record with this exact
2862 : : * transactionid
2863 : : *
2864 : : * when testing for an xid, we MUST test for equality only, since
2865 : : * transactions are numbered in the order they start, not the order
2866 : : * they complete. A higher numbered xid will complete before you about
2867 : : * 50% of the time...
2868 : : */
2869 [ - + - - ]: 22086 : if (recoveryTarget == RECOVERY_TARGET_XID && recoveryTargetInclusive &&
1401 heikki.linnakangas@i 2870 [ # # ]:UBC 0 : recordXid == recoveryTargetXid)
2871 : : {
2872 : 0 : recoveryStopAfter = true;
2873 : 0 : recoveryStopXid = recordXid;
2874 : 0 : recoveryStopTime = recordXtime;
2875 : 0 : recoveryStopLSN = InvalidXLogRecPtr;
2876 : 0 : recoveryStopName[0] = '\0';
2877 : :
2878 [ # # # # ]: 0 : if (xact_info == XLOG_XACT_COMMIT ||
2879 : : xact_info == XLOG_XACT_COMMIT_PREPARED)
2880 : : {
2881 [ # # ]: 0 : ereport(LOG,
2882 : : (errmsg("recovery stopping after commit of transaction %u, time %s",
2883 : : recoveryStopXid,
2884 : : timestamptz_to_str(recoveryStopTime))));
2885 : : }
2886 [ # # # # ]: 0 : else if (xact_info == XLOG_XACT_ABORT ||
2887 : : xact_info == XLOG_XACT_ABORT_PREPARED)
2888 : : {
2889 [ # # ]: 0 : ereport(LOG,
2890 : : (errmsg("recovery stopping after abort of transaction %u, time %s",
2891 : : recoveryStopXid,
2892 : : timestamptz_to_str(recoveryStopTime))));
2893 : : }
2894 : 0 : return true;
2895 : : }
2896 : : }
2897 : :
2898 : : /* Check if we should stop as soon as reaching consistency */
1401 heikki.linnakangas@i 2899 [ - + - - ]:CBC 22231 : if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
2900 : : {
1401 heikki.linnakangas@i 2901 [ # # ]:UBC 0 : ereport(LOG,
2902 : : (errmsg("recovery stopping after reaching consistency")));
2903 : :
2904 : 0 : recoveryStopAfter = true;
2905 : 0 : recoveryStopXid = InvalidTransactionId;
2906 : 0 : recoveryStopTime = 0;
2907 : 0 : recoveryStopLSN = InvalidXLogRecPtr;
2908 : 0 : recoveryStopName[0] = '\0';
2909 : 0 : return true;
2910 : : }
2911 : :
1401 heikki.linnakangas@i 2912 :CBC 22231 : return false;
2913 : : }
2914 : :
2915 : : /*
2916 : : * Create a comment for the history file to explain why and where
2917 : : * timeline changed.
2918 : : */
2919 : : static char *
2920 : 872 : getRecoveryStopReason(void)
2921 : : {
2922 : : char reason[200];
2923 : :
2924 [ - + ]: 872 : if (recoveryTarget == RECOVERY_TARGET_XID)
1401 heikki.linnakangas@i 2925 :UBC 0 : snprintf(reason, sizeof(reason),
2926 : : "%s transaction %u",
2927 [ # # ]: 0 : recoveryStopAfter ? "after" : "before",
2928 : : recoveryStopXid);
1401 heikki.linnakangas@i 2929 [ - + ]:CBC 872 : else if (recoveryTarget == RECOVERY_TARGET_TIME)
1401 heikki.linnakangas@i 2930 :UBC 0 : snprintf(reason, sizeof(reason),
2931 : : "%s %s\n",
2932 [ # # ]: 0 : recoveryStopAfter ? "after" : "before",
2933 : : timestamptz_to_str(recoveryStopTime));
1401 heikki.linnakangas@i 2934 [ + + ]:CBC 872 : else if (recoveryTarget == RECOVERY_TARGET_LSN)
2935 : 7 : snprintf(reason, sizeof(reason),
2936 : : "%s LSN %X/%08X\n",
2937 : 7 : recoveryStopAfter ? "after" : "before",
2938 [ + + ]: 7 : LSN_FORMAT_ARGS(recoveryStopLSN));
2939 [ + + ]: 865 : else if (recoveryTarget == RECOVERY_TARGET_NAME)
2940 : 3 : snprintf(reason, sizeof(reason),
2941 : : "at restore point \"%s\"",
2942 : : recoveryStopName);
2943 [ - + ]: 862 : else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
1401 heikki.linnakangas@i 2944 :UBC 0 : snprintf(reason, sizeof(reason), "reached consistency");
2945 : : else
1401 heikki.linnakangas@i 2946 :CBC 862 : snprintf(reason, sizeof(reason), "no recovery target specified");
2947 : :
2948 : 872 : return pstrdup(reason);
2949 : : }
2950 : :
2951 : : /*
2952 : : * Wait until shared recoveryPauseState is set to RECOVERY_NOT_PAUSED.
2953 : : *
2954 : : * endOfRecovery is true if the recovery target is reached and
2955 : : * the paused state starts at the end of recovery because of
2956 : : * recovery_target_action=pause, and false otherwise.
2957 : : */
2958 : : static void
2959 : 3 : recoveryPausesHere(bool endOfRecovery)
2960 : : {
2961 : : /* Don't pause unless users can connect! */
2962 [ - + ]: 3 : if (!LocalHotStandbyActive)
1401 heikki.linnakangas@i 2963 :UBC 0 : return;
2964 : :
2965 : : /* Don't pause after standby promotion has been triggered */
1401 heikki.linnakangas@i 2966 [ - + ]:CBC 3 : if (LocalPromoteIsTriggered)
1401 heikki.linnakangas@i 2967 :UBC 0 : return;
2968 : :
1401 heikki.linnakangas@i 2969 [ + + ]:CBC 3 : if (endOfRecovery)
2970 [ + - ]: 1 : ereport(LOG,
2971 : : (errmsg("pausing at the end of recovery"),
2972 : : errhint("Execute pg_wal_replay_resume() to promote.")));
2973 : : else
2974 [ + - ]: 2 : ereport(LOG,
2975 : : (errmsg("recovery has paused"),
2976 : : errhint("Execute pg_wal_replay_resume() to continue.")));
2977 : :
2978 : : /* loop until recoveryPauseState is set to RECOVERY_NOT_PAUSED */
2979 [ + + ]: 9 : while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
2980 : : {
288 2981 : 8 : ProcessStartupProcInterrupts();
1401 2982 [ + + ]: 8 : if (CheckForStandbyTrigger())
2983 : 2 : return;
2984 : :
2985 : : /*
2986 : : * If recovery pause is requested then set it paused. While we are in
2987 : : * the loop, user might resume and pause again so set this every time.
2988 : : */
2989 : 6 : ConfirmRecoveryPaused();
2990 : :
2991 : : /*
2992 : : * We wait on a condition variable that will wake us as soon as the
2993 : : * pause ends, but we use a timeout so we can check the above exit
2994 : : * condition periodically too.
2995 : : */
2996 : 6 : ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
2997 : : WAIT_EVENT_RECOVERY_PAUSE);
2998 : : }
2999 : 1 : ConditionVariableCancelSleep();
3000 : : }
3001 : :
3002 : : /*
3003 : : * When recovery_min_apply_delay is set, we wait long enough to make sure
3004 : : * certain record types are applied at least that interval behind the primary.
3005 : : *
3006 : : * Returns true if we waited.
3007 : : *
3008 : : * Note that the delay is calculated between the WAL record log time and
3009 : : * the current time on standby. We would prefer to keep track of when this
3010 : : * standby received each WAL record, which would allow a more consistent
3011 : : * approach and one not affected by time synchronisation issues, but that
3012 : : * is significantly more effort and complexity for little actual gain in
3013 : : * usability.
3014 : : */
3015 : : static bool
3016 : 2782735 : recoveryApplyDelay(XLogReaderState *record)
3017 : : {
3018 : : uint8 xact_info;
3019 : : TimestampTz xtime;
3020 : : TimestampTz delayUntil;
3021 : : long msecs;
3022 : :
3023 : : /* nothing to do if no delay configured */
3024 [ + + ]: 2782735 : if (recovery_min_apply_delay <= 0)
3025 : 2782677 : return false;
3026 : :
3027 : : /* no delay is applied on a database not yet consistent */
1401 heikki.linnakangas@i 3028 [ + + ]:GBC 58 : if (!reachedConsistency)
3029 : 4 : return false;
3030 : :
3031 : : /* nothing to do if crash recovery is requested */
3032 [ - + ]: 54 : if (!ArchiveRecoveryRequested)
1401 heikki.linnakangas@i 3033 :UBC 0 : return false;
3034 : :
3035 : : /*
3036 : : * Is it a COMMIT record?
3037 : : *
3038 : : * We deliberately choose not to delay aborts since they have no effect on
3039 : : * MVCC. We already allow replay of records that don't have a timestamp,
3040 : : * so there is already opportunity for issues caused by early conflicts on
3041 : : * standbys.
3042 : : */
1401 heikki.linnakangas@i 3043 [ + + ]:GBC 54 : if (XLogRecGetRmid(record) != RM_XACT_ID)
3044 : 45 : return false;
3045 : :
3046 : 9 : xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
3047 : :
3048 [ - + - - ]: 9 : if (xact_info != XLOG_XACT_COMMIT &&
3049 : : xact_info != XLOG_XACT_COMMIT_PREPARED)
1401 heikki.linnakangas@i 3050 :UBC 0 : return false;
3051 : :
1401 heikki.linnakangas@i 3052 [ - + ]:GBC 9 : if (!getRecordTimestamp(record, &xtime))
1401 heikki.linnakangas@i 3053 :UBC 0 : return false;
3054 : :
1401 heikki.linnakangas@i 3055 :GBC 9 : delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
3056 : :
3057 : : /*
3058 : : * Exit without arming the latch if it's already past time to apply this
3059 : : * record
3060 : : */
3061 : 9 : msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(), delayUntil);
3062 [ + - ]: 9 : if (msecs <= 0)
1401 heikki.linnakangas@i 3063 :UBC 0 : return false;
3064 : :
3065 : : while (true)
3066 : : {
1401 heikki.linnakangas@i 3067 :GBC 24 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3068 : :
3069 : : /* This might change recovery_min_apply_delay. */
288 3070 : 24 : ProcessStartupProcInterrupts();
3071 : :
1401 3072 [ - + ]: 24 : if (CheckForStandbyTrigger())
1401 heikki.linnakangas@i 3073 :UBC 0 : break;
3074 : :
3075 : : /*
3076 : : * Recalculate delayUntil as recovery_min_apply_delay could have
3077 : : * changed while waiting in this loop.
3078 : : */
1401 heikki.linnakangas@i 3079 :GBC 24 : delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
3080 : :
3081 : : /*
3082 : : * Wait for difference between GetCurrentTimestamp() and delayUntil.
3083 : : */
3084 : 24 : msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
3085 : : delayUntil);
3086 : :
3087 [ + + ]: 24 : if (msecs <= 0)
3088 : 9 : break;
3089 : :
3090 [ - + ]: 15 : elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
3091 : :
3092 : 15 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3093 : : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3094 : : msecs,
3095 : : WAIT_EVENT_RECOVERY_APPLY_DELAY);
3096 : : }
3097 : 9 : return true;
3098 : : }
3099 : :
3100 : : /*
3101 : : * Get the current state of the recovery pause request.
3102 : : */
3103 : : RecoveryPauseState
1401 heikki.linnakangas@i 3104 :CBC 14 : GetRecoveryPauseState(void)
3105 : : {
3106 : : RecoveryPauseState state;
3107 : :
3108 [ - + ]: 14 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3109 : 14 : state = XLogRecoveryCtl->recoveryPauseState;
3110 : 14 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3111 : :
3112 : 14 : return state;
3113 : : }
3114 : :
3115 : : /*
3116 : : * Set the recovery pause state.
3117 : : *
3118 : : * If recovery pause is requested then sets the recovery pause state to
3119 : : * 'pause requested' if it is not already 'paused'. Otherwise, sets it
3120 : : * to 'not paused' to resume the recovery. The recovery pause will be
3121 : : * confirmed by ConfirmRecoveryPaused().
3122 : : */
3123 : : void
3124 : 48 : SetRecoveryPause(bool recoveryPause)
3125 : : {
3126 [ - + ]: 48 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3127 : :
3128 [ + + ]: 48 : if (!recoveryPause)
3129 : 44 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
3130 [ + - ]: 4 : else if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
3131 : 4 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED;
3132 : :
3133 : 48 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3134 : :
3135 [ + + ]: 48 : if (!recoveryPause)
3136 : 44 : ConditionVariableBroadcast(&XLogRecoveryCtl->recoveryNotPausedCV);
3137 : 48 : }
3138 : :
3139 : : /*
3140 : : * Confirm the recovery pause by setting the recovery pause state to
3141 : : * RECOVERY_PAUSED.
3142 : : */
3143 : : static void
3144 : 6 : ConfirmRecoveryPaused(void)
3145 : : {
3146 : : /* If recovery pause is requested then set it paused */
3147 [ - + ]: 6 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3148 [ + + ]: 6 : if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
3149 : 3 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSED;
3150 : 6 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3151 : 6 : }
3152 : :
3153 : :
3154 : : /*
3155 : : * Attempt to read the next XLOG record.
3156 : : *
3157 : : * Before first call, the reader needs to be positioned to the first record
3158 : : * by calling XLogPrefetcherBeginRead().
3159 : : *
3160 : : * If no valid record is available, returns NULL, or fails if emode is PANIC.
3161 : : * (emode must be either PANIC or LOG.) In standby mode, retries until a valid
3162 : : * record is available.
3163 : : */
3164 : : static XLogRecord *
1351 tmunro@postgresql.or 3165 : 2784840 : ReadRecord(XLogPrefetcher *xlogprefetcher, int emode,
3166 : : bool fetching_ckpt, TimeLineID replayTLI)
3167 : : {
3168 : : XLogRecord *record;
3169 : 2784840 : XLogReaderState *xlogreader = XLogPrefetcherGetReader(xlogprefetcher);
1401 heikki.linnakangas@i 3170 : 2784840 : XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
3171 : :
43 michael@paquier.xyz 3172 [ + + - + ]:GNC 2784840 : Assert(AmStartupProcess() || !IsUnderPostmaster);
3173 : :
3174 : : /* Pass through parameters to XLogPageRead */
1401 heikki.linnakangas@i 3175 :CBC 2784840 : private->fetching_ckpt = fetching_ckpt;
3176 : 2784840 : private->emode = emode;
42 alvherre@kurilemu.de 3177 :GNC 2784840 : private->randAccess = !XLogRecPtrIsValid(xlogreader->ReadRecPtr);
1401 heikki.linnakangas@i 3178 :CBC 2784840 : private->replayTLI = replayTLI;
3179 : :
3180 : : /* This is the first attempt to read this page. */
3181 : 2784840 : lastSourceFailed = false;
3182 : :
3183 : : for (;;)
3184 : 99 : {
3185 : : char *errormsg;
3186 : :
1351 tmunro@postgresql.or 3187 : 2784939 : record = XLogPrefetcherReadRecord(xlogprefetcher, &errormsg);
1401 heikki.linnakangas@i 3188 [ + + ]: 2784887 : if (record == NULL)
3189 : : {
3190 : : /*
3191 : : * When we find that WAL ends in an incomplete record, keep track
3192 : : * of that record. After recovery is done, we'll write a record
3193 : : * to indicate to downstream WAL readers that that portion is to
3194 : : * be ignored.
3195 : : *
3196 : : * However, when ArchiveRecoveryRequested = true, we're going to
3197 : : * switch to a new timeline at the end of recovery. We will only
3198 : : * copy WAL over to the new timeline up to the end of the last
3199 : : * complete record, so if we did this, we would later create an
3200 : : * overwrite contrecord in the wrong place, breaking everything.
3201 : : */
1207 rhaas@postgresql.org 3202 [ + + ]: 245 : if (!ArchiveRecoveryRequested &&
42 alvherre@kurilemu.de 3203 [ + + ]:GNC 103 : XLogRecPtrIsValid(xlogreader->abortedRecPtr))
3204 : : {
1401 heikki.linnakangas@i 3205 :CBC 10 : abortedRecPtr = xlogreader->abortedRecPtr;
3206 : 10 : missingContrecPtr = xlogreader->missingContrecPtr;
3207 : : }
3208 : :
3209 [ + + ]: 245 : if (readFile >= 0)
3210 : : {
3211 : 225 : close(readFile);
3212 : 225 : readFile = -1;
3213 : : }
3214 : :
3215 : : /*
3216 : : * We only end up here without a message when XLogPageRead()
3217 : : * failed - in that case we already logged something. In
3218 : : * StandbyMode that only happens if we have been triggered, so we
3219 : : * shouldn't loop anymore in that case.
3220 : : */
3221 [ + + ]: 245 : if (errormsg)
3222 [ + + ]: 225 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3223 : : (errmsg_internal("%s", errormsg) /* already translated */ ));
3224 : : }
3225 : :
3226 : : /*
3227 : : * Check page TLI is one of the expected values.
3228 : : */
3229 [ - + ]: 2784642 : else if (!tliInHistory(xlogreader->latestPageTLI, expectedTLEs))
3230 : : {
3231 : : char fname[MAXFNAMELEN];
3232 : : XLogSegNo segno;
3233 : : int32 offset;
3234 : :
1401 heikki.linnakangas@i 3235 :UBC 0 : XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size);
3236 : 0 : offset = XLogSegmentOffset(xlogreader->latestPagePtr,
3237 : : wal_segment_size);
3238 : 0 : XLogFileName(fname, xlogreader->seg.ws_tli, segno,
3239 : : wal_segment_size);
3240 [ # # ]: 0 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3241 : : errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%08X, offset %u",
3242 : : xlogreader->latestPageTLI,
3243 : : fname,
3244 : : LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
3245 : : offset));
3246 : 0 : record = NULL;
3247 : : }
3248 : :
1401 heikki.linnakangas@i 3249 [ + + ]:CBC 2784887 : if (record)
3250 : : {
3251 : : /* Great, got a record */
3252 : 2784788 : return record;
3253 : : }
3254 : : else
3255 : : {
3256 : : /* No valid record available from this source */
3257 : 245 : lastSourceFailed = true;
3258 : :
3259 : : /*
3260 : : * If archive recovery was requested, but we were still doing
3261 : : * crash recovery, switch to archive recovery and retry using the
3262 : : * offline archive. We have now replayed all the valid WAL in
3263 : : * pg_wal, so we are presumably now consistent.
3264 : : *
3265 : : * We require that there's at least some valid WAL present in
3266 : : * pg_wal, however (!fetching_ckpt). We could recover using the
3267 : : * WAL from the archive, even if pg_wal is completely empty, but
3268 : : * we'd have no idea how far we'd have to replay to reach
3269 : : * consistency. So err on the safe side and give up.
3270 : : */
3271 [ + + + + ]: 245 : if (!InArchiveRecovery && ArchiveRecoveryRequested &&
3272 [ + - ]: 2 : !fetching_ckpt)
3273 : : {
3274 [ - + ]: 2 : ereport(DEBUG1,
3275 : : (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
3276 : 2 : InArchiveRecovery = true;
3277 [ + - ]: 2 : if (StandbyModeRequested)
1046 rhaas@postgresql.org 3278 : 2 : EnableStandbyMode();
3279 : :
1401 heikki.linnakangas@i 3280 : 2 : SwitchIntoArchiveRecovery(xlogreader->EndRecPtr, replayTLI);
3281 : 2 : minRecoveryPoint = xlogreader->EndRecPtr;
3282 : 2 : minRecoveryPointTLI = replayTLI;
3283 : :
3284 : 2 : CheckRecoveryConsistency();
3285 : :
3286 : : /*
3287 : : * Before we retry, reset lastSourceFailed and currentSource
3288 : : * so that we will check the archive next.
3289 : : */
3290 : 2 : lastSourceFailed = false;
3291 : 2 : currentSource = XLOG_FROM_ANY;
3292 : :
3293 : 99 : continue;
3294 : : }
3295 : :
3296 : : /* In standby mode, loop back to retry. Otherwise, give up. */
3297 [ + + + + ]: 243 : if (StandbyMode && !CheckForStandbyTrigger())
3298 : 97 : continue;
3299 : : else
3300 : 146 : return NULL;
3301 : : }
3302 : : }
3303 : : }
3304 : :
3305 : : /*
3306 : : * Read the XLOG page containing targetPagePtr into readBuf (if not read
3307 : : * already). Returns number of bytes read, if the page is read successfully,
3308 : : * or XLREAD_FAIL in case of errors. When errors occur, they are ereport'ed,
3309 : : * but only if they have not been previously reported.
3310 : : *
3311 : : * See XLogReaderRoutine.page_read for more details.
3312 : : *
3313 : : * While prefetching, xlogreader->nonblocking may be set. In that case,
3314 : : * returns XLREAD_WOULDBLOCK if we'd otherwise have to wait for more WAL.
3315 : : *
3316 : : * This is responsible for restoring files from archive as needed, as well
3317 : : * as for waiting for the requested WAL record to arrive in standby mode.
3318 : : *
3319 : : * xlogreader->private_data->emode specifies the log level used for reporting
3320 : : * "file not found" or "end of WAL" situations in archive recovery, or in
3321 : : * standby mode when promotion is triggered. If set to WARNING or below,
3322 : : * XLogPageRead() returns XLREAD_FAIL in those situations; on higher log
3323 : : * levels the ereport() won't return.
3324 : : *
3325 : : * In standby mode, if after a successful return of XLogPageRead() the
3326 : : * caller finds the record it's interested in to be broken, it should
3327 : : * ereport the error with the level determined by
3328 : : * emode_for_corrupt_record(), and then set lastSourceFailed
3329 : : * and call XLogPageRead() again with the same arguments. This lets
3330 : : * XLogPageRead() try fetching the record from another source, or sleep
3331 : : * and retry.
3332 : : */
3333 : : static int
3334 : 1458166 : XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
3335 : : XLogRecPtr targetRecPtr, char *readBuf)
3336 : : {
3337 : 1458166 : XLogPageReadPrivate *private =
3338 : : (XLogPageReadPrivate *) xlogreader->private_data;
3339 : 1458166 : int emode = private->emode;
3340 : : uint32 targetPageOff;
3341 : : XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
3342 : : int r;
3343 : : instr_time io_start;
3344 : :
43 michael@paquier.xyz 3345 [ + + - + ]:GNC 1458166 : Assert(AmStartupProcess() || !IsUnderPostmaster);
3346 : :
1401 heikki.linnakangas@i 3347 :CBC 1458166 : XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
3348 : 1458166 : targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
3349 : :
3350 : : /*
3351 : : * See if we need to switch to a new segment because the requested record
3352 : : * is not in the currently open one.
3353 : : */
3354 [ + + ]: 1458166 : if (readFile >= 0 &&
3355 [ + + ]: 1456646 : !XLByteInSeg(targetPagePtr, readSegNo, wal_segment_size))
3356 : : {
3357 : : /*
3358 : : * Request a restartpoint if we've replayed too much xlog since the
3359 : : * last one.
3360 : : */
3361 [ + + + - ]: 1371 : if (ArchiveRecoveryRequested && IsUnderPostmaster)
3362 : : {
3363 [ + + ]: 1357 : if (XLogCheckpointNeeded(readSegNo))
3364 : : {
3365 : 1254 : (void) GetRedoRecPtr();
3366 [ + + ]: 1254 : if (XLogCheckpointNeeded(readSegNo))
3367 : 1247 : RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
3368 : : }
3369 : : }
3370 : :
3371 : 1371 : close(readFile);
3372 : 1371 : readFile = -1;
3373 : 1371 : readSource = XLOG_FROM_ANY;
3374 : : }
3375 : :
3376 : 1458166 : XLByteToSeg(targetPagePtr, readSegNo, wal_segment_size);
3377 : :
3378 : 1458168 : retry:
3379 : : /* See if we need to retrieve more data */
3380 [ + + ]: 1458168 : if (readFile < 0 ||
3381 [ + + ]: 1455275 : (readSource == XLOG_FROM_STREAM &&
3382 [ + + ]: 1443944 : flushedUpto < targetPagePtr + reqLen))
3383 : : {
1351 tmunro@postgresql.or 3384 [ + + ]: 25480 : if (readFile >= 0 &&
3385 [ + + ]: 22587 : xlogreader->nonblocking &&
3386 [ + - ]: 11183 : readSource == XLOG_FROM_STREAM &&
3387 [ + - ]: 11183 : flushedUpto < targetPagePtr + reqLen)
3388 : 11183 : return XLREAD_WOULDBLOCK;
3389 : :
3390 [ + + + - ]: 14245 : switch (WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
3391 : 14297 : private->randAccess,
3392 : 14297 : private->fetching_ckpt,
3393 : : targetRecPtr,
3394 : : private->replayTLI,
3395 : : xlogreader->EndRecPtr,
3396 : 14297 : xlogreader->nonblocking))
3397 : : {
3398 : 331 : case XLREAD_WOULDBLOCK:
3399 : 331 : return XLREAD_WOULDBLOCK;
3400 : 38 : case XLREAD_FAIL:
3401 [ - + ]: 38 : if (readFile >= 0)
1351 tmunro@postgresql.or 3402 :UBC 0 : close(readFile);
1351 tmunro@postgresql.or 3403 :CBC 38 : readFile = -1;
3404 : 38 : readLen = 0;
3405 : 38 : readSource = XLOG_FROM_ANY;
3406 : 38 : return XLREAD_FAIL;
3407 : 13876 : case XLREAD_SUCCESS:
3408 : 13876 : break;
3409 : : }
3410 : : }
3411 : :
3412 : : /*
3413 : : * At this point, we have the right segment open and if we're streaming we
3414 : : * know the requested record is in it.
3415 : : */
1401 heikki.linnakangas@i 3416 [ - + ]: 1446564 : Assert(readFile != -1);
3417 : :
3418 : : /*
3419 : : * If the current segment is being streamed from the primary, calculate
3420 : : * how much of the current page we have received already. We know the
3421 : : * requested record has been received, but this is for the benefit of
3422 : : * future calls, to allow quick exit at the top of this function.
3423 : : */
3424 [ + + ]: 1446564 : if (readSource == XLOG_FROM_STREAM)
3425 : : {
3426 [ + + ]: 1433716 : if (((targetPagePtr) / XLOG_BLCKSZ) != (flushedUpto / XLOG_BLCKSZ))
3427 : 1425006 : readLen = XLOG_BLCKSZ;
3428 : : else
3429 : 8710 : readLen = XLogSegmentOffset(flushedUpto, wal_segment_size) -
3430 : : targetPageOff;
3431 : : }
3432 : : else
3433 : 12848 : readLen = XLOG_BLCKSZ;
3434 : :
3435 : : /* Read the requested page */
3436 : 1446564 : readOff = targetPageOff;
3437 : :
3438 : : /* Measure I/O timing when reading segment */
295 michael@paquier.xyz 3439 : 1446564 : io_start = pgstat_prepare_io_time(track_wal_io_timing);
3440 : :
1401 heikki.linnakangas@i 3441 : 1446564 : pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
35 michael@paquier.xyz 3442 :GNC 1446564 : r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (pgoff_t) readOff);
1401 heikki.linnakangas@i 3443 [ - + ]:CBC 1446564 : if (r != XLOG_BLCKSZ)
3444 : : {
3445 : : char fname[MAXFNAMELEN];
1401 heikki.linnakangas@i 3446 :UBC 0 : int save_errno = errno;
3447 : :
3448 : 0 : pgstat_report_wait_end();
3449 : :
317 michael@paquier.xyz 3450 : 0 : pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
3451 : : io_start, 1, r);
3452 : :
1401 heikki.linnakangas@i 3453 : 0 : XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
3454 [ # # ]: 0 : if (r < 0)
3455 : : {
3456 : 0 : errno = save_errno;
3457 [ # # ]: 0 : ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
3458 : : (errcode_for_file_access(),
3459 : : errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: %m",
3460 : : fname, LSN_FORMAT_ARGS(targetPagePtr),
3461 : : readOff)));
3462 : : }
3463 : : else
3464 [ # # ]: 0 : ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
3465 : : (errcode(ERRCODE_DATA_CORRUPTED),
3466 : : errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: read %d of %zu",
3467 : : fname, LSN_FORMAT_ARGS(targetPagePtr),
3468 : : readOff, r, (Size) XLOG_BLCKSZ)));
3469 : 0 : goto next_record_is_invalid;
3470 : : }
1401 heikki.linnakangas@i 3471 :CBC 1446564 : pgstat_report_wait_end();
3472 : :
317 michael@paquier.xyz 3473 : 1446564 : pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
3474 : : io_start, 1, r);
3475 : :
1401 heikki.linnakangas@i 3476 [ - + ]: 1446564 : Assert(targetSegNo == readSegNo);
3477 [ - + ]: 1446564 : Assert(targetPageOff == readOff);
3478 [ - + ]: 1446564 : Assert(reqLen <= readLen);
3479 : :
3480 : 1446564 : xlogreader->seg.ws_tli = curFileTLI;
3481 : :
3482 : : /*
3483 : : * Check the page header immediately, so that we can retry immediately if
3484 : : * it's not valid. This may seem unnecessary, because ReadPageInternal()
3485 : : * validates the page header anyway, and would propagate the failure up to
3486 : : * ReadRecord(), which would retry. However, there's a corner case with
3487 : : * continuation records, if a record is split across two pages such that
3488 : : * we would need to read the two pages from different sources across two
3489 : : * WAL segments.
3490 : : *
3491 : : * The first page is only available locally, in pg_wal, because it's
3492 : : * already been recycled on the primary. The second page, however, is not
3493 : : * present in pg_wal, and we should stream it from the primary. There is a
3494 : : * recycled WAL segment present in pg_wal, with garbage contents, however.
3495 : : * We would read the first page from the local WAL segment, but when
3496 : : * reading the second page, we would read the bogus, recycled, WAL
3497 : : * segment. If we didn't catch that case here, we would never recover,
3498 : : * because ReadRecord() would retry reading the whole record from the
3499 : : * beginning.
3500 : : *
3501 : : * Of course, this only catches errors in the page header, which is what
3502 : : * happens in the case of a recycled WAL segment. Other kinds of errors or
3503 : : * corruption still have the same problem. But this at least fixes the
3504 : : * common case, which can happen as part of normal operation.
3505 : : *
3506 : : * Validating the page header is cheap enough that doing it twice
3507 : : * shouldn't be a big deal from a performance point of view.
3508 : : *
3509 : : * When not in standby mode, an invalid page header should cause recovery
3510 : : * to end, not retry reading the page, so we don't need to validate the
3511 : : * page header here for the retry. Instead, ReadPageInternal() is
3512 : : * responsible for the validation.
3513 : : */
3514 [ + + ]: 1446564 : if (StandbyMode &&
332 michael@paquier.xyz 3515 [ + + ]: 1436778 : (targetPagePtr % wal_segment_size) == 0 &&
1401 heikki.linnakangas@i 3516 [ + + ]: 1314 : !XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
3517 : : {
3518 : : /*
3519 : : * Emit this error right now then retry this page immediately. Use
3520 : : * errmsg_internal() because the message was already translated.
3521 : : */
3522 [ + - ]: 3 : if (xlogreader->errormsg_buf[0])
3523 [ + + ]: 3 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3524 : : (errmsg_internal("%s", xlogreader->errormsg_buf)));
3525 : :
3526 : : /* reset any error XLogReaderValidatePageHeader() might have set */
1202 tmunro@postgresql.or 3527 : 3 : XLogReaderResetError(xlogreader);
1401 heikki.linnakangas@i 3528 : 3 : goto next_record_is_invalid;
3529 : : }
3530 : :
3531 : 1446561 : return readLen;
3532 : :
3533 : 3 : next_record_is_invalid:
3534 : :
3535 : : /*
3536 : : * If we're reading ahead, give up fast. Retries and error reporting will
3537 : : * be handled by a later read when recovery catches up to this point.
3538 : : */
1202 tmunro@postgresql.or 3539 [ + + ]: 3 : if (xlogreader->nonblocking)
3540 : 1 : return XLREAD_WOULDBLOCK;
3541 : :
1401 heikki.linnakangas@i 3542 : 2 : lastSourceFailed = true;
3543 : :
3544 [ + - ]: 2 : if (readFile >= 0)
3545 : 2 : close(readFile);
3546 : 2 : readFile = -1;
3547 : 2 : readLen = 0;
3548 : 2 : readSource = XLOG_FROM_ANY;
3549 : :
3550 : : /* In standby-mode, keep trying */
3551 [ + - ]: 2 : if (StandbyMode)
3552 : 2 : goto retry;
3553 : : else
1351 tmunro@postgresql.or 3554 :UBC 0 : return XLREAD_FAIL;
3555 : : }
3556 : :
3557 : : /*
3558 : : * Open the WAL segment containing WAL location 'RecPtr'.
3559 : : *
3560 : : * The segment can be fetched via restore_command, or via walreceiver having
3561 : : * streamed the record, or it can already be present in pg_wal. Checking
3562 : : * pg_wal is mainly for crash recovery, but it will be polled in standby mode
3563 : : * too, in case someone copies a new segment directly to pg_wal. That is not
3564 : : * documented or recommended, though.
3565 : : *
3566 : : * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
3567 : : * prepare to read WAL starting from RedoStartLSN after this.
3568 : : *
3569 : : * 'RecPtr' might not point to the beginning of the record we're interested
3570 : : * in, it might also point to the page or segment header. In that case,
3571 : : * 'tliRecPtr' is the position of the WAL record we're interested in. It is
3572 : : * used to decide which timeline to stream the requested WAL from.
3573 : : *
3574 : : * 'replayLSN' is the current replay LSN, so that if we scan for new
3575 : : * timelines, we can reject a switch to a timeline that branched off before
3576 : : * this point.
3577 : : *
3578 : : * If the record is not immediately available, the function returns
3579 : : * XLREAD_FAIL if we're not in standby mode. In standby mode, waits for it
3580 : : * to become available.
3581 : : *
3582 : : * When the requested record becomes available, the function opens the file
3583 : : * containing it (if not open already), and returns XLREAD_SUCCESS. When end
3584 : : * of standby mode is triggered by the user, and there is no more WAL
3585 : : * available, returns XLREAD_FAIL.
3586 : : *
3587 : : * If nonblocking is true, then give up immediately if we can't satisfy the
3588 : : * request, returning XLREAD_WOULDBLOCK instead of waiting.
3589 : : */
3590 : : static XLogPageReadResult
1401 heikki.linnakangas@i 3591 :CBC 14297 : WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
3592 : : bool fetching_ckpt, XLogRecPtr tliRecPtr,
3593 : : TimeLineID replayTLI, XLogRecPtr replayLSN,
3594 : : bool nonblocking)
3595 : : {
3596 : : static TimestampTz last_fail_time = 0;
3597 : : TimestampTz now;
3598 : 14297 : bool streaming_reply_sent = false;
3599 : :
3600 : : /*-------
3601 : : * Standby mode is implemented by a state machine:
3602 : : *
3603 : : * 1. Read from either archive or pg_wal (XLOG_FROM_ARCHIVE), or just
3604 : : * pg_wal (XLOG_FROM_PG_WAL)
3605 : : * 2. Check for promotion trigger request
3606 : : * 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
3607 : : * 4. Rescan timelines
3608 : : * 5. Sleep wal_retrieve_retry_interval milliseconds, and loop back to 1.
3609 : : *
3610 : : * Failure to read from the current source advances the state machine to
3611 : : * the next state.
3612 : : *
3613 : : * 'currentSource' indicates the current state. There are no currentSource
3614 : : * values for "check trigger", "rescan timelines", and "sleep" states,
3615 : : * those actions are taken when reading from the previous source fails, as
3616 : : * part of advancing to the next state.
3617 : : *
3618 : : * If standby mode is turned off while reading WAL from stream, we move
3619 : : * to XLOG_FROM_ARCHIVE and reset lastSourceFailed, to force fetching
3620 : : * the files (which would be required at end of recovery, e.g., timeline
3621 : : * history file) from archive or pg_wal. We don't need to kill WAL receiver
3622 : : * here because it's already stopped when standby mode is turned off at
3623 : : * the end of recovery.
3624 : : *-------
3625 : : */
3626 [ + + ]: 14297 : if (!InArchiveRecovery)
3627 : 930 : currentSource = XLOG_FROM_PG_WAL;
3628 [ + + ]: 13367 : else if (currentSource == XLOG_FROM_ANY ||
3629 [ + + - + ]: 13257 : (!StandbyMode && currentSource == XLOG_FROM_STREAM))
3630 : : {
3631 : 110 : lastSourceFailed = false;
3632 : 110 : currentSource = XLOG_FROM_ARCHIVE;
3633 : : }
3634 : :
3635 : : for (;;)
3636 : 12004 : {
3637 : 26301 : XLogSource oldSource = currentSource;
3638 : 26301 : bool startWalReceiver = false;
3639 : :
3640 : : /*
3641 : : * First check if we failed to read from the current source, and
3642 : : * advance the state machine if so. The failure to read might've
3643 : : * happened outside this function, e.g. when a CRC check fails on a
3644 : : * record, or within this loop.
3645 : : */
3646 [ + + ]: 26301 : if (lastSourceFailed)
3647 : : {
3648 : : /*
3649 : : * Don't allow any retry loops to occur during nonblocking
3650 : : * readahead. Let the caller process everything that has been
3651 : : * decoded already first.
3652 : : */
1351 tmunro@postgresql.or 3653 [ + + ]: 415 : if (nonblocking)
3654 : 71 : return XLREAD_WOULDBLOCK;
3655 : :
1401 heikki.linnakangas@i 3656 [ + + - ]: 344 : switch (currentSource)
3657 : : {
3658 : 211 : case XLOG_FROM_ARCHIVE:
3659 : : case XLOG_FROM_PG_WAL:
3660 : :
3661 : : /*
3662 : : * Check to see if promotion is requested. Note that we do
3663 : : * this only after failure, so when you promote, we still
3664 : : * finish replaying as much as we can from archive and
3665 : : * pg_wal before failover.
3666 : : */
3667 [ + + + + ]: 211 : if (StandbyMode && CheckForStandbyTrigger())
3668 : : {
3669 : 18 : XLogShutdownWalRcv();
1351 tmunro@postgresql.or 3670 : 18 : return XLREAD_FAIL;
3671 : : }
3672 : :
3673 : : /*
3674 : : * Not in standby mode, and we've now tried the archive
3675 : : * and pg_wal.
3676 : : */
1401 heikki.linnakangas@i 3677 [ + + ]: 193 : if (!StandbyMode)
1351 tmunro@postgresql.or 3678 : 20 : return XLREAD_FAIL;
3679 : :
3680 : : /*
3681 : : * Move to XLOG_FROM_STREAM state, and set to start a
3682 : : * walreceiver if necessary.
3683 : : */
1401 heikki.linnakangas@i 3684 : 173 : currentSource = XLOG_FROM_STREAM;
3685 : 173 : startWalReceiver = true;
3686 : 173 : break;
3687 : :
3688 : 133 : case XLOG_FROM_STREAM:
3689 : :
3690 : : /*
3691 : : * Failure while streaming. Most likely, we got here
3692 : : * because streaming replication was terminated, or
3693 : : * promotion was triggered. But we also get here if we
3694 : : * find an invalid record in the WAL streamed from the
3695 : : * primary, in which case something is seriously wrong.
3696 : : * There's little chance that the problem will just go
3697 : : * away, but PANIC is not good for availability either,
3698 : : * especially in hot standby mode. So, we treat that the
3699 : : * same as disconnection, and retry from archive/pg_wal
3700 : : * again. The WAL in the archive should be identical to
3701 : : * what was streamed, so it's unlikely that it helps, but
3702 : : * one can hope...
3703 : : */
3704 : :
3705 : : /*
3706 : : * We should be able to move to XLOG_FROM_STREAM only in
3707 : : * standby mode.
3708 : : */
3709 [ - + ]: 133 : Assert(StandbyMode);
3710 : :
3711 : : /*
3712 : : * Before we leave XLOG_FROM_STREAM state, make sure that
3713 : : * walreceiver is not active, so that it won't overwrite
3714 : : * WAL that we restore from archive.
3715 : : *
3716 : : * If walreceiver is actively streaming (or attempting to
3717 : : * connect), we must shut it down. However, if it's
3718 : : * already in WAITING state (e.g., due to timeline
3719 : : * divergence), we only need to reset the install flag to
3720 : : * allow archive restoration.
3721 : : */
44 michael@paquier.xyz 3722 [ + + ]: 133 : if (WalRcvStreaming())
3723 : 29 : XLogShutdownWalRcv();
3724 : : else
3725 : : {
3726 : : /*
3727 : : * WALRCV_STOPPING state is a transient state while
3728 : : * the startup process is in ShutdownWalRcv(). It
3729 : : * should never appear here since we would be waiting
3730 : : * for the walreceiver to reach WALRCV_STOPPED in that
3731 : : * case.
3732 : : */
44 michael@paquier.xyz 3733 [ - + ]:GNC 104 : Assert(WalRcvGetState() != WALRCV_STOPPING);
44 michael@paquier.xyz 3734 :CBC 104 : ResetInstallXLogFileSegmentActive();
3735 : : }
3736 : :
3737 : : /*
3738 : : * Before we sleep, re-scan for possible new timelines if
3739 : : * we were requested to recover to the latest timeline.
3740 : : */
1401 heikki.linnakangas@i 3741 [ + - ]: 133 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
3742 : : {
3743 [ + + ]: 133 : if (rescanLatestTimeLine(replayTLI, replayLSN))
3744 : : {
3745 : 6 : currentSource = XLOG_FROM_ARCHIVE;
3746 : 6 : break;
3747 : : }
3748 : : }
3749 : :
3750 : : /*
3751 : : * XLOG_FROM_STREAM is the last state in our state
3752 : : * machine, so we've exhausted all the options for
3753 : : * obtaining the requested WAL. We're going to loop back
3754 : : * and retry from the archive, but if it hasn't been long
3755 : : * since last attempt, sleep wal_retrieve_retry_interval
3756 : : * milliseconds to avoid busy-waiting.
3757 : : */
3758 : 127 : now = GetCurrentTimestamp();
3759 [ + + ]: 127 : if (!TimestampDifferenceExceeds(last_fail_time, now,
3760 : : wal_retrieve_retry_interval))
3761 : : {
3762 : : long wait_time;
3763 : :
3764 : 124 : wait_time = wal_retrieve_retry_interval -
3765 : 62 : TimestampDifferenceMilliseconds(last_fail_time, now);
3766 : :
164 alvherre@kurilemu.de 3767 [ + - ]:GNC 62 : elog(LOG, "waiting for WAL to become available at %X/%08X",
3768 : : LSN_FORMAT_ARGS(RecPtr));
3769 : :
3770 : : /* Do background tasks that might benefit us later. */
1115 tgl@sss.pgh.pa.us 3771 :CBC 62 : KnownAssignedTransactionIdsIdleMaintenance();
3772 : :
1401 heikki.linnakangas@i 3773 : 62 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3774 : : WL_LATCH_SET | WL_TIMEOUT |
3775 : : WL_EXIT_ON_PM_DEATH,
3776 : : wait_time,
3777 : : WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
3778 : 62 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3779 : 62 : now = GetCurrentTimestamp();
3780 : :
3781 : : /* Handle interrupt signals of startup process */
288 3782 : 62 : ProcessStartupProcInterrupts();
3783 : : }
1401 3784 : 113 : last_fail_time = now;
3785 : 113 : currentSource = XLOG_FROM_ARCHIVE;
3786 : 113 : break;
3787 : :
1401 heikki.linnakangas@i 3788 :UBC 0 : default:
3789 [ # # ]: 0 : elog(ERROR, "unexpected WAL source %d", currentSource);
3790 : : }
3791 : : }
1401 heikki.linnakangas@i 3792 [ + + ]:CBC 25886 : else if (currentSource == XLOG_FROM_PG_WAL)
3793 : : {
3794 : : /*
3795 : : * We just successfully read a file in pg_wal. We prefer files in
3796 : : * the archive over ones in pg_wal, so try the next file again
3797 : : * from the archive first.
3798 : : */
3799 [ - + ]: 928 : if (InArchiveRecovery)
1401 heikki.linnakangas@i 3800 :UBC 0 : currentSource = XLOG_FROM_ARCHIVE;
3801 : : }
3802 : :
1401 heikki.linnakangas@i 3803 [ + + ]:CBC 26178 : if (currentSource != oldSource)
3804 [ + + + - ]: 292 : elog(DEBUG2, "switched WAL source from %s to %s after %s",
3805 : : xlogSourceNames[oldSource], xlogSourceNames[currentSource],
3806 : : lastSourceFailed ? "failure" : "success");
3807 : :
3808 : : /*
3809 : : * We've now handled possible failure. Try to read from the chosen
3810 : : * source.
3811 : : */
3812 : 26178 : lastSourceFailed = false;
3813 : :
3814 [ + + - ]: 26178 : switch (currentSource)
3815 : : {
3816 : 1682 : case XLOG_FROM_ARCHIVE:
3817 : : case XLOG_FROM_PG_WAL:
3818 : :
3819 : : /*
3820 : : * WAL receiver must not be running when reading WAL from
3821 : : * archive or pg_wal.
3822 : : */
3823 [ - + ]: 1682 : Assert(!WalRcvStreaming());
3824 : :
3825 : : /* Close any old file we might have open. */
3826 [ + + ]: 1682 : if (readFile >= 0)
3827 : : {
3828 : 69 : close(readFile);
3829 : 69 : readFile = -1;
3830 : : }
3831 : : /* Reset curFileTLI if random fetch. */
3832 [ + + ]: 1682 : if (randAccess)
3833 : 1075 : curFileTLI = 0;
3834 : :
3835 : : /*
3836 : : * Try to restore the file from archive, or read an existing
3837 : : * file from pg_wal.
3838 : : */
464 michael@paquier.xyz 3839 : 1682 : readFile = XLogFileReadAnyTLI(readSegNo,
1401 heikki.linnakangas@i 3840 [ + + ]: 1682 : currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
3841 : : currentSource);
3842 [ + + ]: 1682 : if (readFile >= 0)
1351 tmunro@postgresql.or 3843 : 1517 : return XLREAD_SUCCESS; /* success! */
3844 : :
3845 : : /*
3846 : : * Nope, not found in archive or pg_wal.
3847 : : */
1401 heikki.linnakangas@i 3848 : 165 : lastSourceFailed = true;
3849 : 165 : break;
3850 : :
3851 : 24496 : case XLOG_FROM_STREAM:
3852 : : {
3853 : : bool havedata;
3854 : :
3855 : : /*
3856 : : * We should be able to move to XLOG_FROM_STREAM only in
3857 : : * standby mode.
3858 : : */
3859 [ - + ]: 24496 : Assert(StandbyMode);
3860 : :
3861 : : /*
3862 : : * First, shutdown walreceiver if its restart has been
3863 : : * requested -- but no point if we're already slated for
3864 : : * starting it.
3865 : : */
3866 [ + + + - ]: 24496 : if (pendingWalRcvRestart && !startWalReceiver)
3867 : : {
3868 : 4 : XLogShutdownWalRcv();
3869 : :
3870 : : /*
3871 : : * Re-scan for possible new timelines if we were
3872 : : * requested to recover to the latest timeline.
3873 : : */
3874 [ + - ]: 4 : if (recoveryTargetTimeLineGoal ==
3875 : : RECOVERY_TARGET_TIMELINE_LATEST)
3876 : 4 : rescanLatestTimeLine(replayTLI, replayLSN);
3877 : :
3878 : 4 : startWalReceiver = true;
3879 : : }
3880 : 24496 : pendingWalRcvRestart = false;
3881 : :
3882 : : /*
3883 : : * Launch walreceiver if needed.
3884 : : *
3885 : : * If fetching_ckpt is true, RecPtr points to the initial
3886 : : * checkpoint location. In that case, we use RedoStartLSN
3887 : : * as the streaming start position instead of RecPtr, so
3888 : : * that when we later jump backwards to start redo at
3889 : : * RedoStartLSN, we will have the logs streamed already.
3890 : : */
3891 [ + + + - ]: 24496 : if (startWalReceiver &&
3892 [ + + ]: 177 : PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
3893 : : {
3894 : : XLogRecPtr ptr;
3895 : : TimeLineID tli;
3896 : :
3897 [ - + ]: 153 : if (fetching_ckpt)
3898 : : {
1401 heikki.linnakangas@i 3899 :UBC 0 : ptr = RedoStartLSN;
3900 : 0 : tli = RedoStartTLI;
3901 : : }
3902 : : else
3903 : : {
1401 heikki.linnakangas@i 3904 :CBC 153 : ptr = RecPtr;
3905 : :
3906 : : /*
3907 : : * Use the record begin position to determine the
3908 : : * TLI, rather than the position we're reading.
3909 : : */
3910 : 153 : tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
3911 : :
3912 [ + - - + ]: 153 : if (curFileTLI > 0 && tli < curFileTLI)
164 alvherre@kurilemu.de 3913 [ # # ]:UNC 0 : elog(ERROR, "according to history file, WAL location %X/%08X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
3914 : : LSN_FORMAT_ARGS(tliRecPtr),
3915 : : tli, curFileTLI);
3916 : : }
1401 heikki.linnakangas@i 3917 :CBC 153 : curFileTLI = tli;
3918 : 153 : SetInstallXLogFileSegmentActive();
3919 : 153 : RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
3920 : : PrimarySlotName,
3921 : : wal_receiver_create_temp_slot);
3922 : 153 : flushedUpto = 0;
3923 : : }
3924 : :
3925 : : /*
3926 : : * Check if WAL receiver is active or wait to start up.
3927 : : */
3928 [ + + ]: 24496 : if (!WalRcvStreaming())
3929 : : {
3930 : 104 : lastSourceFailed = true;
3931 : 104 : break;
3932 : : }
3933 : :
3934 : : /*
3935 : : * Walreceiver is active, so see if new data has arrived.
3936 : : *
3937 : : * We only advance XLogReceiptTime when we obtain fresh
3938 : : * WAL from walreceiver and observe that we had already
3939 : : * processed everything before the most recent "chunk"
3940 : : * that it flushed to disk. In steady state where we are
3941 : : * keeping up with the incoming data, XLogReceiptTime will
3942 : : * be updated on each cycle. When we are behind,
3943 : : * XLogReceiptTime will not advance, so the grace time
3944 : : * allotted to conflicting queries will decrease.
3945 : : */
3946 [ + + ]: 24392 : if (RecPtr < flushedUpto)
3947 : 1627 : havedata = true;
3948 : : else
3949 : : {
3950 : : XLogRecPtr latestChunkStart;
3951 : :
3952 : 22765 : flushedUpto = GetWalRcvFlushRecPtr(&latestChunkStart, &receiveTLI);
3953 [ + + + - ]: 22765 : if (RecPtr < flushedUpto && receiveTLI == curFileTLI)
3954 : : {
3955 : 11784 : havedata = true;
3956 [ + + ]: 11784 : if (latestChunkStart <= RecPtr)
3957 : : {
3958 : 8527 : XLogReceiptTime = GetCurrentTimestamp();
3959 : 8527 : SetCurrentChunkStartTime(XLogReceiptTime);
3960 : : }
3961 : : }
3962 : : else
3963 : 10981 : havedata = false;
3964 : : }
3965 [ + + ]: 24392 : if (havedata)
3966 : : {
3967 : : /*
3968 : : * Great, streamed far enough. Open the file if it's
3969 : : * not open already. Also read the timeline history
3970 : : * file if we haven't initialized timeline history
3971 : : * yet; it should be streamed over and present in
3972 : : * pg_wal by now. Use XLOG_FROM_STREAM so that source
3973 : : * info is set correctly and XLogReceiptTime isn't
3974 : : * changed.
3975 : : *
3976 : : * NB: We must set readTimeLineHistory based on
3977 : : * recoveryTargetTLI, not receiveTLI. Normally they'll
3978 : : * be the same, but if recovery_target_timeline is
3979 : : * 'latest' and archiving is configured, then it's
3980 : : * possible that we managed to retrieve one or more
3981 : : * new timeline history files from the archive,
3982 : : * updating recoveryTargetTLI.
3983 : : */
3984 [ + + ]: 13411 : if (readFile < 0)
3985 : : {
3986 [ - + ]: 1052 : if (!expectedTLEs)
1401 heikki.linnakangas@i 3987 :UBC 0 : expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
464 michael@paquier.xyz 3988 :CBC 1052 : readFile = XLogFileRead(readSegNo, receiveTLI,
3989 : : XLOG_FROM_STREAM, false);
1401 heikki.linnakangas@i 3990 [ - + ]: 1052 : Assert(readFile >= 0);
3991 : : }
3992 : : else
3993 : : {
3994 : : /* just make sure source info is correct... */
3995 : 12359 : readSource = XLOG_FROM_STREAM;
3996 : 12359 : XLogReceiptSource = XLOG_FROM_STREAM;
1351 tmunro@postgresql.or 3997 : 12359 : return XLREAD_SUCCESS;
3998 : : }
1401 heikki.linnakangas@i 3999 : 1052 : break;
4000 : : }
4001 : :
4002 : : /* In nonblocking mode, return rather than sleeping. */
1351 tmunro@postgresql.or 4003 [ + + ]: 10981 : if (nonblocking)
4004 : 260 : return XLREAD_WOULDBLOCK;
4005 : :
4006 : : /*
4007 : : * Data not here yet. Check for trigger, then wait for
4008 : : * walreceiver to wake us up when new WAL arrives.
4009 : : */
1401 heikki.linnakangas@i 4010 [ + + ]: 10721 : if (CheckForStandbyTrigger())
4011 : : {
4012 : : /*
4013 : : * Note that we don't return XLREAD_FAIL immediately
4014 : : * here. After being triggered, we still want to
4015 : : * replay all the WAL that was already streamed. It's
4016 : : * in pg_wal now, so we just treat this as a failure,
4017 : : * and the state machine will move on to replay the
4018 : : * streamed WAL from pg_wal, and then recheck the
4019 : : * trigger and exit replay.
4020 : : */
4021 : 29 : lastSourceFailed = true;
4022 : 29 : break;
4023 : : }
4024 : :
4025 : : /*
4026 : : * Since we have replayed everything we have received so
4027 : : * far and are about to start waiting for more WAL, let's
4028 : : * tell the upstream server our replay location now so
4029 : : * that pg_stat_replication doesn't show stale
4030 : : * information.
4031 : : */
4032 [ + + ]: 10692 : if (!streaming_reply_sent)
4033 : : {
4034 : 9129 : WalRcvForceReply();
4035 : 9129 : streaming_reply_sent = true;
4036 : : }
4037 : :
4038 : : /* Do any background tasks that might benefit us later. */
1115 tgl@sss.pgh.pa.us 4039 : 10692 : KnownAssignedTransactionIdsIdleMaintenance();
4040 : :
4041 : : /* Update pg_stat_recovery_prefetch before sleeping. */
1351 tmunro@postgresql.or 4042 : 10692 : XLogPrefetcherComputeStats(xlogprefetcher);
4043 : :
4044 : : /*
4045 : : * Wait for more WAL to arrive, when we will be woken
4046 : : * immediately by the WAL receiver.
4047 : : */
1401 heikki.linnakangas@i 4048 : 10692 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
4049 : : WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
4050 : : -1L,
4051 : : WAIT_EVENT_RECOVERY_WAL_STREAM);
4052 : 10692 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
4053 : 10692 : break;
4054 : : }
4055 : :
1401 heikki.linnakangas@i 4056 :UBC 0 : default:
4057 [ # # ]: 0 : elog(ERROR, "unexpected WAL source %d", currentSource);
4058 : : }
4059 : :
4060 : : /*
4061 : : * Check for recovery pause here so that we can confirm more quickly
4062 : : * that a requested pause has actually taken effect.
4063 : : */
1401 heikki.linnakangas@i 4064 [ + + ]:CBC 12042 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
4065 : : RECOVERY_NOT_PAUSED)
4066 : 2 : recoveryPausesHere(false);
4067 : :
4068 : : /*
4069 : : * This possibly-long loop needs to handle interrupts of startup
4070 : : * process.
4071 : : */
288 4072 : 12042 : ProcessStartupProcInterrupts();
4073 : : }
4074 : :
4075 : : return XLREAD_FAIL; /* not reached */
4076 : : }
4077 : :
4078 : :
4079 : : /*
4080 : : * Determine what log level should be used to report a corrupt WAL record
4081 : : * in the current WAL page, previously read by XLogPageRead().
4082 : : *
4083 : : * 'emode' is the error mode that would be used to report a file-not-found
4084 : : * or legitimate end-of-WAL situation. Generally, we use it as-is, but if
4085 : : * we're retrying the exact same record that we've tried previously, only
4086 : : * complain the first time to keep the noise down. However, we only do when
4087 : : * reading from pg_wal, because we don't expect any invalid records in archive
4088 : : * or in records streamed from the primary. Files in the archive should be complete,
4089 : : * and we should never hit the end of WAL because we stop and wait for more WAL
4090 : : * to arrive before replaying it.
4091 : : *
4092 : : * NOTE: This function remembers the RecPtr value it was last called with,
4093 : : * to suppress repeated messages about the same record. Only call this when
4094 : : * you are about to ereport(), or you might cause a later message to be
4095 : : * erroneously suppressed.
4096 : : */
4097 : : static int
1401 4098 : 228 : emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
4099 : : {
4100 : : static XLogRecPtr lastComplaint = 0;
4101 : :
4102 [ + + + - ]: 228 : if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
4103 : : {
4104 [ + + ]: 225 : if (RecPtr == lastComplaint)
4105 : 41 : emode = DEBUG1;
4106 : : else
4107 : 184 : lastComplaint = RecPtr;
4108 : : }
4109 : 228 : return emode;
4110 : : }
4111 : :
4112 : :
4113 : : /*
4114 : : * Subroutine to try to fetch and validate a prior checkpoint record.
4115 : : */
4116 : : static XLogRecord *
1351 tmunro@postgresql.or 4117 : 927 : ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
4118 : : TimeLineID replayTLI)
4119 : : {
4120 : : XLogRecord *record;
4121 : : uint8 info;
4122 : :
1401 heikki.linnakangas@i 4123 [ - + ]: 927 : Assert(xlogreader != NULL);
4124 : :
4125 [ - + ]: 927 : if (!XRecOffIsValid(RecPtr))
4126 : : {
1247 fujii@postgresql.org 4127 [ # # ]:UBC 0 : ereport(LOG,
4128 : : (errmsg("invalid checkpoint location")));
1401 heikki.linnakangas@i 4129 : 0 : return NULL;
4130 : : }
4131 : :
1351 tmunro@postgresql.or 4132 :CBC 927 : XLogPrefetcherBeginRead(xlogprefetcher, RecPtr);
4133 : 927 : record = ReadRecord(xlogprefetcher, LOG, true, replayTLI);
4134 : :
1401 heikki.linnakangas@i 4135 [ - + ]: 927 : if (record == NULL)
4136 : : {
1247 fujii@postgresql.org 4137 [ # # ]:UBC 0 : ereport(LOG,
4138 : : (errmsg("invalid checkpoint record")));
1401 heikki.linnakangas@i 4139 : 0 : return NULL;
4140 : : }
1401 heikki.linnakangas@i 4141 [ - + ]:CBC 927 : if (record->xl_rmid != RM_XLOG_ID)
4142 : : {
1247 fujii@postgresql.org 4143 [ # # ]:UBC 0 : ereport(LOG,
4144 : : (errmsg("invalid resource manager ID in checkpoint record")));
1401 heikki.linnakangas@i 4145 : 0 : return NULL;
4146 : : }
1401 heikki.linnakangas@i 4147 :CBC 927 : info = record->xl_info & ~XLR_INFO_MASK;
4148 [ + + - + ]: 927 : if (info != XLOG_CHECKPOINT_SHUTDOWN &&
4149 : : info != XLOG_CHECKPOINT_ONLINE)
4150 : : {
1247 fujii@postgresql.org 4151 [ # # ]:UBC 0 : ereport(LOG,
4152 : : (errmsg("invalid xl_info in checkpoint record")));
1401 heikki.linnakangas@i 4153 : 0 : return NULL;
4154 : : }
1401 heikki.linnakangas@i 4155 [ - + ]:CBC 927 : if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
4156 : : {
1247 fujii@postgresql.org 4157 [ # # ]:UBC 0 : ereport(LOG,
4158 : : (errmsg("invalid length of checkpoint record")));
1401 heikki.linnakangas@i 4159 : 0 : return NULL;
4160 : : }
1401 heikki.linnakangas@i 4161 :CBC 927 : return record;
4162 : : }
4163 : :
4164 : : /*
4165 : : * Scan for new timelines that might have appeared in the archive since we
4166 : : * started recovery.
4167 : : *
4168 : : * If there are any, the function changes recovery target TLI to the latest
4169 : : * one and returns 'true'.
4170 : : */
4171 : : static bool
4172 : 137 : rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
4173 : : {
4174 : : List *newExpectedTLEs;
4175 : : bool found;
4176 : : ListCell *cell;
4177 : : TimeLineID newtarget;
4178 : 137 : TimeLineID oldtarget = recoveryTargetTLI;
4179 : 137 : TimeLineHistoryEntry *currentTle = NULL;
4180 : :
4181 : 137 : newtarget = findNewestTimeLine(recoveryTargetTLI);
4182 [ + + ]: 137 : if (newtarget == recoveryTargetTLI)
4183 : : {
4184 : : /* No new timelines found */
4185 : 131 : return false;
4186 : : }
4187 : :
4188 : : /*
4189 : : * Determine the list of expected TLIs for the new TLI
4190 : : */
4191 : :
4192 : 6 : newExpectedTLEs = readTimeLineHistory(newtarget);
4193 : :
4194 : : /*
4195 : : * If the current timeline is not part of the history of the new timeline,
4196 : : * we cannot proceed to it.
4197 : : */
4198 : 6 : found = false;
4199 [ + - + - : 12 : foreach(cell, newExpectedTLEs)
+ - ]
4200 : : {
4201 : 12 : currentTle = (TimeLineHistoryEntry *) lfirst(cell);
4202 : :
4203 [ + + ]: 12 : if (currentTle->tli == recoveryTargetTLI)
4204 : : {
4205 : 6 : found = true;
4206 : 6 : break;
4207 : : }
4208 : : }
4209 [ - + ]: 6 : if (!found)
4210 : : {
1401 heikki.linnakangas@i 4211 [ # # ]:UBC 0 : ereport(LOG,
4212 : : (errmsg("new timeline %u is not a child of database system timeline %u",
4213 : : newtarget,
4214 : : replayTLI)));
4215 : 0 : return false;
4216 : : }
4217 : :
4218 : : /*
4219 : : * The current timeline was found in the history file, but check that the
4220 : : * next timeline was forked off from it *after* the current recovery
4221 : : * location.
4222 : : */
1401 heikki.linnakangas@i 4223 [ - + ]:CBC 6 : if (currentTle->end < replayLSN)
4224 : : {
1401 heikki.linnakangas@i 4225 [ # # ]:UBC 0 : ereport(LOG,
4226 : : errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%08X",
4227 : : newtarget,
4228 : : replayTLI,
4229 : : LSN_FORMAT_ARGS(replayLSN)));
4230 : 0 : return false;
4231 : : }
4232 : :
4233 : : /* The new timeline history seems valid. Switch target */
1401 heikki.linnakangas@i 4234 :CBC 6 : recoveryTargetTLI = newtarget;
4235 : 6 : list_free_deep(expectedTLEs);
4236 : 6 : expectedTLEs = newExpectedTLEs;
4237 : :
4238 : : /*
4239 : : * As in StartupXLOG(), try to ensure we have all the history files
4240 : : * between the old target and new target in pg_wal.
4241 : : */
4242 : 6 : restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
4243 : :
4244 [ + - ]: 6 : ereport(LOG,
4245 : : (errmsg("new target timeline is %u",
4246 : : recoveryTargetTLI)));
4247 : :
4248 : 6 : return true;
4249 : : }
4250 : :
4251 : :
4252 : : /*
4253 : : * Open a logfile segment for reading (during recovery).
4254 : : *
4255 : : * If source == XLOG_FROM_ARCHIVE, the segment is retrieved from archive.
4256 : : * Otherwise, it's assumed to be already available in pg_wal.
4257 : : */
4258 : : static int
464 michael@paquier.xyz 4259 : 3151 : XLogFileRead(XLogSegNo segno, TimeLineID tli,
4260 : : XLogSource source, bool notfoundOk)
4261 : : {
4262 : : char xlogfname[MAXFNAMELEN];
4263 : : char activitymsg[MAXFNAMELEN + 16];
4264 : : char path[MAXPGPATH];
4265 : : int fd;
4266 : :
1401 heikki.linnakangas@i 4267 : 3151 : XLogFileName(xlogfname, tli, segno, wal_segment_size);
4268 : :
4269 [ + + - ]: 3151 : switch (source)
4270 : : {
4271 : 764 : case XLOG_FROM_ARCHIVE:
4272 : : /* Report recovery progress in PS display */
4273 : 764 : snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
4274 : : xlogfname);
4275 : 764 : set_ps_display(activitymsg);
4276 : :
4277 [ + + ]: 764 : if (!RestoreArchivedFile(path, xlogfname,
4278 : : "RECOVERYXLOG",
4279 : : wal_segment_size,
4280 : : InRedo))
4281 : 407 : return -1;
4282 : 357 : break;
4283 : :
4284 : 2387 : case XLOG_FROM_PG_WAL:
4285 : : case XLOG_FROM_STREAM:
4286 : 2387 : XLogFilePath(path, tli, segno, wal_segment_size);
4287 : 2387 : break;
4288 : :
1401 heikki.linnakangas@i 4289 :UBC 0 : default:
4290 [ # # ]: 0 : elog(ERROR, "invalid XLogFileRead source %d", source);
4291 : : }
4292 : :
4293 : : /*
4294 : : * If the segment was fetched from archival storage, replace the existing
4295 : : * xlog segment (if any) with the archival version.
4296 : : */
1401 heikki.linnakangas@i 4297 [ + + ]:CBC 2744 : if (source == XLOG_FROM_ARCHIVE)
4298 : : {
4299 [ - + ]: 357 : Assert(!IsInstallXLogFileSegmentActive());
4300 : 357 : KeepFileRestoredFromArchive(path, xlogfname);
4301 : :
4302 : : /*
4303 : : * Set path to point at the new file in pg_wal.
4304 : : */
4305 : 357 : snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
4306 : : }
4307 : :
4308 : 2744 : fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
4309 [ + + ]: 2744 : if (fd >= 0)
4310 : : {
4311 : : /* Success! */
4312 : 2569 : curFileTLI = tli;
4313 : :
4314 : : /* Report recovery progress in PS display */
4315 : 2569 : snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
4316 : : xlogfname);
4317 : 2569 : set_ps_display(activitymsg);
4318 : :
4319 : : /* Track source of data in assorted state variables */
4320 : 2569 : readSource = source;
4321 : 2569 : XLogReceiptSource = source;
4322 : : /* In FROM_STREAM case, caller tracks receipt time, not me */
4323 [ + + ]: 2569 : if (source != XLOG_FROM_STREAM)
4324 : 1517 : XLogReceiptTime = GetCurrentTimestamp();
4325 : :
4326 : 2569 : return fd;
4327 : : }
4328 [ + - - + ]: 175 : if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
1401 heikki.linnakangas@i 4329 [ # # ]:UBC 0 : ereport(PANIC,
4330 : : (errcode_for_file_access(),
4331 : : errmsg("could not open file \"%s\": %m", path)));
1401 heikki.linnakangas@i 4332 :CBC 175 : return -1;
4333 : : }
4334 : :
4335 : : /*
4336 : : * Open a logfile segment for reading (during recovery).
4337 : : *
4338 : : * This version searches for the segment with any TLI listed in expectedTLEs.
4339 : : */
4340 : : static int
464 michael@paquier.xyz 4341 : 1682 : XLogFileReadAnyTLI(XLogSegNo segno, XLogSource source)
4342 : : {
4343 : : char path[MAXPGPATH];
4344 : : ListCell *cell;
4345 : : int fd;
4346 : : List *tles;
4347 : :
4348 : : /*
4349 : : * Loop looking for a suitable timeline ID: we might need to read any of
4350 : : * the timelines listed in expectedTLEs.
4351 : : *
4352 : : * We expect curFileTLI on entry to be the TLI of the preceding file in
4353 : : * sequence, or 0 if there was no predecessor. We do not allow curFileTLI
4354 : : * to go backwards; this prevents us from picking up the wrong file when a
4355 : : * parent timeline extends to higher segment numbers than the child we
4356 : : * want to read.
4357 : : *
4358 : : * If we haven't read the timeline history file yet, read it now, so that
4359 : : * we know which TLIs to scan. We don't save the list in expectedTLEs,
4360 : : * however, unless we actually find a valid segment. That way if there is
4361 : : * neither a timeline history file nor a WAL segment in the archive, and
4362 : : * streaming replication is set up, we'll read the timeline history file
4363 : : * streamed from the primary when we start streaming, instead of
4364 : : * recovering with a dummy history generated here.
4365 : : */
1401 heikki.linnakangas@i 4366 [ + + ]: 1682 : if (expectedTLEs)
4367 : 755 : tles = expectedTLEs;
4368 : : else
4369 : 927 : tles = readTimeLineHistory(recoveryTargetTLI);
4370 : :
4371 [ + - + + : 1863 : foreach(cell, tles)
+ + ]
4372 : : {
4373 : 1702 : TimeLineHistoryEntry *hent = (TimeLineHistoryEntry *) lfirst(cell);
4374 : 1702 : TimeLineID tli = hent->tli;
4375 : :
4376 [ + + ]: 1702 : if (tli < curFileTLI)
4377 : 4 : break; /* don't bother looking at too-old TLIs */
4378 : :
4379 : : /*
4380 : : * Skip scanning the timeline ID that the logfile segment to read
4381 : : * doesn't belong to
4382 : : */
42 alvherre@kurilemu.de 4383 [ + + ]:GNC 1698 : if (XLogRecPtrIsValid(hent->begin))
4384 : : {
1401 heikki.linnakangas@i 4385 :CBC 69 : XLogSegNo beginseg = 0;
4386 : :
4387 : 69 : XLByteToSeg(hent->begin, beginseg, wal_segment_size);
4388 : :
4389 : : /*
4390 : : * The logfile segment that doesn't belong to the timeline is
4391 : : * older or newer than the segment that the timeline started or
4392 : : * ended at, respectively. It's sufficient to check only the
4393 : : * starting segment of the timeline here. Since the timelines are
4394 : : * scanned in descending order in this loop, any segments newer
4395 : : * than the ending segment should belong to newer timeline and
4396 : : * have already been read before. So it's not necessary to check
4397 : : * the ending segment of the timeline here.
4398 : : */
4399 [ + + ]: 69 : if (segno < beginseg)
4400 : 6 : continue;
4401 : : }
4402 : :
4403 [ + + - + ]: 1692 : if (source == XLOG_FROM_ANY || source == XLOG_FROM_ARCHIVE)
4404 : : {
464 michael@paquier.xyz 4405 : 764 : fd = XLogFileRead(segno, tli, XLOG_FROM_ARCHIVE, true);
1401 heikki.linnakangas@i 4406 [ + + ]: 764 : if (fd != -1)
4407 : : {
4408 [ - + ]: 357 : elog(DEBUG1, "got WAL segment from archive");
4409 [ + + ]: 357 : if (!expectedTLEs)
4410 : 14 : expectedTLEs = tles;
4411 : 1517 : return fd;
4412 : : }
4413 : : }
4414 : :
4415 [ + + + - ]: 1335 : if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_WAL)
4416 : : {
464 michael@paquier.xyz 4417 : 1335 : fd = XLogFileRead(segno, tli, XLOG_FROM_PG_WAL, true);
1401 heikki.linnakangas@i 4418 [ + + ]: 1335 : if (fd != -1)
4419 : : {
4420 [ + + ]: 1160 : if (!expectedTLEs)
4421 : 913 : expectedTLEs = tles;
4422 : 1160 : return fd;
4423 : : }
4424 : : }
4425 : : }
4426 : :
4427 : : /* Couldn't find it. For simplicity, complain about front timeline */
4428 : 165 : XLogFilePath(path, recoveryTargetTLI, segno, wal_segment_size);
4429 : 165 : errno = ENOENT;
464 michael@paquier.xyz 4430 [ + + ]: 165 : ereport(DEBUG2,
4431 : : (errcode_for_file_access(),
4432 : : errmsg("could not open file \"%s\": %m", path)));
1401 heikki.linnakangas@i 4433 : 165 : return -1;
4434 : : }
4435 : :
4436 : : /*
4437 : : * Set flag to signal the walreceiver to restart. (The startup process calls
4438 : : * this on noticing a relevant configuration change.)
4439 : : */
4440 : : void
4441 : 5 : StartupRequestWalReceiverRestart(void)
4442 : : {
4443 [ + - + + ]: 5 : if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
4444 : : {
4445 [ + - ]: 4 : ereport(LOG,
4446 : : (errmsg("WAL receiver process shutdown requested")));
4447 : :
4448 : 4 : pendingWalRcvRestart = true;
4449 : : }
4450 : 5 : }
4451 : :
4452 : :
4453 : : /*
4454 : : * Has a standby promotion already been triggered?
4455 : : *
4456 : : * Unlike CheckForStandbyTrigger(), this works in any process
4457 : : * that's connected to shared memory.
4458 : : */
4459 : : bool
4460 : 60 : PromoteIsTriggered(void)
4461 : : {
4462 : : /*
4463 : : * We check shared state each time only until a standby promotion is
4464 : : * triggered. We can't trigger a promotion again, so there's no need to
4465 : : * keep checking after the shared variable has once been seen true.
4466 : : */
4467 [ + + ]: 60 : if (LocalPromoteIsTriggered)
4468 : 43 : return true;
4469 : :
4470 [ - + ]: 17 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4471 : 17 : LocalPromoteIsTriggered = XLogRecoveryCtl->SharedPromoteIsTriggered;
4472 : 17 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4473 : :
4474 : 17 : return LocalPromoteIsTriggered;
4475 : : }
4476 : :
4477 : : static void
4478 : 42 : SetPromoteIsTriggered(void)
4479 : : {
4480 [ - + ]: 42 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4481 : 42 : XLogRecoveryCtl->SharedPromoteIsTriggered = true;
4482 : 42 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4483 : :
4484 : : /*
4485 : : * Mark the recovery pause state as 'not paused' because the paused state
4486 : : * ends and promotion continues if a promotion is triggered while recovery
4487 : : * is paused. Otherwise pg_get_wal_replay_pause_state() can mistakenly
4488 : : * return 'paused' while a promotion is ongoing.
4489 : : */
4490 : 42 : SetRecoveryPause(false);
4491 : :
4492 : 42 : LocalPromoteIsTriggered = true;
4493 : 42 : }
4494 : :
4495 : : /*
4496 : : * Check whether a promote request has arrived.
4497 : : */
4498 : : static bool
4499 : 11082 : CheckForStandbyTrigger(void)
4500 : : {
4501 [ + + ]: 11082 : if (LocalPromoteIsTriggered)
4502 : 48 : return true;
4503 : :
4504 [ + + + - ]: 11034 : if (IsPromoteSignaled() && CheckPromoteSignal())
4505 : : {
4506 [ + - ]: 42 : ereport(LOG, (errmsg("received promote request")));
4507 : 42 : RemovePromoteSignalFiles();
4508 : 42 : ResetPromoteSignaled();
4509 : 42 : SetPromoteIsTriggered();
4510 : 42 : return true;
4511 : : }
4512 : :
4513 : 10992 : return false;
4514 : : }
4515 : :
4516 : : /*
4517 : : * Remove the files signaling a standby promotion request.
4518 : : */
4519 : : void
4520 : 886 : RemovePromoteSignalFiles(void)
4521 : : {
4522 : 886 : unlink(PROMOTE_SIGNAL_FILE);
4523 : 886 : }
4524 : :
4525 : : /*
4526 : : * Check to see if a promote request has arrived.
4527 : : */
4528 : : bool
4529 : 625 : CheckPromoteSignal(void)
4530 : : {
4531 : : struct stat stat_buf;
4532 : :
4533 [ + + ]: 625 : if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
4534 : 84 : return true;
4535 : :
4536 : 541 : return false;
4537 : : }
4538 : :
4539 : : /*
4540 : : * Wake up startup process to replay newly arrived WAL, or to notice that
4541 : : * failover has been requested.
4542 : : */
4543 : : void
4544 : 39027 : WakeupRecovery(void)
4545 : : {
4546 : 39027 : SetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
4547 : 39027 : }
4548 : :
4549 : : /*
4550 : : * Schedule a walreceiver wakeup in the main recovery loop.
4551 : : */
4552 : : void
4553 : 2 : XLogRequestWalReceiverReply(void)
4554 : : {
4555 : 2 : doRequestWalReceiverReply = true;
4556 : 2 : }
4557 : :
4558 : : /*
4559 : : * Is HotStandby active yet? This is only important in special backends
4560 : : * since normal backends won't ever be able to connect until this returns
4561 : : * true. Postmaster knows this by way of signal, not via shared memory.
4562 : : *
4563 : : * Unlike testing standbyState, this works in any process that's connected to
4564 : : * shared memory. (And note that standbyState alone doesn't tell the truth
4565 : : * anyway.)
4566 : : */
4567 : : bool
4568 : 139 : HotStandbyActive(void)
4569 : : {
4570 : : /*
4571 : : * We check shared state each time only until Hot Standby is active. We
4572 : : * can't de-activate Hot Standby, so there's no need to keep checking
4573 : : * after the shared variable has once been seen true.
4574 : : */
4575 [ + + ]: 139 : if (LocalHotStandbyActive)
4576 : 15 : return true;
4577 : : else
4578 : : {
4579 : : /* spinlock is essential on machines with weak memory ordering! */
4580 [ - + ]: 124 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4581 : 124 : LocalHotStandbyActive = XLogRecoveryCtl->SharedHotStandbyActive;
4582 : 124 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4583 : :
4584 : 124 : return LocalHotStandbyActive;
4585 : : }
4586 : : }
4587 : :
4588 : : /*
4589 : : * Like HotStandbyActive(), but to be used only in WAL replay code,
4590 : : * where we don't need to ask any other process what the state is.
4591 : : */
4592 : : static bool
1401 heikki.linnakangas@i 4593 :UBC 0 : HotStandbyActiveInReplay(void)
4594 : : {
4595 [ # # # # ]: 0 : Assert(AmStartupProcess() || !IsPostmasterEnvironment);
4596 : 0 : return LocalHotStandbyActive;
4597 : : }
4598 : :
4599 : : /*
4600 : : * Get latest redo apply position.
4601 : : *
4602 : : * Exported to allow WALReceiver to read the pointer directly.
4603 : : */
4604 : : XLogRecPtr
1401 heikki.linnakangas@i 4605 :CBC 94815 : GetXLogReplayRecPtr(TimeLineID *replayTLI)
4606 : : {
4607 : : XLogRecPtr recptr;
4608 : : TimeLineID tli;
4609 : :
4610 [ + + ]: 94815 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4611 : 94815 : recptr = XLogRecoveryCtl->lastReplayedEndRecPtr;
4612 : 94815 : tli = XLogRecoveryCtl->lastReplayedTLI;
4613 : 94815 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4614 : :
4615 [ + + ]: 94815 : if (replayTLI)
4616 : 1730 : *replayTLI = tli;
4617 : 94815 : return recptr;
4618 : : }
4619 : :
4620 : :
4621 : : /*
4622 : : * Get position of last applied, or the record being applied.
4623 : : *
4624 : : * This is different from GetXLogReplayRecPtr() in that if a WAL
4625 : : * record is currently being applied, this includes that record.
4626 : : */
4627 : : XLogRecPtr
4628 : 6349 : GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
4629 : : {
4630 : : XLogRecPtr recptr;
4631 : : TimeLineID tli;
4632 : :
4633 [ + + ]: 6349 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4634 : 6349 : recptr = XLogRecoveryCtl->replayEndRecPtr;
4635 : 6349 : tli = XLogRecoveryCtl->replayEndTLI;
4636 : 6349 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4637 : :
4638 [ + - ]: 6349 : if (replayEndTLI)
4639 : 6349 : *replayEndTLI = tli;
4640 : 6349 : return recptr;
4641 : : }
4642 : :
4643 : : /*
4644 : : * Save timestamp of latest processed commit/abort record.
4645 : : *
4646 : : * We keep this in XLogRecoveryCtl, not a simple static variable, so that it can be
4647 : : * seen by processes other than the startup process. Note in particular
4648 : : * that CreateRestartPoint is executed in the checkpointer.
4649 : : */
4650 : : static void
4651 : 22086 : SetLatestXTime(TimestampTz xtime)
4652 : : {
4653 [ - + ]: 22086 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4654 : 22086 : XLogRecoveryCtl->recoveryLastXTime = xtime;
4655 : 22086 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4656 : 22086 : }
4657 : :
4658 : : /*
4659 : : * Fetch timestamp of latest processed commit/abort record.
4660 : : */
4661 : : TimestampTz
4662 : 344 : GetLatestXTime(void)
4663 : : {
4664 : : TimestampTz xtime;
4665 : :
4666 [ + + ]: 344 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4667 : 344 : xtime = XLogRecoveryCtl->recoveryLastXTime;
4668 : 344 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4669 : :
4670 : 344 : return xtime;
4671 : : }
4672 : :
4673 : : /*
4674 : : * Save timestamp of the next chunk of WAL records to apply.
4675 : : *
4676 : : * We keep this in XLogRecoveryCtl, not a simple static variable, so that it can be
4677 : : * seen by all backends.
4678 : : */
4679 : : static void
4680 : 8527 : SetCurrentChunkStartTime(TimestampTz xtime)
4681 : : {
4682 [ - + ]: 8527 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4683 : 8527 : XLogRecoveryCtl->currentChunkStartTime = xtime;
4684 : 8527 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4685 : 8527 : }
4686 : :
4687 : : /*
4688 : : * Fetch timestamp of latest processed commit/abort record.
4689 : : * Startup process maintains an accurate local copy in XLogReceiptTime
4690 : : */
4691 : : TimestampTz
4692 : 257 : GetCurrentChunkReplayStartTime(void)
4693 : : {
4694 : : TimestampTz xtime;
4695 : :
4696 [ - + ]: 257 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4697 : 257 : xtime = XLogRecoveryCtl->currentChunkStartTime;
4698 : 257 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4699 : :
4700 : 257 : return xtime;
4701 : : }
4702 : :
4703 : : /*
4704 : : * Returns time of receipt of current chunk of XLOG data, as well as
4705 : : * whether it was received from streaming replication or from archives.
4706 : : */
4707 : : void
4708 : 27 : GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
4709 : : {
4710 : : /*
4711 : : * This must be executed in the startup process, since we don't export the
4712 : : * relevant state to shared memory.
4713 : : */
4714 [ - + ]: 27 : Assert(InRecovery);
4715 : :
4716 : 27 : *rtime = XLogReceiptTime;
4717 : 27 : *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
4718 : 27 : }
4719 : :
4720 : : /*
4721 : : * Note that text field supplied is a parameter name and does not require
4722 : : * translation
4723 : : */
4724 : : void
4725 : 590 : RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
4726 : : {
4727 [ - + ]: 590 : if (currValue < minValue)
4728 : : {
1401 heikki.linnakangas@i 4729 [ # # ]:UBC 0 : if (HotStandbyActiveInReplay())
4730 : : {
4731 : 0 : bool warned_for_promote = false;
4732 : :
4733 [ # # ]: 0 : ereport(WARNING,
4734 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4735 : : errmsg("hot standby is not possible because of insufficient parameter settings"),
4736 : : errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
4737 : : param_name,
4738 : : currValue,
4739 : : minValue)));
4740 : :
4741 : 0 : SetRecoveryPause(true);
4742 : :
4743 [ # # ]: 0 : ereport(LOG,
4744 : : (errmsg("recovery has paused"),
4745 : : errdetail("If recovery is unpaused, the server will shut down."),
4746 : : errhint("You can then restart the server after making the necessary configuration changes.")));
4747 : :
4748 [ # # ]: 0 : while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
4749 : : {
288 4750 : 0 : ProcessStartupProcInterrupts();
4751 : :
1401 4752 [ # # ]: 0 : if (CheckForStandbyTrigger())
4753 : : {
4754 [ # # ]: 0 : if (!warned_for_promote)
4755 [ # # ]: 0 : ereport(WARNING,
4756 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4757 : : errmsg("promotion is not possible because of insufficient parameter settings"),
4758 : :
4759 : : /*
4760 : : * Repeat the detail from above so it's easy to find
4761 : : * in the log.
4762 : : */
4763 : : errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
4764 : : param_name,
4765 : : currValue,
4766 : : minValue),
4767 : : errhint("Restart the server after making the necessary configuration changes.")));
4768 : 0 : warned_for_promote = true;
4769 : : }
4770 : :
4771 : : /*
4772 : : * If recovery pause is requested then set it paused. While
4773 : : * we are in the loop, user might resume and pause again so
4774 : : * set this every time.
4775 : : */
4776 : 0 : ConfirmRecoveryPaused();
4777 : :
4778 : : /*
4779 : : * We wait on a condition variable that will wake us as soon
4780 : : * as the pause ends, but we use a timeout so we can check the
4781 : : * above conditions periodically too.
4782 : : */
4783 : 0 : ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
4784 : : WAIT_EVENT_RECOVERY_PAUSE);
4785 : : }
4786 : 0 : ConditionVariableCancelSleep();
4787 : : }
4788 : :
4789 [ # # ]: 0 : ereport(FATAL,
4790 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4791 : : errmsg("recovery aborted because of insufficient parameter settings"),
4792 : : /* Repeat the detail from above so it's easy to find in the log. */
4793 : : errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
4794 : : param_name,
4795 : : currValue,
4796 : : minValue),
4797 : : errhint("You can restart the server after making the necessary configuration changes.")));
4798 : : }
1401 heikki.linnakangas@i 4799 :CBC 590 : }
4800 : :
4801 : :
4802 : : /*
4803 : : * GUC check_hook for primary_slot_name
4804 : : */
4805 : : bool
1192 tgl@sss.pgh.pa.us 4806 : 1242 : check_primary_slot_name(char **newval, void **extra, GucSource source)
4807 : : {
4808 : : int err_code;
57 fujii@postgresql.org 4809 : 1242 : char *err_msg = NULL;
4810 : 1242 : char *err_hint = NULL;
4811 : :
1192 tgl@sss.pgh.pa.us 4812 [ + - + + ]: 1242 : if (*newval && strcmp(*newval, "") != 0 &&
57 fujii@postgresql.org 4813 [ - + ]:GNC 132 : !ReplicationSlotValidateNameInternal(*newval, false, &err_code,
4814 : : &err_msg, &err_hint))
4815 : : {
57 fujii@postgresql.org 4816 :UBC 0 : GUC_check_errcode(err_code);
4817 : 0 : GUC_check_errdetail("%s", err_msg);
4818 [ # # ]: 0 : if (err_hint != NULL)
4819 : 0 : GUC_check_errhint("%s", err_hint);
1192 tgl@sss.pgh.pa.us 4820 : 0 : return false;
4821 : : }
4822 : :
1192 tgl@sss.pgh.pa.us 4823 :CBC 1242 : return true;
4824 : : }
4825 : :
4826 : : /*
4827 : : * Recovery target settings: Only one of the several recovery_target* settings
4828 : : * may be set. Setting a second one results in an error. The global variable
4829 : : * recoveryTarget tracks which kind of recovery target was chosen. Other
4830 : : * variables store the actual target value (for example a string or a xid).
4831 : : * The assign functions of the parameters check whether a competing parameter
4832 : : * was already set. But we want to allow setting the same parameter multiple
4833 : : * times. We also want to allow unsetting a parameter and setting a different
4834 : : * one, so we unset recoveryTarget when the parameter is set to an empty
4835 : : * string.
4836 : : *
4837 : : * XXX this code is broken by design. Throwing an error from a GUC assign
4838 : : * hook breaks fundamental assumptions of guc.c. So long as all the variables
4839 : : * for which this can happen are PGC_POSTMASTER, the consequences are limited,
4840 : : * since we'd just abort postmaster startup anyway. Nonetheless it's likely
4841 : : * that we have odd behaviors such as unexpected GUC ordering dependencies.
4842 : : */
4843 : :
4844 : : pg_noreturn static void
4845 : 1 : error_multiple_recovery_targets(void)
4846 : : {
4847 [ + - ]: 1 : ereport(ERROR,
4848 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4849 : : errmsg("multiple recovery targets specified"),
4850 : : errdetail("At most one of \"recovery_target\", \"recovery_target_lsn\", \"recovery_target_name\", \"recovery_target_time\", \"recovery_target_xid\" may be set.")));
4851 : : }
4852 : :
4853 : : /*
4854 : : * GUC check_hook for recovery_target
4855 : : */
4856 : : bool
4857 : 1112 : check_recovery_target(char **newval, void **extra, GucSource source)
4858 : : {
4859 [ + + - + ]: 1112 : if (strcmp(*newval, "immediate") != 0 && strcmp(*newval, "") != 0)
4860 : : {
1192 tgl@sss.pgh.pa.us 4861 :UBC 0 : GUC_check_errdetail("The only allowed value is \"immediate\".");
4862 : 0 : return false;
4863 : : }
1192 tgl@sss.pgh.pa.us 4864 :CBC 1112 : return true;
4865 : : }
4866 : :
4867 : : /*
4868 : : * GUC assign_hook for recovery_target
4869 : : */
4870 : : void
4871 : 1112 : assign_recovery_target(const char *newval, void *extra)
4872 : : {
4873 [ - + ]: 1112 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
1192 tgl@sss.pgh.pa.us 4874 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_IMMEDIATE)
4875 : 0 : error_multiple_recovery_targets();
4876 : :
1192 tgl@sss.pgh.pa.us 4877 [ + - + + ]:CBC 1112 : if (newval && strcmp(newval, "") != 0)
4878 : 1 : recoveryTarget = RECOVERY_TARGET_IMMEDIATE;
4879 : : else
4880 : 1111 : recoveryTarget = RECOVERY_TARGET_UNSET;
4881 : 1112 : }
4882 : :
4883 : : /*
4884 : : * GUC check_hook for recovery_target_lsn
4885 : : */
4886 : : bool
4887 : 1118 : check_recovery_target_lsn(char **newval, void **extra, GucSource source)
4888 : : {
4889 [ + + ]: 1118 : if (strcmp(*newval, "") != 0)
4890 : : {
4891 : : XLogRecPtr lsn;
4892 : : XLogRecPtr *myextra;
104 michael@paquier.xyz 4893 :GNC 9 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4894 : :
4895 : 9 : lsn = pg_lsn_in_safe(*newval, (Node *) &escontext);
4896 [ - + ]: 9 : if (escontext.error_occurred)
1192 tgl@sss.pgh.pa.us 4897 :UBC 0 : return false;
4898 : :
266 dgustafsson@postgres 4899 :CBC 9 : myextra = (XLogRecPtr *) guc_malloc(LOG, sizeof(XLogRecPtr));
4900 [ - + ]: 9 : if (!myextra)
266 dgustafsson@postgres 4901 :UBC 0 : return false;
1192 tgl@sss.pgh.pa.us 4902 :CBC 9 : *myextra = lsn;
385 peter@eisentraut.org 4903 : 9 : *extra = myextra;
4904 : : }
1192 tgl@sss.pgh.pa.us 4905 : 1118 : return true;
4906 : : }
4907 : :
4908 : : /*
4909 : : * GUC assign_hook for recovery_target_lsn
4910 : : */
4911 : : void
4912 : 1118 : assign_recovery_target_lsn(const char *newval, void *extra)
4913 : : {
4914 [ - + ]: 1118 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
1192 tgl@sss.pgh.pa.us 4915 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_LSN)
4916 : 0 : error_multiple_recovery_targets();
4917 : :
1192 tgl@sss.pgh.pa.us 4918 [ + - + + ]:CBC 1118 : if (newval && strcmp(newval, "") != 0)
4919 : : {
4920 : 9 : recoveryTarget = RECOVERY_TARGET_LSN;
4921 : 9 : recoveryTargetLSN = *((XLogRecPtr *) extra);
4922 : : }
4923 : : else
4924 : 1109 : recoveryTarget = RECOVERY_TARGET_UNSET;
4925 : 1118 : }
4926 : :
4927 : : /*
4928 : : * GUC check_hook for recovery_target_name
4929 : : */
4930 : : bool
4931 : 1118 : check_recovery_target_name(char **newval, void **extra, GucSource source)
4932 : : {
4933 : : /* Use the value of newval directly */
4934 [ - + ]: 1118 : if (strlen(*newval) >= MAXFNAMELEN)
4935 : : {
580 peter@eisentraut.org 4936 :UBC 0 : GUC_check_errdetail("\"%s\" is too long (maximum %d characters).",
4937 : : "recovery_target_name", MAXFNAMELEN - 1);
1192 tgl@sss.pgh.pa.us 4938 : 0 : return false;
4939 : : }
1192 tgl@sss.pgh.pa.us 4940 :CBC 1118 : return true;
4941 : : }
4942 : :
4943 : : /*
4944 : : * GUC assign_hook for recovery_target_name
4945 : : */
4946 : : void
4947 : 1118 : assign_recovery_target_name(const char *newval, void *extra)
4948 : : {
4949 [ - + ]: 1118 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
1192 tgl@sss.pgh.pa.us 4950 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_NAME)
4951 : 0 : error_multiple_recovery_targets();
4952 : :
1192 tgl@sss.pgh.pa.us 4953 [ + - + + ]:CBC 1118 : if (newval && strcmp(newval, "") != 0)
4954 : : {
4955 : 6 : recoveryTarget = RECOVERY_TARGET_NAME;
4956 : 6 : recoveryTargetName = newval;
4957 : : }
4958 : : else
4959 : 1112 : recoveryTarget = RECOVERY_TARGET_UNSET;
4960 : 1118 : }
4961 : :
4962 : : /*
4963 : : * GUC check_hook for recovery_target_time
4964 : : *
4965 : : * The interpretation of the recovery_target_time string can depend on the
4966 : : * time zone setting, so we need to wait until after all GUC processing is
4967 : : * done before we can do the final parsing of the string. This check function
4968 : : * only does a parsing pass to catch syntax errors, but we store the string
4969 : : * and parse it again when we need to use it.
4970 : : */
4971 : : bool
4972 : 1114 : check_recovery_target_time(char **newval, void **extra, GucSource source)
4973 : : {
4974 [ + + ]: 1114 : if (strcmp(*newval, "") != 0)
4975 : : {
4976 : : /* reject some special values */
4977 [ + - ]: 3 : if (strcmp(*newval, "now") == 0 ||
4978 [ + - ]: 3 : strcmp(*newval, "today") == 0 ||
4979 [ + - ]: 3 : strcmp(*newval, "tomorrow") == 0 ||
4980 [ - + ]: 3 : strcmp(*newval, "yesterday") == 0)
4981 : : {
1192 tgl@sss.pgh.pa.us 4982 :UBC 0 : return false;
4983 : : }
4984 : :
4985 : : /*
4986 : : * parse timestamp value (see also timestamptz_in())
4987 : : */
4988 : : {
1192 tgl@sss.pgh.pa.us 4989 :CBC 3 : char *str = *newval;
4990 : : fsec_t fsec;
4991 : : struct pg_tm tt,
4992 : 3 : *tm = &tt;
4993 : : int tz;
4994 : : int dtype;
4995 : : int nf;
4996 : : int dterr;
4997 : : char *field[MAXDATEFIELDS];
4998 : : int ftype[MAXDATEFIELDS];
4999 : : char workbuf[MAXDATELEN + MAXDATEFIELDS];
5000 : : DateTimeErrorExtra dtextra;
5001 : : TimestampTz timestamp;
5002 : :
5003 : 3 : dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
5004 : : field, ftype, MAXDATEFIELDS, &nf);
5005 [ + - ]: 3 : if (dterr == 0)
1105 5006 : 3 : dterr = DecodeDateTime(field, ftype, nf,
5007 : : &dtype, tm, &fsec, &tz, &dtextra);
1192 5008 [ - + ]: 3 : if (dterr != 0)
1192 tgl@sss.pgh.pa.us 5009 :UBC 0 : return false;
1192 tgl@sss.pgh.pa.us 5010 [ - + ]:CBC 3 : if (dtype != DTK_DATE)
1192 tgl@sss.pgh.pa.us 5011 :UBC 0 : return false;
5012 : :
1192 tgl@sss.pgh.pa.us 5013 [ - + ]:CBC 3 : if (tm2timestamp(tm, fsec, &tz, ×tamp) != 0)
5014 : : {
386 alvherre@alvh.no-ip. 5015 :UBC 0 : GUC_check_errdetail("Timestamp out of range: \"%s\".", str);
1192 tgl@sss.pgh.pa.us 5016 : 0 : return false;
5017 : : }
5018 : : }
5019 : : }
1192 tgl@sss.pgh.pa.us 5020 :CBC 1114 : return true;
5021 : : }
5022 : :
5023 : : /*
5024 : : * GUC assign_hook for recovery_target_time
5025 : : */
5026 : : void
5027 : 1114 : assign_recovery_target_time(const char *newval, void *extra)
5028 : : {
5029 [ + + ]: 1114 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
5030 [ + - ]: 1 : recoveryTarget != RECOVERY_TARGET_TIME)
5031 : 1 : error_multiple_recovery_targets();
5032 : :
5033 [ + - + + ]: 1113 : if (newval && strcmp(newval, "") != 0)
5034 : 2 : recoveryTarget = RECOVERY_TARGET_TIME;
5035 : : else
5036 : 1111 : recoveryTarget = RECOVERY_TARGET_UNSET;
5037 : 1113 : }
5038 : :
5039 : : /*
5040 : : * GUC check_hook for recovery_target_timeline
5041 : : */
5042 : : bool
5043 : 1115 : check_recovery_target_timeline(char **newval, void **extra, GucSource source)
5044 : : {
5045 : : RecoveryTargetTimeLineGoal rttg;
5046 : : RecoveryTargetTimeLineGoal *myextra;
5047 : :
5048 [ - + ]: 1115 : if (strcmp(*newval, "current") == 0)
1192 tgl@sss.pgh.pa.us 5049 :UBC 0 : rttg = RECOVERY_TARGET_TIMELINE_CONTROLFILE;
1192 tgl@sss.pgh.pa.us 5050 [ + + ]:CBC 1115 : else if (strcmp(*newval, "latest") == 0)
5051 : 1112 : rttg = RECOVERY_TARGET_TIMELINE_LATEST;
5052 : : else
5053 : : {
5054 : : char *endp;
5055 : : uint64 timeline;
5056 : :
1192 tgl@sss.pgh.pa.us 5057 :GBC 3 : rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
5058 : :
5059 : 3 : errno = 0;
168 michael@paquier.xyz 5060 :GNC 3 : timeline = strtou64(*newval, &endp, 0);
5061 : :
5062 [ + + + - : 3 : if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
- + ]
5063 : : {
5064 : 1 : GUC_check_errdetail("\"%s\" is not a valid number.",
5065 : : "recovery_target_timeline");
5066 : 3 : return false;
5067 : : }
5068 : :
5069 [ + + + - ]: 2 : if (timeline < 1 || timeline > PG_UINT32_MAX)
5070 : : {
5071 : 2 : GUC_check_errdetail("\"%s\" must be between %u and %u.",
5072 : : "recovery_target_timeline", 1, UINT_MAX);
1192 tgl@sss.pgh.pa.us 5073 :GBC 2 : return false;
5074 : : }
5075 : : }
5076 : :
266 dgustafsson@postgres 5077 :CBC 1112 : myextra = (RecoveryTargetTimeLineGoal *) guc_malloc(LOG, sizeof(RecoveryTargetTimeLineGoal));
5078 [ - + ]: 1112 : if (!myextra)
266 dgustafsson@postgres 5079 :UBC 0 : return false;
1192 tgl@sss.pgh.pa.us 5080 :CBC 1112 : *myextra = rttg;
385 peter@eisentraut.org 5081 : 1112 : *extra = myextra;
5082 : :
1192 tgl@sss.pgh.pa.us 5083 : 1112 : return true;
5084 : : }
5085 : :
5086 : : /*
5087 : : * GUC assign_hook for recovery_target_timeline
5088 : : */
5089 : : void
5090 : 1112 : assign_recovery_target_timeline(const char *newval, void *extra)
5091 : : {
5092 : 1112 : recoveryTargetTimeLineGoal = *((RecoveryTargetTimeLineGoal *) extra);
5093 [ - + ]: 1112 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
1192 tgl@sss.pgh.pa.us 5094 :UBC 0 : recoveryTargetTLIRequested = (TimeLineID) strtoul(newval, NULL, 0);
5095 : : else
1192 tgl@sss.pgh.pa.us 5096 :CBC 1112 : recoveryTargetTLIRequested = 0;
5097 : 1112 : }
5098 : :
5099 : : /*
5100 : : * GUC check_hook for recovery_target_xid
5101 : : */
5102 : : bool
5103 : 1112 : check_recovery_target_xid(char **newval, void **extra, GucSource source)
5104 : : {
5105 [ + + ]: 1112 : if (strcmp(*newval, "") != 0)
5106 : : {
5107 : : TransactionId xid;
5108 : : TransactionId *myextra;
5109 : :
5110 : 1 : errno = 0;
5111 : 1 : xid = (TransactionId) strtou64(*newval, NULL, 0);
5112 [ + - - + ]: 1 : if (errno == EINVAL || errno == ERANGE)
1192 tgl@sss.pgh.pa.us 5113 :UBC 0 : return false;
5114 : :
266 dgustafsson@postgres 5115 :CBC 1 : myextra = (TransactionId *) guc_malloc(LOG, sizeof(TransactionId));
5116 [ - + ]: 1 : if (!myextra)
266 dgustafsson@postgres 5117 :UBC 0 : return false;
1192 tgl@sss.pgh.pa.us 5118 :CBC 1 : *myextra = xid;
385 peter@eisentraut.org 5119 : 1 : *extra = myextra;
5120 : : }
1192 tgl@sss.pgh.pa.us 5121 : 1112 : return true;
5122 : : }
5123 : :
5124 : : /*
5125 : : * GUC assign_hook for recovery_target_xid
5126 : : */
5127 : : void
5128 : 1112 : assign_recovery_target_xid(const char *newval, void *extra)
5129 : : {
5130 [ - + ]: 1112 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
1192 tgl@sss.pgh.pa.us 5131 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_XID)
5132 : 0 : error_multiple_recovery_targets();
5133 : :
1192 tgl@sss.pgh.pa.us 5134 [ + - + + ]:CBC 1112 : if (newval && strcmp(newval, "") != 0)
5135 : : {
5136 : 1 : recoveryTarget = RECOVERY_TARGET_XID;
5137 : 1 : recoveryTargetXid = *((TransactionId *) extra);
5138 : : }
5139 : : else
5140 : 1111 : recoveryTarget = RECOVERY_TARGET_UNSET;
5141 : 1112 : }
|