Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * standby.c
4 : : * Misc functions used in Hot Standby mode.
5 : : *
6 : : * All functions for handling RM_STANDBY_ID, which relate to
7 : : * AccessExclusiveLocks and starting snapshots for Hot Standby mode.
8 : : * Plus conflict recovery processing.
9 : : *
10 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
11 : : * Portions Copyright (c) 1994, Regents of the University of California
12 : : *
13 : : * IDENTIFICATION
14 : : * src/backend/storage/ipc/standby.c
15 : : *
16 : : *-------------------------------------------------------------------------
17 : : */
18 : : #include "postgres.h"
19 : : #include "access/transam.h"
20 : : #include "access/twophase.h"
21 : : #include "access/xact.h"
22 : : #include "access/xloginsert.h"
23 : : #include "access/xlogrecovery.h"
24 : : #include "access/xlogutils.h"
25 : : #include "miscadmin.h"
26 : : #include "pgstat.h"
27 : : #include "replication/slot.h"
28 : : #include "storage/bufmgr.h"
29 : : #include "storage/proc.h"
30 : : #include "storage/procarray.h"
31 : : #include "storage/sinvaladt.h"
32 : : #include "storage/standby.h"
33 : : #include "utils/hsearch.h"
34 : : #include "utils/injection_point.h"
35 : : #include "utils/ps_status.h"
36 : : #include "utils/timeout.h"
37 : : #include "utils/timestamp.h"
38 : : #include "utils/wait_event.h"
39 : :
/*
 * User-settable GUC parameters.
 *
 * Both delay settings are expressed in milliseconds.  A negative value makes
 * GetStandbyLimitTime() report "wait forever" for the corresponding WAL
 * source (archive vs. streaming).
 */
int			max_standby_archive_delay = 30 * 1000;
int			max_standby_streaming_delay = 30 * 1000;
bool		log_recovery_conflict_waits = false;
44 : :
45 : : /*
46 : : * Keep track of all the exclusive locks owned by original transactions.
47 : : * For each known exclusive lock, there is a RecoveryLockEntry in the
48 : : * RecoveryLockHash hash table. All RecoveryLockEntrys belonging to a
49 : : * given XID are chained together so that we can find them easily.
50 : : * For each original transaction that is known to have any such locks,
51 : : * there is a RecoveryLockXidEntry in the RecoveryLockXidHash hash table,
52 : : * which stores the head of the chain of its locks.
53 : : */
54 : : typedef struct RecoveryLockEntry
55 : : {
56 : : xl_standby_lock key; /* hash key: xid, dbOid, relOid */
57 : : struct RecoveryLockEntry *next; /* chain link */
58 : : } RecoveryLockEntry;
59 : :
60 : : typedef struct RecoveryLockXidEntry
61 : : {
62 : : TransactionId xid; /* hash key -- must be first */
63 : : struct RecoveryLockEntry *head; /* chain head */
64 : : } RecoveryLockXidEntry;
65 : :
66 : : static HTAB *RecoveryLockHash = NULL;
67 : : static HTAB *RecoveryLockXidHash = NULL;
68 : :
/*
 * Flags set by timeout handlers.
 *
 * ResolveRecoveryConflictWithLock() clears these before arming the matching
 * timeouts and again once its wait is over.
 */
static volatile sig_atomic_t got_standby_deadlock_timeout = false;
static volatile sig_atomic_t got_standby_delay_timeout = false;
static volatile sig_atomic_t got_standby_lock_timeout = false;
73 : :
74 : : static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
75 : : RecoveryConflictReason reason,
76 : : uint32 wait_event_info,
77 : : bool report_waiting);
78 : : static void SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason);
79 : : static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
80 : : static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
81 : : static const char *get_recovery_conflict_desc(RecoveryConflictReason reason);
82 : :
83 : : /*
84 : : * InitRecoveryTransactionEnvironment
85 : : * Initialize tracking of our primary's in-progress transactions.
86 : : *
87 : : * We need to issue shared invalidations and hold locks. Holding locks
88 : : * means others may want to wait on us, so we need to make a lock table
89 : : * vxact entry like a real transaction. We could create and delete
90 : : * lock table entries for each transaction but it's simpler just to create
91 : : * one permanent entry and leave it there all the time. Locks are then
92 : : * acquired and released as needed. Yes, this means you can see the
93 : : * Startup process in pg_locks once we have run this.
94 : : */
95 : : void
5981 simon@2ndQuadrant.co 96 :CBC 121 : InitRecoveryTransactionEnvironment(void)
97 : : {
98 : : VirtualTransactionId vxid;
99 : : HASHCTL hash_ctl;
100 : :
1307 tgl@sss.pgh.pa.us 101 [ - + ]: 121 : Assert(RecoveryLockHash == NULL); /* don't run this twice */
102 : :
103 : : /*
104 : : * Initialize the hash tables for tracking the locks held by each
105 : : * transaction.
106 : : */
107 : 121 : hash_ctl.keysize = sizeof(xl_standby_lock);
108 : 121 : hash_ctl.entrysize = sizeof(RecoveryLockEntry);
109 : 121 : RecoveryLockHash = hash_create("RecoveryLockHash",
110 : : 64,
111 : : &hash_ctl,
112 : : HASH_ELEM | HASH_BLOBS);
2870 tmunro@postgresql.or 113 : 121 : hash_ctl.keysize = sizeof(TransactionId);
1307 tgl@sss.pgh.pa.us 114 : 121 : hash_ctl.entrysize = sizeof(RecoveryLockXidEntry);
115 : 121 : RecoveryLockXidHash = hash_create("RecoveryLockXidHash",
116 : : 64,
117 : : &hash_ctl,
118 : : HASH_ELEM | HASH_BLOBS);
119 : :
120 : : /*
121 : : * Initialize shared invalidation management for Startup process, being
122 : : * careful to register ourselves as a sendOnly process so we don't need to
123 : : * read messages, nor will we get signaled when the queue starts filling
124 : : * up.
125 : : */
5981 simon@2ndQuadrant.co 126 : 121 : SharedInvalBackendInit(true);
127 : :
128 : : /*
129 : : * Lock a virtual transaction id for Startup process.
130 : : *
131 : : * We need to do GetNextLocalTransactionId() because
132 : : * SharedInvalBackendInit() leaves localTransactionId invalid and the lock
133 : : * manager doesn't like that at all.
134 : : *
135 : : * Note that we don't need to run XactLockTableInsert() because nobody
136 : : * needs to wait on xids. That sounds a little strange, but table locks
137 : : * are held by vxids and row level locks are held by xids. All queries
138 : : * hold AccessShareLocks so never block while we write or lock new rows.
139 : : */
793 heikki.linnakangas@i 140 : 121 : MyProc->vxid.procNumber = MyProcNumber;
141 : 121 : vxid.procNumber = MyProcNumber;
5981 simon@2ndQuadrant.co 142 : 121 : vxid.localTransactionId = GetNextLocalTransactionId();
143 : 121 : VirtualXactLockTableInsert(vxid);
144 : :
145 : 121 : standbyState = STANDBY_INITIALIZED;
146 : 121 : }
147 : :
148 : : /*
149 : : * ShutdownRecoveryTransactionEnvironment
150 : : * Shut down transaction tracking
151 : : *
152 : : * Prepare to switch from hot standby mode to normal operation. Shut down
153 : : * recovery-time transaction tracking.
154 : : *
155 : : * This must be called even in shutdown of startup process if transaction
156 : : * tracking has been initialized. Otherwise some locks the tracked
157 : : * transactions were holding will not be released and may interfere with
158 : : * the processes still running (though they will exit soon) at the exit of
159 : : * startup process.
160 : : */
161 : : void
162 : 177 : ShutdownRecoveryTransactionEnvironment(void)
163 : : {
164 : : /*
165 : : * Do nothing if RecoveryLockHash is NULL because that means that
166 : : * transaction tracking has not yet been initialized or has already been
167 : : * shut down. This makes it safe to have possibly-redundant calls of this
168 : : * function during process exit.
169 : : */
1307 tgl@sss.pgh.pa.us 170 [ + + ]: 177 : if (RecoveryLockHash == NULL)
1855 fujii@postgresql.org 171 : 56 : return;
172 : :
173 : : /* Mark all tracked in-progress transactions as finished. */
5981 simon@2ndQuadrant.co 174 : 121 : ExpireAllKnownAssignedTransactionIds();
175 : :
176 : : /* Release all locks the tracked transactions were holding */
177 : 121 : StandbyReleaseAllLocks();
178 : :
179 : : /* Destroy the lock hash tables. */
1307 tgl@sss.pgh.pa.us 180 : 121 : hash_destroy(RecoveryLockHash);
181 : 121 : hash_destroy(RecoveryLockXidHash);
182 : 121 : RecoveryLockHash = NULL;
183 : 121 : RecoveryLockXidHash = NULL;
184 : :
185 : : /* Cleanup our VirtualTransaction */
4905 simon@2ndQuadrant.co 186 : 121 : VirtualXactLockTableCleanup();
187 : : }
188 : :
189 : :
190 : : /*
191 : : * -----------------------------------------------------
192 : : * Standby wait timers and backend cancel logic
193 : : * -----------------------------------------------------
194 : : */
195 : :
196 : : /*
197 : : * Determine the cutoff time at which we want to start canceling conflicting
198 : : * transactions. Returns zero (a time safely in the past) if we are willing
199 : : * to wait forever.
200 : : */
201 : : static TimestampTz
5785 tgl@sss.pgh.pa.us 202 : 42 : GetStandbyLimitTime(void)
203 : : {
204 : : TimestampTz rtime;
205 : : bool fromStream;
206 : :
207 : : /*
208 : : * The cutoff time is the last WAL data receipt time plus the appropriate
209 : : * delay variable. Delay of -1 means wait forever.
210 : : */
211 : 42 : GetXLogReceiptTime(&rtime, &fromStream);
212 [ + - ]: 42 : if (fromStream)
213 : : {
214 [ - + ]: 42 : if (max_standby_streaming_delay < 0)
5785 tgl@sss.pgh.pa.us 215 :UBC 0 : return 0; /* wait forever */
5785 tgl@sss.pgh.pa.us 216 :CBC 42 : return TimestampTzPlusMilliseconds(rtime, max_standby_streaming_delay);
217 : : }
218 : : else
219 : : {
5785 tgl@sss.pgh.pa.us 220 [ # # ]:UBC 0 : if (max_standby_archive_delay < 0)
221 : 0 : return 0; /* wait forever */
222 : 0 : return TimestampTzPlusMilliseconds(rtime, max_standby_archive_delay);
223 : : }
224 : : }
225 : :
/*
 * Sleep interval, in microseconds, used by WaitExceedsMaxStandbyDelay().
 * It starts at STANDBY_INITIAL_WAIT_US and is doubled after each sleep.
 */
#define STANDBY_INITIAL_WAIT_US  1000
static int	standbyWait_us = STANDBY_INITIAL_WAIT_US;
228 : :
229 : : /*
230 : : * Standby wait logic for ResolveRecoveryConflictWithVirtualXIDs.
231 : : * We wait here for a while then return. If we decide we can't wait any
232 : : * more then we return true, if we can wait some more return false.
233 : : */
234 : : static bool
2223 fujii@postgresql.org 235 :CBC 28 : WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
236 : : {
237 : : TimestampTz ltime;
238 : :
3386 simon@2ndQuadrant.co 239 [ - + ]: 28 : CHECK_FOR_INTERRUPTS();
240 : :
241 : : /* Are we past the limit time? */
5785 tgl@sss.pgh.pa.us 242 : 28 : ltime = GetStandbyLimitTime();
243 [ + - + + ]: 28 : if (ltime && GetCurrentTimestamp() >= ltime)
5981 simon@2ndQuadrant.co 244 : 3 : return true;
245 : :
246 : : /*
247 : : * Sleep a bit (this is essential to avoid busy-waiting).
248 : : */
2223 fujii@postgresql.org 249 : 25 : pgstat_report_wait_start(wait_event_info);
5981 simon@2ndQuadrant.co 250 : 25 : pg_usleep(standbyWait_us);
2223 fujii@postgresql.org 251 : 25 : pgstat_report_wait_end();
252 : :
253 : : /*
254 : : * Progressively increase the sleep times, but not to more than 1s, since
255 : : * pg_usleep isn't interruptible on some platforms.
256 : : */
5981 simon@2ndQuadrant.co 257 : 25 : standbyWait_us *= 2;
258 [ + + ]: 25 : if (standbyWait_us > 1000000)
5981 simon@2ndQuadrant.co 259 :GBC 4 : standbyWait_us = 1000000;
260 : :
5981 simon@2ndQuadrant.co 261 :CBC 25 : return false;
262 : : }
263 : :
264 : : /*
265 : : * Log the recovery conflict.
266 : : *
267 : : * wait_start is the timestamp when the caller started to wait.
268 : : * now is the timestamp when this function has been called.
269 : : * wait_list is the list of virtual transaction ids assigned to
270 : : * conflicting processes. still_waiting indicates whether
271 : : * the startup process is still waiting for the recovery conflict
272 : : * to be resolved or not.
273 : : */
274 : : void
84 heikki.linnakangas@i 275 :GNC 10 : LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start,
276 : : TimestampTz now, VirtualTransactionId *wait_list,
277 : : bool still_waiting)
278 : : {
279 : : long secs;
280 : : int usecs;
281 : : long msecs;
282 : : StringInfoData buf;
1943 fujii@postgresql.org 283 :CBC 10 : int nprocs = 0;
284 : :
285 : : /*
286 : : * There must be no conflicting processes when the recovery conflict has
287 : : * already been resolved.
288 : : */
1938 289 [ + + - + ]: 10 : Assert(still_waiting || wait_list == NULL);
290 : :
1943 291 : 10 : TimestampDifference(wait_start, now, &secs, &usecs);
292 : 10 : msecs = secs * 1000 + usecs / 1000;
293 : 10 : usecs = usecs % 1000;
294 : :
295 [ + + ]: 10 : if (wait_list)
296 : : {
297 : : VirtualTransactionId *vxids;
298 : :
299 : : /* Construct a string of list of the conflicting processes */
300 : 3 : vxids = wait_list;
301 [ + + ]: 6 : while (VirtualTransactionIdIsValid(*vxids))
302 : : {
793 heikki.linnakangas@i 303 : 3 : PGPROC *proc = ProcNumberGetProc(vxids->procNumber);
304 : :
305 : : /* proc can be NULL if the target backend is not active */
1943 fujii@postgresql.org 306 [ + - ]: 3 : if (proc)
307 : : {
308 [ + - ]: 3 : if (nprocs == 0)
309 : : {
310 : 3 : initStringInfo(&buf);
311 : 3 : appendStringInfo(&buf, "%d", proc->pid);
312 : : }
313 : : else
1943 fujii@postgresql.org 314 :UBC 0 : appendStringInfo(&buf, ", %d", proc->pid);
315 : :
1943 fujii@postgresql.org 316 :CBC 3 : nprocs++;
317 : : }
318 : :
319 : 3 : vxids++;
320 : : }
321 : : }
322 : :
323 : : /*
324 : : * If wait_list is specified, report the list of PIDs of active
325 : : * conflicting backends in a detail message. Note that if all the backends
326 : : * in the list are not active, no detail message is logged.
327 : : */
1938 328 [ + + ]: 10 : if (still_waiting)
329 : : {
330 [ + - + + ]: 5 : ereport(LOG,
331 : : errmsg("recovery still waiting after %ld.%03d ms: %s",
332 : : msecs, usecs, get_recovery_conflict_desc(reason)),
333 : : nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
334 : : "Conflicting processes: %s.",
335 : : nprocs, buf.data) : 0);
336 : : }
337 : : else
338 : : {
339 [ + - ]: 5 : ereport(LOG,
340 : : errmsg("recovery finished waiting after %ld.%03d ms: %s",
341 : : msecs, usecs, get_recovery_conflict_desc(reason)));
342 : : }
343 : :
1943 344 [ + + ]: 10 : if (nprocs > 0)
345 : 3 : pfree(buf.data);
346 : 10 : }
347 : :
348 : : /*
349 : : * This is the main executioner for any query backend that conflicts with
350 : : * recovery processing. Judgement has already been passed on it within
351 : : * a specific rmgr. Here we just issue the orders to the procs. The procs
352 : : * then throw the required error as instructed.
353 : : *
354 : : * If report_waiting is true, "waiting" is reported in PS display and the
355 : : * wait for recovery conflict is reported in the log, if necessary. If
356 : : * the caller is responsible for reporting them, report_waiting should be
357 : : * false. Otherwise, both the caller and this function report the same
358 : : * thing unexpectedly.
359 : : */
360 : : static void
5981 simon@2ndQuadrant.co 361 : 16089 : ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
362 : : RecoveryConflictReason reason,
363 : : uint32 wait_event_info,
364 : : bool report_waiting)
365 : : {
2247 fujii@postgresql.org 366 : 16089 : TimestampTz waitStart = 0;
1170 drowley@postgresql.o 367 : 16089 : bool waiting = false;
1943 fujii@postgresql.org 368 : 16089 : bool logged_recovery_conflict = false;
369 : :
370 : : /* Fast exit, to avoid a kernel call if there's no work to be done. */
5618 rhaas@postgresql.org 371 [ + + ]: 16089 : if (!VirtualTransactionIdIsValid(*waitlist))
372 : 16085 : return;
373 : :
374 : : /* Set the wait start timestamp for reporting */
1943 fujii@postgresql.org 375 [ + + + + : 4 : if (report_waiting && (log_recovery_conflict_waits || update_process_title))
+ - ]
2247 376 : 3 : waitStart = GetCurrentTimestamp();
377 : :
5618 rhaas@postgresql.org 378 [ + + ]: 8 : while (VirtualTransactionIdIsValid(*waitlist))
379 : : {
380 : : /* reset standbyWait_us for each xact we wait for */
5981 simon@2ndQuadrant.co 381 : 4 : standbyWait_us = STANDBY_INITIAL_WAIT_US;
382 : :
383 : : /* wait until the virtual xid is gone */
5388 rhaas@postgresql.org 384 [ + + ]: 32 : while (!VirtualXactLock(*waitlist, false))
385 : : {
386 : : /* Is it time to kill it? */
2223 fujii@postgresql.org 387 [ + + ]: 28 : if (WaitExceedsMaxStandbyDelay(wait_event_info))
388 : : {
389 : : bool signaled;
390 : :
391 : : /*
392 : : * Now find out who to throw out of the balloon.
393 : : */
5981 simon@2ndQuadrant.co 394 [ - + ]: 3 : Assert(VirtualTransactionIdIsValid(*waitlist));
84 heikki.linnakangas@i 395 :GNC 3 : signaled = SignalRecoveryConflictWithVirtualXID(*waitlist, reason);
396 : :
397 : : /*
398 : : * Wait a little bit for it to die so that we avoid flooding
399 : : * an unresponsive backend when system is heavily loaded.
400 : : */
401 [ + - ]: 3 : if (signaled)
5847 tgl@sss.pgh.pa.us 402 :CBC 3 : pg_usleep(5000L);
403 : : }
404 : :
1170 drowley@postgresql.o 405 [ + + + + : 28 : if (waitStart != 0 && (!logged_recovery_conflict || !waiting))
+ - ]
406 : : {
1943 fujii@postgresql.org 407 : 27 : TimestampTz now = 0;
408 : : bool maybe_log_conflict;
409 : : bool maybe_update_title;
410 : :
411 [ + + + + ]: 27 : maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
1170 drowley@postgresql.o 412 [ + - + + ]: 27 : maybe_update_title = (update_process_title && !waiting);
413 : :
414 : : /* Get the current timestamp if not report yet */
1943 fujii@postgresql.org 415 [ + + + + ]: 27 : if (maybe_log_conflict || maybe_update_title)
416 : 23 : now = GetCurrentTimestamp();
417 : :
418 : : /*
419 : : * Report via ps if we have been waiting for more than 500
420 : : * msec (should that be configurable?)
421 : : */
422 [ + + + + ]: 50 : if (maybe_update_title &&
423 : 23 : TimestampDifferenceExceeds(waitStart, now, 500))
424 : : {
1170 drowley@postgresql.o 425 :GBC 1 : set_ps_display_suffix("waiting");
426 : 1 : waiting = true;
427 : : }
428 : :
429 : : /*
430 : : * Emit the log message if the startup process is waiting
431 : : * longer than deadlock_timeout for recovery conflict.
432 : : */
1943 fujii@postgresql.org 433 [ + + + + ]:CBC 35 : if (maybe_log_conflict &&
434 : 8 : TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
435 : : {
1938 436 : 2 : LogRecoveryConflict(reason, waitStart, now, waitlist, true);
1943 437 : 2 : logged_recovery_conflict = true;
438 : : }
439 : : }
440 : : }
441 : :
442 : : /* The virtual transaction is gone now, wait for the next one */
5981 simon@2ndQuadrant.co 443 : 4 : waitlist++;
444 : : }
445 : :
446 : : /*
447 : : * Emit the log message if recovery conflict was resolved but the startup
448 : : * process waited longer than deadlock_timeout for it.
449 : : */
1938 fujii@postgresql.org 450 [ + + ]: 4 : if (logged_recovery_conflict)
451 : 2 : LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
452 : : NULL, false);
453 : :
454 : : /* reset ps display to remove the suffix if we added one */
1170 drowley@postgresql.o 455 [ + + ]: 4 : if (waiting)
1170 drowley@postgresql.o 456 :GBC 1 : set_ps_display_remove_suffix();
457 : :
458 : : }
459 : :
460 : : /*
461 : : * Generate whatever recovery conflicts are needed to eliminate snapshots that
462 : : * might see XIDs <= snapshotConflictHorizon as still running.
463 : : *
464 : : * snapshotConflictHorizon cutoffs are our standard approach to generating
465 : : * granular recovery conflicts. Note that InvalidTransactionId values are
466 : : * interpreted as "definitely don't need any conflicts" here, which is a
467 : : * general convention that WAL records can (and often do) depend on.
468 : : */
469 : : void
1265 pg@bowt.ie 470 :CBC 16807 : ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
471 : : bool isCatalogRel,
472 : : RelFileLocator locator)
473 : : {
474 : : VirtualTransactionId *backends;
475 : :
476 : : /*
477 : : * If we get passed InvalidTransactionId then we do nothing (no conflict).
478 : : *
479 : : * This can happen whenever the changes in the WAL record do not affect
480 : : * visibility on a standby. For example: a record that only freezes an
481 : : * xmax from a locker.
482 : : *
483 : : * It's also quite common with records generated during index deletion
484 : : * (original execution of the deletion can reason that a recovery conflict
485 : : * which is sufficient for the deletion operation must take place before
486 : : * replay of the deletion record itself).
487 : : */
488 [ + + ]: 16807 : if (!TransactionIdIsValid(snapshotConflictHorizon))
5650 simon@2ndQuadrant.co 489 : 720 : return;
490 : :
1219 pg@bowt.ie 491 [ - + ]: 16087 : Assert(TransactionIdIsNormal(snapshotConflictHorizon));
1265 492 : 16087 : backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
493 : : locator.dbOid);
5955 simon@2ndQuadrant.co 494 : 16087 : ResolveRecoveryConflictWithVirtualXIDs(backends,
495 : : RECOVERY_CONFLICT_SNAPSHOT,
496 : : WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
497 : : true);
498 : :
499 : : /*
500 : : * Note that WaitExceedsMaxStandbyDelay() is not taken into account here
501 : : * (as opposed to ResolveRecoveryConflictWithVirtualXIDs() above). That
502 : : * seems OK, given that this kind of conflict should not normally be
503 : : * reached, e.g. due to using a physical replication slot.
504 : : */
133 msawada@postgresql.o 505 [ + + + + ]:GNC 16087 : if (IsLogicalDecodingEnabled() && isCatalogRel)
1124 andres@anarazel.de 506 :CBC 16 : InvalidateObsoleteReplicationSlots(RS_INVAL_HORIZON, 0, locator.dbOid,
507 : : snapshotConflictHorizon);
508 : : }
509 : :
510 : : /*
511 : : * Variant of ResolveRecoveryConflictWithSnapshot that works with
512 : : * FullTransactionId values
513 : : */
514 : : void
1265 pg@bowt.ie 515 : 66 : ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHorizon,
516 : : bool isCatalogRel,
517 : : RelFileLocator locator)
518 : : {
519 : : /*
520 : : * ResolveRecoveryConflictWithSnapshot operates on 32-bit TransactionIds,
521 : : * so truncate the logged FullTransactionId. If the logged value is very
522 : : * old, so that XID wrap-around already happened on it, there can't be any
523 : : * snapshots that still see it.
524 : : */
1896 525 : 66 : FullTransactionId nextXid = ReadNextFullTransactionId();
526 : : uint64 diff;
527 : :
528 : 66 : diff = U64FromFullTransactionId(nextXid) -
1265 529 : 66 : U64FromFullTransactionId(snapshotConflictHorizon);
1896 530 [ + - ]: 66 : if (diff < MaxTransactionId / 2)
531 : : {
532 : : TransactionId truncated;
533 : :
1265 534 : 66 : truncated = XidFromFullTransactionId(snapshotConflictHorizon);
1124 andres@anarazel.de 535 : 66 : ResolveRecoveryConflictWithSnapshot(truncated,
536 : : isCatalogRel,
537 : : locator);
538 : : }
1896 pg@bowt.ie 539 : 66 : }
540 : :
541 : : void
5955 simon@2ndQuadrant.co 542 : 1 : ResolveRecoveryConflictWithTablespace(Oid tsid)
543 : : {
544 : : VirtualTransactionId *temp_file_users;
545 : :
546 : : /*
547 : : * Standby users may be currently using this tablespace for their
548 : : * temporary files. We only care about current users because
549 : : * temp_tablespace parameter will just ignore tablespaces that no longer
550 : : * exist.
551 : : *
552 : : * Ask everybody to cancel their queries immediately so we can ensure no
553 : : * temp files remain and we can remove the tablespace. Nuke the entire
554 : : * site from orbit, it's the only way to be sure.
555 : : *
556 : : * XXX: We could work out the pids of active backends using this
557 : : * tablespace by examining the temp filenames in the directory. We would
558 : : * then convert the pids into VirtualXIDs before attempting to cancel
559 : : * them.
560 : : *
561 : : * We don't wait for commit because drop tablespace is non-transactional.
562 : : */
563 : 1 : temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId,
564 : : InvalidOid);
565 : 1 : ResolveRecoveryConflictWithVirtualXIDs(temp_file_users,
566 : : RECOVERY_CONFLICT_TABLESPACE,
567 : : WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
568 : : true);
569 : 1 : }
570 : :
571 : : void
572 : 14 : ResolveRecoveryConflictWithDatabase(Oid dbid)
573 : : {
574 : : /*
575 : : * We don't do ResolveRecoveryConflictWithVirtualXIDs() here since that
576 : : * only waits for transactions and completely idle sessions would block
577 : : * us. This is rare enough that we do this as simply as possible: no wait,
578 : : * just force them off immediately.
579 : : *
580 : : * No locking is required here because we already acquired
581 : : * AccessExclusiveLock. Anybody trying to connect while we do this will
582 : : * block during InitPostgres() and then disconnect when they see the
583 : : * database has been removed.
584 : : */
585 [ + + ]: 16 : while (CountDBBackends(dbid) > 0)
586 : : {
84 heikki.linnakangas@i 587 :GNC 2 : SignalRecoveryConflictWithDatabase(dbid, RECOVERY_CONFLICT_DATABASE);
588 : :
589 : : /*
590 : : * Wait awhile for them to die so that we avoid flooding an
591 : : * unresponsive backend when system is heavily loaded.
592 : : */
5955 simon@2ndQuadrant.co 593 :CBC 2 : pg_usleep(10000);
594 : : }
595 : 14 : }
596 : :
597 : : /*
598 : : * ResolveRecoveryConflictWithLock is called from ProcSleep()
599 : : * to resolve conflicts with other backends holding relation locks.
600 : : *
601 : : * The WaitLatch sleep normally done in ProcSleep()
602 : : * (when not InHotStandby) is performed here, for code clarity.
603 : : *
604 : : * We either resolve conflicts immediately or set a timeout to wake us at
605 : : * the limit of our patience.
606 : : *
607 : : * Resolve conflicts by canceling all backends holding a conflicting
608 : : * lock. As we are already queued to be granted the lock, no new lock
609 : : * requests conflicting with ours will be granted in the meantime.
610 : : *
611 : : * We also must check for deadlocks involving the Startup process and
612 : : * hot-standby backend processes. If deadlock_timeout is reached in
613 : : * this function, all the backends holding the conflicting locks are
614 : : * requested to check themselves for deadlocks.
615 : : *
616 : : * logging_conflict should be true if the recovery conflict has not been
617 : : * logged yet even though logging is enabled. After deadlock_timeout is
618 : : * reached and the request for deadlock check is sent, we wait again to
619 : : * be signaled by the release of the lock if logging_conflict is false.
620 : : * Otherwise we return without waiting again so that the caller can report
621 : : * the recovery conflict. In this case, then, this function is called again
622 : : * with logging_conflict=false (because the recovery conflict has already
623 : : * been logged) and we will wait again for the lock to be released.
624 : : */
625 : : void
1943 fujii@postgresql.org 626 : 3 : ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
627 : : {
628 : : TimestampTz ltime;
629 : : TimestampTz now;
630 : :
3708 simon@2ndQuadrant.co 631 [ - + ]: 3 : Assert(InHotStandby);
632 : :
633 : 3 : ltime = GetStandbyLimitTime();
1905 fujii@postgresql.org 634 : 3 : now = GetCurrentTimestamp();
635 : :
636 : : /*
637 : : * Update waitStart if first time through after the startup process
638 : : * started waiting for the lock. It should not be updated every time
639 : : * ResolveRecoveryConflictWithLock() is called during the wait.
640 : : *
641 : : * Use the current time obtained for comparison with ltime as waitStart
642 : : * (i.e., the time when this process started waiting for the lock). Since
643 : : * getting the current time newly can cause overhead, we reuse the
644 : : * already-obtained time to avoid that overhead.
645 : : *
646 : : * Note that waitStart is updated without holding the lock table's
647 : : * partition lock, to avoid the overhead by additional lock acquisition.
648 : : * This can cause "waitstart" in pg_locks to become NULL for a very short
649 : : * period of time after the wait started even though "granted" is false.
650 : : * This is OK in practice because we can assume that users are likely to
651 : : * look at "waitstart" when waiting for the lock for a long time.
652 : : */
653 [ + + ]: 3 : if (pg_atomic_read_u64(&MyProc->waitStart) == 0)
654 : 1 : pg_atomic_write_u64(&MyProc->waitStart, now);
655 : :
656 [ + + + - ]: 3 : if (now >= ltime && ltime != 0)
3708 simon@2ndQuadrant.co 657 : 1 : {
658 : : /*
659 : : * We're already behind, so clear a path as quickly as possible.
660 : : */
661 : : VirtualTransactionId *backends;
662 : :
2590 alvherre@alvh.no-ip. 663 : 1 : backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
664 : :
665 : : /*
666 : : * Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
667 : : * "waiting" in PS display by disabling its argument report_waiting
668 : : * because the caller, WaitOnLock(), has already reported that.
669 : : */
5955 simon@2ndQuadrant.co 670 : 1 : ResolveRecoveryConflictWithVirtualXIDs(backends,
671 : : RECOVERY_CONFLICT_LOCK,
2223 fujii@postgresql.org 672 : 1 : PG_WAIT_LOCK | locktag.locktag_type,
673 : : false);
674 : : }
675 : : else
676 : : {
677 : : /*
678 : : * Wait (or wait again) until ltime, and check for deadlocks as well
679 : : * if we will be waiting longer than deadlock_timeout
680 : : */
681 : : EnableTimeoutParams timeouts[2];
1945 682 : 2 : int cnt = 0;
683 : :
684 [ + - ]: 2 : if (ltime != 0)
685 : : {
686 : 2 : got_standby_lock_timeout = false;
687 : 2 : timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
688 : 2 : timeouts[cnt].type = TMPARAM_AT;
689 : 2 : timeouts[cnt].fin_time = ltime;
690 : 2 : cnt++;
691 : : }
692 : :
693 : 2 : got_standby_deadlock_timeout = false;
694 : 2 : timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
695 : 2 : timeouts[cnt].type = TMPARAM_AFTER;
696 : 2 : timeouts[cnt].delay_ms = DeadlockTimeout;
697 : 2 : cnt++;
698 : :
699 : 2 : enable_timeouts(timeouts, cnt);
700 : : }
701 : :
702 : : /* Wait to be signaled by the release of the Relation Lock */
3499 rhaas@postgresql.org 703 : 3 : ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
704 : :
705 : : /*
706 : : * Exit if ltime is reached. Then all the backends holding conflicting
707 : : * locks will be canceled in the next ResolveRecoveryConflictWithLock()
708 : : * call.
709 : : */
1945 fujii@postgresql.org 710 [ - + ]: 3 : if (got_standby_lock_timeout)
1945 fujii@postgresql.org 711 :UBC 0 : goto cleanup;
712 : :
1945 fujii@postgresql.org 713 [ + + ]:CBC 3 : if (got_standby_deadlock_timeout)
714 : : {
715 : : VirtualTransactionId *backends;
716 : :
717 : 2 : backends = GetLockConflicts(&locktag, AccessExclusiveLock, NULL);
718 : :
719 : : /* Quick exit if there's no work to be done */
720 [ - + ]: 2 : if (!VirtualTransactionIdIsValid(*backends))
1945 fujii@postgresql.org 721 :UBC 0 : goto cleanup;
722 : :
723 : : /*
724 : : * Send signals to all the backends holding the conflicting locks, to
725 : : * ask them to check themselves for deadlocks.
726 : : */
1945 fujii@postgresql.org 727 [ + + ]:CBC 4 : while (VirtualTransactionIdIsValid(*backends))
728 : : {
84 heikki.linnakangas@i 729 :GNC 2 : (void) SignalRecoveryConflictWithVirtualXID(*backends,
730 : : RECOVERY_CONFLICT_STARTUP_DEADLOCK);
1945 fujii@postgresql.org 731 :CBC 2 : backends++;
732 : : }
733 : :
734 : : /*
735 : : * Exit if the recovery conflict has not been logged yet even though
736 : : * logging is enabled, so that the caller can log that. Then
737 : : * RecoveryConflictWithLock() is called again and we will wait again
738 : : * for the lock to be released.
739 : : */
1943 740 [ + + ]: 2 : if (logging_conflict)
741 : 1 : goto cleanup;
742 : :
743 : : /*
744 : : * Wait again here to be signaled by the release of the Relation Lock,
745 : : * to prevent the subsequent RecoveryConflictWithLock() from causing
746 : : * deadlock_timeout and sending a request for deadlocks check again.
747 : : * Otherwise the request continues to be sent every deadlock_timeout
748 : : * until the relation locks are released or ltime is reached.
749 : : */
1945 750 : 1 : got_standby_deadlock_timeout = false;
751 : 1 : ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
752 : : }
753 : :
754 : 1 : cleanup:
755 : :
756 : : /*
757 : : * Clear any timeout requests established above. We assume here that the
758 : : * Startup process doesn't have any other outstanding timeouts than those
759 : : * used by this function. If that stops being true, we could cancel the
760 : : * timeouts individually, but that'd be slower.
761 : : */
3708 simon@2ndQuadrant.co 762 : 3 : disable_all_timeouts(false);
1945 fujii@postgresql.org 763 : 3 : got_standby_lock_timeout = false;
764 : 3 : got_standby_deadlock_timeout = false;
5955 simon@2ndQuadrant.co 765 : 3 : }
766 : :
767 : : /*
768 : : * ResolveRecoveryConflictWithBufferPin is called from LockBufferForCleanup()
769 : : * to resolve conflicts with other backends holding buffer pins.
770 : : *
771 : : * The ProcWaitForSignal() sleep normally done in LockBufferForCleanup()
772 : : * (when not InHotStandby) is performed here, for code clarity.
773 : : *
774 : : * We either resolve conflicts immediately or set a timeout to wake us at
775 : : * the limit of our patience.
776 : : *
777 : : * Resolve conflicts by sending a PROCSIG signal to all backends to check if
778 : : * they hold one of the buffer pins that is blocking Startup process. If so,
779 : : * those backends will take an appropriate error action, ERROR or FATAL.
780 : : *
781 : : * We also must check for deadlocks. Deadlocks occur because if queries
782 : : * wait on a lock, that must be behind an AccessExclusiveLock, which can only
783 : : * be cleared if the Startup process replays a transaction completion record.
784 : : * If Startup process is also waiting then that is a deadlock. The deadlock
785 : : * can occur if the query is waiting and then the Startup sleeps, or if
786 : : * Startup is sleeping and the query waits on a lock. We protect against
787 : : * only the former sequence here, the latter sequence is checked prior to
788 : : * the query sleeping, in CheckRecoveryConflictDeadlock().
789 : : *
790 : : * Deadlocks are extremely rare, and relatively expensive to check for,
791 : : * so we don't do a deadlock check right away ... only if we have had to wait
792 : : * at least deadlock_timeout.
793 : : */
794 : : void
5946 795 : 11 : ResolveRecoveryConflictWithBufferPin(void)
796 : : {
797 : : TimestampTz ltime;
798 : :
799 [ - + ]: 11 : Assert(InHotStandby);
800 : :
5785 tgl@sss.pgh.pa.us 801 : 11 : ltime = GetStandbyLimitTime();
802 : :
1945 fujii@postgresql.org 803 [ + + + - ]: 11 : if (GetCurrentTimestamp() >= ltime && ltime != 0)
804 : : {
805 : : /*
806 : : * We're already behind, so clear a path as quickly as possible.
807 : : */
84 heikki.linnakangas@i 808 :GNC 1 : SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
809 : : }
810 : : else
811 : : {
812 : : /*
813 : : * Wake up at ltime, and check for deadlocks as well if we will be
814 : : * waiting longer than deadlock_timeout
815 : : */
816 : : EnableTimeoutParams timeouts[2];
1945 fujii@postgresql.org 817 :CBC 10 : int cnt = 0;
818 : :
819 [ + - ]: 10 : if (ltime != 0)
820 : : {
821 : 10 : timeouts[cnt].id = STANDBY_TIMEOUT;
822 : 10 : timeouts[cnt].type = TMPARAM_AT;
823 : 10 : timeouts[cnt].fin_time = ltime;
824 : 10 : cnt++;
825 : : }
826 : :
827 : 10 : got_standby_deadlock_timeout = false;
828 : 10 : timeouts[cnt].id = STANDBY_DEADLOCK_TIMEOUT;
829 : 10 : timeouts[cnt].type = TMPARAM_AFTER;
830 : 10 : timeouts[cnt].delay_ms = DeadlockTimeout;
831 : 10 : cnt++;
832 : :
833 : 10 : enable_timeouts(timeouts, cnt);
834 : : }
835 : :
836 : : /*
837 : : * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
838 : : * by one of the timeouts established above.
839 : : *
840 : : * We assume that only UnpinBuffer() and the timeout requests established
841 : : * above can wake us up here. WakeupRecovery() called by walreceiver or
842 : : * SIGHUP signal handler, etc cannot do that because it uses the different
843 : : * latch from that ProcWaitForSignal() waits on.
844 : : */
153 andres@anarazel.de 845 :GNC 11 : ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
846 : :
1464 andres@anarazel.de 847 [ + + ]:CBC 11 : if (got_standby_delay_timeout)
84 heikki.linnakangas@i 848 :GNC 1 : SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
1464 andres@anarazel.de 849 [ + + ]:CBC 10 : else if (got_standby_deadlock_timeout)
850 : : {
851 : : /*
852 : : * Send out a request for hot-standby backends to check themselves for
853 : : * deadlocks.
854 : : *
855 : : * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
856 : : * to be signaled by UnpinBuffer() again and send a request for
857 : : * deadlocks check if deadlock_timeout happens. This causes the
858 : : * request to continue to be sent every deadlock_timeout until the
859 : : * buffer is unpinned or ltime is reached. This would increase the
860 : : * workload in the startup process and backends. In practice it may
861 : : * not be so harmful because the period that the buffer is kept pinned
862 : : * is basically no so long. But we should fix this?
863 : : */
84 heikki.linnakangas@i 864 :GNC 6 : SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
865 : : }
866 : :
867 : : /*
868 : : * Clear any timeout requests established above. We assume here that the
869 : : * Startup process doesn't have any other timeouts than what this function
870 : : * uses. If that stops being true, we could cancel the timeouts
871 : : * individually, but that'd be slower.
872 : : */
5041 alvherre@alvh.no-ip. 873 :CBC 11 : disable_all_timeouts(false);
1464 andres@anarazel.de 874 : 11 : got_standby_delay_timeout = false;
1945 fujii@postgresql.org 875 : 11 : got_standby_deadlock_timeout = false;
5946 simon@2ndQuadrant.co 876 : 11 : }
877 : :
878 : : static void
84 heikki.linnakangas@i 879 :GNC 8 : SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason)
880 : : {
881 [ + + - + ]: 8 : Assert(reason == RECOVERY_CONFLICT_BUFFERPIN ||
882 : : reason == RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
883 : :
884 : : /*
885 : : * We send signal to all backends to ask them if they are holding the
886 : : * buffer pin which is delaying the Startup process. Most of them will be
887 : : * innocent, but we let the SIGUSR1 handling in each backend decide their
888 : : * own fate.
889 : : */
890 : 8 : SignalRecoveryConflictWithDatabase(InvalidOid, reason);
5946 simon@2ndQuadrant.co 891 :CBC 8 : }
892 : :
893 : : /*
894 : : * In Hot Standby perform early deadlock detection. We abort the lock
895 : : * wait if we are about to sleep while holding the buffer pin that Startup
896 : : * process is waiting for.
897 : : *
898 : : * Note: this code is pessimistic, because there is no way for it to
899 : : * determine whether an actual deadlock condition is present: the lock we
900 : : * need to wait for might be unrelated to any held by the Startup process.
901 : : * Sooner or later, this mechanism should get ripped out in favor of somehow
902 : : * accounting for buffer locks in DeadLockCheck(). However, errors here
903 : : * seem to be very low-probability in practice, so for now it's not worth
904 : : * the trouble.
905 : : */
906 : : void
5390 tgl@sss.pgh.pa.us 907 : 1 : CheckRecoveryConflictDeadlock(void)
908 : : {
909 [ - + ]: 1 : Assert(!InRecovery); /* do not call in Startup process */
910 : :
5938 simon@2ndQuadrant.co 911 [ + - ]: 1 : if (!HoldingBufferPinThatDelaysRecovery())
912 : 1 : return;
913 : :
914 : : /*
915 : : * Error message should match ProcessInterrupts() but we avoid calling
916 : : * that because we aren't handling an interrupt at this point. Note that
917 : : * we only cancel the current transaction here, so if we are in a
918 : : * subtransaction and the pin is held by a parent, then the Startup
919 : : * process will continue to wait even though we have avoided deadlock.
920 : : */
5938 simon@2ndQuadrant.co 921 [ # # ]:UBC 0 : ereport(ERROR,
922 : : (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
923 : : errmsg("canceling statement due to conflict with recovery"),
924 : : errdetail("User transaction caused buffer deadlock with recovery.")));
925 : : }
926 : :
927 : :
928 : : /* --------------------------------
929 : : * timeout handler routines
930 : : * --------------------------------
931 : : */
932 : :
933 : : /*
934 : : * StandbyDeadLockHandler() will be called if STANDBY_DEADLOCK_TIMEOUT is
935 : : * exceeded.
936 : : */
937 : : void
5041 alvherre@alvh.no-ip. 938 :CBC 8 : StandbyDeadLockHandler(void)
939 : : {
1945 fujii@postgresql.org 940 : 8 : got_standby_deadlock_timeout = true;
5041 alvherre@alvh.no-ip. 941 : 8 : }
942 : :
943 : : /*
944 : : * StandbyTimeoutHandler() will be called if STANDBY_TIMEOUT is exceeded.
945 : : */
946 : : void
947 : 1 : StandbyTimeoutHandler(void)
948 : : {
1464 andres@anarazel.de 949 : 1 : got_standby_delay_timeout = true;
5041 alvherre@alvh.no-ip. 950 : 1 : }
951 : :
952 : : /*
953 : : * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
954 : : */
955 : : void
3708 simon@2ndQuadrant.co 956 : 1 : StandbyLockTimeoutHandler(void)
957 : : {
1945 fujii@postgresql.org 958 : 1 : got_standby_lock_timeout = true;
3708 simon@2ndQuadrant.co 959 : 1 : }
960 : :
961 : : /*
962 : : * -----------------------------------------------------
963 : : * Locking in Recovery Mode
964 : : * -----------------------------------------------------
965 : : *
966 : : * All locks are held by the Startup process using a single virtual
967 : : * transaction. This implementation is both simpler and in some senses,
968 : : * more correct. The locks held mean "some original transaction held
969 : : * this lock, so query access is not allowed at this time". So the Startup
970 : : * process is the proxy by which the original locks are implemented.
971 : : *
972 : : * We only keep track of AccessExclusiveLocks, which are only ever held by
973 : : * one transaction on one relation.
974 : : *
975 : : * We keep a table of known locks in the RecoveryLockHash hash table.
976 : : * The point of that table is to let us efficiently de-duplicate locks,
977 : : * which is important because checkpoints will re-report the same locks
978 : : * already held. There is also a RecoveryLockXidHash table with one entry
979 : : * per xid, which allows us to efficiently find all the locks held by a
980 : : * given original transaction.
981 : : *
982 : : * We use session locks rather than normal locks so we don't need
983 : : * ResourceOwners.
984 : : */
985 : :
986 : :
987 : : void
5981 988 : 29460 : StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
989 : : {
990 : : RecoveryLockXidEntry *xidentry;
991 : : RecoveryLockEntry *lockentry;
992 : : xl_standby_lock key;
993 : : LOCKTAG locktag;
994 : : bool found;
995 : :
996 : : /* Already processed? */
5216 997 [ + - + + ]: 58920 : if (!TransactionIdIsValid(xid) ||
998 [ - + ]: 58863 : TransactionIdDidCommit(xid) ||
999 : 29403 : TransactionIdDidAbort(xid))
5981 1000 : 57 : return;
1001 : :
876 michael@paquier.xyz 1002 [ - + ]: 29403 : elog(DEBUG4, "adding recovery lock: db %u rel %u", dbOid, relOid);
1003 : :
1004 : : /* dbOid is InvalidOid when we are locking a shared relation. */
5981 simon@2ndQuadrant.co 1005 [ - + ]: 29403 : Assert(OidIsValid(relOid));
1006 : :
1007 : : /* Create a hash entry for this xid, if we don't have one already. */
1307 tgl@sss.pgh.pa.us 1008 : 29403 : xidentry = hash_search(RecoveryLockXidHash, &xid, HASH_ENTER, &found);
2870 tmunro@postgresql.or 1009 [ + + ]: 29403 : if (!found)
1010 : : {
1307 tgl@sss.pgh.pa.us 1011 [ - + ]: 11669 : Assert(xidentry->xid == xid); /* dynahash should have set this */
1012 : 11669 : xidentry->head = NULL;
1013 : : }
1014 : :
1015 : : /* Create a hash entry for this lock, unless we have one already. */
1016 : 29403 : key.xid = xid;
1017 : 29403 : key.dbOid = dbOid;
1018 : 29403 : key.relOid = relOid;
1019 : 29403 : lockentry = hash_search(RecoveryLockHash, &key, HASH_ENTER, &found);
1020 [ + + ]: 29403 : if (!found)
1021 : : {
1022 : : /* It's new, so link it into the XID's list ... */
1023 : 27490 : lockentry->next = xidentry->head;
1024 : 27490 : xidentry->head = lockentry;
1025 : :
1026 : : /* ... and acquire the lock locally. */
1027 : 27490 : SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
1028 : :
1029 : 27490 : (void) LockAcquire(&locktag, AccessExclusiveLock, true, false);
1030 : : }
1031 : : }
1032 : :
1033 : : /*
1034 : : * Release all the locks associated with this RecoveryLockXidEntry.
1035 : : */
1036 : : static void
1037 : 11669 : StandbyReleaseXidEntryLocks(RecoveryLockXidEntry *xidentry)
1038 : : {
1039 : : RecoveryLockEntry *entry;
1040 : : RecoveryLockEntry *next;
1041 : :
1042 [ + + ]: 39159 : for (entry = xidentry->head; entry != NULL; entry = next)
1043 : : {
1044 : : LOCKTAG locktag;
1045 : :
876 michael@paquier.xyz 1046 [ - + ]: 27490 : elog(DEBUG4,
1047 : : "releasing recovery lock: xid %u db %u rel %u",
1048 : : entry->key.xid, entry->key.dbOid, entry->key.relOid);
1049 : : /* Release the lock ... */
1307 tgl@sss.pgh.pa.us 1050 : 27490 : SET_LOCKTAG_RELATION(locktag, entry->key.dbOid, entry->key.relOid);
2870 tmunro@postgresql.or 1051 [ - + ]: 27490 : if (!LockRelease(&locktag, AccessExclusiveLock, true))
1052 : : {
2870 tmunro@postgresql.or 1053 [ # # ]:UBC 0 : elog(LOG,
1054 : : "RecoveryLockHash contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
1055 : : entry->key.xid, entry->key.dbOid, entry->key.relOid);
1056 : 0 : Assert(false);
1057 : : }
1058 : : /* ... and remove the per-lock hash entry */
1307 tgl@sss.pgh.pa.us 1059 :CBC 27490 : next = entry->next;
1060 : 27490 : hash_search(RecoveryLockHash, entry, HASH_REMOVE, NULL);
1061 : : }
1062 : :
1063 : 11669 : xidentry->head = NULL; /* just for paranoia */
2870 tmunro@postgresql.or 1064 : 11669 : }
1065 : :
1066 : : /*
1067 : : * Release locks for specific XID, or all locks if it's InvalidXid.
1068 : : */
1069 : : static void
1070 : 12349 : StandbyReleaseLocks(TransactionId xid)
1071 : : {
1072 : : RecoveryLockXidEntry *entry;
1073 : :
1074 [ + - ]: 12349 : if (TransactionIdIsValid(xid))
1075 : : {
1307 tgl@sss.pgh.pa.us 1076 [ + + ]: 12349 : if ((entry = hash_search(RecoveryLockXidHash, &xid, HASH_FIND, NULL)))
1077 : : {
1078 : 11669 : StandbyReleaseXidEntryLocks(entry);
1079 : 11669 : hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
1080 : : }
1081 : : }
1082 : : else
2870 tmunro@postgresql.or 1083 :UBC 0 : StandbyReleaseAllLocks();
5981 simon@2ndQuadrant.co 1084 :CBC 12349 : }
1085 : :
1086 : : /*
1087 : : * Release locks for a transaction tree, starting at xid down, from
1088 : : * RecoveryLockXidHash.
1089 : : *
1090 : : * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
1091 : : * to remove any AccessExclusiveLocks requested by a transaction.
1092 : : */
1093 : : void
1094 : 11849 : StandbyReleaseLockTree(TransactionId xid, int nsubxids, TransactionId *subxids)
1095 : : {
1096 : : int i;
1097 : :
1098 : 11849 : StandbyReleaseLocks(xid);
1099 : :
1100 [ + + ]: 12349 : for (i = 0; i < nsubxids; i++)
1101 : 500 : StandbyReleaseLocks(subxids[i]);
1102 : 11849 : }
1103 : :
1104 : : /*
1105 : : * Called at end of recovery and when we see a shutdown checkpoint.
1106 : : */
1107 : : void
5216 1108 : 121 : StandbyReleaseAllLocks(void)
1109 : : {
1110 : : HASH_SEQ_STATUS status;
1111 : : RecoveryLockXidEntry *entry;
1112 : :
876 michael@paquier.xyz 1113 [ + + ]: 121 : elog(DEBUG2, "release all standby locks");
1114 : :
1307 tgl@sss.pgh.pa.us 1115 : 121 : hash_seq_init(&status, RecoveryLockXidHash);
2870 tmunro@postgresql.or 1116 [ - + ]: 121 : while ((entry = hash_seq_search(&status)))
1117 : : {
1307 tgl@sss.pgh.pa.us 1118 :UBC 0 : StandbyReleaseXidEntryLocks(entry);
1119 : 0 : hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
1120 : : }
5216 simon@2ndQuadrant.co 1121 :CBC 121 : }
1122 : :
1123 : : /*
1124 : : * StandbyReleaseOldLocks
1125 : : * Release standby locks held by top-level XIDs that aren't running,
1126 : : * as long as they're not prepared transactions.
1127 : : *
1128 : : * This is needed to prune the locks of crashed transactions, which didn't
1129 : : * write an ABORT/COMMIT record.
1130 : : */
1131 : : void
2880 1132 : 843 : StandbyReleaseOldLocks(TransactionId oldxid)
1133 : : {
1134 : : HASH_SEQ_STATUS status;
1135 : : RecoveryLockXidEntry *entry;
1136 : :
1307 tgl@sss.pgh.pa.us 1137 : 843 : hash_seq_init(&status, RecoveryLockXidHash);
2870 tmunro@postgresql.or 1138 [ + + ]: 1205 : while ((entry = hash_seq_search(&status)))
1139 : : {
1140 [ - + ]: 362 : Assert(TransactionIdIsValid(entry->xid));
1141 : :
1142 : : /* Skip if prepared transaction. */
1143 [ - + ]: 362 : if (StandbyTransactionIdIsPrepared(entry->xid))
2870 tmunro@postgresql.or 1144 :UBC 0 : continue;
1145 : :
1146 : : /* Skip if >= oldxid. */
2870 tmunro@postgresql.or 1147 [ + - ]:CBC 362 : if (!TransactionIdPrecedes(entry->xid, oldxid))
1148 : 362 : continue;
1149 : :
1150 : : /* Remove all locks and hash table entry. */
1307 tgl@sss.pgh.pa.us 1151 :UBC 0 : StandbyReleaseXidEntryLocks(entry);
1152 : 0 : hash_search(RecoveryLockXidHash, entry, HASH_REMOVE, NULL);
1153 : : }
5981 simon@2ndQuadrant.co 1154 :CBC 843 : }
1155 : :
1156 : : /*
1157 : : * --------------------------------------------------------------------
1158 : : * Recovery handling for Rmgr RM_STANDBY_ID
1159 : : *
1160 : : * These record types will only be created if XLogStandbyInfoActive()
1161 : : * --------------------------------------------------------------------
1162 : : */
1163 : :
1164 : : void
4184 heikki.linnakangas@i 1165 : 29296 : standby_redo(XLogReaderState *record)
1166 : : {
1167 : 29296 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
1168 : :
1169 : : /* Backup blocks are not used in standby records */
1170 [ - + ]: 29296 : Assert(!XLogRecHasAnyBlockRefs(record));
1171 : :
1172 : : /* Do nothing if we're not in hot standby mode */
5981 simon@2ndQuadrant.co 1173 [ + + ]: 29296 : if (standbyState == STANDBY_DISABLED)
1174 : 162 : return;
1175 : :
1176 [ + + ]: 29134 : if (info == XLOG_STANDBY_LOCK)
1177 : : {
1178 : 27667 : xl_standby_locks *xlrec = (xl_standby_locks *) XLogRecGetData(record);
1179 : : int i;
1180 : :
1181 [ + + ]: 57127 : for (i = 0; i < xlrec->nlocks; i++)
1182 : 29460 : StandbyAcquireAccessExclusiveLock(xlrec->locks[i].xid,
1183 : : xlrec->locks[i].dbOid,
1184 : : xlrec->locks[i].relOid);
1185 : : }
1186 [ + + ]: 1467 : else if (info == XLOG_RUNNING_XACTS)
1187 : : {
1188 : 776 : xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
1189 : : RunningTransactionsData running;
1190 : :
1191 : : /*
1192 : : * Records issued for specific database are not suitable for physical
1193 : : * replication because that affects the whole cluster. In particular,
1194 : : * the list of XID is probably incomplete here.
1195 : : */
28 alvherre@kurilemu.de 1196 [ - + ]:GNC 776 : if (OidIsValid(xlrec->dbid))
28 alvherre@kurilemu.de 1197 :UNC 0 : return;
1198 : :
5981 simon@2ndQuadrant.co 1199 :CBC 776 : running.xcnt = xlrec->xcnt;
4902 1200 : 776 : running.subxcnt = xlrec->subxcnt;
677 heikki.linnakangas@i 1201 : 776 : running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
5981 simon@2ndQuadrant.co 1202 : 776 : running.nextXid = xlrec->nextXid;
5835 1203 : 776 : running.latestCompletedXid = xlrec->latestCompletedXid;
5981 1204 : 776 : running.oldestRunningXid = xlrec->oldestRunningXid;
1205 : 776 : running.xids = xlrec->xids;
1206 : :
1207 : 776 : ProcArrayApplyRecoveryInfo(&running);
1208 : :
1209 : : /*
1210 : : * The startup process currently has no convenient way to schedule
1211 : : * stats to be reported. XLOG_RUNNING_XACTS records issued at a
1212 : : * regular cadence, making this a convenient location to report stats.
1213 : : * While these records aren't generated with wal_level=minimal, stats
1214 : : * also cannot be accessed during WAL replay.
1215 : : */
1058 andres@anarazel.de 1216 : 776 : pgstat_report_stat(true);
1217 : : }
3664 1218 [ + - ]: 691 : else if (info == XLOG_INVALIDATIONS)
1219 : : {
1220 : 691 : xl_invalidations *xlrec = (xl_invalidations *) XLogRecGetData(record);
1221 : :
1222 : 691 : ProcessCommittedInvalidationMessages(xlrec->msgs,
1223 : : xlrec->nmsgs,
1224 : 691 : xlrec->relcacheInitFileInval,
1225 : : xlrec->dbId,
1226 : : xlrec->tsId);
1227 : : }
1228 : : else
5204 tgl@sss.pgh.pa.us 1229 [ # # ]:UBC 0 : elog(PANIC, "standby_redo: unknown op code %u", info);
1230 : : }
1231 : :
1232 : : /*
1233 : : * Log details of the current snapshot to WAL. This allows the snapshot state
1234 : : * to be reconstructed on the standby and for logical decoding.
1235 : : *
1236 : : * This is used for Hot Standby as follows:
1237 : : *
1238 : : * We can move directly to STANDBY_SNAPSHOT_READY at startup if we
1239 : : * start from a shutdown checkpoint because we know nothing was running
1240 : : * at that time and our recovery snapshot is known empty. In the more
1241 : : * typical case of an online checkpoint we need to jump through a few
1242 : : * hoops to get a correct recovery snapshot and this requires a two or
1243 : : * sometimes a three stage process.
1244 : : *
1245 : : * The initial snapshot must contain all running xids and all current
1246 : : * AccessExclusiveLocks at a point in time on the standby. Assembling
1247 : : * that information while the server is running requires many and
1248 : : * various LWLocks, so we choose to derive that information piece by
1249 : : * piece and then re-assemble that info on the standby. When that
1250 : : * information is fully assembled we move to STANDBY_SNAPSHOT_READY.
1251 : : *
1252 : : * Since locking on the primary when we derive the information is not
1253 : : * strict, we note that there is a time window between the derivation and
1254 : : * writing to WAL of the derived information. That allows race conditions
1255 : : * that we must resolve, since xids and locks may enter or leave the
1256 : : * snapshot during that window. This creates the issue that an xid or
1257 : : * lock may start *after* the snapshot has been derived yet *before* the
1258 : : * snapshot is logged in the running xacts WAL record. We resolve this by
1259 : : * starting to accumulate changes at a point just prior to when we derive
1260 : : * the snapshot on the primary, then ignore duplicates when we later apply
1261 : : * the snapshot from the running xacts record. This is implemented during
1262 : : * CreateCheckPoint() where we use the logical checkpoint location as
1263 : : * our starting point and then write the running xacts record immediately
1264 : : * before writing the main checkpoint WAL record. Since we always start
1265 : : * up from a checkpoint and are immediately at our starting point, we
1266 : : * unconditionally move to STANDBY_INITIALIZED. After this point we
1267 : : * must do 4 things:
1268 : : * * move shared nextXid forwards as we see new xids
1269 : : * * extend the clog and subtrans with each new xid
1270 : : * * keep track of uncommitted known assigned xids
1271 : : * * keep track of uncommitted AccessExclusiveLocks
1272 : : *
1273 : : * When we see a commit/abort we must remove known assigned xids and locks
1274 : : * from the completing transaction. Attempted removals that cannot locate
1275 : : * an entry are expected and must not cause an error when we are in state
1276 : : * STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
1277 : : * KnownAssignedXidsRemove().
1278 : : *
1279 : : * Later, when we apply the running xact data we must be careful to ignore
1280 : : * transactions already committed, since those commits raced ahead when
1281 : : * making WAL entries.
1282 : : *
1283 : : * For logical decoding only the running xacts information is needed;
1284 : : * there's no need to look at the locking information, but it's logged anyway,
1285 : : * as there's no independent knob to just enable logical decoding. For
1286 : : * details of how this is used, check snapbuild.c's introductory comment.
1287 : : *
1288 : : * If 'dbid' is valid, only gather transactions running in that
1289 : : * database. snapbuild.c can use such running xacts information for faster
1290 : : * startup, but it still needs normal (cluster-wide) during the actual
1291 : : * decoding - see standby_decode() and SnapBuildProcessRunningXacts() for
1292 : : * details. Other processes (e.g. checkpointer) issue the cluster-wide records
1293 : : * whether logical decoding is active or not.
1294 : : *
1295 : : * Please be careful about using this argument for other purposes. In
1296 : : * particular, physical replication *must* ignore the database-specific
1297 : : * records, exactly because they do not cover the whole cluster - see
1298 : : * standby_redo().
1299 : : *
1300 : : * Returns the RecPtr of the last inserted record.
1301 : : */
1302 : : XLogRecPtr
28 alvherre@kurilemu.de 1303 :GNC 1537 : LogStandbySnapshot(Oid dbid)
1304 : : {
1305 : : XLogRecPtr recptr;
1306 : : RunningTransactions running;
1307 : : xl_standby_lock *locks;
1308 : : int nlocks;
133 msawada@postgresql.o 1309 : 1537 : bool logical_decoding_enabled = IsLogicalDecodingEnabled();
1310 : :
5981 simon@2ndQuadrant.co 1311 [ - + ]:CBC 1537 : Assert(XLogStandbyInfoActive());
1312 : :
1313 : : #ifdef USE_INJECTION_POINTS
392 akapila@postgresql.o 1314 [ - + ]: 1537 : if (IS_INJECTION_POINT_ATTACHED("skip-log-running-xacts"))
1315 : : {
1316 : : /*
1317 : : * This record could move slot's xmin forward during decoding, leading
1318 : : * to unpredictable results, so skip it when requested by the test.
1319 : : */
392 akapila@postgresql.o 1320 :UBC 0 : return GetInsertRecPtr();
1321 : : }
1322 : : #endif
1323 : :
1324 : : /*
1325 : : * Get details of any AccessExclusiveLocks being held at the moment.
1326 : : */
5981 simon@2ndQuadrant.co 1327 :CBC 1537 : locks = GetRunningTransactionLocks(&nlocks);
1328 [ + + ]: 1537 : if (nlocks > 0)
1329 : 186 : LogAccessExclusiveLocks(nlocks, locks);
4718 tgl@sss.pgh.pa.us 1330 : 1537 : pfree(locks);
1331 : :
1332 : : /*
1333 : : * Log details of all in-progress transactions. This should be the last
1334 : : * record we write, because standby will open up when it sees this.
1335 : : */
28 alvherre@kurilemu.de 1336 :GNC 1537 : running = GetRunningTransactionData(dbid);
1337 : :
1338 : : /*
1339 : : * GetRunningTransactionData() acquired ProcArrayLock, we must release it.
1340 : : * For Hot Standby this can be done before inserting the WAL record
1341 : : * because ProcArrayApplyRecoveryInfo() rechecks the commit status using
1342 : : * the clog. For logical decoding, though, the lock can't be released
1343 : : * early because the clog might be "in the future" from the POV of the
1344 : : * historic snapshot. This would allow for situations where we're waiting
1345 : : * for the end of a transaction listed in the xl_running_xacts record
1346 : : * which, according to the WAL, has committed before the xl_running_xacts
1347 : : * record. Fortunately this routine isn't executed frequently, and it's
1348 : : * only a shared lock.
1349 : : */
133 msawada@postgresql.o 1350 [ + + ]: 1537 : if (!logical_decoding_enabled)
4446 rhaas@postgresql.org 1351 :CBC 979 : LWLockRelease(ProcArrayLock);
1352 : :
4493 1353 : 1537 : recptr = LogCurrentRunningXacts(running);
1354 : :
1355 : : /* Release lock if we kept it longer ... */
133 msawada@postgresql.o 1356 [ + + ]:GNC 1537 : if (logical_decoding_enabled)
4446 rhaas@postgresql.org 1357 :CBC 558 : LWLockRelease(ProcArrayLock);
1358 : :
1359 : : /* GetRunningTransactionData() acquired XidGenLock, we must release it */
5628 heikki.linnakangas@i 1360 : 1537 : LWLockRelease(XidGenLock);
1361 : :
4493 rhaas@postgresql.org 1362 : 1537 : return recptr;
1363 : : }
1364 : :
1365 : : /*
1366 : : * Record an enhanced snapshot of running transactions into WAL.
1367 : : *
1368 : : * The definitions of RunningTransactionsData and xl_running_xacts are
1369 : : * similar. We keep them separate because xl_running_xacts is a contiguous
1370 : : * chunk of memory and never exists fully until it is assembled in WAL.
1371 : : * The inserted records are marked as not being important for durability,
1372 : : * to avoid triggering superfluous checkpoint / archiving activity.
1373 : : */
1374 : : static XLogRecPtr
5981 simon@2ndQuadrant.co 1375 : 1537 : LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
1376 : : {
1377 : : xl_running_xacts xlrec;
1378 : : XLogRecPtr recptr;
1379 : :
28 alvherre@kurilemu.de 1380 :GNC 1537 : xlrec.dbid = CurrRunningXacts->dbid;
5981 simon@2ndQuadrant.co 1381 :CBC 1537 : xlrec.xcnt = CurrRunningXacts->xcnt;
4902 1382 : 1537 : xlrec.subxcnt = CurrRunningXacts->subxcnt;
677 heikki.linnakangas@i 1383 : 1537 : xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
5981 simon@2ndQuadrant.co 1384 : 1537 : xlrec.nextXid = CurrRunningXacts->nextXid;
1385 : 1537 : xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
5835 1386 : 1537 : xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
1387 : :
1388 : : /* Header */
4184 heikki.linnakangas@i 1389 : 1537 : XLogBeginInsert();
3421 andres@anarazel.de 1390 : 1537 : XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
448 peter@eisentraut.org 1391 : 1537 : XLogRegisterData(&xlrec, MinSizeOfXactRunningXacts);
1392 : :
1393 : : /* array of TransactionIds */
5981 simon@2ndQuadrant.co 1394 [ + + ]: 1537 : if (xlrec.xcnt > 0)
448 peter@eisentraut.org 1395 : 523 : XLogRegisterData(CurrRunningXacts->xids,
3240 tgl@sss.pgh.pa.us 1396 : 523 : (xlrec.xcnt + xlrec.subxcnt) * sizeof(TransactionId));
1397 : :
4184 heikki.linnakangas@i 1398 : 1537 : recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
1399 : :
677 1400 [ + + ]: 1537 : if (xlrec.subxid_overflow)
876 michael@paquier.xyz 1401 [ - + ]: 2 : elog(DEBUG2,
1402 : : "snapshot of %d running transactions overflowed (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
1403 : : CurrRunningXacts->xcnt,
1404 : : LSN_FORMAT_ARGS(recptr),
1405 : : CurrRunningXacts->oldestRunningXid,
1406 : : CurrRunningXacts->latestCompletedXid,
1407 : : CurrRunningXacts->nextXid);
1408 : : else
1409 [ + + ]: 1535 : elog(DEBUG2,
1410 : : "snapshot of %d+%d running transaction ids (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
1411 : : CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
1412 : : LSN_FORMAT_ARGS(recptr),
1413 : : CurrRunningXacts->oldestRunningXid,
1414 : : CurrRunningXacts->latestCompletedXid,
1415 : : CurrRunningXacts->nextXid);
1416 : :
1417 : : /*
1418 : : * Ensure running_xacts information is synced to disk not too far in the
1419 : : * future. We don't want to stall anything though (i.e. use XLogFlush()),
1420 : : * so we let the wal writer do it during normal operation.
1421 : : * XLogSetAsyncXactLSN() conveniently will mark the LSN as to-be-synced
1422 : : * and nudge the WALWriter into action if sleeping. Check
1423 : : * XLogBackgroundFlush() for details why a record might not be flushed
1424 : : * without it.
1425 : : */
4493 rhaas@postgresql.org 1426 : 1537 : XLogSetAsyncXactLSN(recptr);
1427 : :
1428 : 1537 : return recptr;
1429 : : }
1430 : :
1431 : : /*
1432 : : * Wholesale logging of AccessExclusiveLocks. Other lock types need not be
1433 : : * logged, as described in backend/storage/lmgr/README.
1434 : : */
1435 : : static void
5981 simon@2ndQuadrant.co 1436 : 171865 : LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
1437 : : {
1438 : : xl_standby_locks xlrec;
1439 : :
1440 : 171865 : xlrec.nlocks = nlocks;
1441 : :
4184 heikki.linnakangas@i 1442 : 171865 : XLogBeginInsert();
448 peter@eisentraut.org 1443 : 171865 : XLogRegisterData(&xlrec, offsetof(xl_standby_locks, locks));
1444 : 171865 : XLogRegisterData(locks, nlocks * sizeof(xl_standby_lock));
3421 andres@anarazel.de 1445 : 171865 : XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
1446 : :
4184 heikki.linnakangas@i 1447 : 171865 : (void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
5981 simon@2ndQuadrant.co 1448 : 171865 : }
1449 : :
1450 : : /*
1451 : : * Individual logging of AccessExclusiveLocks for use during LockAcquire()
: : *
: : * Builds a single xl_standby_lock from the current transaction's id (as
: : * returned by GetCurrentTransactionId()) plus the given 'dbOid' and
: : * 'relOid', and passes it to LogAccessExclusiveLocks() as a one-element
: : * array. Once that call has returned,
: : * XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK is set in MyXactFlags.
1452 : : */
1453 : : void
1454 : 171679 : LogAccessExclusiveLock(Oid dbOid, Oid relOid)
1455 : : {
1456 : : xl_standby_lock xlrec;
1457 : :
3331 1458 : 171679 : xlrec.xid = GetCurrentTransactionId();
1459 : :
5981 1460 : 171679 : xlrec.dbOid = dbOid;
1461 : 171679 : xlrec.relOid = relOid;
1462 : :
1463 : 171679 : LogAccessExclusiveLocks(1, &xlrec); /* one-element "array" */
3331 1464 : 171679 : MyXactFlags |= XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK;
5981 1465 : 171679 : }
1466 : :
1467 : : /*
1468 : : * Prepare to log an AccessExclusiveLock, for use during LockAcquire()
: : *
: : * Takes no arguments and returns nothing. The only work done is the call
: : * to GetCurrentTransactionId(); its result is discarded because just the
: : * xid assignment itself is wanted here, for the reasons given below.
1469 : : */
1470 : : void
5636 1471 : 171893 : LogAccessExclusiveLockPrepare(void)
1472 : : {
1473 : : /*
1474 : : * Ensure that a TransactionId has been assigned to this transaction, for
1475 : : * two reasons, both related to lock release on the standby. First, we
1476 : : * must assign an xid so that RecordTransactionCommit() and
1477 : : * RecordTransactionAbort() do not optimise away the transaction
1478 : : * completion record which recovery relies upon to release locks. It's a
1479 : : * hack, but for a corner case not worth adding code for into the main
1480 : : * commit path. Second, we must assign an xid before the lock is recorded
1481 : : * in shared memory, otherwise a concurrently executing
1482 : : * GetRunningTransactionLocks() might see a lock associated with an
1483 : : * InvalidTransactionId which we later assert cannot happen.
1484 : : */
3331 1485 : 171893 : (void) GetCurrentTransactionId();
5636 1486 : 171893 : }
1487 : :
1488 : : /*
1489 : : * Emit WAL for invalidations. This currently is only used for commits without
1490 : : * an xid but which contain invalidations.
: : *
: : * 'msgs' points to 'nmsgs' SharedInvalidationMessage entries, which are
: : * copied into the record as-is after the fixed header.
: : * 'relcacheInitFileInval' is stored verbatim in the header. The header
: : * also records MyDatabaseId and MyDatabaseTableSpace. The result is one
: : * XLOG_INVALIDATIONS record for resource manager RM_STANDBY_ID; the value
: : * returned by XLogInsert() is not used.
1491 : : */
1492 : : void
3664 andres@anarazel.de 1493 : 11933 : LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
1494 : : bool relcacheInitFileInval)
1495 : : {
1496 : : xl_invalidations xlrec;
1497 : :
1498 : : /*
: : * prepare record: start from an all-zero struct, then fill each field.
: : * NOTE(review): the memset presumably keeps uninitialized padding bytes
: : * out of the WAL record -- confirm against the xl_invalidations layout.
: : */
1499 : 11933 : memset(&xlrec, 0, sizeof(xlrec));
1500 : 11933 : xlrec.dbId = MyDatabaseId;
1501 : 11933 : xlrec.tsId = MyDatabaseTableSpace;
1502 : 11933 : xlrec.relcacheInitFileInval = relcacheInitFileInval;
1503 : 11933 : xlrec.nmsgs = nmsgs;
1504 : :
1505 : : /* perform insertion: fixed header first, then the message array */
1506 : 11933 : XLogBeginInsert();
448 peter@eisentraut.org 1507 : 11933 : XLogRegisterData(&xlrec, MinSizeOfInvalidations);
1508 : 11933 : XLogRegisterData(msgs,
1509 : : nmsgs * sizeof(SharedInvalidationMessage));
3664 andres@anarazel.de 1510 : 11933 : XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
1511 : 11933 : }
1512 : :
1513 : : /*
: : * Return the description of recovery conflict.
: : *
: : * 'reason' selects one of the messages below; every message, including
: : * the fallback, is passed through _() before being returned. A value
: : * that matches no case label yields the initial "unknown reason" text,
: : * because the switch has no default branch.
: : *
: : * NOTE(review): the default branch is presumably omitted on purpose so
: : * that compilers can warn when a new RecoveryConflictReason value is left
: : * unhandled -- confirm before adding one.
: : */
1514 : : static const char *
84 heikki.linnakangas@i 1515 :GNC 10 : get_recovery_conflict_desc(RecoveryConflictReason reason)
1516 : : {
1772 peter@eisentraut.org 1517 :CBC 10 : const char *reasonDesc = _("unknown reason");
1518 : :
1943 fujii@postgresql.org 1519 [ + + + + : 10 : switch (reason)
- - - -
- ]
1520 : : {
84 heikki.linnakangas@i 1521 :GNC 4 : case RECOVERY_CONFLICT_BUFFERPIN:
1772 peter@eisentraut.org 1522 :CBC 4 : reasonDesc = _("recovery conflict on buffer pin");
1943 fujii@postgresql.org 1523 : 4 : break;
84 heikki.linnakangas@i 1524 :GNC 2 : case RECOVERY_CONFLICT_LOCK:
1772 peter@eisentraut.org 1525 :CBC 2 : reasonDesc = _("recovery conflict on lock");
1943 fujii@postgresql.org 1526 : 2 : break;
84 heikki.linnakangas@i 1527 :GNC 2 : case RECOVERY_CONFLICT_TABLESPACE:
1772 peter@eisentraut.org 1528 :CBC 2 : reasonDesc = _("recovery conflict on tablespace");
1943 fujii@postgresql.org 1529 : 2 : break;
84 heikki.linnakangas@i 1530 :GNC 2 : case RECOVERY_CONFLICT_SNAPSHOT:
1772 peter@eisentraut.org 1531 :CBC 2 : reasonDesc = _("recovery conflict on snapshot");
1943 fujii@postgresql.org 1532 : 2 : break;
84 heikki.linnakangas@i 1533 :UNC 0 : case RECOVERY_CONFLICT_LOGICALSLOT:
1124 andres@anarazel.de 1534 :UBC 0 : reasonDesc = _("recovery conflict on replication slot");
1535 : 0 : break;
84 heikki.linnakangas@i 1536 :UNC 0 : case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
1537 : 0 : reasonDesc = _("recovery conflict on deadlock");
1538 : 0 : break;
1539 : 0 : case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
1772 peter@eisentraut.org 1540 :UBC 0 : reasonDesc = _("recovery conflict on buffer deadlock");
1943 fujii@postgresql.org 1541 : 0 : break;
84 heikki.linnakangas@i 1542 :UNC 0 : case RECOVERY_CONFLICT_DATABASE:
1772 peter@eisentraut.org 1543 :UBC 0 : reasonDesc = _("recovery conflict on database");
1943 fujii@postgresql.org 1544 : 0 : break;
1545 : : }
1546 : :
1943 fujii@postgresql.org 1547 :CBC 10 : return reasonDesc;
1548 : : }
|