Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * storage.c
4 : : * code to create and destroy physical storage for relations
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/catalog/storage.c
12 : : *
13 : : * NOTES
14 : : * Some of this code used to be in storage/smgr/smgr.c, and the
15 : : * function names still reflect that.
16 : : *
17 : : *-------------------------------------------------------------------------
18 : : */
19 : :
20 : : #include "postgres.h"
21 : :
22 : : #include "access/visibilitymap.h"
23 : : #include "access/xact.h"
24 : : #include "access/xlog.h"
25 : : #include "access/xloginsert.h"
26 : : #include "access/xlogutils.h"
27 : : #include "catalog/storage.h"
28 : : #include "catalog/storage_xlog.h"
29 : : #include "miscadmin.h"
30 : : #include "pgstat.h"
31 : : #include "storage/bulk_write.h"
32 : : #include "storage/freespace.h"
33 : : #include "storage/proc.h"
34 : : #include "storage/smgr.h"
35 : : #include "utils/hsearch.h"
36 : : #include "utils/memutils.h"
37 : : #include "utils/rel.h"
38 : :
39 : : /* GUC variables */
40 : : int wal_skip_threshold = 2048; /* in kilobytes */
41 : :
42 : : /*
43 : : * We keep a list of all relations (represented as RelFileLocator values)
44 : : * that have been created or deleted in the current transaction. When
45 : : * a relation is created, we create the physical file immediately, but
46 : : * remember it so that we can delete the file again if the current
47 : : * transaction is aborted. Conversely, a deletion request is NOT
48 : : * executed immediately, but is just entered in the list. When and if
49 : : * the transaction commits, we can delete the physical file.
50 : : *
51 : : * To handle subtransactions, every entry is marked with its transaction
52 : : * nesting level. At subtransaction commit, we reassign the subtransaction's
53 : : * entries to the parent nesting level. At subtransaction abort, we can
54 : : * immediately execute the abort-time actions for all entries of the current
55 : : * nesting level.
56 : : *
57 : : * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
58 : : * unbetimes. It'd probably be OK to keep it in TopTransactionContext,
59 : : * but I'm being paranoid.
60 : : */
61 : :
62 : : typedef struct PendingRelDelete
63 : : {
64 : : RelFileLocator rlocator; /* relation that may need to be deleted */
65 : : ProcNumber procNumber; /* INVALID_PROC_NUMBER if not a temp rel */
66 : : bool atCommit; /* T=delete at commit; F=delete at abort */
67 : : int nestLevel; /* xact nesting level of request */
68 : : struct PendingRelDelete *next; /* linked-list link */
69 : : } PendingRelDelete;
70 : :
71 : : typedef struct PendingRelSync
72 : : {
73 : : RelFileLocator rlocator;
74 : : bool is_truncated; /* Has the file experienced truncation? */
75 : : } PendingRelSync;
76 : :
77 : : static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
78 : : static HTAB *pendingSyncHash = NULL;
79 : :
80 : :
81 : : /*
82 : : * AddPendingSync
83 : : * Queue an at-commit fsync.
84 : : */
85 : : static void
1158 rhaas@postgresql.org 86 :CBC 40737 : AddPendingSync(const RelFileLocator *rlocator)
87 : : {
88 : : PendingRelSync *pending;
89 : : bool found;
90 : :
91 : : /* create the hash if not yet */
1981 noah@leadboat.com 92 [ + + ]: 40737 : if (!pendingSyncHash)
93 : : {
94 : : HASHCTL ctl;
95 : :
1158 rhaas@postgresql.org 96 : 6340 : ctl.keysize = sizeof(RelFileLocator);
1981 noah@leadboat.com 97 : 6340 : ctl.entrysize = sizeof(PendingRelSync);
98 : 6340 : ctl.hcxt = TopTransactionContext;
99 : 6340 : pendingSyncHash = hash_create("pending sync hash", 16, &ctl,
100 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
101 : : }
102 : :
1158 rhaas@postgresql.org 103 : 40737 : pending = hash_search(pendingSyncHash, rlocator, HASH_ENTER, &found);
1981 noah@leadboat.com 104 [ - + ]: 40737 : Assert(!found);
105 : 40737 : pending->is_truncated = false;
106 : 40737 : }
107 : :
108 : : /*
109 : : * RelationCreateStorage
110 : : * Create physical storage for a relation.
111 : : *
112 : : * Create the underlying disk file storage for the relation. This only
113 : : * creates the main fork; additional forks are created lazily by the
114 : : * modules that need them.
115 : : *
116 : : * This function is transactional. The creation is WAL-logged, and if the
117 : : * transaction aborts later on, the storage will be destroyed. A caller
118 : : * that does not want the storage to be destroyed in case of an abort may
119 : : * pass register_delete = false.
120 : : */
121 : : SMgrRelation
1158 rhaas@postgresql.org 122 : 112819 : RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
123 : : bool register_delete)
124 : : {
125 : : SMgrRelation srel;
126 : : ProcNumber procNumber;
127 : : bool needs_wal;
128 : :
1981 noah@leadboat.com 129 [ - + ]: 112819 : Assert(!IsInParallelMode()); /* couldn't update pendingSyncHash */
130 : :
5381 rhaas@postgresql.org 131 [ + + + - ]: 112819 : switch (relpersistence)
132 : : {
133 : 3162 : case RELPERSISTENCE_TEMP:
552 heikki.linnakangas@i 134 [ + - ]: 3162 : procNumber = ProcNumberForTempRelations();
5381 rhaas@postgresql.org 135 : 3162 : needs_wal = false;
136 : 3162 : break;
5365 137 : 284 : case RELPERSISTENCE_UNLOGGED:
552 heikki.linnakangas@i 138 : 284 : procNumber = INVALID_PROC_NUMBER;
5365 rhaas@postgresql.org 139 : 284 : needs_wal = false;
140 : 284 : break;
5381 141 : 109373 : case RELPERSISTENCE_PERMANENT:
552 heikki.linnakangas@i 142 : 109373 : procNumber = INVALID_PROC_NUMBER;
5381 rhaas@postgresql.org 143 : 109373 : needs_wal = true;
144 : 109373 : break;
5381 rhaas@postgresql.org 145 :UBC 0 : default:
146 [ # # ]: 0 : elog(ERROR, "invalid relpersistence: %c", relpersistence);
147 : : return NULL; /* placate compiler */
148 : : }
149 : :
552 heikki.linnakangas@i 150 :CBC 112819 : srel = smgropen(rlocator, procNumber);
6135 151 : 112819 : smgrcreate(srel, MAIN_FORKNUM, false);
152 : :
5381 rhaas@postgresql.org 153 [ + + ]: 112819 : if (needs_wal)
1158 154 : 109373 : log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
155 : :
156 : : /*
157 : : * Add the relation to the list of stuff to delete at abort, if we are
158 : : * asked to do so.
159 : : */
1257 160 [ + + ]: 112819 : if (register_delete)
161 : : {
162 : : PendingRelDelete *pending;
163 : :
164 : : pending = (PendingRelDelete *)
165 : 59709 : MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
1158 166 : 59709 : pending->rlocator = rlocator;
552 heikki.linnakangas@i 167 : 59709 : pending->procNumber = procNumber;
1257 rhaas@postgresql.org 168 : 59709 : pending->atCommit = false; /* delete if abort */
169 : 59709 : pending->nestLevel = GetCurrentTransactionNestLevel();
170 : 59709 : pending->next = pendingDeletes;
171 : 59709 : pendingDeletes = pending;
172 : : }
173 : :
1981 noah@leadboat.com 174 [ + + + + ]: 112819 : if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
175 : : {
552 heikki.linnakangas@i 176 [ - + ]: 38939 : Assert(procNumber == INVALID_PROC_NUMBER);
1158 rhaas@postgresql.org 177 : 38939 : AddPendingSync(&rlocator);
178 : : }
179 : :
2322 andres@anarazel.de 180 : 112819 : return srel;
181 : : }
182 : :
183 : : /*
184 : : * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL.
185 : : */
186 : : void
1158 rhaas@postgresql.org 187 : 127206 : log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
188 : : {
189 : : xl_smgr_create xlrec;
190 : :
191 : : /*
192 : : * Make an XLOG entry reporting the file creation.
193 : : */
194 : 127206 : xlrec.rlocator = *rlocator;
5365 195 : 127206 : xlrec.forkNum = forkNum;
196 : :
3943 heikki.linnakangas@i 197 : 127206 : XLogBeginInsert();
207 peter@eisentraut.org 198 : 127206 : XLogRegisterData(&xlrec, sizeof(xlrec));
3943 heikki.linnakangas@i 199 : 127206 : XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
5365 rhaas@postgresql.org 200 : 127206 : }
201 : :
202 : : /*
203 : : * RelationDropStorage
204 : : * Schedule unlinking of physical storage at transaction commit.
205 : : */
206 : : void
6135 heikki.linnakangas@i 207 : 36126 : RelationDropStorage(Relation rel)
208 : : {
209 : : PendingRelDelete *pending;
210 : :
211 : : /* Add the relation to the list of stuff to delete at commit */
212 : : pending = (PendingRelDelete *)
213 : 36126 : MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
1158 rhaas@postgresql.org 214 : 36126 : pending->rlocator = rel->rd_locator;
552 heikki.linnakangas@i 215 : 36126 : pending->procNumber = rel->rd_backend;
6135 216 : 36126 : pending->atCommit = true; /* delete if commit */
217 : 36126 : pending->nestLevel = GetCurrentTransactionNestLevel();
218 : 36126 : pending->next = pendingDeletes;
219 : 36126 : pendingDeletes = pending;
220 : :
221 : : /*
222 : : * NOTE: if the relation was created in this transaction, it will now be
223 : : * present in the pending-delete list twice, once with atCommit true and
224 : : * once with atCommit false. Hence, it will be physically deleted at end
225 : : * of xact in either case (and the other entry will be ignored by
226 : : * smgrDoPendingDeletes, so no error will occur). We could instead remove
227 : : * the existing list entry and delete the physical file immediately, but
228 : : * for now I'll keep the logic simple.
229 : : */
230 : :
231 : 36126 : RelationCloseSmgr(rel);
232 : 36126 : }
233 : :
234 : : /*
235 : : * RelationPreserveStorage
236 : : * Mark a relation as not to be deleted after all.
237 : : *
238 : : * We need this function because relation mapping changes are committed
239 : : * separately from commit of the whole transaction, so it's still possible
240 : : * for the transaction to abort after the mapping update is done.
241 : : * When a new physical relation is installed in the map, it would be
242 : : * scheduled for delete-on-abort, so we'd delete it, and be in trouble.
243 : : * The relation mapper fixes this by telling us to not delete such relations
244 : : * after all as part of its commit.
245 : : *
246 : : * We also use this to reuse an old build of an index during ALTER TABLE, this
247 : : * time removing the delete-at-commit entry.
248 : : *
249 : : * No-op if the relation is not among those scheduled for deletion.
250 : : */
251 : : void
1158 rhaas@postgresql.org 252 : 5792 : RelationPreserveStorage(RelFileLocator rlocator, bool atCommit)
253 : : {
254 : : PendingRelDelete *pending;
255 : : PendingRelDelete *prev;
256 : : PendingRelDelete *next;
257 : :
5690 tgl@sss.pgh.pa.us 258 : 5792 : prev = NULL;
259 [ + + ]: 33269 : for (pending = pendingDeletes; pending != NULL; pending = next)
260 : : {
261 : 27477 : next = pending->next;
1158 rhaas@postgresql.org 262 [ + + + - : 27477 : if (RelFileLocatorEquals(rlocator, pending->rlocator)
+ - ]
5164 263 [ + + ]: 498 : && pending->atCommit == atCommit)
264 : : {
265 : : /* unlink and delete list entry */
5690 tgl@sss.pgh.pa.us 266 [ + + ]: 495 : if (prev)
267 : 352 : prev->next = next;
268 : : else
269 : 143 : pendingDeletes = next;
270 : 495 : pfree(pending);
271 : : /* prev does not change */
272 : : }
273 : : else
274 : : {
275 : : /* unrelated entry, don't touch it */
276 : 26982 : prev = pending;
277 : : }
278 : : }
279 : 5792 : }
280 : :
281 : : /*
282 : : * RelationTruncate
283 : : * Physically truncate a relation to the specified number of blocks.
284 : : *
285 : : * This includes getting rid of any buffers for the blocks that are to be
286 : : * dropped.
287 : : */
288 : : void
6135 heikki.linnakangas@i 289 : 573 : RelationTruncate(Relation rel, BlockNumber nblocks)
290 : : {
291 : : bool fsm;
292 : : bool vm;
2174 fujii@postgresql.org 293 : 573 : bool need_fsm_vacuum = false;
294 : : ForkNumber forks[MAX_FORKNUM];
295 : : BlockNumber old_blocks[MAX_FORKNUM];
296 : : BlockNumber blocks[MAX_FORKNUM];
1941 tgl@sss.pgh.pa.us 297 : 573 : int nforks = 0;
298 : : SMgrRelation reln;
299 : :
300 : : /*
301 : : * Make sure smgr_targblock etc aren't pointing somewhere past new end.
302 : : * (Note: don't rely on this reln pointer below this loop.)
303 : : */
1517 304 : 573 : reln = RelationGetSmgr(rel);
305 : 573 : reln->smgr_targblock = InvalidBlockNumber;
1863 tmunro@postgresql.or 306 [ + + ]: 2865 : for (int i = 0; i <= MAX_FORKNUM; ++i)
1517 tgl@sss.pgh.pa.us 307 : 2292 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
308 : :
309 : : /* Prepare for truncation of MAIN fork of the relation */
2174 fujii@postgresql.org 310 : 573 : forks[nforks] = MAIN_FORKNUM;
260 tmunro@postgresql.or 311 : 573 : old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
2174 fujii@postgresql.org 312 : 573 : blocks[nforks] = nblocks;
313 : 573 : nforks++;
314 : :
315 : : /* Prepare for truncation of the FSM if it exists */
1517 tgl@sss.pgh.pa.us 316 : 573 : fsm = smgrexists(RelationGetSmgr(rel), FSM_FORKNUM);
6135 heikki.linnakangas@i 317 [ + + ]: 573 : if (fsm)
318 : : {
2174 fujii@postgresql.org 319 : 172 : blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
320 [ + - ]: 172 : if (BlockNumberIsValid(blocks[nforks]))
321 : : {
322 : 172 : forks[nforks] = FSM_FORKNUM;
260 tmunro@postgresql.or 323 : 172 : old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
2174 fujii@postgresql.org 324 : 172 : nforks++;
325 : 172 : need_fsm_vacuum = true;
326 : : }
327 : : }
328 : :
329 : : /* Prepare for truncation of the visibility map too if it exists */
1517 tgl@sss.pgh.pa.us 330 : 573 : vm = smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM);
6121 heikki.linnakangas@i 331 [ + + ]: 573 : if (vm)
332 : : {
2174 fujii@postgresql.org 333 : 172 : blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
334 [ + + ]: 172 : if (BlockNumberIsValid(blocks[nforks]))
335 : : {
336 : 61 : forks[nforks] = VISIBILITYMAP_FORKNUM;
260 tmunro@postgresql.or 337 : 61 : old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
2174 fujii@postgresql.org 338 : 61 : nforks++;
339 : : }
340 : : }
341 : :
1981 noah@leadboat.com 342 : 573 : RelationPreTruncate(rel);
343 : :
344 : : /*
345 : : * The code which follows can interact with concurrent checkpoints in two
346 : : * separate ways.
347 : : *
348 : : * First, the truncation operation might drop buffers that the checkpoint
349 : : * otherwise would have flushed. If it does, then it's essential that the
350 : : * files actually get truncated on disk before the checkpoint record is
351 : : * written. Otherwise, if reply begins from that checkpoint, the
352 : : * to-be-truncated blocks might still exist on disk but have older
353 : : * contents than expected, which can cause replay to fail. It's OK for the
354 : : * blocks to not exist on disk at all, but not for them to have the wrong
355 : : * contents. For this reason, we need to set DELAY_CHKPT_COMPLETE while
356 : : * this code executes.
357 : : *
358 : : * Second, the call to smgrtruncate() below will in turn call
359 : : * RegisterSyncRequest(). We need the sync request created by that call to
360 : : * be processed before the checkpoint completes. CheckPointGuts() will
361 : : * call ProcessSyncRequests(), but if we register our sync request after
362 : : * that happens, then the WAL record for the truncation could end up
363 : : * preceding the checkpoint record, while the actual sync doesn't happen
364 : : * until the next checkpoint. To prevent that, we need to set
365 : : * DELAY_CHKPT_START here. That way, if the XLOG_SMGR_TRUNCATE precedes
366 : : * the redo pointer of a concurrent checkpoint, we're guaranteed that the
367 : : * corresponding sync request will be processed before the checkpoint
368 : : * completes.
369 : : */
277 tmunro@postgresql.or 370 [ - + ]: 573 : Assert((MyProc->delayChkptFlags & (DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE)) == 0);
371 : 573 : MyProc->delayChkptFlags |= DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE;
372 : :
373 : : /*
374 : : * We WAL-log the truncation first and then truncate in a critical
375 : : * section. Truncation drops buffers, even if dirty, and then truncates
376 : : * disk files. All of that work needs to complete before the lock is
377 : : * released, or else old versions of pages on disk that are missing recent
378 : : * changes would become accessible again. We'll try the whole operation
379 : : * again in crash recovery if we panic, but even then we can't give up
380 : : * because we don't want standbys' relation sizes to diverge and break
381 : : * replay or visibility invariants downstream. The critical section also
382 : : * suppresses interrupts.
383 : : *
384 : : * (See also visibilitymap.c if changing this code.)
385 : : */
260 386 : 573 : START_CRIT_SECTION();
387 : :
5381 rhaas@postgresql.org 388 [ + + + + : 573 : if (RelationNeedsWAL(rel))
+ + + - ]
389 : : {
390 : : /*
391 : : * Make an XLOG entry reporting the file truncation.
392 : : */
393 : : XLogRecPtr lsn;
394 : : xl_smgr_truncate xlrec;
395 : :
6135 heikki.linnakangas@i 396 : 196 : xlrec.blkno = nblocks;
1158 rhaas@postgresql.org 397 : 196 : xlrec.rlocator = rel->rd_locator;
3368 398 : 196 : xlrec.flags = SMGR_TRUNCATE_ALL;
399 : :
3943 heikki.linnakangas@i 400 : 196 : XLogBeginInsert();
207 peter@eisentraut.org 401 : 196 : XLogRegisterData(&xlrec, sizeof(xlrec));
402 : :
3943 heikki.linnakangas@i 403 : 196 : lsn = XLogInsert(RM_SMGR_ID,
404 : : XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
405 : :
406 : : /*
407 : : * Flush, because otherwise the truncation of the main relation might
408 : : * hit the disk before the WAL record, and the truncation of the FSM
409 : : * or visibility map. If we crashed during that window, we'd be left
410 : : * with a truncated heap, but the FSM or visibility map would still
411 : : * contain entries for the non-existent heap pages, and standbys would
412 : : * also never replay the truncation.
413 : : */
260 tmunro@postgresql.or 414 : 196 : XLogFlush(lsn);
415 : : }
416 : :
417 : : /*
418 : : * This will first remove any buffers from the buffer pool that should no
419 : : * longer exist after truncation is complete, and then truncate the
420 : : * corresponding files on disk.
421 : : */
422 : 573 : smgrtruncate(RelationGetSmgr(rel), forks, nforks, old_blocks, blocks);
423 : :
424 [ - + ]: 573 : END_CRIT_SECTION();
425 : :
426 : : /* We've done all the critical work, so checkpoints are OK now. */
277 427 : 573 : MyProc->delayChkptFlags &= ~(DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE);
428 : :
429 : : /*
430 : : * Update upper-level FSM pages to account for the truncation. This is
431 : : * important because the just-truncated pages were likely marked as
432 : : * all-free, and would be preferentially selected.
433 : : *
434 : : * NB: There's no point in delaying checkpoints until this is done.
435 : : * Because the FSM is not WAL-logged, we have to be prepared for the
436 : : * possibility of corruption after a crash anyway.
437 : : */
2174 fujii@postgresql.org 438 [ + + ]: 573 : if (need_fsm_vacuum)
439 : 172 : FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
6135 heikki.linnakangas@i 440 : 573 : }
441 : :
442 : : /*
443 : : * RelationPreTruncate
444 : : * Perform AM-independent work before a physical truncation.
445 : : *
446 : : * If an access method's relation_nontransactional_truncate does not call
447 : : * RelationTruncate(), it must call this before decreasing the table size.
448 : : */
449 : : void
1981 noah@leadboat.com 450 : 573 : RelationPreTruncate(Relation rel)
451 : : {
452 : : PendingRelSync *pending;
453 : :
454 [ + + ]: 573 : if (!pendingSyncHash)
455 : 570 : return;
456 : :
1517 tgl@sss.pgh.pa.us 457 : 3 : pending = hash_search(pendingSyncHash,
1158 rhaas@postgresql.org 458 : 3 : &(RelationGetSmgr(rel)->smgr_rlocator.locator),
459 : : HASH_FIND, NULL);
1981 noah@leadboat.com 460 [ + - ]: 3 : if (pending)
461 : 3 : pending->is_truncated = true;
462 : : }
463 : :
464 : : /*
465 : : * Copy a fork's data, block by block.
466 : : *
467 : : * Note that this requires that there is no dirty data in shared buffers. If
468 : : * it's possible that there are, callers need to flush those using
469 : : * e.g. FlushRelationBuffers(rel).
470 : : *
471 : : * Also note that this is frequently called via locutions such as
472 : : * RelationCopyStorage(RelationGetSmgr(rel), ...);
473 : : * That's safe only because we perform only smgr and WAL operations here.
474 : : * If we invoked anything else, a relcache flush could cause our SMgrRelation
475 : : * argument to become a dangling pointer.
476 : : */
477 : : void
2354 andres@anarazel.de 478 : 89 : RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
479 : : ForkNumber forkNum, char relpersistence)
480 : : {
481 : : bool use_wal;
482 : : bool copying_initfork;
483 : : BlockNumber nblocks;
484 : : BlockNumber blkno;
485 : : BulkWriteState *bulkstate;
486 : :
487 : : /*
488 : : * The init fork for an unlogged relation in many respects has to be
489 : : * treated the same as normal relation, changes need to be WAL logged and
490 : : * it needs to be synced to disk.
491 : : */
492 [ - + - - ]: 89 : copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
493 : : forkNum == INIT_FORKNUM;
494 : :
495 : : /*
496 : : * We need to log the copied data in WAL iff WAL archiving/streaming is
497 : : * enabled AND it's a permanent relation. This gives the same answer as
498 : : * "RelationNeedsWAL(rel) || copying_initfork", because we know the
499 : : * current operation created new relation storage.
500 : : */
501 [ + + + + ]: 97 : use_wal = XLogIsNeeded() &&
502 [ - + ]: 8 : (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
503 : :
561 heikki.linnakangas@i 504 : 89 : bulkstate = smgr_bulk_start_smgr(dst, forkNum, use_wal);
505 : :
2354 andres@anarazel.de 506 : 89 : nblocks = smgrnblocks(src, forkNum);
507 : :
508 [ + + ]: 686 : for (blkno = 0; blkno < nblocks; blkno++)
509 : : {
510 : : BulkWriteBuffer buf;
511 : : int piv_flags;
512 : : bool checksum_failure;
513 : : bool verified;
514 : :
515 : : /* If we got a cancel signal during the copy of the data, quit */
516 [ - + ]: 597 : CHECK_FOR_INTERRUPTS();
517 : :
561 heikki.linnakangas@i 518 : 597 : buf = smgr_bulk_get_buf(bulkstate);
519 : 597 : smgrread(src, forkNum, blkno, (Page) buf);
520 : :
160 andres@anarazel.de 521 : 597 : piv_flags = PIV_LOG_WARNING;
522 [ - + ]: 597 : if (ignore_checksum_failure)
160 andres@anarazel.de 523 :UBC 0 : piv_flags |= PIV_IGNORE_CHECKSUM_FAILURE;
160 andres@anarazel.de 524 :CBC 597 : verified = PageIsVerified((Page) buf, blkno, piv_flags,
525 : : &checksum_failure);
161 526 [ - + ]: 597 : if (checksum_failure)
527 : : {
161 andres@anarazel.de 528 :UBC 0 : RelFileLocatorBackend rloc = src->smgr_rlocator;
529 : :
160 530 : 0 : pgstat_prepare_report_checksum_failure(rloc.locator.dbOid);
161 531 : 0 : pgstat_report_checksum_failures_in_db(rloc.locator.dbOid, 1);
532 : : }
533 : :
161 andres@anarazel.de 534 [ - + ]:CBC 597 : if (!verified)
535 : : {
536 : : /*
537 : : * For paranoia's sake, capture the file path before invoking the
538 : : * ereport machinery. This guards against the possibility of a
539 : : * relcache flush caused by, e.g., an errcontext callback.
540 : : * (errcontext callbacks shouldn't be risking any such thing, but
541 : : * people have been known to forget that rule.)
542 : : */
193 andres@anarazel.de 543 :UBC 0 : RelPathStr relpath = relpathbackend(src->smgr_rlocator.locator,
544 : : src->smgr_rlocator.backend,
545 : : forkNum);
546 : :
2354 547 [ # # ]: 0 : ereport(ERROR,
548 : : (errcode(ERRCODE_DATA_CORRUPTED),
549 : : errmsg("invalid page in block %u of relation \"%s\"",
550 : : blkno, relpath.str)));
551 : : }
552 : :
553 : : /*
554 : : * Queue the page for WAL-logging and writing out. Unfortunately we
555 : : * don't know what kind of a page this is, so we have to log the full
556 : : * page including any unused space.
557 : : */
561 heikki.linnakangas@i 558 :CBC 597 : smgr_bulk_write(bulkstate, blkno, buf, false);
559 : : }
560 : 89 : smgr_bulk_finish(bulkstate);
2354 andres@anarazel.de 561 : 89 : }
562 : :
563 : : /*
564 : : * RelFileLocatorSkippingWAL
565 : : * Check if a BM_PERMANENT relfilelocator is using WAL.
566 : : *
567 : : * Changes to certain relations must not write WAL; see "Skipping WAL for
568 : : * New RelFileLocator" in src/backend/access/transam/README. Though it is
569 : : * known from Relation efficiently, this function is intended for the code
570 : : * paths not having access to Relation.
571 : : */
572 : : bool
1158 rhaas@postgresql.org 573 : 1086341 : RelFileLocatorSkippingWAL(RelFileLocator rlocator)
574 : : {
1981 noah@leadboat.com 575 [ + + + + ]: 2085055 : if (!pendingSyncHash ||
1158 rhaas@postgresql.org 576 : 998714 : hash_search(pendingSyncHash, &rlocator, HASH_FIND, NULL) == NULL)
1981 noah@leadboat.com 577 : 1069437 : return false;
578 : :
579 : 16904 : return true;
580 : : }
581 : :
582 : : /*
583 : : * EstimatePendingSyncsSpace
584 : : * Estimate space needed to pass syncs to parallel workers.
585 : : */
586 : : Size
587 : 456 : EstimatePendingSyncsSpace(void)
588 : : {
589 : : int64 entries;
590 : :
591 [ + + ]: 456 : entries = pendingSyncHash ? hash_get_num_entries(pendingSyncHash) : 0;
1158 rhaas@postgresql.org 592 : 456 : return mul_size(1 + entries, sizeof(RelFileLocator));
593 : : }
594 : :
595 : : /*
596 : : * SerializePendingSyncs
597 : : * Serialize syncs for parallel workers.
598 : : */
599 : : void
1981 noah@leadboat.com 600 : 456 : SerializePendingSyncs(Size maxSize, char *startAddress)
601 : : {
602 : : HTAB *tmphash;
603 : : HASHCTL ctl;
604 : : HASH_SEQ_STATUS scan;
605 : : PendingRelSync *sync;
606 : : PendingRelDelete *delete;
607 : : RelFileLocator *src;
1158 rhaas@postgresql.org 608 : 456 : RelFileLocator *dest = (RelFileLocator *) startAddress;
609 : :
1981 noah@leadboat.com 610 [ + + ]: 456 : if (!pendingSyncHash)
611 : 360 : goto terminate;
612 : :
613 : : /* Create temporary hash to collect active relfilelocators */
1158 rhaas@postgresql.org 614 : 96 : ctl.keysize = sizeof(RelFileLocator);
615 : 96 : ctl.entrysize = sizeof(RelFileLocator);
1981 noah@leadboat.com 616 : 96 : ctl.hcxt = CurrentMemoryContext;
1158 rhaas@postgresql.org 617 : 96 : tmphash = hash_create("tmp relfilelocators",
618 : : hash_get_num_entries(pendingSyncHash), &ctl,
619 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
620 : :
621 : : /* collect all rlocator from pending syncs */
1981 noah@leadboat.com 622 : 96 : hash_seq_init(&scan, pendingSyncHash);
623 [ + + ]: 827 : while ((sync = (PendingRelSync *) hash_seq_search(&scan)))
1158 rhaas@postgresql.org 624 : 731 : (void) hash_search(tmphash, &sync->rlocator, HASH_ENTER, NULL);
625 : :
626 : : /* remove deleted rnodes */
1981 noah@leadboat.com 627 [ + + ]: 997 : for (delete = pendingDeletes; delete != NULL; delete = delete->next)
628 [ + + ]: 901 : if (delete->atCommit)
943 peter@eisentraut.org 629 : 165 : (void) hash_search(tmphash, &delete->rlocator,
630 : : HASH_REMOVE, NULL);
631 : :
1981 noah@leadboat.com 632 : 96 : hash_seq_init(&scan, tmphash);
1158 rhaas@postgresql.org 633 [ + + ]: 666 : while ((src = (RelFileLocator *) hash_seq_search(&scan)))
1981 noah@leadboat.com 634 : 570 : *dest++ = *src;
635 : :
636 : 96 : hash_destroy(tmphash);
637 : :
638 : 456 : terminate:
1158 rhaas@postgresql.org 639 [ + + - + : 456 : MemSet(dest, 0, sizeof(RelFileLocator));
- - - - -
- ]
1981 noah@leadboat.com 640 : 456 : }
641 : :
642 : : /*
643 : : * RestorePendingSyncs
644 : : * Restore syncs within a parallel worker.
645 : : *
646 : : * RelationNeedsWAL() and RelFileLocatorSkippingWAL() must offer the correct
647 : : * answer to parallel workers. Only smgrDoPendingSyncs() reads the
648 : : * is_truncated field, at end of transaction. Hence, don't restore it.
649 : : */
650 : : void
651 : 1378 : RestorePendingSyncs(char *startAddress)
652 : : {
653 : : RelFileLocator *rlocator;
654 : :
655 [ - + ]: 1378 : Assert(pendingSyncHash == NULL);
1158 rhaas@postgresql.org 656 [ + + ]: 3176 : for (rlocator = (RelFileLocator *) startAddress; rlocator->relNumber != 0;
657 : 1798 : rlocator++)
658 : 1798 : AddPendingSync(rlocator);
1981 noah@leadboat.com 659 : 1378 : }
660 : :
661 : : /*
662 : : * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
663 : : *
664 : : * This also runs when aborting a subxact; we want to clean up a failed
665 : : * subxact immediately.
666 : : *
667 : : * Note: It's possible that we're being asked to remove a relation that has
668 : : * no physical storage in any fork. In particular, it's possible that we're
669 : : * cleaning up an old temporary relation for which RemovePgTempFiles has
670 : : * already recovered the physical storage.
671 : : */
672 : : void
6135 heikki.linnakangas@i 673 : 321440 : smgrDoPendingDeletes(bool isCommit)
674 : : {
675 : 321440 : int nestLevel = GetCurrentTransactionNestLevel();
676 : : PendingRelDelete *pending;
677 : : PendingRelDelete *prev;
678 : : PendingRelDelete *next;
4615 alvherre@alvh.no-ip. 679 : 321440 : int nrels = 0,
4278 680 : 321440 : maxrels = 0;
681 : 321440 : SMgrRelation *srels = NULL;
682 : :
6135 heikki.linnakangas@i 683 : 321440 : prev = NULL;
684 [ + + ]: 420834 : for (pending = pendingDeletes; pending != NULL; pending = next)
685 : : {
686 : 99394 : next = pending->next;
687 [ + + ]: 99394 : if (pending->nestLevel < nestLevel)
688 : : {
689 : : /* outer-level entries should not be processed yet */
690 : 4114 : prev = pending;
691 : : }
692 : : else
693 : : {
694 : : /* unlink list entry first, so we don't retry on failure */
695 [ - + ]: 95280 : if (prev)
6135 heikki.linnakangas@i 696 :UBC 0 : prev->next = next;
697 : : else
6135 heikki.linnakangas@i 698 :CBC 95280 : pendingDeletes = next;
699 : : /* do deletion if called for */
700 [ + + ]: 95280 : if (pending->atCommit == isCommit)
701 : : {
702 : : SMgrRelation srel;
703 : :
552 704 : 37588 : srel = smgropen(pending->rlocator, pending->procNumber);
705 : :
706 : : /* allocate the initial array, or extend it, if needed */
4278 alvherre@alvh.no-ip. 707 [ + + ]: 37588 : if (maxrels == 0)
708 : : {
709 : 10935 : maxrels = 8;
4141 bruce@momjian.us 710 : 10935 : srels = palloc(sizeof(SMgrRelation) * maxrels);
711 : : }
4278 alvherre@alvh.no-ip. 712 [ + + ]: 26653 : else if (maxrels <= nrels)
713 : : {
4615 714 : 865 : maxrels *= 2;
715 : 865 : srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
716 : : }
717 : :
718 : 37588 : srels[nrels++] = srel;
719 : : }
720 : : /* must explicitly free the list entry */
6135 heikki.linnakangas@i 721 : 95280 : pfree(pending);
722 : : /* prev does not change */
723 : : }
724 : : }
725 : :
4615 alvherre@alvh.no-ip. 726 [ + + ]: 321440 : if (nrels > 0)
727 : : {
728 : 10935 : smgrdounlinkall(srels, nrels, false);
729 : :
1827 tgl@sss.pgh.pa.us 730 [ + + ]: 48523 : for (int i = 0; i < nrels; i++)
4615 alvherre@alvh.no-ip. 731 : 37588 : smgrclose(srels[i]);
732 : :
4278 733 : 10935 : pfree(srels);
734 : : }
6135 heikki.linnakangas@i 735 : 321440 : }
736 : :
737 : : /*
738 : : * smgrDoPendingSyncs() -- Take care of relation syncs at end of xact.
739 : : */
740 : : void
1981 noah@leadboat.com 741 : 317263 : smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
742 : : {
743 : : PendingRelDelete *pending;
744 : 317263 : int nrels = 0,
745 : 317263 : maxrels = 0;
746 : 317263 : SMgrRelation *srels = NULL;
747 : : HASH_SEQ_STATUS scan;
748 : : PendingRelSync *pendingsync;
749 : :
750 [ - + ]: 317263 : Assert(GetCurrentTransactionNestLevel() == 1);
751 : :
752 [ + + ]: 317263 : if (!pendingSyncHash)
753 : 311428 : return; /* no relation needs sync */
754 : :
755 : : /* Abort -- just throw away all pending syncs */
756 [ + + ]: 6340 : if (!isCommit)
757 : : {
758 : 306 : pendingSyncHash = NULL;
759 : 306 : return;
760 : : }
761 : :
762 : 6034 : AssertPendingSyncs_RelationCache();
763 : :
764 : : /* Parallel worker -- just throw away all pending syncs */
765 [ + + ]: 6034 : if (isParallelWorker)
766 : : {
767 : 199 : pendingSyncHash = NULL;
768 : 199 : return;
769 : : }
770 : :
771 : : /* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
772 [ + + ]: 22727 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
773 [ + + ]: 16892 : if (pending->atCommit)
943 peter@eisentraut.org 774 : 3623 : (void) hash_search(pendingSyncHash, &pending->rlocator,
775 : : HASH_REMOVE, NULL);
776 : :
1981 noah@leadboat.com 777 : 5835 : hash_seq_init(&scan, pendingSyncHash);
778 [ + + ]: 43906 : while ((pendingsync = (PendingRelSync *) hash_seq_search(&scan)))
779 : : {
780 : : ForkNumber fork;
781 : : BlockNumber nblocks[MAX_FORKNUM + 1];
219 tgl@sss.pgh.pa.us 782 : 38071 : uint64 total_blocks = 0;
783 : : SMgrRelation srel;
784 : :
552 heikki.linnakangas@i 785 : 38071 : srel = smgropen(pendingsync->rlocator, INVALID_PROC_NUMBER);
786 : :
787 : : /*
788 : : * We emit newpage WAL records for smaller relations.
789 : : *
790 : : * Small WAL records have a chance to be flushed along with other
791 : : * backends' WAL records. We emit WAL records instead of syncing for
792 : : * files that are smaller than a certain threshold, expecting faster
793 : : * commit. The threshold is defined by the GUC wal_skip_threshold.
794 : : */
1981 noah@leadboat.com 795 [ + - ]: 38071 : if (!pendingsync->is_truncated)
796 : : {
797 [ + + ]: 190355 : for (fork = 0; fork <= MAX_FORKNUM; fork++)
798 : : {
799 [ + + ]: 152284 : if (smgrexists(srel, fork))
800 : : {
801 : 46549 : BlockNumber n = smgrnblocks(srel, fork);
802 : :
803 : : /* we shouldn't come here for unlogged relations */
804 [ - + ]: 46549 : Assert(fork != INIT_FORKNUM);
805 : 46549 : nblocks[fork] = n;
806 : 46549 : total_blocks += n;
807 : : }
808 : : else
809 : 105735 : nblocks[fork] = InvalidBlockNumber;
810 : : }
811 : : }
812 : :
813 : : /*
814 : : * Sync file or emit WAL records for its contents.
815 : : *
816 : : * Although we emit WAL record if the file is small enough, do file
817 : : * sync regardless of the size if the file has experienced a
818 : : * truncation. It is because the file would be followed by trailing
819 : : * garbage blocks after a crash recovery if, while a past longer file
820 : : * had been flushed out, we omitted syncing-out of the file and
821 : : * emitted WAL instead. You might think that we could choose WAL if
822 : : * the current main fork is longer than ever, but there's a case where
823 : : * main fork is longer than ever but FSM fork gets shorter.
824 : : */
825 [ + - ]: 38071 : if (pendingsync->is_truncated ||
219 tgl@sss.pgh.pa.us 826 [ + + ]: 38071 : total_blocks >= wal_skip_threshold * (uint64) 1024 / BLCKSZ)
827 : : {
828 : : /* allocate the initial array, or extend it, if needed */
1981 noah@leadboat.com 829 [ + - ]: 13 : if (maxrels == 0)
830 : : {
831 : 13 : maxrels = 8;
832 : 13 : srels = palloc(sizeof(SMgrRelation) * maxrels);
833 : : }
1981 noah@leadboat.com 834 [ # # ]:UBC 0 : else if (maxrels <= nrels)
835 : : {
836 : 0 : maxrels *= 2;
837 : 0 : srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
838 : : }
839 : :
1981 noah@leadboat.com 840 :CBC 13 : srels[nrels++] = srel;
841 : : }
842 : : else
843 : : {
844 : : /* Emit WAL records for all blocks. The file is small enough. */
845 [ + + ]: 190290 : for (fork = 0; fork <= MAX_FORKNUM; fork++)
846 : : {
847 : 152232 : int n = nblocks[fork];
848 : : Relation rel;
849 : :
850 [ + + ]: 152232 : if (!BlockNumberIsValid(n))
851 : 105700 : continue;
852 : :
853 : : /*
854 : : * Emit WAL for the whole file. Unfortunately we don't know
855 : : * what kind of a page this is, so we have to log the full
856 : : * page including any unused space. ReadBufferExtended()
857 : : * counts some pgstat events; unfortunately, we discard them.
858 : : */
1158 rhaas@postgresql.org 859 : 46532 : rel = CreateFakeRelcacheEntry(srel->smgr_rlocator.locator);
1981 noah@leadboat.com 860 : 46532 : log_newpage_range(rel, fork, 0, n, false);
861 : 46532 : FreeFakeRelcacheEntry(rel);
862 : : }
863 : : }
864 : : }
865 : :
866 : 5835 : pendingSyncHash = NULL;
867 : :
868 [ + + ]: 5835 : if (nrels > 0)
869 : : {
870 : 13 : smgrdosyncall(srels, nrels);
871 : 13 : pfree(srels);
872 : : }
873 : : }
874 : :
875 : : /*
876 : : * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
877 : : *
878 : : * The return value is the number of relations scheduled for termination.
879 : : * *ptr is set to point to a freshly-palloc'd array of RelFileLocators.
880 : : * If there are no relations to be deleted, *ptr is set to NULL.
881 : : *
882 : : * Only non-temporary relations are included in the returned list. This is OK
883 : : * because the list is used only in contexts where temporary relations don't
884 : : * matter: we're either writing to the two-phase state file (and transactions
885 : : * that have touched temp tables can't be prepared) or we're writing to xlog
886 : : * (and all temporary files will be zapped if we restart anyway, so no need
887 : : * for redo to do it also).
888 : : *
889 : : * Note that the list does not include anything scheduled for termination
890 : : * by upper-level transactions.
891 : : */
892 : : int
1158 rhaas@postgresql.org 893 : 296961 : smgrGetPendingDeletes(bool forCommit, RelFileLocator **ptr)
894 : : {
6135 heikki.linnakangas@i 895 : 296961 : int nestLevel = GetCurrentTransactionNestLevel();
896 : : int nrels;
897 : : RelFileLocator *rptr;
898 : : PendingRelDelete *pending;
899 : :
900 : 296961 : nrels = 0;
901 [ + + ]: 394933 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
902 : : {
5503 rhaas@postgresql.org 903 [ + + + + ]: 97972 : if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
552 heikki.linnakangas@i 904 [ + + ]: 37648 : && pending->procNumber == INVALID_PROC_NUMBER)
6135 905 : 34486 : nrels++;
906 : : }
907 [ + + ]: 296961 : if (nrels == 0)
908 : : {
909 : 286822 : *ptr = NULL;
910 : 286822 : return 0;
911 : : }
1158 rhaas@postgresql.org 912 : 10139 : rptr = (RelFileLocator *) palloc(nrels * sizeof(RelFileLocator));
6135 heikki.linnakangas@i 913 : 10139 : *ptr = rptr;
914 [ + + ]: 53025 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
915 : : {
5503 rhaas@postgresql.org 916 [ + + + + ]: 42886 : if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
552 heikki.linnakangas@i 917 [ + + ]: 34560 : && pending->procNumber == INVALID_PROC_NUMBER)
918 : : {
1158 rhaas@postgresql.org 919 : 34486 : *rptr = pending->rlocator;
6135 heikki.linnakangas@i 920 : 34486 : rptr++;
921 : : }
922 : : }
923 : 10139 : return nrels;
924 : : }
925 : :
926 : : /*
927 : : * PostPrepare_smgr -- Clean up after a successful PREPARE
928 : : *
929 : : * What we have to do here is throw away the in-memory state about pending
930 : : * relation deletes. It's all been recorded in the 2PC state file and
931 : : * it's no longer smgr's job to worry about it.
932 : : */
933 : : void
934 : 287 : PostPrepare_smgr(void)
935 : : {
936 : : PendingRelDelete *pending;
937 : : PendingRelDelete *next;
938 : :
939 [ + + ]: 347 : for (pending = pendingDeletes; pending != NULL; pending = next)
940 : : {
941 : 60 : next = pending->next;
942 : 60 : pendingDeletes = next;
943 : : /* must explicitly free the list entry */
944 : 60 : pfree(pending);
945 : : }
946 : 287 : }
947 : :
948 : :
949 : : /*
950 : : * AtSubCommit_smgr() --- Take care of subtransaction commit.
951 : : *
952 : : * Reassign all items in the pending-deletes list to the parent transaction.
953 : : */
954 : : void
955 : 4423 : AtSubCommit_smgr(void)
956 : : {
957 : 4423 : int nestLevel = GetCurrentTransactionNestLevel();
958 : : PendingRelDelete *pending;
959 : :
960 [ + + ]: 4651 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
961 : : {
962 [ + + ]: 228 : if (pending->nestLevel >= nestLevel)
963 : 107 : pending->nestLevel = nestLevel - 1;
964 : : }
965 : 4423 : }
966 : :
967 : : /*
968 : : * AtSubAbort_smgr() --- Take care of subtransaction abort.
969 : : *
970 : : * Delete created relations and forget about deleted relations.
971 : : * We can execute these operations immediately because we know this
972 : : * subtransaction will not commit.
973 : : */
974 : : void
975 : 4667 : AtSubAbort_smgr(void)
976 : : {
977 : 4667 : smgrDoPendingDeletes(false);
978 : 4667 : }
979 : :
980 : : void
3943 981 : 15259 : smgr_redo(XLogReaderState *record)
982 : : {
983 : 15259 : XLogRecPtr lsn = record->EndRecPtr;
984 : 15259 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
985 : :
986 : : /* Backup blocks are not used in smgr records */
987 [ - + ]: 15259 : Assert(!XLogRecHasAnyBlockRefs(record));
988 : :
6135 989 [ + + ]: 15259 : if (info == XLOG_SMGR_CREATE)
990 : : {
991 : 15206 : xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
992 : : SMgrRelation reln;
993 : :
552 994 : 15206 : reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER);
5365 rhaas@postgresql.org 995 : 15206 : smgrcreate(reln, xlrec->forkNum, true);
996 : : }
6135 heikki.linnakangas@i 997 [ + - ]: 53 : else if (info == XLOG_SMGR_TRUNCATE)
998 : : {
999 : 53 : xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
1000 : : SMgrRelation reln;
1001 : : Relation rel;
1002 : : ForkNumber forks[MAX_FORKNUM];
1003 : : BlockNumber blocks[MAX_FORKNUM];
1004 : : BlockNumber old_blocks[MAX_FORKNUM];
1941 tgl@sss.pgh.pa.us 1005 : 53 : int nforks = 0;
2174 fujii@postgresql.org 1006 : 53 : bool need_fsm_vacuum = false;
1007 : :
552 heikki.linnakangas@i 1008 : 53 : reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER);
1009 : :
1010 : : /*
1011 : : * Forcibly create relation if it doesn't exist (which suggests that
1012 : : * it was dropped somewhere later in the WAL sequence). As in
1013 : : * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
1014 : : * log as best we can until the drop is seen.
1015 : : */
6135 1016 : 53 : smgrcreate(reln, MAIN_FORKNUM, true);
1017 : :
1018 : : /*
1019 : : * Before we perform the truncation, update minimum recovery point to
1020 : : * cover this WAL record. Once the relation is truncated, there's no
1021 : : * going back. The buffer manager enforces the WAL-first rule for
1022 : : * normal updates to relation files, so that the minimum recovery
1023 : : * point is always updated before the corresponding change in the data
1024 : : * file is flushed to disk. We have to do the same manually here.
1025 : : *
1026 : : * Doing this before the truncation means that if the truncation fails
1027 : : * for some reason, you cannot start up the system even after restart,
1028 : : * until you fix the underlying situation so that the truncation will
1029 : : * succeed. Alternatively, we could update the minimum recovery point
1030 : : * after truncation, but that would leave a small window where the
1031 : : * WAL-first rule could be violated.
1032 : : */
4653 1033 : 53 : XLogFlush(lsn);
1034 : :
1035 : : /* Prepare for truncation of MAIN fork */
3368 rhaas@postgresql.org 1036 [ + - ]: 53 : if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
1037 : : {
2174 fujii@postgresql.org 1038 : 53 : forks[nforks] = MAIN_FORKNUM;
260 tmunro@postgresql.or 1039 : 53 : old_blocks[nforks] = smgrnblocks(reln, MAIN_FORKNUM);
2174 fujii@postgresql.org 1040 : 53 : blocks[nforks] = xlrec->blkno;
1041 : 53 : nforks++;
1042 : :
1043 : : /* Also tell xlogutils.c about it */
1158 rhaas@postgresql.org 1044 : 53 : XLogTruncateRelation(xlrec->rlocator, MAIN_FORKNUM, xlrec->blkno);
1045 : : }
1046 : :
1047 : : /* Prepare for truncation of FSM and VM too */
1048 : 53 : rel = CreateFakeRelcacheEntry(xlrec->rlocator);
1049 : :
3368 1050 [ + - + + ]: 106 : if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
1051 : 53 : smgrexists(reln, FSM_FORKNUM))
1052 : : {
2174 fujii@postgresql.org 1053 : 35 : blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
1054 [ + - ]: 35 : if (BlockNumberIsValid(blocks[nforks]))
1055 : : {
1056 : 35 : forks[nforks] = FSM_FORKNUM;
260 tmunro@postgresql.or 1057 : 35 : old_blocks[nforks] = smgrnblocks(reln, FSM_FORKNUM);
2174 fujii@postgresql.org 1058 : 35 : nforks++;
1059 : 35 : need_fsm_vacuum = true;
1060 : : }
1061 : : }
3368 rhaas@postgresql.org 1062 [ + - + + ]: 106 : if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
1063 : 53 : smgrexists(reln, VISIBILITYMAP_FORKNUM))
1064 : : {
2174 fujii@postgresql.org 1065 : 30 : blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
1066 [ + + ]: 30 : if (BlockNumberIsValid(blocks[nforks]))
1067 : : {
1068 : 12 : forks[nforks] = VISIBILITYMAP_FORKNUM;
260 tmunro@postgresql.or 1069 : 12 : old_blocks[nforks] = smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
2174 fujii@postgresql.org 1070 : 12 : nforks++;
1071 : : }
1072 : : }
1073 : :
1074 : : /* Do the real work to truncate relation forks */
1075 [ + - ]: 53 : if (nforks > 0)
1076 : : {
260 tmunro@postgresql.or 1077 : 53 : START_CRIT_SECTION();
1078 : 53 : smgrtruncate(reln, forks, nforks, old_blocks, blocks);
1079 [ - + ]: 53 : END_CRIT_SECTION();
1080 : : }
1081 : :
1082 : : /*
1083 : : * Update upper-level FSM pages to account for the truncation. This is
1084 : : * important because the just-truncated pages were likely marked as
1085 : : * all-free, and would be preferentially selected.
1086 : : */
2174 fujii@postgresql.org 1087 [ + + ]: 53 : if (need_fsm_vacuum)
1088 : 35 : FreeSpaceMapVacuumRange(rel, xlrec->blkno,
1089 : : InvalidBlockNumber);
1090 : :
5688 tgl@sss.pgh.pa.us 1091 : 53 : FreeFakeRelcacheEntry(rel);
1092 : : }
1093 : : else
6135 heikki.linnakangas@i 1094 [ # # ]:UBC 0 : elog(PANIC, "smgr_redo: unknown op code %u", info);
6135 heikki.linnakangas@i 1095 :CBC 15259 : }
|