Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pruneheap.c
4 : : * heap page pruning and HOT-chain management code
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/pruneheap.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/heapam.h"
18 : : #include "access/heapam_xlog.h"
19 : : #include "access/htup_details.h"
20 : : #include "access/multixact.h"
21 : : #include "access/transam.h"
22 : : #include "access/visibilitymap.h"
23 : : #include "access/xlog.h"
24 : : #include "access/xloginsert.h"
25 : : #include "commands/vacuum.h"
26 : : #include "executor/instrument.h"
27 : : #include "miscadmin.h"
28 : : #include "pgstat.h"
29 : : #include "storage/bufmgr.h"
30 : : #include "utils/rel.h"
31 : : #include "utils/snapmgr.h"
32 : :
33 : : /* Working data for heap_page_prune_and_freeze() and subroutines */
34 : : typedef struct
35 : : {
36 : : /*-------------------------------------------------------
37 : : * Arguments passed to heap_page_prune_and_freeze()
38 : : *-------------------------------------------------------
39 : : */
40 : :
41 : : /* tuple visibility test, initialized for the relation */
42 : : GlobalVisState *vistest;
43 : : /* whether or not dead items can be set LP_UNUSED during pruning */
44 : : bool mark_unused_now;
45 : : /* whether to attempt freezing tuples */
46 : : bool attempt_freeze;
47 : : /* whether to attempt setting the VM */
48 : : bool attempt_set_vm;
49 : : struct VacuumCutoffs *cutoffs;
50 : : Relation relation;
51 : :
52 : : /*
53 : : * Keep the buffer, block, and page handy so that helpers needing to
54 : : * access them don't need to make repeated calls to BufferGetBlockNumber()
55 : : * and BufferGetPage().
56 : : */
57 : : BlockNumber block;
58 : : Buffer buffer;
59 : : Page page;
60 : :
61 : : /*-------------------------------------------------------
62 : : * Fields describing what to do to the page
63 : : *-------------------------------------------------------
64 : : */
65 : : TransactionId new_prune_xid; /* new prune hint value */
66 : : TransactionId latest_xid_removed;
67 : : int nredirected; /* numbers of entries in arrays below */
68 : : int ndead;
69 : : int nunused;
70 : : int nfrozen;
71 : : /* arrays that accumulate indexes of items to be changed */
72 : : OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
73 : : OffsetNumber nowdead[MaxHeapTuplesPerPage];
74 : : OffsetNumber nowunused[MaxHeapTuplesPerPage];
75 : : HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
76 : :
77 : : /*
78 : : * set_all_visible and set_all_frozen indicate if the all-visible and
79 : : * all-frozen bits in the visibility map can be set for this page after
80 : : * pruning. They are only tracked when the caller requests VM updates
81 : : * (attempt_set_vm); otherwise they remain false throughout.
82 : : *
83 : : * NOTE: set_all_visible and set_all_frozen initially don't include
84 : : * LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
85 : : * use them to decide whether to opportunistically freeze the page or not.
86 : : * The set_all_visible and set_all_frozen values ultimately used to set
87 : : * the VM are adjusted to include LP_DEAD items after we determine whether
88 : : * or not to opportunistically freeze.
89 : : */
90 : : bool set_all_visible;
91 : : bool set_all_frozen;
92 : :
93 : : /*-------------------------------------------------------
94 : : * Working state for HOT chain processing
95 : : *-------------------------------------------------------
96 : : */
97 : :
98 : : /*
99 : : * 'root_items' contains offsets of all LP_REDIRECT line pointers and
100 : : * normal non-HOT tuples. They can be stand-alone items or the first item
101 : : * in a HOT chain. 'heaponly_items' contains heap-only tuples which can
102 : : * only be removed as part of a HOT chain.
103 : : */
104 : : int nroot_items;
105 : : OffsetNumber root_items[MaxHeapTuplesPerPage];
106 : : int nheaponly_items;
107 : : OffsetNumber heaponly_items[MaxHeapTuplesPerPage];
108 : :
109 : : /*
110 : : * processed[offnum] is true if item at offnum has been processed.
111 : : *
112 : : * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
113 : : * 1. Otherwise every access would need to subtract 1.
114 : : */
115 : : bool processed[MaxHeapTuplesPerPage + 1];
116 : :
117 : : /*
118 : : * Tuple visibility is only computed once for each tuple, for correctness
119 : : * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
120 : : * details. This is of type int8[], instead of HTSV_Result[], so we can
121 : : * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
122 : : * items.
123 : : *
124 : : * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
125 : : * 1. Otherwise every access would need to subtract 1.
126 : : */
127 : : int8 htsv[MaxHeapTuplesPerPage + 1];
128 : :
129 : : /*-------------------------------------------------------
130 : : * Working state for freezing
131 : : *-------------------------------------------------------
132 : : */
133 : : HeapPageFreeze pagefrz;
134 : :
135 : : /*-------------------------------------------------------
136 : : * Working state for visibility map processing
137 : : *-------------------------------------------------------
138 : : */
139 : :
140 : : /*
141 : : * Caller must provide a pinned vmbuffer corresponding to the heap block
142 : : * passed to heap_page_prune_and_freeze(). We will fix any corruption
143 : : * found in the VM and set the VM if the page is all-visible/all-frozen.
144 : : */
145 : : Buffer vmbuffer;
146 : :
147 : : /*
148 : : * The state of the VM bits at the beginning of pruning and the state they
149 : : * will be in at the end.
150 : : */
151 : : uint8 old_vmbits;
152 : : uint8 new_vmbits;
153 : :
154 : : /* The newest xmin of live tuples on the page */
155 : : TransactionId newest_live_xid;
156 : :
157 : : /*-------------------------------------------------------
158 : : * Information about what was done
159 : : *
160 : : * These fields are not used by pruning itself for the most part, but are
161 : : * used to collect information about what was pruned and what state the
162 : : * page is in after pruning, for the benefit of the caller. They are
163 : : * copied to the caller's PruneFreezeResult at the end.
164 : : * -------------------------------------------------------
165 : : */
166 : :
167 : : int ndeleted; /* Number of tuples deleted from the page */
168 : :
169 : : /* Number of live and recently dead tuples, after pruning */
170 : : int live_tuples;
171 : : int recently_dead_tuples;
172 : :
173 : : /* Whether or not the page makes rel truncation unsafe */
174 : : bool hastup;
175 : :
176 : : /*
177 : : * LP_DEAD items on the page after pruning. Includes existing LP_DEAD
178 : : * items
179 : : */
180 : : int lpdead_items; /* number of items in the array */
181 : : OffsetNumber *deadoffsets; /* points directly to presult->deadoffsets */
182 : : } PruneState;
183 : :
184 : : /*
185 : : * Type of visibility map corruption detected on a heap page and its
186 : : * associated VM page. Passed to heap_page_fix_vm_corruption() so the caller
187 : : * can specify what it found rather than having the function rederive the
188 : : * corruption from page state.
189 : : */
190 : : typedef enum VMCorruptionType
191 : : {
192 : : /* VM bits are set but the heap page-level PD_ALL_VISIBLE flag is not */
193 : : VM_CORRUPT_MISSING_PAGE_HINT,
194 : : /* LP_DEAD line pointers found on a page marked all-visible */
195 : : VM_CORRUPT_LPDEAD,
196 : : /* Tuple not visible to all transactions on a page marked all-visible */
197 : : VM_CORRUPT_TUPLE_VISIBILITY,
198 : : } VMCorruptionType;
199 : :
200 : : /* Local functions */
201 : : static void prune_freeze_setup(PruneFreezeParams *params,
202 : : TransactionId *new_relfrozen_xid,
203 : : MultiXactId *new_relmin_mxid,
204 : : PruneFreezeResult *presult,
205 : : PruneState *prstate);
206 : : static void heap_page_fix_vm_corruption(PruneState *prstate,
207 : : OffsetNumber offnum,
208 : : VMCorruptionType corruption_type);
209 : : static void prune_freeze_fast_path(PruneState *prstate,
210 : : PruneFreezeResult *presult);
211 : : static void prune_freeze_plan(PruneState *prstate,
212 : : OffsetNumber *off_loc);
213 : : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
214 : : HeapTuple tup);
215 : : static inline HTSV_Result htsv_get_valid_status(int status);
216 : : static void heap_prune_chain(OffsetNumber maxoff,
217 : : OffsetNumber rootoffnum, PruneState *prstate);
218 : : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid,
219 : : OffsetNumber offnum);
220 : : static void heap_prune_record_redirect(PruneState *prstate,
221 : : OffsetNumber offnum, OffsetNumber rdoffnum,
222 : : bool was_normal);
223 : : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
224 : : bool was_normal);
225 : : static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
226 : : bool was_normal);
227 : : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
228 : :
229 : : static void heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum);
230 : : static void heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum);
231 : : static void heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum);
232 : : static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
233 : :
234 : : static void page_verify_redirects(Page page);
235 : :
236 : : static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
237 : : PruneState *prstate);
238 : : static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
239 : : bool do_prune, bool do_freeze);
240 : :
241 : :
242 : : /*
243 : : * Optionally prune and repair fragmentation in the specified page.
244 : : *
245 : : * This is an opportunistic function. It will perform housekeeping
246 : : * only if the page heuristically looks like a candidate for pruning and we
247 : : * can acquire buffer cleanup lock without blocking.
248 : : *
249 : : * Note: this is called quite often. It's important that it fall out quickly
250 : : * if there's not any use in pruning.
251 : : *
252 : : * Caller must have pin on the buffer, and must *not* have a lock on it.
253 : : *
254 : : * This function may pin *vmbuffer. It's passed by reference so the caller can
255 : : * reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find
256 : : * VM corruption during pruning, we will fix it. Caller is responsible for
257 : : * unpinning *vmbuffer.
258 : : *
259 : : * rel_read_only is true if we determined at plan time that the query does not
260 : : * modify the relation. It is counterproductive to set the VM if the query
261 : : * will immediately clear it.
262 : : *
263 : : * As noted in ScanRelIsReadOnly(), INSERT ... SELECT from the same table will
264 : : * report the scan relation as read-only. This is usually harmless in
265 : : * practice. It is useful to set scanned pages all-visible that won't be
266 : : * inserted into. Pages it does insert to will rarely meet the criteria for
267 : : * pruning, and those that do are likely to contain in-progress inserts which
268 : : * make the page not fully all-visible.
269 : : */
270 : : void
36 melanieplageman@gmai 271 :GNC 19775485 : heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer,
272 : : bool rel_read_only)
273 : : {
3667 kgrittn@postgresql.o 274 :CBC 19775485 : Page page = BufferGetPage(buffer);
275 : : TransactionId prune_xid;
276 : : GlobalVisState *vistest;
277 : : Size minfree;
278 : :
279 : : /*
280 : : * We can't write WAL in recovery mode, so there's no point trying to
281 : : * clean the page. The primary will likely issue a cleaning WAL record
282 : : * soon anyway, so this is no particular loss.
283 : : */
4446 rhaas@postgresql.org 284 [ + + ]: 19775485 : if (RecoveryInProgress())
285 : 259806 : return;
286 : :
287 : : /*
288 : : * First check whether there's any chance there's something to prune,
289 : : * determining the appropriate horizon is a waste if there's no prune_xid
290 : : * (i.e. no updates/deletes left potentially dead tuples around and no
291 : : * inserts inserted new tuples that may be visible to all).
292 : : */
61 melanieplageman@gmai 293 :GNC 19515679 : prune_xid = PageGetPruneXid(page);
2092 andres@anarazel.de 294 [ + + ]:CBC 19515679 : if (!TransactionIdIsValid(prune_xid))
295 : 8043410 : return;
296 : :
297 : : /*
298 : : * Check whether prune_xid indicates that there may be dead rows that can
299 : : * be cleaned up.
300 : : */
301 : 11472269 : vistest = GlobalVisTestFor(relation);
302 : :
42 melanieplageman@gmai 303 [ + + ]:GNC 11472269 : if (!GlobalVisTestIsRemovableXid(vistest, prune_xid, true))
973 tmunro@postgresql.or 304 :CBC 9157852 : return;
305 : :
306 : : /*
307 : : * We prune when a previous UPDATE failed to find enough space on the page
308 : : * for a new tuple version, or when free space falls below the relation's
309 : : * fill-factor target (but not less than 10%).
310 : : *
311 : : * Checking free space here is questionable since we aren't holding any
312 : : * lock on the buffer; in the worst case we could get a bogus answer. It's
313 : : * unlikely to be *seriously* wrong, though, since reading either pd_lower
314 : : * or pd_upper is probably atomic. Avoiding taking a lock seems more
315 : : * important than sometimes getting a wrong answer in what is after all
316 : : * just a heuristic estimate.
317 : : */
754 akorotkov@postgresql 318 [ + + ]: 2314417 : minfree = RelationGetTargetPageFreeSpace(relation,
319 : : HEAP_DEFAULT_FILLFACTOR);
6802 tgl@sss.pgh.pa.us 320 : 2314417 : minfree = Max(minfree, BLCKSZ / 10);
321 : :
6505 322 [ + + + + ]: 2314417 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
323 : : {
324 : : /* OK, try to get exclusive buffer lock */
6802 325 [ + + ]: 130826 : if (!ConditionalLockBufferForCleanup(buffer))
326 : 2094 : return;
327 : :
328 : : /*
329 : : * Now that we have buffer lock, get accurate information about the
330 : : * page's free space, and recheck the heuristic about whether to
331 : : * prune.
332 : : */
6505 333 [ + + + - ]: 128732 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
334 : : {
335 : : OffsetNumber dummy_off_loc;
336 : : PruneFreezeResult presult;
337 : : PruneFreezeParams params;
338 : :
44 melanieplageman@gmai 339 :GNC 128732 : visibilitymap_pin(relation, BufferGetBlockNumber(buffer),
340 : : vmbuffer);
341 : :
342 : 128732 : params.relation = relation;
343 : 128732 : params.buffer = buffer;
344 : 128732 : params.vmbuffer = *vmbuffer;
345 : 128732 : params.reason = PRUNE_ON_ACCESS;
346 : 128732 : params.vistest = vistest;
347 : 128732 : params.cutoffs = NULL;
348 : :
349 : : /*
350 : : * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
351 : : * regardless of whether or not the relation has indexes, since we
352 : : * cannot safely determine that during on-access pruning with the
353 : : * current implementation.
354 : : */
355 : 128732 : params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
36 356 [ + + ]: 128732 : if (rel_read_only)
357 : 38533 : params.options |= HEAP_PAGE_PRUNE_SET_VM;
358 : :
166 359 : 128732 : heap_page_prune_and_freeze(¶ms, &presult, &dummy_off_loc,
360 : : NULL, NULL);
361 : :
362 : : /*
363 : : * Report the number of tuples reclaimed to pgstats. This is
364 : : * presult.ndeleted minus the number of newly-LP_DEAD-set items.
365 : : *
366 : : * We derive the number of dead tuples like this to avoid totally
367 : : * forgetting about items that were set to LP_DEAD, since they
368 : : * still need to be cleaned up by VACUUM. We only want to count
369 : : * heap-only tuples that just became LP_UNUSED in our report,
370 : : * which don't.
371 : : *
372 : : * VACUUM doesn't have to compensate in the same way when it
373 : : * tracks ndeleted, since it will set the same LP_DEAD items to
374 : : * LP_UNUSED separately.
375 : : */
950 rhaas@postgresql.org 376 [ + + ]:CBC 128732 : if (presult.ndeleted > presult.nnewlpdead)
1635 pg@bowt.ie 377 : 21321 : pgstat_update_heap_dead_tuples(relation,
950 rhaas@postgresql.org 378 : 21321 : presult.ndeleted - presult.nnewlpdead);
379 : : }
380 : :
381 : : /* And release buffer lock */
6802 tgl@sss.pgh.pa.us 382 : 128732 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
383 : :
384 : : /*
385 : : * We avoid reuse of any free space created on the page by unrelated
386 : : * UPDATEs/INSERTs by opting to not update the FSM at this point. The
387 : : * free space should be reused by UPDATEs to *this* page.
388 : : */
389 : : }
390 : : }
391 : :
392 : : /*
393 : : * Helper for heap_page_prune_and_freeze() to initialize the PruneState using
394 : : * the provided parameters.
395 : : *
396 : : * params, new_relfrozen_xid, new_relmin_mxid, and presult are input
397 : : * parameters and are not modified by this function. Only prstate is modified.
398 : : */
399 : : static void
160 melanieplageman@gmai 400 :GNC 219361 : prune_freeze_setup(PruneFreezeParams *params,
401 : : TransactionId *new_relfrozen_xid,
402 : : MultiXactId *new_relmin_mxid,
403 : : PruneFreezeResult *presult,
404 : : PruneState *prstate)
405 : : {
406 : : /* Copy parameters to prstate */
407 : 219361 : prstate->vistest = params->vistest;
408 : 219361 : prstate->mark_unused_now =
166 409 : 219361 : (params->options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
410 : :
411 : : /* cutoffs must be provided if we will attempt freezing */
161 412 [ + + - + ]: 219361 : Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
160 413 : 219361 : prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
36 414 : 219361 : prstate->attempt_set_vm = (params->options & HEAP_PAGE_PRUNE_SET_VM) != 0;
160 415 : 219361 : prstate->cutoffs = params->cutoffs;
61 416 : 219361 : prstate->relation = params->relation;
417 : 219361 : prstate->block = BufferGetBlockNumber(params->buffer);
418 : 219361 : prstate->buffer = params->buffer;
419 : 219361 : prstate->page = BufferGetPage(params->buffer);
420 : :
44 421 [ - + ]: 219361 : Assert(BufferIsValid(params->vmbuffer));
422 : 219361 : prstate->vmbuffer = params->vmbuffer;
42 423 : 219361 : prstate->new_vmbits = 0;
44 424 : 219361 : prstate->old_vmbits = visibilitymap_get_status(prstate->relation,
425 : : prstate->block,
426 : : &prstate->vmbuffer);
427 : :
428 : : /*
429 : : * Our strategy is to scan the page and make lists of items to change,
430 : : * then apply the changes within a critical section. This keeps as much
431 : : * logic as possible out of the critical section, and also ensures that
432 : : * WAL replay will work the same as the normal case.
433 : : *
434 : : * First, initialize the new pd_prune_xid value to zero (indicating no
435 : : * prunable tuples). If we find any tuples which may soon become
436 : : * prunable, we will save the lowest relevant XID in new_prune_xid. Also
437 : : * initialize the rest of our working state.
438 : : */
160 439 : 219361 : prstate->new_prune_xid = InvalidTransactionId;
440 : 219361 : prstate->latest_xid_removed = InvalidTransactionId;
441 : 219361 : prstate->nredirected = prstate->ndead = prstate->nunused = 0;
442 : 219361 : prstate->nfrozen = 0;
443 : 219361 : prstate->nroot_items = 0;
444 : 219361 : prstate->nheaponly_items = 0;
445 : :
446 : : /* initialize page freezing working state */
447 : 219361 : prstate->pagefrz.freeze_required = false;
56 448 : 219361 : prstate->pagefrz.FreezePageConflictXid = InvalidTransactionId;
160 449 [ + + ]: 219361 : if (prstate->attempt_freeze)
450 : : {
152 melanieplageman@gmai 451 [ + - - + ]:CBC 90629 : Assert(new_relfrozen_xid && new_relmin_mxid);
152 melanieplageman@gmai 452 :GNC 90629 : prstate->pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
453 : 90629 : prstate->pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
454 : 90629 : prstate->pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
455 : 90629 : prstate->pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
456 : : }
457 : : else
458 : : {
459 [ + - - + ]: 128732 : Assert(!new_relfrozen_xid && !new_relmin_mxid);
160 460 : 128732 : prstate->pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
461 : 128732 : prstate->pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
462 : 128732 : prstate->pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
463 : 128732 : prstate->pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
464 : : }
465 : :
466 : 219361 : prstate->ndeleted = 0;
467 : 219361 : prstate->live_tuples = 0;
468 : 219361 : prstate->recently_dead_tuples = 0;
469 : 219361 : prstate->hastup = false;
470 : 219361 : prstate->lpdead_items = 0;
471 : :
472 : : /*
473 : : * deadoffsets are filled in during pruning but are only used to populate
474 : : * PruneFreezeResult->deadoffsets. To avoid needing two copies of the
475 : : * array, just save a pointer to the result offsets array in the
476 : : * PruneState.
477 : : */
140 478 : 219361 : prstate->deadoffsets = presult->deadoffsets;
479 : :
480 : : /*
481 : : * We track whether the page will be all-visible/all-frozen at the end of
482 : : * pruning and freezing. While examining tuple visibility, we'll set
483 : : * set_all_visible to false if there are tuples on the page not visible to
484 : : * all running and future transactions. If setting the VM is enabled for
485 : : * this scan, we will do so if the page ends up being all-visible.
486 : : *
487 : : * We also keep track of the newest live XID, which is used to calculate
488 : : * the snapshot conflict horizon for a WAL record setting the VM.
489 : : */
36 490 : 219361 : prstate->set_all_visible = prstate->attempt_set_vm;
42 491 : 219361 : prstate->newest_live_xid = InvalidTransactionId;
492 : :
493 : : /*
494 : : * Currently, only VACUUM performs freezing, but other callers may in the
495 : : * future. We must initialize set_all_frozen based on whether or not the
496 : : * caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't
497 : : * call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen
498 : : * will never be cleared for tuples that need freezing. This would lead to
499 : : * incorrectly setting the visibility map all-frozen for this page. We
500 : : * can't set the page all-frozen in the VM if the caller didn't pass
501 : : * HEAP_PAGE_PRUNE_SET_VM.
502 : : *
503 : : * When freezing is not required (no XIDs/MXIDs older than the freeze
504 : : * cutoff), we may still choose to "opportunistically" freeze if doing so
505 : : * would make the page all-frozen.
506 : : *
507 : : * We will not be able to freeze the whole page at the end of vacuum if
508 : : * there are tuples present that are not visible to everyone or if there
509 : : * are dead tuples which will not be removable. However, dead tuples that
510 : : * will be removed by the end of vacuum should not prevent this
511 : : * opportunistic freezing.
512 : : *
513 : : * Therefore, we do not clear set_all_visible and set_all_frozen when we
514 : : * encounter LP_DEAD items. Instead, we correct them after deciding
515 : : * whether to freeze, but before updating the VM, to avoid setting the VM
516 : : * bits incorrectly.
517 : : */
36 518 [ + + + - ]: 219361 : prstate->set_all_frozen = prstate->attempt_freeze && prstate->attempt_set_vm;
160 519 : 219361 : }
520 : :
521 : : /*
522 : : * Helper for heap_page_prune_and_freeze(). Iterates over every tuple on the
523 : : * page, examines its visibility information, and determines the appropriate
524 : : * action for each tuple. All tuples are processed and classified during this
525 : : * phase, but no modifications are made to the page until the later execution
526 : : * stage.
527 : : *
528 : : * *off_loc is used for error callback and cleared before returning.
529 : : */
530 : : static void
61 531 : 206184 : prune_freeze_plan(PruneState *prstate, OffsetNumber *off_loc)
532 : : {
533 : 206184 : Page page = prstate->page;
534 : 206184 : BlockNumber blockno = prstate->block;
535 : 206184 : OffsetNumber maxoff = PageGetMaxOffsetNumber(prstate->page);
536 : : OffsetNumber offnum;
537 : : HeapTupleData tup;
538 : :
539 : 206184 : tup.t_tableOid = RelationGetRelid(prstate->relation);
540 : :
541 : : /*
542 : : * Determine HTSV for all tuples, and queue them up for processing as HOT
543 : : * chain roots or as heap-only items.
544 : : *
545 : : * Determining HTSV only once for each tuple is required for correctness,
546 : : * to deal with cases where running HTSV twice could result in different
547 : : * results. For example, RECENTLY_DEAD can turn to DEAD if another
548 : : * checked item causes GlobalVisTestIsRemovableFullXid() to update the
549 : : * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
550 : : * transaction aborts.
551 : : *
552 : : * It's also good for performance. Most commonly tuples within a page are
553 : : * stored at decreasing offsets (while the items are stored at increasing
554 : : * offsets). When processing all tuples on a page this leads to reading
555 : : * memory at decreasing offsets within a page, with a variable stride.
556 : : * That's hard for CPU prefetchers to deal with. Processing the items in
557 : : * reverse order (and thus the tuples in increasing order) increases
558 : : * prefetching efficiency significantly / decreases the number of cache
559 : : * misses.
560 : : */
1607 andres@anarazel.de 561 :CBC 206184 : for (offnum = maxoff;
562 [ + + ]: 21133354 : offnum >= FirstOffsetNumber;
563 : 20927170 : offnum = OffsetNumberPrev(offnum))
564 : : {
565 : 20927170 : ItemId itemid = PageGetItemId(page, offnum);
566 : : HeapTupleHeader htup;
567 : :
568 : : /*
569 : : * Set the offset number so that we can display it along with any
570 : : * error that occurred while processing this tuple.
571 : : */
764 heikki.linnakangas@i 572 : 20927170 : *off_loc = offnum;
573 : :
160 melanieplageman@gmai 574 :GNC 20927170 : prstate->processed[offnum] = false;
575 : 20927170 : prstate->htsv[offnum] = -1;
576 : :
577 : : /* Nothing to do if slot doesn't contain a tuple */
764 heikki.linnakangas@i 578 [ + + ]:CBC 20927170 : if (!ItemIdIsUsed(itemid))
579 : : {
61 melanieplageman@gmai 580 :GNC 259495 : heap_prune_record_unchanged_lp_unused(prstate, offnum);
1607 andres@anarazel.de 581 :CBC 259495 : continue;
582 : : }
583 : :
764 heikki.linnakangas@i 584 [ + + ]: 20667675 : if (ItemIdIsDead(itemid))
585 : : {
586 : : /*
587 : : * If the caller set mark_unused_now true, we can set dead line
588 : : * pointers LP_UNUSED now.
589 : : */
160 melanieplageman@gmai 590 [ + + ]:GNC 1466216 : if (unlikely(prstate->mark_unused_now))
591 : 2203 : heap_prune_record_unused(prstate, offnum, false);
592 : : else
61 593 : 1464013 : heap_prune_record_unchanged_lp_dead(prstate, offnum);
764 heikki.linnakangas@i 594 :CBC 1466216 : continue;
595 : : }
596 : :
597 [ + + ]: 19201459 : if (ItemIdIsRedirected(itemid))
598 : : {
599 : : /* This is the start of a HOT chain */
160 melanieplageman@gmai 600 :GNC 221359 : prstate->root_items[prstate->nroot_items++] = offnum;
764 heikki.linnakangas@i 601 :CBC 221359 : continue;
602 : : }
603 : :
604 [ - + ]: 18980100 : Assert(ItemIdIsNormal(itemid));
605 : :
606 : : /*
607 : : * Get the tuple's visibility status and queue it up for processing.
608 : : */
609 : 18980100 : htup = (HeapTupleHeader) PageGetItem(page, itemid);
610 : 18980100 : tup.t_data = htup;
611 : 18980100 : tup.t_len = ItemIdGetLength(itemid);
612 : 18980100 : ItemPointerSet(&tup.t_self, blockno, offnum);
613 : :
61 melanieplageman@gmai 614 :GNC 18980100 : prstate->htsv[offnum] = heap_prune_satisfies_vacuum(prstate, &tup);
615 : :
764 heikki.linnakangas@i 616 [ + + ]:CBC 18980100 : if (!HeapTupleHeaderIsHeapOnly(htup))
160 melanieplageman@gmai 617 :GNC 18612314 : prstate->root_items[prstate->nroot_items++] = offnum;
618 : : else
619 : 367786 : prstate->heaponly_items[prstate->nheaponly_items++] = offnum;
620 : : }
621 : :
622 : : /*
623 : : * Process HOT chains.
624 : : *
625 : : * We added the items to the array starting from 'maxoff', so by
626 : : * processing the array in reverse order, we process the items in
627 : : * ascending offset number order. The order doesn't matter for
628 : : * correctness, but some quick micro-benchmarking suggests that this is
629 : : * faster. (Earlier PostgreSQL versions, which scanned all the items on
630 : : * the page instead of using the root_items array, also did it in
631 : : * ascending offset number order.)
632 : : */
633 [ + + ]: 19039857 : for (int i = prstate->nroot_items - 1; i >= 0; i--)
634 : : {
635 : 18833673 : offnum = prstate->root_items[i];
636 : :
637 : : /* Ignore items already processed as part of an earlier chain */
638 [ - + ]: 18833673 : if (prstate->processed[offnum])
6632 tgl@sss.pgh.pa.us 639 :UBC 0 : continue;
640 : :
641 : : /* see preceding loop */
764 heikki.linnakangas@i 642 :CBC 18833673 : *off_loc = offnum;
643 : :
644 : : /* Process this item or chain of items */
61 melanieplageman@gmai 645 :GNC 18833673 : heap_prune_chain(maxoff, offnum, prstate);
646 : : }
647 : :
648 : : /*
649 : : * Process any heap-only tuples that were not already processed as part of
650 : : * a HOT chain.
651 : : */
160 652 [ + + ]: 573970 : for (int i = prstate->nheaponly_items - 1; i >= 0; i--)
653 : : {
654 : 367786 : offnum = prstate->heaponly_items[i];
655 : :
656 [ + + ]: 367786 : if (prstate->processed[offnum])
764 heikki.linnakangas@i 657 :CBC 351690 : continue;
658 : :
659 : : /* see preceding loop */
660 : 16096 : *off_loc = offnum;
661 : :
662 : : /*
663 : : * If the tuple is DEAD and doesn't chain to anything else, mark it
664 : : * unused. (If it does chain, we can only remove it as part of
665 : : * pruning its chain.)
666 : : *
667 : : * We need this primarily to handle aborted HOT updates, that is,
668 : : * XMIN_INVALID heap-only tuples. Those might not be linked to by any
669 : : * chain, since the parent tuple might be re-updated before any
670 : : * pruning occurs. So we have to be able to reap them separately from
671 : : * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
672 : : * return true for an XMIN_INVALID tuple, so this code will work even
673 : : * when there were sequential updates within the aborted transaction.)
674 : : */
160 melanieplageman@gmai 675 [ + + ]:GNC 16096 : if (prstate->htsv[offnum] == HEAPTUPLE_DEAD)
676 : : {
764 heikki.linnakangas@i 677 :CBC 3280 : ItemId itemid = PageGetItemId(page, offnum);
678 : 3280 : HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
679 : :
680 [ + - ]: 3280 : if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
681 : : {
682 : 3280 : HeapTupleHeaderAdvanceConflictHorizon(htup,
683 : : &prstate->latest_xid_removed);
160 melanieplageman@gmai 684 :GNC 3280 : heap_prune_record_unused(prstate, offnum, true);
685 : : }
686 : : else
687 : : {
688 : : /*
689 : : * This tuple should've been processed and removed as part of
690 : : * a HOT chain, so something's wrong. To preserve evidence,
691 : : * we don't dare to remove it. We cannot leave behind a DEAD
692 : : * tuple either, because that will cause VACUUM to error out.
693 : : * Throwing an error with a distinct error message seems like
694 : : * the least bad option.
695 : : */
764 heikki.linnakangas@i 696 [ # # ]:UBC 0 : elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
697 : : blockno, offnum);
698 : : }
699 : : }
700 : : else
61 melanieplageman@gmai 701 :GNC 12816 : heap_prune_record_unchanged_lp_normal(prstate, offnum);
702 : : }
703 : :
704 : : /* We should now have processed every tuple exactly once */
705 : : #ifdef USE_ASSERT_CHECKING
764 heikki.linnakangas@i 706 :CBC 206184 : for (offnum = FirstOffsetNumber;
707 [ + + ]: 21133354 : offnum <= maxoff;
708 : 20927170 : offnum = OffsetNumberNext(offnum))
709 : : {
710 : 20927170 : *off_loc = offnum;
711 : :
160 melanieplageman@gmai 712 [ - + ]:GNC 20927170 : Assert(prstate->processed[offnum]);
713 : : }
714 : : #endif
715 : :
716 : : /* Clear the offset information once we have processed the given page. */
764 heikki.linnakangas@i 717 :CBC 206184 : *off_loc = InvalidOffsetNumber;
160 melanieplageman@gmai 718 :GNC 206184 : }
719 : :
720 : : /*
721 : : * Decide whether to proceed with freezing according to the freeze plans
722 : : * prepared for the current heap buffer. If freezing is chosen, this function
723 : : * performs several pre-freeze checks.
724 : : *
725 : : * The values of do_prune, do_hint_prune, and did_tuple_hint_fpi must be
726 : : * determined before calling this function.
727 : : *
728 : : * prstate is both an input and output parameter.
729 : : *
730 : : * Returns true if we should apply the freeze plans and freeze tuples on the
731 : : * page, and false otherwise.
732 : : */
733 : : static bool
61 734 : 206184 : heap_page_will_freeze(bool did_tuple_hint_fpi,
735 : : bool do_prune,
736 : : bool do_hint_prune,
737 : : PruneState *prstate)
738 : : {
160 739 : 206184 : bool do_freeze = false;
740 : :
741 : : /*
742 : : * If the caller specified we should not attempt to freeze any tuples,
743 : : * validate that everything is in the right state and return.
744 : : */
745 [ + + ]: 206184 : if (!prstate->attempt_freeze)
746 : : {
61 747 [ + - - + ]: 128634 : Assert(!prstate->set_all_frozen && prstate->nfrozen == 0);
160 748 : 128634 : return false;
749 : : }
750 : :
751 [ + + ]: 77550 : if (prstate->pagefrz.freeze_required)
752 : : {
753 : : /*
754 : : * heap_prepare_freeze_tuple indicated that at least one XID/MXID from
755 : : * before FreezeLimit/MultiXactCutoff is present. Must freeze to
756 : : * advance relfrozenxid/relminmxid.
757 : : */
758 : 21470 : do_freeze = true;
759 : : }
760 : : else
761 : : {
762 : : /*
763 : : * Opportunistically freeze the page if we are generating an FPI
764 : : * anyway and if doing so means that we can set the page all-frozen
765 : : * afterwards (might not happen until VACUUM's final heap pass).
766 : : *
767 : : * XXX: Previously, we knew if pruning emitted an FPI by checking
768 : : * pgWalUsage.wal_fpi before and after pruning. Once the freeze and
769 : : * prune records were combined, this heuristic couldn't be used
770 : : * anymore. The opportunistic freeze heuristic must be improved;
771 : : * however, for now, try to approximate the old logic.
772 : : */
61 773 [ + + + + ]: 56080 : if (prstate->set_all_frozen && prstate->nfrozen > 0)
774 : : {
775 [ - + ]: 26015 : Assert(prstate->set_all_visible);
776 : :
777 : : /*
778 : : * Freezing would make the page all-frozen. Have already emitted
779 : : * an FPI or will do so anyway?
780 : : */
781 [ + + + + : 26015 : if (RelationNeedsWAL(prstate->relation))
+ - + - ]
782 : : {
160 783 [ + + ]: 24357 : if (did_tuple_hint_fpi)
784 : 1404 : do_freeze = true;
785 [ + + ]: 22953 : else if (do_prune)
786 : : {
61 787 [ + + ]: 3691 : if (XLogCheckBufferNeedsBackup(prstate->buffer))
160 788 : 1169 : do_freeze = true;
789 : : }
790 [ + + ]: 19262 : else if (do_hint_prune)
791 : : {
61 792 [ + + + - : 22692 : if (XLogHintBitIsNeeded() &&
+ + ]
793 : 11346 : XLogCheckBufferNeedsBackup(prstate->buffer))
160 melanieplageman@gmai 794 :CBC 2109 : do_freeze = true;
795 : : }
796 : : }
797 : : }
798 : : }
799 : :
800 [ + + ]: 77550 : if (do_freeze)
801 : : {
802 : : /*
803 : : * Validate the tuples we will be freezing before entering the
804 : : * critical section.
805 : : */
61 melanieplageman@gmai 806 :GNC 26152 : heap_pre_freeze_checks(prstate->buffer, prstate->frozen, prstate->nfrozen);
56 807 [ - + ]: 26152 : Assert(TransactionIdPrecedes(prstate->pagefrz.FreezePageConflictXid,
808 : : prstate->cutoffs->OldestXmin));
809 : : }
160 810 [ + + ]: 51398 : else if (prstate->nfrozen > 0)
811 : : {
812 : : /*
813 : : * The page contained some tuples that were not already frozen, and we
814 : : * chose not to freeze them now. The page won't be all-frozen then.
815 : : */
816 [ - + ]: 22038 : Assert(!prstate->pagefrz.freeze_required);
817 : :
61 818 : 22038 : prstate->set_all_frozen = false;
160 819 : 22038 : prstate->nfrozen = 0; /* avoid miscounts in instrumentation */
820 : : }
821 : : else
822 : : {
823 : : /*
824 : : * We have no freeze plans to execute. The page might already be
825 : : * all-frozen (perhaps only following pruning), though. Such pages
826 : : * can be marked all-frozen in the VM by our caller, even though none
827 : : * of its tuples were newly frozen here.
828 : : */
829 : : }
830 : :
831 : 77550 : return do_freeze;
832 : : }
833 : :
834 : : /*
835 : : * Emit a warning about and fix visibility map corruption on the given page.
836 : : *
837 : : * The caller specifies the type of corruption it has already detected via
838 : : * corruption_type, so that we can emit the appropriate warning. All cases
839 : : * result in the VM bits being cleared; corruption types where PD_ALL_VISIBLE
840 : : * is incorrectly set also clear PD_ALL_VISIBLE.
841 : : *
842 : : * Must be called while holding an exclusive lock on the heap buffer. Dead
843 : : * items and not all-visible tuples must have been discovered under that same
844 : : * lock. Although we do not hold a lock on the VM buffer, it is pinned, and
845 : : * the heap buffer is exclusively locked, ensuring that no other backend can
846 : : * update the VM bits corresponding to this heap page.
847 : : *
848 : : * This function makes changes to the VM and, potentially, the heap page, but
849 : : * it does not need to be done in a critical section.
850 : : */
851 : : static void
44 melanieplageman@gmai 852 :UNC 0 : heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum,
853 : : VMCorruptionType corruption_type)
854 : : {
855 : 0 : const char *relname = RelationGetRelationName(prstate->relation);
856 : 0 : bool do_clear_vm = false;
857 : 0 : bool do_clear_heap = false;
858 : :
859 [ # # ]: 0 : Assert(BufferIsLockedByMeInMode(prstate->buffer, BUFFER_LOCK_EXCLUSIVE));
860 : :
861 [ # # # # ]: 0 : switch (corruption_type)
862 : : {
863 : 0 : case VM_CORRUPT_LPDEAD:
864 [ # # ]: 0 : ereport(WARNING,
865 : : (errcode(ERRCODE_DATA_CORRUPTED),
866 : : errmsg("dead line pointer found on page marked all-visible"),
867 : : errcontext("relation \"%s\", page %u, tuple %u",
868 : : relname, prstate->block, offnum)));
869 : 0 : do_clear_vm = true;
870 : 0 : do_clear_heap = true;
871 : 0 : break;
872 : :
873 : 0 : case VM_CORRUPT_TUPLE_VISIBILITY:
874 : :
875 : : /*
876 : : * A HEAPTUPLE_LIVE tuple on an all-visible page can appear to not
877 : : * be visible to everyone when
878 : : * GetOldestNonRemovableTransactionId() returns a conservative
879 : : * value that's older than the real safe xmin. That is not
880 : : * corruption -- the PD_ALL_VISIBLE flag is still correct.
881 : : *
882 : : * However, dead tuple versions, in-progress inserts, and
883 : : * in-progress deletes should never appear on a page marked
884 : : * all-visible. That indicates real corruption. PD_ALL_VISIBLE
885 : : * should have been cleared by the DML operation that deleted or
886 : : * inserted the tuple.
887 : : */
888 [ # # ]: 0 : ereport(WARNING,
889 : : (errcode(ERRCODE_DATA_CORRUPTED),
890 : : errmsg("tuple not visible to all transactions found on page marked all-visible"),
891 : : errcontext("relation \"%s\", page %u, tuple %u",
892 : : relname, prstate->block, offnum)));
893 : 0 : do_clear_vm = true;
894 : 0 : do_clear_heap = true;
895 : 0 : break;
896 : :
897 : 0 : case VM_CORRUPT_MISSING_PAGE_HINT:
898 : :
899 : : /*
900 : : * As of PostgreSQL 9.2, the visibility map bit should never be
901 : : * set if the page-level bit is clear. However, for vacuum, it's
902 : : * possible that the bit got cleared after
903 : : * heap_vac_scan_next_block() was called, so we must recheck now
904 : : * that we have the buffer lock before concluding that the VM is
905 : : * corrupt.
906 : : */
907 [ # # ]: 0 : Assert(!PageIsAllVisible(prstate->page));
908 [ # # ]: 0 : Assert(prstate->old_vmbits & VISIBILITYMAP_VALID_BITS);
909 [ # # ]: 0 : ereport(WARNING,
910 : : (errcode(ERRCODE_DATA_CORRUPTED),
911 : : errmsg("page is not marked all-visible but visibility map bit is set"),
912 : : errcontext("relation \"%s\", page %u",
913 : : relname, prstate->block)));
914 : 0 : do_clear_vm = true;
915 : 0 : break;
916 : : }
917 : :
918 [ # # # # ]: 0 : Assert(do_clear_heap || do_clear_vm);
919 : :
920 : : /* Avoid marking the buffer dirty if PD_ALL_VISIBLE is already clear */
921 [ # # ]: 0 : if (do_clear_heap)
922 : : {
923 [ # # ]: 0 : Assert(PageIsAllVisible(prstate->page));
924 : 0 : PageClearAllVisible(prstate->page);
925 : 0 : MarkBufferDirtyHint(prstate->buffer, true);
926 : : }
927 : :
928 [ # # ]: 0 : if (do_clear_vm)
929 : : {
930 : 0 : visibilitymap_clear(prstate->relation, prstate->block,
931 : : prstate->vmbuffer,
932 : : VISIBILITYMAP_VALID_BITS);
933 : 0 : prstate->old_vmbits = 0;
934 : : }
935 : 0 : }
936 : :
937 : : /*
938 : : * Decide whether to set the visibility map bits (all-visible and all-frozen)
939 : : * for the current page using information from the PruneState and VM.
940 : : *
941 : : * This function does not actually set the VM bits or page-level visibility
942 : : * hint, PD_ALL_VISIBLE.
943 : : *
944 : : * This should be called only after do_freeze has been decided (and do_prune
945 : : * has been set), as these factor into our heuristic-based decision.
946 : : *
947 : : * Returns true if one or both VM bits should be set and false otherwise.
948 : : */
949 : : static bool
36 melanieplageman@gmai 950 :GNC 206184 : heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
951 : : bool do_prune, bool do_freeze)
952 : : {
953 [ + + ]: 206184 : if (!prstate->attempt_set_vm)
42 954 : 90199 : return false;
955 : :
956 [ + + ]: 115985 : if (!prstate->set_all_visible)
957 : 34969 : return false;
958 : :
959 : : /*
960 : : * If this is an on-access call and we're not actually pruning, avoid
961 : : * setting the visibility map if it would newly dirty the heap page or, if
962 : : * the page is already dirty, if doing so would require including a
963 : : * full-page image (FPI) of the heap page in the WAL.
964 : : */
36 965 [ + + + + : 81016 : if (reason == PRUNE_ON_ACCESS && !do_prune && !do_freeze &&
+ - ]
966 [ + + + + ]: 30094 : (!BufferIsDirty(prstate->buffer) || XLogCheckBufferNeedsBackup(prstate->buffer)))
967 : : {
968 : 16005 : prstate->set_all_visible = prstate->set_all_frozen = false;
969 : 16005 : return false;
970 : : }
971 : :
42 972 : 65011 : prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
973 : :
974 [ + + ]: 65011 : if (prstate->set_all_frozen)
975 : 35126 : prstate->new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
976 : :
977 [ + + ]: 65011 : if (prstate->new_vmbits == prstate->old_vmbits)
978 : : {
979 : 2115 : prstate->new_vmbits = 0;
980 : 2115 : return false;
981 : : }
982 : :
983 : 62896 : return true;
984 : : }
985 : :
986 : : /*
987 : : * If the page is already all-frozen, or already all-visible and freezing
988 : : * won't be attempted, there is no remaining work and we can use the fast path
989 : : * to avoid the expensive overhead of heap_page_prune_and_freeze().
990 : : *
991 : : * This can happen when the page has a stale prune hint, or if VACUUM is
992 : : * scanning an already all-frozen page due to SKIP_PAGES_THRESHOLD.
993 : : *
994 : : * The caller must already have examined the visibility map and saved the
995 : : * status of the page's VM bits in prstate->old_vmbits. Caller must hold a
996 : : * content lock on the heap page since it will examine line pointers.
997 : : *
998 : : * Before calling prune_freeze_fast_path(), the caller should first
999 : : * check for and fix any discrepancy between the page-level visibility hint
1000 : : * and the visibility map. Otherwise, the fast path will always prevent us
1001 : : * from getting them in sync. Note that if there are tuples on the page that
1002 : : * are not visible to all but the VM is incorrectly marked
1003 : : * all-visible/all-frozen, we will not get the chance to fix that corruption
1004 : : * when using the fast path.
1005 : : */
1006 : : static void
44 1007 : 13177 : prune_freeze_fast_path(PruneState *prstate, PruneFreezeResult *presult)
1008 : : {
1009 : 13177 : OffsetNumber maxoff = PageGetMaxOffsetNumber(prstate->page);
1010 : 13177 : Page page = prstate->page;
1011 : :
1012 [ - + - - : 13177 : Assert((prstate->old_vmbits & VISIBILITYMAP_ALL_FROZEN) ||
- - ]
1013 : : ((prstate->old_vmbits & VISIBILITYMAP_ALL_VISIBLE) &&
1014 : : !prstate->attempt_freeze));
1015 : :
1016 : : /* We'll fill in presult for the caller */
1017 : 13177 : memset(presult, 0, sizeof(PruneFreezeResult));
1018 : :
1019 : : /* Clear any stale prune hint */
1020 [ + + ]: 13177 : if (TransactionIdIsValid(PageGetPruneXid(page)))
1021 : : {
1022 : 98 : PageClearPrunable(page);
1023 : 98 : MarkBufferDirtyHint(prstate->buffer, true);
1024 : : }
1025 : :
1026 [ - + ]: 13177 : if (PageIsEmpty(page))
44 melanieplageman@gmai 1027 :UNC 0 : return;
1028 : :
1029 : : /*
1030 : : * Since the page is all-visible, a count of the normal ItemIds on the
1031 : : * page should be sufficient for vacuum's live tuple count.
1032 : : */
44 melanieplageman@gmai 1033 :GNC 13177 : for (OffsetNumber off = FirstOffsetNumber;
1034 [ + + ]: 799446 : off <= maxoff;
1035 : 786269 : off = OffsetNumberNext(off))
1036 : : {
1037 : 786269 : ItemId lp = PageGetItemId(page, off);
1038 : :
1039 [ + + ]: 786269 : if (!ItemIdIsUsed(lp))
1040 : 14977 : continue;
1041 : :
1042 : 771292 : presult->hastup = true;
1043 : :
1044 [ + + ]: 771292 : if (ItemIdIsNormal(lp))
1045 : 761030 : prstate->live_tuples++;
1046 : : }
1047 : :
1048 : 13177 : presult->live_tuples = prstate->live_tuples;
1049 : : }
1050 : :
1051 : : /*
1052 : : * Prune and repair fragmentation and potentially freeze tuples on the
1053 : : * specified page. If the page's visibility status has changed, update it in
1054 : : * the VM.
1055 : : *
1056 : : * Caller must have pin and buffer cleanup lock on the page. Note that we
1057 : : * don't update the FSM information for page on caller's behalf. Caller might
1058 : : * also need to account for a reduction in the length of the line pointer
1059 : : * array following array truncation by us.
1060 : : *
1061 : : * params contains the input parameters used to control freezing and pruning
1062 : : * behavior. See the definition of PruneFreezeParams for more on what each
1063 : : * parameter does.
1064 : : *
1065 : : * If the HEAP_PAGE_PRUNE_FREEZE option is set in params, we will freeze
1066 : : * tuples if it's required in order to advance relfrozenxid / relminmxid, or
1067 : : * if it's considered advantageous for overall system performance to do so
1068 : : * now. The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
1069 : : * 'new_relmin_mxid' arguments are required when freezing.
1070 : : *
1071 : : * A vmbuffer corresponding to the heap page is also passed and if the page is
1072 : : * found to be all-visible/all-frozen, we will set it in the VM.
1073 : : *
1074 : : * presult contains output parameters needed by callers, such as the number of
1075 : : * tuples removed and the offsets of dead items on the page after pruning.
1076 : : * heap_page_prune_and_freeze() is responsible for initializing it. Required
1077 : : * by all callers.
1078 : : *
1079 : : * off_loc is the offset location required by the caller to use in error
1080 : : * callback.
1081 : : *
1082 : : * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
1083 : : * HEAP_PAGE_PRUNE_FREEZE option is set in params. On entry, they contain the
1084 : : * oldest XID and multi-XID seen on the relation so far. They will be updated
1085 : : * with the oldest values present on the page after pruning. After processing
1086 : : * the whole relation, VACUUM can use these values as the new
1087 : : * relfrozenxid/relminmxid for the relation.
1088 : : */
1089 : : void
160 1090 : 219361 : heap_page_prune_and_freeze(PruneFreezeParams *params,
1091 : : PruneFreezeResult *presult,
1092 : : OffsetNumber *off_loc,
1093 : : TransactionId *new_relfrozen_xid,
1094 : : MultiXactId *new_relmin_mxid)
1095 : : {
1096 : : PruneState prstate;
1097 : : bool do_freeze;
1098 : : bool do_prune;
1099 : : bool do_hint_prune;
1100 : : bool do_set_vm;
1101 : : bool did_tuple_hint_fpi;
1102 : 219361 : int64 fpi_before = pgWalUsage.wal_fpi;
1103 : : TransactionId conflict_xid;
1104 : :
1105 : : /* Initialize prstate */
1106 : 219361 : prune_freeze_setup(params,
1107 : : new_relfrozen_xid, new_relmin_mxid,
1108 : : presult, &prstate);
1109 : :
1110 : : /*
1111 : : * If the VM is set but PD_ALL_VISIBLE is clear, fix that corruption
1112 : : * before pruning and freezing so that the page and VM start out in a
1113 : : * consistent state.
1114 : : */
44 1115 [ + + ]: 219361 : if ((prstate.old_vmbits & VISIBILITYMAP_VALID_BITS) &&
1116 [ - + ]: 19552 : !PageIsAllVisible(prstate.page))
44 melanieplageman@gmai 1117 :UNC 0 : heap_page_fix_vm_corruption(&prstate, InvalidOffsetNumber,
1118 : : VM_CORRUPT_MISSING_PAGE_HINT);
1119 : :
1120 : : /*
1121 : : * If the page is already all-frozen, or already all-visible when freezing
1122 : : * is not being attempted, take the fast path, skipping pruning and
1123 : : * freezing code entirely. This must be done after fixing any discrepancy
1124 : : * between the page-level visibility hint and the VM, since that may have
1125 : : * cleared old_vmbits.
1126 : : */
44 melanieplageman@gmai 1127 [ + + ]:GNC 219361 : if ((params->options & HEAP_PAGE_PRUNE_ALLOW_FAST_PATH) != 0 &&
1128 [ + + ]: 218050 : ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) ||
1129 [ + + ]: 204873 : ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) &&
1130 [ - + ]: 5905 : !prstate.attempt_freeze)))
1131 : : {
1132 : 13177 : prune_freeze_fast_path(&prstate, presult);
1133 : 13177 : return;
1134 : : }
1135 : :
1136 : : /*
1137 : : * Examine all line pointers and tuple visibility information to determine
1138 : : * which line pointers should change state and which tuples may be frozen.
1139 : : * Prepare queue of state changes to later be executed in a critical
1140 : : * section.
1141 : : */
61 1142 : 206184 : prune_freeze_plan(&prstate, off_loc);
1143 : :
1144 : : /*
1145 : : * After processing all the live tuples on the page, if the newest xmin
1146 : : * amongst them may be considered running by any snapshot, the page cannot
1147 : : * be all-visible. This should be done before determining whether or not
1148 : : * to opportunistically freeze.
1149 : : */
42 1150 [ + + ]: 206184 : if (prstate.set_all_visible &&
1151 [ + + + + ]: 196958 : TransactionIdIsNormal(prstate.newest_live_xid) &&
1152 : 84982 : GlobalVisTestXidConsideredRunning(prstate.vistest,
1153 : : prstate.newest_live_xid,
1154 : : true))
1155 : 3121 : prstate.set_all_visible = prstate.set_all_frozen = false;
1156 : :
1157 : : /*
1158 : : * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
1159 : : * checking tuple visibility information in prune_freeze_plan() may have
1160 : : * caused an FPI to be emitted.
1161 : : */
160 1162 : 206184 : did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;
1163 : :
762 heikki.linnakangas@i 1164 : 600074 : do_prune = prstate.nredirected > 0 ||
1165 [ + + + + ]: 341482 : prstate.ndead > 0 ||
1166 [ + + ]: 135298 : prstate.nunused > 0;
1167 : :
1168 : : /*
1169 : : * Even if we don't prune anything, if we found a new value for the
1170 : : * pd_prune_xid field or the page was marked full, we will update the hint
1171 : : * bit.
1172 : : */
61 melanieplageman@gmai 1173 [ + + + + ]: 239421 : do_hint_prune = PageGetPruneXid(prstate.page) != prstate.new_prune_xid ||
1174 : 33237 : PageIsFull(prstate.page);
1175 : :
1176 : : /*
1177 : : * Decide if we want to go ahead with freezing according to the freeze
1178 : : * plans we prepared, or not.
1179 : : */
1180 : 206184 : do_freeze = heap_page_will_freeze(did_tuple_hint_fpi,
1181 : : do_prune,
1182 : : do_hint_prune,
1183 : : &prstate);
1184 : :
1185 : : /*
1186 : : * While scanning the line pointers, we did not clear
1187 : : * set_all_visible/set_all_frozen when encountering LP_DEAD items because
1188 : : * we wanted the decision whether or not to freeze the page to be
1189 : : * unaffected by the short-term presence of LP_DEAD items. These LP_DEAD
1190 : : * items are effectively assumed to be LP_UNUSED items in the making. It
1191 : : * doesn't matter which vacuum heap pass (initial pass or final pass) ends
1192 : : * up setting the page all-frozen, as long as the ongoing VACUUM does it.
1193 : : *
1194 : : * Now that we finished determining whether or not to freeze the page,
1195 : : * update set_all_visible and set_all_frozen so that they reflect the true
1196 : : * state of the page for setting PD_ALL_VISIBLE and VM bits.
1197 : : */
166 1198 [ + + ]: 206184 : if (prstate.lpdead_items > 0)
61 1199 : 72874 : prstate.set_all_visible = prstate.set_all_frozen = false;
1200 : :
1201 [ + + - + ]: 206184 : Assert(!prstate.set_all_frozen || prstate.set_all_visible);
36 1202 [ + + - + ]: 206184 : Assert(!prstate.set_all_visible || prstate.attempt_set_vm);
42 1203 [ + + - + ]: 206184 : Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
1204 : :
36 1205 : 206184 : do_set_vm = heap_page_will_set_vm(&prstate, params->reason, do_prune, do_freeze);
1206 : :
1207 : : /*
1208 : : * new_vmbits should be 0 regardless of whether or not the page is
1209 : : * all-visible if we do not intend to set the VM.
1210 : : */
42 1211 [ + + - + ]: 206184 : Assert(do_set_vm || prstate.new_vmbits == 0);
1212 : :
1213 : : /*
1214 : : * The snapshot conflict horizon for the whole record is the most
1215 : : * conservative (newest) horizon required by any change in the record.
1216 : : */
1217 : 206184 : conflict_xid = InvalidTransactionId;
1218 [ + + ]: 206184 : if (do_set_vm)
1219 : 62896 : conflict_xid = prstate.newest_live_xid;
1220 [ + + + + ]: 206184 : if (do_freeze && TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid, conflict_xid))
1221 : 2793 : conflict_xid = prstate.pagefrz.FreezePageConflictXid;
1222 [ + + + + ]: 206184 : if (do_prune && TransactionIdFollows(prstate.latest_xid_removed, conflict_xid))
1223 : 63214 : conflict_xid = prstate.latest_xid_removed;
1224 : :
1225 : : /* Lock vmbuffer before entering a critical section */
1226 [ + + ]: 206184 : if (do_set_vm)
1227 : 62896 : LockBuffer(prstate.vmbuffer, BUFFER_LOCK_EXCLUSIVE);
1228 : :
1229 : : /* Any error while applying the changes is critical */
762 heikki.linnakangas@i 1230 :CBC 206184 : START_CRIT_SECTION();
1231 : :
203 melanieplageman@gmai 1232 [ + + ]:GNC 206184 : if (do_hint_prune)
1233 : : {
1234 : : /*
1235 : : * Update the page's pd_prune_xid field to either zero, or the lowest
1236 : : * XID of any soon-prunable tuple.
1237 : : */
61 1238 : 173033 : ((PageHeader) prstate.page)->pd_prune_xid = prstate.new_prune_xid;
1239 : :
1240 : : /*
1241 : : * Also clear the "page is full" flag, since there's no point in
1242 : : * repeating the prune/defrag process until something else happens to
1243 : : * the page.
1244 : : */
1245 : 173033 : PageClearFull(prstate.page);
1246 : :
1247 : : /*
1248 : : * If that's all we had to do to the page, this is a non-WAL-logged
1249 : : * hint. If we are going to freeze or prune the page or set
1250 : : * PD_ALL_VISIBLE, we will mark the buffer dirty below.
1251 : : *
1252 : : * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
1253 : : * for the VM to be set and PD_ALL_VISIBLE to be clear.
1254 : : */
42 1255 [ + + + + : 173033 : if (!do_freeze && !do_prune && !do_set_vm)
+ + ]
61 1256 : 62804 : MarkBufferDirtyHint(prstate.buffer, true);
1257 : : }
1258 : :
42 1259 [ + + + + : 206184 : if (do_prune || do_freeze || do_set_vm)
+ + ]
1260 : : {
1261 : : /* Apply the planned item changes and repair page fragmentation. */
762 heikki.linnakangas@i 1262 [ + + ]:CBC 133764 : if (do_prune)
1263 : : {
61 melanieplageman@gmai 1264 :GNC 71328 : heap_page_prune_execute(prstate.buffer, false,
1265 : : prstate.redirected, prstate.nredirected,
1266 : : prstate.nowdead, prstate.ndead,
1267 : : prstate.nowunused, prstate.nunused);
1268 : : }
1269 : :
762 heikki.linnakangas@i 1270 [ + + ]:CBC 133764 : if (do_freeze)
61 melanieplageman@gmai 1271 :GNC 26152 : heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);
1272 : :
1273 : : /* Set the visibility map and page visibility hint */
42 1274 [ + + ]: 133764 : if (do_set_vm)
1275 : : {
1276 : : /*
1277 : : * While it is valid for PD_ALL_VISIBLE to be set when the
1278 : : * corresponding VM bit is clear, we strongly prefer to keep them
1279 : : * in sync.
1280 : : *
1281 : : * The heap buffer must be marked dirty before adding it to the
1282 : : * WAL chain when setting the VM. We don't worry about
1283 : : * unnecessarily dirtying the heap buffer if PD_ALL_VISIBLE is
1284 : : * already set, though. It is extremely rare to have a clean heap
1285 : : * buffer with PD_ALL_VISIBLE already set and the VM bits clear,
1286 : : * so there is no point in optimizing it.
1287 : : */
1288 : 62896 : PageSetAllVisible(prstate.page);
1289 : 62896 : PageClearPrunable(prstate.page);
1290 : 62896 : visibilitymap_set(prstate.block, prstate.vmbuffer, prstate.new_vmbits,
1291 : 62896 : prstate.relation->rd_locator);
1292 : : }
1293 : :
61 1294 : 133764 : MarkBufferDirty(prstate.buffer);
1295 : :
1296 : : /*
1297 : : * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
1298 : : */
1299 [ + + + + : 133764 : if (RelationNeedsWAL(prstate.relation))
+ - + - ]
1300 : : {
1301 [ + + + + ]: 192372 : log_heap_prune_and_freeze(prstate.relation, prstate.buffer,
1302 : : do_set_vm ? prstate.vmbuffer : InvalidBuffer,
42 1303 : 61306 : do_set_vm ? prstate.new_vmbits : 0,
1304 : : conflict_xid,
1305 : : true, /* cleanup lock */
1306 : : params->reason,
1307 : : prstate.frozen, prstate.nfrozen,
1308 : : prstate.redirected, prstate.nredirected,
1309 : : prstate.nowdead, prstate.ndead,
1310 : : prstate.nowunused, prstate.nunused);
1311 : : }
1312 : : }
1313 : :
6802 tgl@sss.pgh.pa.us 1314 [ - + ]:CBC 206184 : END_CRIT_SECTION();
1315 : :
42 melanieplageman@gmai 1316 [ + + ]:GNC 206184 : if (do_set_vm)
1317 : 62896 : LockBuffer(prstate.vmbuffer, BUFFER_LOCK_UNLOCK);
1318 : :
1319 : : /*
1320 : : * During its second pass over the heap, VACUUM calls
1321 : : * heap_page_would_be_all_visible() to determine whether a page is
1322 : : * all-visible and all-frozen. The logic here is similar. After completing
1323 : : * pruning and freezing, use an assertion to verify that our results
1324 : : * remain consistent with heap_page_would_be_all_visible(). It's also a
1325 : : * valuable cross-check of the page state after pruning and freezing.
1326 : : */
1327 : : #ifdef USE_ASSERT_CHECKING
1328 [ + + ]: 206184 : if (prstate.set_all_visible)
1329 : : {
1330 : : TransactionId debug_cutoff;
1331 : : bool debug_all_frozen;
1332 : :
1333 [ - + ]: 65011 : Assert(prstate.lpdead_items == 0);
1334 : :
1335 [ - + ]: 65011 : Assert(heap_page_is_all_visible(prstate.relation, prstate.buffer,
1336 : : prstate.vistest,
1337 : : &debug_all_frozen,
1338 : : &debug_cutoff, off_loc));
1339 : :
1340 [ + + - + ]: 65011 : Assert(!TransactionIdIsValid(debug_cutoff) ||
1341 : : debug_cutoff == prstate.newest_live_xid);
1342 : :
1343 : : /*
1344 : : * It's possible the page is composed entirely of frozen tuples but is
1345 : : * not set all-frozen in the VM and did not pass
1346 : : * HEAP_PAGE_PRUNE_FREEZE. In this case, it's possible
1347 : : * heap_page_is_all_visible() finds the page completely frozen, even
1348 : : * though prstate.set_all_frozen is false.
1349 : : */
1350 [ + + - + ]: 65011 : Assert(!prstate.set_all_frozen || debug_all_frozen);
1351 : : }
1352 : : #endif
1353 : :
1354 : : /* Copy information back for caller */
764 heikki.linnakangas@i 1355 : 206184 : presult->ndeleted = prstate.ndeleted;
762 1356 : 206184 : presult->nnewlpdead = prstate.ndead;
1357 : 206184 : presult->nfrozen = prstate.nfrozen;
1358 : 206184 : presult->live_tuples = prstate.live_tuples;
1359 : 206184 : presult->recently_dead_tuples = prstate.recently_dead_tuples;
762 heikki.linnakangas@i 1360 :CBC 206184 : presult->hastup = prstate.hastup;
1361 : :
1362 : 206184 : presult->lpdead_items = prstate.lpdead_items;
1363 : : /* the presult->deadoffsets array was already filled in */
1364 : :
42 melanieplageman@gmai 1365 :GNC 206184 : presult->newly_all_visible = false;
1366 : 206184 : presult->newly_all_frozen = false;
1367 : 206184 : presult->newly_all_visible_frozen = false;
1368 [ + + ]: 206184 : if (do_set_vm)
1369 : : {
1370 [ + + ]: 62896 : if ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1371 : : {
1372 : 58636 : presult->newly_all_visible = true;
1373 [ + + ]: 58636 : if (prstate.set_all_frozen)
1374 : 30397 : presult->newly_all_visible_frozen = true;
1375 : : }
1376 [ + - ]: 4260 : else if ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
1377 [ + - ]: 4260 : prstate.set_all_frozen)
1378 : 4260 : presult->newly_all_frozen = true;
1379 : : }
1380 : :
203 1381 [ + + ]: 206184 : if (prstate.attempt_freeze)
1382 : : {
762 heikki.linnakangas@i 1383 [ + + ]:CBC 77550 : if (presult->nfrozen > 0)
1384 : : {
1385 : 26152 : *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1386 : 26152 : *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1387 : : }
1388 : : else
1389 : : {
1390 : 51398 : *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1391 : 51398 : *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1392 : : }
1393 : : }
6802 tgl@sss.pgh.pa.us 1394 :ECB (110457) : }
1395 : :
1396 : :
1397 : : /*
1398 : : * Perform visibility checks for heap pruning.
1399 : : */
1400 : : static HTSV_Result
61 melanieplageman@gmai 1401 :GNC 18980100 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup)
1402 : : {
1403 : : HTSV_Result res;
1404 : : TransactionId dead_after;
1405 : :
1406 : 18980100 : res = HeapTupleSatisfiesVacuumHorizon(tup, prstate->buffer, &dead_after);
1407 : :
2092 andres@anarazel.de 1408 [ + + ]:CBC 18980100 : if (res != HEAPTUPLE_RECENTLY_DEAD)
1409 : 14774095 : return res;
1410 : :
1411 : : /*
1412 : : * For VACUUM, we must be sure to prune tuples with xmax older than
1413 : : * OldestXmin -- a visibility cutoff determined at the beginning of
1414 : : * vacuuming the relation. OldestXmin is used for freezing determination
1415 : : * and we cannot freeze dead tuples' xmaxes.
1416 : : */
655 melanieplageman@gmai 1417 [ + + ]: 4206005 : if (prstate->cutoffs &&
1418 [ + - ]: 1251384 : TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
1419 [ + - - + : 1251384 : NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
+ + ]
1420 : 922049 : return HEAPTUPLE_DEAD;
1421 : :
1422 : : /*
1423 : : * Determine whether or not the tuple is considered dead when compared
1424 : : * with the provided GlobalVisState. On-access pruning does not provide
1425 : : * VacuumCutoffs. And for vacuum, even if the tuple's xmax is not older
1426 : : * than OldestXmin, GlobalVisTestIsRemovableXid() could find the row dead
1427 : : * if the GlobalVisState has been updated since the beginning of vacuuming
1428 : : * the relation.
1429 : : */
42 melanieplageman@gmai 1430 [ + + ]:GNC 3283956 : if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after, true))
655 melanieplageman@gmai 1431 :CBC 2896608 : return HEAPTUPLE_DEAD;
1432 : :
2092 andres@anarazel.de 1433 : 387348 : return res;
1434 : : }
1435 : :
1436 : :
1437 : : /*
1438 : : * Pruning calculates tuple visibility once and saves the results in an array
1439 : : * of int8. See PruneState.htsv for details. This helper function is meant
1440 : : * to guard against examining visibility status array members which have not
1441 : : * yet been computed.
1442 : : */
1443 : : static inline HTSV_Result
762 heikki.linnakangas@i 1444 : 18964004 : htsv_get_valid_status(int status)
1445 : : {
1446 [ + - - + ]: 18964004 : Assert(status >= HEAPTUPLE_DEAD &&
1447 : : status <= HEAPTUPLE_DELETE_IN_PROGRESS);
1448 : 18964004 : return (HTSV_Result) status;
1449 : : }
1450 : :
/*
 * Prune specified line pointer or a HOT chain originating at line pointer.
 *
 * Tuple visibility information is provided in prstate->htsv.
 *
 * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
 * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
 * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
 * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
 * DEAD, our visibility test is just too coarse to detect it.
 *
 * Pruning must never leave behind a DEAD tuple that still has tuple storage.
 * VACUUM isn't prepared to deal with that case.
 *
 * The root line pointer is redirected to the tuple immediately after the
 * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
 * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
 * tuple, which we treat as a chain of length 1.)
 *
 * We don't actually change the page here. We just add entries to the arrays in
 * prstate showing the changes to be made.  Items to be redirected are added
 * to the redirected[] array (two entries per redirection); items to be set to
 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
 * state are added to nowunused[].  We perform bookkeeping of live tuples,
 * visibility etc. based on what the page will look like after the changes
 * applied.  All that bookkeeping is performed in the heap_prune_record_*()
 * subroutines.  The division of labor is that heap_prune_chain() decides the
 * fate of each tuple, ie. whether it's going to be removed, redirected or
 * left unchanged, and the heap_prune_record_*() subroutines update PruneState
 * based on that outcome.
 */
static void
heap_prune_chain(OffsetNumber maxoff, OffsetNumber rootoffnum,
				 PruneState *prstate)
{
	TransactionId priorXmax = InvalidTransactionId;
	ItemId		rootlp;
	OffsetNumber offnum;
	/* offsets of chain members, in chain order; chainitems[0] is the root */
	OffsetNumber chainitems[MaxHeapTuplesPerPage];
	Page		page = prstate->page;

	/*
	 * After traversing the HOT chain, ndeadchain is the index in chainitems
	 * of the first live successor after the last dead item.
	 */
	int			ndeadchain = 0,
				nchain = 0;

	rootlp = PageGetItemId(page, rootoffnum);

	/* Start from the root tuple */
	offnum = rootoffnum;

	/* while not end of the chain */
	for (;;)
	{
		HeapTupleHeader htup;
		ItemId		lp;

		/* Sanity check (pure paranoia) */
		if (offnum < FirstOffsetNumber)
			break;

		/*
		 * An offset past the end of page's line pointer array is possible
		 * when the array was truncated (original item must have been unused)
		 */
		if (offnum > maxoff)
			break;

		/* If item is already processed, stop --- it must not be same chain */
		if (prstate->processed[offnum])
			break;

		lp = PageGetItemId(page, offnum);

		/*
		 * Unused item obviously isn't part of the chain. Likewise, a dead
		 * line pointer can't be part of the chain. Both of those cases were
		 * already marked as processed.
		 */
		Assert(ItemIdIsUsed(lp));
		Assert(!ItemIdIsDead(lp));

		/*
		 * If we are looking at the redirected root line pointer, jump to the
		 * first normal tuple in the chain.  If we find a redirect somewhere
		 * else, stop --- it must not be same chain.
		 */
		if (ItemIdIsRedirected(lp))
		{
			if (nchain > 0)
				break;			/* not at start of chain */
			chainitems[nchain++] = offnum;
			offnum = ItemIdGetRedirect(rootlp);
			continue;
		}

		Assert(ItemIdIsNormal(lp));

		htup = (HeapTupleHeader) PageGetItem(page, lp);

		/*
		 * Check the tuple XMIN against prior XMAX, if any.  A mismatch means
		 * the prior chain member was updated again and this tuple belongs to
		 * a different (aborted or recycled) chain, so stop here.
		 */
		if (TransactionIdIsValid(priorXmax) &&
			!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
			break;

		/*
		 * OK, this tuple is indeed a member of the chain.
		 */
		chainitems[nchain++] = offnum;

		switch (htsv_get_valid_status(prstate->htsv[offnum]))
		{
			case HEAPTUPLE_DEAD:

				/* Remember the last DEAD tuple seen */
				ndeadchain = nchain;
				HeapTupleHeaderAdvanceConflictHorizon(htup,
													  &prstate->latest_xid_removed);
				/* Advance to next chain member */
				break;

			case HEAPTUPLE_RECENTLY_DEAD:

				/*
				 * We don't need to advance the conflict horizon for
				 * RECENTLY_DEAD tuples, even if we are removing them. This
				 * is because we only remove RECENTLY_DEAD tuples if they
				 * precede a DEAD tuple, and the DEAD tuple must have been
				 * inserted by a newer transaction than the RECENTLY_DEAD
				 * tuple by virtue of being later in the chain.  We will have
				 * advanced the conflict horizon for the DEAD tuple.
				 */

				/*
				 * Advance past RECENTLY_DEAD tuples just in case there's a
				 * DEAD one after them.  We have to make sure that we don't
				 * miss any DEAD tuples, since DEAD tuples that still have
				 * tuple storage after pruning will confuse VACUUM.
				 */
				break;

			case HEAPTUPLE_DELETE_IN_PROGRESS:
			case HEAPTUPLE_LIVE:
			case HEAPTUPLE_INSERT_IN_PROGRESS:
				/* Visible (or may yet be); chain traversal ends here. */
				goto process_chain;

			default:
				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
				goto process_chain;
		}

		/*
		 * If the tuple is not HOT-updated, then we are at the end of this
		 * HOT-update chain.
		 */
		if (!HeapTupleHeaderIsHotUpdated(htup))
			goto process_chain;

		/* HOT implies it can't have moved to different partition */
		Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));

		/*
		 * Advance to next chain member.
		 */
		Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == prstate->block);
		offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
		priorXmax = HeapTupleHeaderGetUpdateXid(htup);
	}

	if (ItemIdIsRedirected(rootlp) && nchain < 2)
	{
		/*
		 * We found a redirect item that doesn't point to a valid follow-on
		 * item.  This can happen if the loop in heap_page_prune_and_freeze()
		 * caused us to visit the dead successor of a redirect item before
		 * visiting the redirect item.  We can clean up by setting the
		 * redirect item to LP_DEAD state or LP_UNUSED if the caller
		 * indicated.
		 */
		heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
		return;
	}

process_chain:

	if (ndeadchain == 0)
	{
		/*
		 * No DEAD tuple was found, so the chain is entirely composed of
		 * normal, unchanged tuples.  Leave it alone.
		 */
		int			i = 0;

		if (ItemIdIsRedirected(rootlp))
		{
			heap_prune_record_unchanged_lp_redirect(prstate, rootoffnum);
			i++;
		}
		for (; i < nchain; i++)
			heap_prune_record_unchanged_lp_normal(prstate, chainitems[i]);
	}
	else if (ndeadchain == nchain)
	{
		/*
		 * The entire chain is dead.  Mark the root line pointer LP_DEAD, and
		 * fully remove the other tuples in the chain.
		 */
		heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
		for (int i = 1; i < nchain; i++)
			heap_prune_record_unused(prstate, chainitems[i], true);
	}
	else
	{
		/*
		 * We found a DEAD tuple in the chain.  Redirect the root line pointer
		 * to the first non-DEAD tuple, and mark as unused each intermediate
		 * item that we are able to remove from the chain.
		 */
		heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
								   ItemIdIsNormal(rootlp));
		for (int i = 1; i < ndeadchain; i++)
			heap_prune_record_unused(prstate, chainitems[i], true);

		/* the rest of tuples in the chain are normal, unchanged tuples */
		for (int i = ndeadchain; i < nchain; i++)
			heap_prune_record_unchanged_lp_normal(prstate, chainitems[i]);
	}
}
1683 : :
1684 : : /* Record lowest soon-prunable XID */
1685 : : static void
44 1686 : 536944 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid,
1687 : : OffsetNumber offnum)
1688 : : {
1689 : : /*
1690 : : * This should exactly match the PageSetPrunable macro. We can't store
1691 : : * directly into the page header yet, so we update working state.
1692 : : */
6632 tgl@sss.pgh.pa.us 1693 [ - + ]:CBC 536944 : Assert(TransactionIdIsNormal(xid));
1694 [ + + + + ]: 1044034 : if (!TransactionIdIsValid(prstate->new_prune_xid) ||
1695 : 507090 : TransactionIdPrecedes(xid, prstate->new_prune_xid))
1696 : 31510 : prstate->new_prune_xid = xid;
1697 : :
1698 : : /*
1699 : : * It's incorrect for a page to be marked all-visible if it contains
1700 : : * prunable items.
1701 : : */
44 melanieplageman@gmai 1702 [ - + ]:GNC 536944 : if (PageIsAllVisible(prstate->page))
44 melanieplageman@gmai 1703 :UNC 0 : heap_page_fix_vm_corruption(prstate, offnum,
1704 : : VM_CORRUPT_TUPLE_VISIBILITY);
6632 tgl@sss.pgh.pa.us 1705 :CBC 536944 : }
1706 : :
1707 : : /* Record line pointer to be redirected */
1708 : : static void
1709 : 72241 : heap_prune_record_redirect(PruneState *prstate,
1710 : : OffsetNumber offnum, OffsetNumber rdoffnum,
1711 : : bool was_normal)
1712 : : {
764 heikki.linnakangas@i 1713 [ - + ]: 72241 : Assert(!prstate->processed[offnum]);
1714 : 72241 : prstate->processed[offnum] = true;
1715 : :
1716 : : /*
1717 : : * Do not mark the redirect target here. It needs to be counted
1718 : : * separately as an unchanged tuple.
1719 : : */
1720 : :
6632 tgl@sss.pgh.pa.us 1721 [ - + ]: 72241 : Assert(prstate->nredirected < MaxHeapTuplesPerPage);
1722 : 72241 : prstate->redirected[prstate->nredirected * 2] = offnum;
1723 : 72241 : prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
1724 : :
1725 : 72241 : prstate->nredirected++;
1726 : :
1727 : : /*
1728 : : * If the root entry had been a normal tuple, we are deleting it, so count
1729 : : * it in the result. But changing a redirect (even to DEAD state) doesn't
1730 : : * count.
1731 : : */
764 heikki.linnakangas@i 1732 [ + + ]: 72241 : if (was_normal)
1733 : 63580 : prstate->ndeleted++;
1734 : :
762 1735 : 72241 : prstate->hastup = true;
6802 tgl@sss.pgh.pa.us 1736 : 72241 : }
1737 : :
1738 : : /* Record line pointer to be marked dead */
1739 : : static void
764 heikki.linnakangas@i 1740 : 3736052 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
1741 : : bool was_normal)
1742 : : {
1743 [ - + ]: 3736052 : Assert(!prstate->processed[offnum]);
1744 : 3736052 : prstate->processed[offnum] = true;
1745 : :
6632 tgl@sss.pgh.pa.us 1746 [ - + ]: 3736052 : Assert(prstate->ndead < MaxHeapTuplesPerPage);
1747 : 3736052 : prstate->nowdead[prstate->ndead] = offnum;
1748 : 3736052 : prstate->ndead++;
1749 : :
1750 : : /*
1751 : : * Deliberately delay unsetting set_all_visible and set_all_frozen until
1752 : : * later during pruning. Removable dead tuples shouldn't preclude freezing
1753 : : * the page.
1754 : : */
1755 : :
1756 : : /* Record the dead offset for vacuum */
762 heikki.linnakangas@i 1757 : 3736052 : prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1758 : :
1759 : : /*
1760 : : * If the root entry had been a normal tuple, we are deleting it, so count
1761 : : * it in the result. But changing a redirect (even to DEAD state) doesn't
1762 : : * count.
1763 : : */
764 1764 [ + + ]: 3736052 : if (was_normal)
1765 : 3723243 : prstate->ndeleted++;
6802 tgl@sss.pgh.pa.us 1766 : 3736052 : }
1767 : :
1768 : : /*
1769 : : * Depending on whether or not the caller set mark_unused_now to true, record that a
1770 : : * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
1771 : : * which we will mark line pointers LP_UNUSED, but we will not mark line
1772 : : * pointers LP_DEAD if mark_unused_now is true.
1773 : : */
1774 : : static void
764 heikki.linnakangas@i 1775 : 3770084 : heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
1776 : : bool was_normal)
1777 : : {
1778 : : /*
1779 : : * If the caller set mark_unused_now to true, we can remove dead tuples
1780 : : * during pruning instead of marking their line pointers dead. Set this
1781 : : * tuple's line pointer LP_UNUSED. We hint that this option is less
1782 : : * likely.
1783 : : */
838 rhaas@postgresql.org 1784 [ + + ]: 3770084 : if (unlikely(prstate->mark_unused_now))
764 heikki.linnakangas@i 1785 : 34032 : heap_prune_record_unused(prstate, offnum, was_normal);
1786 : : else
1787 : 3736052 : heap_prune_record_dead(prstate, offnum, was_normal);
1788 : :
1789 : : /*
1790 : : * It's incorrect for the page to be set all-visible if it contains dead
1791 : : * items. Fix that on the heap page and check the VM for corruption as
1792 : : * well. Do that here rather than in heap_prune_record_dead() so we also
1793 : : * cover tuples that are directly marked LP_UNUSED via mark_unused_now.
1794 : : */
44 melanieplageman@gmai 1795 [ - + ]:GNC 3770084 : if (PageIsAllVisible(prstate->page))
44 melanieplageman@gmai 1796 :UNC 0 : heap_page_fix_vm_corruption(prstate, offnum, VM_CORRUPT_LPDEAD);
838 rhaas@postgresql.org 1797 :CBC 3770084 : }
1798 : :
1799 : : /* Record line pointer to be marked unused */
1800 : : static void
764 heikki.linnakangas@i 1801 : 109036 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
1802 : : {
1803 [ - + ]: 109036 : Assert(!prstate->processed[offnum]);
1804 : 109036 : prstate->processed[offnum] = true;
1805 : :
6632 tgl@sss.pgh.pa.us 1806 [ - + ]: 109036 : Assert(prstate->nunused < MaxHeapTuplesPerPage);
1807 : 109036 : prstate->nowunused[prstate->nunused] = offnum;
1808 : 109036 : prstate->nunused++;
1809 : :
1810 : : /*
1811 : : * If the root entry had been a normal tuple, we are deleting it, so count
1812 : : * it in the result. But changing a redirect (even to DEAD state) doesn't
1813 : : * count.
1814 : : */
764 heikki.linnakangas@i 1815 [ + + ]: 109036 : if (was_normal)
1816 : 106833 : prstate->ndeleted++;
6632 tgl@sss.pgh.pa.us 1817 : 109036 : }
1818 : :
1819 : : /*
1820 : : * Record an unused line pointer that is left unchanged.
1821 : : */
1822 : : static void
61 melanieplageman@gmai 1823 :GNC 259495 : heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum)
1824 : : {
762 heikki.linnakangas@i 1825 [ - + ]:CBC 259495 : Assert(!prstate->processed[offnum]);
1826 : 259495 : prstate->processed[offnum] = true;
1827 : 259495 : }
1828 : :
/*
 * Record line pointer that is left unchanged. We consider freezing it, and
 * update bookkeeping of tuple counts and page visibility.
 */
static void
heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum)
{
	HeapTupleHeader htup;
	TransactionId xmin;
	Page		page = prstate->page;

	Assert(!prstate->processed[offnum]);
	prstate->processed[offnum] = true;

	prstate->hastup = true;		/* the page is not empty */

	/*
	 * The criteria for counting a tuple as live in this block need to match
	 * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
	 * ANALYZE may produce wildly different reltuples values, e.g. when there
	 * are many recently-dead tuples.
	 *
	 * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
	 * can't run inside a transaction block, which makes some cases impossible
	 * (e.g. in-progress insert from the same transaction).
	 *
	 * HEAPTUPLE_DEAD are handled by the other heap_prune_record_*()
	 * subroutines.  They don't count dead items like acquire_sample_rows()
	 * does, because we assume that all dead items will become LP_UNUSED
	 * before VACUUM finishes.  This difference is only superficial.  VACUUM
	 * effectively agrees with ANALYZE about DEAD items, in the end.  VACUUM
	 * won't remember LP_DEAD items, but only because they're not supposed to
	 * be left behind when it is done.  (Cases where we bypass index vacuuming
	 * will violate this optimistic assumption, but the overall impact of that
	 * should be negligible.)
	 */
	htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));

	switch (prstate->htsv[offnum])
	{
		case HEAPTUPLE_LIVE:

			/*
			 * Count it as live.  Not only is this natural, but it's also what
			 * acquire_sample_rows() does.
			 */
			prstate->live_tuples++;

			/*
			 * Is the tuple definitely visible to all transactions?
			 *
			 * NB: Like with per-tuple hint bits, we can't set the
			 * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
			 * See SetHintBits for more info.  Check that the tuple is hinted
			 * xmin-committed because of that.
			 */
			if (!HeapTupleHeaderXminCommitted(htup))
			{
				/* not hinted committed: page can't be all-visible/frozen */
				prstate->set_all_visible = false;
				prstate->set_all_frozen = false;
				break;
			}

			/*
			 * The inserter definitely committed.  But we don't know if it is
			 * old enough that everyone sees it as committed.  Later, after
			 * processing all the tuples on the page, we'll check if there is
			 * any snapshot that still considers the newest xid on the page to
			 * be running.  If so, we don't consider the page all-visible.
			 */
			xmin = HeapTupleHeaderGetXmin(htup);

			/* Track newest xmin on page. */
			if (TransactionIdFollows(xmin, prstate->newest_live_xid) &&
				TransactionIdIsNormal(xmin))
				prstate->newest_live_xid = xmin;

			break;

		case HEAPTUPLE_RECENTLY_DEAD:
			prstate->recently_dead_tuples++;
			prstate->set_all_visible = false;
			prstate->set_all_frozen = false;

			/*
			 * This tuple will soon become DEAD.  Update the hint field so
			 * that the page is reconsidered for pruning in future.
			 */
			heap_prune_record_prunable(prstate,
									   HeapTupleHeaderGetUpdateXid(htup),
									   offnum);
			break;

		case HEAPTUPLE_INSERT_IN_PROGRESS:

			/*
			 * We do not count these rows as live, because we expect the
			 * inserting transaction to update the counters at commit, and we
			 * assume that will happen only after we report our results.  This
			 * assumption is a bit shaky, but it is what acquire_sample_rows()
			 * does, so be consistent.
			 */
			prstate->set_all_visible = false;
			prstate->set_all_frozen = false;

			/*
			 * Though there is nothing "prunable" on the page, we maintain
			 * pd_prune_xid for inserts so that we have the opportunity to
			 * mark them all-visible during the next round of pruning.
			 */
			heap_prune_record_prunable(prstate,
									   HeapTupleHeaderGetXmin(htup),
									   offnum);
			break;

		case HEAPTUPLE_DELETE_IN_PROGRESS:

			/*
			 * This an expected case during concurrent vacuum.  Count such
			 * rows as live.  As above, we assume the deleting transaction
			 * will commit and update the counters after we report.
			 */
			prstate->live_tuples++;
			prstate->set_all_visible = false;
			prstate->set_all_frozen = false;

			/*
			 * This tuple may soon become DEAD.  Update the hint field so that
			 * the page is reconsidered for pruning in future.
			 */
			heap_prune_record_prunable(prstate,
									   HeapTupleHeaderGetUpdateXid(htup),
									   offnum);
			break;

		default:

			/*
			 * DEAD tuples should've been passed to heap_prune_record_dead()
			 * or heap_prune_record_unused() instead.
			 */
			elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
				 prstate->htsv[offnum]);
			break;
	}

	/* Consider freezing any normal tuples which will not be removed */
	if (prstate->attempt_freeze)
	{
		bool		totally_frozen;

		if ((heap_prepare_freeze_tuple(htup,
									   prstate->cutoffs,
									   &prstate->pagefrz,
									   &prstate->frozen[prstate->nfrozen],
									   &totally_frozen)))
		{
			/* Save prepared freeze plan for later */
			prstate->frozen[prstate->nfrozen++].offset = offnum;
		}

		/*
		 * If any tuple isn't either totally frozen already or eligible to
		 * become totally frozen (according to its freeze plan), then the page
		 * definitely cannot be set all-frozen in the visibility map later on.
		 */
		if (!totally_frozen)
			prstate->set_all_frozen = false;
	}
}
1999 : :
2000 : :
2001 : : /*
2002 : : * Record line pointer that was already LP_DEAD and is left unchanged.
2003 : : */
2004 : : static void
61 melanieplageman@gmai 2005 :GNC 1464013 : heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum)
2006 : : {
762 heikki.linnakangas@i 2007 [ - + ]:CBC 1464013 : Assert(!prstate->processed[offnum]);
2008 : 1464013 : prstate->processed[offnum] = true;
2009 : :
2010 : : /*
2011 : : * Deliberately don't set hastup for LP_DEAD items. We make the soft
2012 : : * assumption that any LP_DEAD items encountered here will become
2013 : : * LP_UNUSED later on, before count_nondeletable_pages is reached. If we
2014 : : * don't make this assumption then rel truncation will only happen every
2015 : : * other VACUUM, at most. Besides, VACUUM must treat
2016 : : * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
2017 : : * handled (handled here, or handled later on).
2018 : : *
2019 : : * Similarly, don't unset set_all_visible and set_all_frozen until later,
2020 : : * at the end of heap_page_prune_and_freeze(). This will allow us to
2021 : : * attempt to freeze the page after pruning. As long as we unset it
2022 : : * before updating the visibility map, this will be correct.
2023 : : */
2024 : :
2025 : : /* Record the dead offset for vacuum */
2026 : 1464013 : prstate->deadoffsets[prstate->lpdead_items++] = offnum;
2027 : :
2028 : : /*
2029 : : * It's incorrect for a page to be marked all-visible if it contains dead
2030 : : * items.
2031 : : */
44 melanieplageman@gmai 2032 [ - + ]:GNC 1464013 : if (PageIsAllVisible(prstate->page))
44 melanieplageman@gmai 2033 :UNC 0 : heap_page_fix_vm_corruption(prstate, offnum, VM_CORRUPT_LPDEAD);
762 heikki.linnakangas@i 2034 :CBC 1464013 : }
2035 : :
2036 : : /*
2037 : : * Record LP_REDIRECT that is left unchanged.
2038 : : */
2039 : : static void
2040 : 199889 : heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
2041 : : {
2042 : : /*
2043 : : * A redirect line pointer doesn't count as a live tuple.
2044 : : *
2045 : : * If we leave a redirect line pointer in place, there will be another
2046 : : * tuple on the page that it points to. We will do the bookkeeping for
2047 : : * that separately. So we have nothing to do here, except remember that
2048 : : * we processed this item.
2049 : : */
764 2050 [ - + ]: 199889 : Assert(!prstate->processed[offnum]);
2051 : 199889 : prstate->processed[offnum] = true;
2052 : 199889 : }
2053 : :
2054 : : /*
2055 : : * Perform the actual page changes needed by heap_page_prune_and_freeze().
2056 : : *
2057 : : * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
2058 : : * as unused, not redirecting or removing anything else. The
2059 : : * PageRepairFragmentation() call is skipped in that case.
2060 : : *
2061 : : * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
2062 : : * the buffer. If it is set, an ordinary exclusive lock suffices.
2063 : : */
2064 : : void
771 2065 : 82478 : heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
2066 : : OffsetNumber *redirected, int nredirected,
2067 : : OffsetNumber *nowdead, int ndead,
2068 : : OffsetNumber *nowunused, int nunused)
2069 : : {
249 peter@eisentraut.org 2070 :GNC 82478 : Page page = BufferGetPage(buffer);
2071 : : OffsetNumber *offnum;
2072 : : HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
2073 : :
2074 : : /* Shouldn't be called unless there's something to do */
1855 pg@bowt.ie 2075 [ + + + + :CBC 82478 : Assert(nredirected > 0 || ndead > 0 || nunused > 0);
- + ]
2076 : :
2077 : : /* If 'lp_truncate_only', we can only remove already-dead line pointers */
771 heikki.linnakangas@i 2078 [ + + + - : 82478 : Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
- + ]
2079 : :
2080 : : /* Update all redirected line pointers */
6632 tgl@sss.pgh.pa.us 2081 : 82478 : offnum = redirected;
1643 pg@bowt.ie 2082 [ + + ]: 173820 : for (int i = 0; i < nredirected; i++)
2083 : : {
6632 tgl@sss.pgh.pa.us 2084 : 91342 : OffsetNumber fromoff = *offnum++;
2085 : 91342 : OffsetNumber tooff = *offnum++;
2086 : 91342 : ItemId fromlp = PageGetItemId(page, fromoff);
2087 : : ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
2088 : :
2089 : : #ifdef USE_ASSERT_CHECKING
2090 : :
2091 : : /*
2092 : : * Any existing item that we set as an LP_REDIRECT (any 'from' item)
2093 : : * must be the first item from a HOT chain. If the item has tuple
2094 : : * storage then it can't be a heap-only tuple. Otherwise we are just
2095 : : * maintaining an existing LP_REDIRECT from an existing HOT chain that
2096 : : * has been pruned at least once before now.
2097 : : */
1643 pg@bowt.ie 2098 [ + + ]: 91342 : if (!ItemIdIsRedirected(fromlp))
2099 : : {
2100 [ + - - + ]: 82227 : Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
2101 : :
2102 : 82227 : htup = (HeapTupleHeader) PageGetItem(page, fromlp);
2103 [ - + ]: 82227 : Assert(!HeapTupleHeaderIsHeapOnly(htup));
2104 : : }
2105 : : else
2106 : : {
2107 : : /* We shouldn't need to redundantly set the redirect */
2108 [ - + ]: 9115 : Assert(ItemIdGetRedirect(fromlp) != tooff);
2109 : : }
2110 : :
2111 : : /*
2112 : : * The item that we're about to set as an LP_REDIRECT (the 'from'
2113 : : * item) will point to an existing item (the 'to' item) that is
2114 : : * already a heap-only tuple. There can be at most one LP_REDIRECT
2115 : : * item per HOT chain.
2116 : : *
2117 : : * We need to keep around an LP_REDIRECT item (after original
2118 : : * non-heap-only root tuple gets pruned away) so that it's always
2119 : : * possible for VACUUM to easily figure out what TID to delete from
2120 : : * indexes when an entire HOT chain becomes dead. A heap-only tuple
2121 : : * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
2122 : : * tuple can.
2123 : : *
2124 : : * This check may miss problems, e.g. the target of a redirect could
2125 : : * be marked as unused subsequently. The page_verify_redirects() check
2126 : : * below will catch such problems.
2127 : : */
2128 : 91342 : tolp = PageGetItemId(page, tooff);
2129 [ + - - + ]: 91342 : Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
2130 : 91342 : htup = (HeapTupleHeader) PageGetItem(page, tolp);
2131 [ - + ]: 91342 : Assert(HeapTupleHeaderIsHeapOnly(htup));
2132 : : #endif
2133 : :
5930 tgl@sss.pgh.pa.us 2134 : 91342 : ItemIdSetRedirect(fromlp, tooff);
2135 : : }
2136 : :
2137 : : /* Update all now-dead line pointers */
6632 2138 : 82478 : offnum = nowdead;
1643 pg@bowt.ie 2139 [ + + ]: 4097974 : for (int i = 0; i < ndead; i++)
2140 : : {
6632 tgl@sss.pgh.pa.us 2141 : 4015496 : OffsetNumber off = *offnum++;
2142 : 4015496 : ItemId lp = PageGetItemId(page, off);
2143 : :
2144 : : #ifdef USE_ASSERT_CHECKING
2145 : :
2146 : : /*
2147 : : * An LP_DEAD line pointer must be left behind when the original item
2148 : : * (which is dead to everybody) could still be referenced by a TID in
2149 : : * an index. This should never be necessary with any individual
2150 : : * heap-only tuple item, though. (It's not clear how much of a problem
2151 : : * that would be, but there is no reason to allow it.)
2152 : : */
1643 pg@bowt.ie 2153 [ + + ]: 4015496 : if (ItemIdHasStorage(lp))
2154 : : {
2155 [ - + ]: 4001431 : Assert(ItemIdIsNormal(lp));
2156 : 4001431 : htup = (HeapTupleHeader) PageGetItem(page, lp);
2157 [ - + ]: 4001431 : Assert(!HeapTupleHeaderIsHeapOnly(htup));
2158 : : }
2159 : : else
2160 : : {
2161 : : /* Whole HOT chain becomes dead */
2162 [ - + ]: 14065 : Assert(ItemIdIsRedirected(lp));
2163 : : }
2164 : : #endif
2165 : :
6632 tgl@sss.pgh.pa.us 2166 : 4015496 : ItemIdSetDead(lp);
2167 : : }
2168 : :
2169 : : /* Update all now-unused line pointers */
2170 : 82478 : offnum = nowunused;
1643 pg@bowt.ie 2171 [ + + ]: 434341 : for (int i = 0; i < nunused; i++)
2172 : : {
6632 tgl@sss.pgh.pa.us 2173 : 351863 : OffsetNumber off = *offnum++;
2174 : 351863 : ItemId lp = PageGetItemId(page, off);
2175 : :
2176 : : #ifdef USE_ASSERT_CHECKING
2177 : :
771 heikki.linnakangas@i 2178 [ + + ]: 351863 : if (lp_truncate_only)
2179 : : {
2180 : : /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
2181 [ + - - + ]: 220403 : Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
2182 : : }
2183 : : else
2184 : : {
2185 : : /*
2186 : : * When heap_page_prune_and_freeze() was called, mark_unused_now
2187 : : * may have been passed as true, which allows would-be LP_DEAD
2188 : : * items to be made LP_UNUSED instead. This is only possible if
2189 : : * the relation has no indexes. If there are any dead items, then
2190 : : * mark_unused_now was not true and every item being marked
2191 : : * LP_UNUSED must refer to a heap-only tuple.
2192 : : */
2193 [ + + ]: 131460 : if (ndead > 0)
2194 : : {
2195 [ + - - + ]: 63416 : Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
2196 : 63416 : htup = (HeapTupleHeader) PageGetItem(page, lp);
2197 [ - + ]: 63416 : Assert(HeapTupleHeaderIsHeapOnly(htup));
2198 : : }
2199 : : else
2200 [ - + ]: 68044 : Assert(ItemIdIsUsed(lp));
2201 : : }
2202 : :
2203 : : #endif
2204 : :
6632 tgl@sss.pgh.pa.us 2205 : 351863 : ItemIdSetUnused(lp);
2206 : : }
2207 : :
771 heikki.linnakangas@i 2208 [ + + ]: 82478 : if (lp_truncate_only)
2209 : 2917 : PageTruncateLinePointerArray(page);
2210 : : else
2211 : : {
2212 : : /*
2213 : : * Finally, repair any fragmentation, and update the page's hint bit
2214 : : * about whether it has free pointers.
2215 : : */
2216 : 79561 : PageRepairFragmentation(page);
2217 : :
2218 : : /*
2219 : : * Now that the page has been modified, assert that redirect items
2220 : : * still point to valid targets.
2221 : : */
2222 : 79561 : page_verify_redirects(page);
2223 : : }
1625 andres@anarazel.de 2224 : 82478 : }
2225 : :
2226 : :
2227 : : /*
2228 : : * If built with assertions, verify that all LP_REDIRECT items point to a
2229 : : * valid item.
2230 : : *
2231 : : * One way that bugs related to HOT pruning show is redirect items pointing to
2232 : : * removed tuples. It's not trivial to reliably check that marking an item
2233 : : * unused will not orphan a redirect item during heap_prune_chain() /
2234 : : * heap_page_prune_execute(), so we additionally check the whole page after
2235 : : * pruning. Without this check such bugs would typically only cause asserts
2236 : : * later, potentially well after the corruption has been introduced.
2237 : : *
2238 : : * Also check comments in heap_page_prune_execute()'s redirection loop.
2239 : : */
2240 : : static void
2241 : 79561 : page_verify_redirects(Page page)
2242 : : {
2243 : : #ifdef USE_ASSERT_CHECKING
2244 : : OffsetNumber offnum;
2245 : : OffsetNumber maxoff;
2246 : :
2247 : 79561 : maxoff = PageGetMaxOffsetNumber(page);
2248 : 79561 : for (offnum = FirstOffsetNumber;
2249 [ + + ]: 7953836 : offnum <= maxoff;
2250 : 7874275 : offnum = OffsetNumberNext(offnum))
2251 : : {
2252 : 7874275 : ItemId itemid = PageGetItemId(page, offnum);
2253 : : OffsetNumber targoff;
2254 : : ItemId targitem;
2255 : : HeapTupleHeader htup;
2256 : :
2257 [ + + ]: 7874275 : if (!ItemIdIsRedirected(itemid))
2258 : 7609795 : continue;
2259 : :
2260 : 264480 : targoff = ItemIdGetRedirect(itemid);
2261 : 264480 : targitem = PageGetItemId(page, targoff);
2262 : :
2263 [ - + ]: 264480 : Assert(ItemIdIsUsed(targitem));
2264 [ - + ]: 264480 : Assert(ItemIdIsNormal(targitem));
2265 [ - + ]: 264480 : Assert(ItemIdHasStorage(targitem));
2266 : 264480 : htup = (HeapTupleHeader) PageGetItem(page, targitem);
2267 [ - + ]: 264480 : Assert(HeapTupleHeaderIsHeapOnly(htup));
2268 : : }
2269 : : #endif
6802 tgl@sss.pgh.pa.us 2270 : 79561 : }
2271 : :
2272 : :
2273 : : /*
2274 : : * For all items in this page, find their respective root line pointers.
2275 : : * If item k is part of a HOT-chain with root at item j, then we set
2276 : : * root_offsets[k - 1] = j.
2277 : : *
2278 : : * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
2279 : : * Unused entries are filled with InvalidOffsetNumber (zero).
2280 : : *
2281 : : * The function must be called with at least share lock on the buffer, to
2282 : : * prevent concurrent prune operations.
2283 : : *
2284 : : * Note: The information collected here is valid only as long as the caller
2285 : : * holds a pin on the buffer. Once pin is released, a tuple might be pruned
2286 : : * and reused by a completely unrelated tuple.
2287 : : */
2288 : : void
2289 : 139054 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
2290 : : {
2291 : : OffsetNumber offnum,
2292 : : maxoff;
2293 : :
2091 alvherre@alvh.no-ip. 2294 [ + - - + : 139054 : MemSet(root_offsets, InvalidOffsetNumber,
- - - - -
- ]
2295 : : MaxHeapTuplesPerPage * sizeof(OffsetNumber));
2296 : :
6802 tgl@sss.pgh.pa.us 2297 : 139054 : maxoff = PageGetMaxOffsetNumber(page);
6566 bruce@momjian.us 2298 [ + + ]: 11653242 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2299 : : {
6746 2300 : 11514188 : ItemId lp = PageGetItemId(page, offnum);
2301 : : HeapTupleHeader htup;
2302 : : OffsetNumber nextoffnum;
2303 : : TransactionId priorXmax;
2304 : :
2305 : : /* skip unused and dead items */
6802 tgl@sss.pgh.pa.us 2306 [ + + + + ]: 11514188 : if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
2307 : 9862 : continue;
2308 : :
2309 [ + + ]: 11504326 : if (ItemIdIsNormal(lp))
2310 : : {
2311 : 11500510 : htup = (HeapTupleHeader) PageGetItem(page, lp);
2312 : :
2313 : : /*
2314 : : * Check if this tuple is part of a HOT-chain rooted at some other
2315 : : * tuple. If so, skip it for now; we'll process it when we find
2316 : : * its root.
2317 : : */
2318 [ + + ]: 11500510 : if (HeapTupleHeaderIsHeapOnly(htup))
2319 : 4288 : continue;
2320 : :
2321 : : /*
2322 : : * This is either a plain tuple or the root of a HOT-chain.
2323 : : * Remember it in the mapping.
2324 : : */
2325 : 11496222 : root_offsets[offnum - 1] = offnum;
2326 : :
2327 : : /* If it's not the start of a HOT-chain, we're done with it */
2328 [ + + ]: 11496222 : if (!HeapTupleHeaderIsHotUpdated(htup))
2329 : 11495898 : continue;
2330 : :
2331 : : /* Set up to scan the HOT-chain */
2332 : 324 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
4850 alvherre@alvh.no-ip. 2333 : 324 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
2334 : : }
2335 : : else
2336 : : {
2337 : : /* Must be a redirect item. We do not set its root_offsets entry */
6802 tgl@sss.pgh.pa.us 2338 [ - + ]: 3816 : Assert(ItemIdIsRedirected(lp));
2339 : : /* Set up to scan the HOT-chain */
2340 : 3816 : nextoffnum = ItemIdGetRedirect(lp);
2341 : 3816 : priorXmax = InvalidTransactionId;
2342 : : }
2343 : :
2344 : : /*
2345 : : * Now follow the HOT-chain and collect other tuples in the chain.
2346 : : *
2347 : : * Note: Even though this is a nested loop, the complexity of the
2348 : : * function is O(N) because a tuple in the page should be visited not
2349 : : * more than twice, once in the outer loop and once in HOT-chain
2350 : : * chases.
2351 : : */
2352 : : for (;;)
2353 : : {
2354 : : /* Sanity check (pure paranoia) */
1686 pg@bowt.ie 2355 [ - + ]: 4284 : if (offnum < FirstOffsetNumber)
1686 pg@bowt.ie 2356 :UBC 0 : break;
2357 : :
2358 : : /*
2359 : : * An offset past the end of page's line pointer array is possible
2360 : : * when the array was truncated
2361 : : */
1686 pg@bowt.ie 2362 [ - + ]:CBC 4284 : if (offnum > maxoff)
1854 pg@bowt.ie 2363 :UBC 0 : break;
2364 : :
6802 tgl@sss.pgh.pa.us 2365 :CBC 4284 : lp = PageGetItemId(page, nextoffnum);
2366 : :
2367 : : /* Check for broken chains */
2368 [ - + ]: 4284 : if (!ItemIdIsNormal(lp))
6802 tgl@sss.pgh.pa.us 2369 :UBC 0 : break;
2370 : :
6802 tgl@sss.pgh.pa.us 2371 :CBC 4284 : htup = (HeapTupleHeader) PageGetItem(page, lp);
2372 : :
2373 [ + + - + ]: 4752 : if (TransactionIdIsValid(priorXmax) &&
3106 alvherre@alvh.no-ip. 2374 : 468 : !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
6802 tgl@sss.pgh.pa.us 2375 :UBC 0 : break;
2376 : :
2377 : : /* Remember the root line pointer for this item */
6802 tgl@sss.pgh.pa.us 2378 :CBC 4284 : root_offsets[nextoffnum - 1] = offnum;
2379 : :
2380 : : /* Advance to next chain member, if any */
2381 [ + + ]: 4284 : if (!HeapTupleHeaderIsHotUpdated(htup))
2382 : 4140 : break;
2383 : :
2384 : : /* HOT implies it can't have moved to different partition */
2950 andres@anarazel.de 2385 [ - + ]: 144 : Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
2386 : :
6802 tgl@sss.pgh.pa.us 2387 : 144 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
4850 alvherre@alvh.no-ip. 2388 : 144 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
2389 : : }
2390 : : }
6802 tgl@sss.pgh.pa.us 2391 : 139054 : }
2392 : :
2393 : :
2394 : : /*
2395 : : * Compare fields that describe actions required to freeze tuple with caller's
2396 : : * open plan. If everything matches then the frz tuple plan is equivalent to
2397 : : * caller's plan.
2398 : : */
2399 : : static inline bool
771 heikki.linnakangas@i 2400 : 1344416 : heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
2401 : : {
2402 [ + + ]: 1344416 : if (plan->xmax == frz->xmax &&
2403 [ + + ]: 1344378 : plan->t_infomask2 == frz->t_infomask2 &&
2404 [ + + ]: 1343406 : plan->t_infomask == frz->t_infomask &&
2405 [ + - ]: 1340195 : plan->frzflags == frz->frzflags)
2406 : 1340195 : return true;
2407 : :
2408 : : /* Caller must call heap_log_freeze_new_plan again for frz */
2409 : 4221 : return false;
2410 : : }
2411 : :
2412 : : /*
2413 : : * Comparator used to deduplicate the freeze plans used in WAL records.
2414 : : */
2415 : : static int
2416 : 1637222 : heap_log_freeze_cmp(const void *arg1, const void *arg2)
2417 : : {
113 peter@eisentraut.org 2418 :GNC 1637222 : const HeapTupleFreeze *frz1 = arg1;
2419 : 1637222 : const HeapTupleFreeze *frz2 = arg2;
2420 : :
771 heikki.linnakangas@i 2421 [ + + ]:CBC 1637222 : if (frz1->xmax < frz2->xmax)
2422 : 62 : return -1;
2423 [ + + ]: 1637160 : else if (frz1->xmax > frz2->xmax)
2424 : 85 : return 1;
2425 : :
2426 [ + + ]: 1637075 : if (frz1->t_infomask2 < frz2->t_infomask2)
2427 : 5772 : return -1;
2428 [ + + ]: 1631303 : else if (frz1->t_infomask2 > frz2->t_infomask2)
2429 : 6264 : return 1;
2430 : :
2431 [ + + ]: 1625039 : if (frz1->t_infomask < frz2->t_infomask)
2432 : 12964 : return -1;
2433 [ + + ]: 1612075 : else if (frz1->t_infomask > frz2->t_infomask)
2434 : 19802 : return 1;
2435 : :
2436 [ - + ]: 1592273 : if (frz1->frzflags < frz2->frzflags)
771 heikki.linnakangas@i 2437 :UBC 0 : return -1;
771 heikki.linnakangas@i 2438 [ - + ]:CBC 1592273 : else if (frz1->frzflags > frz2->frzflags)
771 heikki.linnakangas@i 2439 :UBC 0 : return 1;
2440 : :
2441 : : /*
2442 : : * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
2443 : : * (So the tuples will share a single canonical freeze plan.)
2444 : : *
2445 : : * We tiebreak on page offset number to keep each freeze plan's page
2446 : : * offset number array individually sorted. (Unnecessary, but be tidy.)
2447 : : */
771 heikki.linnakangas@i 2448 [ + + ]:CBC 1592273 : if (frz1->offset < frz2->offset)
2449 : 1461281 : return -1;
2450 [ + - ]: 130992 : else if (frz1->offset > frz2->offset)
2451 : 130992 : return 1;
2452 : :
771 heikki.linnakangas@i 2453 :UBC 0 : Assert(false);
2454 : : return 0;
2455 : : }
2456 : :
2457 : : /*
2458 : : * Start new plan initialized using tuple-level actions. At least one tuple
2459 : : * will have steps required to freeze described by caller's plan during REDO.
2460 : : */
2461 : : static inline void
771 heikki.linnakangas@i 2462 :CBC 30370 : heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
2463 : : {
2464 : 30370 : plan->xmax = frz->xmax;
2465 : 30370 : plan->t_infomask2 = frz->t_infomask2;
2466 : 30370 : plan->t_infomask = frz->t_infomask;
2467 : 30370 : plan->frzflags = frz->frzflags;
2468 : 30370 : plan->ntuples = 1; /* for now */
2469 : 30370 : }
2470 : :
2471 : : /*
2472 : : * Deduplicate tuple-based freeze plans so that each distinct set of
2473 : : * processing steps is only stored once in the WAL record.
2474 : : * Called during original execution of freezing (for logged relations).
2475 : : *
2476 : : * Return value is number of plans set in *plans_out for caller. Also writes
2477 : : * an array of offset numbers into *offsets_out output argument for caller
2478 : : * (actually there is one array per freeze plan, but that's not of immediate
2479 : : * concern to our caller).
2480 : : */
2481 : : static int
2482 : 26149 : heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
2483 : : xlhp_freeze_plan *plans_out,
2484 : : OffsetNumber *offsets_out)
2485 : : {
2486 : 26149 : int nplans = 0;
2487 : :
2488 : : /* Sort tuple-based freeze plans in the order required to deduplicate */
2489 : 26149 : qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
2490 : :
2491 [ + + ]: 1396714 : for (int i = 0; i < ntuples; i++)
2492 : : {
2493 : 1370565 : HeapTupleFreeze *frz = tuples + i;
2494 : :
2495 [ + + ]: 1370565 : if (i == 0)
2496 : : {
2497 : : /* New canonical freeze plan starting with first tup */
2498 : 26149 : heap_log_freeze_new_plan(plans_out, frz);
2499 : 26149 : nplans++;
2500 : : }
2501 [ + + ]: 1344416 : else if (heap_log_freeze_eq(plans_out, frz))
2502 : : {
2503 : : /* tup matches open canonical plan -- include tup in it */
2504 [ - + ]: 1340195 : Assert(offsets_out[i - 1] < frz->offset);
2505 : 1340195 : plans_out->ntuples++;
2506 : : }
2507 : : else
2508 : : {
2509 : : /* Tup doesn't match current plan -- done with it now */
2510 : 4221 : plans_out++;
2511 : :
2512 : : /* New canonical freeze plan starting with this tup */
2513 : 4221 : heap_log_freeze_new_plan(plans_out, frz);
2514 : 4221 : nplans++;
2515 : : }
2516 : :
2517 : : /*
2518 : : * Save page offset number in dedicated buffer in passing.
2519 : : *
2520 : : * REDO routine relies on the record's offset numbers array grouping
2521 : : * offset numbers by freeze plan. The sort order within each grouping
2522 : : * is ascending offset number order, just to keep things tidy.
2523 : : */
2524 : 1370565 : offsets_out[i] = frz->offset;
2525 : : }
2526 : :
2527 [ + - - + ]: 26149 : Assert(nplans > 0 && nplans <= ntuples);
2528 : :
2529 : 26149 : return nplans;
2530 : : }
2531 : :
2532 : : /*
2533 : : * Write an XLOG_HEAP2_PRUNE* WAL record
2534 : : *
2535 : : * This is used for several different page maintenance operations:
2536 : : *
2537 : : * - Page pruning, in VACUUM's 1st pass or on access: Some items are
2538 : : * redirected, some marked dead, and some removed altogether.
2539 : : *
2540 : : * - Freezing: Items are marked as 'frozen'.
2541 : : *
2542 : : * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
2543 : : *
2544 : : * They have enough commonalities that we use a single WAL record for them
2545 : : * all.
2546 : : *
2547 : : * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
2548 : : * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
2549 : : * replaying 'unused' items depends on whether they were all previously marked
2550 : : * as dead.
2551 : : *
2552 : : * If the VM is being updated, vmflags will contain the bits to set. In this
2553 : : * case, vmbuffer should already have been updated and marked dirty and should
2554 : : * still be pinned and locked.
2555 : : *
2556 : : * Note: This function scribbles on the 'frozen' array.
2557 : : *
2558 : : * Note: This is called in a critical section, so careful what you do here.
2559 : : */
void
log_heap_prune_and_freeze(Relation relation, Buffer buffer,
                          Buffer vmbuffer, uint8 vmflags,
                          TransactionId conflict_xid,
                          bool cleanup_lock,
                          PruneReason reason,
                          HeapTupleFreeze *frozen, int nfrozen,
                          OffsetNumber *redirected, int nredirected,
                          OffsetNumber *dead, int ndead,
                          OffsetNumber *unused, int nunused)
{
    xl_heap_prune xlrec;
    XLogRecPtr  recptr;
    uint8       info;
    uint8       regbuf_flags_heap;

    Page        heap_page = BufferGetPage(buffer);

    /* The following local variables hold data registered in the WAL record: */
    xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
    xlhp_freeze_plans freeze_plans;
    xlhp_prune_items redirect_items;
    xlhp_prune_items dead_items;
    xlhp_prune_items unused_items;
    OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
    bool        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
    bool        do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
    bool        heap_fpi_allowed = true;

    /* Caller may only pass bits defined by VISIBILITYMAP_VALID_BITS */
    Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);

    xlrec.flags = 0;
    regbuf_flags_heap = REGBUF_STANDARD;

    /*
     * We can avoid an FPI of the heap page if the only modification we are
     * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
     * disabled.
     *
     * However, if the page has never been WAL-logged (LSN is invalid), we
     * must force an FPI regardless. This can happen when another backend
     * extends the heap, initializes the page, and then fails before WAL-
     * logging it. Since heap extension is not WAL-logged, recovery might try
     * to replay our record and find that the page isn't initialized, which
     * would cause a PANIC.
     */
    if (!XLogRecPtrIsValid(PageGetLSN(heap_page)))
        regbuf_flags_heap |= REGBUF_FORCE_IMAGE;
    else if (!do_prune && nfrozen == 0 && (!do_set_vm || !XLogHintBitIsNeeded()))
    {
        /* No FPI: remember that, so we skip stamping the heap page LSN below */
        regbuf_flags_heap |= REGBUF_NO_IMAGE;
        heap_fpi_allowed = false;
    }

    /*
     * Prepare data for the buffer. The arrays are not actually in the
     * buffer, but we pretend that they are. When XLogInsert stores a full
     * page image, the arrays can be omitted.
     */
    XLogBeginInsert();
    XLogRegisterBuffer(0, buffer, regbuf_flags_heap);

    /* The VM page, when updated, is registered as block reference 1 */
    if (do_set_vm)
        XLogRegisterBuffer(1, vmbuffer, 0);

    if (nfrozen > 0)
    {
        int         nplans;

        xlrec.flags |= XLHP_HAS_FREEZE_PLANS;

        /*
         * Prepare deduplicated representation for use in the WAL record. This
         * destructively sorts frozen tuples array in-place.
         */
        nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);

        freeze_plans.nplans = nplans;
        XLogRegisterBufData(0, &freeze_plans,
                            offsetof(xlhp_freeze_plans, plans));
        XLogRegisterBufData(0, plans,
                            sizeof(xlhp_freeze_plan) * nplans);
    }
    if (nredirected > 0)
    {
        xlrec.flags |= XLHP_HAS_REDIRECTIONS;

        redirect_items.ntargets = nredirected;
        XLogRegisterBufData(0, &redirect_items,
                            offsetof(xlhp_prune_items, data));
        /* Each redirection is a (from, to) pair of offset numbers */
        XLogRegisterBufData(0, redirected,
                            sizeof(OffsetNumber[2]) * nredirected);
    }
    if (ndead > 0)
    {
        xlrec.flags |= XLHP_HAS_DEAD_ITEMS;

        dead_items.ntargets = ndead;
        XLogRegisterBufData(0, &dead_items,
                            offsetof(xlhp_prune_items, data));
        XLogRegisterBufData(0, dead,
                            sizeof(OffsetNumber) * ndead);
    }
    if (nunused > 0)
    {
        xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;

        unused_items.ntargets = nunused;
        XLogRegisterBufData(0, &unused_items,
                            offsetof(xlhp_prune_items, data));
        XLogRegisterBufData(0, unused,
                            sizeof(OffsetNumber) * nunused);
    }
    /* Freeze plan offsets go last; REDO locates them by position */
    if (nfrozen > 0)
        XLogRegisterBufData(0, frz_offsets,
                            sizeof(OffsetNumber) * nfrozen);

    /*
     * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
     * flag above.
     */
    if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
    {
        xlrec.flags |= XLHP_VM_ALL_VISIBLE;
        /* ALL_FROZEN only ever accompanies ALL_VISIBLE */
        if (vmflags & VISIBILITYMAP_ALL_FROZEN)
            xlrec.flags |= XLHP_VM_ALL_FROZEN;
    }
    if (RelationIsAccessibleInLogicalDecoding(relation))
        xlrec.flags |= XLHP_IS_CATALOG_REL;
    if (TransactionIdIsValid(conflict_xid))
        xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
    if (cleanup_lock)
        xlrec.flags |= XLHP_CLEANUP_LOCK;
    else
    {
        Assert(nredirected == 0 && ndead == 0);
        /* also, any items in 'unused' must've been LP_DEAD previously */
    }
    XLogRegisterData(&xlrec, SizeOfHeapPrune);
    /* Conflict horizon XID follows the fixed-size record when present */
    if (TransactionIdIsValid(conflict_xid))
        XLogRegisterData(&conflict_xid, sizeof(TransactionId));

    /* Map the prune reason onto the matching XLOG_HEAP2_PRUNE* info code */
    switch (reason)
    {
        case PRUNE_ON_ACCESS:
            info = XLOG_HEAP2_PRUNE_ON_ACCESS;
            break;
        case PRUNE_VACUUM_SCAN:
            info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
            break;
        case PRUNE_VACUUM_CLEANUP:
            info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
            break;
        default:
            elog(ERROR, "unrecognized prune reason: %d", (int) reason);
            break;
    }
    recptr = XLogInsert(RM_HEAP2_ID, info);

    /* Stamp the VM page with the record's LSN if we modified it */
    if (do_set_vm)
    {
        Assert(BufferIsDirty(vmbuffer));
        PageSetLSN(BufferGetPage(vmbuffer), recptr);
    }

    /*
     * If we explicitly skip an FPI, we must not stamp the heap page with this
     * record's LSN. Recovery skips records <= the stamped LSN, so this could
     * lead to skipping an earlier FPI needed to repair a torn page.
     */
    if (heap_fpi_allowed)
    {
        Assert(BufferIsDirty(buffer));
        PageSetLSN(heap_page, recptr);
    }
}
|