Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * heapam_handler.c
4 : : * heap table access method code
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/heapam_handler.c
12 : : *
13 : : *
14 : : * NOTES
15 : : * This files wires up the lower level heapam.c et al routines with the
16 : : * tableam abstraction.
17 : : *
18 : : *-------------------------------------------------------------------------
19 : : */
20 : : #include "postgres.h"
21 : :
22 : : #include "access/genam.h"
23 : : #include "access/heapam.h"
24 : : #include "access/heaptoast.h"
25 : : #include "access/multixact.h"
26 : : #include "access/rewriteheap.h"
27 : : #include "access/syncscan.h"
28 : : #include "access/tableam.h"
29 : : #include "access/tsmapi.h"
30 : : #include "access/visibilitymap.h"
31 : : #include "access/xact.h"
32 : : #include "catalog/catalog.h"
33 : : #include "catalog/index.h"
34 : : #include "catalog/storage.h"
35 : : #include "catalog/storage_xlog.h"
36 : : #include "commands/progress.h"
37 : : #include "executor/executor.h"
38 : : #include "miscadmin.h"
39 : : #include "pgstat.h"
40 : : #include "storage/bufmgr.h"
41 : : #include "storage/bufpage.h"
42 : : #include "storage/lmgr.h"
43 : : #include "storage/lock.h"
44 : : #include "storage/predicate.h"
45 : : #include "storage/procarray.h"
46 : : #include "storage/smgr.h"
47 : : #include "utils/builtins.h"
48 : : #include "utils/rel.h"
49 : : #include "utils/tuplesort.h"
50 : :
51 : : static void reform_and_rewrite_tuple(HeapTuple tuple,
52 : : Relation OldHeap, Relation NewHeap,
53 : : Datum *values, bool *isnull, RewriteState rwstate);
54 : : static void heap_insert_for_repack(HeapTuple tuple, Relation OldHeap,
55 : : Relation NewHeap, Datum *values, bool *isnull,
56 : : BulkInsertState bistate);
57 : : static HeapTuple reform_tuple(HeapTuple tuple, Relation OldHeap,
58 : : Relation NewHeap, Datum *values, bool *isnull);
59 : :
60 : : static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
61 : : HeapTuple tuple,
62 : : OffsetNumber tupoffset);
63 : :
64 : : static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
65 : :
66 : : static bool BitmapHeapScanNextBlock(TableScanDesc scan,
67 : : bool *recheck,
68 : : uint64 *lossy_pages, uint64 *exact_pages);
69 : :
70 : :
71 : : /* ------------------------------------------------------------------------
72 : : * Slot related callbacks for heap AM
73 : : * ------------------------------------------------------------------------
74 : : */
75 : :
76 : : static const TupleTableSlotOps *
2612 andres@anarazel.de 77 :CBC 17523769 : heapam_slot_callbacks(Relation relation)
78 : : {
79 : 17523769 : return &TTSOpsBufferHeapTuple;
80 : : }
81 : :
82 : :
83 : : /* ------------------------------------------------------------------------
84 : : * Callbacks for non-modifying operations on individual tuples for heap AM
85 : : * ------------------------------------------------------------------------
86 : : */
87 : :
88 : : static bool
2598 89 : 2837517 : heapam_fetch_row_version(Relation relation,
90 : : ItemPointer tid,
91 : : Snapshot snapshot,
92 : : TupleTableSlot *slot)
93 : : {
94 : 2837517 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
95 : : Buffer buffer;
96 : :
97 [ - + ]: 2837517 : Assert(TTS_IS_BUFFERTUPLE(slot));
98 : :
99 : 2837517 : bslot->base.tupdata.t_self = *tid;
1483 tgl@sss.pgh.pa.us 100 [ + + ]: 2837517 : if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
101 : : {
102 : : /* store in slot, transferring existing pin */
2598 andres@anarazel.de 103 : 2837198 : ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
104 : 2837198 : slot->tts_tableOid = RelationGetRelid(relation);
105 : :
106 : 2837198 : return true;
107 : : }
108 : :
109 : 307 : return false;
110 : : }
111 : :
112 : : static bool
2545 113 : 475 : heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
114 : : {
115 : 475 : HeapScanDesc hscan = (HeapScanDesc) scan;
116 : :
117 [ + + ]: 938 : return ItemPointerIsValid(tid) &&
118 [ + + ]: 463 : ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
119 : : }
120 : :
121 : : static bool
2612 122 : 767945 : heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
123 : : Snapshot snapshot)
124 : : {
125 : 767945 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
126 : : bool res;
127 : :
128 [ - + ]: 767945 : Assert(TTS_IS_BUFFERTUPLE(slot));
129 [ - + ]: 767945 : Assert(BufferIsValid(bslot->buffer));
130 : :
131 : : /*
132 : : * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
133 : : * Caller should be holding pin, but not lock.
134 : : */
135 : 767945 : LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
136 : 767945 : res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
137 : : bslot->buffer);
138 : 767945 : LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
139 : :
140 : 767945 : return res;
141 : : }
142 : :
143 : :
144 : : /* ----------------------------------------------------------------------------
145 : : * Functions for manipulations of physical tuples for heap AM.
146 : : * ----------------------------------------------------------------------------
147 : : */
148 : :
149 : : static void
2600 150 : 10759256 : heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
151 : : uint32 options, BulkInsertState bistate)
152 : : {
153 : 10759256 : bool shouldFree = true;
154 : 10759256 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
155 : :
156 : : /* Update the tuple with table oid */
157 : 10759256 : slot->tts_tableOid = RelationGetRelid(relation);
158 : 10759256 : tuple->t_tableOid = slot->tts_tableOid;
159 : :
160 : : /* Perform the insertion, and copy the resulting ItemPointer */
161 : 10759256 : heap_insert(relation, tuple, cid, options, bistate);
162 : 10759244 : ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
163 : :
164 [ + + ]: 10759244 : if (shouldFree)
165 : 2923907 : pfree(tuple);
166 : 10759244 : }
167 : :
168 : : static void
2593 169 : 2229 : heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
170 : : CommandId cid, uint32 options,
171 : : BulkInsertState bistate, uint32 specToken)
172 : : {
2600 173 : 2229 : bool shouldFree = true;
174 : 2229 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
175 : :
176 : : /* Update the tuple with table oid */
177 : 2229 : slot->tts_tableOid = RelationGetRelid(relation);
178 : 2229 : tuple->t_tableOid = slot->tts_tableOid;
179 : :
180 : 2229 : HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
181 : 2229 : options |= HEAP_INSERT_SPECULATIVE;
182 : :
183 : : /* Perform the insertion, and copy the resulting ItemPointer */
184 : 2229 : heap_insert(relation, tuple, cid, options, bistate);
185 : 2229 : ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
186 : :
187 [ + + ]: 2229 : if (shouldFree)
188 : 54 : pfree(tuple);
189 : 2229 : }
190 : :
191 : : static void
2593 192 : 2225 : heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
193 : : uint32 specToken, bool succeeded)
194 : : {
2600 195 : 2225 : bool shouldFree = true;
196 : 2225 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
197 : :
198 : : /* adjust the tuple's state accordingly */
2548 199 [ + + ]: 2225 : if (succeeded)
2600 200 : 2214 : heap_finish_speculative(relation, &slot->tts_tid);
201 : : else
202 : 11 : heap_abort_speculative(relation, &slot->tts_tid);
203 : :
204 [ + + ]: 2225 : if (shouldFree)
205 : 54 : pfree(tuple);
206 : 2225 : }
207 : :
208 : : static TM_Result
209 : 1025371 : heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
210 : : uint32 options, Snapshot snapshot, Snapshot crosscheck,
211 : : bool wait, TM_FailureData *tmfd)
212 : : {
213 : : /*
214 : : * Currently Deleting of index tuples are handled at vacuum, in case if
215 : : * the storage itself is cleaning the dead tuples by itself, it is the
216 : : * time to call the index tuple deletion also.
217 : : */
34 alvherre@kurilemu.de 218 :GNC 1025371 : return heap_delete(relation, tid, cid, options, crosscheck, wait,
219 : : tmfd);
220 : : }
221 : :
222 : :
223 : : static TM_Result
2600 andres@anarazel.de 224 :CBC 2244391 : heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
225 : : CommandId cid, uint32 options,
226 : : Snapshot snapshot, Snapshot crosscheck,
227 : : bool wait, TM_FailureData *tmfd,
228 : : LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
229 : : {
230 : 2244391 : bool shouldFree = true;
231 : 2244391 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
232 : : TM_Result result;
233 : :
234 : : /* Update the tuple with table oid */
235 : 2244391 : slot->tts_tableOid = RelationGetRelid(relation);
236 : 2244391 : tuple->t_tableOid = slot->tts_tableOid;
237 : :
34 alvherre@kurilemu.de 238 :GNC 2244391 : result = heap_update(relation, otid, tuple, cid, options,
239 : : crosscheck, wait,
240 : : tmfd, lockmode, update_indexes);
2600 andres@anarazel.de 241 :CBC 2244379 : ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
242 : :
243 : : /*
244 : : * Decide whether new index entries are needed for the tuple
245 : : *
246 : : * Note: heap_update returns the tid (location) of the new tuple in the
247 : : * t_self field.
248 : : *
249 : : * If the update is not HOT, we must update all indexes. If the update is
250 : : * HOT, it could be that we updated summarized columns, so we either
251 : : * update only summarized indexes, or none at all.
252 : : */
1142 tomas.vondra@postgre 253 [ + + ]: 2244379 : if (result != TM_Ok)
254 : : {
255 [ - + ]: 208 : Assert(*update_indexes == TU_None);
256 : 208 : *update_indexes = TU_None;
257 : : }
258 [ + + ]: 2244171 : else if (!HeapTupleIsHeapOnly(tuple))
259 [ - + ]: 2174074 : Assert(*update_indexes == TU_All);
260 : : else
261 [ + + - + ]: 70097 : Assert((*update_indexes == TU_Summarizing) ||
262 : : (*update_indexes == TU_None));
263 : :
2600 andres@anarazel.de 264 [ + + ]: 2244379 : if (shouldFree)
265 : 31987 : pfree(tuple);
266 : :
267 : 2244379 : return result;
268 : : }
269 : :
270 : : static TM_Result
271 : 570286 : heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
272 : : TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
273 : : LockWaitPolicy wait_policy, uint8 flags,
274 : : TM_FailureData *tmfd)
275 : : {
276 : 570286 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
277 : : TM_Result result;
278 : : Buffer buffer;
279 : 570286 : HeapTuple tuple = &bslot->base.tupdata;
280 : : bool follow_updates;
281 : :
282 : 570286 : follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
283 : 570286 : tmfd->traversed = false;
284 : :
285 [ + - ]: 570286 : Assert(TTS_IS_BUFFERTUPLE(slot));
286 : :
287 : 570286 : tuple_lock_retry:
754 akorotkov@postgresql 288 : 570474 : tuple->t_self = *tid;
289 : 570474 : result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
290 : : follow_updates, &buffer, tmfd);
291 : :
2600 andres@anarazel.de 292 [ + + ]: 570461 : if (result == TM_Updated &&
293 [ + + ]: 231 : (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
294 : : {
295 : : /* Should not encounter speculative tuple on recheck */
1128 akorotkov@postgresql 296 [ - + ]: 211 : Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
297 : :
754 298 : 211 : ReleaseBuffer(buffer);
299 : :
2600 andres@anarazel.de 300 [ + - ]: 211 : if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
301 : : {
302 : : SnapshotData SnapshotDirty;
303 : : TransactionId priorXmax;
304 : :
305 : : /* it was updated, so look at the updated version */
306 : 211 : *tid = tmfd->ctid;
307 : : /* updated row should have xmin matching this xmax */
308 : 211 : priorXmax = tmfd->xmax;
309 : :
310 : : /* signal that a tuple later in the chain is getting locked */
311 : 211 : tmfd->traversed = true;
312 : :
313 : : /*
314 : : * fetch target tuple
315 : : *
316 : : * Loop here to deal with updated or busy tuples
317 : : */
318 : 211 : InitDirtySnapshot(SnapshotDirty);
319 : : for (;;)
320 : : {
321 [ + + ]: 243 : if (ItemPointerIndicatesMovedPartitions(tid))
322 [ + - ]: 11 : ereport(ERROR,
323 : : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
324 : : errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
325 : :
326 : 232 : tuple->t_self = *tid;
1483 tgl@sss.pgh.pa.us 327 [ + + ]: 232 : if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
328 : : {
329 : : /*
330 : : * If xmin isn't what we're expecting, the slot must have
331 : : * been recycled and reused for an unrelated tuple. This
332 : : * implies that the latest version of the row was deleted,
333 : : * so we need do nothing. (Should be safe to examine xmin
334 : : * without getting buffer's content lock. We assume
335 : : * reading a TransactionId to be atomic, and Xmin never
336 : : * changes in an existing tuple, except to invalid or
337 : : * frozen, and neither of those can match priorXmax.)
338 : : */
2600 andres@anarazel.de 339 [ - + ]: 197 : if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
340 : : priorXmax))
341 : : {
2600 andres@anarazel.de 342 :UBC 0 : ReleaseBuffer(buffer);
2600 andres@anarazel.de 343 :CBC 11 : return TM_Deleted;
344 : : }
345 : :
346 : : /* otherwise xmin should not be dirty... */
347 [ - + ]: 197 : if (TransactionIdIsValid(SnapshotDirty.xmin))
2469 peter@eisentraut.org 348 [ # # ]:UBC 0 : ereport(ERROR,
349 : : (errcode(ERRCODE_DATA_CORRUPTED),
350 : : errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
351 : : SnapshotDirty.xmin,
352 : : ItemPointerGetBlockNumber(&tuple->t_self),
353 : : ItemPointerGetOffsetNumber(&tuple->t_self),
354 : : RelationGetRelationName(relation))));
355 : :
356 : : /*
357 : : * If tuple is being updated by other transaction then we
358 : : * have to wait for its commit/abort, or die trying.
359 : : */
2600 andres@anarazel.de 360 [ + + ]:CBC 197 : if (TransactionIdIsValid(SnapshotDirty.xmax))
361 : : {
362 : 2 : ReleaseBuffer(buffer);
363 [ - + + - ]: 2 : switch (wait_policy)
364 : : {
2600 andres@anarazel.de 365 :UBC 0 : case LockWaitBlock:
366 : 0 : XactLockTableWait(SnapshotDirty.xmax,
2600 andres@anarazel.de 367 :UIC 0 : relation, &tuple->t_self,
368 : : XLTW_FetchUpdated);
2600 andres@anarazel.de 369 :UBC 0 : break;
2600 andres@anarazel.de 370 :CBC 1 : case LockWaitSkip:
417 fujii@postgresql.org 371 [ + - ]: 1 : if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, false))
372 : : /* skip instead of waiting */
2600 andres@anarazel.de 373 : 1 : return TM_WouldBlock;
2600 andres@anarazel.de 374 :UBC 0 : break;
2600 andres@anarazel.de 375 :CBC 1 : case LockWaitError:
336 fujii@postgresql.org 376 [ + - ]: 1 : if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures))
2600 andres@anarazel.de 377 [ + - ]: 1 : ereport(ERROR,
378 : : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
379 : : errmsg("could not obtain lock on row in relation \"%s\"",
380 : : RelationGetRelationName(relation))));
2600 andres@anarazel.de 381 :UBC 0 : break;
382 : : }
383 : 0 : continue; /* loop back to repeat heap_fetch */
384 : : }
385 : :
386 : : /*
387 : : * If tuple was inserted by our own transaction, we have
388 : : * to check cmin against cid: cmin >= current CID means
389 : : * our command cannot see the tuple, so we should ignore
390 : : * it. Otherwise heap_lock_tuple() will throw an error,
391 : : * and so would any later attempt to update or delete the
392 : : * tuple. (We need not check cmax because
393 : : * HeapTupleSatisfiesDirty will consider a tuple deleted
394 : : * by our transaction dead, regardless of cmax.) We just
395 : : * checked that priorXmax == xmin, so we can test that
396 : : * variable instead of doing HeapTupleHeaderGetXmin again.
397 : : */
2600 andres@anarazel.de 398 [ + + + - ]:CBC 202 : if (TransactionIdIsCurrentTransactionId(priorXmax) &&
399 : 7 : HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
400 : : {
2585 401 : 7 : tmfd->xmax = priorXmax;
402 : :
403 : : /*
404 : : * Cmin is the problematic value, so store that. See
405 : : * above.
406 : : */
407 : 7 : tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
2600 408 : 7 : ReleaseBuffer(buffer);
2585 409 : 7 : return TM_SelfModified;
410 : : }
411 : :
412 : : /*
413 : : * This is a live tuple, so try to lock it again.
414 : : */
754 akorotkov@postgresql 415 : 188 : ReleaseBuffer(buffer);
2600 andres@anarazel.de 416 : 188 : goto tuple_lock_retry;
417 : : }
418 : :
419 : : /*
420 : : * If the referenced slot was actually empty, the latest
421 : : * version of the row must have been deleted, so we need do
422 : : * nothing.
423 : : */
424 [ - + ]: 35 : if (tuple->t_data == NULL)
425 : : {
754 akorotkov@postgresql 426 [ # # ]:UBC 0 : Assert(!BufferIsValid(buffer));
2600 andres@anarazel.de 427 : 0 : return TM_Deleted;
428 : : }
429 : :
430 : : /*
431 : : * As above, if xmin isn't what we're expecting, do nothing.
432 : : */
2600 andres@anarazel.de 433 [ - + ]:CBC 35 : if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
434 : : priorXmax))
435 : : {
1483 tgl@sss.pgh.pa.us 436 :UBC 0 : ReleaseBuffer(buffer);
2600 andres@anarazel.de 437 : 0 : return TM_Deleted;
438 : : }
439 : :
440 : : /*
441 : : * If we get here, the tuple was found but failed
442 : : * SnapshotDirty. Assuming the xmin is either a committed xact
443 : : * or our own xact (as it certainly should be if we're trying
444 : : * to modify the tuple), this must mean that the row was
445 : : * updated or deleted by either a committed xact or our own
446 : : * xact. If it was deleted, we can ignore it; if it was
447 : : * updated then chain up to the next version and repeat the
448 : : * whole process.
449 : : *
450 : : * As above, it should be safe to examine xmax and t_ctid
451 : : * without the buffer content lock, because they can't be
452 : : * changing. We'd better hold a buffer pin though.
453 : : */
2600 andres@anarazel.de 454 [ + + ]:CBC 35 : if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
455 : : {
456 : : /* deleted, so forget about it */
1483 tgl@sss.pgh.pa.us 457 : 3 : ReleaseBuffer(buffer);
2600 andres@anarazel.de 458 : 3 : return TM_Deleted;
459 : : }
460 : :
461 : : /* updated, so look at the updated row */
462 : 32 : *tid = tuple->t_data->t_ctid;
463 : : /* updated row should have xmin matching this xmax */
464 : 32 : priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
1483 tgl@sss.pgh.pa.us 465 : 32 : ReleaseBuffer(buffer);
466 : : /* loop back to fetch next in chain */
467 : : }
468 : : }
469 : : else
470 : : {
471 : : /* tuple was deleted, so give up */
2600 andres@anarazel.de 472 :UBC 0 : return TM_Deleted;
473 : : }
474 : : }
475 : :
2600 andres@anarazel.de 476 :CBC 570250 : slot->tts_tableOid = RelationGetRelid(relation);
477 : 570250 : tuple->t_tableOid = slot->tts_tableOid;
478 : :
479 : : /* store in slot, transferring existing pin */
754 akorotkov@postgresql 480 : 570250 : ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
481 : :
2600 andres@anarazel.de 482 : 570250 : return result;
483 : : }
484 : :
485 : :
486 : : /* ------------------------------------------------------------------------
487 : : * DDL related callbacks for heap AM.
488 : : * ------------------------------------------------------------------------
489 : : */
490 : :
491 : : static void
1399 rhaas@postgresql.org 492 : 43418 : heapam_relation_set_new_filelocator(Relation rel,
493 : : const RelFileLocator *newrlocator,
494 : : char persistence,
495 : : TransactionId *freezeXid,
496 : : MultiXactId *minmulti)
497 : : {
498 : : SMgrRelation srel;
499 : :
500 : : /*
501 : : * Initialize to the minimum XID that could put tuples in the table. We
502 : : * know that no xacts older than RecentXmin are still running, so that
503 : : * will do.
504 : : */
2595 andres@anarazel.de 505 : 43418 : *freezeXid = RecentXmin;
506 : :
507 : : /*
508 : : * Similarly, initialize the minimum Multixact to the first value that
509 : : * could possibly be stored in tuples in the table. Running transactions
510 : : * could reuse values from their local cache, so we are careful to
511 : : * consider all currently running multis.
512 : : *
513 : : * XXX this could be refined further, but is it worth the hassle?
514 : : */
515 : 43418 : *minmulti = GetOldestMultiXactId();
516 : :
1399 rhaas@postgresql.org 517 : 43418 : srel = RelationCreateStorage(*newrlocator, persistence, true);
518 : :
519 : : /*
520 : : * If required, set up an init fork for an unlogged table so that it can
521 : : * be correctly reinitialized on restart.
522 : : */
2563 andres@anarazel.de 523 [ + + ]: 43418 : if (persistence == RELPERSISTENCE_UNLOGGED)
524 : : {
2595 525 [ + + - + ]: 186 : Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
526 : : rel->rd_rel->relkind == RELKIND_TOASTVALUE);
2563 527 : 186 : smgrcreate(srel, INIT_FORKNUM, false);
1399 rhaas@postgresql.org 528 : 186 : log_smgrcreate(newrlocator, INIT_FORKNUM);
529 : : }
530 : :
2563 andres@anarazel.de 531 : 43418 : smgrclose(srel);
2595 532 : 43418 : }
533 : :
534 : : static void
535 : 372 : heapam_relation_nontransactional_truncate(Relation rel)
536 : : {
537 : 372 : RelationTruncate(rel, 0);
538 : 372 : }
539 : :
540 : : static void
1399 rhaas@postgresql.org 541 : 58 : heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
542 : : {
543 : : SMgrRelation dstrel;
544 : :
545 : : /*
546 : : * Since we copy the file directly without looking at the shared buffers,
547 : : * we'd better first flush out any pages of the source relation that are
548 : : * in shared buffers. We assume no new changes will be made while we are
549 : : * holding exclusive lock on the rel.
550 : : */
2563 andres@anarazel.de 551 : 58 : FlushRelationBuffers(rel);
552 : :
553 : : /*
554 : : * Create and copy all forks of the relation, and schedule unlinking of
555 : : * old physical files.
556 : : *
557 : : * NOTE: any conflict in relfilenumber value will be caught in
558 : : * RelationCreateStorage().
559 : : */
813 heikki.linnakangas@i 560 : 58 : dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
561 : :
562 : : /* copy main fork */
1758 tgl@sss.pgh.pa.us 563 : 58 : RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
2595 andres@anarazel.de 564 : 58 : rel->rd_rel->relpersistence);
565 : :
566 : : /* copy those extra forks that exist */
567 : 58 : for (ForkNumber forkNum = MAIN_FORKNUM + 1;
568 [ + + ]: 232 : forkNum <= MAX_FORKNUM; forkNum++)
569 : : {
1758 tgl@sss.pgh.pa.us 570 [ + + ]: 174 : if (smgrexists(RelationGetSmgr(rel), forkNum))
571 : : {
2595 andres@anarazel.de 572 : 15 : smgrcreate(dstrel, forkNum, false);
573 : :
574 : : /*
575 : : * WAL log creation if the relation is persistent, or this is the
576 : : * init fork of an unlogged relation.
577 : : */
1870 bruce@momjian.us 578 [ + + ]: 15 : if (RelationIsPermanent(rel) ||
2595 andres@anarazel.de 579 [ - + - - ]: 8 : (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
580 : : forkNum == INIT_FORKNUM))
1399 rhaas@postgresql.org 581 : 7 : log_smgrcreate(newrlocator, forkNum);
1758 tgl@sss.pgh.pa.us 582 : 15 : RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
2595 andres@anarazel.de 583 : 15 : rel->rd_rel->relpersistence);
584 : : }
585 : : }
586 : :
587 : :
588 : : /* drop old relation, and close new one */
589 : 58 : RelationDropStorage(rel);
590 : 58 : smgrclose(dstrel);
591 : 58 : }
592 : :
593 : : static void
594 : 392 : heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
595 : : Relation OldIndex, bool use_sort,
596 : : TransactionId OldestXmin,
597 : : Snapshot snapshot,
598 : : TransactionId *xid_cutoff,
599 : : MultiXactId *multi_cutoff,
600 : : double *num_tuples,
601 : : double *tups_vacuumed,
602 : : double *tups_recently_dead)
603 : : {
604 : : RewriteState rwstate;
605 : : BulkInsertState bistate;
606 : : IndexScanDesc indexScan;
607 : : TableScanDesc tableScan;
608 : : HeapScanDesc heapScan;
609 : : bool is_system_catalog;
610 : : Tuplesortstate *tuplesort;
611 : 392 : TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
612 : 392 : TupleDesc newTupDesc = RelationGetDescr(NewHeap);
613 : : TupleTableSlot *slot;
614 : : int natts;
615 : : Datum *values;
616 : : bool *isnull;
617 : : BufferHeapTupleTableSlot *hslot;
1985 fujii@postgresql.org 618 : 392 : BlockNumber prev_cblock = InvalidBlockNumber;
29 alvherre@kurilemu.de 619 :GNC 392 : bool concurrent = snapshot != NULL;
620 : :
621 : : /* Remember if it's a system catalog */
2595 andres@anarazel.de 622 :CBC 392 : is_system_catalog = IsSystemRelation(OldHeap);
623 : :
624 : : /*
625 : : * Valid smgr_targblock implies something already wrote to the relation.
626 : : * This may be harmless, but this function hasn't planned for it.
627 : : */
628 [ - + - - ]: 392 : Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
629 : :
630 : : /* Preallocate values/isnull arrays */
631 : 392 : natts = newTupDesc->natts;
146 michael@paquier.xyz 632 :GNC 392 : values = palloc_array(Datum, natts);
633 : 392 : isnull = palloc_array(bool, natts);
634 : :
635 : : /*
636 : : * In non-concurrent mode, initialize the rewrite operation. This is not
637 : : * needed in concurrent mode.
638 : : */
29 alvherre@kurilemu.de 639 [ + + ]: 392 : if (!concurrent)
640 : 389 : rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin,
641 : : *xid_cutoff, *multi_cutoff);
642 : : else
643 : 3 : rwstate = NULL;
644 : :
645 : : /* In concurrent mode, prepare for bulk-insert operation. */
646 [ + + ]: 392 : if (concurrent)
647 : 3 : bistate = GetBulkInsertState();
648 : : else
649 : 389 : bistate = NULL;
650 : :
651 : : /* Set up sorting if wanted */
2595 andres@anarazel.de 652 [ + + ]:CBC 392 : if (use_sort)
1060 pg@bowt.ie 653 : 80 : tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
654 : : maintenance_work_mem,
655 : : NULL, TUPLESORT_NONE);
656 : : else
2595 andres@anarazel.de 657 : 312 : tuplesort = NULL;
658 : :
659 : : /*
660 : : * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
661 : : * that still need to be copied, we scan with SnapshotAny and use
662 : : * HeapTupleSatisfiesVacuum for the visibility test.
663 : : *
664 : : * In the CONCURRENTLY case, we do regular MVCC visibility tests, using
665 : : * the snapshot passed by the caller.
666 : : */
667 [ + + + + ]: 392 : if (OldIndex != NULL && !use_sort)
668 : 60 : {
2593 669 : 60 : const int ci_index[] = {
670 : : PROGRESS_REPACK_PHASE,
671 : : PROGRESS_REPACK_INDEX_RELID
672 : : };
673 : : int64 ci_val[2];
674 : :
675 : : /* Set phase and OIDOldIndex to columns */
56 alvherre@kurilemu.de 676 :GNC 60 : ci_val[0] = PROGRESS_REPACK_PHASE_INDEX_SCAN_HEAP;
2595 andres@anarazel.de 677 :CBC 60 : ci_val[1] = RelationGetRelid(OldIndex);
678 : 60 : pgstat_progress_update_multi_param(2, ci_index, ci_val);
679 : :
680 : 60 : tableScan = NULL;
681 : 60 : heapScan = NULL;
29 alvherre@kurilemu.de 682 [ + + ]:GNC 60 : indexScan = index_beginscan(OldHeap, OldIndex,
683 : : snapshot ? snapshot : SnapshotAny,
684 : : NULL, 0, 0,
685 : : SO_NONE);
2595 andres@anarazel.de 686 :CBC 60 : index_rescan(indexScan, NULL, 0, NULL, 0);
687 : : }
688 : : else
689 : : {
690 : : /* In scan-and-sort mode and also VACUUM FULL, set phase */
56 alvherre@kurilemu.de 691 :GNC 332 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
692 : : PROGRESS_REPACK_PHASE_SEQ_SCAN_HEAP);
693 : :
29 694 [ + + ]: 332 : tableScan = table_beginscan(OldHeap,
695 : : snapshot ? snapshot : SnapshotAny,
696 : : 0, (ScanKey) NULL,
697 : : SO_NONE);
2595 andres@anarazel.de 698 :CBC 332 : heapScan = (HeapScanDesc) tableScan;
699 : 332 : indexScan = NULL;
700 : :
701 : : /* Set total heap blocks */
56 alvherre@kurilemu.de 702 :GNC 332 : pgstat_progress_update_param(PROGRESS_REPACK_TOTAL_HEAP_BLKS,
2595 andres@anarazel.de 703 :CBC 332 : heapScan->rs_nblocks);
704 : : }
705 : :
706 : 392 : slot = table_slot_create(OldHeap, NULL);
707 : 392 : hslot = (BufferHeapTupleTableSlot *) slot;
708 : :
709 : : /*
710 : : * Scan through the OldHeap, either in OldIndex order or sequentially;
711 : : * copy each tuple into the NewHeap, or transiently to the tuplesort
712 : : * module. Note that we don't bother sorting dead tuples (they won't get
713 : : * to the new table anyway).
714 : : */
715 : : for (;;)
716 : 484478 : {
717 : : HeapTuple tuple;
718 : : Buffer buf;
719 : : bool isdead;
720 : :
721 [ - + ]: 484870 : CHECK_FOR_INTERRUPTS();
722 : :
723 [ + + ]: 484870 : if (indexScan != NULL)
724 : : {
725 [ + + ]: 1198 : if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
726 : 60 : break;
727 : :
728 : : /* Since we used no scan keys, should never need to recheck */
729 [ - + ]: 1138 : if (indexScan->xs_recheck)
2595 andres@anarazel.de 730 [ # # ]:UBC 0 : elog(ERROR, "CLUSTER does not support lossy index conditions");
731 : : }
732 : : else
733 : : {
2595 andres@anarazel.de 734 [ + + ]:CBC 483672 : if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
735 : : {
736 : : /*
737 : : * If the last pages of the scan were empty, we would go to
738 : : * the next phase while heap_blks_scanned != heap_blks_total.
739 : : * Instead, to ensure that heap_blks_scanned is equivalent to
740 : : * heap_blks_total after the table scan phase, this parameter
741 : : * is manually updated to the correct value when the table
742 : : * scan finishes.
743 : : */
56 alvherre@kurilemu.de 744 :GNC 332 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_BLKS_SCANNED,
1985 fujii@postgresql.org 745 :CBC 332 : heapScan->rs_nblocks);
2595 andres@anarazel.de 746 : 332 : break;
747 : : }
748 : :
749 : : /*
750 : : * In scan-and-sort mode and also VACUUM FULL, set heap blocks
751 : : * scanned
752 : : *
753 : : * Note that heapScan may start at an offset and wrap around, i.e.
754 : : * rs_startblock may be >0, and rs_cblock may end with a number
755 : : * below rs_startblock. To prevent showing this wraparound to the
756 : : * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
757 : : */
1985 fujii@postgresql.org 758 [ + + ]: 483340 : if (prev_cblock != heapScan->rs_cblock)
759 : : {
56 alvherre@kurilemu.de 760 :GNC 7121 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_BLKS_SCANNED,
1985 fujii@postgresql.org 761 :CBC 7121 : (heapScan->rs_cblock +
762 : 7121 : heapScan->rs_nblocks -
763 : 7121 : heapScan->rs_startblock
764 : 7121 : ) % heapScan->rs_nblocks + 1);
765 : 7121 : prev_cblock = heapScan->rs_cblock;
766 : : }
767 : : }
768 : :
2595 andres@anarazel.de 769 : 484478 : tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
770 : 484478 : buf = hslot->buffer;
771 : :
772 : : /*
773 : : * In concurrent mode, our table or index scan has used regular MVCC
774 : : * visibility test against a snapshot passed by caller; therefore we
775 : : * don't need another visibility test. In non-concurrent mode
776 : : * however, we must test the visibility of each tuple we read.
777 : : */
29 alvherre@kurilemu.de 778 [ + + ]:GNC 484478 : if (!concurrent)
779 : : {
780 : : /*
781 : : * To be able to guarantee that we can set the hint bit, acquire
782 : : * an exclusive lock on the old buffer. We need the hint bits, set
783 : : * in heapam_relation_copy_for_cluster() ->
784 : : * HeapTupleSatisfiesVacuum(), to be set, as otherwise
785 : : * reform_and_rewrite_tuple() -> rewrite_heap_tuple() will get
786 : : * confused. Specifically, rewrite_heap_tuple() checks for
787 : : * HEAP_XMAX_INVALID in the old tuple to determine whether to
788 : : * check the old-to-new mapping hash table.
789 : : *
790 : : * It'd be better if we somehow could avoid setting hint bits on
791 : : * the old page. One reason to use VACUUM FULL are very bloated
792 : : * tables - rewriting most of the old table during VACUUM FULL
793 : : * doesn't exactly help...
794 : : */
795 : 484454 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
796 : :
797 [ + + + + : 484454 : switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
+ - ]
798 : : {
799 : 21152 : case HEAPTUPLE_DEAD:
800 : : /* Definitely dead */
801 : 21152 : isdead = true;
802 : 21152 : break;
803 : 17405 : case HEAPTUPLE_RECENTLY_DEAD:
804 : 17405 : *tups_recently_dead += 1;
805 : : pg_fallthrough;
806 : 463152 : case HEAPTUPLE_LIVE:
807 : : /* Live or recently dead, must copy it */
808 : 463152 : isdead = false;
809 : 463152 : break;
810 : 114 : case HEAPTUPLE_INSERT_IN_PROGRESS:
811 : :
812 : : /*
813 : : * As long as we hold exclusive lock on the relation,
814 : : * normally the only way to see this is if it was inserted
815 : : * earlier in our own transaction. However, it can happen
816 : : * in system catalogs, since we tend to release write lock
817 : : * before commit there. Give a warning if neither case
818 : : * applies; but in any case we had better copy it.
819 : : */
820 [ + + ]: 114 : if (!is_system_catalog &&
821 [ - + ]: 14 : !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
29 alvherre@kurilemu.de 822 [ # # ]:UNC 0 : elog(WARNING, "concurrent insert in progress within table \"%s\"",
823 : : RelationGetRelationName(OldHeap));
824 : : /* treat as live */
29 alvherre@kurilemu.de 825 :GNC 114 : isdead = false;
826 : 114 : break;
827 : 36 : case HEAPTUPLE_DELETE_IN_PROGRESS:
828 : :
829 : : /*
830 : : * Similar situation to INSERT_IN_PROGRESS case.
831 : : */
832 [ + + ]: 36 : if (!is_system_catalog &&
833 [ - + ]: 20 : !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
29 alvherre@kurilemu.de 834 [ # # ]:UNC 0 : elog(WARNING, "concurrent delete in progress within table \"%s\"",
835 : : RelationGetRelationName(OldHeap));
836 : : /* treat as recently dead */
29 alvherre@kurilemu.de 837 :GNC 36 : *tups_recently_dead += 1;
838 : 36 : isdead = false;
839 : 36 : break;
29 alvherre@kurilemu.de 840 :UNC 0 : default:
841 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
842 : : isdead = false; /* keep compiler quiet */
843 : : break;
844 : : }
845 : :
29 alvherre@kurilemu.de 846 :GNC 484454 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
847 : :
848 [ + + ]: 484454 : if (isdead)
849 : : {
2595 andres@anarazel.de 850 :GBC 21152 : *tups_vacuumed += 1;
851 : : /* heap rewrite module still needs to see it... */
29 alvherre@kurilemu.de 852 [ - + ]:GNC 21152 : if (rewrite_heap_dead_tuple(rwstate, tuple))
853 : : {
854 : : /* A previous recently-dead tuple is now known dead */
29 alvherre@kurilemu.de 855 :UNC 0 : *tups_vacuumed += 1;
856 : 0 : *tups_recently_dead -= 1;
857 : : }
858 : :
29 alvherre@kurilemu.de 859 :GNC 21152 : continue;
860 : : }
861 : : }
862 : :
2595 andres@anarazel.de 863 :CBC 463326 : *num_tuples += 1;
864 [ + + ]: 463326 : if (tuplesort != NULL)
865 : : {
866 : 363305 : tuplesort_putheaptuple(tuplesort, tuple);
867 : :
868 : : /*
869 : : * In scan-and-sort mode, report increase in number of tuples
870 : : * scanned
871 : : */
56 alvherre@kurilemu.de 872 :GNC 363305 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_TUPLES_SCANNED,
2595 andres@anarazel.de 873 :CBC 363305 : *num_tuples);
874 : : }
875 : : else
876 : : {
2593 877 : 100021 : const int ct_index[] = {
878 : : PROGRESS_REPACK_HEAP_TUPLES_SCANNED,
879 : : PROGRESS_REPACK_HEAP_TUPLES_INSERTED
880 : : };
881 : : int64 ct_val[2];
882 : :
29 alvherre@kurilemu.de 883 [ + + ]:GNC 100021 : if (!concurrent)
884 : 99997 : reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
885 : : values, isnull, rwstate);
886 : : else
887 : 24 : heap_insert_for_repack(tuple, OldHeap, NewHeap,
888 : : values, isnull, bistate);
889 : :
890 : : /*
891 : : * In indexscan mode and also VACUUM FULL, report increase in
892 : : * number of tuples scanned and written
893 : : */
2595 andres@anarazel.de 894 :CBC 100021 : ct_val[0] = *num_tuples;
895 : 100021 : ct_val[1] = *num_tuples;
896 : 100021 : pgstat_progress_update_multi_param(2, ct_index, ct_val);
897 : : }
898 : : }
899 : :
900 [ + + ]: 392 : if (indexScan != NULL)
901 : 60 : index_endscan(indexScan);
902 [ + + ]: 392 : if (tableScan != NULL)
903 : 332 : table_endscan(tableScan);
904 [ + - ]: 392 : if (slot)
905 : 392 : ExecDropSingleTupleTableSlot(slot);
906 : :
907 : : /*
908 : : * In scan-and-sort mode, complete the sort, then read out all live tuples
909 : : * from the tuplestore and write them to the new relation.
910 : : */
911 [ + + ]: 392 : if (tuplesort != NULL)
912 : : {
2593 913 : 80 : double n_tuples = 0;
914 : :
915 : : /* Report that we are now sorting tuples */
56 alvherre@kurilemu.de 916 :GNC 80 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
917 : : PROGRESS_REPACK_PHASE_SORT_TUPLES);
918 : :
2595 andres@anarazel.de 919 :CBC 80 : tuplesort_performsort(tuplesort);
920 : :
921 : : /* Report that we are now writing new heap */
56 alvherre@kurilemu.de 922 :GNC 80 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
923 : : PROGRESS_REPACK_PHASE_WRITE_NEW_HEAP);
924 : :
925 : : for (;;)
2595 andres@anarazel.de 926 :CBC 363305 : {
927 : : HeapTuple tuple;
928 : :
929 [ - + ]: 363385 : CHECK_FOR_INTERRUPTS();
930 : :
931 : 363385 : tuple = tuplesort_getheaptuple(tuplesort, true);
932 [ + + ]: 363385 : if (tuple == NULL)
933 : 80 : break;
934 : :
935 : 363305 : n_tuples += 1;
29 alvherre@kurilemu.de 936 [ + - ]:GNC 363305 : if (!concurrent)
937 : 363305 : reform_and_rewrite_tuple(tuple,
938 : : OldHeap, NewHeap,
939 : : values, isnull,
940 : : rwstate);
941 : : else
29 alvherre@kurilemu.de 942 :UNC 0 : heap_insert_for_repack(tuple, OldHeap, NewHeap,
943 : : values, isnull, bistate);
944 : :
945 : : /* Report n_tuples */
29 alvherre@kurilemu.de 946 :GNC 363305 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_TUPLES_INSERTED,
947 : : n_tuples);
948 : : }
949 : :
2595 andres@anarazel.de 950 :CBC 80 : tuplesort_end(tuplesort);
951 : : }
952 : :
953 : : /* Write out any remaining tuples, and fsync if needed */
29 alvherre@kurilemu.de 954 [ + + ]:GNC 392 : if (rwstate)
955 : 389 : end_heap_rewrite(rwstate);
956 [ + + ]: 392 : if (bistate)
957 : 3 : FreeBulkInsertState(bistate);
958 : :
959 : : /* Clean up */
2595 andres@anarazel.de 960 :CBC 392 : pfree(values);
961 : 392 : pfree(isnull);
962 : 392 : }
963 : :
964 : : /*
965 : : * Prepare to analyze the next block in the read stream. Returns false if
966 : : * the stream is exhausted and true otherwise. The scan must have been started
967 : : * with SO_TYPE_ANALYZE option.
968 : : *
969 : : * This routine holds a buffer pin and lock on the heap page. They are held
970 : : * until heapam_scan_analyze_next_tuple() returns false. That is until all the
971 : : * items of the heap page are analyzed.
972 : : */
973 : : static bool
757 tmunro@postgresql.or 974 : 107602 : heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
975 : : {
2593 andres@anarazel.de 976 : 107602 : HeapScanDesc hscan = (HeapScanDesc) scan;
977 : :
978 : : /*
979 : : * We must maintain a pin on the target page's buffer to ensure that
980 : : * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
981 : : * under us. It comes from the stream already pinned. We also choose to
982 : : * hold sharelock on the buffer throughout --- we could release and
983 : : * re-acquire sharelock for each tuple, but since we aren't doing much
984 : : * work per tuple, the extra lock traffic is probably better avoided.
985 : : */
757 tmunro@postgresql.or 986 : 107602 : hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
987 [ + + ]: 107602 : if (!BufferIsValid(hscan->rs_cbuf))
988 : 11200 : return false;
989 : :
2593 andres@anarazel.de 990 : 96402 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
991 : :
757 tmunro@postgresql.or 992 : 96402 : hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
993 : 96402 : hscan->rs_cindex = FirstOffsetNumber;
994 : 96402 : return true;
995 : : }
996 : :
997 : : static bool
68 melanieplageman@gmai 998 :GNC 7982023 : heapam_scan_analyze_next_tuple(TableScanDesc scan,
999 : : double *liverows, double *deadrows,
1000 : : TupleTableSlot *slot)
1001 : : {
2593 andres@anarazel.de 1002 :CBC 7982023 : HeapScanDesc hscan = (HeapScanDesc) scan;
1003 : : Page targpage;
1004 : : OffsetNumber maxoffset;
1005 : : BufferHeapTupleTableSlot *hslot;
1006 : :
1007 [ - + ]: 7982023 : Assert(TTS_IS_BUFFERTUPLE(slot));
1008 : :
1009 : 7982023 : hslot = (BufferHeapTupleTableSlot *) slot;
1010 : 7982023 : targpage = BufferGetPage(hscan->rs_cbuf);
1011 : 7982023 : maxoffset = PageGetMaxOffsetNumber(targpage);
1012 : :
1013 : : /* Inner loop over all tuples on the selected page */
1014 [ + + ]: 8386349 : for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1015 : : {
1016 : : ItemId itemid;
1017 : 8289947 : HeapTuple targtuple = &hslot->base.tupdata;
1018 : 8289947 : bool sample_it = false;
1019 : : TransactionId dead_after;
1020 : :
1021 : 8289947 : itemid = PageGetItemId(targpage, hscan->rs_cindex);
1022 : :
1023 : : /*
1024 : : * We ignore unused and redirect line pointers. DEAD line pointers
1025 : : * should be counted as dead, because we need vacuum to run to get rid
1026 : : * of them. Note that this rule agrees with the way that
1027 : : * heap_page_prune_and_freeze() counts things.
1028 : : */
1029 [ + + ]: 8289947 : if (!ItemIdIsNormal(itemid))
1030 : : {
1031 [ + + ]: 279737 : if (ItemIdIsDead(itemid))
1032 : 14787 : *deadrows += 1;
1033 : 279737 : continue;
1034 : : }
1035 : :
1036 : 8010210 : ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1037 : :
1038 : 8010210 : targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1039 : 8010210 : targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1040 : 8010210 : targtuple->t_len = ItemIdGetLength(itemid);
1041 : :
68 melanieplageman@gmai 1042 [ + + + + :GNC 8010210 : switch (HeapTupleSatisfiesVacuumHorizon(targtuple,
- ]
1043 : : hscan->rs_cbuf,
1044 : : &dead_after))
1045 : : {
2593 andres@anarazel.de 1046 :CBC 7659381 : case HEAPTUPLE_LIVE:
1047 : 7659381 : sample_it = true;
1048 : 7659381 : *liverows += 1;
1049 : 7659381 : break;
1050 : :
1051 : 123340 : case HEAPTUPLE_DEAD:
1052 : : case HEAPTUPLE_RECENTLY_DEAD:
1053 : : /* Count dead and recently-dead rows */
1054 : 123340 : *deadrows += 1;
1055 : 123340 : break;
1056 : :
1057 : 226291 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1058 : :
1059 : : /*
1060 : : * Insert-in-progress rows are not counted. We assume that
1061 : : * when the inserting transaction commits or aborts, it will
1062 : : * send a stats message to increment the proper count. This
1063 : : * works right only if that transaction ends after we finish
1064 : : * analyzing the table; if things happen in the other order,
1065 : : * its stats update will be overwritten by ours. However, the
1066 : : * error will be large only if the other transaction runs long
1067 : : * enough to insert many tuples, so assuming it will finish
1068 : : * after us is the safer option.
1069 : : *
1070 : : * A special case is that the inserting transaction might be
1071 : : * our own. In this case we should count and sample the row,
1072 : : * to accommodate users who load a table and analyze it in one
1073 : : * transaction. (pgstat_report_analyze has to adjust the
1074 : : * numbers we report to the cumulative stats system to make
1075 : : * this come out right.)
1076 : : */
1077 [ + + ]: 226291 : if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1078 : : {
1079 : 226186 : sample_it = true;
1080 : 226186 : *liverows += 1;
1081 : : }
1082 : 226291 : break;
1083 : :
1084 : 1198 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1085 : :
1086 : : /*
1087 : : * We count and sample delete-in-progress rows the same as
1088 : : * live ones, so that the stats counters come out right if the
1089 : : * deleting transaction commits after us, per the same
1090 : : * reasoning given above.
1091 : : *
1092 : : * If the delete was done by our own transaction, however, we
1093 : : * must count the row as dead to make pgstat_report_analyze's
1094 : : * stats adjustments come out right. (Note: this works out
1095 : : * properly when the row was both inserted and deleted in our
1096 : : * xact.)
1097 : : *
1098 : : * The net effect of these choices is that we act as though an
1099 : : * IN_PROGRESS transaction hasn't happened yet, except if it
1100 : : * is our own transaction, which we assume has happened.
1101 : : *
1102 : : * This approach ensures that we behave sanely if we see both
1103 : : * the pre-image and post-image rows for a row being updated
1104 : : * by a concurrent transaction: we will sample the pre-image
1105 : : * but not the post-image. We also get sane results if the
1106 : : * concurrent transaction never commits.
1107 : : */
1108 [ + + ]: 1198 : if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
2513 1109 : 1144 : *deadrows += 1;
1110 : : else
1111 : : {
2593 1112 : 54 : sample_it = true;
2513 1113 : 54 : *liverows += 1;
1114 : : }
2593 1115 : 1198 : break;
1116 : :
2593 andres@anarazel.de 1117 :UBC 0 : default:
1118 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1119 : : break;
1120 : : }
1121 : :
2593 andres@anarazel.de 1122 [ + + ]:CBC 8010210 : if (sample_it)
1123 : : {
1124 : 7885621 : ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1125 : 7885621 : hscan->rs_cindex++;
1126 : :
1127 : : /* note that we leave the buffer locked here! */
1128 : 7885621 : return true;
1129 : : }
1130 : : }
1131 : :
1132 : : /* Now release the lock and pin on the page */
1133 : 96402 : UnlockReleaseBuffer(hscan->rs_cbuf);
1134 : 96402 : hscan->rs_cbuf = InvalidBuffer;
1135 : :
1136 : : /* also prevent old slot contents from having pin on page */
1137 : 96402 : ExecClearTuple(slot);
1138 : :
1139 : 96402 : return false;
1140 : : }
1141 : :
1142 : : static double
2596 1143 : 36072 : heapam_index_build_range_scan(Relation heapRelation,
1144 : : Relation indexRelation,
1145 : : IndexInfo *indexInfo,
1146 : : bool allow_sync,
1147 : : bool anyvisible,
1148 : : bool progress,
1149 : : BlockNumber start_blockno,
1150 : : BlockNumber numblocks,
1151 : : IndexBuildCallback callback,
1152 : : void *callback_state,
1153 : : TableScanDesc scan)
1154 : : {
1155 : : HeapScanDesc hscan;
1156 : : bool is_system_catalog;
1157 : : bool checking_uniqueness;
1158 : : HeapTuple heapTuple;
1159 : : Datum values[INDEX_MAX_KEYS];
1160 : : bool isnull[INDEX_MAX_KEYS];
1161 : : double reltuples;
1162 : : ExprState *predicate;
1163 : : TupleTableSlot *slot;
1164 : : EState *estate;
1165 : : ExprContext *econtext;
1166 : : Snapshot snapshot;
1167 : 36072 : bool need_unregister_snapshot = false;
1168 : : TransactionId OldestXmin;
2540 tgl@sss.pgh.pa.us 1169 : 36072 : BlockNumber previous_blkno = InvalidBlockNumber;
2596 andres@anarazel.de 1170 : 36072 : BlockNumber root_blkno = InvalidBlockNumber;
1171 : : OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1172 : :
1173 : : /*
1174 : : * sanity checks
1175 : : */
1176 [ - + ]: 36072 : Assert(OidIsValid(indexRelation->rd_rel->relam));
1177 : :
1178 : : /* Remember if it's a system catalog */
1179 : 36072 : is_system_catalog = IsSystemRelation(heapRelation);
1180 : :
1181 : : /* See whether we're verifying uniqueness/exclusion properties */
1182 [ + + ]: 45481 : checking_uniqueness = (indexInfo->ii_Unique ||
1183 [ + + ]: 9409 : indexInfo->ii_ExclusionOps != NULL);
1184 : :
1185 : : /*
1186 : : * "Any visible" mode is not compatible with uniqueness checks; make sure
1187 : : * only one of those is requested.
1188 : : */
1189 [ + + - + ]: 36072 : Assert(!(anyvisible && checking_uniqueness));
1190 : :
1191 : : /*
1192 : : * Need an EState for evaluation of index expressions and partial-index
1193 : : * predicates. Also a slot to hold the current tuple.
1194 : : */
1195 : 36072 : estate = CreateExecutorState();
1196 [ - + ]: 36072 : econtext = GetPerTupleExprContext(estate);
1197 : 36072 : slot = table_slot_create(heapRelation, NULL);
1198 : :
1199 : : /* Arrange for econtext's scan tuple to be the tuple under test */
1200 : 36072 : econtext->ecxt_scantuple = slot;
1201 : :
1202 : : /* Set up execution state for predicate, if any. */
1203 : 36072 : predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1204 : :
1205 : : /*
1206 : : * Prepare for scan of the base relation. In a normal index build, we use
1207 : : * SnapshotAny because we must retrieve all tuples and do our own time
1208 : : * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1209 : : * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1210 : : * and index whatever's live according to that.
1211 : : */
1212 : 36072 : OldestXmin = InvalidTransactionId;
1213 : :
1214 : : /* okay to ignore lazy VACUUMs here */
1215 [ + + + + ]: 36072 : if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
2092 1216 : 25772 : OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1217 : :
2596 1218 [ + + ]: 36072 : if (!scan)
1219 : : {
1220 : : /*
1221 : : * Serial index build.
1222 : : *
1223 : : * Must begin our own heap scan in this case. We may also need to
1224 : : * register a snapshot whose lifetime is under our direct control.
1225 : : */
1226 [ + + ]: 35728 : if (!TransactionIdIsValid(OldestXmin))
1227 : : {
1228 : 10227 : snapshot = RegisterSnapshot(GetTransactionSnapshot());
1229 : 10227 : need_unregister_snapshot = true;
1230 : : }
1231 : : else
1232 : 25501 : snapshot = SnapshotAny;
1233 : :
1234 : 35728 : scan = table_beginscan_strat(heapRelation, /* relation */
1235 : : snapshot, /* snapshot */
1236 : : 0, /* number of keys */
1237 : : NULL, /* scan key */
1238 : : true, /* buffer access strategy OK */
1239 : : allow_sync); /* syncscan OK? */
1240 : : }
1241 : : else
1242 : : {
1243 : : /*
1244 : : * Parallel index build.
1245 : : *
1246 : : * Parallel case never registers/unregisters own snapshot. Snapshot
1247 : : * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1248 : : * snapshot, based on same criteria as serial case.
1249 : : */
1250 [ - + ]: 344 : Assert(!IsBootstrapProcessingMode());
1251 [ - + ]: 344 : Assert(allow_sync);
1252 : 344 : snapshot = scan->rs_snapshot;
1253 : : }
1254 : :
1255 : 36072 : hscan = (HeapScanDesc) scan;
1256 : :
1257 : : /*
1258 : : * Must have called GetOldestNonRemovableTransactionId() if using
1259 : : * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1260 : : * worth checking this for parallel builds, since ambuild routines that
1261 : : * support parallel builds must work these details out for themselves.)
1262 : : */
2092 1263 [ + + - + : 36072 : Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
- - ]
1264 [ + + - + ]: 36072 : Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1265 : : !TransactionIdIsValid(OldestXmin));
1266 [ + + - + ]: 36072 : Assert(snapshot == SnapshotAny || !anyvisible);
1267 : :
1268 : : /* Publish number of blocks to scan */
2590 alvherre@alvh.no-ip. 1269 [ + + ]: 36072 : if (progress)
1270 : : {
1271 : : BlockNumber nblocks;
1272 : :
1273 [ + + ]: 34367 : if (hscan->rs_base.rs_parallel != NULL)
1274 : : {
1275 : : ParallelBlockTableScanDesc pbscan;
1276 : :
1277 : 137 : pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1278 : 137 : nblocks = pbscan->phs_nblocks;
1279 : : }
1280 : : else
1281 : 34230 : nblocks = hscan->rs_nblocks;
1282 : :
1283 : 34367 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1284 : : nblocks);
1285 : : }
1286 : :
1287 : : /* set our scan endpoints */
2596 andres@anarazel.de 1288 [ + + ]: 36072 : if (!allow_sync)
1289 : 1904 : heap_setscanlimits(scan, start_blockno, numblocks);
1290 : : else
1291 : : {
1292 : : /* syncscan can only be requested on whole relation */
1293 [ - + ]: 34168 : Assert(start_blockno == 0);
1294 [ - + ]: 34168 : Assert(numblocks == InvalidBlockNumber);
1295 : : }
1296 : :
1297 : 36072 : reltuples = 0;
1298 : :
1299 : : /*
1300 : : * Scan all tuples in the base relation.
1301 : : */
1302 [ + + ]: 11415407 : while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1303 : : {
1304 : : bool tupleIsAlive;
1305 : :
1306 [ + + ]: 11379355 : CHECK_FOR_INTERRUPTS();
1307 : :
1308 : : /* Report scan progress, if asked to. */
2590 alvherre@alvh.no-ip. 1309 [ + + ]: 11379355 : if (progress)
1310 : : {
2540 tgl@sss.pgh.pa.us 1311 : 9331764 : BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1312 : :
2590 alvherre@alvh.no-ip. 1313 [ + + ]: 9331764 : if (blocks_done != previous_blkno)
1314 : : {
1315 : 118910 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1316 : : blocks_done);
1317 : 118910 : previous_blkno = blocks_done;
1318 : : }
1319 : : }
1320 : :
1321 : : /*
1322 : : * When dealing with a HOT-chain of updated tuples, we want to index
1323 : : * the values of the live tuple (if any), but index it under the TID
1324 : : * of the chain's root tuple. This approach is necessary to preserve
1325 : : * the HOT-chain structure in the heap. So we need to be able to find
1326 : : * the root item offset for every tuple that's in a HOT-chain. When
1327 : : * first reaching a new page of the relation, call
1328 : : * heap_get_root_tuples() to build a map of root item offsets on the
1329 : : * page.
1330 : : *
1331 : : * It might look unsafe to use this information across buffer
1332 : : * lock/unlock. However, we hold ShareLock on the table so no
1333 : : * ordinary insert/update/delete should occur; and we hold pin on the
1334 : : * buffer continuously while visiting the page, so no pruning
1335 : : * operation can occur either.
1336 : : *
1337 : : * In cases with only ShareUpdateExclusiveLock on the table, it's
1338 : : * possible for some HOT tuples to appear that we didn't know about
1339 : : * when we first read the page. To handle that case, we re-obtain the
1340 : : * list of root offsets when a HOT tuple points to a root item that we
1341 : : * don't know about.
1342 : : *
1343 : : * Also, although our opinions about tuple liveness could change while
1344 : : * we scan the page (due to concurrent transaction commits/aborts),
1345 : : * the chain root locations won't, so this info doesn't need to be
1346 : : * rebuilt after waiting for another transaction.
1347 : : *
1348 : : * Note the implied assumption that there is no more than one live
1349 : : * tuple per HOT-chain --- else we could create more than one index
1350 : : * entry pointing to the same root tuple.
1351 : : */
2596 andres@anarazel.de 1352 [ + + ]: 11379355 : if (hscan->rs_cblock != root_blkno)
1353 : : {
1354 : 136843 : Page page = BufferGetPage(hscan->rs_cbuf);
1355 : :
1356 : 136843 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1357 : 136843 : heap_get_root_tuples(page, root_offsets);
1358 : 136843 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1359 : :
1360 : 136843 : root_blkno = hscan->rs_cblock;
1361 : : }
1362 : :
1363 [ + + ]: 11379355 : if (snapshot == SnapshotAny)
1364 : : {
1365 : : /* do our own time qual check */
1366 : : bool indexIt;
1367 : : TransactionId xwait;
1368 : :
1369 : 8835419 : recheck:
1370 : :
1371 : : /*
1372 : : * We could possibly get away with not locking the buffer here,
1373 : : * since caller should hold ShareLock on the relation, but let's
1374 : : * be conservative about it. (This remark is still correct even
1375 : : * with HOT-pruning: our pin on the buffer prevents pruning.)
1376 : : */
1377 : 8835419 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1378 : :
1379 : : /*
1380 : : * The criteria for counting a tuple as live in this block need to
1381 : : * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1382 : : * otherwise CREATE INDEX and ANALYZE may produce wildly different
1383 : : * reltuples values, e.g. when there are many recently-dead
1384 : : * tuples.
1385 : : */
1386 [ + + + + : 8835419 : switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
+ - ]
1387 : : hscan->rs_cbuf))
1388 : : {
1389 : 1355 : case HEAPTUPLE_DEAD:
1390 : : /* Definitely dead, we can ignore it */
1391 : 1355 : indexIt = false;
1392 : 1355 : tupleIsAlive = false;
1393 : 1355 : break;
1394 : 6793005 : case HEAPTUPLE_LIVE:
1395 : : /* Normal case, index and unique-check it */
1396 : 6793005 : indexIt = true;
1397 : 6793005 : tupleIsAlive = true;
1398 : : /* Count it as live, too */
1399 : 6793005 : reltuples += 1;
1400 : 6793005 : break;
1401 : 154623 : case HEAPTUPLE_RECENTLY_DEAD:
1402 : :
1403 : : /*
1404 : : * If tuple is recently deleted then we must index it
1405 : : * anyway to preserve MVCC semantics. (Pre-existing
1406 : : * transactions could try to use the index after we finish
1407 : : * building it, and may need to see such tuples.)
1408 : : *
1409 : : * However, if it was HOT-updated then we must only index
1410 : : * the live tuple at the end of the HOT-chain. Since this
1411 : : * breaks semantics for pre-existing snapshots, mark the
1412 : : * index as unusable for them.
1413 : : *
1414 : : * We don't count recently-dead tuples in reltuples, even
1415 : : * if we index them; see heapam_scan_analyze_next_tuple().
1416 : : */
1417 [ + + ]: 154623 : if (HeapTupleIsHotUpdated(heapTuple))
1418 : : {
1419 : 132 : indexIt = false;
1420 : : /* mark the index as unsafe for old snapshots */
1421 : 132 : indexInfo->ii_BrokenHotChain = true;
1422 : : }
1423 : : else
1424 : 154491 : indexIt = true;
1425 : : /* In any case, exclude the tuple from unique-checking */
1426 : 154623 : tupleIsAlive = false;
1427 : 154623 : break;
1428 : 1886368 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1429 : :
1430 : : /*
1431 : : * In "anyvisible" mode, this tuple is visible and we
1432 : : * don't need any further checks.
1433 : : */
1434 [ + + ]: 1886368 : if (anyvisible)
1435 : : {
1436 : 30736 : indexIt = true;
1437 : 30736 : tupleIsAlive = true;
1438 : 30736 : reltuples += 1;
1439 : 30736 : break;
1440 : : }
1441 : :
1442 : : /*
1443 : : * Since caller should hold ShareLock or better, normally
1444 : : * the only way to see this is if it was inserted earlier
1445 : : * in our own transaction. However, it can happen in
1446 : : * system catalogs, since we tend to release write lock
1447 : : * before commit there. Give a warning if neither case
1448 : : * applies.
1449 : : */
1450 : 1855632 : xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1451 [ + + ]: 1855632 : if (!TransactionIdIsCurrentTransactionId(xwait))
1452 : : {
1453 [ - + ]: 105 : if (!is_system_catalog)
2596 andres@anarazel.de 1454 [ # # ]:UBC 0 : elog(WARNING, "concurrent insert in progress within table \"%s\"",
1455 : : RelationGetRelationName(heapRelation));
1456 : :
1457 : : /*
1458 : : * If we are performing uniqueness checks, indexing
1459 : : * such a tuple could lead to a bogus uniqueness
1460 : : * failure. In that case we wait for the inserting
1461 : : * transaction to finish and check again.
1462 : : */
2596 andres@anarazel.de 1463 [ - + ]:CBC 105 : if (checking_uniqueness)
1464 : : {
1465 : : /*
1466 : : * Must drop the lock on the buffer before we wait
1467 : : */
2596 andres@anarazel.de 1468 :UBC 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1469 : 0 : XactLockTableWait(xwait, heapRelation,
2596 andres@anarazel.de 1470 :UIC 0 : &heapTuple->t_self,
1471 : : XLTW_InsertIndexUnique);
2596 andres@anarazel.de 1472 [ # # ]:UBC 0 : CHECK_FOR_INTERRUPTS();
1473 : 0 : goto recheck;
1474 : : }
1475 : : }
1476 : : else
1477 : : {
1478 : : /*
1479 : : * For consistency with
1480 : : * heapam_scan_analyze_next_tuple(), count
1481 : : * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1482 : : * when inserted by our own transaction.
1483 : : */
2596 andres@anarazel.de 1484 :CBC 1855527 : reltuples += 1;
1485 : : }
1486 : :
1487 : : /*
1488 : : * We must index such tuples, since if the index build
1489 : : * commits then they're good.
1490 : : */
1491 : 1855632 : indexIt = true;
1492 : 1855632 : tupleIsAlive = true;
1493 : 1855632 : break;
1494 : 68 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1495 : :
1496 : : /*
1497 : : * As with INSERT_IN_PROGRESS case, this is unexpected
1498 : : * unless it's our own deletion or a system catalog; but
1499 : : * in anyvisible mode, this tuple is visible.
1500 : : */
1501 [ - + ]: 68 : if (anyvisible)
1502 : : {
2596 andres@anarazel.de 1503 :UBC 0 : indexIt = true;
1504 : 0 : tupleIsAlive = false;
1505 : 0 : reltuples += 1;
1506 : 0 : break;
1507 : : }
1508 : :
2596 andres@anarazel.de 1509 :CBC 68 : xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1510 [ + + ]: 68 : if (!TransactionIdIsCurrentTransactionId(xwait))
1511 : : {
1512 [ - + ]: 24 : if (!is_system_catalog)
2596 andres@anarazel.de 1513 [ # # ]:UBC 0 : elog(WARNING, "concurrent delete in progress within table \"%s\"",
1514 : : RelationGetRelationName(heapRelation));
1515 : :
1516 : : /*
1517 : : * If we are performing uniqueness checks, assuming
1518 : : * the tuple is dead could lead to missing a
1519 : : * uniqueness violation. In that case we wait for the
1520 : : * deleting transaction to finish and check again.
1521 : : *
1522 : : * Also, if it's a HOT-updated tuple, we should not
1523 : : * index it but rather the live tuple at the end of
1524 : : * the HOT-chain. However, the deleting transaction
1525 : : * could abort, possibly leaving this tuple as live
1526 : : * after all, in which case it has to be indexed. The
1527 : : * only way to know what to do is to wait for the
1528 : : * deleting transaction to finish and check again.
1529 : : */
2596 andres@anarazel.de 1530 [ + - - + ]:CBC 48 : if (checking_uniqueness ||
1531 : 24 : HeapTupleIsHotUpdated(heapTuple))
1532 : : {
1533 : : /*
1534 : : * Must drop the lock on the buffer before we wait
1535 : : */
2596 andres@anarazel.de 1536 :UBC 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1537 : 0 : XactLockTableWait(xwait, heapRelation,
2596 andres@anarazel.de 1538 :UIC 0 : &heapTuple->t_self,
1539 : : XLTW_InsertIndexUnique);
2596 andres@anarazel.de 1540 [ # # ]:UBC 0 : CHECK_FOR_INTERRUPTS();
1541 : 0 : goto recheck;
1542 : : }
1543 : :
1544 : : /*
1545 : : * Otherwise index it but don't check for uniqueness,
1546 : : * the same as a RECENTLY_DEAD tuple.
1547 : : */
2596 andres@anarazel.de 1548 :CBC 24 : indexIt = true;
1549 : :
1550 : : /*
1551 : : * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1552 : : * if they were not deleted by the current
1553 : : * transaction. That's what
1554 : : * heapam_scan_analyze_next_tuple() does, and we want
1555 : : * the behavior to be consistent.
1556 : : */
1557 : 24 : reltuples += 1;
1558 : : }
1559 [ - + ]: 44 : else if (HeapTupleIsHotUpdated(heapTuple))
1560 : : {
1561 : : /*
1562 : : * It's a HOT-updated tuple deleted by our own xact.
1563 : : * We can assume the deletion will commit (else the
1564 : : * index contents don't matter), so treat the same as
1565 : : * RECENTLY_DEAD HOT-updated tuples.
1566 : : */
2596 andres@anarazel.de 1567 :UBC 0 : indexIt = false;
1568 : : /* mark the index as unsafe for old snapshots */
1569 : 0 : indexInfo->ii_BrokenHotChain = true;
1570 : : }
1571 : : else
1572 : : {
1573 : : /*
1574 : : * It's a regular tuple deleted by our own xact. Index
1575 : : * it, but don't check for uniqueness nor count in
1576 : : * reltuples, the same as a RECENTLY_DEAD tuple.
1577 : : */
2596 andres@anarazel.de 1578 :CBC 44 : indexIt = true;
1579 : : }
1580 : : /* In any case, exclude the tuple from unique-checking */
1581 : 68 : tupleIsAlive = false;
1582 : 68 : break;
2596 andres@anarazel.de 1583 :UBC 0 : default:
1584 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1585 : : indexIt = tupleIsAlive = false; /* keep compiler quiet */
1586 : : break;
1587 : : }
1588 : :
2596 andres@anarazel.de 1589 :CBC 8835419 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1590 : :
1591 [ + + ]: 8835419 : if (!indexIt)
1592 : 1487 : continue;
1593 : : }
1594 : : else
1595 : : {
1596 : : /* heap_getnext did the time qual check */
1597 : 2543936 : tupleIsAlive = true;
1598 : 2543936 : reltuples += 1;
1599 : : }
1600 : :
1601 : 11377868 : MemoryContextReset(econtext->ecxt_per_tuple_memory);
1602 : :
1603 : : /* Set up for predicate or expression evaluation */
1604 : 11377868 : ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1605 : :
1606 : : /*
1607 : : * In a partial index, discard tuples that don't satisfy the
1608 : : * predicate.
1609 : : */
1610 [ + + ]: 11377868 : if (predicate != NULL)
1611 : : {
1612 [ + + ]: 131172 : if (!ExecQual(predicate, econtext))
1613 : 72161 : continue;
1614 : : }
1615 : :
1616 : : /*
1617 : : * For the current heap tuple, extract all the attributes we use in
1618 : : * this index, and note which are null. This also performs evaluation
1619 : : * of any expressions needed.
1620 : : */
1621 : 11305707 : FormIndexDatum(indexInfo,
1622 : : slot,
1623 : : estate,
1624 : : values,
1625 : : isnull);
1626 : :
1627 : : /*
1628 : : * You'd think we should go ahead and build the index tuple here, but
1629 : : * some index AMs want to do further processing on the data first. So
1630 : : * pass the values[] and isnull[] arrays, instead.
1631 : : */
1632 : :
1633 [ + + ]: 11305687 : if (HeapTupleIsHeapOnly(heapTuple))
1634 : : {
1635 : : /*
1636 : : * For a heap-only tuple, pretend its TID is that of the root. See
1637 : : * src/backend/access/heap/README.HOT for discussion.
1638 : : */
1639 : : ItemPointerData tid;
1640 : : OffsetNumber offnum;
1641 : :
1642 : 4021 : offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1643 : :
1644 : : /*
1645 : : * If a HOT tuple points to a root that we don't know about,
1646 : : * obtain root items afresh. If that still fails, report it as
1647 : : * corruption.
1648 : : */
2091 alvherre@alvh.no-ip. 1649 [ - + ]: 4021 : if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1650 : : {
1819 tgl@sss.pgh.pa.us 1651 :UBC 0 : Page page = BufferGetPage(hscan->rs_cbuf);
1652 : :
2091 alvherre@alvh.no-ip. 1653 : 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1654 : 0 : heap_get_root_tuples(page, root_offsets);
1655 : 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1656 : : }
1657 : :
2596 andres@anarazel.de 1658 [ + - + - :CBC 4021 : if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
- + ]
2596 andres@anarazel.de 1659 [ # # ]:UBC 0 : ereport(ERROR,
1660 : : (errcode(ERRCODE_DATA_CORRUPTED),
1661 : : errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1662 : : ItemPointerGetBlockNumber(&heapTuple->t_self),
1663 : : offnum,
1664 : : RelationGetRelationName(heapRelation))));
1665 : :
2370 andres@anarazel.de 1666 :CBC 4021 : ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1667 : 4021 : root_offsets[offnum - 1]);
1668 : :
1669 : : /* Call the AM's callback routine to process the tuple */
1670 : 4021 : callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1671 : : callback_state);
1672 : : }
1673 : : else
1674 : : {
1675 : : /* Call the AM's callback routine to process the tuple */
1676 : 11301666 : callback(indexRelation, &heapTuple->t_self, values, isnull,
1677 : : tupleIsAlive, callback_state);
1678 : : }
1679 : : }
1680 : :
1681 : : /* Report scan progress one last time. */
2590 alvherre@alvh.no-ip. 1682 [ + + ]: 36052 : if (progress)
1683 : : {
1684 : : BlockNumber blks_done;
1685 : :
1686 [ + + ]: 34347 : if (hscan->rs_base.rs_parallel != NULL)
1687 : : {
1688 : : ParallelBlockTableScanDesc pbscan;
1689 : :
1690 : 137 : pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1691 : 137 : blks_done = pbscan->phs_nblocks;
1692 : : }
1693 : : else
1694 : 34210 : blks_done = hscan->rs_nblocks;
1695 : :
1696 : 34347 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1697 : : blks_done);
1698 : : }
1699 : :
2596 andres@anarazel.de 1700 : 36052 : table_endscan(scan);
1701 : :
1702 : : /* we can now forget our snapshot, if set and registered by us */
1703 [ + + ]: 36052 : if (need_unregister_snapshot)
1704 : 10211 : UnregisterSnapshot(snapshot);
1705 : :
1706 : 36052 : ExecDropSingleTupleTableSlot(slot);
1707 : :
1708 : 36052 : FreeExecutorState(estate);
1709 : :
1710 : : /* These may have been pointing to the now-gone estate */
1711 : 36052 : indexInfo->ii_ExpressionsState = NIL;
1712 : 36052 : indexInfo->ii_PredicateState = NULL;
1713 : :
1714 : 36052 : return reltuples;
1715 : : }
1716 : :
/*
 * heapam_index_validate_scan - merge a heap scan against collected index TIDs
 *
 * Scan the heap in physical order (syncscan disabled so we start at block
 * zero) and merge it against the sorted TID stream in state->tuplesort,
 * inserting an index entry for any heap tuple visible to 'snapshot' that is
 * not already present in the index.  Heap-only tuples are indexed under
 * their root line pointer's TID.  Progress and per-call counters are
 * maintained in the progress-reporting machinery and in 'state'.
 */
static void
heapam_index_validate_scan(Relation heapRelation,
						   Relation indexRelation,
						   IndexInfo *indexInfo,
						   Snapshot snapshot,
						   ValidateIndexState *state)
{
	TableScanDesc scan;
	HeapScanDesc hscan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	BlockNumber root_blkno = InvalidBlockNumber;
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
	bool		in_index[MaxHeapTuplesPerPage];
	BlockNumber previous_blkno = InvalidBlockNumber;

	/* state variables for the merge */
	ItemPointer indexcursor = NULL;
	ItemPointerData decoded;
	bool		tuplesort_empty = false;

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
									&TTSOpsHeapTuple);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.  We must disable syncscan
	 * here, because it's critical that we read from block zero forward to
	 * match the sorted TIDs.
	 */
	scan = table_beginscan_strat(heapRelation,	/* relation */
								 snapshot,	/* snapshot */
								 0, /* number of keys */
								 NULL,	/* scan key */
								 true,	/* buffer access strategy OK */
								 false);	/* syncscan not OK */
	hscan = (HeapScanDesc) scan;

	pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
								 hscan->rs_nblocks);

	/*
	 * Scan all tuples matching the snapshot.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;
		ItemPointerData rootTuple;
		OffsetNumber root_offnum;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/* Report progress whenever the scan advances onto a new block */
		if ((previous_blkno == InvalidBlockNumber) ||
			(hscan->rs_cblock != previous_blkno))
		{
			pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
										 hscan->rs_cblock);
			previous_blkno = hscan->rs_cblock;
		}

		/*
		 * As commented in table_index_build_scan, we should index heap-only
		 * tuples under the TIDs of their root tuples; so when we advance onto
		 * a new heap page, build a map of root item offsets on the page.
		 *
		 * This complicates merging against the tuplesort output: we will
		 * visit the live tuples in order by their offsets, but the root
		 * offsets that we need to compare against the index contents might be
		 * ordered differently.  So we might have to "look back" within the
		 * tuplesort output, but only within the current page.  We handle that
		 * by keeping a bool array in_index[] showing all the
		 * already-passed-over tuplesort output TIDs of the current page.  We
		 * clear that array here, when advancing onto a new heap page.
		 */
		if (hscan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(hscan->rs_cbuf);

			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			memset(in_index, 0, sizeof(in_index));

			root_blkno = hscan->rs_cblock;
		}

		/* Convert actual tuple TID to root TID */
		rootTuple = *heapcursor;
		root_offnum = ItemPointerGetOffsetNumber(heapcursor);

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			root_offnum = root_offsets[root_offnum - 1];
			/* a HOT tuple with no known root is data corruption */
			if (!OffsetNumberIsValid(root_offnum))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(heapcursor),
										 ItemPointerGetOffsetNumber(heapcursor),
										 RelationGetRelationName(heapRelation))));
			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
		}

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current root tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, &rootTuple) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			if (indexcursor)
			{
				/*
				 * Remember index items seen earlier on the current heap page
				 */
				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
			}

			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
												  false, &ts_val, &ts_isnull,
												  NULL);
			Assert(tuplesort_empty || !ts_isnull);
			if (!tuplesort_empty)
			{
				/* the sorted datum is an int64-encoded TID; decode it */
				itemptr_decode(&decoded, DatumGetInt64(ts_val));
				indexcursor = &decoded;
			}
			else
			{
				/* Be tidy */
				indexcursor = NULL;
			}
		}

		/*
		 * If the tuplesort has overshot *and* we didn't see a match earlier,
		 * then this tuple is missing from the index, so insert it.
		 */
		if ((tuplesort_empty ||
			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
			!in_index[root_offnum - 1])
		{
			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreHeapTuple(heapTuple, slot, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.  This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first. So pass the values[] and isnull[] arrays, instead.
			 */

			/*
			 * If the tuple is already committed dead, you might think we
			 * could suppress uniqueness checking, but this is no longer true
			 * in the presence of HOT, because the insert is actually a proxy
			 * for a uniqueness check on the whole HOT-chain.  That is, the
			 * tuple we have here could be dead because it was already
			 * HOT-updated, and if so the updating transaction will not have
			 * thought it should insert index entries.  The index AM will
			 * check the whole HOT-chain and correctly detect a conflict if
			 * there is one.
			 */

			index_insert(indexRelation,
						 values,
						 isnull,
						 &rootTuple,
						 heapRelation,
						 indexInfo->ii_Unique ?
						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
						 false,
						 indexInfo);

			state->tups_inserted += 1;
		}
	}

	table_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;
}
1957 : :
1958 : : /*
1959 : : * Return the number of blocks that have been read by this scan since
1960 : : * starting. This is meant for progress reporting rather than be fully
1961 : : * accurate: in a parallel scan, workers can be concurrently reading blocks
1962 : : * further ahead than what we report.
1963 : : */
1964 : : static BlockNumber
2590 alvherre@alvh.no-ip. 1965 : 9331764 : heapam_scan_get_blocks_done(HeapScanDesc hscan)
1966 : : {
1967 : 9331764 : ParallelBlockTableScanDesc bpscan = NULL;
1968 : : BlockNumber startblock;
1969 : : BlockNumber blocks_done;
1970 : :
1971 [ + + ]: 9331764 : if (hscan->rs_base.rs_parallel != NULL)
1972 : : {
1973 : 1197924 : bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1974 : 1197924 : startblock = bpscan->phs_startblock;
1975 : : }
1976 : : else
1977 : 8133840 : startblock = hscan->rs_startblock;
1978 : :
1979 : : /*
1980 : : * Might have wrapped around the end of the relation, if startblock was
1981 : : * not zero.
1982 : : */
1983 [ + + ]: 9331764 : if (hscan->rs_cblock > startblock)
1984 : 8997178 : blocks_done = hscan->rs_cblock - startblock;
1985 : : else
1986 : : {
1987 : : BlockNumber nblocks;
1988 : :
1989 [ + + ]: 334586 : nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
1990 : 334586 : blocks_done = nblocks - startblock +
1991 : 334586 : hscan->rs_cblock;
1992 : : }
1993 : :
1994 : 9331764 : return blocks_done;
1995 : : }
1996 : :
1997 : :
1998 : : /* ------------------------------------------------------------------------
1999 : : * Miscellaneous callbacks for the heap AM
2000 : : * ------------------------------------------------------------------------
2001 : : */
2002 : :
2003 : : /*
2004 : : * Check to see whether the table needs a TOAST table. It does only if
2005 : : * (1) there are any toastable attributes, and (2) the maximum length
2006 : : * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2007 : : * create a toast table for something like "f1 varchar(20)".)
2008 : : */
2009 : : static bool
2541 rhaas@postgresql.org 2010 : 30452 : heapam_relation_needs_toast_table(Relation rel)
2011 : : {
2012 : 30452 : int32 data_length = 0;
2013 : 30452 : bool maxlength_unknown = false;
2014 : 30452 : bool has_toastable_attrs = false;
2015 : 30452 : TupleDesc tupdesc = rel->rd_att;
2016 : : int32 tuple_length;
2017 : : int i;
2018 : :
2019 [ + + ]: 119646 : for (i = 0; i < tupdesc->natts; i++)
2020 : : {
2021 : 89194 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2022 : :
2023 [ + + ]: 89194 : if (att->attisdropped)
2024 : 805 : continue;
452 peter@eisentraut.org 2025 [ + + ]: 88389 : if (att->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
2026 : 656 : continue;
2541 rhaas@postgresql.org 2027 [ + + + + : 87733 : data_length = att_align_nominal(data_length, att->attalign);
+ + - + ]
2028 [ + + ]: 87733 : if (att->attlen > 0)
2029 : : {
2030 : : /* Fixed-length types are never toastable */
2031 : 65751 : data_length += att->attlen;
2032 : : }
2033 : : else
2034 : : {
2035 : 21982 : int32 maxlen = type_maximum_size(att->atttypid,
2036 : : att->atttypmod);
2037 : :
2038 [ + + ]: 21982 : if (maxlen < 0)
2039 : 19985 : maxlength_unknown = true;
2040 : : else
2041 : 1997 : data_length += maxlen;
2253 tgl@sss.pgh.pa.us 2042 [ + + ]: 21982 : if (att->attstorage != TYPSTORAGE_PLAIN)
2541 rhaas@postgresql.org 2043 : 21286 : has_toastable_attrs = true;
2044 : : }
2045 : : }
2046 [ + + ]: 30452 : if (!has_toastable_attrs)
2047 : 18007 : return false; /* nothing to toast? */
2048 [ + + ]: 12445 : if (maxlength_unknown)
2049 : 10958 : return true; /* any unlimited-length attrs? */
2050 : 1487 : tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2051 : 1487 : BITMAPLEN(tupdesc->natts)) +
2052 : 1487 : MAXALIGN(data_length);
2053 : 1487 : return (tuple_length > TOAST_TUPLE_THRESHOLD);
2054 : : }
2055 : :
/*
 * TOAST tables for heap relations are just heap relations.
 *
 * So the TOAST table can simply use the owning relation's own access
 * method; return that AM's OID.
 */
static Oid
heapam_relation_toast_am(Relation rel)
{
	return rel->rd_rel->relam;
}
2064 : :
2065 : :
2066 : : /* ------------------------------------------------------------------------
2067 : : * Planner related callbacks for the heap AM
2068 : : * ------------------------------------------------------------------------
2069 : : */
2070 : :
2071 : : #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2072 : : (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2073 : : #define HEAP_USABLE_BYTES_PER_PAGE \
2074 : : (BLCKSZ - SizeOfPageHeaderData)
2075 : :
/*
 * Estimate the size of a heap relation for the planner.
 *
 * Delegates to the generic block-based estimator, supplying heap-specific
 * constants: the fixed per-tuple overhead (tuple header plus line pointer)
 * and the usable bytes per page (block size minus page header).
 */
static void
heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
						 BlockNumber *pages, double *tuples,
						 double *allvisfrac)
{
	table_block_relation_estimate_size(rel, attr_widths, pages,
									   tuples, allvisfrac,
									   HEAP_OVERHEAD_BYTES_PER_TUPLE,
									   HEAP_USABLE_BYTES_PER_PAGE);
}
2086 : :
2087 : :
2088 : : /* ------------------------------------------------------------------------
2089 : : * Executor related callbacks for the heap AM
2090 : : * ------------------------------------------------------------------------
2091 : : */
2092 : :
2093 : : static bool
416 melanieplageman@gmai 2094 : 4131449 : heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2095 : : TupleTableSlot *slot,
2096 : : bool *recheck,
2097 : : uint64 *lossy_pages,
2098 : : uint64 *exact_pages)
2099 : : {
474 2100 : 4131449 : BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
2101 : 4131449 : HeapScanDesc hscan = (HeapScanDesc) bscan;
2102 : : OffsetNumber targoffset;
2103 : : Page page;
2104 : : ItemId lp;
2105 : :
2106 : : /*
2107 : : * Out of range? If so, nothing more to look at on this page
2108 : : */
416 2109 [ + + ]: 4377834 : while (hscan->rs_cindex >= hscan->rs_ntuples)
2110 : : {
2111 : : /*
2112 : : * Returns false if the bitmap is exhausted and there are no further
2113 : : * blocks we need to scan.
2114 : : */
2115 [ + + ]: 261119 : if (!BitmapHeapScanNextBlock(scan, recheck, lossy_pages, exact_pages))
2116 : 14731 : return false;
2117 : : }
2118 : :
2592 andres@anarazel.de 2119 : 4116715 : targoffset = hscan->rs_vistuples[hscan->rs_cindex];
1266 peter@eisentraut.org 2120 : 4116715 : page = BufferGetPage(hscan->rs_cbuf);
2121 : 4116715 : lp = PageGetItemId(page, targoffset);
2592 andres@anarazel.de 2122 [ - + ]: 4116715 : Assert(ItemIdIsNormal(lp));
2123 : :
1266 peter@eisentraut.org 2124 : 4116715 : hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2592 andres@anarazel.de 2125 : 4116715 : hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2126 : 4116715 : hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2127 : 4116715 : ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2128 : :
2129 [ - + - - : 4116715 : pgstat_count_heap_fetch(scan->rs_rd);
+ - ]
2130 : :
2131 : : /*
2132 : : * Set up the result slot to point to this tuple. Note that the slot
2133 : : * acquires a pin on the buffer.
2134 : : */
2135 : 4116715 : ExecStoreBufferHeapTuple(&hscan->rs_ctup,
2136 : : slot,
2137 : : hscan->rs_cbuf);
2138 : :
2139 : 4116715 : hscan->rs_cindex++;
2140 : :
2141 : 4116715 : return true;
2142 : : }
2143 : :
/*
 * Select the next block to sample and read it into the scan's buffer.
 *
 * The tablesample method may supply its own block selection callback
 * (tsm->NextSampleBlock); otherwise we scan the table sequentially from
 * rs_startblock, wrapping at the relation end.  Returns false when the
 * relation is empty or the scan has come back around to its start block.
 */
static bool
heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	TsmRoutine *tsm = scanstate->tsmroutine;
	BlockNumber blockno;

	/* return false immediately if relation is empty */
	if (hscan->rs_nblocks == 0)
		return false;

	/* release previous scan buffer, if any */
	if (BufferIsValid(hscan->rs_cbuf))
	{
		ReleaseBuffer(hscan->rs_cbuf);
		hscan->rs_cbuf = InvalidBuffer;
	}

	if (tsm->NextSampleBlock)
		blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
	else
	{
		/* scanning table sequentially */

		if (hscan->rs_cblock == InvalidBlockNumber)
		{
			/* first call: start at the scan's designated start block */
			Assert(!hscan->rs_inited);
			blockno = hscan->rs_startblock;
		}
		else
		{
			Assert(hscan->rs_inited);

			blockno = hscan->rs_cblock + 1;

			if (blockno >= hscan->rs_nblocks)
			{
				/* wrap to beginning of rel, might not have started at 0 */
				blockno = 0;
			}

			/*
			 * Report our new scan position for synchronization purposes.
			 *
			 * Note: we do this before checking for end of scan so that the
			 * final state of the position hint is back at the start of the
			 * rel.  That's not strictly necessary, but otherwise when you run
			 * the same query multiple times the starting position would shift
			 * a little bit backwards on every invocation, which is confusing.
			 * We don't guarantee any specific ordering in general, though.
			 */
			if (scan->rs_flags & SO_ALLOW_SYNC)
				ss_report_location(scan->rs_rd, blockno);

			/* back at the start block means the scan is complete */
			if (blockno == hscan->rs_startblock)
			{
				blockno = InvalidBlockNumber;
			}
		}
	}

	hscan->rs_cblock = blockno;

	if (!BlockNumberIsValid(blockno))
	{
		hscan->rs_inited = false;
		return false;
	}

	Assert(hscan->rs_cblock < hscan->rs_nblocks);

	/*
	 * Be sure to check for interrupts at least once per page.  Checks at
	 * higher code levels won't be able to stop a sample scan that encounters
	 * many pages' worth of consecutive dead tuples.
	 */
	CHECK_FOR_INTERRUPTS();

	/* Read page using selected strategy */
	hscan->rs_cbuf = ReadBufferExtended(hscan->rs_base.rs_rd, MAIN_FORKNUM,
										blockno, RBM_NORMAL, hscan->rs_strategy);

	/* in pagemode, prune the page and determine visible tuple offsets */
	if (hscan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
		heap_prepare_pagescan(scan);

	hscan->rs_inited = true;
	return true;
}
2233 : :
/*
 * Fetch the next sampled tuple from the current block, storing it in 'slot'.
 *
 * The tablesample method's NextSampleTuple callback picks the offsets to
 * examine; we skip non-normal line pointers and tuples not visible under
 * the scan snapshot.  Returns false (with 'slot' cleared) when the page's
 * sample is exhausted.  Outside pagemode, the buffer is share-locked for
 * the duration of the visibility checks.
 */
static bool
heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
							  TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	TsmRoutine *tsm = scanstate->tsmroutine;
	BlockNumber blockno = hscan->rs_cblock;
	bool		pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;

	Page		page;
	bool		all_visible;
	OffsetNumber maxoffset;

	/*
	 * When not using pagemode, we must lock the buffer during tuple
	 * visibility checks.
	 */
	if (!pagemode)
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);

	page = BufferGetPage(hscan->rs_cbuf);
	/* an all-visible page lets us skip per-tuple visibility checks */
	all_visible = PageIsAllVisible(page) &&
		!scan->rs_snapshot->takenDuringRecovery;
	maxoffset = PageGetMaxOffsetNumber(page);

	for (;;)
	{
		OffsetNumber tupoffset;

		CHECK_FOR_INTERRUPTS();

		/* Ask the tablesample method which tuples to check on this page. */
		tupoffset = tsm->NextSampleTuple(scanstate,
										 blockno,
										 maxoffset);

		if (OffsetNumberIsValid(tupoffset))
		{
			ItemId		itemid;
			bool		visible;
			HeapTuple	tuple = &(hscan->rs_ctup);

			/* Skip invalid tuple pointers. */
			itemid = PageGetItemId(page, tupoffset);
			if (!ItemIdIsNormal(itemid))
				continue;

			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple->t_len = ItemIdGetLength(itemid);
			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);


			if (all_visible)
				visible = true;
			else
				visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
												 tuple, tupoffset);

			/* in pagemode, heap_prepare_pagescan did this for us */
			if (!pagemode)
				HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
													hscan->rs_cbuf, scan->rs_snapshot);

			/* Try next tuple from same page. */
			if (!visible)
				continue;

			/* Found visible tuple, return it. */
			if (!pagemode)
				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);

			/* Count successfully-fetched tuples as heap fetches */
			pgstat_count_heap_getnext(scan->rs_rd);

			return true;
		}
		else
		{
			/*
			 * If we get here, it means we've exhausted the items on this page
			 * and it's time to move to the next.
			 */
			if (!pagemode)
				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			ExecClearTuple(slot);
			return false;
		}
	}

	/* not reached: the loop above only exits via return */
	Assert(0);
}
2328 : :
2329 : :
2330 : : /* ----------------------------------------------------------------------------
2331 : : * Helper functions for the above.
2332 : : * ----------------------------------------------------------------------------
2333 : : */
2334 : :
2335 : : /*
2336 : : * Reconstruct and rewrite the given tuple
2337 : : *
2338 : : * We cannot simply copy the tuple as-is, for several reasons:
2339 : : *
2340 : : * 1. We'd like to squeeze out the values of any dropped columns, both
2341 : : * to save space and to ensure we have no corner-case failures. (It's
2342 : : * possible for example that the new table hasn't got a TOAST table
2343 : : * and so is unable to store any large values of dropped cols.)
2344 : : *
2345 : : * 2. The tuple might not even be legal for the new table; this is
2346 : : * currently only known to happen as an after-effect of ALTER TABLE
2347 : : * SET WITHOUT OIDS.
2348 : : *
2349 : : * So, we must reconstruct the tuple from component Datums.
2350 : : */
static void
reform_and_rewrite_tuple(HeapTuple tuple,
						 Relation OldHeap, Relation NewHeap,
						 Datum *values, bool *isnull, RewriteState rwstate)
{
	HeapTuple	newtuple;

	/* Rebuild the tuple, nulling out dropped columns (see reform_tuple) */
	newtuple = reform_tuple(tuple, OldHeap, NewHeap, values, isnull);

	/* The heap rewrite module does the rest */
	rewrite_heap_tuple(rwstate, tuple, newtuple);

	heap_freetuple(newtuple);
}
2365 : :
2366 : : /*
2367 : : * Insert tuple when processing REPACK CONCURRENTLY.
2368 : : *
2369 : : * rewriteheap.c is not used in the CONCURRENTLY case because it'd be
2370 : : * difficult to do the same in the catch-up phase (as the logical
2371 : : * decoding does not provide us with sufficient visibility
2372 : : * information). Thus we must use heap_insert() both during the
2373 : : * catch-up and here.
2374 : : *
2375 : : * We pass the NO_LOGICAL flag to heap_insert() in order to skip logical
2376 : : * decoding: as soon as REPACK CONCURRENTLY swaps the relation files, it drops
2377 : : * this relation, so no logical replication subscription should need the data.
2378 : : *
2379 : : * BulkInsertState is used because many tuples are inserted in the typical
2380 : : * case.
2381 : : */
static void
heap_insert_for_repack(HeapTuple tuple, Relation OldHeap, Relation NewHeap,
					   Datum *values, bool *isnull, BulkInsertState bistate)
{
	HeapTuple	newtuple;

	/* Rebuild the tuple, nulling out dropped columns (see reform_tuple) */
	newtuple = reform_tuple(tuple, OldHeap, NewHeap, values, isnull);

	/* NO_LOGICAL skips logical decoding; see the function header for why */
	heap_insert(NewHeap, newtuple, GetCurrentCommandId(true),
				HEAP_INSERT_NO_LOGICAL, bistate);

	heap_freetuple(newtuple);
}
2395 : :
2396 : : /*
2397 : : * Subroutine for reform_and_rewrite_tuple and heap_insert_for_repack.
2398 : : *
2399 : : * Deform the given tuple, set values of dropped columns to NULL, form a new
2400 : : * tuple and return it. If no attributes need to be changed in this way, a
2401 : : * copy of the original tuple is returned. Caller is responsible for freeing
2402 : : * the returned tuple.
2403 : : *
2404 : : * XXX this coding assumes that both relations have the same tupledesc.
2405 : : */
2406 : : static HeapTuple
2407 : 463326 : reform_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap,
2408 : : Datum *values, bool *isnull)
2409 : : {
2593 andres@anarazel.de 2410 :CBC 463326 : TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2411 : 463326 : TupleDesc newTupDesc = RelationGetDescr(NewHeap);
29 alvherre@kurilemu.de 2412 :GNC 463326 : bool needs_reform = false;
2413 : :
2414 : : /* Skip work if the tuple doesn't need any attributes changed */
2415 [ + + ]: 3942816 : for (int i = 0; i < newTupDesc->natts; i++)
2416 : : {
2417 [ + + ]: 3479490 : if (TupleDescCompactAttr(newTupDesc, i)->attisdropped &&
2418 [ + + ]: 40 : !heap_attisnull(tuple, i + 1, newTupDesc))
2419 : 20 : needs_reform = true;
2420 : : }
2421 [ + + ]: 463326 : if (!needs_reform)
2422 : 463316 : return heap_copytuple(tuple);
2423 : :
2593 andres@anarazel.de 2424 :CBC 10 : heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2425 : :
29 alvherre@kurilemu.de 2426 [ + + ]:GNC 60 : for (int i = 0; i < newTupDesc->natts; i++)
2427 : : {
501 drowley@postgresql.o 2428 [ + + ]:CBC 50 : if (TupleDescCompactAttr(newTupDesc, i)->attisdropped)
2593 andres@anarazel.de 2429 :GBC 20 : isnull[i] = true;
2430 : : }
2431 : :
29 alvherre@kurilemu.de 2432 :GNC 10 : return heap_form_tuple(newTupDesc, values, isnull);
2593 andres@anarazel.de 2433 :ECB (360494) : }
2434 : :
2435 : : /*
2436 : : * Check visibility of the tuple.
2437 : : */
2438 : : static bool
2593 andres@anarazel.de 2439 :CBC 384 : SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2440 : : HeapTuple tuple,
2441 : : OffsetNumber tupoffset)
2442 : : {
2443 : 384 : HeapScanDesc hscan = (HeapScanDesc) scan;
2444 : :
2543 2445 [ + + ]: 384 : if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2446 : : {
501 melanieplageman@gmai 2447 : 380 : uint32 start = 0,
2448 : 380 : end = hscan->rs_ntuples;
2449 : :
2450 : : /*
2451 : : * In pageatatime mode, heap_prepare_pagescan() already did visibility
2452 : : * checks, so just look at the info it left in rs_vistuples[].
2453 : : *
2454 : : * We use a binary search over the known-sorted array. Note: we could
2455 : : * save some effort if we insisted that NextSampleTuple select tuples
2456 : : * in increasing order, but it's not clear that there would be enough
2457 : : * gain to justify the restriction.
2458 : : */
2459 [ + - ]: 720 : while (start < end)
2460 : : {
2461 : 720 : uint32 mid = start + (end - start) / 2;
2593 andres@anarazel.de 2462 : 720 : OffsetNumber curoffset = hscan->rs_vistuples[mid];
2463 : :
2464 [ + + ]: 720 : if (tupoffset == curoffset)
2465 : 380 : return true;
2466 [ + + ]: 340 : else if (tupoffset < curoffset)
501 melanieplageman@gmai 2467 : 177 : end = mid;
2468 : : else
2593 andres@anarazel.de 2469 : 163 : start = mid + 1;
2470 : : }
2471 : :
2593 andres@anarazel.de 2472 :UBC 0 : return false;
2473 : : }
2474 : : else
2475 : : {
2476 : : /* Otherwise, we have to check the tuple individually. */
2593 andres@anarazel.de 2477 :CBC 4 : return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2478 : : buffer);
2479 : : }
2480 : : }
2481 : :
/*
 * Helper function to get the next block of a bitmap heap scan.  Returns true
 * when it got the next block and saved it in the scan descriptor, and false
 * when the bitmap and/or relation are exhausted.
 */
static bool
BitmapHeapScanNextBlock(TableScanDesc scan,
						bool *recheck,
						uint64 *lossy_pages, uint64 *exact_pages)
{
	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
	HeapScanDesc hscan = (HeapScanDesc) bscan;
	BlockNumber block;
	void	   *per_buffer_data;
	Buffer		buffer;
	Snapshot	snapshot;
	int			ntup;
	TBMIterateResult *tbmres;
	OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
	int			noffsets = -1;

	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
	Assert(hscan->rs_read_stream);

	/* Reset per-page state before loading the next block. */
	hscan->rs_cindex = 0;
	hscan->rs_ntuples = 0;

	/* Release buffer containing previous block. */
	if (BufferIsValid(hscan->rs_cbuf))
	{
		ReleaseBuffer(hscan->rs_cbuf);
		hscan->rs_cbuf = InvalidBuffer;
	}

	hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream,
											 &per_buffer_data);

	if (BufferIsInvalid(hscan->rs_cbuf))
	{
		/* the bitmap is exhausted */
		return false;
	}

	/* The read stream attaches the TBMIterateResult as per-buffer data. */
	Assert(per_buffer_data);

	tbmres = per_buffer_data;

	Assert(BlockNumberIsValid(tbmres->blockno));
	Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);

	/* Exact pages need their tuple offsets extracted. */
	if (!tbmres->lossy)
		noffsets = tbm_extract_page_tuple(tbmres, offsets,
										  TBM_MAX_TUPLES_PER_PAGE);

	*recheck = tbmres->recheck;

	block = hscan->rs_cblock = tbmres->blockno;
	buffer = hscan->rs_cbuf;
	snapshot = scan->rs_snapshot;

	ntup = 0;

	/*
	 * Prune and repair fragmentation for the whole page, if possible.
	 */
	heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer,
						scan->rs_flags & SO_HINT_REL_READ_ONLY);

	/*
	 * We must hold share lock on the buffer content while examining tuple
	 * visibility.  Afterwards, however, the tuples we have found to be
	 * visible are guaranteed good as long as we hold the buffer pin.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);

	/*
	 * We need two separate strategies for lossy and non-lossy cases.
	 */
	if (!tbmres->lossy)
	{
		/*
		 * Bitmap is non-lossy, so we just look through the offsets listed in
		 * tbmres; but we have to follow any HOT chain starting at each such
		 * offset.
		 */
		int			curslot;

		/* We must have extracted the tuple offsets by now */
		Assert(noffsets > -1);

		for (curslot = 0; curslot < noffsets; curslot++)
		{
			OffsetNumber offnum = offsets[curslot];
			ItemPointerData tid;
			HeapTupleData heapTuple;

			/*
			 * heap_hot_search_buffer() resolves the HOT chain and updates
			 * tid to point at the visible chain member, if any.
			 */
			ItemPointerSet(&tid, block, offnum);
			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
									   &heapTuple, NULL, true))
				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
		}
	}
	else
	{
		/*
		 * Bitmap is lossy, so we must examine each line pointer on the page.
		 * But we can ignore HOT chains, since we'll check each tuple anyway.
		 */
		Page		page = BufferGetPage(buffer);
		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
		OffsetNumber offnum;

		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
		{
			ItemId		lp;
			HeapTupleData loctup;
			bool		valid;

			lp = PageGetItemId(page, offnum);
			if (!ItemIdIsNormal(lp))
				continue;
			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
			loctup.t_len = ItemIdGetLength(lp);
			loctup.t_tableOid = scan->rs_rd->rd_id;
			ItemPointerSet(&loctup.t_self, block, offnum);
			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
			if (valid)
			{
				hscan->rs_vistuples[ntup++] = offnum;
				PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
								 HeapTupleHeaderGetXmin(loctup.t_data));
			}
			/* Called for both visible and invisible tuples. */
			HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
												buffer, snapshot);
		}
	}

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	Assert(ntup <= MaxHeapTuplesPerPage);
	hscan->rs_ntuples = ntup;

	/* Update the caller's lossy/exact page statistics. */
	if (tbmres->lossy)
		(*lossy_pages)++;
	else
		(*exact_pages)++;

	/*
	 * Return true to indicate that a valid block was found and the bitmap is
	 * not exhausted.  If there are no visible tuples on this page,
	 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
	 * return false returning control to this function to advance to the next
	 * block in the bitmap.
	 */
	return true;
}
2639 : :
2640 : : /* ------------------------------------------------------------------------
2641 : : * Definition of the heap table access method.
2642 : : * ------------------------------------------------------------------------
2643 : : */
2644 : :
static const TableAmRoutine heapam_methods = {
	.type = T_TableAmRoutine,

	/* Tuple slot support */
	.slot_callbacks = heapam_slot_callbacks,

	/* Sequential scans */
	.scan_begin = heap_beginscan,
	.scan_end = heap_endscan,
	.scan_rescan = heap_rescan,
	.scan_getnextslot = heap_getnextslot,

	/* TID range scans */
	.scan_set_tidrange = heap_set_tidrange,
	.scan_getnextslot_tidrange = heap_getnextslot_tidrange,

	/* Parallel scan support (generic block-based implementations) */
	.parallelscan_estimate = table_block_parallelscan_estimate,
	.parallelscan_initialize = table_block_parallelscan_initialize,
	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,

	/* Fetching tuples via index lookups */
	.index_fetch_begin = heapam_index_fetch_begin,
	.index_fetch_reset = heapam_index_fetch_reset,
	.index_fetch_end = heapam_index_fetch_end,
	.index_fetch_tuple = heapam_index_fetch_tuple,

	/* Tuple modification */
	.tuple_insert = heapam_tuple_insert,
	.tuple_insert_speculative = heapam_tuple_insert_speculative,
	.tuple_complete_speculative = heapam_tuple_complete_speculative,
	.multi_insert = heap_multi_insert,
	.tuple_delete = heapam_tuple_delete,
	.tuple_update = heapam_tuple_update,
	.tuple_lock = heapam_tuple_lock,

	/* Non-modifying tuple operations */
	.tuple_fetch_row_version = heapam_fetch_row_version,
	.tuple_get_latest_tid = heap_get_latest_tid,
	.tuple_tid_valid = heapam_tuple_tid_valid,
	.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
	.index_delete_tuples = heap_index_delete_tuples,

	/* DDL-related relation operations */
	.relation_set_new_filelocator = heapam_relation_set_new_filelocator,
	.relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
	.relation_copy_data = heapam_relation_copy_data,
	.relation_copy_for_cluster = heapam_relation_copy_for_cluster,
	.relation_vacuum = heap_vacuum_rel,
	.scan_analyze_next_block = heapam_scan_analyze_next_block,
	.scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
	.index_build_range_scan = heapam_index_build_range_scan,
	.index_validate_scan = heapam_index_validate_scan,

	/* Size and TOAST support */
	.relation_size = table_block_relation_size,
	.relation_needs_toast_table = heapam_relation_needs_toast_table,
	.relation_toast_am = heapam_relation_toast_am,
	.relation_fetch_toast_slice = heap_fetch_toast_slice,

	/* Planner support */
	.relation_estimate_size = heapam_estimate_rel_size,

	/* Bitmap and sample scans */
	.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
	.scan_sample_next_block = heapam_scan_sample_next_block,
	.scan_sample_next_tuple = heapam_scan_sample_next_tuple
};
2702 : :
2703 : :
/*
 * Return a pointer to the heap table access method's TableAmRoutine, for
 * callers that need it without going through the handler function.
 */
const TableAmRoutine *
GetHeapamTableAmRoutine(void)
{
	return &heapam_methods;
}
2709 : :
/*
 * SQL-callable table AM handler function: returns the heap TableAmRoutine.
 */
Datum
heap_tableam_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&heapam_methods);
}
|