Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * heapam_handler.c
4 : : * heap table access method code
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/heapam_handler.c
12 : : *
13 : : *
14 : : * NOTES
15 : : * This files wires up the lower level heapam.c et al routines with the
16 : : * tableam abstraction.
17 : : *
18 : : *-------------------------------------------------------------------------
19 : : */
20 : : #include "postgres.h"
21 : :
22 : : #include "access/genam.h"
23 : : #include "access/heapam.h"
24 : : #include "access/heaptoast.h"
25 : : #include "access/multixact.h"
26 : : #include "access/rewriteheap.h"
27 : : #include "access/syncscan.h"
28 : : #include "access/tableam.h"
29 : : #include "access/tsmapi.h"
30 : : #include "access/visibilitymap.h"
31 : : #include "access/xact.h"
32 : : #include "catalog/catalog.h"
33 : : #include "catalog/index.h"
34 : : #include "catalog/storage.h"
35 : : #include "catalog/storage_xlog.h"
36 : : #include "commands/progress.h"
37 : : #include "executor/executor.h"
38 : : #include "miscadmin.h"
39 : : #include "pgstat.h"
40 : : #include "storage/bufmgr.h"
41 : : #include "storage/bufpage.h"
42 : : #include "storage/lmgr.h"
43 : : #include "storage/predicate.h"
44 : : #include "storage/procarray.h"
45 : : #include "storage/smgr.h"
46 : : #include "utils/builtins.h"
47 : : #include "utils/rel.h"
48 : :
49 : : static void reform_and_rewrite_tuple(HeapTuple tuple,
50 : : Relation OldHeap, Relation NewHeap,
51 : : Datum *values, bool *isnull, RewriteState rwstate);
52 : :
53 : : static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
54 : : HeapTuple tuple,
55 : : OffsetNumber tupoffset);
56 : :
57 : : static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
58 : :
59 : : static bool BitmapHeapScanNextBlock(TableScanDesc scan,
60 : : bool *recheck,
61 : : uint64 *lossy_pages, uint64 *exact_pages);
62 : :
63 : :
64 : : /* ------------------------------------------------------------------------
65 : : * Slot related callbacks for heap AM
66 : : * ------------------------------------------------------------------------
67 : : */
68 : :
69 : : static const TupleTableSlotOps *
2561 andres@anarazel.de 70 :CBC 14584226 : heapam_slot_callbacks(Relation relation)
71 : : {
72 : 14584226 : return &TTSOpsBufferHeapTuple;
73 : : }
74 : :
75 : :
76 : : /* ------------------------------------------------------------------------
77 : : * Index Scan Callbacks for heap AM
78 : : * ------------------------------------------------------------------------
79 : : */
80 : :
81 : : static IndexFetchTableData *
82 : 13274192 : heapam_index_fetch_begin(Relation rel)
83 : : {
95 michael@paquier.xyz 84 :GNC 13274192 : IndexFetchHeapData *hscan = palloc0_object(IndexFetchHeapData);
85 : :
2561 andres@anarazel.de 86 :CBC 13274192 : hscan->xs_base.rel = rel;
87 : 13274192 : hscan->xs_cbuf = InvalidBuffer;
88 : :
89 : 13274192 : return &hscan->xs_base;
90 : : }
91 : :
92 : : static void
93 : 24951481 : heapam_index_fetch_reset(IndexFetchTableData *scan)
94 : : {
95 : 24951481 : IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
96 : :
97 [ + + ]: 24951481 : if (BufferIsValid(hscan->xs_cbuf))
98 : : {
99 : 11099181 : ReleaseBuffer(hscan->xs_cbuf);
100 : 11099181 : hscan->xs_cbuf = InvalidBuffer;
101 : : }
102 : 24951481 : }
103 : :
104 : : static void
105 : 13273284 : heapam_index_fetch_end(IndexFetchTableData *scan)
106 : : {
107 : 13273284 : IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
108 : :
109 : 13273284 : heapam_index_fetch_reset(scan);
110 : :
111 : 13273284 : pfree(hscan);
112 : 13273284 : }
113 : :
114 : : static bool
115 : 18858323 : heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
116 : : ItemPointer tid,
117 : : Snapshot snapshot,
118 : : TupleTableSlot *slot,
119 : : bool *call_again, bool *all_dead)
120 : : {
121 : 18858323 : IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
122 : 18858323 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
123 : : bool got_heap_tuple;
124 : :
125 [ - + ]: 18858323 : Assert(TTS_IS_BUFFERTUPLE(slot));
126 : :
127 : : /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
128 [ + + ]: 18858323 : if (!*call_again)
129 : : {
130 : : /* Switch to correct buffer if we don't have it already */
131 : 18771170 : Buffer prev_buf = hscan->xs_cbuf;
132 : :
133 : 18771170 : hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
134 : : hscan->xs_base.rel,
135 : : ItemPointerGetBlockNumber(tid));
136 : :
137 : : /*
138 : : * Prune page, but only if we weren't already on this page
139 : : */
140 [ + + ]: 18771167 : if (prev_buf != hscan->xs_cbuf)
141 : 13061365 : heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
142 : : }
143 : :
144 : : /* Obtain share-lock on the buffer so we can examine visibility */
145 : 18858320 : LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
146 : 18858320 : got_heap_tuple = heap_hot_search_buffer(tid,
147 : : hscan->xs_base.rel,
148 : : hscan->xs_cbuf,
149 : : snapshot,
150 : : &bslot->base.tupdata,
151 : : all_dead,
152 : 18858320 : !*call_again);
153 : 18858318 : bslot->base.tupdata.t_self = *tid;
154 : 18858318 : LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
155 : :
156 [ + + ]: 18858318 : if (got_heap_tuple)
157 : : {
158 : : /*
159 : : * Only in a non-MVCC snapshot can more than one member of the HOT
160 : : * chain be visible.
161 : : */
2 andres@anarazel.de 162 [ + + + + ]:GNC 12558626 : *call_again = !IsMVCCLikeSnapshot(snapshot);
163 : :
2561 andres@anarazel.de 164 :CBC 12558626 : slot->tts_tableOid = RelationGetRelid(scan->rel);
165 : 12558626 : ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
166 : : }
167 : : else
168 : : {
169 : : /* We've reached the end of the HOT chain. */
170 : 6299692 : *call_again = false;
171 : : }
172 : :
173 : 18858318 : return got_heap_tuple;
174 : : }
175 : :
176 : :
177 : : /* ------------------------------------------------------------------------
178 : : * Callbacks for non-modifying operations on individual tuples for heap AM
179 : : * ------------------------------------------------------------------------
180 : : */
181 : :
182 : : static bool
2547 183 : 579332 : heapam_fetch_row_version(Relation relation,
184 : : ItemPointer tid,
185 : : Snapshot snapshot,
186 : : TupleTableSlot *slot)
187 : : {
188 : 579332 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
189 : : Buffer buffer;
190 : :
191 [ - + ]: 579332 : Assert(TTS_IS_BUFFERTUPLE(slot));
192 : :
193 : 579332 : bslot->base.tupdata.t_self = *tid;
1432 tgl@sss.pgh.pa.us 194 [ + + ]: 579332 : if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
195 : : {
196 : : /* store in slot, transferring existing pin */
2547 andres@anarazel.de 197 : 578986 : ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
198 : 578986 : slot->tts_tableOid = RelationGetRelid(relation);
199 : :
200 : 578986 : return true;
201 : : }
202 : :
203 : 334 : return false;
204 : : }
205 : :
206 : : static bool
2494 207 : 391 : heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
208 : : {
209 : 391 : HeapScanDesc hscan = (HeapScanDesc) scan;
210 : :
211 [ + + ]: 773 : return ItemPointerIsValid(tid) &&
212 [ + + ]: 382 : ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
213 : : }
214 : :
215 : : static bool
2561 216 : 528914 : heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
217 : : Snapshot snapshot)
218 : : {
219 : 528914 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
220 : : bool res;
221 : :
222 [ - + ]: 528914 : Assert(TTS_IS_BUFFERTUPLE(slot));
223 [ - + ]: 528914 : Assert(BufferIsValid(bslot->buffer));
224 : :
225 : : /*
226 : : * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
227 : : * Caller should be holding pin, but not lock.
228 : : */
229 : 528914 : LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
230 : 528914 : res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
231 : : bslot->buffer);
232 : 528914 : LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
233 : :
234 : 528914 : return res;
235 : : }
236 : :
237 : :
238 : : /* ----------------------------------------------------------------------------
239 : : * Functions for manipulations of physical tuples for heap AM.
240 : : * ----------------------------------------------------------------------------
241 : : */
242 : :
243 : : static void
2549 244 : 8188281 : heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
245 : : int options, BulkInsertState bistate)
246 : : {
247 : 8188281 : bool shouldFree = true;
248 : 8188281 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
249 : :
250 : : /* Update the tuple with table oid */
251 : 8188281 : slot->tts_tableOid = RelationGetRelid(relation);
252 : 8188281 : tuple->t_tableOid = slot->tts_tableOid;
253 : :
254 : : /* Perform the insertion, and copy the resulting ItemPointer */
255 : 8188281 : heap_insert(relation, tuple, cid, options, bistate);
256 : 8188269 : ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
257 : :
258 [ + + ]: 8188269 : if (shouldFree)
259 : 1494752 : pfree(tuple);
260 : 8188269 : }
261 : :
262 : : static void
2542 263 : 2132 : heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
264 : : CommandId cid, int options,
265 : : BulkInsertState bistate, uint32 specToken)
266 : : {
2549 267 : 2132 : bool shouldFree = true;
268 : 2132 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
269 : :
270 : : /* Update the tuple with table oid */
271 : 2132 : slot->tts_tableOid = RelationGetRelid(relation);
272 : 2132 : tuple->t_tableOid = slot->tts_tableOid;
273 : :
274 : 2132 : HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
275 : 2132 : options |= HEAP_INSERT_SPECULATIVE;
276 : :
277 : : /* Perform the insertion, and copy the resulting ItemPointer */
278 : 2132 : heap_insert(relation, tuple, cid, options, bistate);
279 : 2132 : ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
280 : :
281 [ + + ]: 2132 : if (shouldFree)
282 : 45 : pfree(tuple);
283 : 2132 : }
284 : :
285 : : static void
2542 286 : 2129 : heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
287 : : uint32 specToken, bool succeeded)
288 : : {
2549 289 : 2129 : bool shouldFree = true;
290 : 2129 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
291 : :
292 : : /* adjust the tuple's state accordingly */
2497 293 [ + + ]: 2129 : if (succeeded)
2549 294 : 2118 : heap_finish_speculative(relation, &slot->tts_tid);
295 : : else
296 : 11 : heap_abort_speculative(relation, &slot->tts_tid);
297 : :
298 [ + + ]: 2129 : if (shouldFree)
299 : 45 : pfree(tuple);
300 : 2129 : }
301 : :
302 : : static TM_Result
303 : 829495 : heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
304 : : Snapshot snapshot, Snapshot crosscheck, bool wait,
305 : : TM_FailureData *tmfd, bool changingPart)
306 : : {
307 : : /*
308 : : * Currently Deleting of index tuples are handled at vacuum, in case if
309 : : * the storage itself is cleaning the dead tuples by itself, it is the
310 : : * time to call the index tuple deletion also.
311 : : */
703 akorotkov@postgresql 312 : 829495 : return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
313 : : }
314 : :
315 : :
316 : : static TM_Result
2549 andres@anarazel.de 317 : 194854 : heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
318 : : CommandId cid, Snapshot snapshot, Snapshot crosscheck,
319 : : bool wait, TM_FailureData *tmfd,
320 : : LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
321 : : {
322 : 194854 : bool shouldFree = true;
323 : 194854 : HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
324 : : TM_Result result;
325 : :
326 : : /* Update the tuple with table oid */
327 : 194854 : slot->tts_tableOid = RelationGetRelid(relation);
328 : 194854 : tuple->t_tableOid = slot->tts_tableOid;
329 : :
703 akorotkov@postgresql 330 : 194854 : result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
331 : : tmfd, lockmode, update_indexes);
2549 andres@anarazel.de 332 : 194842 : ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
333 : :
334 : : /*
335 : : * Decide whether new index entries are needed for the tuple
336 : : *
337 : : * Note: heap_update returns the tid (location) of the new tuple in the
338 : : * t_self field.
339 : : *
340 : : * If the update is not HOT, we must update all indexes. If the update is
341 : : * HOT, it could be that we updated summarized columns, so we either
342 : : * update only summarized indexes, or none at all.
343 : : */
1091 tomas.vondra@postgre 344 [ + + ]: 194842 : if (result != TM_Ok)
345 : : {
346 [ - + ]: 167 : Assert(*update_indexes == TU_None);
347 : 167 : *update_indexes = TU_None;
348 : : }
349 [ + + ]: 194675 : else if (!HeapTupleIsHeapOnly(tuple))
350 [ - + ]: 128120 : Assert(*update_indexes == TU_All);
351 : : else
352 [ + + - + ]: 66555 : Assert((*update_indexes == TU_Summarizing) ||
353 : : (*update_indexes == TU_None));
354 : :
2549 andres@anarazel.de 355 [ + + ]: 194842 : if (shouldFree)
356 : 31942 : pfree(tuple);
357 : :
358 : 194842 : return result;
359 : : }
360 : :
361 : : static TM_Result
362 : 558332 : heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
363 : : TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
364 : : LockWaitPolicy wait_policy, uint8 flags,
365 : : TM_FailureData *tmfd)
366 : : {
367 : 558332 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
368 : : TM_Result result;
369 : : Buffer buffer;
370 : 558332 : HeapTuple tuple = &bslot->base.tupdata;
371 : : bool follow_updates;
372 : :
373 : 558332 : follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
374 : 558332 : tmfd->traversed = false;
375 : :
376 [ + - ]: 558332 : Assert(TTS_IS_BUFFERTUPLE(slot));
377 : :
378 : 558332 : tuple_lock_retry:
703 akorotkov@postgresql 379 : 558510 : tuple->t_self = *tid;
380 : 558510 : result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
381 : : follow_updates, &buffer, tmfd);
382 : :
2549 andres@anarazel.de 383 [ + + ]: 558497 : if (result == TM_Updated &&
384 [ + + ]: 220 : (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
385 : : {
386 : : /* Should not encounter speculative tuple on recheck */
1077 akorotkov@postgresql 387 [ - + ]: 201 : Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
388 : :
703 389 : 201 : ReleaseBuffer(buffer);
390 : :
2549 andres@anarazel.de 391 [ + - ]: 201 : if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
392 : : {
393 : : SnapshotData SnapshotDirty;
394 : : TransactionId priorXmax;
395 : :
396 : : /* it was updated, so look at the updated version */
397 : 201 : *tid = tmfd->ctid;
398 : : /* updated row should have xmin matching this xmax */
399 : 201 : priorXmax = tmfd->xmax;
400 : :
401 : : /* signal that a tuple later in the chain is getting locked */
402 : 201 : tmfd->traversed = true;
403 : :
404 : : /*
405 : : * fetch target tuple
406 : : *
407 : : * Loop here to deal with updated or busy tuples
408 : : */
409 : 201 : InitDirtySnapshot(SnapshotDirty);
410 : : for (;;)
411 : : {
412 [ + + ]: 232 : if (ItemPointerIndicatesMovedPartitions(tid))
413 [ + - ]: 11 : ereport(ERROR,
414 : : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
415 : : errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
416 : :
417 : 221 : tuple->t_self = *tid;
1432 tgl@sss.pgh.pa.us 418 [ + + ]: 221 : if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
419 : : {
420 : : /*
421 : : * If xmin isn't what we're expecting, the slot must have
422 : : * been recycled and reused for an unrelated tuple. This
423 : : * implies that the latest version of the row was deleted,
424 : : * so we need do nothing. (Should be safe to examine xmin
425 : : * without getting buffer's content lock. We assume
426 : : * reading a TransactionId to be atomic, and Xmin never
427 : : * changes in an existing tuple, except to invalid or
428 : : * frozen, and neither of those can match priorXmax.)
429 : : */
2549 andres@anarazel.de 430 [ - + ]: 187 : if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
431 : : priorXmax))
432 : : {
2549 andres@anarazel.de 433 :UBC 0 : ReleaseBuffer(buffer);
2549 andres@anarazel.de 434 :CBC 11 : return TM_Deleted;
435 : : }
436 : :
437 : : /* otherwise xmin should not be dirty... */
438 [ - + ]: 187 : if (TransactionIdIsValid(SnapshotDirty.xmin))
2418 peter@eisentraut.org 439 [ # # ]:UBC 0 : ereport(ERROR,
440 : : (errcode(ERRCODE_DATA_CORRUPTED),
441 : : errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
442 : : SnapshotDirty.xmin,
443 : : ItemPointerGetBlockNumber(&tuple->t_self),
444 : : ItemPointerGetOffsetNumber(&tuple->t_self),
445 : : RelationGetRelationName(relation))));
446 : :
447 : : /*
448 : : * If tuple is being updated by other transaction then we
449 : : * have to wait for its commit/abort, or die trying.
450 : : */
2549 andres@anarazel.de 451 [ + + ]:CBC 187 : if (TransactionIdIsValid(SnapshotDirty.xmax))
452 : : {
453 : 2 : ReleaseBuffer(buffer);
454 [ - + + - ]: 2 : switch (wait_policy)
455 : : {
2549 andres@anarazel.de 456 :UBC 0 : case LockWaitBlock:
457 : 0 : XactLockTableWait(SnapshotDirty.xmax,
2549 andres@anarazel.de 458 :UIC 0 : relation, &tuple->t_self,
459 : : XLTW_FetchUpdated);
2549 andres@anarazel.de 460 :UBC 0 : break;
2549 andres@anarazel.de 461 :CBC 1 : case LockWaitSkip:
366 fujii@postgresql.org 462 [ + - ]: 1 : if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, false))
463 : : /* skip instead of waiting */
2549 andres@anarazel.de 464 : 1 : return TM_WouldBlock;
2549 andres@anarazel.de 465 :UBC 0 : break;
2549 andres@anarazel.de 466 :CBC 1 : case LockWaitError:
285 fujii@postgresql.org 467 [ + - ]: 1 : if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures))
2549 andres@anarazel.de 468 [ + - ]: 1 : ereport(ERROR,
469 : : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
470 : : errmsg("could not obtain lock on row in relation \"%s\"",
471 : : RelationGetRelationName(relation))));
2549 andres@anarazel.de 472 :UBC 0 : break;
473 : : }
474 : 0 : continue; /* loop back to repeat heap_fetch */
475 : : }
476 : :
477 : : /*
478 : : * If tuple was inserted by our own transaction, we have
479 : : * to check cmin against cid: cmin >= current CID means
480 : : * our command cannot see the tuple, so we should ignore
481 : : * it. Otherwise heap_lock_tuple() will throw an error,
482 : : * and so would any later attempt to update or delete the
483 : : * tuple. (We need not check cmax because
484 : : * HeapTupleSatisfiesDirty will consider a tuple deleted
485 : : * by our transaction dead, regardless of cmax.) We just
486 : : * checked that priorXmax == xmin, so we can test that
487 : : * variable instead of doing HeapTupleHeaderGetXmin again.
488 : : */
2549 andres@anarazel.de 489 [ + + + - ]:CBC 192 : if (TransactionIdIsCurrentTransactionId(priorXmax) &&
490 : 7 : HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
491 : : {
2534 492 : 7 : tmfd->xmax = priorXmax;
493 : :
494 : : /*
495 : : * Cmin is the problematic value, so store that. See
496 : : * above.
497 : : */
498 : 7 : tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
2549 499 : 7 : ReleaseBuffer(buffer);
2534 500 : 7 : return TM_SelfModified;
501 : : }
502 : :
503 : : /*
504 : : * This is a live tuple, so try to lock it again.
505 : : */
703 akorotkov@postgresql 506 : 178 : ReleaseBuffer(buffer);
2549 andres@anarazel.de 507 : 178 : goto tuple_lock_retry;
508 : : }
509 : :
510 : : /*
511 : : * If the referenced slot was actually empty, the latest
512 : : * version of the row must have been deleted, so we need do
513 : : * nothing.
514 : : */
515 [ - + ]: 34 : if (tuple->t_data == NULL)
516 : : {
703 akorotkov@postgresql 517 [ # # ]:UBC 0 : Assert(!BufferIsValid(buffer));
2549 andres@anarazel.de 518 : 0 : return TM_Deleted;
519 : : }
520 : :
521 : : /*
522 : : * As above, if xmin isn't what we're expecting, do nothing.
523 : : */
2549 andres@anarazel.de 524 [ - + ]:CBC 34 : if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
525 : : priorXmax))
526 : : {
1432 tgl@sss.pgh.pa.us 527 :UBC 0 : ReleaseBuffer(buffer);
2549 andres@anarazel.de 528 : 0 : return TM_Deleted;
529 : : }
530 : :
531 : : /*
532 : : * If we get here, the tuple was found but failed
533 : : * SnapshotDirty. Assuming the xmin is either a committed xact
534 : : * or our own xact (as it certainly should be if we're trying
535 : : * to modify the tuple), this must mean that the row was
536 : : * updated or deleted by either a committed xact or our own
537 : : * xact. If it was deleted, we can ignore it; if it was
538 : : * updated then chain up to the next version and repeat the
539 : : * whole process.
540 : : *
541 : : * As above, it should be safe to examine xmax and t_ctid
542 : : * without the buffer content lock, because they can't be
543 : : * changing. We'd better hold a buffer pin though.
544 : : */
2549 andres@anarazel.de 545 [ + + ]:CBC 34 : if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
546 : : {
547 : : /* deleted, so forget about it */
1432 tgl@sss.pgh.pa.us 548 : 3 : ReleaseBuffer(buffer);
2549 andres@anarazel.de 549 : 3 : return TM_Deleted;
550 : : }
551 : :
552 : : /* updated, so look at the updated row */
553 : 31 : *tid = tuple->t_data->t_ctid;
554 : : /* updated row should have xmin matching this xmax */
555 : 31 : priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
1432 tgl@sss.pgh.pa.us 556 : 31 : ReleaseBuffer(buffer);
557 : : /* loop back to fetch next in chain */
558 : : }
559 : : }
560 : : else
561 : : {
562 : : /* tuple was deleted, so give up */
2549 andres@anarazel.de 563 :UBC 0 : return TM_Deleted;
564 : : }
565 : : }
566 : :
2549 andres@anarazel.de 567 :CBC 558296 : slot->tts_tableOid = RelationGetRelid(relation);
568 : 558296 : tuple->t_tableOid = slot->tts_tableOid;
569 : :
570 : : /* store in slot, transferring existing pin */
703 akorotkov@postgresql 571 : 558296 : ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
572 : :
2549 andres@anarazel.de 573 : 558296 : return result;
574 : : }
575 : :
576 : :
577 : : /* ------------------------------------------------------------------------
578 : : * DDL related callbacks for heap AM.
579 : : * ------------------------------------------------------------------------
580 : : */
581 : :
582 : : static void
1348 rhaas@postgresql.org 583 : 34085 : heapam_relation_set_new_filelocator(Relation rel,
584 : : const RelFileLocator *newrlocator,
585 : : char persistence,
586 : : TransactionId *freezeXid,
587 : : MultiXactId *minmulti)
588 : : {
589 : : SMgrRelation srel;
590 : :
591 : : /*
592 : : * Initialize to the minimum XID that could put tuples in the table. We
593 : : * know that no xacts older than RecentXmin are still running, so that
594 : : * will do.
595 : : */
2544 andres@anarazel.de 596 : 34085 : *freezeXid = RecentXmin;
597 : :
598 : : /*
599 : : * Similarly, initialize the minimum Multixact to the first value that
600 : : * could possibly be stored in tuples in the table. Running transactions
601 : : * could reuse values from their local cache, so we are careful to
602 : : * consider all currently running multis.
603 : : *
604 : : * XXX this could be refined further, but is it worth the hassle?
605 : : */
606 : 34085 : *minmulti = GetOldestMultiXactId();
607 : :
1348 rhaas@postgresql.org 608 : 34085 : srel = RelationCreateStorage(*newrlocator, persistence, true);
609 : :
610 : : /*
611 : : * If required, set up an init fork for an unlogged table so that it can
612 : : * be correctly reinitialized on restart.
613 : : */
2512 andres@anarazel.de 614 [ + + ]: 34085 : if (persistence == RELPERSISTENCE_UNLOGGED)
615 : : {
2544 616 [ + + - + ]: 139 : Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
617 : : rel->rd_rel->relkind == RELKIND_TOASTVALUE);
2512 618 : 139 : smgrcreate(srel, INIT_FORKNUM, false);
1348 rhaas@postgresql.org 619 : 139 : log_smgrcreate(newrlocator, INIT_FORKNUM);
620 : : }
621 : :
2512 andres@anarazel.de 622 : 34085 : smgrclose(srel);
2544 623 : 34085 : }
624 : :
625 : : static void
626 : 286 : heapam_relation_nontransactional_truncate(Relation rel)
627 : : {
628 : 286 : RelationTruncate(rel, 0);
629 : 286 : }
630 : :
631 : : static void
1348 rhaas@postgresql.org 632 : 49 : heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
633 : : {
634 : : SMgrRelation dstrel;
635 : :
636 : : /*
637 : : * Since we copy the file directly without looking at the shared buffers,
638 : : * we'd better first flush out any pages of the source relation that are
639 : : * in shared buffers. We assume no new changes will be made while we are
640 : : * holding exclusive lock on the rel.
641 : : */
2512 andres@anarazel.de 642 : 49 : FlushRelationBuffers(rel);
643 : :
644 : : /*
645 : : * Create and copy all forks of the relation, and schedule unlinking of
646 : : * old physical files.
647 : : *
648 : : * NOTE: any conflict in relfilenumber value will be caught in
649 : : * RelationCreateStorage().
650 : : */
762 heikki.linnakangas@i 651 : 49 : dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
652 : :
653 : : /* copy main fork */
1707 tgl@sss.pgh.pa.us 654 : 49 : RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
2544 andres@anarazel.de 655 : 49 : rel->rd_rel->relpersistence);
656 : :
657 : : /* copy those extra forks that exist */
658 : 49 : for (ForkNumber forkNum = MAIN_FORKNUM + 1;
659 [ + + ]: 196 : forkNum <= MAX_FORKNUM; forkNum++)
660 : : {
1707 tgl@sss.pgh.pa.us 661 [ + + ]: 147 : if (smgrexists(RelationGetSmgr(rel), forkNum))
662 : : {
2544 andres@anarazel.de 663 : 9 : smgrcreate(dstrel, forkNum, false);
664 : :
665 : : /*
666 : : * WAL log creation if the relation is persistent, or this is the
667 : : * init fork of an unlogged relation.
668 : : */
1819 bruce@momjian.us 669 [ + + ]: 9 : if (RelationIsPermanent(rel) ||
2544 andres@anarazel.de 670 [ - + - - ]: 3 : (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
671 : : forkNum == INIT_FORKNUM))
1348 rhaas@postgresql.org 672 : 6 : log_smgrcreate(newrlocator, forkNum);
1707 tgl@sss.pgh.pa.us 673 : 9 : RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
2544 andres@anarazel.de 674 : 9 : rel->rd_rel->relpersistence);
675 : : }
676 : : }
677 : :
678 : :
679 : : /* drop old relation, and close new one */
680 : 49 : RelationDropStorage(rel);
681 : 49 : smgrclose(dstrel);
682 : 49 : }
683 : :
684 : : static void
685 : 319 : heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
686 : : Relation OldIndex, bool use_sort,
687 : : TransactionId OldestXmin,
688 : : TransactionId *xid_cutoff,
689 : : MultiXactId *multi_cutoff,
690 : : double *num_tuples,
691 : : double *tups_vacuumed,
692 : : double *tups_recently_dead)
693 : : {
694 : : RewriteState rwstate;
695 : : IndexScanDesc indexScan;
696 : : TableScanDesc tableScan;
697 : : HeapScanDesc heapScan;
698 : : bool is_system_catalog;
699 : : Tuplesortstate *tuplesort;
700 : 319 : TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
701 : 319 : TupleDesc newTupDesc = RelationGetDescr(NewHeap);
702 : : TupleTableSlot *slot;
703 : : int natts;
704 : : Datum *values;
705 : : bool *isnull;
706 : : BufferHeapTupleTableSlot *hslot;
1934 fujii@postgresql.org 707 : 319 : BlockNumber prev_cblock = InvalidBlockNumber;
708 : :
709 : : /* Remember if it's a system catalog */
2544 andres@anarazel.de 710 : 319 : is_system_catalog = IsSystemRelation(OldHeap);
711 : :
712 : : /*
713 : : * Valid smgr_targblock implies something already wrote to the relation.
714 : : * This may be harmless, but this function hasn't planned for it.
715 : : */
716 [ - + - - ]: 319 : Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
717 : :
718 : : /* Preallocate values/isnull arrays */
719 : 319 : natts = newTupDesc->natts;
95 michael@paquier.xyz 720 :GNC 319 : values = palloc_array(Datum, natts);
721 : 319 : isnull = palloc_array(bool, natts);
722 : :
723 : : /* Initialize the rewrite operation */
2518 andres@anarazel.de 724 :CBC 319 : rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
725 : : *multi_cutoff);
726 : :
727 : :
728 : : /* Set up sorting if wanted */
2544 729 [ + + ]: 319 : if (use_sort)
1009 pg@bowt.ie 730 : 68 : tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
731 : : maintenance_work_mem,
732 : : NULL, TUPLESORT_NONE);
733 : : else
2544 andres@anarazel.de 734 : 251 : tuplesort = NULL;
735 : :
736 : : /*
737 : : * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
738 : : * that still need to be copied, we scan with SnapshotAny and use
739 : : * HeapTupleSatisfiesVacuum for the visibility test.
740 : : */
741 [ + + + + ]: 319 : if (OldIndex != NULL && !use_sort)
742 : 40 : {
2542 743 : 40 : const int ci_index[] = {
744 : : PROGRESS_REPACK_PHASE,
745 : : PROGRESS_REPACK_INDEX_RELID
746 : : };
747 : : int64 ci_val[2];
748 : :
749 : : /* Set phase and OIDOldIndex to columns */
5 alvherre@kurilemu.de 750 :GNC 40 : ci_val[0] = PROGRESS_REPACK_PHASE_INDEX_SCAN_HEAP;
2544 andres@anarazel.de 751 :CBC 40 : ci_val[1] = RelationGetRelid(OldIndex);
752 : 40 : pgstat_progress_update_multi_param(2, ci_index, ci_val);
753 : :
754 : 40 : tableScan = NULL;
755 : 40 : heapScan = NULL;
369 pg@bowt.ie 756 : 40 : indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0);
2544 andres@anarazel.de 757 : 40 : index_rescan(indexScan, NULL, 0, NULL, 0);
758 : : }
759 : : else
760 : : {
761 : : /* In scan-and-sort mode and also VACUUM FULL, set phase */
5 alvherre@kurilemu.de 762 :GNC 279 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
763 : : PROGRESS_REPACK_PHASE_SEQ_SCAN_HEAP);
764 : :
2544 andres@anarazel.de 765 :CBC 279 : tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
766 : 279 : heapScan = (HeapScanDesc) tableScan;
767 : 279 : indexScan = NULL;
768 : :
769 : : /* Set total heap blocks */
5 alvherre@kurilemu.de 770 :GNC 279 : pgstat_progress_update_param(PROGRESS_REPACK_TOTAL_HEAP_BLKS,
2544 andres@anarazel.de 771 :CBC 279 : heapScan->rs_nblocks);
772 : : }
773 : :
774 : 319 : slot = table_slot_create(OldHeap, NULL);
775 : 319 : hslot = (BufferHeapTupleTableSlot *) slot;
776 : :
777 : : /*
778 : : * Scan through the OldHeap, either in OldIndex order or sequentially;
779 : : * copy each tuple into the NewHeap, or transiently to the tuplesort
780 : : * module. Note that we don't bother sorting dead tuples (they won't get
781 : : * to the new table anyway).
782 : : */
783 : : for (;;)
784 : 377962 : {
785 : : HeapTuple tuple;
786 : : Buffer buf;
787 : : bool isdead;
788 : :
789 [ - + ]: 378281 : CHECK_FOR_INTERRUPTS();
790 : :
791 [ + + ]: 378281 : if (indexScan != NULL)
792 : : {
793 [ + + ]: 94 : if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
794 : 40 : break;
795 : :
796 : : /* Since we used no scan keys, should never need to recheck */
797 [ - + ]: 54 : if (indexScan->xs_recheck)
2544 andres@anarazel.de 798 [ # # ]:UBC 0 : elog(ERROR, "CLUSTER does not support lossy index conditions");
799 : : }
800 : : else
801 : : {
2544 andres@anarazel.de 802 [ + + ]:CBC 378187 : if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
803 : : {
804 : : /*
805 : : * If the last pages of the scan were empty, we would go to
806 : : * the next phase while heap_blks_scanned != heap_blks_total.
807 : : * Instead, to ensure that heap_blks_scanned is equivalent to
808 : : * heap_blks_total after the table scan phase, this parameter
809 : : * is manually updated to the correct value when the table
810 : : * scan finishes.
811 : : */
5 alvherre@kurilemu.de 812 :GNC 279 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_BLKS_SCANNED,
1934 fujii@postgresql.org 813 :CBC 279 : heapScan->rs_nblocks);
2544 andres@anarazel.de 814 : 279 : break;
815 : : }
816 : :
817 : : /*
818 : : * In scan-and-sort mode and also VACUUM FULL, set heap blocks
819 : : * scanned
820 : : *
821 : : * Note that heapScan may start at an offset and wrap around, i.e.
822 : : * rs_startblock may be >0, and rs_cblock may end with a number
823 : : * below rs_startblock. To prevent showing this wraparound to the
824 : : * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
825 : : */
1934 fujii@postgresql.org 826 [ + + ]: 377908 : if (prev_cblock != heapScan->rs_cblock)
827 : : {
5 alvherre@kurilemu.de 828 :GNC 5600 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_BLKS_SCANNED,
1934 fujii@postgresql.org 829 :CBC 5600 : (heapScan->rs_cblock +
830 : 5600 : heapScan->rs_nblocks -
831 : 5600 : heapScan->rs_startblock
832 : 5600 : ) % heapScan->rs_nblocks + 1);
833 : 5600 : prev_cblock = heapScan->rs_cblock;
834 : : }
835 : : }
836 : :
2544 andres@anarazel.de 837 : 377962 : tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
838 : 377962 : buf = hslot->buffer;
839 : :
840 : : /*
841 : : * To be able to guarantee that we can set the hint bit, acquire an
842 : : * exclusive lock on the old buffer. We need the hint bits, set in
843 : : * heapam_relation_copy_for_cluster() -> HeapTupleSatisfiesVacuum(),
844 : : * to be set, as otherwise reform_and_rewrite_tuple() ->
845 : : * rewrite_heap_tuple() will get confused. Specifically,
846 : : * rewrite_heap_tuple() checks for HEAP_XMAX_INVALID in the old tuple
847 : : * to determine whether to check the old-to-new mapping hash table.
848 : : *
849 : : * It'd be better if we somehow could avoid setting hint bits on the
850 : : * old page. One reason to use VACUUM FULL are very bloated tables -
851 : : * rewriting most of the old table during VACUUM FULL doesn't exactly
852 : : * help...
853 : : */
62 andres@anarazel.de 854 :GNC 377962 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
855 : :
2544 andres@anarazel.de 856 [ + + + + :CBC 377962 : switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
+ - ]
857 : : {
858 : 16498 : case HEAPTUPLE_DEAD:
859 : : /* Definitely dead */
860 : 16498 : isdead = true;
861 : 16498 : break;
862 : 12897 : case HEAPTUPLE_RECENTLY_DEAD:
863 : 12897 : *tups_recently_dead += 1;
864 : : pg_fallthrough;
865 : 361281 : case HEAPTUPLE_LIVE:
866 : : /* Live or recently dead, must copy it */
867 : 361281 : isdead = false;
868 : 361281 : break;
869 : 144 : case HEAPTUPLE_INSERT_IN_PROGRESS:
870 : :
871 : : /*
872 : : * Since we hold exclusive lock on the relation, normally the
873 : : * only way to see this is if it was inserted earlier in our
874 : : * own transaction. However, it can happen in system
875 : : * catalogs, since we tend to release write lock before commit
876 : : * there. Give a warning if neither case applies; but in any
877 : : * case we had better copy it.
878 : : */
879 [ + + ]: 144 : if (!is_system_catalog &&
880 [ - + ]: 11 : !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
2544 andres@anarazel.de 881 [ # # ]:UBC 0 : elog(WARNING, "concurrent insert in progress within table \"%s\"",
882 : : RelationGetRelationName(OldHeap));
883 : : /* treat as live */
2544 andres@anarazel.de 884 :CBC 144 : isdead = false;
885 : 144 : break;
886 : 39 : case HEAPTUPLE_DELETE_IN_PROGRESS:
887 : :
888 : : /*
889 : : * Similar situation to INSERT_IN_PROGRESS case.
890 : : */
891 [ + + ]: 39 : if (!is_system_catalog &&
892 [ - + ]: 15 : !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
2544 andres@anarazel.de 893 [ # # ]:UBC 0 : elog(WARNING, "concurrent delete in progress within table \"%s\"",
894 : : RelationGetRelationName(OldHeap));
895 : : /* treat as recently dead */
2544 andres@anarazel.de 896 :CBC 39 : *tups_recently_dead += 1;
897 : 39 : isdead = false;
898 : 39 : break;
2544 andres@anarazel.de 899 :UBC 0 : default:
900 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
901 : : isdead = false; /* keep compiler quiet */
902 : : break;
903 : : }
904 : :
2544 andres@anarazel.de 905 :CBC 377962 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
906 : :
907 [ + + ]: 377962 : if (isdead)
908 : : {
909 : 16498 : *tups_vacuumed += 1;
910 : : /* heap rewrite module still needs to see it... */
911 [ - + ]: 16498 : if (rewrite_heap_dead_tuple(rwstate, tuple))
912 : : {
913 : : /* A previous recently-dead tuple is now known dead */
2544 andres@anarazel.de 914 :UBC 0 : *tups_vacuumed += 1;
915 : 0 : *tups_recently_dead -= 1;
916 : : }
2544 andres@anarazel.de 917 :CBC 16498 : continue;
918 : : }
919 : :
920 : 361464 : *num_tuples += 1;
921 [ + + ]: 361464 : if (tuplesort != NULL)
922 : : {
923 : 273813 : tuplesort_putheaptuple(tuplesort, tuple);
924 : :
925 : : /*
926 : : * In scan-and-sort mode, report increase in number of tuples
927 : : * scanned
928 : : */
5 alvherre@kurilemu.de 929 :GNC 273813 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_TUPLES_SCANNED,
2544 andres@anarazel.de 930 :CBC 273813 : *num_tuples);
931 : : }
932 : : else
933 : : {
2542 934 : 87651 : const int ct_index[] = {
935 : : PROGRESS_REPACK_HEAP_TUPLES_SCANNED,
936 : : PROGRESS_REPACK_HEAP_TUPLES_WRITTEN
937 : : };
938 : : int64 ct_val[2];
939 : :
2544 940 : 87651 : reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
941 : : values, isnull, rwstate);
942 : :
943 : : /*
944 : : * In indexscan mode and also VACUUM FULL, report increase in
945 : : * number of tuples scanned and written
946 : : */
947 : 87651 : ct_val[0] = *num_tuples;
948 : 87651 : ct_val[1] = *num_tuples;
949 : 87651 : pgstat_progress_update_multi_param(2, ct_index, ct_val);
950 : : }
951 : : }
952 : :
953 [ + + ]: 319 : if (indexScan != NULL)
954 : 40 : index_endscan(indexScan);
955 [ + + ]: 319 : if (tableScan != NULL)
956 : 279 : table_endscan(tableScan);
957 [ + - ]: 319 : if (slot)
958 : 319 : ExecDropSingleTupleTableSlot(slot);
959 : :
960 : : /*
961 : : * In scan-and-sort mode, complete the sort, then read out all live tuples
962 : : * from the tuplestore and write them to the new relation.
963 : : */
964 [ + + ]: 319 : if (tuplesort != NULL)
965 : : {
2542 966 : 68 : double n_tuples = 0;
967 : :
968 : : /* Report that we are now sorting tuples */
5 alvherre@kurilemu.de 969 :GNC 68 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
970 : : PROGRESS_REPACK_PHASE_SORT_TUPLES);
971 : :
2544 andres@anarazel.de 972 :CBC 68 : tuplesort_performsort(tuplesort);
973 : :
974 : : /* Report that we are now writing new heap */
5 alvherre@kurilemu.de 975 :GNC 68 : pgstat_progress_update_param(PROGRESS_REPACK_PHASE,
976 : : PROGRESS_REPACK_PHASE_WRITE_NEW_HEAP);
977 : :
978 : : for (;;)
2544 andres@anarazel.de 979 :CBC 273813 : {
980 : : HeapTuple tuple;
981 : :
982 [ - + ]: 273881 : CHECK_FOR_INTERRUPTS();
983 : :
984 : 273881 : tuple = tuplesort_getheaptuple(tuplesort, true);
985 [ + + ]: 273881 : if (tuple == NULL)
986 : 68 : break;
987 : :
988 : 273813 : n_tuples += 1;
989 : 273813 : reform_and_rewrite_tuple(tuple,
990 : : OldHeap, NewHeap,
991 : : values, isnull,
992 : : rwstate);
993 : : /* Report n_tuples */
5 alvherre@kurilemu.de 994 :GNC 273813 : pgstat_progress_update_param(PROGRESS_REPACK_HEAP_TUPLES_WRITTEN,
995 : : n_tuples);
996 : : }
997 : :
2544 andres@anarazel.de 998 :CBC 68 : tuplesort_end(tuplesort);
999 : : }
1000 : :
1001 : : /* Write out any remaining tuples, and fsync if needed */
1002 : 319 : end_heap_rewrite(rwstate);
1003 : :
1004 : : /* Clean up */
1005 : 319 : pfree(values);
1006 : 319 : pfree(isnull);
1007 : 319 : }
1008 : :
1009 : : /*
1010 : : * Prepare to analyze the next block in the read stream. Returns false if
1011 : : * the stream is exhausted and true otherwise. The scan must have been started
1012 : : * with SO_TYPE_ANALYZE option.
1013 : : *
1014 : : * This routine holds a buffer pin and lock on the heap page. They are held
1015 : : * until heapam_scan_analyze_next_tuple() returns false. That is until all the
1016 : : * items of the heap page are analyzed.
1017 : : */
1018 : : static bool
706 tmunro@postgresql.or 1019 : 86150 : heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
1020 : : {
2542 andres@anarazel.de 1021 : 86150 : HeapScanDesc hscan = (HeapScanDesc) scan;
1022 : :
1023 : : /*
1024 : : * We must maintain a pin on the target page's buffer to ensure that
1025 : : * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
1026 : : * under us. It comes from the stream already pinned. We also choose to
1027 : : * hold sharelock on the buffer throughout --- we could release and
1028 : : * re-acquire sharelock for each tuple, but since we aren't doing much
1029 : : * work per tuple, the extra lock traffic is probably better avoided.
1030 : : */
706 tmunro@postgresql.or 1031 : 86150 : hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
1032 [ + + ]: 86150 : if (!BufferIsValid(hscan->rs_cbuf))
1033 : 9059 : return false;
1034 : :
2542 andres@anarazel.de 1035 : 77091 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1036 : :
706 tmunro@postgresql.or 1037 : 77091 : hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
1038 : 77091 : hscan->rs_cindex = FirstOffsetNumber;
1039 : 77091 : return true;
1040 : : }
1041 : :
/*
 * Return the next tuple to consider for the ANALYZE sample from the page
 * currently pinned and share-locked by heapam_scan_analyze_next_block().
 *
 * On success, stores the tuple into 'slot' (a buffer-heap slot), advances
 * hscan->rs_cindex past it, and returns true *with the buffer still locked*.
 * When the page is exhausted, releases the lock and pin, clears the slot,
 * and returns false.
 *
 * Side effects: *liverows / *deadrows are incremented for every line pointer
 * examined, per the visibility rules spelled out case-by-case below; note
 * that not every counted row is sampled (e.g. recently-dead rows are counted
 * dead but never returned).
 */
static bool
heapam_scan_analyze_next_tuple(TableScanDesc scan,
							   double *liverows, double *deadrows,
							   TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	Page		targpage;
	OffsetNumber maxoffset;
	BufferHeapTupleTableSlot *hslot;

	Assert(TTS_IS_BUFFERTUPLE(slot));

	hslot = (BufferHeapTupleTableSlot *) slot;
	targpage = BufferGetPage(hscan->rs_cbuf);
	maxoffset = PageGetMaxOffsetNumber(targpage);

	/* Inner loop over all tuples on the selected page */
	for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
	{
		ItemId		itemid;
		/* scratch HeapTuple embedded in the slot; avoids an allocation */
		HeapTuple	targtuple = &hslot->base.tupdata;
		bool		sample_it = false;
		TransactionId dead_after;

		itemid = PageGetItemId(targpage, hscan->rs_cindex);

		/*
		 * We ignore unused and redirect line pointers.  DEAD line pointers
		 * should be counted as dead, because we need vacuum to run to get rid
		 * of them.  Note that this rule agrees with the way that
		 * heap_page_prune_and_freeze() counts things.
		 */
		if (!ItemIdIsNormal(itemid))
		{
			if (ItemIdIsDead(itemid))
				*deadrows += 1;
			continue;
		}

		ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);

		targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
		targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
		targtuple->t_len = ItemIdGetLength(itemid);

		switch (HeapTupleSatisfiesVacuumHorizon(targtuple,
												hscan->rs_cbuf,
												&dead_after))
		{
			case HEAPTUPLE_LIVE:
				sample_it = true;
				*liverows += 1;
				break;

			case HEAPTUPLE_DEAD:
			case HEAPTUPLE_RECENTLY_DEAD:
				/* Count dead and recently-dead rows */
				*deadrows += 1;
				break;

			case HEAPTUPLE_INSERT_IN_PROGRESS:

				/*
				 * Insert-in-progress rows are not counted.  We assume that
				 * when the inserting transaction commits or aborts, it will
				 * send a stats message to increment the proper count.  This
				 * works right only if that transaction ends after we finish
				 * analyzing the table; if things happen in the other order,
				 * its stats update will be overwritten by ours.  However, the
				 * error will be large only if the other transaction runs long
				 * enough to insert many tuples, so assuming it will finish
				 * after us is the safer option.
				 *
				 * A special case is that the inserting transaction might be
				 * our own.  In this case we should count and sample the row,
				 * to accommodate users who load a table and analyze it in one
				 * transaction.  (pgstat_report_analyze has to adjust the
				 * numbers we report to the cumulative stats system to make
				 * this come out right.)
				 */
				if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
				{
					sample_it = true;
					*liverows += 1;
				}
				break;

			case HEAPTUPLE_DELETE_IN_PROGRESS:

				/*
				 * We count and sample delete-in-progress rows the same as
				 * live ones, so that the stats counters come out right if the
				 * deleting transaction commits after us, per the same
				 * reasoning given above.
				 *
				 * If the delete was done by our own transaction, however, we
				 * must count the row as dead to make pgstat_report_analyze's
				 * stats adjustments come out right.  (Note: this works out
				 * properly when the row was both inserted and deleted in our
				 * xact.)
				 *
				 * The net effect of these choices is that we act as though an
				 * IN_PROGRESS transaction hasn't happened yet, except if it
				 * is our own transaction, which we assume has happened.
				 *
				 * This approach ensures that we behave sanely if we see both
				 * the pre-image and post-image rows for a row being updated
				 * by a concurrent transaction: we will sample the pre-image
				 * but not the post-image.  We also get sane results if the
				 * concurrent transaction never commits.
				 */
				if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
					*deadrows += 1;
				else
				{
					sample_it = true;
					*liverows += 1;
				}
				break;

			default:
				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
				break;
		}

		if (sample_it)
		{
			ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
			hscan->rs_cindex++;

			/* note that we leave the buffer locked here! */
			return true;
		}
	}

	/* Now release the lock and pin on the page */
	UnlockReleaseBuffer(hscan->rs_cbuf);
	hscan->rs_cbuf = InvalidBuffer;

	/* also prevent old slot contents from having pin on page */
	ExecClearTuple(slot);

	return false;
}
1186 : :
1187 : : static double
2545 1188 : 29343 : heapam_index_build_range_scan(Relation heapRelation,
1189 : : Relation indexRelation,
1190 : : IndexInfo *indexInfo,
1191 : : bool allow_sync,
1192 : : bool anyvisible,
1193 : : bool progress,
1194 : : BlockNumber start_blockno,
1195 : : BlockNumber numblocks,
1196 : : IndexBuildCallback callback,
1197 : : void *callback_state,
1198 : : TableScanDesc scan)
1199 : : {
1200 : : HeapScanDesc hscan;
1201 : : bool is_system_catalog;
1202 : : bool checking_uniqueness;
1203 : : HeapTuple heapTuple;
1204 : : Datum values[INDEX_MAX_KEYS];
1205 : : bool isnull[INDEX_MAX_KEYS];
1206 : : double reltuples;
1207 : : ExprState *predicate;
1208 : : TupleTableSlot *slot;
1209 : : EState *estate;
1210 : : ExprContext *econtext;
1211 : : Snapshot snapshot;
1212 : 29343 : bool need_unregister_snapshot = false;
1213 : : TransactionId OldestXmin;
2489 tgl@sss.pgh.pa.us 1214 : 29343 : BlockNumber previous_blkno = InvalidBlockNumber;
2545 andres@anarazel.de 1215 : 29343 : BlockNumber root_blkno = InvalidBlockNumber;
1216 : : OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1217 : :
1218 : : /*
1219 : : * sanity checks
1220 : : */
1221 [ - + ]: 29343 : Assert(OidIsValid(indexRelation->rd_rel->relam));
1222 : :
1223 : : /* Remember if it's a system catalog */
1224 : 29343 : is_system_catalog = IsSystemRelation(heapRelation);
1225 : :
1226 : : /* See whether we're verifying uniqueness/exclusion properties */
1227 [ + + ]: 37105 : checking_uniqueness = (indexInfo->ii_Unique ||
1228 [ + + ]: 7762 : indexInfo->ii_ExclusionOps != NULL);
1229 : :
1230 : : /*
1231 : : * "Any visible" mode is not compatible with uniqueness checks; make sure
1232 : : * only one of those is requested.
1233 : : */
1234 [ + + - + ]: 29343 : Assert(!(anyvisible && checking_uniqueness));
1235 : :
1236 : : /*
1237 : : * Need an EState for evaluation of index expressions and partial-index
1238 : : * predicates. Also a slot to hold the current tuple.
1239 : : */
1240 : 29343 : estate = CreateExecutorState();
1241 [ - + ]: 29343 : econtext = GetPerTupleExprContext(estate);
1242 : 29343 : slot = table_slot_create(heapRelation, NULL);
1243 : :
1244 : : /* Arrange for econtext's scan tuple to be the tuple under test */
1245 : 29343 : econtext->ecxt_scantuple = slot;
1246 : :
1247 : : /* Set up execution state for predicate, if any. */
1248 : 29343 : predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1249 : :
1250 : : /*
1251 : : * Prepare for scan of the base relation. In a normal index build, we use
1252 : : * SnapshotAny because we must retrieve all tuples and do our own time
1253 : : * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1254 : : * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1255 : : * and index whatever's live according to that.
1256 : : */
1257 : 29343 : OldestXmin = InvalidTransactionId;
1258 : :
1259 : : /* okay to ignore lazy VACUUMs here */
1260 [ + + + + ]: 29343 : if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
2041 1261 : 20782 : OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1262 : :
2545 1263 [ + + ]: 29343 : if (!scan)
1264 : : {
1265 : : /*
1266 : : * Serial index build.
1267 : : *
1268 : : * Must begin our own heap scan in this case. We may also need to
1269 : : * register a snapshot whose lifetime is under our direct control.
1270 : : */
1271 [ + + ]: 29054 : if (!TransactionIdIsValid(OldestXmin))
1272 : : {
1273 : 8479 : snapshot = RegisterSnapshot(GetTransactionSnapshot());
1274 : 8479 : need_unregister_snapshot = true;
1275 : : }
1276 : : else
1277 : 20575 : snapshot = SnapshotAny;
1278 : :
1279 : 29054 : scan = table_beginscan_strat(heapRelation, /* relation */
1280 : : snapshot, /* snapshot */
1281 : : 0, /* number of keys */
1282 : : NULL, /* scan key */
1283 : : true, /* buffer access strategy OK */
1284 : : allow_sync); /* syncscan OK? */
1285 : : }
1286 : : else
1287 : : {
1288 : : /*
1289 : : * Parallel index build.
1290 : : *
1291 : : * Parallel case never registers/unregisters own snapshot. Snapshot
1292 : : * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1293 : : * snapshot, based on same criteria as serial case.
1294 : : */
1295 [ - + ]: 289 : Assert(!IsBootstrapProcessingMode());
1296 [ - + ]: 289 : Assert(allow_sync);
1297 : 289 : snapshot = scan->rs_snapshot;
1298 : : }
1299 : :
1300 : 29343 : hscan = (HeapScanDesc) scan;
1301 : :
1302 : : /*
1303 : : * Must have called GetOldestNonRemovableTransactionId() if using
1304 : : * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1305 : : * worth checking this for parallel builds, since ambuild routines that
1306 : : * support parallel builds must work these details out for themselves.)
1307 : : */
2041 1308 [ + + - + : 29343 : Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
- - ]
1309 [ + + - + ]: 29343 : Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1310 : : !TransactionIdIsValid(OldestXmin));
1311 [ + + - + ]: 29343 : Assert(snapshot == SnapshotAny || !anyvisible);
1312 : :
1313 : : /* Publish number of blocks to scan */
2539 alvherre@alvh.no-ip. 1314 [ + + ]: 29343 : if (progress)
1315 : : {
1316 : : BlockNumber nblocks;
1317 : :
1318 [ + + ]: 27683 : if (hscan->rs_base.rs_parallel != NULL)
1319 : : {
1320 : : ParallelBlockTableScanDesc pbscan;
1321 : :
1322 : 105 : pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1323 : 105 : nblocks = pbscan->phs_nblocks;
1324 : : }
1325 : : else
1326 : 27578 : nblocks = hscan->rs_nblocks;
1327 : :
1328 : 27683 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1329 : : nblocks);
1330 : : }
1331 : :
1332 : : /* set our scan endpoints */
2545 andres@anarazel.de 1333 [ + + ]: 29343 : if (!allow_sync)
1334 : 1849 : heap_setscanlimits(scan, start_blockno, numblocks);
1335 : : else
1336 : : {
1337 : : /* syncscan can only be requested on whole relation */
1338 [ - + ]: 27494 : Assert(start_blockno == 0);
1339 [ - + ]: 27494 : Assert(numblocks == InvalidBlockNumber);
1340 : : }
1341 : :
1342 : 29343 : reltuples = 0;
1343 : :
1344 : : /*
1345 : : * Scan all tuples in the base relation.
1346 : : */
1347 [ + + ]: 9798208 : while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1348 : : {
1349 : : bool tupleIsAlive;
1350 : :
1351 [ + + ]: 9768871 : CHECK_FOR_INTERRUPTS();
1352 : :
1353 : : /* Report scan progress, if asked to. */
2539 alvherre@alvh.no-ip. 1354 [ + + ]: 9768871 : if (progress)
1355 : : {
2489 tgl@sss.pgh.pa.us 1356 : 7770139 : BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1357 : :
2539 alvherre@alvh.no-ip. 1358 [ + + ]: 7770139 : if (blocks_done != previous_blkno)
1359 : : {
1360 : 98359 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1361 : : blocks_done);
1362 : 98359 : previous_blkno = blocks_done;
1363 : : }
1364 : : }
1365 : :
1366 : : /*
1367 : : * When dealing with a HOT-chain of updated tuples, we want to index
1368 : : * the values of the live tuple (if any), but index it under the TID
1369 : : * of the chain's root tuple. This approach is necessary to preserve
1370 : : * the HOT-chain structure in the heap. So we need to be able to find
1371 : : * the root item offset for every tuple that's in a HOT-chain. When
1372 : : * first reaching a new page of the relation, call
1373 : : * heap_get_root_tuples() to build a map of root item offsets on the
1374 : : * page.
1375 : : *
1376 : : * It might look unsafe to use this information across buffer
1377 : : * lock/unlock. However, we hold ShareLock on the table so no
1378 : : * ordinary insert/update/delete should occur; and we hold pin on the
1379 : : * buffer continuously while visiting the page, so no pruning
1380 : : * operation can occur either.
1381 : : *
1382 : : * In cases with only ShareUpdateExclusiveLock on the table, it's
1383 : : * possible for some HOT tuples to appear that we didn't know about
1384 : : * when we first read the page. To handle that case, we re-obtain the
1385 : : * list of root offsets when a HOT tuple points to a root item that we
1386 : : * don't know about.
1387 : : *
1388 : : * Also, although our opinions about tuple liveness could change while
1389 : : * we scan the page (due to concurrent transaction commits/aborts),
1390 : : * the chain root locations won't, so this info doesn't need to be
1391 : : * rebuilt after waiting for another transaction.
1392 : : *
1393 : : * Note the implied assumption that there is no more than one live
1394 : : * tuple per HOT-chain --- else we could create more than one index
1395 : : * entry pointing to the same root tuple.
1396 : : */
2545 andres@anarazel.de 1397 [ + + ]: 9768871 : if (hscan->rs_cblock != root_blkno)
1398 : : {
1399 : 115543 : Page page = BufferGetPage(hscan->rs_cbuf);
1400 : :
1401 : 115543 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1402 : 115543 : heap_get_root_tuples(page, root_offsets);
1403 : 115543 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1404 : :
1405 : 115543 : root_blkno = hscan->rs_cblock;
1406 : : }
1407 : :
1408 [ + + ]: 9768871 : if (snapshot == SnapshotAny)
1409 : : {
1410 : : /* do our own time qual check */
1411 : : bool indexIt;
1412 : : TransactionId xwait;
1413 : :
1414 : 7368371 : recheck:
1415 : :
1416 : : /*
1417 : : * We could possibly get away with not locking the buffer here,
1418 : : * since caller should hold ShareLock on the relation, but let's
1419 : : * be conservative about it. (This remark is still correct even
1420 : : * with HOT-pruning: our pin on the buffer prevents pruning.)
1421 : : */
1422 : 7368371 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1423 : :
1424 : : /*
1425 : : * The criteria for counting a tuple as live in this block need to
1426 : : * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1427 : : * otherwise CREATE INDEX and ANALYZE may produce wildly different
1428 : : * reltuples values, e.g. when there are many recently-dead
1429 : : * tuples.
1430 : : */
1431 [ + + + + : 7368371 : switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
+ - ]
1432 : : hscan->rs_cbuf))
1433 : : {
1434 : 1090 : case HEAPTUPLE_DEAD:
1435 : : /* Definitely dead, we can ignore it */
1436 : 1090 : indexIt = false;
1437 : 1090 : tupleIsAlive = false;
1438 : 1090 : break;
1439 : 5477113 : case HEAPTUPLE_LIVE:
1440 : : /* Normal case, index and unique-check it */
1441 : 5477113 : indexIt = true;
1442 : 5477113 : tupleIsAlive = true;
1443 : : /* Count it as live, too */
1444 : 5477113 : reltuples += 1;
1445 : 5477113 : break;
1446 : 116315 : case HEAPTUPLE_RECENTLY_DEAD:
1447 : :
1448 : : /*
1449 : : * If tuple is recently deleted then we must index it
1450 : : * anyway to preserve MVCC semantics. (Pre-existing
1451 : : * transactions could try to use the index after we finish
1452 : : * building it, and may need to see such tuples.)
1453 : : *
1454 : : * However, if it was HOT-updated then we must only index
1455 : : * the live tuple at the end of the HOT-chain. Since this
1456 : : * breaks semantics for pre-existing snapshots, mark the
1457 : : * index as unusable for them.
1458 : : *
1459 : : * We don't count recently-dead tuples in reltuples, even
1460 : : * if we index them; see heapam_scan_analyze_next_tuple().
1461 : : */
1462 [ + + ]: 116315 : if (HeapTupleIsHotUpdated(heapTuple))
1463 : : {
1464 : 107 : indexIt = false;
1465 : : /* mark the index as unsafe for old snapshots */
1466 : 107 : indexInfo->ii_BrokenHotChain = true;
1467 : : }
1468 : : else
1469 : 116208 : indexIt = true;
1470 : : /* In any case, exclude the tuple from unique-checking */
1471 : 116315 : tupleIsAlive = false;
1472 : 116315 : break;
1473 : 1773766 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1474 : :
1475 : : /*
1476 : : * In "anyvisible" mode, this tuple is visible and we
1477 : : * don't need any further checks.
1478 : : */
1479 [ + + ]: 1773766 : if (anyvisible)
1480 : : {
1481 : 30736 : indexIt = true;
1482 : 30736 : tupleIsAlive = true;
1483 : 30736 : reltuples += 1;
1484 : 30736 : break;
1485 : : }
1486 : :
1487 : : /*
1488 : : * Since caller should hold ShareLock or better, normally
1489 : : * the only way to see this is if it was inserted earlier
1490 : : * in our own transaction. However, it can happen in
1491 : : * system catalogs, since we tend to release write lock
1492 : : * before commit there. Give a warning if neither case
1493 : : * applies.
1494 : : */
1495 : 1743030 : xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1496 [ + + ]: 1743030 : if (!TransactionIdIsCurrentTransactionId(xwait))
1497 : : {
1498 [ - + ]: 207 : if (!is_system_catalog)
2545 andres@anarazel.de 1499 [ # # ]:UBC 0 : elog(WARNING, "concurrent insert in progress within table \"%s\"",
1500 : : RelationGetRelationName(heapRelation));
1501 : :
1502 : : /*
1503 : : * If we are performing uniqueness checks, indexing
1504 : : * such a tuple could lead to a bogus uniqueness
1505 : : * failure. In that case we wait for the inserting
1506 : : * transaction to finish and check again.
1507 : : */
2545 andres@anarazel.de 1508 [ - + ]:CBC 207 : if (checking_uniqueness)
1509 : : {
1510 : : /*
1511 : : * Must drop the lock on the buffer before we wait
1512 : : */
2545 andres@anarazel.de 1513 :UBC 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1514 : 0 : XactLockTableWait(xwait, heapRelation,
2545 andres@anarazel.de 1515 :UIC 0 : &heapTuple->t_self,
1516 : : XLTW_InsertIndexUnique);
2545 andres@anarazel.de 1517 [ # # ]:UBC 0 : CHECK_FOR_INTERRUPTS();
1518 : 0 : goto recheck;
1519 : : }
1520 : : }
1521 : : else
1522 : : {
1523 : : /*
1524 : : * For consistency with
1525 : : * heapam_scan_analyze_next_tuple(), count
1526 : : * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1527 : : * when inserted by our own transaction.
1528 : : */
2545 andres@anarazel.de 1529 :CBC 1742823 : reltuples += 1;
1530 : : }
1531 : :
1532 : : /*
1533 : : * We must index such tuples, since if the index build
1534 : : * commits then they're good.
1535 : : */
1536 : 1743030 : indexIt = true;
1537 : 1743030 : tupleIsAlive = true;
1538 : 1743030 : break;
1539 : 87 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1540 : :
1541 : : /*
1542 : : * As with INSERT_IN_PROGRESS case, this is unexpected
1543 : : * unless it's our own deletion or a system catalog; but
1544 : : * in anyvisible mode, this tuple is visible.
1545 : : */
1546 [ - + ]: 87 : if (anyvisible)
1547 : : {
2545 andres@anarazel.de 1548 :UBC 0 : indexIt = true;
1549 : 0 : tupleIsAlive = false;
1550 : 0 : reltuples += 1;
1551 : 0 : break;
1552 : : }
1553 : :
2545 andres@anarazel.de 1554 :CBC 87 : xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1555 [ + + ]: 87 : if (!TransactionIdIsCurrentTransactionId(xwait))
1556 : : {
1557 [ - + ]: 48 : if (!is_system_catalog)
2545 andres@anarazel.de 1558 [ # # ]:UBC 0 : elog(WARNING, "concurrent delete in progress within table \"%s\"",
1559 : : RelationGetRelationName(heapRelation));
1560 : :
1561 : : /*
1562 : : * If we are performing uniqueness checks, assuming
1563 : : * the tuple is dead could lead to missing a
1564 : : * uniqueness violation. In that case we wait for the
1565 : : * deleting transaction to finish and check again.
1566 : : *
1567 : : * Also, if it's a HOT-updated tuple, we should not
1568 : : * index it but rather the live tuple at the end of
1569 : : * the HOT-chain. However, the deleting transaction
1570 : : * could abort, possibly leaving this tuple as live
1571 : : * after all, in which case it has to be indexed. The
1572 : : * only way to know what to do is to wait for the
1573 : : * deleting transaction to finish and check again.
1574 : : */
2545 andres@anarazel.de 1575 [ + - - + ]:CBC 96 : if (checking_uniqueness ||
1576 : 48 : HeapTupleIsHotUpdated(heapTuple))
1577 : : {
1578 : : /*
1579 : : * Must drop the lock on the buffer before we wait
1580 : : */
2545 andres@anarazel.de 1581 :UBC 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1582 : 0 : XactLockTableWait(xwait, heapRelation,
2545 andres@anarazel.de 1583 :UIC 0 : &heapTuple->t_self,
1584 : : XLTW_InsertIndexUnique);
2545 andres@anarazel.de 1585 [ # # ]:UBC 0 : CHECK_FOR_INTERRUPTS();
1586 : 0 : goto recheck;
1587 : : }
1588 : :
1589 : : /*
1590 : : * Otherwise index it but don't check for uniqueness,
1591 : : * the same as a RECENTLY_DEAD tuple.
1592 : : */
2545 andres@anarazel.de 1593 :CBC 48 : indexIt = true;
1594 : :
1595 : : /*
1596 : : * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1597 : : * if they were not deleted by the current
1598 : : * transaction. That's what
1599 : : * heapam_scan_analyze_next_tuple() does, and we want
1600 : : * the behavior to be consistent.
1601 : : */
1602 : 48 : reltuples += 1;
1603 : : }
1604 [ - + ]: 39 : else if (HeapTupleIsHotUpdated(heapTuple))
1605 : : {
1606 : : /*
1607 : : * It's a HOT-updated tuple deleted by our own xact.
1608 : : * We can assume the deletion will commit (else the
1609 : : * index contents don't matter), so treat the same as
1610 : : * RECENTLY_DEAD HOT-updated tuples.
1611 : : */
2545 andres@anarazel.de 1612 :UBC 0 : indexIt = false;
1613 : : /* mark the index as unsafe for old snapshots */
1614 : 0 : indexInfo->ii_BrokenHotChain = true;
1615 : : }
1616 : : else
1617 : : {
1618 : : /*
1619 : : * It's a regular tuple deleted by our own xact. Index
1620 : : * it, but don't check for uniqueness nor count in
1621 : : * reltuples, the same as a RECENTLY_DEAD tuple.
1622 : : */
2545 andres@anarazel.de 1623 :CBC 39 : indexIt = true;
1624 : : }
1625 : : /* In any case, exclude the tuple from unique-checking */
1626 : 87 : tupleIsAlive = false;
1627 : 87 : break;
2545 andres@anarazel.de 1628 :UBC 0 : default:
1629 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1630 : : indexIt = tupleIsAlive = false; /* keep compiler quiet */
1631 : : break;
1632 : : }
1633 : :
2545 andres@anarazel.de 1634 :CBC 7368371 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1635 : :
1636 [ + + ]: 7368371 : if (!indexIt)
1637 : 1197 : continue;
1638 : : }
1639 : : else
1640 : : {
1641 : : /* heap_getnext did the time qual check */
1642 : 2400500 : tupleIsAlive = true;
1643 : 2400500 : reltuples += 1;
1644 : : }
1645 : :
1646 : 9767674 : MemoryContextReset(econtext->ecxt_per_tuple_memory);
1647 : :
1648 : : /* Set up for predicate or expression evaluation */
1649 : 9767674 : ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1650 : :
1651 : : /*
1652 : : * In a partial index, discard tuples that don't satisfy the
1653 : : * predicate.
1654 : : */
1655 [ + + ]: 9767674 : if (predicate != NULL)
1656 : : {
1657 [ + + ]: 102285 : if (!ExecQual(predicate, econtext))
1658 : 54837 : continue;
1659 : : }
1660 : :
1661 : : /*
1662 : : * For the current heap tuple, extract all the attributes we use in
1663 : : * this index, and note which are null. This also performs evaluation
1664 : : * of any expressions needed.
1665 : : */
1666 : 9712837 : FormIndexDatum(indexInfo,
1667 : : slot,
1668 : : estate,
1669 : : values,
1670 : : isnull);
1671 : :
1672 : : /*
1673 : : * You'd think we should go ahead and build the index tuple here, but
1674 : : * some index AMs want to do further processing on the data first. So
1675 : : * pass the values[] and isnull[] arrays, instead.
1676 : : */
1677 : :
1678 [ + + ]: 9712831 : if (HeapTupleIsHeapOnly(heapTuple))
1679 : : {
1680 : : /*
1681 : : * For a heap-only tuple, pretend its TID is that of the root. See
1682 : : * src/backend/access/heap/README.HOT for discussion.
1683 : : */
1684 : : ItemPointerData tid;
1685 : : OffsetNumber offnum;
1686 : :
1687 : 3431 : offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1688 : :
1689 : : /*
1690 : : * If a HOT tuple points to a root that we don't know about,
1691 : : * obtain root items afresh. If that still fails, report it as
1692 : : * corruption.
1693 : : */
2040 alvherre@alvh.no-ip. 1694 [ - + ]: 3431 : if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1695 : : {
1768 tgl@sss.pgh.pa.us 1696 :UBC 0 : Page page = BufferGetPage(hscan->rs_cbuf);
1697 : :
2040 alvherre@alvh.no-ip. 1698 : 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1699 : 0 : heap_get_root_tuples(page, root_offsets);
1700 : 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1701 : : }
1702 : :
2545 andres@anarazel.de 1703 [ + - + - :CBC 3431 : if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
- + ]
2545 andres@anarazel.de 1704 [ # # ]:UBC 0 : ereport(ERROR,
1705 : : (errcode(ERRCODE_DATA_CORRUPTED),
1706 : : errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1707 : : ItemPointerGetBlockNumber(&heapTuple->t_self),
1708 : : offnum,
1709 : : RelationGetRelationName(heapRelation))));
1710 : :
2319 andres@anarazel.de 1711 :CBC 3431 : ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1712 : 3431 : root_offsets[offnum - 1]);
1713 : :
1714 : : /* Call the AM's callback routine to process the tuple */
1715 : 3431 : callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1716 : : callback_state);
1717 : : }
1718 : : else
1719 : : {
1720 : : /* Call the AM's callback routine to process the tuple */
1721 : 9709400 : callback(indexRelation, &heapTuple->t_self, values, isnull,
1722 : : tupleIsAlive, callback_state);
1723 : : }
1724 : : }
1725 : :
1726 : : /* Report scan progress one last time. */
2539 alvherre@alvh.no-ip. 1727 [ + + ]: 29337 : if (progress)
1728 : : {
1729 : : BlockNumber blks_done;
1730 : :
1731 [ + + ]: 27677 : if (hscan->rs_base.rs_parallel != NULL)
1732 : : {
1733 : : ParallelBlockTableScanDesc pbscan;
1734 : :
1735 : 105 : pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1736 : 105 : blks_done = pbscan->phs_nblocks;
1737 : : }
1738 : : else
1739 : 27572 : blks_done = hscan->rs_nblocks;
1740 : :
1741 : 27677 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1742 : : blks_done);
1743 : : }
1744 : :
2545 andres@anarazel.de 1745 : 29337 : table_endscan(scan);
1746 : :
1747 : : /* we can now forget our snapshot, if set and registered by us */
1748 [ + + ]: 29337 : if (need_unregister_snapshot)
1749 : 8476 : UnregisterSnapshot(snapshot);
1750 : :
1751 : 29337 : ExecDropSingleTupleTableSlot(slot);
1752 : :
1753 : 29337 : FreeExecutorState(estate);
1754 : :
1755 : : /* These may have been pointing to the now-gone estate */
1756 : 29337 : indexInfo->ii_ExpressionsState = NIL;
1757 : 29337 : indexInfo->ii_PredicateState = NULL;
1758 : :
1759 : 29337 : return reltuples;
1760 : : }
1761 : :
1762 : : static void
1763 : 361 : heapam_index_validate_scan(Relation heapRelation,
1764 : : Relation indexRelation,
1765 : : IndexInfo *indexInfo,
1766 : : Snapshot snapshot,
1767 : : ValidateIndexState *state)
1768 : : {
1769 : : TableScanDesc scan;
1770 : : HeapScanDesc hscan;
1771 : : HeapTuple heapTuple;
1772 : : Datum values[INDEX_MAX_KEYS];
1773 : : bool isnull[INDEX_MAX_KEYS];
1774 : : ExprState *predicate;
1775 : : TupleTableSlot *slot;
1776 : : EState *estate;
1777 : : ExprContext *econtext;
1778 : 361 : BlockNumber root_blkno = InvalidBlockNumber;
1779 : : OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1780 : : bool in_index[MaxHeapTuplesPerPage];
2489 tgl@sss.pgh.pa.us 1781 : 361 : BlockNumber previous_blkno = InvalidBlockNumber;
1782 : :
1783 : : /* state variables for the merge */
2545 andres@anarazel.de 1784 : 361 : ItemPointer indexcursor = NULL;
1785 : : ItemPointerData decoded;
1786 : 361 : bool tuplesort_empty = false;
1787 : :
1788 : : /*
1789 : : * sanity checks
1790 : : */
1791 [ - + ]: 361 : Assert(OidIsValid(indexRelation->rd_rel->relam));
1792 : :
1793 : : /*
1794 : : * Need an EState for evaluation of index expressions and partial-index
1795 : : * predicates. Also a slot to hold the current tuple.
1796 : : */
1797 : 361 : estate = CreateExecutorState();
1798 [ - + ]: 361 : econtext = GetPerTupleExprContext(estate);
1799 : 361 : slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1800 : : &TTSOpsHeapTuple);
1801 : :
1802 : : /* Arrange for econtext's scan tuple to be the tuple under test */
1803 : 361 : econtext->ecxt_scantuple = slot;
1804 : :
1805 : : /* Set up execution state for predicate, if any. */
1806 : 361 : predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1807 : :
1808 : : /*
1809 : : * Prepare for scan of the base relation. We need just those tuples
1810 : : * satisfying the passed-in reference snapshot. We must disable syncscan
1811 : : * here, because it's critical that we read from block zero forward to
1812 : : * match the sorted TIDs.
1813 : : */
1814 : 361 : scan = table_beginscan_strat(heapRelation, /* relation */
1815 : : snapshot, /* snapshot */
1816 : : 0, /* number of keys */
1817 : : NULL, /* scan key */
1818 : : true, /* buffer access strategy OK */
1819 : : false); /* syncscan not OK */
1820 : 361 : hscan = (HeapScanDesc) scan;
1821 : :
2539 alvherre@alvh.no-ip. 1822 : 361 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1823 : 361 : hscan->rs_nblocks);
1824 : :
1825 : : /*
1826 : : * Scan all tuples matching the snapshot.
1827 : : */
2545 andres@anarazel.de 1828 [ + + ]: 122718 : while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1829 : : {
1830 : 122357 : ItemPointer heapcursor = &heapTuple->t_self;
1831 : : ItemPointerData rootTuple;
1832 : : OffsetNumber root_offnum;
1833 : :
1834 [ - + ]: 122357 : CHECK_FOR_INTERRUPTS();
1835 : :
1836 : 122357 : state->htups += 1;
1837 : :
2539 alvherre@alvh.no-ip. 1838 [ + + ]: 122357 : if ((previous_blkno == InvalidBlockNumber) ||
1839 [ + + ]: 122144 : (hscan->rs_cblock != previous_blkno))
1840 : : {
1841 : 2476 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1842 : 2476 : hscan->rs_cblock);
1843 : 2476 : previous_blkno = hscan->rs_cblock;
1844 : : }
1845 : :
1846 : : /*
1847 : : * As commented in table_index_build_scan, we should index heap-only
1848 : : * tuples under the TIDs of their root tuples; so when we advance onto
1849 : : * a new heap page, build a map of root item offsets on the page.
1850 : : *
1851 : : * This complicates merging against the tuplesort output: we will
1852 : : * visit the live tuples in order by their offsets, but the root
1853 : : * offsets that we need to compare against the index contents might be
1854 : : * ordered differently. So we might have to "look back" within the
1855 : : * tuplesort output, but only within the current page. We handle that
1856 : : * by keeping a bool array in_index[] showing all the
1857 : : * already-passed-over tuplesort output TIDs of the current page. We
1858 : : * clear that array here, when advancing onto a new heap page.
1859 : : */
2545 andres@anarazel.de 1860 [ + + ]: 122357 : if (hscan->rs_cblock != root_blkno)
1861 : : {
1862 : 2476 : Page page = BufferGetPage(hscan->rs_cbuf);
1863 : :
1864 : 2476 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1865 : 2476 : heap_get_root_tuples(page, root_offsets);
1866 : 2476 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1867 : :
1868 : 2476 : memset(in_index, 0, sizeof(in_index));
1869 : :
1870 : 2476 : root_blkno = hscan->rs_cblock;
1871 : : }
1872 : :
1873 : : /* Convert actual tuple TID to root TID */
1874 : 122357 : rootTuple = *heapcursor;
1875 : 122357 : root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1876 : :
1877 [ + + ]: 122357 : if (HeapTupleIsHeapOnly(heapTuple))
1878 : : {
1879 : 8 : root_offnum = root_offsets[root_offnum - 1];
1880 [ + - + - : 8 : if (!OffsetNumberIsValid(root_offnum))
- + ]
2545 andres@anarazel.de 1881 [ # # ]:UBC 0 : ereport(ERROR,
1882 : : (errcode(ERRCODE_DATA_CORRUPTED),
1883 : : errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1884 : : ItemPointerGetBlockNumber(heapcursor),
1885 : : ItemPointerGetOffsetNumber(heapcursor),
1886 : : RelationGetRelationName(heapRelation))));
2545 andres@anarazel.de 1887 :CBC 8 : ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1888 : : }
1889 : :
1890 : : /*
1891 : : * "merge" by skipping through the index tuples until we find or pass
1892 : : * the current root tuple.
1893 : : */
1894 [ + + + + ]: 274110 : while (!tuplesort_empty &&
1895 [ + + ]: 273857 : (!indexcursor ||
1896 : 273857 : ItemPointerCompare(indexcursor, &rootTuple) < 0))
1897 : : {
1898 : : Datum ts_val;
1899 : : bool ts_isnull;
1900 : :
1901 [ + + ]: 151753 : if (indexcursor)
1902 : : {
1903 : : /*
1904 : : * Remember index items seen earlier on the current heap page
1905 : : */
1906 [ + + ]: 151540 : if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1907 : 148353 : in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1908 : : }
1909 : :
1910 : 151753 : tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1911 : : false, &ts_val, &ts_isnull,
1234 drowley@postgresql.o 1912 : 151753 : NULL);
2545 andres@anarazel.de 1913 [ + + - + ]: 151753 : Assert(tuplesort_empty || !ts_isnull);
1914 [ + + ]: 151753 : if (!tuplesort_empty)
1915 : : {
1916 : 151737 : itemptr_decode(&decoded, DatumGetInt64(ts_val));
1917 : 151737 : indexcursor = &decoded;
1918 : : }
1919 : : else
1920 : : {
1921 : : /* Be tidy */
1922 : 16 : indexcursor = NULL;
1923 : : }
1924 : : }
1925 : :
1926 : : /*
1927 : : * If the tuplesort has overshot *and* we didn't see a match earlier,
1928 : : * then this tuple is missing from the index, so insert it.
1929 : : */
1930 [ + + + + ]: 244674 : if ((tuplesort_empty ||
1931 : 122317 : ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1932 [ + + ]: 107 : !in_index[root_offnum - 1])
1933 : : {
1934 : 102 : MemoryContextReset(econtext->ecxt_per_tuple_memory);
1935 : :
1936 : : /* Set up for predicate or expression evaluation */
1937 : 102 : ExecStoreHeapTuple(heapTuple, slot, false);
1938 : :
1939 : : /*
1940 : : * In a partial index, discard tuples that don't satisfy the
1941 : : * predicate.
1942 : : */
1943 [ + + ]: 102 : if (predicate != NULL)
1944 : : {
1945 [ + - ]: 24 : if (!ExecQual(predicate, econtext))
1946 : 24 : continue;
1947 : : }
1948 : :
1949 : : /*
1950 : : * For the current heap tuple, extract all the attributes we use
1951 : : * in this index, and note which are null. This also performs
1952 : : * evaluation of any expressions needed.
1953 : : */
1954 : 78 : FormIndexDatum(indexInfo,
1955 : : slot,
1956 : : estate,
1957 : : values,
1958 : : isnull);
1959 : :
1960 : : /*
1961 : : * You'd think we should go ahead and build the index tuple here,
1962 : : * but some index AMs want to do further processing on the data
1963 : : * first. So pass the values[] and isnull[] arrays, instead.
1964 : : */
1965 : :
1966 : : /*
1967 : : * If the tuple is already committed dead, you might think we
1968 : : * could suppress uniqueness checking, but this is no longer true
1969 : : * in the presence of HOT, because the insert is actually a proxy
1970 : : * for a uniqueness check on the whole HOT-chain. That is, the
1971 : : * tuple we have here could be dead because it was already
1972 : : * HOT-updated, and if so the updating transaction will not have
1973 : : * thought it should insert index entries. The index AM will
1974 : : * check the whole HOT-chain and correctly detect a conflict if
1975 : : * there is one.
1976 : : */
1977 : :
1978 : 78 : index_insert(indexRelation,
1979 : : values,
1980 : : isnull,
1981 : : &rootTuple,
1982 : : heapRelation,
1983 : 78 : indexInfo->ii_Unique ?
1984 : : UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1985 : : false,
1986 : : indexInfo);
1987 : :
1988 : 78 : state->tups_inserted += 1;
1989 : : }
1990 : : }
1991 : :
1992 : 361 : table_endscan(scan);
1993 : :
1994 : 361 : ExecDropSingleTupleTableSlot(slot);
1995 : :
1996 : 361 : FreeExecutorState(estate);
1997 : :
1998 : : /* These may have been pointing to the now-gone estate */
1999 : 361 : indexInfo->ii_ExpressionsState = NIL;
2000 : 361 : indexInfo->ii_PredicateState = NULL;
2001 : 361 : }
2002 : :
2003 : : /*
2004 : : * Return the number of blocks that have been read by this scan since
2005 : : * starting. This is meant for progress reporting rather than be fully
2006 : : * accurate: in a parallel scan, workers can be concurrently reading blocks
2007 : : * further ahead than what we report.
2008 : : */
2009 : : static BlockNumber
2539 alvherre@alvh.no-ip. 2010 : 7770139 : heapam_scan_get_blocks_done(HeapScanDesc hscan)
2011 : : {
2012 : 7770139 : ParallelBlockTableScanDesc bpscan = NULL;
2013 : : BlockNumber startblock;
2014 : : BlockNumber blocks_done;
2015 : :
2016 [ + + ]: 7770139 : if (hscan->rs_base.rs_parallel != NULL)
2017 : : {
2018 : 994492 : bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
2019 : 994492 : startblock = bpscan->phs_startblock;
2020 : : }
2021 : : else
2022 : 6775647 : startblock = hscan->rs_startblock;
2023 : :
2024 : : /*
2025 : : * Might have wrapped around the end of the relation, if startblock was
2026 : : * not zero.
2027 : : */
2028 [ + + ]: 7770139 : if (hscan->rs_cblock > startblock)
2029 : 7480148 : blocks_done = hscan->rs_cblock - startblock;
2030 : : else
2031 : : {
2032 : : BlockNumber nblocks;
2033 : :
2034 [ + + ]: 289991 : nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2035 : 289991 : blocks_done = nblocks - startblock +
2036 : 289991 : hscan->rs_cblock;
2037 : : }
2038 : :
2039 : 7770139 : return blocks_done;
2040 : : }
2041 : :
2042 : :
2043 : : /* ------------------------------------------------------------------------
2044 : : * Miscellaneous callbacks for the heap AM
2045 : : * ------------------------------------------------------------------------
2046 : : */
2047 : :
2048 : : /*
2049 : : * Check to see whether the table needs a TOAST table. It does only if
2050 : : * (1) there are any toastable attributes, and (2) the maximum length
2051 : : * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2052 : : * create a toast table for something like "f1 varchar(20)".)
2053 : : */
2054 : : static bool
2490 rhaas@postgresql.org 2055 : 23527 : heapam_relation_needs_toast_table(Relation rel)
2056 : : {
2057 : 23527 : int32 data_length = 0;
2058 : 23527 : bool maxlength_unknown = false;
2059 : 23527 : bool has_toastable_attrs = false;
2060 : 23527 : TupleDesc tupdesc = rel->rd_att;
2061 : : int32 tuple_length;
2062 : : int i;
2063 : :
2064 [ + + ]: 93944 : for (i = 0; i < tupdesc->natts; i++)
2065 : : {
2066 : 70417 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2067 : :
2068 [ + + ]: 70417 : if (att->attisdropped)
2069 : 600 : continue;
401 peter@eisentraut.org 2070 [ + + ]: 69817 : if (att->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
2071 : 478 : continue;
2490 rhaas@postgresql.org 2072 [ + + + + : 69339 : data_length = att_align_nominal(data_length, att->attalign);
+ + - + ]
2073 [ + + ]: 69339 : if (att->attlen > 0)
2074 : : {
2075 : : /* Fixed-length types are never toastable */
2076 : 51901 : data_length += att->attlen;
2077 : : }
2078 : : else
2079 : : {
2080 : 17438 : int32 maxlen = type_maximum_size(att->atttypid,
2081 : : att->atttypmod);
2082 : :
2083 [ + + ]: 17438 : if (maxlen < 0)
2084 : 15886 : maxlength_unknown = true;
2085 : : else
2086 : 1552 : data_length += maxlen;
2202 tgl@sss.pgh.pa.us 2087 [ + + ]: 17438 : if (att->attstorage != TYPSTORAGE_PLAIN)
2490 rhaas@postgresql.org 2088 : 16817 : has_toastable_attrs = true;
2089 : : }
2090 : : }
2091 [ + + ]: 23527 : if (!has_toastable_attrs)
2092 : 13612 : return false; /* nothing to toast? */
2093 [ + + ]: 9915 : if (maxlength_unknown)
2094 : 8755 : return true; /* any unlimited-length attrs? */
2095 : 1160 : tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2096 : 1160 : BITMAPLEN(tupdesc->natts)) +
2097 : 1160 : MAXALIGN(data_length);
2098 : 1160 : return (tuple_length > TOAST_TUPLE_THRESHOLD);
2099 : : }
2100 : :
2101 : : /*
2102 : : * TOAST tables for heap relations are just heap relations.
2103 : : */
2104 : : static Oid
2259 2105 : 9035 : heapam_relation_toast_am(Relation rel)
2106 : : {
2107 : 9035 : return rel->rd_rel->relam;
2108 : : }
2109 : :
2110 : :
2111 : : /* ------------------------------------------------------------------------
2112 : : * Planner related callbacks for the heap AM
2113 : : * ------------------------------------------------------------------------
2114 : : */
2115 : :
2116 : : #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2117 : : (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2118 : : #define HEAP_USABLE_BYTES_PER_PAGE \
2119 : : (BLCKSZ - SizeOfPageHeaderData)
2120 : :
2121 : : static void
2542 andres@anarazel.de 2122 : 253279 : heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
2123 : : BlockNumber *pages, double *tuples,
2124 : : double *allvisfrac)
2125 : : {
2442 rhaas@postgresql.org 2126 : 253279 : table_block_relation_estimate_size(rel, attr_widths, pages,
2127 : : tuples, allvisfrac,
2128 : : HEAP_OVERHEAD_BYTES_PER_TUPLE,
2129 : : HEAP_USABLE_BYTES_PER_PAGE);
2542 andres@anarazel.de 2130 : 253279 : }
2131 : :
2132 : :
2133 : : /* ------------------------------------------------------------------------
2134 : : * Executor related callbacks for the heap AM
2135 : : * ------------------------------------------------------------------------
2136 : : */
2137 : :
2138 : : static bool
365 melanieplageman@gmai 2139 : 3453272 : heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2140 : : TupleTableSlot *slot,
2141 : : bool *recheck,
2142 : : uint64 *lossy_pages,
2143 : : uint64 *exact_pages)
2144 : : {
423 2145 : 3453272 : BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
2146 : 3453272 : HeapScanDesc hscan = (HeapScanDesc) bscan;
2147 : : OffsetNumber targoffset;
2148 : : Page page;
2149 : : ItemId lp;
2150 : :
2151 : : /*
2152 : : * Out of range? If so, nothing more to look at on this page
2153 : : */
365 2154 [ + + ]: 3655907 : while (hscan->rs_cindex >= hscan->rs_ntuples)
2155 : : {
2156 : : /*
2157 : : * Returns false if the bitmap is exhausted and there are no further
2158 : : * blocks we need to scan.
2159 : : */
2160 [ + + ]: 215218 : if (!BitmapHeapScanNextBlock(scan, recheck, lossy_pages, exact_pages))
2161 : 12580 : return false;
2162 : : }
2163 : :
2541 andres@anarazel.de 2164 : 3440689 : targoffset = hscan->rs_vistuples[hscan->rs_cindex];
1215 peter@eisentraut.org 2165 : 3440689 : page = BufferGetPage(hscan->rs_cbuf);
2166 : 3440689 : lp = PageGetItemId(page, targoffset);
2541 andres@anarazel.de 2167 [ - + ]: 3440689 : Assert(ItemIdIsNormal(lp));
2168 : :
1215 peter@eisentraut.org 2169 : 3440689 : hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2541 andres@anarazel.de 2170 : 3440689 : hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2171 : 3440689 : hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2172 : 3440689 : ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2173 : :
2174 [ - + - - : 3440689 : pgstat_count_heap_fetch(scan->rs_rd);
+ - ]
2175 : :
2176 : : /*
2177 : : * Set up the result slot to point to this tuple. Note that the slot
2178 : : * acquires a pin on the buffer.
2179 : : */
2180 : 3440689 : ExecStoreBufferHeapTuple(&hscan->rs_ctup,
2181 : : slot,
2182 : : hscan->rs_cbuf);
2183 : :
2184 : 3440689 : hscan->rs_cindex++;
2185 : :
2186 : 3440689 : return true;
2187 : : }
2188 : :
2189 : : static bool
2542 2190 : 6455 : heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2191 : : {
2192 : 6455 : HeapScanDesc hscan = (HeapScanDesc) scan;
2193 : 6455 : TsmRoutine *tsm = scanstate->tsmroutine;
2194 : : BlockNumber blockno;
2195 : :
2196 : : /* return false immediately if relation is empty */
2197 [ - + ]: 6455 : if (hscan->rs_nblocks == 0)
2542 andres@anarazel.de 2198 :UBC 0 : return false;
2199 : :
2200 : : /* release previous scan buffer, if any */
710 drowley@postgresql.o 2201 [ + + ]:CBC 6455 : if (BufferIsValid(hscan->rs_cbuf))
2202 : : {
2203 : 6367 : ReleaseBuffer(hscan->rs_cbuf);
2204 : 6367 : hscan->rs_cbuf = InvalidBuffer;
2205 : : }
2206 : :
2207 [ + + ]: 6455 : if (tsm->NextSampleBlock)
2208 : 2222 : blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2209 : : else
2210 : : {
2211 : : /* scanning table sequentially */
2212 : :
2542 andres@anarazel.de 2213 [ + + ]: 4233 : if (hscan->rs_cblock == InvalidBlockNumber)
2214 : : {
2215 [ - + ]: 39 : Assert(!hscan->rs_inited);
2216 : 39 : blockno = hscan->rs_startblock;
2217 : : }
2218 : : else
2219 : : {
2220 [ - + ]: 4194 : Assert(hscan->rs_inited);
2221 : :
2222 : 4194 : blockno = hscan->rs_cblock + 1;
2223 : :
2224 [ + + ]: 4194 : if (blockno >= hscan->rs_nblocks)
2225 : : {
2226 : : /* wrap to beginning of rel, might not have started at 0 */
2227 : 39 : blockno = 0;
2228 : : }
2229 : :
2230 : : /*
2231 : : * Report our new scan position for synchronization purposes.
2232 : : *
2233 : : * Note: we do this before checking for end of scan so that the
2234 : : * final state of the position hint is back at the start of the
2235 : : * rel. That's not strictly necessary, but otherwise when you run
2236 : : * the same query multiple times the starting position would shift
2237 : : * a little bit backwards on every invocation, which is confusing.
2238 : : * We don't guarantee any specific ordering in general, though.
2239 : : */
2492 2240 [ - + ]: 4194 : if (scan->rs_flags & SO_ALLOW_SYNC)
2542 andres@anarazel.de 2241 :UBC 0 : ss_report_location(scan->rs_rd, blockno);
2242 : :
2542 andres@anarazel.de 2243 [ + + ]:CBC 4194 : if (blockno == hscan->rs_startblock)
2244 : : {
2245 : 39 : blockno = InvalidBlockNumber;
2246 : : }
2247 : : }
2248 : : }
2249 : :
710 drowley@postgresql.o 2250 : 6455 : hscan->rs_cblock = blockno;
2251 : :
2542 andres@anarazel.de 2252 [ + + ]: 6455 : if (!BlockNumberIsValid(blockno))
2253 : : {
2254 : 85 : hscan->rs_inited = false;
2255 : 85 : return false;
2256 : : }
2257 : :
710 drowley@postgresql.o 2258 [ - + ]: 6370 : Assert(hscan->rs_cblock < hscan->rs_nblocks);
2259 : :
2260 : : /*
2261 : : * Be sure to check for interrupts at least once per page. Checks at
2262 : : * higher code levels won't be able to stop a sample scan that encounters
2263 : : * many pages' worth of consecutive dead tuples.
2264 : : */
2265 [ - + ]: 6370 : CHECK_FOR_INTERRUPTS();
2266 : :
2267 : : /* Read page using selected strategy */
2268 : 6370 : hscan->rs_cbuf = ReadBufferExtended(hscan->rs_base.rs_rd, MAIN_FORKNUM,
2269 : : blockno, RBM_NORMAL, hscan->rs_strategy);
2270 : :
2271 : : /* in pagemode, prune the page and determine visible tuple offsets */
2272 [ + + ]: 6370 : if (hscan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
2273 : 4276 : heap_prepare_pagescan(scan);
2274 : :
2275 : 6370 : hscan->rs_inited = true;
2542 andres@anarazel.de 2276 : 6370 : return true;
2277 : : }
2278 : :
2279 : : static bool
2280 : 126947 : heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
2281 : : TupleTableSlot *slot)
2282 : : {
2283 : 126947 : HeapScanDesc hscan = (HeapScanDesc) scan;
2284 : 126947 : TsmRoutine *tsm = scanstate->tsmroutine;
2285 : 126947 : BlockNumber blockno = hscan->rs_cblock;
2492 2286 : 126947 : bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2287 : :
2288 : : Page page;
2289 : : bool all_visible;
2290 : : OffsetNumber maxoffset;
2291 : :
2292 : : /*
2293 : : * When not using pagemode, we must lock the buffer during tuple
2294 : : * visibility checks.
2295 : : */
2542 2296 [ + + ]: 126947 : if (!pagemode)
2297 : 2097 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
2298 : :
198 peter@eisentraut.org 2299 :GNC 126947 : page = BufferGetPage(hscan->rs_cbuf);
2542 andres@anarazel.de 2300 [ + + ]:CBC 253343 : all_visible = PageIsAllVisible(page) &&
2301 [ + - ]: 126396 : !scan->rs_snapshot->takenDuringRecovery;
2302 : 126947 : maxoffset = PageGetMaxOffsetNumber(page);
2303 : :
2304 : : for (;;)
2542 andres@anarazel.de 2305 :UBC 0 : {
2306 : : OffsetNumber tupoffset;
2307 : :
2542 andres@anarazel.de 2308 [ - + ]:CBC 126947 : CHECK_FOR_INTERRUPTS();
2309 : :
2310 : : /* Ask the tablesample method which tuples to check on this page. */
2311 : 126947 : tupoffset = tsm->NextSampleTuple(scanstate,
2312 : : blockno,
2313 : : maxoffset);
2314 : :
2315 [ + + + - : 126947 : if (OffsetNumberIsValid(tupoffset))
+ + ]
2316 : : {
2317 : : ItemId itemid;
2318 : : bool visible;
2319 : 120580 : HeapTuple tuple = &(hscan->rs_ctup);
2320 : :
2321 : : /* Skip invalid tuple pointers. */
2322 : 120580 : itemid = PageGetItemId(page, tupoffset);
2323 [ - + ]: 120580 : if (!ItemIdIsNormal(itemid))
2542 andres@anarazel.de 2324 :UBC 0 : continue;
2325 : :
2542 andres@anarazel.de 2326 :CBC 120580 : tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2327 : 120580 : tuple->t_len = ItemIdGetLength(itemid);
2328 : 120580 : ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2329 : :
2330 : :
2331 [ + + ]: 120580 : if (all_visible)
2332 : 120174 : visible = true;
2333 : : else
2334 : 406 : visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2335 : : tuple, tupoffset);
2336 : :
2337 : : /* in pagemode, heap_prepare_pagescan did this for us */
2338 [ + + ]: 120580 : if (!pagemode)
2238 tmunro@postgresql.or 2339 : 3 : HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2340 : : hscan->rs_cbuf, scan->rs_snapshot);
2341 : :
2342 : : /* Try next tuple from same page. */
2542 andres@anarazel.de 2343 [ - + ]: 120580 : if (!visible)
2542 andres@anarazel.de 2344 :UBC 0 : continue;
2345 : :
2346 : : /* Found visible tuple, return it. */
2542 andres@anarazel.de 2347 [ + + ]:CBC 120580 : if (!pagemode)
2348 : 3 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2349 : :
2350 : 120580 : ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2351 : :
2352 : : /* Count successfully-fetched tuples as heap fetches */
2353 [ - + - - : 120580 : pgstat_count_heap_getnext(scan->rs_rd);
+ - ]
2354 : :
2355 : 120580 : return true;
2356 : : }
2357 : : else
2358 : : {
2359 : : /*
2360 : : * If we get here, it means we've exhausted the items on this page
2361 : : * and it's time to move to the next.
2362 : : */
2363 [ + + ]: 6367 : if (!pagemode)
2364 : 2094 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2365 : :
2366 : 6367 : ExecClearTuple(slot);
2367 : 6367 : return false;
2368 : : }
2369 : : }
2370 : :
2371 : : Assert(0);
2372 : : }
2373 : :
2374 : :
2375 : : /* ----------------------------------------------------------------------------
2376 : : * Helper functions for the above.
2377 : : * ----------------------------------------------------------------------------
2378 : : */
2379 : :
2380 : : /*
2381 : : * Reconstruct and rewrite the given tuple
2382 : : *
2383 : : * We cannot simply copy the tuple as-is, for several reasons:
2384 : : *
2385 : : * 1. We'd like to squeeze out the values of any dropped columns, both
2386 : : * to save space and to ensure we have no corner-case failures. (It's
2387 : : * possible for example that the new table hasn't got a TOAST table
2388 : : * and so is unable to store any large values of dropped cols.)
2389 : : *
2390 : : * 2. The tuple might not even be legal for the new table; this is
2391 : : * currently only known to happen as an after-effect of ALTER TABLE
2392 : : * SET WITHOUT OIDS.
2393 : : *
2394 : : * So, we must reconstruct the tuple from component Datums.
2395 : : */
2396 : : static void
2397 : 361464 : reform_and_rewrite_tuple(HeapTuple tuple,
2398 : : Relation OldHeap, Relation NewHeap,
2399 : : Datum *values, bool *isnull, RewriteState rwstate)
2400 : : {
2401 : 361464 : TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2402 : 361464 : TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2403 : : HeapTuple copiedTuple;
2404 : : int i;
2405 : :
2406 : 361464 : heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2407 : :
2408 : : /* Be sure to null out any dropped columns */
2409 [ + + ]: 3197381 : for (i = 0; i < newTupDesc->natts; i++)
2410 : : {
450 drowley@postgresql.o 2411 [ - + ]: 2835917 : if (TupleDescCompactAttr(newTupDesc, i)->attisdropped)
2542 andres@anarazel.de 2412 :UBC 0 : isnull[i] = true;
2413 : : }
2414 : :
2542 andres@anarazel.de 2415 :CBC 361464 : copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2416 : :
2417 : : /* The heap rewrite module does the rest */
2418 : 361464 : rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2419 : :
2420 : 361464 : heap_freetuple(copiedTuple);
2421 : 361464 : }
2422 : :
2423 : : /*
2424 : : * Check visibility of the tuple.
2425 : : */
2426 : : static bool
2427 : 406 : SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2428 : : HeapTuple tuple,
2429 : : OffsetNumber tupoffset)
2430 : : {
2431 : 406 : HeapScanDesc hscan = (HeapScanDesc) scan;
2432 : :
2492 2433 [ + + ]: 406 : if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2434 : : {
450 melanieplageman@gmai 2435 : 403 : uint32 start = 0,
2436 : 403 : end = hscan->rs_ntuples;
2437 : :
2438 : : /*
2439 : : * In pageatatime mode, heap_prepare_pagescan() already did visibility
2440 : : * checks, so just look at the info it left in rs_vistuples[].
2441 : : *
2442 : : * We use a binary search over the known-sorted array. Note: we could
2443 : : * save some effort if we insisted that NextSampleTuple select tuples
2444 : : * in increasing order, but it's not clear that there would be enough
2445 : : * gain to justify the restriction.
2446 : : */
2447 [ + - ]: 776 : while (start < end)
2448 : : {
2449 : 776 : uint32 mid = start + (end - start) / 2;
2542 andres@anarazel.de 2450 : 776 : OffsetNumber curoffset = hscan->rs_vistuples[mid];
2451 : :
2452 [ + + ]: 776 : if (tupoffset == curoffset)
2453 : 403 : return true;
2454 [ + + ]: 373 : else if (tupoffset < curoffset)
450 melanieplageman@gmai 2455 : 219 : end = mid;
2456 : : else
2542 andres@anarazel.de 2457 : 154 : start = mid + 1;
2458 : : }
2459 : :
2542 andres@anarazel.de 2460 :UBC 0 : return false;
2461 : : }
2462 : : else
2463 : : {
2464 : : /* Otherwise, we have to check the tuple individually. */
2542 andres@anarazel.de 2465 :CBC 3 : return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2466 : : buffer);
2467 : : }
2468 : : }
2469 : :
/*
 * Helper function to get the next block of a bitmap heap scan.  Returns true
 * when it got the next block and saved it in the scan descriptor, and false
 * when the bitmap and/or relation are exhausted.
 *
 * On success, rs_cbuf/rs_cblock point at the new block, rs_vistuples[] holds
 * the sorted offsets of visible tuples on the page, rs_ntuples their count,
 * and *recheck reports whether the executor must re-verify quals for this
 * page.  *lossy_pages / *exact_pages are instrumentation counters bumped
 * according to the kind of bitmap page consumed.
 */
static bool
BitmapHeapScanNextBlock(TableScanDesc scan,
						bool *recheck,
						uint64 *lossy_pages, uint64 *exact_pages)
{
	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
	HeapScanDesc hscan = (HeapScanDesc) bscan;
	BlockNumber block;
	void	   *per_buffer_data;
	Buffer		buffer;
	Snapshot	snapshot;
	int			ntup;
	TBMIterateResult *tbmres;
	OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
	int			noffsets = -1;

	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
	Assert(hscan->rs_read_stream);

	/* Reset per-page state before loading the next page. */
	hscan->rs_cindex = 0;
	hscan->rs_ntuples = 0;

	/* Release buffer containing previous block. */
	if (BufferIsValid(hscan->rs_cbuf))
	{
		ReleaseBuffer(hscan->rs_cbuf);
		hscan->rs_cbuf = InvalidBuffer;
	}

	/*
	 * Pull the next buffer from the read stream; the stream's per-buffer
	 * data carries the TBMIterateResult describing this bitmap page.
	 */
	hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream,
											 &per_buffer_data);

	if (BufferIsInvalid(hscan->rs_cbuf))
	{
		/* the bitmap is exhausted */
		return false;
	}

	Assert(per_buffer_data);

	tbmres = per_buffer_data;

	Assert(BlockNumberIsValid(tbmres->blockno));
	Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);

	/* Exact pages need their tuple offsets extracted. */
	if (!tbmres->lossy)
		noffsets = tbm_extract_page_tuple(tbmres, offsets,
										  TBM_MAX_TUPLES_PER_PAGE);

	*recheck = tbmres->recheck;

	block = hscan->rs_cblock = tbmres->blockno;
	buffer = hscan->rs_cbuf;
	snapshot = scan->rs_snapshot;

	ntup = 0;

	/*
	 * Prune and repair fragmentation for the whole page, if possible.
	 */
	heap_page_prune_opt(scan->rs_rd, buffer);

	/*
	 * We must hold share lock on the buffer content while examining tuple
	 * visibility.  Afterwards, however, the tuples we have found to be
	 * visible are guaranteed good as long as we hold the buffer pin.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);

	/*
	 * We need two separate strategies for lossy and non-lossy cases.
	 */
	if (!tbmres->lossy)
	{
		/*
		 * Bitmap is non-lossy, so we just look through the offsets listed in
		 * tbmres; but we have to follow any HOT chain starting at each such
		 * offset.
		 */
		int			curslot;

		/* We must have extracted the tuple offsets by now */
		Assert(noffsets > -1);

		for (curslot = 0; curslot < noffsets; curslot++)
		{
			OffsetNumber offnum = offsets[curslot];
			ItemPointerData tid;
			HeapTupleData heapTuple;

			ItemPointerSet(&tid, block, offnum);
			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
									   &heapTuple, NULL, true))
				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
		}
	}
	else
	{
		/*
		 * Bitmap is lossy, so we must examine each line pointer on the page.
		 * But we can ignore HOT chains, since we'll check each tuple anyway.
		 */
		Page		page = BufferGetPage(buffer);
		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
		OffsetNumber offnum;

		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
		{
			ItemId		lp;
			HeapTupleData loctup;
			bool		valid;

			lp = PageGetItemId(page, offnum);
			if (!ItemIdIsNormal(lp))
				continue;
			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
			loctup.t_len = ItemIdGetLength(lp);
			loctup.t_tableOid = scan->rs_rd->rd_id;
			ItemPointerSet(&loctup.t_self, block, offnum);
			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
			if (valid)
			{
				hscan->rs_vistuples[ntup++] = offnum;
				PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
								 HeapTupleHeaderGetXmin(loctup.t_data));
			}
			/* Serializable-conflict check runs for visible and invisible alike. */
			HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
												buffer, snapshot);
		}
	}

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	Assert(ntup <= MaxHeapTuplesPerPage);
	hscan->rs_ntuples = ntup;

	/* Bump the appropriate instrumentation counter for EXPLAIN output. */
	if (tbmres->lossy)
		(*lossy_pages)++;
	else
		(*exact_pages)++;

	/*
	 * Return true to indicate that a valid block was found and the bitmap is
	 * not exhausted.  If there are no visible tuples on this page,
	 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
	 * return false returning control to this function to advance to the next
	 * block in the bitmap.
	 */
	return true;
}
2626 : :
/* ------------------------------------------------------------------------
 * Definition of the heap table access method.
 *
 * Wires the heapam.c (and related) routines into the tableam callback
 * table; see tableam.h for the contract of each callback.
 * ------------------------------------------------------------------------
 */

static const TableAmRoutine heapam_methods = {
	.type = T_TableAmRoutine,

	.slot_callbacks = heapam_slot_callbacks,

	/* sequential scan support */
	.scan_begin = heap_beginscan,
	.scan_end = heap_endscan,
	.scan_rescan = heap_rescan,
	.scan_getnextslot = heap_getnextslot,

	/* TID range scan support */
	.scan_set_tidrange = heap_set_tidrange,
	.scan_getnextslot_tidrange = heap_getnextslot_tidrange,

	/* parallel scan support (generic block-based implementations) */
	.parallelscan_estimate = table_block_parallelscan_estimate,
	.parallelscan_initialize = table_block_parallelscan_initialize,
	.parallelscan_reinitialize = table_block_parallelscan_reinitialize,

	/* index fetch (fetching heap tuples identified by an index) */
	.index_fetch_begin = heapam_index_fetch_begin,
	.index_fetch_reset = heapam_index_fetch_reset,
	.index_fetch_end = heapam_index_fetch_end,
	.index_fetch_tuple = heapam_index_fetch_tuple,

	/* tuple modification */
	.tuple_insert = heapam_tuple_insert,
	.tuple_insert_speculative = heapam_tuple_insert_speculative,
	.tuple_complete_speculative = heapam_tuple_complete_speculative,
	.multi_insert = heap_multi_insert,
	.tuple_delete = heapam_tuple_delete,
	.tuple_update = heapam_tuple_update,
	.tuple_lock = heapam_tuple_lock,

	/* tuple fetch / visibility */
	.tuple_fetch_row_version = heapam_fetch_row_version,
	.tuple_get_latest_tid = heap_get_latest_tid,
	.tuple_tid_valid = heapam_tuple_tid_valid,
	.tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
	.index_delete_tuples = heap_index_delete_tuples,

	/* DDL-related operations */
	.relation_set_new_filelocator = heapam_relation_set_new_filelocator,
	.relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
	.relation_copy_data = heapam_relation_copy_data,
	.relation_copy_for_cluster = heapam_relation_copy_for_cluster,
	.relation_vacuum = heap_vacuum_rel,
	.scan_analyze_next_block = heapam_scan_analyze_next_block,
	.scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
	.index_build_range_scan = heapam_index_build_range_scan,
	.index_validate_scan = heapam_index_validate_scan,

	/* miscellaneous relation-level callbacks */
	.relation_size = table_block_relation_size,
	.relation_needs_toast_table = heapam_relation_needs_toast_table,
	.relation_toast_am = heapam_relation_toast_am,
	.relation_fetch_toast_slice = heap_fetch_toast_slice,

	/* planner support */
	.relation_estimate_size = heapam_estimate_rel_size,

	/* bitmap and sample scan support */
	.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
	.scan_sample_next_block = heapam_scan_sample_next_block,
	.scan_sample_next_tuple = heapam_scan_sample_next_tuple
};
2689 : :

/*
 * Return the heap table access method's callback table.  Used by callers
 * that need the heap AM routines directly, without going through the
 * pg_am catalog / handler-function lookup.
 */
const TableAmRoutine *
GetHeapamTableAmRoutine(void)
{
	return &heapam_methods;
}
2696 : :
/*
 * SQL-callable table AM handler for the heap access method: returns the
 * heapam callback table as a pointer Datum.
 */
Datum
heap_tableam_handler(PG_FUNCTION_ARGS)
{
	PG_RETURN_POINTER(&heapam_methods);
}
|