Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * nodeIndexonlyscan.c
4 : : * Routines to support index-only scans
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/executor/nodeIndexonlyscan.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : /*
16 : : * INTERFACE ROUTINES
17 : : * ExecIndexOnlyScan scans an index
18 : : * IndexOnlyNext retrieve next tuple
19 : : * ExecInitIndexOnlyScan creates and initializes state info.
20 : : * ExecReScanIndexOnlyScan rescans the indexed relation.
21 : : * ExecEndIndexOnlyScan releases all storage.
22 : : * ExecIndexOnlyMarkPos marks scan position.
23 : : * ExecIndexOnlyRestrPos restores scan position.
24 : : * ExecIndexOnlyScanEstimate estimates DSM space needed for
25 : : * parallel index-only scan
26 : : * ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
27 : : * index-only scan
28 : : * ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
29 : : * ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30 : : */
31 : : #include "postgres.h"
32 : :
33 : : #include "access/genam.h"
34 : : #include "access/relscan.h"
35 : : #include "access/tableam.h"
36 : : #include "access/tupdesc.h"
37 : : #include "access/visibilitymap.h"
38 : : #include "catalog/pg_type.h"
39 : : #include "executor/executor.h"
40 : : #include "executor/instrument.h"
41 : : #include "executor/nodeIndexonlyscan.h"
42 : : #include "executor/nodeIndexscan.h"
43 : : #include "miscadmin.h"
44 : : #include "storage/bufmgr.h"
45 : : #include "storage/predicate.h"
46 : : #include "utils/builtins.h"
47 : : #include "utils/rel.h"
48 : :
49 : :
50 : : static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
51 : : static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
52 : : IndexTuple itup, TupleDesc itupdesc);
53 : :
54 : :
55 : : /* ----------------------------------------------------------------
56 : : * IndexOnlyNext
57 : : *
58 : : * Retrieve a tuple from the IndexOnlyScan node's index.
59 : : * ----------------------------------------------------------------
60 : : */
61 : : static TupleTableSlot *
5320 tgl@sss.pgh.pa.us 62 :CBC 3873634 : IndexOnlyNext(IndexOnlyScanState *node)
63 : : {
64 : : EState *estate;
65 : : ExprContext *econtext;
66 : : ScanDirection direction;
67 : : IndexScanDesc scandesc;
68 : : TupleTableSlot *slot;
69 : : ItemPointer tid;
70 : :
71 : : /*
72 : : * extract necessary information from index scan node
73 : : */
74 : 3873634 : estate = node->ss.ps.state;
75 : :
76 : : /*
77 : : * Determine which direction to scan the index in based on the plan's scan
78 : : * direction and the current direction of execution.
79 : : */
1189 drowley@postgresql.o 80 : 3873634 : direction = ScanDirectionCombine(estate->es_direction,
81 : : ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
5320 tgl@sss.pgh.pa.us 82 : 3873634 : scandesc = node->ioss_ScanDesc;
83 : 3873634 : econtext = node->ss.ps.ps_ExprContext;
84 : 3873634 : slot = node->ss.ss_ScanTupleSlot;
85 : :
3345 rhaas@postgresql.org 86 [ + + ]: 3873634 : if (scandesc == NULL)
87 : : {
88 : : /*
89 : : * We reach here if the index only scan is not parallel, or if we're
90 : : * serially executing an index only scan that was planned to be
91 : : * parallel.
92 : : */
93 [ + - ]: 5803 : scandesc = index_beginscan(node->ss.ss_currentRelation,
94 : : node->ioss_RelationDesc,
95 : : estate->es_snapshot,
96 : : node->ioss_Instrument,
97 : : node->ioss_NumScanKeys,
98 : : node->ioss_NumOrderByKeys,
36 melanieplageman@gmai 99 :GNC 5803 : ScanRelIsReadOnly(&node->ss) ?
100 : : SO_HINT_REL_READ_ONLY : SO_NONE);
101 : :
3345 rhaas@postgresql.org 102 :CBC 5803 : node->ioss_ScanDesc = scandesc;
103 : :
104 : :
105 : : /* Set it up for index-only scan */
106 : 5803 : node->ioss_ScanDesc->xs_want_itup = true;
107 : 5803 : node->ioss_VMBuffer = InvalidBuffer;
108 : :
109 : : /*
110 : : * If no run-time keys to calculate or they are ready, go ahead and
111 : : * pass the scankeys to the index AM.
112 : : */
113 [ + + + - ]: 5803 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
114 : 5803 : index_rescan(scandesc,
3345 rhaas@postgresql.org 115 :ECB (4443) : node->ioss_ScanKeys,
116 : : node->ioss_NumScanKeys,
117 : (4443) : node->ioss_OrderByKeys,
118 : : node->ioss_NumOrderByKeys);
119 : : }
120 : :
121 : : /*
122 : : * OK, now that we have what we need, fetch the next tuple.
123 : : */
5320 tgl@sss.pgh.pa.us 124 [ + + ]:CBC 3938955 : while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
125 : : {
2612 andres@anarazel.de 126 : 3804752 : bool tuple_from_heap = false;
127 : :
3206 128 [ + + ]: 3804752 : CHECK_FOR_INTERRUPTS();
129 : :
130 : : /*
131 : : * We can skip the heap fetch if the TID references a heap page on
132 : : * which all tuples are known visible to everybody. In any case,
133 : : * we'll use the index tuple not the heap tuple as the data source.
134 : : *
135 : : * Note on Memory Ordering Effects: visibilitymap_get_status does not
136 : : * lock the visibility map buffer, and therefore the result we read
137 : : * here could be slightly stale. However, it can't be stale enough to
138 : : * matter.
139 : : *
140 : : * We need to detect clearing a VM bit due to an insert right away,
141 : : * because the tuple is present in the index page but not visible. The
142 : : * reading of the TID by this scan (using a shared lock on the index
143 : : * buffer) is serialized with the insert of the TID into the index
144 : : * (using an exclusive lock on the index buffer). Because the VM bit
145 : : * is cleared before updating the index, and locking/unlocking of the
146 : : * index page acts as a full memory barrier, we are sure to see the
147 : : * cleared bit if we see a recently-inserted TID.
148 : : *
149 : : * Deletes do not update the index page (only VACUUM will clear out
150 : : * the TID), so the clearing of the VM bit by a delete is not
151 : : * serialized with this test below, and we may see a value that is
152 : : * significantly stale. However, we don't care about the delete right
153 : : * away, because the tuple is still visible until the deleting
154 : : * transaction commits or the statement ends (if it's our
155 : : * transaction). In either case, the lock on the VM buffer will have
156 : : * been released (acting as a write barrier) after clearing the bit.
157 : : * And for us to have a snapshot that includes the deleting
158 : : * transaction (making the tuple invisible), we must have acquired
159 : : * ProcArrayLock after that time, acting as a read barrier.
160 : : *
161 : : * It's worth going through this complexity to avoid needing to lock
162 : : * the VM buffer, which could cause significant contention.
163 : : */
3717 rhaas@postgresql.org 164 [ + + ]: 3804752 : if (!VM_ALL_VISIBLE(scandesc->heapRelation,
165 : : ItemPointerGetBlockNumber(tid),
166 : : &node->ioss_VMBuffer))
167 : : {
168 : : /*
169 : : * Rats, we have to visit the heap to check visibility.
170 : : */
2947 alvherre@alvh.no-ip. 171 [ + + ]: 531907 : InstrCountTuples2(node, 1);
2525 heikki.linnakangas@i 172 [ + + ]: 531907 : if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
5077 bruce@momjian.us 173 : 65317 : continue; /* no visible tuple, try next index entry */
174 : :
2525 heikki.linnakangas@i 175 : 466590 : ExecClearTuple(node->ioss_TableSlot);
176 : :
177 : : /*
178 : : * Only MVCC snapshots are supported here, so there should be no
179 : : * need to keep following the HOT chain once a visible entry has
180 : : * been found. If we did want to allow that, we'd need to keep
181 : : * more state to remember not to call index_getnext_tid next time.
182 : : */
2612 andres@anarazel.de 183 [ - + ]: 466590 : if (scandesc->xs_heap_continue)
5320 tgl@sss.pgh.pa.us 184 [ # # ]:UBC 0 : elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
185 : :
186 : : /*
187 : : * Note: at this point we are holding a pin on the heap page, as
188 : : * recorded in scandesc->xs_cbuf. We could release that pin now,
189 : : * but it's not clear whether it's a win to do so. The next index
190 : : * entry might require a visit to the same heap page.
191 : : */
192 : :
2612 andres@anarazel.de 193 :CBC 466590 : tuple_from_heap = true;
194 : : }
195 : :
196 : : /*
197 : : * Fill the scan tuple slot with data from the index. This might be
198 : : * provided in either HeapTuple or IndexTuple format. Conceivably an
199 : : * index AM might fill both fields, in which case we prefer the heap
200 : : * format, since it's probably a bit cheaper to fill a slot from.
201 : : */
3354 tgl@sss.pgh.pa.us 202 [ + + ]: 3739435 : if (scandesc->xs_hitup)
203 : : {
204 : : /*
205 : : * We don't take the trouble to verify that the provided tuple has
206 : : * exactly the slot's format, but it seems worth doing a quick
207 : : * check on the number of fields.
208 : : */
209 [ - + ]: 969041 : Assert(slot->tts_tupleDescriptor->natts ==
210 : : scandesc->xs_hitupdesc->natts);
2573 andres@anarazel.de 211 : 969041 : ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
212 : : }
3354 tgl@sss.pgh.pa.us 213 [ + - ]: 2770394 : else if (scandesc->xs_itup)
734 drowley@postgresql.o 214 : 2770394 : StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
215 : : else
3354 tgl@sss.pgh.pa.us 216 [ # # ]:UBC 0 : elog(ERROR, "no data returned for index-only scan");
217 : :
218 : : /*
219 : : * If the index was lossy, we have to recheck the index quals.
220 : : */
5320 tgl@sss.pgh.pa.us 221 [ + + ]:CBC 3739435 : if (scandesc->xs_recheck)
222 : : {
223 : 9 : econtext->ecxt_scantuple = slot;
1583 224 [ + + ]: 9 : if (!ExecQualAndReset(node->recheckqual, econtext))
225 : : {
226 : : /* Fails recheck, so drop it and loop back for another */
5320 227 [ - + ]: 4 : InstrCountFiltered2(node, 1);
228 : 4 : continue;
229 : : }
230 : : }
231 : :
232 : : /*
233 : : * We don't currently support rechecking ORDER BY distances. (In
234 : : * principle, if the index can support retrieval of the originally
235 : : * indexed value, it should be able to produce an exact distance
236 : : * calculation too. So it's not clear that adding code here for
237 : : * recheck/re-sort would be worth the trouble. But we should at least
238 : : * throw an error if someone tries it.)
239 : : */
4000 240 [ + + + + ]: 3739431 : if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
241 [ + - ]: 4 : ereport(ERROR,
242 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
243 : : errmsg("lossy distance functions are not supported in index-only scans")));
244 : :
245 : : /*
246 : : * If we didn't access the heap, then we'll need to take a predicate
247 : : * lock explicitly, as if we had. For now we do that at page level.
248 : : */
2612 andres@anarazel.de 249 [ + + ]: 3739427 : if (!tuple_from_heap)
4991 kgrittn@postgresql.o 250 : 3272845 : PredicateLockPage(scandesc->heapRelation,
251 : : ItemPointerGetBlockNumber(tid),
252 : : estate->es_snapshot);
253 : :
5320 tgl@sss.pgh.pa.us 254 : 3739427 : return slot;
255 : : }
256 : :
257 : : /*
258 : : * if we get here it means the index scan failed so we are at the end of
259 : : * the scan..
260 : : */
261 : 134203 : return ExecClearTuple(slot);
262 : : }
263 : :
264 : : /*
265 : : * StoreIndexTuple
266 : : * Fill the slot with data from the index tuple.
267 : : *
268 : : * At some point this might be generally-useful functionality, but
269 : : * right now we don't need it elsewhere.
270 : : */
271 : : static void
734 drowley@postgresql.o 272 : 2770394 : StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
273 : : IndexTuple itup, TupleDesc itupdesc)
274 : : {
275 : : /*
276 : : * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
277 : : * not the slot's tupdesc, in case the latter has different datatypes
278 : : * (this happens for btree name_ops in particular). They'd better have
279 : : * the same number of columns though, as well as being datatype-compatible
280 : : * which is something we can't so easily check.
281 : : */
2620 tgl@sss.pgh.pa.us 282 [ - + ]: 2770394 : Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);
283 : :
5320 284 : 2770394 : ExecClearTuple(slot);
2620 285 : 2770394 : index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
286 : :
287 : : /*
288 : : * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
289 : : * sized allocation. We mark this branch as unlikely as generally "name"
290 : : * is used only for the system catalogs and this would have to be a user
291 : : * query running on those or some other user table with an index on a name
292 : : * column.
293 : : */
734 drowley@postgresql.o 294 [ + + ]: 2770394 : if (unlikely(node->ioss_NameCStringAttNums != NULL))
295 : : {
296 : 11605 : int attcount = node->ioss_NameCStringCount;
297 : :
298 [ + + ]: 23210 : for (int idx = 0; idx < attcount; idx++)
299 : : {
300 : 11605 : int attnum = node->ioss_NameCStringAttNums[idx];
301 : : Name name;
302 : :
303 : : /* skip null Datums */
304 [ - + ]: 11605 : if (slot->tts_isnull[attnum])
734 drowley@postgresql.o 305 :UBC 0 : continue;
306 : :
307 : : /* allocate the NAMEDATALEN and copy the datum into that memory */
734 drowley@postgresql.o 308 :CBC 11605 : name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
309 : : NAMEDATALEN);
310 : :
311 : : /* use namestrcpy to zero-pad all trailing bytes */
312 : 11605 : namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
313 : 11605 : slot->tts_values[attnum] = NameGetDatum(name);
314 : : }
315 : : }
316 : :
5320 tgl@sss.pgh.pa.us 317 : 2770394 : ExecStoreVirtualTuple(slot);
318 : 2770394 : }
319 : :
320 : : /*
321 : : * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
322 : : *
323 : : * This can't really happen, since an index can't supply CTID which would
324 : : * be necessary data for any potential EvalPlanQual target relation. If it
325 : : * did happen, the EPQ code would pass us the wrong data, namely a heap
326 : : * tuple not an index tuple. So throw an error.
327 : : */
328 : : static bool
5320 tgl@sss.pgh.pa.us 329 :UBC 0 : IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
330 : : {
331 [ # # ]: 0 : elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
332 : : return false; /* keep compiler quiet */
333 : : }
334 : :
335 : : /* ----------------------------------------------------------------
336 : : * ExecIndexOnlyScan(node)
337 : : * ----------------------------------------------------------------
338 : : */
339 : : static TupleTableSlot *
3214 andres@anarazel.de 340 :CBC 3661077 : ExecIndexOnlyScan(PlanState *pstate)
341 : : {
342 : 3661077 : IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);
343 : :
344 : : /*
345 : : * If we have runtime keys and they've not already been set up, do it now.
346 : : */
5320 tgl@sss.pgh.pa.us 347 [ + + + + ]: 3661077 : if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
348 : 376 : ExecReScan((PlanState *) node);
349 : :
350 : 3661077 : return ExecScan(&node->ss,
351 : : (ExecScanAccessMtd) IndexOnlyNext,
352 : : (ExecScanRecheckMtd) IndexOnlyRecheck);
353 : : }
354 : :
355 : : /* ----------------------------------------------------------------
356 : : * ExecReScanIndexOnlyScan(node)
357 : : *
358 : : * Recalculates the values of any scan keys whose value depends on
359 : : * information known at runtime, then rescans the indexed relation.
360 : : *
361 : : * Updating the scan key was formerly done separately in
362 : : * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
363 : : * rescans of indices and relations/general streams more uniform.
364 : : * ----------------------------------------------------------------
365 : : */
366 : : void
367 : 149680 : ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
368 : : {
369 : : /*
370 : : * If we are doing runtime key calculations (ie, any of the index key
371 : : * values weren't simple Consts), compute the new key values. But first,
372 : : * reset the context so we don't leak memory as each outer tuple is
373 : : * scanned. Note this assumes that we will recalculate *all* runtime keys
374 : : * on each call.
375 : : */
376 [ + + ]: 149680 : if (node->ioss_NumRuntimeKeys != 0)
377 : : {
378 : 149547 : ExprContext *econtext = node->ioss_RuntimeContext;
379 : :
380 : 149547 : ResetExprContext(econtext);
381 : 149547 : ExecIndexEvalRuntimeKeys(econtext,
382 : : node->ioss_RuntimeKeys,
383 : : node->ioss_NumRuntimeKeys);
384 : : }
385 : 149680 : node->ioss_RuntimeKeysReady = true;
386 : :
387 : : /* reset index scan */
3362 rhaas@postgresql.org 388 [ + + ]: 149680 : if (node->ioss_ScanDesc)
389 : 148133 : index_rescan(node->ioss_ScanDesc,
3362 rhaas@postgresql.org 390 :ECB (105205) : node->ioss_ScanKeys, node->ioss_NumScanKeys,
391 : (105205) : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
392 : :
5320 tgl@sss.pgh.pa.us 393 :CBC 149680 : ExecScanReScan(&node->ss);
394 : 149680 : }
395 : :
396 : :
397 : : /* ----------------------------------------------------------------
398 : : * ExecEndIndexOnlyScan
399 : : * ----------------------------------------------------------------
400 : : */
401 : : void
402 : 11575 : ExecEndIndexOnlyScan(IndexOnlyScanState *node)
403 : : {
404 : : Relation indexRelationDesc;
405 : : IndexScanDesc indexScanDesc;
406 : :
407 : : /*
408 : : * extract information from the node
409 : : */
410 : 11575 : indexRelationDesc = node->ioss_RelationDesc;
411 : 11575 : indexScanDesc = node->ioss_ScanDesc;
412 : :
413 : : /* Release VM buffer pin, if any. */
414 [ + + ]: 11575 : if (node->ioss_VMBuffer != InvalidBuffer)
415 : : {
416 : 3997 : ReleaseBuffer(node->ioss_VMBuffer);
417 : 3997 : node->ioss_VMBuffer = InvalidBuffer;
418 : : }
419 : :
420 : : /*
421 : : * When ending a parallel worker, copy the statistics gathered by the
422 : : * worker back into shared memory so that it can be picked up by the main
423 : : * process to report in EXPLAIN ANALYZE
424 : : */
420 pg@bowt.ie 425 [ - + - - ]: 11575 : if (node->ioss_SharedInfo != NULL && IsParallelWorker())
426 : : {
427 : : IndexScanInstrumentation *winstrument;
428 : :
52 tomas.vondra@postgre 429 [ # # ]:UBC 0 : Assert(ParallelWorkerNumber < node->ioss_SharedInfo->num_workers);
420 pg@bowt.ie 430 : 0 : winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];
431 : :
432 : : /*
433 : : * We have to accumulate the stats rather than performing a memcpy.
434 : : * When a Gather/GatherMerge node finishes it will perform planner
435 : : * shutdown on the workers. On rescan it will spin up new workers
436 : : * which will have a new IndexOnlyScanState and zeroed stats.
437 : : */
44 pg@bowt.ie 438 :UNC 0 : winstrument->nsearches += node->ioss_Instrument->nsearches;
439 : : }
440 : :
441 : : /*
442 : : * close the index relation (no-op if we didn't open it)
443 : : */
5320 tgl@sss.pgh.pa.us 444 [ + + ]:CBC 11575 : if (indexScanDesc)
445 : 5936 : index_endscan(indexScanDesc);
446 [ + + ]: 11575 : if (indexRelationDesc)
447 : 9818 : index_close(indexRelationDesc, NoLock);
448 : 11575 : }
449 : :
450 : : /* ----------------------------------------------------------------
451 : : * ExecIndexOnlyMarkPos
452 : : *
453 : : * Note: we assume that no caller attempts to set a mark before having read
454 : : * at least one tuple. Otherwise, ioss_ScanDesc might still be NULL.
455 : : * ----------------------------------------------------------------
456 : : */
457 : : void
458 : 82019 : ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
459 : : {
3020 460 : 82019 : EState *estate = node->ss.ps.state;
2434 andres@anarazel.de 461 : 82019 : EPQState *epqstate = estate->es_epq_active;
462 : :
463 [ - + ]: 82019 : if (epqstate != NULL)
464 : : {
465 : : /*
466 : : * We are inside an EvalPlanQual recheck. If a test tuple exists for
467 : : * this relation, then we shouldn't access the index at all. We would
468 : : * instead need to save, and later restore, the state of the
469 : : * relsubs_done flag, so that re-fetching the test tuple is possible.
470 : : * However, given the assumption that no caller sets a mark at the
471 : : * start of the scan, we can only get here with relsubs_done[i]
472 : : * already set, and so no state need be saved.
473 : : */
3020 tgl@sss.pgh.pa.us 474 :UBC 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
475 : :
476 [ # # ]: 0 : Assert(scanrelid > 0);
2434 andres@anarazel.de 477 [ # # ]: 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
478 [ # # ]: 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
479 : : {
480 : : /* Verify the claim above */
481 [ # # ]: 0 : if (!epqstate->relsubs_done[scanrelid - 1])
3020 tgl@sss.pgh.pa.us 482 [ # # ]: 0 : elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
483 : 0 : return;
484 : : }
485 : : }
486 : :
5320 tgl@sss.pgh.pa.us 487 :CBC 82019 : index_markpos(node->ioss_ScanDesc);
488 : : }
489 : :
490 : : /* ----------------------------------------------------------------
491 : : * ExecIndexOnlyRestrPos
492 : : * ----------------------------------------------------------------
493 : : */
494 : : void
5320 tgl@sss.pgh.pa.us 495 :UBC 0 : ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
496 : : {
3020 497 : 0 : EState *estate = node->ss.ps.state;
2434 andres@anarazel.de 498 : 0 : EPQState *epqstate = estate->es_epq_active;
499 : :
500 [ # # ]: 0 : if (estate->es_epq_active != NULL)
501 : : {
502 : : /* See comments in ExecIndexMarkPos */
3020 tgl@sss.pgh.pa.us 503 : 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
504 : :
505 [ # # ]: 0 : Assert(scanrelid > 0);
2434 andres@anarazel.de 506 [ # # ]: 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
507 [ # # ]: 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
508 : : {
509 : : /* Verify the claim above */
510 [ # # ]: 0 : if (!epqstate->relsubs_done[scanrelid - 1])
3020 tgl@sss.pgh.pa.us 511 [ # # ]: 0 : elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
512 : 0 : return;
513 : : }
514 : : }
515 : :
5320 516 : 0 : index_restrpos(node->ioss_ScanDesc);
517 : : }
518 : :
519 : : /* ----------------------------------------------------------------
520 : : * ExecInitIndexOnlyScan
521 : : *
522 : : * Initializes the index scan's state information, creates
523 : : * scan keys, and opens the base and index relations.
524 : : *
525 : : * Note: index scans have 2 sets of state information because
526 : : * we have to keep track of the base relation and the
527 : : * index relation.
528 : : * ----------------------------------------------------------------
529 : : */
530 : : IndexOnlyScanState *
5320 tgl@sss.pgh.pa.us 531 :CBC 11608 : ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
532 : : {
533 : : IndexOnlyScanState *indexstate;
534 : : Relation currentRelation;
535 : : Relation indexRelation;
536 : : LOCKMODE lockmode;
537 : : TupleDesc tupDesc;
538 : : int indnkeyatts;
539 : : int namecount;
540 : :
541 : : /*
542 : : * create state structure
543 : : */
544 : 11608 : indexstate = makeNode(IndexOnlyScanState);
545 : 11608 : indexstate->ss.ps.plan = (Plan *) node;
546 : 11608 : indexstate->ss.ps.state = estate;
3214 andres@anarazel.de 547 : 11608 : indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
548 : :
549 : : /*
550 : : * Miscellaneous initialization
551 : : *
552 : : * create expression context for node
553 : : */
5320 tgl@sss.pgh.pa.us 554 : 11608 : ExecAssignExprContext(estate, &indexstate->ss.ps);
555 : :
556 : : /*
557 : : * open the scan relation
558 : : */
4756 559 : 11608 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
560 : :
5320 561 : 11608 : indexstate->ss.ss_currentRelation = currentRelation;
562 : 11608 : indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
563 : :
564 : : /*
565 : : * Build the scan tuple type using the indextlist generated by the
566 : : * planner. We use this, rather than the index's physical tuple
567 : : * descriptor, because the latter contains storage column types not the
568 : : * types of the original datums. (It's the AM's responsibility to return
569 : : * suitable data anyway.)
570 : : */
2723 andres@anarazel.de 571 : 11608 : tupDesc = ExecTypeFromTL(node->indextlist);
2612 572 : 11608 : ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
573 : : &TTSOpsVirtual,
574 : : 0);
575 : :
576 : : /*
577 : : * We need another slot, in a format that's suitable for the table AM, for
578 : : * when we need to fetch a tuple from the table for rechecking visibility.
579 : : */
2525 heikki.linnakangas@i 580 : 11608 : indexstate->ioss_TableSlot =
581 : 11608 : ExecAllocTableSlot(&estate->es_tupleTable,
582 : : RelationGetDescr(currentRelation),
583 : : table_slot_callbacks(currentRelation), 0);
584 : :
585 : : /*
586 : : * Initialize result type and projection info. The node's targetlist will
587 : : * contain Vars with varno = INDEX_VAR, referencing the scan tuple.
588 : : */
2734 andres@anarazel.de 589 : 11608 : ExecInitResultTypeTL(&indexstate->ss.ps);
4013 tgl@sss.pgh.pa.us 590 : 11608 : ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);
591 : :
592 : : /*
593 : : * initialize child expressions
594 : : *
595 : : * Note: we don't initialize all of the indexorderby expression, only the
596 : : * sub-parts corresponding to runtime keys (see below).
597 : : */
3000 andres@anarazel.de 598 : 11608 : indexstate->ss.ps.qual =
599 : 11608 : ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
1583 tgl@sss.pgh.pa.us 600 : 11608 : indexstate->recheckqual =
601 : 11608 : ExecInitQual(node->recheckqual, (PlanState *) indexstate);
602 : :
603 : : /*
604 : : * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
605 : : * here. This allows an index-advisor plugin to EXPLAIN a plan containing
606 : : * references to nonexistent indexes.
607 : : */
5320 608 [ + + ]: 11608 : if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
609 : 1757 : return indexstate;
610 : :
611 : : /* Set up instrumentation of index-only scans if requested */
44 pg@bowt.ie 612 [ + + ]:GNC 9851 : if (estate->es_instrument)
613 : 84 : indexstate->ioss_Instrument = palloc0_object(IndexScanInstrumentation);
614 : :
615 : : /* Open the index relation. */
2588 tgl@sss.pgh.pa.us 616 :CBC 9851 : lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
734 drowley@postgresql.o 617 : 9851 : indexRelation = index_open(node->indexid, lockmode);
618 : 9851 : indexstate->ioss_RelationDesc = indexRelation;
619 : :
620 : : /*
621 : : * Initialize index-specific scan state
622 : : */
5320 tgl@sss.pgh.pa.us 623 : 9851 : indexstate->ioss_RuntimeKeysReady = false;
624 : 9851 : indexstate->ioss_RuntimeKeys = NULL;
625 : 9851 : indexstate->ioss_NumRuntimeKeys = 0;
626 : :
627 : : /*
628 : : * build the index scan keys from the index qualification
629 : : */
630 : 9851 : ExecIndexBuildScanKeys((PlanState *) indexstate,
631 : : indexRelation,
632 : : node->indexqual,
633 : : false,
634 : 9851 : &indexstate->ioss_ScanKeys,
635 : : &indexstate->ioss_NumScanKeys,
636 : : &indexstate->ioss_RuntimeKeys,
637 : : &indexstate->ioss_NumRuntimeKeys,
638 : : NULL, /* no ArrayKeys */
639 : : NULL);
640 : :
641 : : /*
642 : : * any ORDER BY exprs have to be turned into scankeys in the same way
643 : : */
644 : 9851 : ExecIndexBuildScanKeys((PlanState *) indexstate,
645 : : indexRelation,
646 : : node->indexorderby,
647 : : true,
648 : 9851 : &indexstate->ioss_OrderByKeys,
649 : : &indexstate->ioss_NumOrderByKeys,
650 : : &indexstate->ioss_RuntimeKeys,
651 : : &indexstate->ioss_NumRuntimeKeys,
652 : : NULL, /* no ArrayKeys */
653 : : NULL);
654 : :
655 : : /*
656 : : * If we have runtime keys, we need an ExprContext to evaluate them. The
657 : : * node's standard context won't do because we want to reset that context
658 : : * for every tuple. So, build another context just like the other one...
659 : : * -tgl 7/11/00
660 : : */
661 [ + + ]: 9851 : if (indexstate->ioss_NumRuntimeKeys != 0)
662 : : {
663 : 3259 : ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
664 : :
665 : 3259 : ExecAssignExprContext(estate, &indexstate->ss.ps);
666 : 3259 : indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
667 : 3259 : indexstate->ss.ps.ps_ExprContext = stdecontext;
668 : : }
669 : : else
670 : : {
671 : 6592 : indexstate->ioss_RuntimeContext = NULL;
672 : : }
673 : :
734 drowley@postgresql.o 674 : 9851 : indexstate->ioss_NameCStringAttNums = NULL;
675 : 9851 : indnkeyatts = indexRelation->rd_index->indnkeyatts;
676 : 9851 : namecount = 0;
677 : :
678 : : /*
679 : : * The "name" type for btree uses text_ops which results in storing
680 : : * cstrings in the indexed keys rather than names. Here we detect that in
681 : : * a generic way in case other index AMs want to do the same optimization.
682 : : * Check for opclasses with an opcintype of NAMEOID and an index tuple
683 : : * descriptor with CSTRINGOID. If any of these are found, create an array
684 : : * marking the index attribute number of each of them. StoreIndexTuple()
685 : : * handles copying the name Datums into a NAMEDATALEN-byte allocation.
686 : : */
687 : :
688 : : /* First, count the number of such index keys */
689 [ + + ]: 22255 : for (int attnum = 0; attnum < indnkeyatts; attnum++)
690 : : {
672 691 [ + + ]: 12404 : if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
734 692 [ + - ]: 959 : indexRelation->rd_opcintype[attnum] == NAMEOID)
693 : 959 : namecount++;
694 : : }
695 : :
696 [ + + ]: 9851 : if (namecount > 0)
697 : : {
698 : 959 : int idx = 0;
699 : :
700 : : /*
701 : : * Now create an array to mark the attribute numbers of the keys that
702 : : * need to be converted from cstring to name.
703 : : */
146 michael@paquier.xyz 704 :GNC 959 : indexstate->ioss_NameCStringAttNums = palloc_array(AttrNumber, namecount);
705 : :
734 drowley@postgresql.o 706 [ + + ]:CBC 2933 : for (int attnum = 0; attnum < indnkeyatts; attnum++)
707 : : {
672 708 [ + + ]: 1974 : if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
734 709 [ + - ]: 959 : indexRelation->rd_opcintype[attnum] == NAMEOID)
710 : 959 : indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
711 : : }
712 : : }
713 : :
714 : 9851 : indexstate->ioss_NameCStringCount = namecount;
715 : :
716 : : /*
717 : : * all done.
718 : : */
5320 tgl@sss.pgh.pa.us 719 : 9851 : return indexstate;
720 : : }
721 : :
722 : : /* ----------------------------------------------------------------
723 : : * Parallel Index-only Scan Support
724 : : * ----------------------------------------------------------------
725 : : */
726 : :
727 : : /* ----------------------------------------------------------------
728 : : * ExecIndexOnlyScanEstimate
729 : : *
730 : : * Compute the amount of space we'll need in the parallel
731 : : * query DSM, and inform pcxt->estimator about our needs.
732 : : * ----------------------------------------------------------------
733 : : */
734 : : void
3362 rhaas@postgresql.org 735 : 30 : ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
736 : : ParallelContext *pcxt)
737 : : {
738 : 30 : EState *estate = node->ss.ps.state;
739 : :
740 : 30 : node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
741 : : node->ioss_NumScanKeys,
742 : : node->ioss_NumOrderByKeys,
743 : : estate->es_snapshot);
744 : 30 : shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
745 : 30 : shm_toc_estimate_keys(&pcxt->estimator, 1);
3362 rhaas@postgresql.org 746 :GIC 30 : }
747 : :
748 : : /* ----------------------------------------------------------------
749 : : * ExecIndexOnlyScanInitializeDSM
750 : : *
751 : : * Set up a parallel index-only scan descriptor.
752 : : * ----------------------------------------------------------------
753 : : */
754 : : void
3362 rhaas@postgresql.org 755 :CBC 30 : ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
756 : : ParallelContext *pcxt)
757 : : {
758 : 30 : EState *estate = node->ss.ps.state;
759 : : ParallelIndexScanDesc piscan;
760 : :
761 : 30 : piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
762 : 30 : index_parallelscan_initialize(node->ss.ss_currentRelation,
763 : : node->ioss_RelationDesc,
764 : : estate->es_snapshot,
765 : : piscan);
766 : 30 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
767 : :
768 : 30 : node->ioss_ScanDesc =
769 [ + - ]: 30 : index_beginscan_parallel(node->ss.ss_currentRelation,
770 : : node->ioss_RelationDesc,
771 : : node->ioss_Instrument,
772 : : node->ioss_NumScanKeys,
773 : : node->ioss_NumOrderByKeys,
774 : : piscan,
36 melanieplageman@gmai 775 :GNC 30 : ScanRelIsReadOnly(&node->ss) ?
776 : : SO_HINT_REL_READ_ONLY : SO_NONE);
3362 rhaas@postgresql.org 777 :CBC 30 : node->ioss_ScanDesc->xs_want_itup = true;
778 : 30 : node->ioss_VMBuffer = InvalidBuffer;
779 : :
780 : : /*
781 : : * If no run-time keys to calculate or they are ready, go ahead and pass
782 : : * the scankeys to the index AM.
783 : : */
3345 784 [ - + - - ]: 30 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
3362 785 : 30 : index_rescan(node->ioss_ScanDesc,
3362 rhaas@postgresql.org 786 :ECB (23) : node->ioss_ScanKeys, node->ioss_NumScanKeys,
787 : (23) : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
3362 rhaas@postgresql.org 788 :GIC 30 : }
789 : :
/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
								 ParallelContext *pcxt)
{
	/* Only a parallel-aware node owns shared scan state to reset. */
	Assert(node->ss.ps.plan->parallel_aware);
	/* Let the index AM reset its parallel-scan state in the DSM. */
	index_parallelrescan(node->ioss_ScanDesc);
}
803 : :
804 : : /* ----------------------------------------------------------------
805 : : * ExecIndexOnlyScanInitializeWorker
806 : : *
807 : : * Copy relevant information from TOC into planstate.
808 : : * ----------------------------------------------------------------
809 : : */
810 : : void
3092 andres@anarazel.de 811 : 136 : ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
812 : : ParallelWorkerContext *pwcxt)
813 : : {
814 : : ParallelIndexScanDesc piscan;
815 : :
816 : 136 : piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
817 : :
3362 rhaas@postgresql.org 818 : 136 : node->ioss_ScanDesc =
819 [ + - ]: 136 : index_beginscan_parallel(node->ss.ss_currentRelation,
820 : : node->ioss_RelationDesc,
821 : : node->ioss_Instrument,
822 : : node->ioss_NumScanKeys,
823 : : node->ioss_NumOrderByKeys,
824 : : piscan,
36 melanieplageman@gmai 825 :GNC 136 : ScanRelIsReadOnly(&node->ss) ?
826 : : SO_HINT_REL_READ_ONLY : SO_NONE);
3362 rhaas@postgresql.org 827 :CBC 136 : node->ioss_ScanDesc->xs_want_itup = true;
828 : :
829 : : /*
830 : : * If no run-time keys to calculate or they are ready, go ahead and pass
831 : : * the scankeys to the index AM.
832 : : */
3345 833 [ - + - - ]: 136 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
3362 834 : 136 : index_rescan(node->ioss_ScanDesc,
3362 rhaas@postgresql.org 835 :ECB (103) : node->ioss_ScanKeys, node->ioss_NumScanKeys,
836 : (103) : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
3362 rhaas@postgresql.org 837 :GIC 136 : }
838 : :
839 : : /*
840 : : * Compute the amount of space we'll need for the shared instrumentation and
841 : : * inform pcxt->estimator.
842 : : */
843 : : void
29 melanieplageman@gmai 844 :GNC 42 : ExecIndexOnlyScanInstrumentEstimate(IndexOnlyScanState *node,
845 : : ParallelContext *pcxt)
846 : : {
847 : : Size size;
848 : :
849 [ - + - - ]: 42 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
850 : 42 : return;
851 : :
852 : : /*
853 : : * This size calculation is trivial enough that we don't bother saving it
854 : : * in the IndexOnlyScanState. We'll recalculate the needed size in
855 : : * ExecIndexOnlyScanInstrumentInitDSM().
856 : : */
28 tomas.vondra@postgre 857 :UNC 0 : size = add_size(offsetof(SharedIndexScanInstrumentation, winstrument),
858 : 0 : mul_size(pcxt->nworkers, sizeof(IndexScanInstrumentation)));
29 melanieplageman@gmai 859 : 0 : shm_toc_estimate_chunk(&pcxt->estimator, size);
860 : 0 : shm_toc_estimate_keys(&pcxt->estimator, 1);
861 : : }
862 : :
863 : : /*
864 : : * Set up parallel index-only scan instrumentation.
865 : : */
866 : : void
29 melanieplageman@gmai 867 :GNC 42 : ExecIndexOnlyScanInstrumentInitDSM(IndexOnlyScanState *node,
868 : : ParallelContext *pcxt)
869 : : {
870 : : Size size;
871 : :
872 [ - + - - ]: 42 : if (!node->ss.ps.instrument || pcxt->nworkers == 0)
873 : 42 : return;
874 : :
28 tomas.vondra@postgre 875 :UNC 0 : size = add_size(offsetof(SharedIndexScanInstrumentation, winstrument),
876 : 0 : mul_size(pcxt->nworkers, sizeof(IndexScanInstrumentation)));
29 melanieplageman@gmai 877 : 0 : node->ioss_SharedInfo =
878 : 0 : (SharedIndexScanInstrumentation *) shm_toc_allocate(pcxt->toc, size);
879 : :
880 : : /* Each per-worker area must start out as zeroes */
881 : 0 : memset(node->ioss_SharedInfo, 0, size);
882 : 0 : node->ioss_SharedInfo->num_workers = pcxt->nworkers;
883 : 0 : shm_toc_insert(pcxt->toc,
884 : 0 : node->ss.ps.plan->plan_node_id +
885 : : PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
886 : 0 : node->ioss_SharedInfo);
887 : : }
888 : :
889 : : /*
890 : : * Look up and save the location of the shared instrumentation.
891 : : */
892 : : void
29 melanieplageman@gmai 893 :GNC 168 : ExecIndexOnlyScanInstrumentInitWorker(IndexOnlyScanState *node,
894 : : ParallelWorkerContext *pwcxt)
895 : : {
896 [ + - ]: 168 : if (!node->ss.ps.instrument)
897 : 168 : return;
898 : :
29 melanieplageman@gmai 899 :UNC 0 : node->ioss_SharedInfo = (SharedIndexScanInstrumentation *)
900 : 0 : shm_toc_lookup(pwcxt->toc,
901 : 0 : node->ss.ps.plan->plan_node_id +
902 : : PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
903 : : false);
904 : : }
905 : :
906 : : /* ----------------------------------------------------------------
907 : : * ExecIndexOnlyScanRetrieveInstrumentation
908 : : *
909 : : * Transfer index-only scan statistics from DSM to private memory.
910 : : * ----------------------------------------------------------------
911 : : */
912 : : void
420 pg@bowt.ie 913 :UBC 0 : ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
914 : : {
915 : 0 : SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo;
916 : : size_t size;
917 : :
918 [ # # ]: 0 : if (SharedInfo == NULL)
919 : 0 : return;
920 : :
921 : : /* Create a copy of SharedInfo in backend-local memory */
922 : 0 : size = offsetof(SharedIndexScanInstrumentation, winstrument) +
923 : 0 : SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
924 : 0 : node->ioss_SharedInfo = palloc(size);
925 : 0 : memcpy(node->ioss_SharedInfo, SharedInfo, size);
926 : : }
|