Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * nodeIndexonlyscan.c
4 : : * Routines to support index-only scans
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/executor/nodeIndexonlyscan.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : /*
16 : : * INTERFACE ROUTINES
17 : : * ExecIndexOnlyScan scans an index
18 : : * IndexOnlyNext retrieve next tuple
19 : : * ExecInitIndexOnlyScan creates and initializes state info.
20 : : * ExecReScanIndexOnlyScan rescans the indexed relation.
21 : : * ExecEndIndexOnlyScan releases all storage.
22 : : * ExecIndexOnlyMarkPos marks scan position.
23 : : * ExecIndexOnlyRestrPos restores scan position.
24 : : * ExecIndexOnlyScanEstimate estimates DSM space needed for
25 : : * parallel index-only scan
26 : : * ExecIndexOnlyScanInitializeDSM initialize DSM for parallel
27 : : * index-only scan
28 : : * ExecIndexOnlyScanReInitializeDSM reinitialize DSM for fresh scan
29 : : * ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30 : : */
31 : : #include "postgres.h"
32 : :
33 : : #include "access/genam.h"
34 : : #include "access/relscan.h"
35 : : #include "access/tableam.h"
36 : : #include "access/tupdesc.h"
37 : : #include "access/visibilitymap.h"
38 : : #include "catalog/pg_type.h"
39 : : #include "executor/executor.h"
40 : : #include "executor/nodeIndexonlyscan.h"
41 : : #include "executor/nodeIndexscan.h"
42 : : #include "miscadmin.h"
43 : : #include "storage/bufmgr.h"
44 : : #include "storage/predicate.h"
45 : : #include "utils/builtins.h"
46 : : #include "utils/rel.h"
47 : :
48 : :
49 : : static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
50 : : static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
51 : : IndexTuple itup, TupleDesc itupdesc);
52 : :
53 : :
54 : : /* ----------------------------------------------------------------
55 : : * IndexOnlyNext
56 : : *
57 : : * Retrieve a tuple from the IndexOnlyScan node's index.
58 : : * ----------------------------------------------------------------
59 : : */
60 : : static TupleTableSlot *
5079 tgl@sss.pgh.pa.us 61 :CBC 2876512 : IndexOnlyNext(IndexOnlyScanState *node)
62 : : {
63 : : EState *estate;
64 : : ExprContext *econtext;
65 : : ScanDirection direction;
66 : : IndexScanDesc scandesc;
67 : : TupleTableSlot *slot;
68 : : ItemPointer tid;
69 : :
70 : : /*
71 : : * extract necessary information from index scan node
72 : : */
73 : 2876512 : estate = node->ss.ps.state;
74 : :
75 : : /*
76 : : * Determine which direction to scan the index in based on the plan's scan
77 : : * direction and the current direction of execution.
78 : : */
948 drowley@postgresql.o 79 : 2876512 : direction = ScanDirectionCombine(estate->es_direction,
80 : : ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
5079 tgl@sss.pgh.pa.us 81 : 2876512 : scandesc = node->ioss_ScanDesc;
82 : 2876512 : econtext = node->ss.ps.ps_ExprContext;
83 : 2876512 : slot = node->ss.ss_ScanTupleSlot;
84 : :
3104 rhaas@postgresql.org 85 [ + + ]: 2876512 : if (scandesc == NULL)
86 : : {
87 : : /*
88 : : * We reach here if the index only scan is not parallel, or if we're
89 : : * serially executing an index only scan that was planned to be
90 : : * parallel.
91 : : */
92 : 4504 : scandesc = index_beginscan(node->ss.ss_currentRelation,
93 : : node->ioss_RelationDesc,
94 : : estate->es_snapshot,
95 : : &node->ioss_Instrument,
96 : : node->ioss_NumScanKeys,
97 : : node->ioss_NumOrderByKeys);
98 : :
99 : 4504 : node->ioss_ScanDesc = scandesc;
100 : :
101 : :
102 : : /* Set it up for index-only scan */
103 : 4504 : node->ioss_ScanDesc->xs_want_itup = true;
104 : 4504 : node->ioss_VMBuffer = InvalidBuffer;
105 : :
106 : : /*
107 : : * If no run-time keys to calculate or they are ready, go ahead and
108 : : * pass the scankeys to the index AM.
109 : : */
110 [ + + + - ]: 4504 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
111 : 4504 : index_rescan(scandesc,
112 : 4504 : node->ioss_ScanKeys,
113 : : node->ioss_NumScanKeys,
114 : 4504 : node->ioss_OrderByKeys,
115 : : node->ioss_NumOrderByKeys);
116 : : }
117 : :
118 : : /*
119 : : * OK, now that we have what we need, fetch the next tuple.
120 : : */
5079 tgl@sss.pgh.pa.us 121 [ + + ]: 2928707 : while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
122 : : {
2371 andres@anarazel.de 123 : 2827882 : bool tuple_from_heap = false;
124 : :
2965 125 [ - + ]: 2827882 : CHECK_FOR_INTERRUPTS();
126 : :
127 : : /*
128 : : * We can skip the heap fetch if the TID references a heap page on
129 : : * which all tuples are known visible to everybody. In any case,
130 : : * we'll use the index tuple not the heap tuple as the data source.
131 : : *
132 : : * Note on Memory Ordering Effects: visibilitymap_get_status does not
133 : : * lock the visibility map buffer, and therefore the result we read
134 : : * here could be slightly stale. However, it can't be stale enough to
135 : : * matter.
136 : : *
137 : : * We need to detect clearing a VM bit due to an insert right away,
138 : : * because the tuple is present in the index page but not visible. The
139 : : * reading of the TID by this scan (using a shared lock on the index
140 : : * buffer) is serialized with the insert of the TID into the index
141 : : * (using an exclusive lock on the index buffer). Because the VM bit
142 : : * is cleared before updating the index, and locking/unlocking of the
143 : : * index page acts as a full memory barrier, we are sure to see the
144 : : * cleared bit if we see a recently-inserted TID.
145 : : *
146 : : * Deletes do not update the index page (only VACUUM will clear out
147 : : * the TID), so the clearing of the VM bit by a delete is not
148 : : * serialized with this test below, and we may see a value that is
149 : : * significantly stale. However, we don't care about the delete right
150 : : * away, because the tuple is still visible until the deleting
151 : : * transaction commits or the statement ends (if it's our
152 : : * transaction). In either case, the lock on the VM buffer will have
153 : : * been released (acting as a write barrier) after clearing the bit.
154 : : * And for us to have a snapshot that includes the deleting
155 : : * transaction (making the tuple invisible), we must have acquired
156 : : * ProcArrayLock after that time, acting as a read barrier.
157 : : *
158 : : * It's worth going through this complexity to avoid needing to lock
159 : : * the VM buffer, which could cause significant contention.
160 : : */
3476 rhaas@postgresql.org 161 [ + + ]: 2827882 : if (!VM_ALL_VISIBLE(scandesc->heapRelation,
162 : : ItemPointerGetBlockNumber(tid),
163 : : &node->ioss_VMBuffer))
164 : : {
165 : : /*
166 : : * Rats, we have to visit the heap to check visibility.
167 : : */
2706 alvherre@alvh.no-ip. 168 [ + + ]: 1018747 : InstrCountTuples2(node, 1);
2284 heikki.linnakangas@i 169 [ + + ]: 1018747 : if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
4836 bruce@momjian.us 170 : 52192 : continue; /* no visible tuple, try next index entry */
171 : :
2284 heikki.linnakangas@i 172 : 966555 : ExecClearTuple(node->ioss_TableSlot);
173 : :
174 : : /*
175 : : * Only MVCC snapshots are supported here, so there should be no
176 : : * need to keep following the HOT chain once a visible entry has
177 : : * been found. If we did want to allow that, we'd need to keep
178 : : * more state to remember not to call index_getnext_tid next time.
179 : : */
2371 andres@anarazel.de 180 [ - + ]: 966555 : if (scandesc->xs_heap_continue)
5079 tgl@sss.pgh.pa.us 181 [ # # ]:UBC 0 : elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
182 : :
183 : : /*
184 : : * Note: at this point we are holding a pin on the heap page, as
185 : : * recorded in scandesc->xs_cbuf. We could release that pin now,
186 : : * but it's not clear whether it's a win to do so. The next index
187 : : * entry might require a visit to the same heap page.
188 : : */
189 : :
2371 andres@anarazel.de 190 :CBC 966555 : tuple_from_heap = true;
191 : : }
192 : :
193 : : /*
194 : : * Fill the scan tuple slot with data from the index. This might be
195 : : * provided in either HeapTuple or IndexTuple format. Conceivably an
196 : : * index AM might fill both fields, in which case we prefer the heap
197 : : * format, since it's probably a bit cheaper to fill a slot from.
198 : : */
3113 tgl@sss.pgh.pa.us 199 [ + + ]: 2775690 : if (scandesc->xs_hitup)
200 : : {
201 : : /*
202 : : * We don't take the trouble to verify that the provided tuple has
203 : : * exactly the slot's format, but it seems worth doing a quick
204 : : * check on the number of fields.
205 : : */
206 [ - + ]: 718842 : Assert(slot->tts_tupleDescriptor->natts ==
207 : : scandesc->xs_hitupdesc->natts);
2332 andres@anarazel.de 208 : 718842 : ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
209 : : }
3113 tgl@sss.pgh.pa.us 210 [ + - ]: 2056848 : else if (scandesc->xs_itup)
493 drowley@postgresql.o 211 : 2056848 : StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
212 : : else
3113 tgl@sss.pgh.pa.us 213 [ # # ]:UBC 0 : elog(ERROR, "no data returned for index-only scan");
214 : :
215 : : /*
216 : : * If the index was lossy, we have to recheck the index quals.
217 : : */
5079 tgl@sss.pgh.pa.us 218 [ + + ]:CBC 2775690 : if (scandesc->xs_recheck)
219 : : {
220 : 7 : econtext->ecxt_scantuple = slot;
1342 221 [ + + ]: 7 : if (!ExecQualAndReset(node->recheckqual, econtext))
222 : : {
223 : : /* Fails recheck, so drop it and loop back for another */
5079 224 [ - + ]: 3 : InstrCountFiltered2(node, 1);
225 : 3 : continue;
226 : : }
227 : : }
228 : :
229 : : /*
230 : : * We don't currently support rechecking ORDER BY distances. (In
231 : : * principle, if the index can support retrieval of the originally
232 : : * indexed value, it should be able to produce an exact distance
233 : : * calculation too. So it's not clear that adding code here for
234 : : * recheck/re-sort would be worth the trouble. But we should at least
235 : : * throw an error if someone tries it.)
236 : : */
3759 237 [ + + + + ]: 2775687 : if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
238 [ + - ]: 3 : ereport(ERROR,
239 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
240 : : errmsg("lossy distance functions are not supported in index-only scans")));
241 : :
242 : : /*
243 : : * If we didn't access the heap, then we'll need to take a predicate
244 : : * lock explicitly, as if we had. For now we do that at page level.
245 : : */
2371 andres@anarazel.de 246 [ + + ]: 2775684 : if (!tuple_from_heap)
4750 kgrittn@postgresql.o 247 : 1809135 : PredicateLockPage(scandesc->heapRelation,
248 : : ItemPointerGetBlockNumber(tid),
249 : : estate->es_snapshot);
250 : :
5079 tgl@sss.pgh.pa.us 251 : 2775684 : return slot;
252 : : }
253 : :
254 : : /*
255 : : * if we get here it means the index scan failed so we are at the end of
256 : : * the scan..
257 : : */
258 : 100825 : return ExecClearTuple(slot);
259 : : }
260 : :
261 : : /*
262 : : * StoreIndexTuple
263 : : * Fill the slot with data from the index tuple.
264 : : *
265 : : * At some point this might be generally-useful functionality, but
266 : : * right now we don't need it elsewhere.
267 : : */
268 : : static void
493 drowley@postgresql.o 269 : 2056848 : StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
270 : : IndexTuple itup, TupleDesc itupdesc)
271 : : {
272 : : /*
273 : : * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
274 : : * not the slot's tupdesc, in case the latter has different datatypes
275 : : * (this happens for btree name_ops in particular). They'd better have
276 : : * the same number of columns though, as well as being datatype-compatible
277 : : * which is something we can't so easily check.
278 : : */
2379 tgl@sss.pgh.pa.us 279 [ - + ]: 2056848 : Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);
280 : :
5079 281 : 2056848 : ExecClearTuple(slot);
2379 282 : 2056848 : index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
283 : :
284 : : /*
285 : : * Copy all name columns stored as cstrings back into a NAMEDATALEN byte
286 : : * sized allocation. We mark this branch as unlikely as generally "name"
287 : : * is used only for the system catalogs and this would have to be a user
288 : : * query running on those or some other user table with an index on a name
289 : : * column.
290 : : */
493 drowley@postgresql.o 291 [ + + ]: 2056848 : if (unlikely(node->ioss_NameCStringAttNums != NULL))
292 : : {
293 : 1036 : int attcount = node->ioss_NameCStringCount;
294 : :
295 [ + + ]: 2072 : for (int idx = 0; idx < attcount; idx++)
296 : : {
297 : 1036 : int attnum = node->ioss_NameCStringAttNums[idx];
298 : : Name name;
299 : :
300 : : /* skip null Datums */
301 [ - + ]: 1036 : if (slot->tts_isnull[attnum])
493 drowley@postgresql.o 302 :UBC 0 : continue;
303 : :
304 : : /* allocate the NAMEDATALEN and copy the datum into that memory */
493 drowley@postgresql.o 305 :CBC 1036 : name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
306 : : NAMEDATALEN);
307 : :
308 : : /* use namestrcpy to zero-pad all trailing bytes */
309 : 1036 : namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
310 : 1036 : slot->tts_values[attnum] = NameGetDatum(name);
311 : : }
312 : : }
313 : :
5079 tgl@sss.pgh.pa.us 314 : 2056848 : ExecStoreVirtualTuple(slot);
315 : 2056848 : }
316 : :
317 : : /*
318 : : * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
319 : : *
320 : : * This can't really happen, since an index can't supply CTID which would
321 : : * be necessary data for any potential EvalPlanQual target relation. If it
322 : : * did happen, the EPQ code would pass us the wrong data, namely a heap
323 : : * tuple not an index tuple. So throw an error.
324 : : */
325 : : static bool
5079 tgl@sss.pgh.pa.us 326 :UBC 0 : IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
327 : : {
328 [ # # ]: 0 : elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
329 : : return false; /* keep compiler quiet */
330 : : }
331 : :
332 : : /* ----------------------------------------------------------------
333 : : * ExecIndexOnlyScan(node)
334 : : * ----------------------------------------------------------------
335 : : */
336 : : static TupleTableSlot *
2973 andres@anarazel.de 337 :CBC 2726133 : ExecIndexOnlyScan(PlanState *pstate)
338 : : {
339 : 2726133 : IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);
340 : :
341 : : /*
342 : : * If we have runtime keys and they've not already been set up, do it now.
343 : : */
5079 tgl@sss.pgh.pa.us 344 [ + + + + ]: 2726133 : if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
345 : 276 : ExecReScan((PlanState *) node);
346 : :
347 : 2726133 : return ExecScan(&node->ss,
348 : : (ExecScanAccessMtd) IndexOnlyNext,
349 : : (ExecScanRecheckMtd) IndexOnlyRecheck);
350 : : }
351 : :
352 : : /* ----------------------------------------------------------------
353 : : * ExecReScanIndexOnlyScan(node)
354 : : *
355 : : * Recalculates the values of any scan keys whose value depends on
356 : : * information known at runtime, then rescans the indexed relation.
357 : : *
358 : : * Updating the scan key was formerly done separately in
359 : : * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
360 : : * rescans of indices and relations/general streams more uniform.
361 : : * ----------------------------------------------------------------
362 : : */
363 : : void
364 : 110722 : ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
365 : : {
366 : : /*
367 : : * If we are doing runtime key calculations (ie, any of the index key
368 : : * values weren't simple Consts), compute the new key values. But first,
369 : : * reset the context so we don't leak memory as each outer tuple is
370 : : * scanned. Note this assumes that we will recalculate *all* runtime keys
371 : : * on each call.
372 : : */
373 [ + + ]: 110722 : if (node->ioss_NumRuntimeKeys != 0)
374 : : {
375 : 110625 : ExprContext *econtext = node->ioss_RuntimeContext;
376 : :
377 : 110625 : ResetExprContext(econtext);
378 : 110625 : ExecIndexEvalRuntimeKeys(econtext,
379 : : node->ioss_RuntimeKeys,
380 : : node->ioss_NumRuntimeKeys);
381 : : }
382 : 110722 : node->ioss_RuntimeKeysReady = true;
383 : :
384 : : /* reset index scan */
3121 rhaas@postgresql.org 385 [ + + ]: 110722 : if (node->ioss_ScanDesc)
386 : 109674 : index_rescan(node->ioss_ScanDesc,
387 : 109674 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
388 : 109674 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
389 : :
5079 tgl@sss.pgh.pa.us 390 : 110722 : ExecScanReScan(&node->ss);
391 : 110722 : }
392 : :
393 : :
394 : : /* ----------------------------------------------------------------
395 : : * ExecEndIndexOnlyScan
396 : : * ----------------------------------------------------------------
397 : : */
398 : : void
399 : 7782 : ExecEndIndexOnlyScan(IndexOnlyScanState *node)
400 : : {
401 : : Relation indexRelationDesc;
402 : : IndexScanDesc indexScanDesc;
403 : :
404 : : /*
405 : : * extract information from the node
406 : : */
407 : 7782 : indexRelationDesc = node->ioss_RelationDesc;
408 : 7782 : indexScanDesc = node->ioss_ScanDesc;
409 : :
410 : : /* Release VM buffer pin, if any. */
411 [ + + ]: 7782 : if (node->ioss_VMBuffer != InvalidBuffer)
412 : : {
413 : 2400 : ReleaseBuffer(node->ioss_VMBuffer);
414 : 2400 : node->ioss_VMBuffer = InvalidBuffer;
415 : : }
416 : :
417 : : /*
418 : : * When ending a parallel worker, copy the statistics gathered by the
419 : : * worker back into shared memory so that it can be picked up by the main
420 : : * process to report in EXPLAIN ANALYZE
421 : : */
179 pg@bowt.ie 422 [ - + - - ]: 7782 : if (node->ioss_SharedInfo != NULL && IsParallelWorker())
423 : : {
424 : : IndexScanInstrumentation *winstrument;
425 : :
179 pg@bowt.ie 426 [ # # ]:UBC 0 : Assert(ParallelWorkerNumber <= node->ioss_SharedInfo->num_workers);
427 : 0 : winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];
428 : :
429 : : /*
430 : : * We have to accumulate the stats rather than performing a memcpy.
431 : : * When a Gather/GatherMerge node finishes it will perform planner
432 : : * shutdown on the workers. On rescan it will spin up new workers
433 : : * which will have a new IndexOnlyScanState and zeroed stats.
434 : : */
435 : 0 : winstrument->nsearches += node->ioss_Instrument.nsearches;
436 : : }
437 : :
438 : : /*
439 : : * close the index relation (no-op if we didn't open it)
440 : : */
5079 tgl@sss.pgh.pa.us 441 [ + + ]:CBC 7782 : if (indexScanDesc)
442 : 4605 : index_endscan(indexScanDesc);
443 [ + + ]: 7782 : if (indexRelationDesc)
444 : 6441 : index_close(indexRelationDesc, NoLock);
445 : 7782 : }
446 : :
447 : : /* ----------------------------------------------------------------
448 : : * ExecIndexOnlyMarkPos
449 : : *
450 : : * Note: we assume that no caller attempts to set a mark before having read
451 : : * at least one tuple. Otherwise, ioss_ScanDesc might still be NULL.
452 : : * ----------------------------------------------------------------
453 : : */
454 : : void
455 : 62019 : ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
456 : : {
2779 457 : 62019 : EState *estate = node->ss.ps.state;
2193 andres@anarazel.de 458 : 62019 : EPQState *epqstate = estate->es_epq_active;
459 : :
460 [ - + ]: 62019 : if (epqstate != NULL)
461 : : {
462 : : /*
463 : : * We are inside an EvalPlanQual recheck. If a test tuple exists for
464 : : * this relation, then we shouldn't access the index at all. We would
465 : : * instead need to save, and later restore, the state of the
466 : : * relsubs_done flag, so that re-fetching the test tuple is possible.
467 : : * However, given the assumption that no caller sets a mark at the
468 : : * start of the scan, we can only get here with relsubs_done[i]
469 : : * already set, and so no state need be saved.
470 : : */
2779 tgl@sss.pgh.pa.us 471 :UBC 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
472 : :
473 [ # # ]: 0 : Assert(scanrelid > 0);
2193 andres@anarazel.de 474 [ # # ]: 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
475 [ # # ]: 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
476 : : {
477 : : /* Verify the claim above */
478 [ # # ]: 0 : if (!epqstate->relsubs_done[scanrelid - 1])
2779 tgl@sss.pgh.pa.us 479 [ # # ]: 0 : elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
480 : 0 : return;
481 : : }
482 : : }
483 : :
5079 tgl@sss.pgh.pa.us 484 :CBC 62019 : index_markpos(node->ioss_ScanDesc);
485 : : }
486 : :
487 : : /* ----------------------------------------------------------------
488 : : * ExecIndexOnlyRestrPos
489 : : * ----------------------------------------------------------------
490 : : */
491 : : void
5079 tgl@sss.pgh.pa.us 492 :UBC 0 : ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
493 : : {
2779 494 : 0 : EState *estate = node->ss.ps.state;
2193 andres@anarazel.de 495 : 0 : EPQState *epqstate = estate->es_epq_active;
496 : :
497 [ # # ]: 0 : if (estate->es_epq_active != NULL)
498 : : {
499 : : /* See comments in ExecIndexMarkPos */
2779 tgl@sss.pgh.pa.us 500 : 0 : Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
501 : :
502 [ # # ]: 0 : Assert(scanrelid > 0);
2193 andres@anarazel.de 503 [ # # ]: 0 : if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
504 [ # # ]: 0 : epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
505 : : {
506 : : /* Verify the claim above */
507 [ # # ]: 0 : if (!epqstate->relsubs_done[scanrelid - 1])
2779 tgl@sss.pgh.pa.us 508 [ # # ]: 0 : elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
509 : 0 : return;
510 : : }
511 : : }
512 : :
5079 513 : 0 : index_restrpos(node->ioss_ScanDesc);
514 : : }
515 : :
516 : : /* ----------------------------------------------------------------
517 : : * ExecInitIndexOnlyScan
518 : : *
519 : : * Initializes the index scan's state information, creates
520 : : * scan keys, and opens the base and index relations.
521 : : *
522 : : * Note: index scans have 2 sets of state information because
523 : : * we have to keep track of the base relation and the
524 : : * index relation.
525 : : * ----------------------------------------------------------------
526 : : */
527 : : IndexOnlyScanState *
5079 tgl@sss.pgh.pa.us 528 :CBC 7807 : ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
529 : : {
530 : : IndexOnlyScanState *indexstate;
531 : : Relation currentRelation;
532 : : Relation indexRelation;
533 : : LOCKMODE lockmode;
534 : : TupleDesc tupDesc;
535 : : int indnkeyatts;
536 : : int namecount;
537 : :
538 : : /*
539 : : * create state structure
540 : : */
541 : 7807 : indexstate = makeNode(IndexOnlyScanState);
542 : 7807 : indexstate->ss.ps.plan = (Plan *) node;
543 : 7807 : indexstate->ss.ps.state = estate;
2973 andres@anarazel.de 544 : 7807 : indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
545 : :
546 : : /*
547 : : * Miscellaneous initialization
548 : : *
549 : : * create expression context for node
550 : : */
5079 tgl@sss.pgh.pa.us 551 : 7807 : ExecAssignExprContext(estate, &indexstate->ss.ps);
552 : :
553 : : /*
554 : : * open the scan relation
555 : : */
4515 556 : 7807 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
557 : :
5079 558 : 7807 : indexstate->ss.ss_currentRelation = currentRelation;
559 : 7807 : indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
560 : :
561 : : /*
562 : : * Build the scan tuple type using the indextlist generated by the
563 : : * planner. We use this, rather than the index's physical tuple
564 : : * descriptor, because the latter contains storage column types not the
565 : : * types of the original datums. (It's the AM's responsibility to return
566 : : * suitable data anyway.)
567 : : */
2482 andres@anarazel.de 568 : 7807 : tupDesc = ExecTypeFromTL(node->indextlist);
2371 569 : 7807 : ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
570 : : &TTSOpsVirtual);
571 : :
572 : : /*
573 : : * We need another slot, in a format that's suitable for the table AM, for
574 : : * when we need to fetch a tuple from the table for rechecking visibility.
575 : : */
2284 heikki.linnakangas@i 576 : 7807 : indexstate->ioss_TableSlot =
577 : 7807 : ExecAllocTableSlot(&estate->es_tupleTable,
578 : : RelationGetDescr(currentRelation),
579 : : table_slot_callbacks(currentRelation));
580 : :
581 : : /*
582 : : * Initialize result type and projection info. The node's targetlist will
583 : : * contain Vars with varno = INDEX_VAR, referencing the scan tuple.
584 : : */
2493 andres@anarazel.de 585 : 7807 : ExecInitResultTypeTL(&indexstate->ss.ps);
3772 tgl@sss.pgh.pa.us 586 : 7807 : ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);
587 : :
588 : : /*
589 : : * initialize child expressions
590 : : *
591 : : * Note: we don't initialize all of the indexorderby expression, only the
592 : : * sub-parts corresponding to runtime keys (see below).
593 : : */
2759 andres@anarazel.de 594 : 7807 : indexstate->ss.ps.qual =
595 : 7807 : ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
1342 tgl@sss.pgh.pa.us 596 : 7807 : indexstate->recheckqual =
597 : 7807 : ExecInitQual(node->recheckqual, (PlanState *) indexstate);
598 : :
599 : : /*
600 : : * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
601 : : * here. This allows an index-advisor plugin to EXPLAIN a plan containing
602 : : * references to nonexistent indexes.
603 : : */
5079 604 [ + + ]: 7807 : if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
605 : 1341 : return indexstate;
606 : :
607 : : /* Open the index relation. */
2347 608 : 6466 : lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
493 drowley@postgresql.o 609 : 6466 : indexRelation = index_open(node->indexid, lockmode);
610 : 6466 : indexstate->ioss_RelationDesc = indexRelation;
611 : :
612 : : /*
613 : : * Initialize index-specific scan state
614 : : */
5079 tgl@sss.pgh.pa.us 615 : 6466 : indexstate->ioss_RuntimeKeysReady = false;
616 : 6466 : indexstate->ioss_RuntimeKeys = NULL;
617 : 6466 : indexstate->ioss_NumRuntimeKeys = 0;
618 : :
619 : : /*
620 : : * build the index scan keys from the index qualification
621 : : */
622 : 6466 : ExecIndexBuildScanKeys((PlanState *) indexstate,
623 : : indexRelation,
624 : : node->indexqual,
625 : : false,
626 : 6466 : &indexstate->ioss_ScanKeys,
627 : : &indexstate->ioss_NumScanKeys,
628 : : &indexstate->ioss_RuntimeKeys,
629 : : &indexstate->ioss_NumRuntimeKeys,
630 : : NULL, /* no ArrayKeys */
631 : : NULL);
632 : :
633 : : /*
634 : : * any ORDER BY exprs have to be turned into scankeys in the same way
635 : : */
636 : 6466 : ExecIndexBuildScanKeys((PlanState *) indexstate,
637 : : indexRelation,
638 : : node->indexorderby,
639 : : true,
640 : 6466 : &indexstate->ioss_OrderByKeys,
641 : : &indexstate->ioss_NumOrderByKeys,
642 : : &indexstate->ioss_RuntimeKeys,
643 : : &indexstate->ioss_NumRuntimeKeys,
644 : : NULL, /* no ArrayKeys */
645 : : NULL);
646 : :
647 : : /*
648 : : * If we have runtime keys, we need an ExprContext to evaluate them. The
649 : : * node's standard context won't do because we want to reset that context
650 : : * for every tuple. So, build another context just like the other one...
651 : : * -tgl 7/11/00
652 : : */
653 [ + + ]: 6466 : if (indexstate->ioss_NumRuntimeKeys != 0)
654 : : {
655 : 1291 : ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
656 : :
657 : 1291 : ExecAssignExprContext(estate, &indexstate->ss.ps);
658 : 1291 : indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
659 : 1291 : indexstate->ss.ps.ps_ExprContext = stdecontext;
660 : : }
661 : : else
662 : : {
663 : 5175 : indexstate->ioss_RuntimeContext = NULL;
664 : : }
665 : :
493 drowley@postgresql.o 666 : 6466 : indexstate->ioss_NameCStringAttNums = NULL;
667 : 6466 : indnkeyatts = indexRelation->rd_index->indnkeyatts;
668 : 6466 : namecount = 0;
669 : :
670 : : /*
671 : : * The "name" type for btree uses text_ops which results in storing
672 : : * cstrings in the indexed keys rather than names. Here we detect that in
673 : : * a generic way in case other index AMs want to do the same optimization.
674 : : * Check for opclasses with an opcintype of NAMEOID and an index tuple
675 : : * descriptor with CSTRINGOID. If any of these are found, create an array
676 : : * marking the index attribute number of each of them. StoreIndexTuple()
677 : : * handles copying the name Datums into a NAMEDATALEN-byte allocation.
678 : : */
679 : :
680 : : /* First, count the number of such index keys */
681 [ + + ]: 14781 : for (int attnum = 0; attnum < indnkeyatts; attnum++)
682 : : {
431 683 [ + + ]: 8315 : if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
493 684 [ + - ]: 910 : indexRelation->rd_opcintype[attnum] == NAMEOID)
685 : 910 : namecount++;
686 : : }
687 : :
688 [ + + ]: 6466 : if (namecount > 0)
689 : : {
690 : 910 : int idx = 0;
691 : :
692 : : /*
693 : : * Now create an array to mark the attribute numbers of the keys that
694 : : * need to be converted from cstring to name.
695 : : */
696 : 910 : indexstate->ioss_NameCStringAttNums = (AttrNumber *)
697 : 910 : palloc(sizeof(AttrNumber) * namecount);
698 : :
699 [ + + ]: 2769 : for (int attnum = 0; attnum < indnkeyatts; attnum++)
700 : : {
431 701 [ + + ]: 1859 : if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
493 702 [ + - ]: 910 : indexRelation->rd_opcintype[attnum] == NAMEOID)
703 : 910 : indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
704 : : }
705 : : }
706 : :
707 : 6466 : indexstate->ioss_NameCStringCount = namecount;
708 : :
709 : : /*
710 : : * all done.
711 : : */
5079 tgl@sss.pgh.pa.us 712 : 6466 : return indexstate;
713 : : }
714 : :
715 : : /* ----------------------------------------------------------------
716 : : * Parallel Index-only Scan Support
717 : : * ----------------------------------------------------------------
718 : : */
719 : :
720 : : /* ----------------------------------------------------------------
721 : : * ExecIndexOnlyScanEstimate
722 : : *
723 : : * Compute the amount of space we'll need in the parallel
724 : : * query DSM, and inform pcxt->estimator about our needs.
725 : : * ----------------------------------------------------------------
726 : : */
727 : : void
3121 rhaas@postgresql.org 728 : 29 : ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
729 : : ParallelContext *pcxt)
730 : : {
731 : 29 : EState *estate = node->ss.ps.state;
179 pg@bowt.ie 732 : 29 : bool instrument = (node->ss.ps.instrument != NULL);
733 : 29 : bool parallel_aware = node->ss.ps.plan->parallel_aware;
734 : :
735 [ + - + + ]: 29 : if (!instrument && !parallel_aware)
736 : : {
737 : : /* No DSM required by the scan */
738 : 6 : return;
739 : : }
740 : :
3121 rhaas@postgresql.org 741 : 23 : node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
742 : : node->ioss_NumScanKeys,
743 : : node->ioss_NumOrderByKeys,
744 : : estate->es_snapshot,
745 : : instrument, parallel_aware,
746 : : pcxt->nworkers);
747 : 23 : shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
748 : 23 : shm_toc_estimate_keys(&pcxt->estimator, 1);
749 : : }
750 : :
751 : : /* ----------------------------------------------------------------
752 : : * ExecIndexOnlyScanInitializeDSM
753 : : *
754 : : * Set up a parallel index-only scan descriptor.
755 : : * ----------------------------------------------------------------
756 : : */
757 : : void
758 : 29 : ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
759 : : ParallelContext *pcxt)
760 : : {
761 : 29 : EState *estate = node->ss.ps.state;
762 : : ParallelIndexScanDesc piscan;
179 pg@bowt.ie 763 : 29 : bool instrument = node->ss.ps.instrument != NULL;
764 : 29 : bool parallel_aware = node->ss.ps.plan->parallel_aware;
765 : :
766 [ + - + + ]: 29 : if (!instrument && !parallel_aware)
767 : : {
768 : : /* No DSM required by the scan */
769 : 6 : return;
770 : : }
771 : :
3121 rhaas@postgresql.org 772 : 23 : piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
773 : 23 : index_parallelscan_initialize(node->ss.ss_currentRelation,
774 : : node->ioss_RelationDesc,
775 : : estate->es_snapshot,
776 : : instrument, parallel_aware, pcxt->nworkers,
777 : : &node->ioss_SharedInfo, piscan);
778 : 23 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
779 : :
179 pg@bowt.ie 780 [ - + ]: 23 : if (!parallel_aware)
781 : : {
782 : : /* Only here to initialize SharedInfo in DSM */
179 pg@bowt.ie 783 :UBC 0 : return;
784 : : }
785 : :
3121 rhaas@postgresql.org 786 :CBC 23 : node->ioss_ScanDesc =
787 : 23 : index_beginscan_parallel(node->ss.ss_currentRelation,
788 : : node->ioss_RelationDesc,
789 : : &node->ioss_Instrument,
790 : : node->ioss_NumScanKeys,
791 : : node->ioss_NumOrderByKeys,
792 : : piscan);
793 : 23 : node->ioss_ScanDesc->xs_want_itup = true;
794 : 23 : node->ioss_VMBuffer = InvalidBuffer;
795 : :
796 : : /*
797 : : * If no run-time keys to calculate or they are ready, go ahead and pass
798 : : * the scankeys to the index AM.
799 : : */
3104 800 [ - + - - ]: 23 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
3121 801 : 23 : index_rescan(node->ioss_ScanDesc,
802 : 23 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
803 : 23 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
804 : : }
805 : :
806 : : /* ----------------------------------------------------------------
807 : : * ExecIndexOnlyScanReInitializeDSM
808 : : *
809 : : * Reset shared state before beginning a fresh scan.
810 : : * ----------------------------------------------------------------
811 : : */
812 : : void
2929 tgl@sss.pgh.pa.us 813 : 6 : ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
814 : : ParallelContext *pcxt)
815 : : {
179 pg@bowt.ie 816 [ - + ]: 6 : Assert(node->ss.ps.plan->parallel_aware);
2929 tgl@sss.pgh.pa.us 817 : 6 : index_parallelrescan(node->ioss_ScanDesc);
818 : 6 : }
819 : :
820 : : /* ----------------------------------------------------------------
821 : : * ExecIndexOnlyScanInitializeWorker
822 : : *
823 : : * Copy relevant information from TOC into planstate.
824 : : * ----------------------------------------------------------------
825 : : */
826 : : void
2851 andres@anarazel.de 827 : 121 : ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
828 : : ParallelWorkerContext *pwcxt)
829 : : {
830 : : ParallelIndexScanDesc piscan;
179 pg@bowt.ie 831 : 121 : bool instrument = node->ss.ps.instrument != NULL;
832 : 121 : bool parallel_aware = node->ss.ps.plan->parallel_aware;
833 : :
834 [ + - + + ]: 121 : if (!instrument && !parallel_aware)
835 : : {
836 : : /* No DSM required by the scan */
837 : 18 : return;
838 : : }
839 : :
2851 andres@anarazel.de 840 : 103 : piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
841 : :
179 pg@bowt.ie 842 [ - + ]: 103 : if (instrument)
179 pg@bowt.ie 843 :UBC 0 : node->ioss_SharedInfo = (SharedIndexScanInstrumentation *)
844 : 0 : OffsetToPointer(piscan, piscan->ps_offset_ins);
845 : :
179 pg@bowt.ie 846 [ - + ]:CBC 103 : if (!parallel_aware)
847 : : {
848 : : /* Only here to set up worker node's SharedInfo */
179 pg@bowt.ie 849 :UBC 0 : return;
850 : : }
851 : :
3121 rhaas@postgresql.org 852 :CBC 103 : node->ioss_ScanDesc =
853 : 103 : index_beginscan_parallel(node->ss.ss_currentRelation,
854 : : node->ioss_RelationDesc,
855 : : &node->ioss_Instrument,
856 : : node->ioss_NumScanKeys,
857 : : node->ioss_NumOrderByKeys,
858 : : piscan);
859 : 103 : node->ioss_ScanDesc->xs_want_itup = true;
860 : :
861 : : /*
862 : : * If no run-time keys to calculate or they are ready, go ahead and pass
863 : : * the scankeys to the index AM.
864 : : */
3104 865 [ - + - - ]: 103 : if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
3121 866 : 103 : index_rescan(node->ioss_ScanDesc,
867 : 103 : node->ioss_ScanKeys, node->ioss_NumScanKeys,
868 : 103 : node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
869 : : }
870 : :
871 : : /* ----------------------------------------------------------------
872 : : * ExecIndexOnlyScanRetrieveInstrumentation
873 : : *
874 : : * Transfer index-only scan statistics from DSM to private memory.
875 : : * ----------------------------------------------------------------
876 : : */
877 : : void
179 pg@bowt.ie 878 :UBC 0 : ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
879 : : {
880 : 0 : SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo;
881 : : size_t size;
882 : :
883 [ # # ]: 0 : if (SharedInfo == NULL)
884 : 0 : return;
885 : :
886 : : /* Create a copy of SharedInfo in backend-local memory */
887 : 0 : size = offsetof(SharedIndexScanInstrumentation, winstrument) +
888 : 0 : SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
889 : 0 : node->ioss_SharedInfo = palloc(size);
890 : 0 : memcpy(node->ioss_SharedInfo, SharedInfo, size);
891 : : }
|