Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * genam.c
4 : : * general index access method routines
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/index/genam.c
12 : : *
13 : : * NOTES
14 : : * many of the old access method routines have been turned into
15 : : * macros and moved to genam.h -cim 4/30/91
16 : : *
17 : : *-------------------------------------------------------------------------
18 : : */
19 : :
20 : : #include "postgres.h"
21 : :
22 : : #include "access/genam.h"
23 : : #include "access/heapam.h"
24 : : #include "access/relscan.h"
25 : : #include "access/tableam.h"
26 : : #include "access/transam.h"
27 : : #include "catalog/index.h"
28 : : #include "lib/stringinfo.h"
29 : : #include "miscadmin.h"
30 : : #include "storage/bufmgr.h"
31 : : #include "storage/procarray.h"
32 : : #include "utils/acl.h"
33 : : #include "utils/injection_point.h"
34 : : #include "utils/lsyscache.h"
35 : : #include "utils/rel.h"
36 : : #include "utils/rls.h"
37 : : #include "utils/ruleutils.h"
38 : : #include "utils/snapmgr.h"
39 : :
40 : :
41 : : /* ----------------------------------------------------------------
42 : : * general access method routines
43 : : *
44 : : * All indexed access methods use an identical scan structure.
45 : : * We don't know how the various AMs do locking, however, so we don't
46 : : * do anything about that here.
47 : : *
48 : : * The intent is that an AM implementor will define a beginscan routine
49 : : * that calls RelationGetIndexScan, to fill in the scan, and then does
50 : : * whatever kind of locking he wants.
51 : : *
52 : : * At the end of a scan, the AM's endscan routine undoes the locking,
53 : : * but does *not* call IndexScanEnd --- the higher-level index_endscan
54 : : * routine does that. (We can't do it in the AM because index_endscan
55 : : * still needs to touch the IndexScanDesc after calling the AM.)
56 : : *
57 : : * Because of this, the AM does not have a choice whether to call
58 : : * RelationGetIndexScan or not; its beginscan routine must return an
59 : : * object made by RelationGetIndexScan. This is kinda ugly but not
60 : : * worth cleaning up now.
61 : : * ----------------------------------------------------------------
62 : : */
63 : :
64 : : /* ----------------
65 : : * RelationGetIndexScan -- Create and fill an IndexScanDesc.
66 : : *
67 : : * This routine creates an index scan structure and sets up initial
68 : : * contents for it.
69 : : *
70 : : * Parameters:
71 : : * indexRelation -- index relation for scan.
72 : : * nkeys -- count of scan keys (index qual conditions).
73 : : * norderbys -- count of index order-by operators.
74 : : *
75 : : * Returns:
76 : : * An initialized IndexScanDesc (xs_snapshot is left for the caller to set).
77 : : * ----------------
78 : : */
79 : : IndexScanDesc
5633 tgl@sss.pgh.pa.us 80 :CBC 9208435 : RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
81 : : {
82 : : IndexScanDesc scan;
83 : :
146 michael@paquier.xyz 84 :GNC 9208435 : scan = palloc_object(IndexScanDescData);
85 : :
8751 tgl@sss.pgh.pa.us 86 :CBC 9208435 : scan->heapRelation = NULL; /* may be set later */
2612 andres@anarazel.de 87 : 9208435 : scan->xs_heapfetch = NULL;
8751 tgl@sss.pgh.pa.us 88 : 9208435 : scan->indexRelation = indexRelation;
3240 89 : 9208435 : scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
8751 90 : 9208435 : scan->numberOfKeys = nkeys;
5633 91 : 9208435 : scan->numberOfOrderBys = norderbys;
92 : :
93 : : /*
94 : : * We allocate key workspace here, but it won't get filled until amrescan.
95 : : */
8751 96 [ + + ]: 9208435 : if (nkeys > 0)
146 michael@paquier.xyz 97 :GNC 9200670 : scan->keyData = palloc_array(ScanKeyData, nkeys);
98 : : else
8751 tgl@sss.pgh.pa.us 99 :CBC 7765 : scan->keyData = NULL;
5633 100 [ + + ]: 9208435 : if (norderbys > 0)
146 michael@paquier.xyz 101 :GNC 119 : scan->orderByData = palloc_array(ScanKeyData, norderbys);
102 : : else
5633 tgl@sss.pgh.pa.us 103 :CBC 9208316 : scan->orderByData = NULL;
104 : :
5077 bruce@momjian.us 105 : 9208435 : scan->xs_want_itup = false; /* may be set later */
106 : :
107 : : /*
108 : : * During recovery we ignore killed tuples and don't bother to kill them
109 : : * either. We do this because the xmin on the primary node could easily be
110 : : * later than the xmin on the standby node, so that what the primary
111 : : * thinks is killed is supposed to be visible on standby. So for correct
112 : : * MVCC for queries during recovery we must ignore these hints and check
113 : : * all tuples. Do *not* set ignore_killed_tuples to true when running in a
114 : : * transaction that was started during recovery. xactStartedInRecovery
115 : : * should not be altered by index AMs.
116 : : */
8747 tgl@sss.pgh.pa.us 117 : 9208435 : scan->kill_prior_tuple = false;
5981 simon@2ndQuadrant.co 118 : 9208435 : scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
119 : 9208435 : scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
120 : :
10467 bruce@momjian.us 121 : 9208435 : scan->opaque = NULL;
420 pg@bowt.ie 122 : 9208435 : scan->instrument = NULL;
123 : :
5324 tgl@sss.pgh.pa.us 124 : 9208435 : scan->xs_itup = NULL;
5315 125 : 9208435 : scan->xs_itupdesc = NULL;
3354 126 : 9208435 : scan->xs_hitup = NULL;
127 : 9208435 : scan->xs_hitupdesc = NULL;
128 : :
10108 bruce@momjian.us 129 : 9208435 : return scan;
130 : : }
131 : :
132 : : /* ----------------
133 : : * IndexScanEnd -- End an index scan.
134 : : *
135 : : * This routine just releases the storage acquired by
136 : : * RelationGetIndexScan(): the key and order-by workspace (if any)
137 : : * and the descriptor itself. Any AM-level resources are assumed to
138 : : * already have been released by the AM's endscan routine.
139 : : *
140 : : * Returns:
141 : : * None.
142 : : * ----------------
143 : : */
144 : : void
9623 tgl@sss.pgh.pa.us 145 : 9207104 : IndexScanEnd(IndexScanDesc scan)
146 : : {
147 [ + + ]: 9207104 : if (scan->keyData != NULL)
148 : 9199362 : pfree(scan->keyData);
5633 149 [ + + ]: 9207104 : if (scan->orderByData != NULL)
150 : 115 : pfree(scan->orderByData);
151 : :
9623 152 : 9207104 : pfree(scan);
153 : 9207104 : }
154 : :
155 : : /*
156 : : * BuildIndexValueDescription
157 : : *
158 : : * Construct a string describing the contents of an index entry, in the
159 : : * form "(key_name, ...)=(key_value, ...)". This is currently used for
160 : : * building unique-constraint and exclusion-constraint error messages, and
161 : : * logical replication conflict error messages, so only key columns of the
162 : : * index are checked and printed.
163 : : *
164 : : * Note that NULL is returned if row-level security is enabled for the table
165 : : * or the user lacks permission to view all of the key columns. Returning a
166 : : * partial key seems unlikely to be useful and we have no way to know which of
167 : : * the columns the user provided (unlike in ExecBuildSlotValueDescription).
168 : : *
169 : : * The passed-in values/nulls arrays are the "raw" input to the index AM,
170 : : * e.g. results of FormIndexDatum --- this is not necessarily what is stored
171 : : * in the index, but it's what the user perceives to be stored.
172 : : *
173 : : * Note: if you change anything here, check whether
174 : : * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
175 : : * change.
176 : : */
177 : : char *
6121 178 : 716 : BuildIndexValueDescription(Relation indexRelation,
179 : : const Datum *values, const bool *isnull)
180 : : {
181 : : StringInfoData buf;
182 : : Form_pg_index idxrec;
183 : : int indnkeyatts;
184 : : int i;
185 : : int keyno;
4131 sfrost@snowman.net 186 : 716 : Oid indexrelid = RelationGetRelid(indexRelation);
187 : : Oid indrelid;
188 : : AclResult aclresult;
189 : :
2950 teodor@sigaev.ru 190 : 716 : indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
191 : :
192 : : /*
193 : : * Check permissions- if the user does not have access to view all of the
194 : : * key columns then return NULL to avoid leaking data.
195 : : *
196 : : * First check if RLS is enabled for the relation. If so, return NULL to
197 : : * avoid leaking data.
198 : : *
199 : : * Next we need to check table-level SELECT access and then, if there is
200 : : * no access there, check column-level permissions.
201 : : */
2850 alvherre@alvh.no-ip. 202 : 716 : idxrec = indexRelation->rd_index;
4131 sfrost@snowman.net 203 : 716 : indrelid = idxrec->indrelid;
204 [ - + ]: 716 : Assert(indexrelid == idxrec->indexrelid);
205 : :
206 : : /* RLS check- if RLS is enabled then we don't return anything. */
3934 mail@joeconway.com 207 [ + + ]: 716 : if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
4131 sfrost@snowman.net 208 : 8 : return NULL;
209 : :
210 : : /* Table-level SELECT is enough, if the user has it */
211 : 708 : aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
212 [ + + ]: 708 : if (aclresult != ACLCHECK_OK)
213 : : {
214 : : /*
215 : : * No table-level access, so step through the columns in the index and
216 : : * make sure the user has SELECT rights on all of them.
217 : : */
2850 alvherre@alvh.no-ip. 218 [ + - ]: 16 : for (keyno = 0; keyno < indnkeyatts; keyno++)
219 : : {
4131 sfrost@snowman.net 220 : 16 : AttrNumber attnum = idxrec->indkey.values[keyno];
221 : :
222 : : /*
223 : : * Note that if attnum == InvalidAttrNumber, then this is an index
224 : : * based on an expression and we return no detail rather than try
225 : : * to figure out what column(s) the expression includes and if the
226 : : * user has SELECT rights on them.
227 : : */
4114 228 [ + - + + ]: 32 : if (attnum == InvalidAttrNumber ||
229 : 16 : pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
230 : : ACL_SELECT) != ACLCHECK_OK)
231 : : {
232 : : /* No access; nothing is allocated yet, so just return */
4131 233 : 8 : return NULL;
234 : : }
235 : : }
236 : : }
237 : :
6121 tgl@sss.pgh.pa.us 238 : 700 : initStringInfo(&buf);
239 : 700 : appendStringInfo(&buf, "(%s)=(",
240 : : pg_get_indexdef_columns(indexrelid, true));
241 : :
2950 teodor@sigaev.ru 242 [ + + ]: 1662 : for (i = 0; i < indnkeyatts; i++)
243 : : {
244 : : char *val;
245 : :
6121 tgl@sss.pgh.pa.us 246 [ + + ]: 962 : if (isnull[i])
247 : 12 : val = "null";
248 : : else
249 : : {
250 : : Oid foutoid;
251 : : bool typisvarlena;
252 : :
253 : : /*
254 : : * The provided data is not necessarily of the type stored in the
255 : : * index; rather it is of the index opclass's input type. So look
256 : : * at rd_opcintype not the index tupdesc.
257 : : *
258 : : * Note: this is a bit shaky for opclasses that have pseudotype
259 : : * input types such as ANYARRAY or RECORD. Currently, the
260 : : * typoutput functions associated with the pseudotypes will work
261 : : * okay, but we might have to try harder in future.
262 : : */
5993 263 : 950 : getTypeOutputInfo(indexRelation->rd_opcintype[i],
264 : : &foutoid, &typisvarlena);
6121 265 : 950 : val = OidOutputFunctionCall(foutoid, values[i]);
266 : : }
267 : :
268 [ + + ]: 962 : if (i > 0)
269 : 262 : appendStringInfoString(&buf, ", ");
270 : 962 : appendStringInfoString(&buf, val);
271 : : }
272 : :
273 : 700 : appendStringInfoChar(&buf, ')');
274 : :
275 : 700 : return buf.data;
276 : : }
277 : :
278 : : /*
279 : : * Get the snapshotConflictHorizon from the table entries pointed to by the
280 : : * index tuples being deleted using an AM-generic approach.
281 : : *
282 : : * This is a table_index_delete_tuples() shim used by index AMs that only need
283 : : * to consult the tableam to get a snapshotConflictHorizon value, and only
284 : : * expect to delete index tuples that are already known deletable (typically
285 : : * due to having LP_DEAD bits set). When a snapshotConflictHorizon value
286 : : * isn't needed in index AM's deletion WAL record, it is safe for it to skip
287 : : * calling here entirely.
288 : : *
289 : : * We assume that caller index AM uses the standard IndexTuple representation,
290 : : * with table TIDs stored in the t_tid field. We also expect (and assert)
291 : : * that the line pointers on page for 'itemnos' offsets are already marked
292 : : * LP_DEAD.
293 : : */
294 : : TransactionId
2597 andres@anarazel.de 295 :GBC 4 : index_compute_xid_horizon_for_tuples(Relation irel,
296 : : Relation hrel,
297 : : Buffer ibuf,
298 : : OffsetNumber *itemnos,
299 : : int nitems)
300 : : {
301 : : TM_IndexDeleteOp delstate;
1265 pg@bowt.ie 302 : 4 : TransactionId snapshotConflictHorizon = InvalidTransactionId;
2597 andres@anarazel.de 303 : 4 : Page ipage = BufferGetPage(ibuf);
304 : : IndexTuple itup;
305 : :
1925 pg@bowt.ie 306 [ - + ]: 4 : Assert(nitems > 0);
307 : :
1643 308 : 4 : delstate.irel = irel;
309 : 4 : delstate.iblknum = BufferGetBlockNumber(ibuf);
1938 310 : 4 : delstate.bottomup = false;
311 : 4 : delstate.bottomupfreespace = 0;
312 : 4 : delstate.ndeltids = 0;
146 michael@paquier.xyz 313 :GNC 4 : delstate.deltids = palloc_array(TM_IndexDelete, nitems);
314 : 4 : delstate.status = palloc_array(TM_IndexStatus, nitems);
315 : :
316 : : /* identify what the index tuples about to be deleted point to */
2597 andres@anarazel.de 317 [ + + ]:GBC 888 : for (int i = 0; i < nitems; i++)
318 : : {
1643 pg@bowt.ie 319 : 884 : OffsetNumber offnum = itemnos[i];
320 : : ItemId iitemid;
321 : :
322 : 884 : iitemid = PageGetItemId(ipage, offnum);
2597 andres@anarazel.de 323 : 884 : itup = (IndexTuple) PageGetItem(ipage, iitemid);
324 : :
1938 pg@bowt.ie 325 [ - + ]: 884 : Assert(ItemIdIsDead(iitemid));
326 : :
327 : 884 : ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid);
328 : 884 : delstate.deltids[i].id = delstate.ndeltids;
1643 329 : 884 : delstate.status[i].idxoffnum = offnum;
1938 330 : 884 : delstate.status[i].knowndeletable = true; /* LP_DEAD-marked */
331 : 884 : delstate.status[i].promising = false; /* unused */
332 : 884 : delstate.status[i].freespace = 0; /* unused */
333 : :
334 : 884 : delstate.ndeltids++;
335 : : }
336 : :
337 : : /* let the tableam determine the actual snapshotConflictHorizon */
1265 338 : 4 : snapshotConflictHorizon = table_index_delete_tuples(hrel, &delstate);
339 : :
340 : : /* assert tableam agrees that all items are deletable */
1938 341 [ - + ]: 4 : Assert(delstate.ndeltids == nitems);
342 : :
343 : 4 : pfree(delstate.deltids);
344 : 4 : pfree(delstate.status);
345 : :
1265 346 : 4 : return snapshotConflictHorizon;
347 : : }
348 : :
349 : :
350 : : /* ----------------------------------------------------------------
351 : : * heap-or-index-scan access to system catalogs
352 : : *
353 : : * These functions support system catalog accesses that normally use
354 : : * an index but need to be capable of being switched to heap scans
355 : : * if the system indexes are unavailable.
356 : : *
357 : : * The specified scan keys must be compatible with the named index.
358 : : * Generally this means that they must constrain either all columns
359 : : * of the index, or the first K columns of an N-column index.
360 : : *
361 : : * These routines could work with non-system tables, actually,
362 : : * but they're only useful when there is a known index to use with
363 : : * the given scan keys; so in practice they're only good for
364 : : * predetermined types of scans of system catalogs.
365 : : * ----------------------------------------------------------------
366 : : */
367 : :
368 : : /*
369 : : * systable_beginscan --- set up for heap-or-index scan
370 : : *
371 : : * rel: catalog to scan, already opened and suitably locked
372 : : * indexId: OID of index to conditionally use
373 : : * indexOK: if false, forces a heap scan (see notes below)
374 : : * snapshot: time qual to use (NULL for a recent catalog snapshot)
375 : : * nkeys, key: scan keys
376 : : *
377 : : * The attribute numbers in the scan key should be set for the heap case.
378 : : * If we choose to index, we convert them to 1..n to reference the index
379 : : * columns. Note this means every scan key must reference a column of the
380 : : * index! The normal, index-using case raises an error if one does not,
381 : : * but this won't be checked if the heapscan path is taken.
382 : : *
383 : : * The routine checks the normal cases for whether an indexscan is safe,
384 : : * but caller can make additional checks and pass indexOK=false if needed.
385 : : * In the standard case indexOK can simply be constant true.
386 : : */
387 : : SysScanDesc
8751 tgl@sss.pgh.pa.us 388 :CBC 9121997 : systable_beginscan(Relation heapRelation,
389 : : Oid indexId,
390 : : bool indexOK,
391 : : Snapshot snapshot,
392 : : int nkeys, ScanKey key)
393 : : {
394 : : SysScanDesc sysscan;
395 : : Relation irel;
396 : :
397 : : /*
398 : : * If this backend promised that it won't access shared catalogs during
399 : : * logical decoding, this is the right place to verify it.
400 : : */
28 alvherre@kurilemu.de 401 [ + + + + :GNC 9121997 : Assert(!HistoricSnapshotActive() ||
- + ]
402 : : accessSharedCatalogsInDecoding ||
403 : : !heapRelation->rd_rel->relisshared);
404 : :
7691 tgl@sss.pgh.pa.us 405 [ + + ]:CBC 9121997 : if (indexOK &&
7425 peter_e@gmx.net 406 [ + + ]: 8975976 : !IgnoreSystemIndexes &&
7691 tgl@sss.pgh.pa.us 407 [ + + ]: 8900663 : !ReindexIsProcessingIndex(indexId))
7218 408 : 8892004 : irel = index_open(indexId, AccessShareLock);
409 : : else
8259 410 : 229993 : irel = NULL;
411 : :
146 michael@paquier.xyz 412 :GNC 9121992 : sysscan = palloc_object(SysScanDescData);
413 : :
8751 tgl@sss.pgh.pa.us 414 :CBC 9121992 : sysscan->heap_rel = heapRelation;
8259 415 : 9121992 : sysscan->irel = irel;
2612 andres@anarazel.de 416 : 9121992 : sysscan->slot = table_slot_create(heapRelation, NULL);
417 : :
4690 rhaas@postgresql.org 418 [ + + ]: 9121992 : if (snapshot == NULL)
419 : : {
4382 bruce@momjian.us 420 : 8441547 : Oid relid = RelationGetRelid(heapRelation);
421 : :
4690 rhaas@postgresql.org 422 : 8441547 : snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
423 : 8441547 : sysscan->snapshot = snapshot;
424 : : }
425 : : else
426 : : {
427 : : /* Caller is responsible for any snapshot. */
428 : 680445 : sysscan->snapshot = NULL;
429 : : }
430 : :
431 : : /*
432 : : * If CheckXidAlive is set then set a flag to indicate that system table
433 : : * scan is in-progress. See detailed comments in xact.c where these
434 : : * variables are declared.
435 : : */
96 andres@anarazel.de 436 [ + + ]:GNC 9121992 : if (TransactionIdIsValid(CheckXidAlive))
437 : 1052 : bsysscan = true;
438 : :
8259 tgl@sss.pgh.pa.us 439 [ + + ]:CBC 9121992 : if (irel)
440 : : {
441 : : int i;
442 : : ScanKey idxkey;
443 : :
635 peter@eisentraut.org 444 : 8891999 : idxkey = palloc_array(ScanKeyData, nkeys);
445 : :
446 : : /* Convert attribute numbers to be index column numbers. */
8841 tgl@sss.pgh.pa.us 447 [ + + ]: 23704429 : for (i = 0; i < nkeys; i++)
448 : : {
449 : : int j;
450 : :
635 peter@eisentraut.org 451 : 14812430 : memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
452 : :
2950 teodor@sigaev.ru 453 [ + - ]: 21849406 : for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
454 : : {
6389 heikki.linnakangas@i 455 [ + + ]: 21849406 : if (key[i].sk_attno == irel->rd_index->indkey.values[j])
456 : : {
635 peter@eisentraut.org 457 : 14812430 : idxkey[i].sk_attno = j + 1;
6389 heikki.linnakangas@i 458 : 14812430 : break;
459 : : }
460 : : }
2950 teodor@sigaev.ru 461 [ - + ]: 14812430 : if (j == IndexRelationGetNumberOfAttributes(irel))
6389 heikki.linnakangas@i 462 [ # # ]:UBC 0 : elog(ERROR, "column is not in index");
463 : : }
464 : :
7218 tgl@sss.pgh.pa.us 465 :CBC 8891999 : sysscan->iscan = index_beginscan(heapRelation, irel,
466 : : snapshot, NULL, nkeys, 0,
467 : : SO_NONE);
635 peter@eisentraut.org 468 : 8891999 : index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
8841 tgl@sss.pgh.pa.us 469 : 8891999 : sysscan->scan = NULL;
470 : :
518 peter@eisentraut.org 471 : 8891999 : pfree(idxkey);
472 : : }
473 : : else
474 : : {
475 : : /*
476 : : * We disallow synchronized scans when forced to use a heapscan on a
477 : : * catalog. In most cases the desired rows are near the front, so
478 : : * that the unpredictable start point of a syncscan is a serious
479 : : * disadvantage; and there are no compensating advantages, because
480 : : * it's unlikely that such scans will occur in parallel.
481 : : */
2612 andres@anarazel.de 482 : 229993 : sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
483 : : nkeys, key,
484 : : true, false);
8841 tgl@sss.pgh.pa.us 485 : 229993 : sysscan->iscan = NULL;
486 : : }
487 : :
488 : 9121992 : return sysscan;
489 : : }
490 : :
491 : : /*
492 : : * HandleConcurrentAbort - Handle concurrent abort of the CheckXidAlive.
493 : : *
494 : : * Error out if CheckXidAlive is set but is neither in progress nor committed.
495 : : * We can't directly use TransactionIdDidAbort, as after a crash such a
496 : : * transaction might not have been marked as aborted. See detailed comments
497 : : * in xact.c where the variable is declared.
498 : : */
499 : : static inline void
153 nathan@postgresql.or 500 :GNC 18814879 : HandleConcurrentAbort(void)
501 : : {
2096 akapila@postgresql.o 502 [ + + ]:CBC 18814879 : if (TransactionIdIsValid(CheckXidAlive) &&
503 [ + + ]: 1720 : !TransactionIdIsInProgress(CheckXidAlive) &&
504 [ + - ]: 8 : !TransactionIdDidCommit(CheckXidAlive))
505 [ + - ]: 8 : ereport(ERROR,
506 : : (errcode(ERRCODE_TRANSACTION_ROLLBACK),
507 : : errmsg("transaction aborted during system catalog scan")));
508 : 18814871 : }
509 : :
510 : : /*
511 : : * systable_getnext --- get next tuple in a heap-or-index scan
512 : : *
513 : : * Returns NULL if no more tuples are available; the scan always runs forward.
514 : : *
515 : : * Note that returned tuple is a reference to data in a disk buffer;
516 : : * it must not be modified, and should be presumed inaccessible after
517 : : * next getnext() or endscan() call.
518 : : *
519 : : * XXX: It'd probably make sense to offer a slot based interface, at least
520 : : * optionally.
521 : : */
522 : : HeapTuple
8841 tgl@sss.pgh.pa.us 523 : 18509179 : systable_getnext(SysScanDesc sysscan)
524 : : {
2612 andres@anarazel.de 525 : 18509179 : HeapTuple htup = NULL;
526 : :
8841 tgl@sss.pgh.pa.us 527 [ + + ]: 18509179 : if (sysscan->irel)
528 : : {
2612 andres@anarazel.de 529 [ + + ]: 16841121 : if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
530 : : {
531 : : bool shouldFree;
532 : :
533 : 12827542 : htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
534 [ - + ]: 12827542 : Assert(!shouldFree);
535 : :
536 : : /*
537 : : * We currently don't need to support lossy index operators for
538 : : * any system catalog scan. It could be done here, using the scan
539 : : * keys to drive the operator calls, if we arranged to save the
540 : : * heap attnums during systable_beginscan(); this is practical
541 : : * because we still wouldn't need to support indexes on
542 : : * expressions.
543 : : */
544 [ - + ]: 12827542 : if (sysscan->iscan->xs_recheck)
2612 andres@anarazel.de 545 [ # # ]:UBC 0 : elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
546 : : }
547 : : }
548 : : else
549 : : {
2612 andres@anarazel.de 550 [ + + ]:CBC 1668058 : if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot))
551 : : {
552 : : bool shouldFree;
553 : :
554 : 1613249 : htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
555 [ - + ]: 1613249 : Assert(!shouldFree);
556 : : }
557 : : }
558 : :
559 : : /*
560 : : * Handle the concurrent abort while fetching the catalog tuple during
561 : : * logical streaming of a transaction.
562 : : */
2096 akapila@postgresql.o 563 : 18509174 : HandleConcurrentAbort();
564 : :
8841 tgl@sss.pgh.pa.us 565 : 18509166 : return htup;
566 : : }
567 : :
568 : : /*
569 : : * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
570 : : *
571 : : * In particular, determine if this tuple would be visible to a catalog scan
572 : : * that started now. We don't handle the case of a non-MVCC scan snapshot,
573 : : * because no caller needs that yet.
574 : : *
575 : : * This is useful to test whether an object was deleted while we waited to
576 : : * acquire a lock on it.
577 : : *
578 : : * Note: we don't actually *need* the tuple to be passed in, but it's a
579 : : * good crosscheck that the caller is interested in the right tuple.
580 : : */
581 : : bool
6540 582 : 163182 : systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
583 : : {
584 : : Snapshot freshsnap;
585 : : bool result;
586 : :
2612 andres@anarazel.de 587 [ - + ]: 163182 : Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
588 : :
4676 noah@leadboat.com 589 : 163182 : freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
420 heikki.linnakangas@i 590 : 163182 : freshsnap = RegisterSnapshot(freshsnap);
591 : :
2612 andres@anarazel.de 592 : 163182 : result = table_tuple_satisfies_snapshot(sysscan->heap_rel,
593 : 163182 : sysscan->slot,
594 : : freshsnap);
420 heikki.linnakangas@i 595 : 163182 : UnregisterSnapshot(freshsnap);
596 : :
597 : : /*
598 : : * Handle the concurrent abort while fetching the catalog tuple during
599 : : * logical streaming of a transaction.
600 : : */
2096 akapila@postgresql.o 601 : 163182 : HandleConcurrentAbort();
602 : :
6540 tgl@sss.pgh.pa.us 603 : 163182 : return result;
604 : : }
605 : :
606 : : /*
607 : : * systable_endscan --- close scan, release resources
608 : : * The index, if one was used, is closed here, but note that it's still up
609 : : * to the caller to close the heap relation.
610 : : */
611 : : void
8841 612 : 9121442 : systable_endscan(SysScanDesc sysscan)
613 : : {
2612 andres@anarazel.de 614 [ + - ]: 9121442 : if (sysscan->slot)
615 : : {
616 : 9121442 : ExecDropSingleTupleTableSlot(sysscan->slot);
617 : 9121442 : sysscan->slot = NULL;
618 : : }
619 : :
8841 tgl@sss.pgh.pa.us 620 [ + + ]: 9121442 : if (sysscan->irel)
621 : : {
622 : 8891461 : index_endscan(sysscan->iscan);
7218 623 : 8891461 : index_close(sysscan->irel, AccessShareLock);
624 : : }
625 : : else
2612 andres@anarazel.de 626 : 229981 : table_endscan(sysscan->scan);
627 : :
4690 rhaas@postgresql.org 628 [ + + ]: 9121442 : if (sysscan->snapshot)
629 : 8440997 : UnregisterSnapshot(sysscan->snapshot);
630 : :
631 : : /*
632 : : * Reset the bsysscan flag at the end of the systable scan. See detailed
633 : : * comments in xact.c where these variables are declared.
634 : : */
2096 akapila@postgresql.o 635 [ + + ]: 9121442 : if (TransactionIdIsValid(CheckXidAlive))
636 : 1044 : bsysscan = false;
637 : :
8841 tgl@sss.pgh.pa.us 638 : 9121442 : pfree(sysscan);
639 : 9121442 : }
640 : :
641 : :
642 : : /*
643 : : * systable_beginscan_ordered --- set up for ordered catalog scan
644 : : *
645 : : * These routines have essentially the same API as systable_beginscan etc,
646 : : * except that they guarantee to return multiple matching tuples in
647 : : * index order. Also, for largely historical reasons, the index to use
648 : : * is opened and locked by the caller, not here.
649 : : *
650 : : * Currently we do not support non-index-based scans here. (In principle
651 : : * we could do a heapscan and sort, but the uses are in places that
652 : : * probably don't need to still work with corrupted catalog indexes.)
653 : : * For the moment, therefore, these functions are merely the thinnest of
654 : : * wrappers around index_beginscan/index_getnext_slot. The main reason for
655 : : * their existence is to centralize possible future support of lossy operators
656 : : * in catalog scans.
657 : : */
658 : : SysScanDesc
6597 659 : 38248 : systable_beginscan_ordered(Relation heapRelation,
660 : : Relation indexRelation,
661 : : Snapshot snapshot,
662 : : int nkeys, ScanKey key)
663 : : {
664 : : SysScanDesc sysscan;
665 : : int i;
666 : : ScanKey idxkey;
667 : :
668 : : /* REINDEX can probably be a hard error here ... */
669 [ - + ]: 38248 : if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
800 tgl@sss.pgh.pa.us 670 [ # # ]:UBC 0 : ereport(ERROR,
671 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
672 : : errmsg("cannot access index \"%s\" while it is being reindexed",
673 : : RelationGetRelationName(indexRelation))));
674 : : /* ... but we only throw a warning about violating IgnoreSystemIndexes */
6597 tgl@sss.pgh.pa.us 675 [ - + ]:CBC 38248 : if (IgnoreSystemIndexes)
6597 tgl@sss.pgh.pa.us 676 [ # # ]:UBC 0 : elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
677 : : RelationGetRelationName(indexRelation));
678 : :
146 michael@paquier.xyz 679 :GNC 38248 : sysscan = palloc_object(SysScanDescData);
680 : :
6597 tgl@sss.pgh.pa.us 681 :CBC 38248 : sysscan->heap_rel = heapRelation;
682 : 38248 : sysscan->irel = indexRelation;
2612 andres@anarazel.de 683 : 38248 : sysscan->slot = table_slot_create(heapRelation, NULL);
684 : :
4690 rhaas@postgresql.org 685 [ + + ]: 38248 : if (snapshot == NULL)
686 : : {
4382 bruce@momjian.us 687 : 5264 : Oid relid = RelationGetRelid(heapRelation);
688 : :
4690 rhaas@postgresql.org 689 : 5264 : snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
690 : 5264 : sysscan->snapshot = snapshot;
691 : : }
692 : : else
693 : : {
694 : : /* Caller is responsible for any snapshot. */
695 : 32984 : sysscan->snapshot = NULL;
696 : : }
697 : :
635 peter@eisentraut.org 698 : 38248 : idxkey = palloc_array(ScanKeyData, nkeys);
699 : :
700 : : /* Convert attribute numbers to be index column numbers. */
6597 tgl@sss.pgh.pa.us 701 [ + + ]: 74549 : for (i = 0; i < nkeys; i++)
702 : : {
703 : : int j;
704 : :
635 peter@eisentraut.org 705 : 36301 : memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
706 : :
2950 teodor@sigaev.ru 707 [ + - ]: 38348 : for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
708 : : {
6389 heikki.linnakangas@i 709 [ + + ]: 38348 : if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
710 : : {
635 peter@eisentraut.org 711 : 36301 : idxkey[i].sk_attno = j + 1;
6389 heikki.linnakangas@i 712 : 36301 : break;
713 : : }
714 : : }
2950 teodor@sigaev.ru 715 [ - + ]: 36301 : if (j == IndexRelationGetNumberOfAttributes(indexRelation))
6389 heikki.linnakangas@i 716 [ # # ]:UBC 0 : elog(ERROR, "column is not in index");
717 : : }
718 : :
719 : : /*
720 : : * If CheckXidAlive is set then set a flag to indicate that system table
721 : : * scan is in-progress. See detailed comments in xact.c where these
722 : : * variables are declared.
723 : : */
575 akapila@postgresql.o 724 [ - + ]:CBC 38248 : if (TransactionIdIsValid(CheckXidAlive))
575 akapila@postgresql.o 725 :LBC (1) : bsysscan = true;
726 : :
96 andres@anarazel.de 727 :GNC 38248 : sysscan->iscan = index_beginscan(heapRelation, indexRelation,
728 : : snapshot, NULL, nkeys, 0,
729 : : SO_NONE);
730 : 38248 : index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
731 : 38248 : sysscan->scan = NULL;
732 : :
733 : 38248 : pfree(idxkey);
734 : :
6597 tgl@sss.pgh.pa.us 735 :CBC 38248 : return sysscan;
736 : : }
737 : :
738 : : /*
739 : : * systable_getnext_ordered --- get next tuple in an ordered catalog scan
740 : : */
741 : : HeapTuple
742 : 142526 : systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
743 : : {
2612 andres@anarazel.de 744 : 142526 : HeapTuple htup = NULL;
745 : :
6597 tgl@sss.pgh.pa.us 746 [ - + ]: 142526 : Assert(sysscan->irel);
2612 andres@anarazel.de 747 [ + + ]: 142526 : if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
748 : 105130 : htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
749 : :
750 : : /* Lossy index conditions are unsupported; see notes in systable_getnext */
6596 tgl@sss.pgh.pa.us 751 [ + + - + ]: 142523 : if (htup && sysscan->iscan->xs_recheck)
6596 tgl@sss.pgh.pa.us 752 [ # # ]:UBC 0 : elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
753 : :
754 : : /*
755 : : * Handle the concurrent abort while fetching the catalog tuple during
756 : : * logical streaming of a transaction.
757 : : */
2096 akapila@postgresql.o 758 :CBC 142523 : HandleConcurrentAbort();
759 : :
6597 tgl@sss.pgh.pa.us 760 : 142523 : return htup;
761 : : }
762 : :
763 : : /*
764 : : * systable_endscan_ordered --- close scan, release resources
765 : : */
766 : : void
767 : 38237 : systable_endscan_ordered(SysScanDesc sysscan)
768 : : {
2612 andres@anarazel.de 769 [ + - ]: 38237 : if (sysscan->slot)
770 : : {
771 : 38237 : ExecDropSingleTupleTableSlot(sysscan->slot);
772 : 38237 : sysscan->slot = NULL;
773 : : }
774 : :
6597 tgl@sss.pgh.pa.us 775 [ - + ]: 38237 : Assert(sysscan->irel);
776 : 38237 : index_endscan(sysscan->iscan);
4690 rhaas@postgresql.org 777 [ + + ]: 38237 : if (sysscan->snapshot)
778 : 5256 : UnregisterSnapshot(sysscan->snapshot);
779 : :
780 : : /*
781 : : * Reset the bsysscan flag at the end of the systable scan. See detailed
782 : : * comments in xact.c where these variables are declared.
783 : : */
575 akapila@postgresql.o 784 [ - + ]: 38237 : if (TransactionIdIsValid(CheckXidAlive))
575 akapila@postgresql.o 785 :LBC (1) : bsysscan = false;
786 : :
6597 tgl@sss.pgh.pa.us 787 :CBC 38237 : pfree(sysscan);
788 : 38237 : }
789 : :
790 : : /*
791 : : * systable_inplace_update_begin --- update a row "in place" (overwrite it)
792 : : *
793 : : * Overwriting violates both MVCC and transactional safety, so the uses of
794 : : * this function in Postgres are extremely limited. This makes no effort to
795 : : * support updating cache key columns or other indexed columns. Nonetheless
796 : : * we find some places to use it. See README.tuplock section "Locking to
797 : : * write inplace-updated tables" and later sections for expectations of
798 : : * readers and writers of a table that gets inplace updates. Standard flow:
799 : : *
800 : : * ... [any slow preparation not requiring oldtup] ...
801 : : * systable_inplace_update_begin([...], &tup, &inplace_state);
802 : : * if (!HeapTupleIsValid(tup))
803 : : * elog(ERROR, [...]);
804 : : * ... [buffer is exclusive-locked; mutate "tup"] ...
805 : : * if (dirty)
806 : : * systable_inplace_update_finish(inplace_state, tup);
807 : : * else
808 : : * systable_inplace_update_cancel(inplace_state);
809 : : *
810 : : * The first several params duplicate the systable_beginscan() param list.
811 : : * "oldtupcopy" is an output parameter, assigned NULL if the key ceases to
812 : : * find a live tuple. (In PROC_IN_VACUUM, that is a low-probability transient
813 : : * condition.) If "oldtupcopy" gets non-NULL, you must pass output parameter
814 : : * "state" to systable_inplace_update_finish() or
815 : : * systable_inplace_update_cancel().
816 : : */
817 : : void
588 noah@leadboat.com 818 : 116992 : systable_inplace_update_begin(Relation relation,
819 : : Oid indexId,
820 : : bool indexOK,
821 : : Snapshot snapshot,
822 : : int nkeys, const ScanKeyData *key,
823 : : HeapTuple *oldtupcopy,
824 : : void **state)
825 : : {
826 : 116992 : int retries = 0;
827 : : SysScanDesc scan;
828 : : HeapTuple oldtup;
829 : : BufferHeapTupleTableSlot *bslot;
830 : :
831 : : /*
832 : : * For now, we don't allow parallel updates. Unlike a regular update,
833 : : * this should never create a combo CID, so it might be possible to relax
834 : : * this restriction, but not without more thought and testing. It's not
835 : : * clear that it would be useful, anyway.
836 : : */
837 [ - + ]: 116992 : if (IsInParallelMode())
588 noah@leadboat.com 838 [ # # ]:UBC 0 : ereport(ERROR,
839 : : (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
840 : : errmsg("cannot update tuples during a parallel operation")));
841 : :
842 : : /*
843 : : * Accept a snapshot argument, for symmetry, but this function advances
844 : : * its snapshot as needed to reach the tail of the updated tuple chain.
845 : : */
588 noah@leadboat.com 846 [ - + ]:CBC 116992 : Assert(snapshot == NULL);
847 : :
848 [ - + - - ]: 116992 : Assert(IsInplaceUpdateRelation(relation) || !IsSystemRelation(relation));
849 : :
850 : : /* Loop for an exclusive-locked buffer of a non-updated tuple. */
851 : : do
852 : : {
853 : : TupleTableSlot *slot;
854 : :
855 [ + + ]: 117041 : CHECK_FOR_INTERRUPTS();
856 : :
857 : : /*
858 : : * Processes issuing heap_update (e.g. GRANT) at maximum speed could
859 : : * drive us to this error. A hostile table owner has stronger ways to
860 : : * damage their own table, so that's minor.
861 : : */
862 [ - + ]: 117041 : if (retries++ > 10000)
588 noah@leadboat.com 863 [ # # ]:UBC 0 : elog(ERROR, "giving up after too many tries to overwrite row");
864 : :
360 michael@paquier.xyz 865 :CBC 117041 : INJECTION_POINT("inplace-before-pin", NULL);
588 noah@leadboat.com 866 : 117041 : scan = systable_beginscan(relation, indexId, indexOK, snapshot,
588 noah@leadboat.com 867 :ECB (91419) : nkeys, unconstify(ScanKeyData *, key));
588 noah@leadboat.com 868 :CBC 117041 : oldtup = systable_getnext(scan);
869 [ - + ]: 117041 : if (!HeapTupleIsValid(oldtup))
870 : : {
588 noah@leadboat.com 871 :UBC 0 : systable_endscan(scan);
872 : 0 : *oldtupcopy = NULL;
873 : 0 : return;
874 : : }
875 : :
588 noah@leadboat.com 876 :CBC 117041 : slot = scan->slot;
877 [ - + ]: 117041 : Assert(TTS_IS_BUFFERTUPLE(slot));
878 : 117041 : bslot = (BufferHeapTupleTableSlot *) slot;
553 879 [ + + ]: 117041 : } while (!heap_inplace_lock(scan->heap_rel,
880 : : bslot->base.tuple, bslot->buffer,
881 : : (void (*) (void *)) systable_endscan, scan));
882 : :
588 883 : 116992 : *oldtupcopy = heap_copytuple(oldtup);
884 : 116992 : *state = scan;
885 : : }
886 : :
887 : : /*
888 : : * systable_inplace_update_finish --- second phase of inplace update
889 : : *
890 : : * The tuple cannot change size, and therefore its header fields and null
891 : : * bitmap (if any) don't change either.
892 : : */
893 : : void
894 : 81297 : systable_inplace_update_finish(void *state, HeapTuple tuple)
895 : : {
896 : 81297 : SysScanDesc scan = (SysScanDesc) state;
897 : 81297 : Relation relation = scan->heap_rel;
898 : 81297 : TupleTableSlot *slot = scan->slot;
899 : 81297 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
900 : 81297 : HeapTuple oldtup = bslot->base.tuple;
901 : 81297 : Buffer buffer = bslot->buffer;
902 : :
903 : 81297 : heap_inplace_update_and_unlock(relation, oldtup, tuple, buffer);
904 : 81297 : systable_endscan(scan);
905 : 81297 : }
906 : :
907 : : /*
908 : : * systable_inplace_update_cancel --- abandon inplace update
909 : : *
910 : : * This is an alternative to making a no-op update.
911 : : */
912 : : void
913 : 35695 : systable_inplace_update_cancel(void *state)
914 : : {
915 : 35695 : SysScanDesc scan = (SysScanDesc) state;
916 : 35695 : Relation relation = scan->heap_rel;
917 : 35695 : TupleTableSlot *slot = scan->slot;
918 : 35695 : BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
919 : 35695 : HeapTuple oldtup = bslot->base.tuple;
920 : 35695 : Buffer buffer = bslot->buffer;
921 : :
922 : 35695 : heap_inplace_unlock(relation, oldtup, buffer);
923 : 35695 : systable_endscan(scan);
924 : 35695 : }
|