Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * execPartition.c
4 : : * Support routines for partitioning.
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : * IDENTIFICATION
10 : : * src/backend/executor/execPartition.c
11 : : *
12 : : *-------------------------------------------------------------------------
13 : : */
14 : : #include "postgres.h"
15 : :
16 : : #include "access/table.h"
17 : : #include "access/tableam.h"
18 : : #include "catalog/index.h"
19 : : #include "catalog/partition.h"
20 : : #include "executor/execPartition.h"
21 : : #include "executor/executor.h"
22 : : #include "executor/nodeModifyTable.h"
23 : : #include "foreign/fdwapi.h"
24 : : #include "mb/pg_wchar.h"
25 : : #include "miscadmin.h"
26 : : #include "partitioning/partbounds.h"
27 : : #include "partitioning/partdesc.h"
28 : : #include "partitioning/partprune.h"
29 : : #include "rewrite/rewriteManip.h"
30 : : #include "utils/acl.h"
31 : : #include "utils/injection_point.h"
32 : : #include "utils/lsyscache.h"
33 : : #include "utils/partcache.h"
34 : : #include "utils/rls.h"
35 : : #include "utils/ruleutils.h"
36 : :
37 : :
/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *		The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *		Array of 'max_dispatch' elements containing a pointer to a
 *		PartitionDispatch object for every partitioned table touched by tuple
 *		routing.  The entry for the target partitioned table is *always*
 *		present in the 0th element of this array.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * nonleaf_partitions
 *		Array of 'max_dispatch' elements containing pointers to fake
 *		ResultRelInfo objects for nonleaf partitions, useful for checking
 *		the partition constraint.
 *
 * num_dispatch
 *		The current number of items stored in the 'partition_dispatch_info'
 *		array.  Also serves as the index of the next free array element for
 *		new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *		The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *		Array of 'max_partitions' elements containing a pointer to a
 *		ResultRelInfo for every leaf partition touched by tuple routing.
 *		Some of these are pointers to ResultRelInfos which are borrowed out of
 *		the owning ModifyTableState node.  The remainder have been built
 *		especially for tuple routing.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * is_borrowed_rel
 *		Array of 'max_partitions' booleans recording whether a given entry
 *		in 'partitions' is a ResultRelInfo pointer borrowed from the owning
 *		ModifyTableState node, rather than being built here.
 *
 * num_partitions
 *		The current number of items stored in the 'partitions' array.  Also
 *		serves as the index of the next free array element for new
 *		ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *		The current allocated size of the 'partitions' array.
 *
 * memcxt
 *		Memory context used to allocate subsidiary structs.
 *-----------------------
 */
struct PartitionTupleRouting
{
	Relation	partition_root; /* target partitioned table */
	PartitionDispatch *partition_dispatch_info; /* grows on demand */
	ResultRelInfo **nonleaf_partitions; /* parallel to dispatch_info */
	int			num_dispatch;	/* # of valid dispatch_info entries */
	int			max_dispatch;	/* allocated length of dispatch_info */
	ResultRelInfo **partitions; /* leaf-partition ResultRelInfos */
	bool	   *is_borrowed_rel;	/* parallel to 'partitions' */
	int			num_partitions; /* # of valid 'partitions' entries */
	int			max_partitions; /* allocated length of 'partitions' */
	MemoryContext memcxt;		/* where subsidiary structs live */
};
106 : :
/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *		Relation descriptor of the table
 *
 * key
 *		Partition key information of the table
 *
 * keystate
 *		Execution state required for expressions in the partition key
 *
 * partdesc
 *		Partition descriptor of the table
 *
 * tupslot
 *		A standalone TupleTableSlot initialized with this table's tuple
 *		descriptor, or NULL if no tuple conversion between the parent is
 *		required.
 *
 * tupmap
 *		TupleConversionMap to convert from the parent's rowtype to this table's
 *		rowtype (when extracting the partition key of a tuple just before
 *		routing it through this table).  A NULL value is stored if no tuple
 *		conversion is required.
 *
 * indexes
 *		Array of partdesc->nparts elements.  For leaf partitions the index
 *		corresponds to the partition's ResultRelInfo in the encapsulating
 *		PartitionTupleRouting's partitions array.  For partitioned partitions,
 *		the index corresponds to the PartitionDispatch for it in its
 *		partition_dispatch_info array.  -1 indicates we've not yet allocated
 *		anything in PartitionTupleRouting for the partition.
 *-----------------------
 */
typedef struct PartitionDispatchData
{
	Relation	reldesc;		/* see banner comment above for fields */
	PartitionKey key;
	List	   *keystate;		/* list of ExprState */
	PartitionDesc partdesc;
	TupleTableSlot *tupslot;	/* NULL if no parent->child conversion */
	AttrMap    *tupmap;			/* NULL if no parent->child conversion */
	int			indexes[FLEXIBLE_ARRAY_MEMBER]; /* one per partition; -1 =
												 * not yet initialized */
} PartitionDispatchData;
155 : :
156 : :
157 : : static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
158 : : EState *estate, PartitionTupleRouting *proute,
159 : : PartitionDispatch dispatch,
160 : : ResultRelInfo *rootResultRelInfo,
161 : : int partidx);
162 : : static void ExecInitRoutingInfo(ModifyTableState *mtstate,
163 : : EState *estate,
164 : : PartitionTupleRouting *proute,
165 : : PartitionDispatch dispatch,
166 : : ResultRelInfo *partRelInfo,
167 : : int partidx,
168 : : bool is_borrowed_rel);
169 : : static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
170 : : PartitionTupleRouting *proute,
171 : : Oid partoid, PartitionDispatch parent_pd,
172 : : int partidx, ResultRelInfo *rootResultRelInfo);
173 : : static void FormPartitionKeyDatum(PartitionDispatch pd,
174 : : TupleTableSlot *slot,
175 : : EState *estate,
176 : : Datum *values,
177 : : bool *isnull);
178 : : static int get_partition_for_tuple(PartitionDispatch pd, const Datum *values,
179 : : const bool *isnull);
180 : : static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
181 : : const Datum *values,
182 : : const bool *isnull,
183 : : int maxfieldlen);
184 : : static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
185 : : static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
186 : : static PartitionPruneState *CreatePartitionPruneState(EState *estate,
187 : : PartitionPruneInfo *pruneinfo,
188 : : Bitmapset **all_leafpart_rtis);
189 : : static void InitPartitionPruneContext(PartitionPruneContext *context,
190 : : List *pruning_steps,
191 : : PartitionDesc partdesc,
192 : : PartitionKey partkey,
193 : : PlanState *planstate,
194 : : ExprContext *econtext);
195 : : static void InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
196 : : PlanState *parent_plan,
197 : : Bitmapset *initially_valid_subplans,
198 : : int n_total_subplans);
199 : : static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
200 : : PartitionedRelPruningData *pprune,
201 : : bool initial_prune,
202 : : Bitmapset **validsubplans,
203 : : Bitmapset **validsubplan_rtis);
204 : :
205 : :
206 : : /*
207 : : * ExecSetupPartitionTupleRouting - sets up information needed during
208 : : * tuple routing for partitioned tables, encapsulates it in
209 : : * PartitionTupleRouting, and returns it.
210 : : *
211 : : * Callers must use the returned PartitionTupleRouting during calls to
212 : : * ExecFindPartition(). The actual ResultRelInfo for a partition is only
213 : : * allocated when the partition is found for the first time.
214 : : *
215 : : * The current memory context is used to allocate this struct and all
216 : : * subsidiary structs that will be allocated from it later on. Typically
217 : : * it should be estate->es_query_cxt.
218 : : */
219 : : PartitionTupleRouting *
1804 tgl@sss.pgh.pa.us 220 :CBC 2897 : ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
221 : : {
222 : : PartitionTupleRouting *proute;
223 : :
224 : : /*
225 : : * Here we attempt to expend as little effort as possible in setting up
226 : : * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
227 : : * demand, only when we actually need to route a tuple to that partition.
228 : : * The reason for this is that a common case is for INSERT to insert a
229 : : * single tuple into a partitioned table and this must be fast.
230 : : */
94 michael@paquier.xyz 231 :GNC 2897 : proute = palloc0_object(PartitionTupleRouting);
2676 alvherre@alvh.no-ip. 232 :CBC 2897 : proute->partition_root = rel;
233 : 2897 : proute->memcxt = CurrentMemoryContext;
234 : : /* Rest of members initialized by zeroing */
235 : :
236 : : /*
237 : : * Initialize this table's PartitionDispatch object. Here we pass in the
238 : : * parent as NULL as we don't need to care about any parent of the target
239 : : * partitioned table.
240 : : */
2565 rhaas@postgresql.org 241 : 2897 : ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
242 : : NULL, 0, NULL);
243 : :
2992 244 : 2897 : return proute;
245 : : }
246 : :
/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
 * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
 * ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and initialize tuple routing information.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message.  An error may also be raised if the found target partition
 * is not a valid target for an INSERT.
 */
ResultRelInfo *
ExecFindPartition(ModifyTableState *mtstate,
				  ResultRelInfo *rootResultRelInfo,
				  PartitionTupleRouting *proute,
				  TupleTableSlot *slot, EState *estate)
{
	PartitionDispatch *pd = proute->partition_dispatch_info;
	Datum		values[PARTITION_MAX_KEYS];
	bool		isnull[PARTITION_MAX_KEYS];
	Relation	rel;
	PartitionDispatch dispatch;
	PartitionDesc partdesc;
	ExprContext *ecxt = GetPerTupleExprContext(estate);
	TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
	TupleTableSlot *rootslot = slot;	/* original tuple, in root's layout */
	TupleTableSlot *myslot = NULL;	/* current parent's dedicated slot, if
									 * any conversion has happened */
	MemoryContext oldcxt;
	ResultRelInfo *rri = NULL;

	/* use per-tuple context here to avoid leaking memory */
	oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	/*
	 * First check the root table's partition constraint, if any.  No point in
	 * routing the tuple if it doesn't belong in the root table itself.
	 */
	if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
		ExecPartitionCheck(rootResultRelInfo, slot, estate, true);

	/* start with the root partitioned table */
	dispatch = pd[0];
	while (dispatch != NULL)
	{
		int			partidx = -1;
		bool		is_leaf;

		CHECK_FOR_INTERRUPTS();

		rel = dispatch->reldesc;
		partdesc = dispatch->partdesc;

		/*
		 * Extract partition key from tuple.  Expression evaluation machinery
		 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
		 * point to the correct tuple slot.  The slot might have changed from
		 * what was used for the parent table if the table of the current
		 * partitioning level has different tuple descriptor from the parent.
		 * So update ecxt_scantuple accordingly.
		 */
		ecxt->ecxt_scantuple = slot;
		FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);

		/*
		 * If this partitioned table has no partitions or no partition for
		 * these values, error out.
		 */
		if (partdesc->nparts == 0 ||
			(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
		{
			char	   *val_desc;

			val_desc = ExecBuildSlotPartitionKeyDescription(rel,
															values, isnull, 64);
			Assert(OidIsValid(RelationGetRelid(rel)));
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("no partition of relation \"%s\" found for row",
							RelationGetRelationName(rel)),
					 val_desc ?
					 errdetail("Partition key of the failing row contains %s.",
							   val_desc) : 0,
					 errtable(rel)));
		}

		is_leaf = partdesc->is_leaf[partidx];
		if (is_leaf)
		{
			/*
			 * We've reached the leaf -- hurray, we're done.  Look to see if
			 * we've already got a ResultRelInfo for this partition.
			 */
			if (likely(dispatch->indexes[partidx] >= 0))
			{
				/* ResultRelInfo already built */
				Assert(dispatch->indexes[partidx] < proute->num_partitions);
				rri = proute->partitions[dispatch->indexes[partidx]];
			}
			else
			{
				/*
				 * If the partition is known in the owning ModifyTableState
				 * node, we can re-use that ResultRelInfo instead of creating
				 * a new one with ExecInitPartitionInfo().
				 */
				rri = ExecLookupResultRelByOid(mtstate,
											   partdesc->oids[partidx],
											   true, false);
				if (rri)
				{
					ModifyTable *node = (ModifyTable *) mtstate->ps.plan;

					/* Verify this ResultRelInfo allows INSERTs */
					CheckValidResultRel(rri, CMD_INSERT,
										node ? node->onConflictAction : ONCONFLICT_NONE,
										NIL);

					/*
					 * Initialize information needed to insert this and
					 * subsequent tuples routed to this partition.
					 */
					ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
										rri, partidx, true);
				}
				else
				{
					/* We need to create a new one. */
					rri = ExecInitPartitionInfo(mtstate, estate, proute,
												dispatch,
												rootResultRelInfo, partidx);
				}
			}
			Assert(rri != NULL);

			/* Signal to terminate the loop */
			dispatch = NULL;
		}
		else
		{
			/*
			 * Partition is a sub-partitioned table; get the PartitionDispatch
			 */
			if (likely(dispatch->indexes[partidx] >= 0))
			{
				/* Already built. */
				Assert(dispatch->indexes[partidx] < proute->num_dispatch);

				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];

				/*
				 * Move down to the next partition level and search again
				 * until we find a leaf partition that matches this tuple
				 */
				dispatch = pd[dispatch->indexes[partidx]];
			}
			else
			{
				/* Not yet built. Do that now. */
				PartitionDispatch subdispatch;

				/*
				 * Create the new PartitionDispatch.  We pass the current one
				 * in as the parent PartitionDispatch
				 */
				subdispatch = ExecInitPartitionDispatchInfo(estate,
															proute,
															partdesc->oids[partidx],
															dispatch, partidx,
															mtstate->rootResultRelInfo);
				Assert(dispatch->indexes[partidx] >= 0 &&
					   dispatch->indexes[partidx] < proute->num_dispatch);

				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
				dispatch = subdispatch;
			}

			/*
			 * Convert the tuple to the new parent's layout, if different from
			 * the previous parent.  (The previous parent's slot, if any, is
			 * cleared once the tuple has been copied out of it.)
			 */
			if (dispatch->tupslot)
			{
				AttrMap    *map = dispatch->tupmap;
				TupleTableSlot *tempslot = myslot;

				myslot = dispatch->tupslot;
				slot = execute_attr_map_slot(map, slot, myslot);

				if (tempslot != NULL)
					ExecClearTuple(tempslot);
			}
		}

		/*
		 * If this partition is the default one, we must check its partition
		 * constraint now, which may have changed concurrently due to
		 * partitions being added to the parent.
		 *
		 * (We do this here, and do not rely on ExecInsert doing it, because
		 * we don't want to miss doing it for non-leaf partitions.)
		 */
		if (partidx == partdesc->boundinfo->default_index)
		{
			/*
			 * The tuple must match the partition's layout for the constraint
			 * expression to be evaluated successfully.  If the partition is
			 * sub-partitioned, that would already be the case due to the code
			 * above, but for a leaf partition the tuple still matches the
			 * parent's layout.
			 *
			 * Note that we have a map to convert from root to current
			 * partition, but not from immediate parent to current partition.
			 * So if we have to convert, do it from the root slot; if not, use
			 * the root slot as-is.
			 */
			if (is_leaf)
			{
				TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);

				if (map)
					slot = execute_attr_map_slot(map->attrMap, rootslot,
												 rri->ri_PartitionTupleSlot);
				else
					slot = rootslot;
			}

			ExecPartitionCheck(rri, slot, estate, true);
		}
	}

	/* Release the tuple in the lowest parent's dedicated slot. */
	if (myslot != NULL)
		ExecClearTuple(myslot);
	/* and restore ecxt's scantuple */
	ecxt->ecxt_scantuple = ecxt_scantuple_saved;
	MemoryContextSwitchTo(oldcxt);

	return rri;
}
494 : :
495 : : /*
496 : : * IsIndexCompatibleAsArbiter
497 : : * Return true if two indexes are identical for INSERT ON CONFLICT
498 : : * purposes.
499 : : *
500 : : * Only indexes of the same relation are supported.
501 : : */
502 : : static bool
103 alvherre@kurilemu.de 503 :GNC 19 : IsIndexCompatibleAsArbiter(Relation arbiterIndexRelation,
504 : : IndexInfo *arbiterIndexInfo,
505 : : Relation indexRelation,
506 : : IndexInfo *indexInfo)
507 : : {
508 [ - + ]: 19 : Assert(arbiterIndexRelation->rd_index->indrelid == indexRelation->rd_index->indrelid);
509 : :
510 : : /* must match whether they're unique */
511 [ - + ]: 19 : if (arbiterIndexInfo->ii_Unique != indexInfo->ii_Unique)
103 alvherre@kurilemu.de 512 :UNC 0 : return false;
513 : :
514 : : /* No support currently for comparing exclusion indexes. */
103 alvherre@kurilemu.de 515 [ + - ]:GNC 19 : if (arbiterIndexInfo->ii_ExclusionOps != NULL ||
516 [ - + ]: 19 : indexInfo->ii_ExclusionOps != NULL)
103 alvherre@kurilemu.de 517 :UNC 0 : return false;
518 : :
519 : : /* the "nulls not distinct" criterion must match */
103 alvherre@kurilemu.de 520 :GNC 19 : if (arbiterIndexInfo->ii_NullsNotDistinct !=
521 [ - + ]: 19 : indexInfo->ii_NullsNotDistinct)
103 alvherre@kurilemu.de 522 :UNC 0 : return false;
523 : :
524 : : /* number of key attributes must match */
103 alvherre@kurilemu.de 525 :GNC 19 : if (arbiterIndexInfo->ii_NumIndexKeyAttrs !=
526 [ - + ]: 19 : indexInfo->ii_NumIndexKeyAttrs)
103 alvherre@kurilemu.de 527 :UNC 0 : return false;
528 : :
103 alvherre@kurilemu.de 529 [ + + ]:GNC 26 : for (int i = 0; i < arbiterIndexInfo->ii_NumIndexKeyAttrs; i++)
530 : : {
531 : 19 : if (arbiterIndexRelation->rd_indcollation[i] !=
532 [ + + ]: 19 : indexRelation->rd_indcollation[i])
533 : 12 : return false;
534 : :
535 : 7 : if (arbiterIndexRelation->rd_opfamily[i] !=
536 [ - + ]: 7 : indexRelation->rd_opfamily[i])
103 alvherre@kurilemu.de 537 :UNC 0 : return false;
538 : :
103 alvherre@kurilemu.de 539 :GNC 7 : if (arbiterIndexRelation->rd_index->indkey.values[i] !=
540 [ - + ]: 7 : indexRelation->rd_index->indkey.values[i])
103 alvherre@kurilemu.de 541 :UNC 0 : return false;
542 : : }
543 : :
103 alvherre@kurilemu.de 544 [ - + ]:GNC 7 : if (list_difference(RelationGetIndexExpressions(arbiterIndexRelation),
545 : 7 : RelationGetIndexExpressions(indexRelation)) != NIL)
103 alvherre@kurilemu.de 546 :UNC 0 : return false;
547 : :
103 alvherre@kurilemu.de 548 [ - + ]:GNC 7 : if (list_difference(RelationGetIndexPredicate(arbiterIndexRelation),
549 : 7 : RelationGetIndexPredicate(indexRelation)) != NIL)
103 alvherre@kurilemu.de 550 :UNC 0 : return false;
103 alvherre@kurilemu.de 551 :GNC 7 : return true;
552 : : }
553 : :
554 : : /*
555 : : * ExecInitPartitionInfo
556 : : * Lock the partition and initialize ResultRelInfo. Also setup other
557 : : * information for the partition and store it in the next empty slot in
558 : : * the proute->partitions array.
559 : : *
560 : : * Returns the ResultRelInfo
561 : : */
562 : : static ResultRelInfo *
2676 alvherre@alvh.no-ip. 563 :CBC 3732 : ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
564 : : PartitionTupleRouting *proute,
565 : : PartitionDispatch dispatch,
566 : : ResultRelInfo *rootResultRelInfo,
567 : : int partidx)
568 : : {
2900 rhaas@postgresql.org 569 : 3732 : ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
1816 alvherre@alvh.no-ip. 570 : 3732 : Oid partOid = dispatch->partdesc->oids[partidx];
571 : : Relation partrel;
1861 heikki.linnakangas@i 572 : 3732 : int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
2890 alvherre@alvh.no-ip. 573 : 3732 : Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
574 : : ResultRelInfo *leaf_part_rri;
575 : : MemoryContext oldcxt;
2279 michael@paquier.xyz 576 : 3732 : AttrMap *part_attmap = NULL;
577 : : bool found_whole_row;
578 : :
2676 alvherre@alvh.no-ip. 579 : 3732 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
580 : :
1816 581 : 3732 : partrel = table_open(partOid, RowExclusiveLock);
582 : :
2911 583 : 3732 : leaf_part_rri = makeNode(ResultRelInfo);
2943 rhaas@postgresql.org 584 : 3732 : InitResultRelInfo(leaf_part_rri,
585 : : partrel,
586 : : 0,
587 : : rootResultRelInfo,
588 : : estate->es_instrument);
589 : :
590 : : /*
591 : : * Verify result relation is a valid target for an INSERT. An UPDATE of a
592 : : * partition-key becomes a DELETE+INSERT operation, so this check is still
593 : : * required when the operation is CMD_UPDATE.
594 : : */
192 dean.a.rasheed@gmail 595 [ + + ]: 3732 : CheckValidResultRel(leaf_part_rri, CMD_INSERT,
596 : : node ? node->onConflictAction : ONCONFLICT_NONE, NIL);
597 : :
598 : : /*
599 : : * Open partition indices. The user may have asked to check for conflicts
600 : : * within this leaf partition and do "nothing" instead of throwing an
601 : : * error. Be prepared in that case by initializing the index information
602 : : * needed by ExecInsert() to perform speculative insertions.
603 : : */
2943 rhaas@postgresql.org 604 [ + + ]: 3726 : if (partrel->rd_rel->relhasindex &&
605 [ + - ]: 1119 : leaf_part_rri->ri_IndexRelationDescs == NULL)
606 : 1119 : ExecOpenIndices(leaf_part_rri,
2918 alvherre@alvh.no-ip. 607 [ + + ]: 2130 : (node != NULL &&
608 [ + + ]: 1011 : node->onConflictAction != ONCONFLICT_NONE));
609 : :
610 : : /*
611 : : * Build WITH CHECK OPTION constraints for the partition. Note that we
612 : : * didn't build the withCheckOptionList for partitions within the planner,
613 : : * but simple translation of varattnos will suffice. This only occurs for
614 : : * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
615 : : * didn't find a result rel to reuse.
616 : : */
2943 rhaas@postgresql.org 617 [ + + + + ]: 3726 : if (node && node->withCheckOptionLists != NIL)
618 : : {
619 : : List *wcoList;
620 : 48 : List *wcoExprs = NIL;
621 : : ListCell *ll;
622 : :
623 : : /*
624 : : * In the case of INSERT on a partitioned table, there is only one
625 : : * plan. Likewise, there is only one WCO list, not one per partition.
626 : : * For UPDATE/MERGE, there are as many WCO lists as there are plans.
627 : : */
628 [ + + + - : 48 : Assert((node->operation == CMD_INSERT &&
- + + + -
+ + - -
+ ]
629 : : list_length(node->withCheckOptionLists) == 1 &&
630 : : list_length(node->resultRelations) == 1) ||
631 : : (node->operation == CMD_UPDATE &&
632 : : list_length(node->withCheckOptionLists) ==
633 : : list_length(node->resultRelations)) ||
634 : : (node->operation == CMD_MERGE &&
635 : : list_length(node->withCheckOptionLists) ==
636 : : list_length(node->resultRelations)));
637 : :
638 : : /*
639 : : * Use the WCO list of the first plan as a reference to calculate
640 : : * attno's for the WCO list of this partition. In the INSERT case,
641 : : * that refers to the root partitioned table, whereas in the UPDATE
642 : : * tuple routing case, that refers to the first partition in the
643 : : * mtstate->resultRelInfo array. In any case, both that relation and
644 : : * this partition should have the same columns, so we should be able
645 : : * to map attributes successfully.
646 : : */
647 : 48 : wcoList = linitial(node->withCheckOptionLists);
648 : :
649 : : /*
650 : : * Convert Vars in it to contain this partition's attribute numbers.
651 : : */
652 : : part_attmap =
2279 michael@paquier.xyz 653 : 48 : build_attrmap_by_name(RelationGetDescr(partrel),
654 : : RelationGetDescr(firstResultRel),
655 : : false);
656 : : wcoList = (List *)
2887 alvherre@alvh.no-ip. 657 : 48 : map_variable_attnos((Node *) wcoList,
658 : : firstVarno, 0,
659 : : part_attmap,
660 : 48 : RelationGetForm(partrel)->reltype,
661 : : &found_whole_row);
662 : : /* We ignore the value of found_whole_row. */
663 : :
2943 rhaas@postgresql.org 664 [ + - + + : 135 : foreach(ll, wcoList)
+ + ]
665 : : {
1700 peter@eisentraut.org 666 : 87 : WithCheckOption *wco = lfirst_node(WithCheckOption, ll);
2943 rhaas@postgresql.org 667 : 87 : ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
668 : : &mtstate->ps);
669 : :
670 : 87 : wcoExprs = lappend(wcoExprs, wcoExpr);
671 : : }
672 : :
673 : 48 : leaf_part_rri->ri_WithCheckOptions = wcoList;
674 : 48 : leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
675 : : }
676 : :
677 : : /*
678 : : * Build the RETURNING projection for the partition. Note that we didn't
679 : : * build the returningList for partitions within the planner, but simple
680 : : * translation of varattnos will suffice. This only occurs for the INSERT
681 : : * case or in the case of UPDATE/MERGE tuple routing where we didn't find
682 : : * a result rel to reuse.
683 : : */
684 [ + + + + ]: 3726 : if (node && node->returningLists != NIL)
685 : : {
686 : : TupleTableSlot *slot;
687 : : ExprContext *econtext;
688 : : List *returningList;
689 : :
690 : : /* See the comment above for WCO lists. */
691 [ + + + - : 148 : Assert((node->operation == CMD_INSERT &&
- + + + -
+ + - -
+ ]
692 : : list_length(node->returningLists) == 1 &&
693 : : list_length(node->resultRelations) == 1) ||
694 : : (node->operation == CMD_UPDATE &&
695 : : list_length(node->returningLists) ==
696 : : list_length(node->resultRelations)) ||
697 : : (node->operation == CMD_MERGE &&
698 : : list_length(node->returningLists) ==
699 : : list_length(node->resultRelations)));
700 : :
701 : : /*
702 : : * Use the RETURNING list of the first plan as a reference to
703 : : * calculate attno's for the RETURNING list of this partition. See
704 : : * the comment above for WCO lists for more details on why this is
705 : : * okay.
706 : : */
707 : 148 : returningList = linitial(node->returningLists);
708 : :
709 : : /*
710 : : * Convert Vars in it to contain this partition's attribute numbers.
711 : : */
2279 michael@paquier.xyz 712 [ + - ]: 148 : if (part_attmap == NULL)
713 : : part_attmap =
714 : 148 : build_attrmap_by_name(RelationGetDescr(partrel),
715 : : RelationGetDescr(firstResultRel),
716 : : false);
717 : : returningList = (List *)
2887 alvherre@alvh.no-ip. 718 : 148 : map_variable_attnos((Node *) returningList,
719 : : firstVarno, 0,
720 : : part_attmap,
721 : 148 : RelationGetForm(partrel)->reltype,
722 : : &found_whole_row);
723 : : /* We ignore the value of found_whole_row. */
724 : :
2900 rhaas@postgresql.org 725 : 148 : leaf_part_rri->ri_returningList = returningList;
726 : :
727 : : /*
728 : : * Initialize the projection itself.
729 : : *
730 : : * Use the slot and the expression context that would have been set up
731 : : * in ExecInitModifyTable() for projection's output.
732 : : */
2943 733 [ - + ]: 148 : Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
734 : 148 : slot = mtstate->ps.ps_ResultTupleSlot;
735 [ - + ]: 148 : Assert(mtstate->ps.ps_ExprContext != NULL);
736 : 148 : econtext = mtstate->ps.ps_ExprContext;
737 : 148 : leaf_part_rri->ri_projectReturning =
738 : 148 : ExecBuildProjectionInfo(returningList, econtext, slot,
739 : : &mtstate->ps, RelationGetDescr(partrel));
740 : : }
741 : :
742 : : /* Set up information needed for routing tuples to the partition. */
2676 alvherre@alvh.no-ip. 743 : 3726 : ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
744 : : leaf_part_rri, partidx, false);
745 : :
746 : : /*
747 : : * If there is an ON CONFLICT clause, initialize state for it.
748 : : */
2911 749 [ + + + + ]: 3726 : if (node && node->onConflictAction != ONCONFLICT_NONE)
750 : : {
751 : 169 : TupleDesc partrelDesc = RelationGetDescr(partrel);
752 : 169 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
753 : 169 : List *arbiterIndexes = NIL;
103 alvherre@kurilemu.de 754 :GNC 169 : int additional_arbiters = 0;
755 : :
756 : : /*
757 : : * If there is a list of arbiter indexes, map it to a list of indexes
758 : : * in the partition. We also add any "identical indexes" to any of
759 : : * those, to cover the case where one of them is concurrently being
760 : : * reindexed.
761 : : */
1306 tgl@sss.pgh.pa.us 762 [ + + ]:CBC 169 : if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
763 : : {
103 alvherre@kurilemu.de 764 :GNC 141 : List *unparented_idxs = NIL,
46 765 : 141 : *arbiters_listidxs = NIL,
766 : 141 : *ancestors_seen = NIL;
767 : :
103 768 [ + + ]: 301 : for (int listidx = 0; listidx < leaf_part_rri->ri_NumIndices; listidx++)
769 : : {
770 : : Oid indexoid;
771 : : List *ancestors;
772 : :
773 : : /*
774 : : * If one of this index's ancestors is in the root's arbiter
775 : : * list, then use this index as arbiter for this partition.
776 : : * Otherwise, if this index has no parent, track it for later,
777 : : * in case REINDEX CONCURRENTLY is working on one of the
778 : : * arbiters.
779 : : *
780 : : * However, if two indexes appear to have the same parent,
781 : : * treat the second of these as if it had no parent. This
782 : : * sounds counterintuitive, but it can happen if a transaction
783 : : * running REINDEX CONCURRENTLY commits right between those
784 : : * two indexes are checked by another process in this loop.
785 : : * This will have the effect of also treating that second
786 : : * index as arbiter.
787 : : *
788 : : * XXX get_partition_ancestors scans pg_inherits, which is not
789 : : * only slow, but also means the catalog snapshot can get
790 : : * invalidated each time through the loop (cf.
791 : : * GetNonHistoricCatalogSnapshot). Consider a syscache or
792 : : * some other way to cache?
793 : : */
794 : 160 : indexoid = RelationGetRelid(leaf_part_rri->ri_IndexRelationDescs[listidx]);
795 : 160 : ancestors = get_partition_ancestors(indexoid);
46 796 : 160 : INJECTION_POINT("exec-init-partition-after-get-partition-ancestors", NULL);
797 : :
798 [ + + ]: 160 : if (ancestors != NIL &&
799 [ + + ]: 142 : !list_member_oid(ancestors_seen, linitial_oid(ancestors)))
800 : : {
103 801 [ + - + - : 282 : foreach_oid(parent_idx, rootResultRelInfo->ri_onConflictArbiterIndexes)
+ + ]
802 : : {
803 [ + - ]: 141 : if (list_member_oid(ancestors, parent_idx))
804 : : {
46 805 : 141 : ancestors_seen = lappend_oid(ancestors_seen, linitial_oid(ancestors));
103 806 : 141 : arbiterIndexes = lappend_oid(arbiterIndexes, indexoid);
807 : 141 : arbiters_listidxs = lappend_int(arbiters_listidxs, listidx);
808 : 141 : break;
809 : : }
810 : : }
811 : : }
812 : : else
813 : 19 : unparented_idxs = lappend_int(unparented_idxs, listidx);
814 : :
2911 alvherre@alvh.no-ip. 815 :CBC 160 : list_free(ancestors);
816 : : }
817 : :
818 : : /*
819 : : * If we found any indexes with no ancestors, it's possible that
820 : : * some arbiter index is undergoing concurrent reindex. Match all
821 : : * unparented indexes against arbiters; add unparented matching
822 : : * ones as "additional arbiters".
823 : : *
824 : : * This is critical so that all concurrent transactions use the
825 : : * same set as arbiters during REINDEX CONCURRENTLY, to avoid
826 : : * spurious "duplicate key" errors.
827 : : */
103 alvherre@kurilemu.de 828 [ + + + - ]:GNC 141 : if (unparented_idxs && arbiterIndexes)
829 : : {
830 [ + - + + : 57 : foreach_int(unparented_i, unparented_idxs)
+ + ]
831 : : {
832 : : Relation unparented_rel;
833 : : IndexInfo *unparented_ii;
834 : :
835 : 19 : unparented_rel = leaf_part_rri->ri_IndexRelationDescs[unparented_i];
46 836 : 19 : unparented_ii = leaf_part_rri->ri_IndexRelationInfo[unparented_i];
837 : :
103 838 [ - + ]: 19 : Assert(!list_member_oid(arbiterIndexes,
839 : : unparented_rel->rd_index->indexrelid));
840 : :
841 : : /* Ignore indexes not ready */
46 842 [ - + ]: 19 : if (!unparented_ii->ii_ReadyForInserts)
103 alvherre@kurilemu.de 843 :UNC 0 : continue;
844 : :
103 alvherre@kurilemu.de 845 [ + - + + :GNC 50 : foreach_int(arbiter_i, arbiters_listidxs)
+ + ]
846 : : {
847 : : Relation arbiter_rel;
848 : : IndexInfo *arbiter_ii;
849 : :
850 : 19 : arbiter_rel = leaf_part_rri->ri_IndexRelationDescs[arbiter_i];
851 : 19 : arbiter_ii = leaf_part_rri->ri_IndexRelationInfo[arbiter_i];
852 : :
853 : : /*
854 : : * If the non-ancestor index is compatible with the
855 : : * arbiter, use the non-ancestor as arbiter too.
856 : : */
857 [ + + ]: 19 : if (IsIndexCompatibleAsArbiter(arbiter_rel,
858 : : arbiter_ii,
859 : : unparented_rel,
860 : : unparented_ii))
861 : : {
862 : 7 : arbiterIndexes = lappend_oid(arbiterIndexes,
863 : 7 : unparented_rel->rd_index->indexrelid);
864 : 7 : additional_arbiters++;
865 : 7 : break;
866 : : }
867 : : }
868 : : }
869 : : }
870 : 141 : list_free(unparented_idxs);
871 : 141 : list_free(arbiters_listidxs);
46 872 : 141 : list_free(ancestors_seen);
873 : : }
874 : :
875 : : /*
876 : : * We expect to find as many arbiter indexes on this partition as the
877 : : * root has, plus however many "additional arbiters" (to wit: those
878 : : * being concurrently rebuilt) we found.
879 : : */
2676 alvherre@alvh.no-ip. 880 [ - + ]:CBC 169 : if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
103 alvherre@kurilemu.de 881 [ - + ]:GNC 169 : list_length(arbiterIndexes) - additional_arbiters)
2911 alvherre@alvh.no-ip. 882 [ # # ]:UBC 0 : elog(ERROR, "invalid arbiter index list");
2911 alvherre@alvh.no-ip. 883 :CBC 169 : leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
884 : :
885 : : /*
886 : : * In the DO UPDATE and DO SELECT cases, we have some more state to
887 : : * initialize.
888 : : */
31 dean.a.rasheed@gmail 889 [ + + ]:GNC 169 : if (node->onConflictAction == ONCONFLICT_UPDATE ||
890 [ + + ]: 76 : node->onConflictAction == ONCONFLICT_SELECT)
891 : : {
892 : 135 : OnConflictActionState *onconfl = makeNode(OnConflictActionState);
893 : : TupleConversionMap *map;
894 : :
1199 alvherre@alvh.no-ip. 895 :CBC 135 : map = ExecGetRootToChildMap(leaf_part_rri, estate);
896 : :
31 dean.a.rasheed@gmail 897 [ + + - + ]:GNC 135 : Assert(node->onConflictSet != NIL ||
898 : : node->onConflictAction == ONCONFLICT_SELECT);
2676 alvherre@alvh.no-ip. 899 [ - + ]:CBC 135 : Assert(rootResultRelInfo->ri_onConflict != NULL);
900 : :
1770 tgl@sss.pgh.pa.us 901 : 135 : leaf_part_rri->ri_onConflict = onconfl;
902 : :
903 : : /* Lock strength for DO SELECT [FOR UPDATE/SHARE] */
31 dean.a.rasheed@gmail 904 :GNC 135 : onconfl->oc_LockStrength =
905 : 135 : rootResultRelInfo->ri_onConflict->oc_LockStrength;
906 : :
907 : : /*
908 : : * Need a separate existing slot for each partition, as the
909 : : * partition could be of a different AM, even if the tuple
910 : : * descriptors match.
911 : : */
1770 tgl@sss.pgh.pa.us 912 :CBC 135 : onconfl->oc_Existing =
2561 andres@anarazel.de 913 : 135 : table_slot_create(leaf_part_rri->ri_RelationDesc,
914 : 135 : &mtstate->ps.state->es_tupleTable);
915 : :
916 : : /*
917 : : * If the partition's tuple descriptor matches exactly the root
918 : : * parent (the common case), we can re-use most of the parent's ON
919 : : * CONFLICT action state, skipping a bunch of work. Otherwise, we
920 : : * need to create state specific to this partition.
921 : : */
2911 alvherre@alvh.no-ip. 922 [ + + ]: 135 : if (map == NULL)
923 : : {
924 : : /*
925 : : * It's safe to reuse these from the partition root, as we
926 : : * only process one tuple at a time (therefore we won't
927 : : * overwrite needed data in slots), and the results of any
928 : : * projections are independent of the underlying storage.
929 : : * Projections and where clauses themselves don't store state
930 : : * / are independent of the underlying storage.
931 : : */
1770 tgl@sss.pgh.pa.us 932 : 73 : onconfl->oc_ProjSlot =
2566 andres@anarazel.de 933 : 73 : rootResultRelInfo->ri_onConflict->oc_ProjSlot;
1770 tgl@sss.pgh.pa.us 934 : 73 : onconfl->oc_ProjInfo =
2566 andres@anarazel.de 935 : 73 : rootResultRelInfo->ri_onConflict->oc_ProjInfo;
1770 tgl@sss.pgh.pa.us 936 : 73 : onconfl->oc_WhereClause =
2566 andres@anarazel.de 937 : 73 : rootResultRelInfo->ri_onConflict->oc_WhereClause;
938 : : }
939 : : else
940 : : {
941 : : /*
942 : : * For ON CONFLICT DO UPDATE, translate expressions in
943 : : * onConflictSet to account for different attribute numbers.
944 : : * For that, map partition varattnos twice: first to catch the
945 : : * EXCLUDED pseudo-relation (INNER_VAR), and second to handle
946 : : * the main target relation (firstVarno).
947 : : */
31 dean.a.rasheed@gmail 948 [ + + ]:GNC 62 : if (node->onConflictAction == ONCONFLICT_UPDATE)
949 : : {
950 : : List *onconflset;
951 : : List *onconflcols;
952 : :
953 : 38 : onconflset = copyObject(node->onConflictSet);
954 [ + + ]: 38 : if (part_attmap == NULL)
955 : : part_attmap =
956 : 35 : build_attrmap_by_name(RelationGetDescr(partrel),
957 : : RelationGetDescr(firstResultRel),
958 : : false);
959 : : onconflset = (List *)
960 : 38 : map_variable_attnos((Node *) onconflset,
961 : : INNER_VAR, 0,
962 : : part_attmap,
963 : 38 : RelationGetForm(partrel)->reltype,
964 : : &found_whole_row);
965 : : /* We ignore the value of found_whole_row. */
966 : : onconflset = (List *)
967 : 38 : map_variable_attnos((Node *) onconflset,
968 : : firstVarno, 0,
969 : : part_attmap,
970 : 38 : RelationGetForm(partrel)->reltype,
971 : : &found_whole_row);
972 : : /* We ignore the value of found_whole_row. */
973 : :
974 : : /*
975 : : * Finally, adjust the target colnos to match the
976 : : * partition.
977 : : */
978 : 38 : onconflcols = adjust_partition_colnos(node->onConflictCols,
979 : : leaf_part_rri);
980 : :
981 : : /* create the tuple slot for the UPDATE SET projection */
982 : 38 : onconfl->oc_ProjSlot =
983 : 38 : table_slot_create(partrel,
984 : 38 : &mtstate->ps.state->es_tupleTable);
985 : :
986 : : /* build UPDATE SET projection state */
987 : 38 : onconfl->oc_ProjInfo =
988 : 38 : ExecBuildUpdateProjection(onconflset,
989 : : true,
990 : : onconflcols,
991 : : partrelDesc,
992 : : econtext,
993 : : onconfl->oc_ProjSlot,
994 : : &mtstate->ps);
995 : : }
996 : :
997 : : /*
998 : : * For both ON CONFLICT DO UPDATE and ON CONFLICT DO SELECT,
999 : : * there may be a WHERE clause. If so, initialize state where
1000 : : * it will be evaluated, mapping the attribute numbers
1001 : : * appropriately. As with onConflictSet, we need to map
1002 : : * partition varattnos twice, to catch both the EXCLUDED
1003 : : * pseudo-relation (INNER_VAR), and the main target relation
1004 : : * (firstVarno).
1005 : : */
2911 alvherre@alvh.no-ip. 1006 [ + + ]:CBC 62 : if (node->onConflictWhere)
1007 : : {
1008 : : List *clause;
1009 : :
31 dean.a.rasheed@gmail 1010 [ - + ]:GNC 27 : if (part_attmap == NULL)
1011 : : part_attmap =
31 dean.a.rasheed@gmail 1012 :UNC 0 : build_attrmap_by_name(RelationGetDescr(partrel),
1013 : : RelationGetDescr(firstResultRel),
1014 : : false);
1015 : :
2911 alvherre@alvh.no-ip. 1016 :CBC 27 : clause = copyObject((List *) node->onConflictWhere);
1017 : : clause = (List *)
2887 1018 : 27 : map_variable_attnos((Node *) clause,
1019 : : INNER_VAR, 0,
1020 : : part_attmap,
1021 : 27 : RelationGetForm(partrel)->reltype,
1022 : : &found_whole_row);
1023 : : /* We ignore the value of found_whole_row. */
1024 : : clause = (List *)
1025 : 27 : map_variable_attnos((Node *) clause,
1026 : : firstVarno, 0,
1027 : : part_attmap,
1028 : 27 : RelationGetForm(partrel)->reltype,
1029 : : &found_whole_row);
1030 : : /* We ignore the value of found_whole_row. */
1770 tgl@sss.pgh.pa.us 1031 : 27 : onconfl->oc_WhereClause =
103 peter@eisentraut.org 1032 :GNC 27 : ExecInitQual(clause, &mtstate->ps);
1033 : : }
1034 : : }
1035 : : }
1036 : : }
1037 : :
1038 : : /*
1039 : : * Since we've just initialized this ResultRelInfo, it's not in any list
1040 : : * attached to the estate as yet. Add it, so that it can be found later.
1041 : : *
1042 : : * Note that the entries in this list appear in no predetermined order,
1043 : : * because partition result rels are initialized as and when they're
1044 : : * needed.
1045 : : */
2676 alvherre@alvh.no-ip. 1046 :CBC 3726 : MemoryContextSwitchTo(estate->es_query_cxt);
1047 : 3726 : estate->es_tuple_routing_result_relations =
1048 : 3726 : lappend(estate->es_tuple_routing_result_relations,
1049 : : leaf_part_rri);
1050 : :
1051 : : /*
1052 : : * Initialize information about this partition that's needed to handle
1053 : : * MERGE. We take the "first" result relation's mergeActionList as
1054 : : * reference and make copy for this relation, converting stuff that
1055 : : * references attribute numbers to match this relation's.
1056 : : *
1057 : : * This duplicates much of the logic in ExecInitMerge(), so if something
1058 : : * changes there, look here too.
1059 : : */
1448 1060 [ + + + + ]: 3726 : if (node && node->operation == CMD_MERGE)
1061 : : {
1062 : 12 : List *firstMergeActionList = linitial(node->mergeActionLists);
1063 : : ListCell *lc;
1064 : 12 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
1065 : : Node *joinCondition;
1066 : :
1067 [ + + ]: 12 : if (part_attmap == NULL)
1068 : : part_attmap =
1069 : 6 : build_attrmap_by_name(RelationGetDescr(partrel),
1070 : : RelationGetDescr(firstResultRel),
1071 : : false);
1072 : :
1073 [ + - ]: 12 : if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
1074 : 12 : ExecInitMergeTupleSlots(mtstate, leaf_part_rri);
1075 : :
1076 : : /* Initialize state for join condition checking. */
1077 : : joinCondition =
715 dean.a.rasheed@gmail 1078 : 12 : map_variable_attnos(linitial(node->mergeJoinConditions),
1079 : : firstVarno, 0,
1080 : : part_attmap,
1081 : 12 : RelationGetForm(partrel)->reltype,
1082 : : &found_whole_row);
1083 : : /* We ignore the value of found_whole_row. */
1084 : 12 : leaf_part_rri->ri_MergeJoinCondition =
1085 : 12 : ExecInitQual((List *) joinCondition, &mtstate->ps);
1086 : :
1448 alvherre@alvh.no-ip. 1087 [ + - + + : 30 : foreach(lc, firstMergeActionList)
+ + ]
1088 : : {
1089 : : /* Make a copy for this relation to be safe. */
1090 : 18 : MergeAction *action = copyObject(lfirst(lc));
1091 : : MergeActionState *action_state;
1092 : :
1093 : : /* Generate the action's state for this relation */
1094 : 18 : action_state = makeNode(MergeActionState);
1095 : 18 : action_state->mas_action = action;
1096 : :
1097 : : /* And put the action in the appropriate list */
715 dean.a.rasheed@gmail 1098 : 36 : leaf_part_rri->ri_MergeActions[action->matchKind] =
1099 : 18 : lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
1100 : : action_state);
1101 : :
1448 alvherre@alvh.no-ip. 1102 [ + + + - ]: 18 : switch (action->commandType)
1103 : : {
1104 : 6 : case CMD_INSERT:
1105 : :
1106 : : /*
1107 : : * ExecCheckPlanOutput() already done on the targetlist
1108 : : * when "first" result relation initialized and it is same
1109 : : * for all result relations.
1110 : : */
1111 : 6 : action_state->mas_proj =
1112 : 6 : ExecBuildProjectionInfo(action->targetList, econtext,
1113 : : leaf_part_rri->ri_newTupleSlot,
1114 : : &mtstate->ps,
1115 : : RelationGetDescr(partrel));
1116 : 6 : break;
1117 : 9 : case CMD_UPDATE:
1118 : :
1119 : : /*
1120 : : * Convert updateColnos from "first" result relation
1121 : : * attribute numbers to this result rel's.
1122 : : */
1123 [ + - ]: 9 : if (part_attmap)
1124 : 9 : action->updateColnos =
1125 : 9 : adjust_partition_colnos_using_map(action->updateColnos,
1126 : : part_attmap);
1127 : 9 : action_state->mas_proj =
1128 : 9 : ExecBuildUpdateProjection(action->targetList,
1129 : : true,
1130 : : action->updateColnos,
1131 : 9 : RelationGetDescr(leaf_part_rri->ri_RelationDesc),
1132 : : econtext,
1133 : : leaf_part_rri->ri_newTupleSlot,
1134 : : NULL);
1135 : 9 : break;
1136 : 3 : case CMD_DELETE:
1137 : : case CMD_NOTHING:
1138 : : /* Nothing to do */
1139 : 3 : break;
1140 : :
1448 alvherre@alvh.no-ip. 1141 :UBC 0 : default:
1142 [ # # ]: 0 : elog(ERROR, "unknown action in MERGE WHEN clause");
1143 : : }
1144 : :
1145 : : /* found_whole_row intentionally ignored. */
1448 alvherre@alvh.no-ip. 1146 :CBC 18 : action->qual =
1147 : 18 : map_variable_attnos(action->qual,
1148 : : firstVarno, 0,
1149 : : part_attmap,
1150 : 18 : RelationGetForm(partrel)->reltype,
1151 : : &found_whole_row);
1152 : 18 : action_state->mas_whenqual =
1153 : 18 : ExecInitQual((List *) action->qual, &mtstate->ps);
1154 : : }
1155 : : }
2676 1156 : 3726 : MemoryContextSwitchTo(oldcxt);
1157 : :
2943 rhaas@postgresql.org 1158 : 3726 : return leaf_part_rri;
1159 : : }
1160 : :
1161 : : /*
1162 : : * ExecInitRoutingInfo
1163 : : * Set up information needed for translating tuples between root
1164 : : * partitioned table format and partition format, and keep track of it
1165 : : * in PartitionTupleRouting.
1166 : : */
1167 : : static void
2900 1168 : 3980 : ExecInitRoutingInfo(ModifyTableState *mtstate,
1169 : : EState *estate,
1170 : : PartitionTupleRouting *proute,
1171 : : PartitionDispatch dispatch,
1172 : : ResultRelInfo *partRelInfo,
1173 : : int partidx,
1174 : : bool is_borrowed_rel)
1175 : : {
1176 : : MemoryContext oldcxt;
1177 : : int rri_index;
1178 : :
2676 alvherre@alvh.no-ip. 1179 : 3980 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
1180 : :
1181 : : /*
1182 : : * Set up tuple conversion between root parent and the partition if the
1183 : : * two have different rowtypes. If conversion is indeed required, also
1184 : : * initialize a slot dedicated to storing this partition's converted
1185 : : * tuples. Various operations that are applied to tuples after routing,
1186 : : * such as checking constraints, will refer to this slot.
1187 : : */
1199 1188 [ + + ]: 3980 : if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
1189 : : {
2721 andres@anarazel.de 1190 : 705 : Relation partrel = partRelInfo->ri_RelationDesc;
1191 : :
1192 : : /*
1193 : : * This pins the partition's TupleDesc, which will be released at the
1194 : : * end of the command.
1195 : : */
1973 heikki.linnakangas@i 1196 : 705 : partRelInfo->ri_PartitionTupleSlot =
2561 andres@anarazel.de 1197 : 705 : table_slot_create(partrel, &estate->es_tupleTable);
1198 : : }
1199 : : else
1973 heikki.linnakangas@i 1200 : 3275 : partRelInfo->ri_PartitionTupleSlot = NULL;
1201 : :
1202 : : /*
1203 : : * If the partition is a foreign table, let the FDW init itself for
1204 : : * routing tuples to the partition.
1205 : : */
2900 rhaas@postgresql.org 1206 [ + + ]: 3980 : if (partRelInfo->ri_FdwRoutine != NULL &&
1207 [ + - ]: 46 : partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1208 : 46 : partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1209 : :
1210 : : /*
1211 : : * Determine if the FDW supports batch insert and determine the batch size
1212 : : * (a FDW may support batching, but it may be disabled for the
1213 : : * server/table or for this particular query).
1214 : : *
1215 : : * If the FDW does not support batching, we set the batch size to 1.
1216 : : */
1181 efujita@postgresql.o 1217 [ + + ]: 3974 : if (partRelInfo->ri_FdwRoutine != NULL &&
1880 tomas.vondra@postgre 1218 [ + - ]: 40 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
1219 [ + - ]: 40 : partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
1220 : 40 : partRelInfo->ri_BatchSize =
1221 : 40 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1222 : : else
1223 : 3934 : partRelInfo->ri_BatchSize = 1;
1224 : :
1225 [ - + ]: 3974 : Assert(partRelInfo->ri_BatchSize >= 1);
1226 : :
2537 andres@anarazel.de 1227 : 3974 : partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1228 : :
1229 : : /*
1230 : : * Keep track of it in the PartitionTupleRouting->partitions array.
1231 : : */
2676 alvherre@alvh.no-ip. 1232 [ - + ]: 3974 : Assert(dispatch->indexes[partidx] == -1);
1233 : :
1234 : 3974 : rri_index = proute->num_partitions++;
1235 : :
1236 : : /* Allocate or enlarge the array, as needed */
1237 [ + + ]: 3974 : if (proute->num_partitions >= proute->max_partitions)
1238 : : {
1239 [ + + ]: 2740 : if (proute->max_partitions == 0)
1240 : : {
1241 : 2734 : proute->max_partitions = 8;
94 michael@paquier.xyz 1242 :GNC 2734 : proute->partitions = palloc_array(ResultRelInfo *, proute->max_partitions);
1243 : 2734 : proute->is_borrowed_rel = palloc_array(bool, proute->max_partitions);
1244 : : }
1245 : : else
1246 : : {
2676 alvherre@alvh.no-ip. 1247 :CBC 6 : proute->max_partitions *= 2;
1248 : 6 : proute->partitions = (ResultRelInfo **)
1249 : 6 : repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1250 : 6 : proute->max_partitions);
1804 tgl@sss.pgh.pa.us 1251 : 6 : proute->is_borrowed_rel = (bool *)
1252 : 6 : repalloc(proute->is_borrowed_rel, sizeof(bool) *
1253 : 6 : proute->max_partitions);
1254 : : }
1255 : : }
1256 : :
2676 alvherre@alvh.no-ip. 1257 : 3974 : proute->partitions[rri_index] = partRelInfo;
1804 tgl@sss.pgh.pa.us 1258 : 3974 : proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
2676 alvherre@alvh.no-ip. 1259 : 3974 : dispatch->indexes[partidx] = rri_index;
1260 : :
1261 : 3974 : MemoryContextSwitchTo(oldcxt);
2900 rhaas@postgresql.org 1262 : 3974 : }
1263 : :
1264 : : /*
1265 : : * ExecInitPartitionDispatchInfo
1266 : : * Lock the partitioned table (if not locked already) and initialize
1267 : : * PartitionDispatch for a partitioned table and store it in the next
1268 : : * available slot in the proute->partition_dispatch_info array. Also,
1269 : : * record the index into this array in the parent_pd->indexes[] array in
1270 : : * the partidx element so that we can properly retrieve the newly created
1271 : : * PartitionDispatch later.
1272 : : */
1273 : : static PartitionDispatch
2565 1274 : 3507 : ExecInitPartitionDispatchInfo(EState *estate,
1275 : : PartitionTupleRouting *proute, Oid partoid,
1276 : : PartitionDispatch parent_pd, int partidx,
1277 : : ResultRelInfo *rootResultRelInfo)
1278 : : {
1279 : : Relation rel;
1280 : : PartitionDesc partdesc;
1281 : : PartitionDispatch pd;
1282 : : int dispatchidx;
1283 : : MemoryContext oldcxt;
1284 : :
1285 : : /*
1286 : : * For data modification, it is better that executor does not include
1287 : : * partitions being detached, except when running in snapshot-isolation
1288 : : * mode. This means that a read-committed transaction immediately gets a
1289 : : * "no partition for tuple" error when a tuple is inserted into a
1290 : : * partition that's being detached concurrently, but a transaction in
1291 : : * repeatable-read mode can still use such a partition.
1292 : : */
1293 [ + + ]: 3507 : if (estate->es_partition_directory == NULL)
1294 : 2879 : estate->es_partition_directory =
1816 alvherre@alvh.no-ip. 1295 : 2879 : CreatePartitionDirectory(estate->es_query_cxt,
1296 : : !IsolationUsesXactSnapshot());
1297 : :
2676 1298 : 3507 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
1299 : :
1300 : : /*
1301 : : * Only sub-partitioned tables need to be locked here. The root
1302 : : * partitioned table will already have been locked as it's referenced in
1303 : : * the query's rtable.
1304 : : */
1305 [ + + ]: 3507 : if (partoid != RelationGetRelid(proute->partition_root))
2579 rhaas@postgresql.org 1306 : 610 : rel = table_open(partoid, RowExclusiveLock);
1307 : : else
2676 alvherre@alvh.no-ip. 1308 : 2897 : rel = proute->partition_root;
2565 rhaas@postgresql.org 1309 : 3507 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1310 : :
2676 alvherre@alvh.no-ip. 1311 : 3507 : pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
1312 : 3507 : partdesc->nparts * sizeof(int));
1313 : 3507 : pd->reldesc = rel;
1314 : 3507 : pd->key = RelationGetPartitionKey(rel);
1315 : 3507 : pd->keystate = NIL;
1316 : 3507 : pd->partdesc = partdesc;
1317 [ + + ]: 3507 : if (parent_pd != NULL)
1318 : : {
1319 : 610 : TupleDesc tupdesc = RelationGetDescr(rel);
1320 : :
1321 : : /*
1322 : : * For sub-partitioned tables where the column order differs from its
1323 : : * direct parent partitioned table, we must store a tuple table slot
1324 : : * initialized with its tuple descriptor and a tuple conversion map to
1325 : : * convert a tuple from its parent's rowtype to its own. This is to
1326 : : * make sure that we are looking at the correct row using the correct
1327 : : * tuple descriptor when computing its partition key for tuple
1328 : : * routing.
1329 : : */
2279 michael@paquier.xyz 1330 : 610 : pd->tupmap = build_attrmap_by_name_if_req(RelationGetDescr(parent_pd->reldesc),
1331 : : tupdesc,
1332 : : false);
2676 alvherre@alvh.no-ip. 1333 : 610 : pd->tupslot = pd->tupmap ?
2571 andres@anarazel.de 1334 [ + + ]: 610 : MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
1335 : : }
1336 : : else
1337 : : {
1338 : : /* Not required for the root partitioned table */
2676 alvherre@alvh.no-ip. 1339 : 2897 : pd->tupmap = NULL;
1340 : 2897 : pd->tupslot = NULL;
1341 : : }
1342 : :
1343 : : /*
1344 : : * Initialize with -1 to signify that the corresponding partition's
1345 : : * ResultRelInfo or PartitionDispatch has not been created yet.
1346 : : */
1347 : 3507 : memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1348 : :
1349 : : /* Track in PartitionTupleRouting for later use */
1350 : 3507 : dispatchidx = proute->num_dispatch++;
1351 : :
1352 : : /* Allocate or enlarge the array, as needed */
1353 [ + + ]: 3507 : if (proute->num_dispatch >= proute->max_dispatch)
1354 : : {
1355 [ + - ]: 2897 : if (proute->max_dispatch == 0)
1356 : : {
1357 : 2897 : proute->max_dispatch = 4;
94 michael@paquier.xyz 1358 :GNC 2897 : proute->partition_dispatch_info = palloc_array(PartitionDispatch, proute->max_dispatch);
1359 : 2897 : proute->nonleaf_partitions = palloc_array(ResultRelInfo *, proute->max_dispatch);
1360 : : }
1361 : : else
1362 : : {
2676 alvherre@alvh.no-ip. 1363 :UBC 0 : proute->max_dispatch *= 2;
1364 : 0 : proute->partition_dispatch_info = (PartitionDispatch *)
1365 : 0 : repalloc(proute->partition_dispatch_info,
1366 : 0 : sizeof(PartitionDispatch) * proute->max_dispatch);
2014 1367 : 0 : proute->nonleaf_partitions = (ResultRelInfo **)
1368 : 0 : repalloc(proute->nonleaf_partitions,
1369 : 0 : sizeof(ResultRelInfo *) * proute->max_dispatch);
1370 : : }
1371 : : }
2676 alvherre@alvh.no-ip. 1372 :CBC 3507 : proute->partition_dispatch_info[dispatchidx] = pd;
1373 : :
1374 : : /*
1375 : : * If setting up a PartitionDispatch for a sub-partitioned table, we may
1376 : : * also need a minimally valid ResultRelInfo for checking the partition
1377 : : * constraint later; set that up now.
1378 : : */
2014 1379 [ + + ]: 3507 : if (parent_pd)
1380 : : {
1381 : 610 : ResultRelInfo *rri = makeNode(ResultRelInfo);
1382 : :
1861 heikki.linnakangas@i 1383 : 610 : InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
2014 alvherre@alvh.no-ip. 1384 : 610 : proute->nonleaf_partitions[dispatchidx] = rri;
1385 : : }
1386 : : else
1387 : 2897 : proute->nonleaf_partitions[dispatchidx] = NULL;
1388 : :
1389 : : /*
1390 : : * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1391 : : * install a downlink in the parent to allow quick descent.
1392 : : */
2676 1393 [ + + ]: 3507 : if (parent_pd)
1394 : : {
1395 [ - + ]: 610 : Assert(parent_pd->indexes[partidx] == -1);
1396 : 610 : parent_pd->indexes[partidx] = dispatchidx;
1397 : : }
1398 : :
1399 : 3507 : MemoryContextSwitchTo(oldcxt);
1400 : :
1401 : 3507 : return pd;
1402 : : }
1403 : :
1404 : : /*
1405 : : * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1406 : : * routing.
1407 : : *
1408 : : * Close all the partitioned tables, leaf partitions, and their indices.
1409 : : */
1410 : : void
2900 rhaas@postgresql.org 1411 : 2475 : ExecCleanupTupleRouting(ModifyTableState *mtstate,
1412 : : PartitionTupleRouting *proute)
1413 : : {
1414 : : int i;
1415 : :
1416 : : /*
1417 : : * Remember, proute->partition_dispatch_info[0] corresponds to the root
1418 : : * partitioned table, which we must not try to close, because it is the
1419 : : * main target table of the query that will be closed by callers such as
1420 : : * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1421 : : * partitioned table.
1422 : : */
2992 1423 [ + + ]: 2973 : for (i = 1; i < proute->num_dispatch; i++)
1424 : : {
1425 : 498 : PartitionDispatch pd = proute->partition_dispatch_info[i];
1426 : :
2610 andres@anarazel.de 1427 : 498 : table_close(pd->reldesc, NoLock);
1428 : :
2676 alvherre@alvh.no-ip. 1429 [ + + ]: 498 : if (pd->tupslot)
1430 : 230 : ExecDropSingleTupleTableSlot(pd->tupslot);
1431 : : }
1432 : :
2992 rhaas@postgresql.org 1433 [ + + ]: 6147 : for (i = 0; i < proute->num_partitions; i++)
1434 : : {
1435 : 3672 : ResultRelInfo *resultRelInfo = proute->partitions[i];
1436 : :
1437 : : /* Allow any FDWs to shut down */
2526 efujita@postgresql.o 1438 [ + + ]: 3672 : if (resultRelInfo->ri_FdwRoutine != NULL &&
1439 [ + - ]: 34 : resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1440 : 34 : resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1441 : : resultRelInfo);
1442 : :
1443 : : /*
1444 : : * Close it if it's not one of the result relations borrowed from the
1445 : : * owning ModifyTableState; those will be closed by ExecEndPlan().
1446 : : */
1804 tgl@sss.pgh.pa.us 1447 [ + + ]: 3672 : if (proute->is_borrowed_rel[i])
1448 : 230 : continue;
1449 : :
2992 rhaas@postgresql.org 1450 : 3442 : ExecCloseIndices(resultRelInfo);
2610 andres@anarazel.de 1451 : 3442 : table_close(resultRelInfo->ri_RelationDesc, NoLock);
1452 : : }
3042 rhaas@postgresql.org 1453 : 2475 : }
1454 : :
1455 : : /* ----------------
1456 : : * FormPartitionKeyDatum
1457 : : * Construct values[] and isnull[] arrays for the partition key
1458 : : * of a tuple.
1459 : : *
1460 : : * pd Partition dispatch object of the partitioned table
1461 : : * slot Heap tuple from which to extract partition key
1462 : : * estate executor state for evaluating any partition key
1463 : : * expressions (must be non-NULL)
1464 : : * values Array of partition key Datums (output area)
1465 : : * isnull Array of is-null indicators (output area)
1466 : : *
1467 : : * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1468 : : * the heap tuple passed in.
1469 : : * ----------------
1470 : : */
1471 : : static void
1472 : 583532 : FormPartitionKeyDatum(PartitionDispatch pd,
1473 : : TupleTableSlot *slot,
1474 : : EState *estate,
1475 : : Datum *values,
1476 : : bool *isnull)
1477 : : {
1478 : : ListCell *partexpr_item;
1479 : : int i;
1480 : :
1481 [ + + + + ]: 583532 : if (pd->key->partexprs != NIL && pd->keystate == NIL)
1482 : : {
1483 : : /* Check caller has set up context correctly */
1484 [ + - + - : 273 : Assert(estate != NULL &&
- + ]
1485 : : GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1486 : :
1487 : : /* First time through, set up expression evaluation state */
1488 : 273 : pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1489 : : }
1490 : :
1491 : 583532 : partexpr_item = list_head(pd->keystate);
1492 [ + + ]: 1178602 : for (i = 0; i < pd->key->partnatts; i++)
1493 : : {
1494 : 595070 : AttrNumber keycol = pd->key->partattrs[i];
1495 : : Datum datum;
1496 : : bool isNull;
1497 : :
1498 [ + + ]: 595070 : if (keycol != 0)
1499 : : {
1500 : : /* Plain column; get the value directly from the heap tuple */
1501 : 551252 : datum = slot_getattr(slot, keycol, &isNull);
1502 : : }
1503 : : else
1504 : : {
1505 : : /* Expression; need to evaluate it */
1506 [ - + ]: 43818 : if (partexpr_item == NULL)
3042 rhaas@postgresql.org 1507 [ # # ]:UBC 0 : elog(ERROR, "wrong number of partition key expressions");
3042 rhaas@postgresql.org 1508 :CBC 43818 : datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1509 [ + - ]: 43818 : GetPerTupleExprContext(estate),
1510 : : &isNull);
2435 tgl@sss.pgh.pa.us 1511 : 43818 : partexpr_item = lnext(pd->keystate, partexpr_item);
1512 : : }
3042 rhaas@postgresql.org 1513 : 595070 : values[i] = datum;
1514 : 595070 : isnull[i] = isNull;
1515 : : }
1516 : :
1517 [ - + ]: 583532 : if (partexpr_item != NULL)
3042 rhaas@postgresql.org 1518 [ # # ]:UBC 0 : elog(ERROR, "wrong number of partition key expressions");
3042 rhaas@postgresql.org 1519 :CBC 583532 : }
1520 : :
1521 : : /*
1522 : : * The number of times the same partition must be found in a row before we
1523 : : * switch from a binary search for the given values to just checking if the
1524 : : * values belong to the last found partition. This must be above 0.
1525 : : */
1526 : : #define PARTITION_CACHED_FIND_THRESHOLD 16
1527 : :
1528 : : /*
1529 : : * get_partition_for_tuple
1530 : : * Finds partition of relation which accepts the partition key specified
1531 : : * in values and isnull.
1532 : : *
1533 : : * Calling this function can be quite expensive when LIST and RANGE
1534 : : * partitioned tables have many partitions. This is due to the binary search
1535 : : * that's done to find the correct partition. Many of the use cases for LIST
1536 : : * and RANGE partitioned tables make it likely that the same partition is
1537 : : * found in subsequent ExecFindPartition() calls. This is especially true for
1538 : : * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1539 : : * partition key is the current time. When asked to find a partition for a
1540 : : * RANGE or LIST partitioned table, we record the partition index and datum
1541 : : * offset we've found for the given 'values' in the PartitionDesc (which is
1542 : : * stored in relcache), and if we keep finding the same partition
1543 : : * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1544 : : * logic and instead of performing a binary search to find the correct
1545 : : * partition, we'll just double-check that 'values' still belong to the last
1546 : : * found partition, and if so, we'll return that partition index, thus
1547 : : * skipping the need for the binary search. If we fail to match the last
1548 : : * partition when double checking, then we fall back on doing a binary search.
1549 : : * In this case, unless we find 'values' belong to the DEFAULT partition,
1550 : : * we'll reset the number of times we've hit the same partition so that we
1551 : : * don't attempt to use the cache again until we've found that partition at
1552 : : * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1553 : : *
1554 : : * For cases where the partition changes on each lookup, the amount of
1555 : : * additional work required just amounts to recording the last found partition
1556 : : * and bound offset then resetting the found counter. This is cheap and does
1557 : : * not appear to cause any meaningful slowdowns for such cases.
1558 : : *
1559 : : * No caching of partitions is done when the last found partition is the
1560 : : * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1561 : : * is no bound offset storing the matching datum, so we cannot confirm the
1562 : : * indexes match. For the NULL partition, this is just so cheap, there's no
1563 : : * sense in caching.
1564 : : *
1565 : : * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1566 : : * found or -1 if none found.
1567 : : */
static int
get_partition_for_tuple(PartitionDispatch pd, const Datum *values, const bool *isnull)
{
	int			bound_offset = -1;
	int			part_index = -1;
	PartitionKey key = pd->key;
	PartitionDesc partdesc = pd->partdesc;
	PartitionBoundInfo boundinfo = partdesc->boundinfo;

	/*
	 * In the switch statement below, when we perform a cached lookup for
	 * RANGE and LIST partitioned tables, if we find that the last found
	 * partition matches the 'values', we return the partition index right
	 * away.  We do this instead of breaking out of the switch as we don't
	 * want to execute the code about the DEFAULT partition or do any updates
	 * for any of the cache-related fields.  That would be a waste of effort
	 * as we already know it's not the DEFAULT partition and have no need to
	 * increment the number of times we found the same partition any higher
	 * than PARTITION_CACHED_FIND_THRESHOLD.
	 */

	/* Route as appropriate based on partitioning strategy. */
	switch (key->strategy)
	{
		case PARTITION_STRATEGY_HASH:
			{
				uint64		rowHash;

				/* hash partitioning is too cheap to bother caching */
				rowHash = compute_partition_hash_value(key->partnatts,
													   key->partsupfunc,
													   key->partcollation,
													   values, isnull);

				/*
				 * HASH partitions can't have a DEFAULT partition and we don't
				 * do any caching work for them, so just return the part index
				 */
				return boundinfo->indexes[rowHash % boundinfo->nindexes];
			}

		case PARTITION_STRATEGY_LIST:
			/* LIST keys have exactly one column, so only values[0] matters */
			if (isnull[0])
			{
				/* this is far too cheap to bother doing any caching */
				if (partition_bound_accepts_nulls(boundinfo))
				{
					/*
					 * When there is a NULL partition we just return that
					 * directly.  We don't have a bound_offset so it's not
					 * valid to drop into the code after the switch which
					 * checks and updates the cache fields.  We perhaps should
					 * be invalidating the details of the last cached
					 * partition but there's no real need to.  Keeping those
					 * fields set gives a chance at matching to the cached
					 * partition on the next lookup.
					 */
					return boundinfo->null_index;
				}
			}
			else
			{
				bool		equal;

				if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
				{
					int			last_datum_offset = partdesc->last_found_datum_index;
					Datum		lastDatum = boundinfo->datums[last_datum_offset][0];
					int32		cmpval;

					/* does the last found datum index match this datum? */
					cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
															 key->partcollation[0],
															 lastDatum,
															 values[0]));

					if (cmpval == 0)
						return boundinfo->indexes[last_datum_offset];

					/* fall-through and do a manual lookup */
				}

				bound_offset = partition_list_bsearch(key->partsupfunc,
													  key->partcollation,
													  boundinfo,
													  values[0], &equal);
				/* an exact datum match is required for a LIST partition */
				if (bound_offset >= 0 && equal)
					part_index = boundinfo->indexes[bound_offset];
			}
			break;

		case PARTITION_STRATEGY_RANGE:
			{
				bool		equal = false,
							range_partkey_has_null = false;
				int			i;

				/*
				 * No range includes NULL, so this will be accepted by the
				 * default partition if there is one, and otherwise rejected.
				 */
				for (i = 0; i < key->partnatts; i++)
				{
					if (isnull[i])
					{
						range_partkey_has_null = true;
						break;
					}
				}

				/* NULLs belong in the DEFAULT partition */
				if (range_partkey_has_null)
					break;

				if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
				{
					int			last_datum_offset = partdesc->last_found_datum_index;
					Datum	   *lastDatums = boundinfo->datums[last_datum_offset];
					PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
					int32		cmpval;

					/* check if the value is >= to the lower bound */
					cmpval = partition_rbound_datum_cmp(key->partsupfunc,
														key->partcollation,
														lastDatums,
														kind,
														values,
														key->partnatts);

					/*
					 * If it's equal to the lower bound then no need to check
					 * the upper bound.
					 */
					if (cmpval == 0)
						return boundinfo->indexes[last_datum_offset + 1];

					if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
					{
						/* check if the value is below the upper bound */
						lastDatums = boundinfo->datums[last_datum_offset + 1];
						kind = boundinfo->kind[last_datum_offset + 1];
						cmpval = partition_rbound_datum_cmp(key->partsupfunc,
															key->partcollation,
															lastDatums,
															kind,
															values,
															key->partnatts);

						if (cmpval > 0)
							return boundinfo->indexes[last_datum_offset + 1];
					}
					/* fall-through and do a manual lookup */
				}

				bound_offset = partition_range_datum_bsearch(key->partsupfunc,
															 key->partcollation,
															 boundinfo,
															 key->partnatts,
															 values,
															 &equal);

				/*
				 * The bound at bound_offset is less than or equal to the
				 * tuple value, so the bound at offset+1 is the upper bound of
				 * the partition we're looking for, if there actually exists
				 * one.
				 */
				part_index = boundinfo->indexes[bound_offset + 1];
			}
			break;

		default:
			elog(ERROR, "unexpected partition strategy: %d",
				 (int) key->strategy);
	}

	/*
	 * part_index < 0 means we failed to find a partition of this parent.  Use
	 * the default partition, if there is one.
	 */
	if (part_index < 0)
	{
		/*
		 * No need to reset the cache fields here.  The next set of values
		 * might end up belonging to the cached partition, so leaving the
		 * cache alone improves the chances of a cache hit on the next lookup.
		 */
		return boundinfo->default_index;
	}

	/* we should only make it here when the code above set bound_offset */
	Assert(bound_offset >= 0);

	/*
	 * Attend to the cache fields.  If the bound_offset matches the last
	 * cached bound offset then we've found the same partition as last time,
	 * so bump the count by one.  If all goes well, we'll eventually reach
	 * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
	 * around.  Otherwise, we'll reset the cache count back to 1 to mark that
	 * we've found this partition for the first time.
	 */
	if (bound_offset == partdesc->last_found_datum_index)
		partdesc->last_found_count++;
	else
	{
		partdesc->last_found_count = 1;
		partdesc->last_found_part_index = part_index;
		partdesc->last_found_datum_index = bound_offset;
	}

	return part_index;
}
1780 : :
1781 : : /*
1782 : : * ExecBuildSlotPartitionKeyDescription
1783 : : *
1784 : : * This works very much like BuildIndexValueDescription() and is currently
1785 : : * used for building error messages when ExecFindPartition() fails to find
1786 : : * partition for a row.
1787 : : */
1788 : : static char *
3042 rhaas@postgresql.org 1789 : 77 : ExecBuildSlotPartitionKeyDescription(Relation rel,
1790 : : const Datum *values,
1791 : : const bool *isnull,
1792 : : int maxfieldlen)
1793 : : {
1794 : : StringInfoData buf;
1795 : 77 : PartitionKey key = RelationGetPartitionKey(rel);
1796 : 77 : int partnatts = get_partition_natts(key);
1797 : : int i;
1798 : 77 : Oid relid = RelationGetRelid(rel);
1799 : : AclResult aclresult;
1800 : :
1801 [ - + ]: 77 : if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
3042 rhaas@postgresql.org 1802 :UBC 0 : return NULL;
1803 : :
1804 : : /* If the user has table-level access, just go build the description. */
3042 rhaas@postgresql.org 1805 :CBC 77 : aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
1806 [ + + ]: 77 : if (aclresult != ACLCHECK_OK)
1807 : : {
1808 : : /*
1809 : : * Step through the columns of the partition key and make sure the
1810 : : * user has SELECT rights on all of them.
1811 : : */
1812 [ + + ]: 12 : for (i = 0; i < partnatts; i++)
1813 : : {
1814 : 9 : AttrNumber attnum = get_partition_col_attnum(key, i);
1815 : :
1816 : : /*
1817 : : * If this partition key column is an expression, we return no
1818 : : * detail rather than try to figure out what column(s) the
1819 : : * expression includes and if the user has SELECT rights on them.
1820 : : */
1821 [ + + + + ]: 15 : if (attnum == InvalidAttrNumber ||
1822 : 6 : pg_attribute_aclcheck(relid, attnum, GetUserId(),
1823 : : ACL_SELECT) != ACLCHECK_OK)
1824 : 6 : return NULL;
1825 : : }
1826 : : }
1827 : :
1828 : 71 : initStringInfo(&buf);
1829 : 71 : appendStringInfo(&buf, "(%s) = (",
1830 : : pg_get_partkeydef_columns(relid, true));
1831 : :
1832 [ + + ]: 169 : for (i = 0; i < partnatts; i++)
1833 : : {
1834 : : char *val;
1835 : : int vallen;
1836 : :
1837 [ + + ]: 98 : if (isnull[i])
1838 : 15 : val = "null";
1839 : : else
1840 : : {
1841 : : Oid foutoid;
1842 : : bool typisvarlena;
1843 : :
1844 : 83 : getTypeOutputInfo(get_partition_col_typid(key, i),
1845 : : &foutoid, &typisvarlena);
1846 : 83 : val = OidOutputFunctionCall(foutoid, values[i]);
1847 : : }
1848 : :
1849 [ + + ]: 98 : if (i > 0)
1850 : 27 : appendStringInfoString(&buf, ", ");
1851 : :
1852 : : /* truncate if needed */
1853 : 98 : vallen = strlen(val);
1854 [ + - ]: 98 : if (vallen <= maxfieldlen)
2427 drowley@postgresql.o 1855 : 98 : appendBinaryStringInfo(&buf, val, vallen);
1856 : : else
1857 : : {
3042 rhaas@postgresql.org 1858 :UBC 0 : vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1859 : 0 : appendBinaryStringInfo(&buf, val, vallen);
1860 : 0 : appendStringInfoString(&buf, "...");
1861 : : }
1862 : : }
1863 : :
3042 rhaas@postgresql.org 1864 :CBC 71 : appendStringInfoChar(&buf, ')');
1865 : :
1866 : 71 : return buf.data;
1867 : : }
1868 : :
1869 : : /*
1870 : : * adjust_partition_colnos
1871 : : * Adjust the list of UPDATE target column numbers to account for
1872 : : * attribute differences between the parent and the partition.
1873 : : *
1874 : : * Note: mustn't be called if no adjustment is required.
1875 : : */
1876 : : static List *
1770 tgl@sss.pgh.pa.us 1877 : 38 : adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
1878 : : {
1879 : 38 : TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
1880 : :
1433 alvherre@alvh.no-ip. 1881 [ - + ]: 38 : Assert(map != NULL);
1882 : :
1448 1883 : 38 : return adjust_partition_colnos_using_map(colnos, map->attrMap);
1884 : : }
1885 : :
1886 : : /*
1887 : : * adjust_partition_colnos_using_map
1888 : : * Like adjust_partition_colnos, but uses a caller-supplied map instead
1889 : : * of assuming to map from the "root" result relation.
1890 : : *
1891 : : * Note: mustn't be called if no adjustment is required.
1892 : : */
1893 : : static List *
1894 : 47 : adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
1895 : : {
1896 : 47 : List *new_colnos = NIL;
1897 : : ListCell *lc;
1898 : :
1899 [ - + ]: 47 : Assert(attrMap != NULL); /* else we shouldn't be here */
1900 : :
1770 tgl@sss.pgh.pa.us 1901 [ + - + + : 116 : foreach(lc, colnos)
+ + ]
1902 : : {
1903 : 69 : AttrNumber parentattrno = lfirst_int(lc);
1904 : :
1905 [ + - ]: 69 : if (parentattrno <= 0 ||
1906 [ + - ]: 69 : parentattrno > attrMap->maplen ||
1907 [ - + ]: 69 : attrMap->attnums[parentattrno - 1] == 0)
1770 tgl@sss.pgh.pa.us 1908 [ # # ]:UBC 0 : elog(ERROR, "unexpected attno %d in target column list",
1909 : : parentattrno);
1770 tgl@sss.pgh.pa.us 1910 :CBC 69 : new_colnos = lappend_int(new_colnos,
1911 : 69 : attrMap->attnums[parentattrno - 1]);
1912 : : }
1913 : :
1914 : 47 : return new_colnos;
1915 : : }
1916 : :
1917 : : /*-------------------------------------------------------------------------
1918 : : * Run-Time Partition Pruning Support.
1919 : : *
1920 : : * The following series of functions exist to support the removal of unneeded
1921 : : * subplans for queries against partitioned tables. The supporting functions
1922 : : * here are designed to work with any plan type which supports an arbitrary
1923 : : * number of subplans, e.g. Append, MergeAppend.
1924 : : *
1925 : : * When pruning involves comparison of a partition key to a constant, it's
1926 : : * done by the planner. However, if we have a comparison to a non-constant
1927 : : * but not volatile expression, that presents an opportunity for run-time
1928 : : * pruning by the executor, allowing irrelevant partitions to be skipped
1929 : : * dynamically.
1930 : : *
1931 : : * We must distinguish expressions containing PARAM_EXEC Params from
1932 : : * expressions that don't contain those. Even though a PARAM_EXEC Param is
1933 : : * considered to be a stable expression, it can change value from one plan
1934 : : * node scan to the next during query execution. Stable comparison
1935 : : * expressions that don't involve such Params allow partition pruning to be
1936 : : * done once during executor startup. Expressions that do involve such Params
1937 : : * require us to prune separately for each scan of the parent plan node.
1938 : : *
1939 : : * Note that pruning away unneeded subplans during executor startup has the
1940 : : * added benefit of not having to initialize the unneeded subplans at all.
1941 : : *
1942 : : *
1943 : : * Functions:
1944 : : *
1945 : : * ExecDoInitialPruning:
1946 : : * Perform runtime "initial" pruning, if necessary, to determine the set
1947 : : * of child subnodes that need to be initialized during ExecInitNode() for
1948 : : * all plan nodes that contain a PartitionPruneInfo.
1949 : : *
1950 : : * ExecInitPartitionExecPruning:
1951 : : * Updates the PartitionPruneState found at given part_prune_index in
1952 : : * EState.es_part_prune_states for use during "exec" pruning if required.
1953 : : * Also returns the set of subplans to initialize that would be stored at
1954 : : * part_prune_index in EState.es_part_prune_results by
1955 : : * ExecDoInitialPruning(). Maps in PartitionPruneState are updated to
1956 : : * account for initial pruning possibly having eliminated some of the
1957 : : * subplans.
1958 : : *
1959 : : * ExecFindMatchingSubPlans:
1960 : : * Returns indexes of matching subplans after evaluating the expressions
1961 : : * that are safe to evaluate at a given point. This function is first
1962 : : * called during ExecDoInitialPruning() to find the initially matching
1963 : : * subplans based on performing the initial pruning steps and then must be
1964 : : * called again each time the value of a Param listed in
1965 : : * PartitionPruneState's 'execparamids' changes.
1966 : : *-------------------------------------------------------------------------
1967 : : */
1968 : :
1969 : :
1970 : : /*
1971 : : * ExecDoInitialPruning
1972 : : * Perform runtime "initial" pruning, if necessary, to determine the set
1973 : : * of child subnodes that need to be initialized during ExecInitNode() for
1974 : : * plan nodes that support partition pruning.
1975 : : *
1976 : : * This function iterates over each PartitionPruneInfo entry in
1977 : : * estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
1978 : : * and adds it to es_part_prune_states. ExecInitPartitionExecPruning() accesses
1979 : : * these states through their corresponding indexes in es_part_prune_states and
1980 : : * assign each state to the parent node's PlanState, from where it will be used
1981 : : * for "exec" pruning.
1982 : : *
1983 : : * If initial pruning steps exist for a PartitionPruneInfo entry, this function
1984 : : * executes those pruning steps and stores the result as a bitmapset of valid
1985 : : * child subplans, identifying which subplans should be initialized for
1986 : : * execution. The results are saved in estate->es_part_prune_results.
1987 : : *
1988 : : * If no initial pruning is performed for a given PartitionPruneInfo, a NULL
1989 : : * entry is still added to es_part_prune_results to maintain alignment with
1990 : : * es_part_prune_infos. This ensures that ExecInitPartitionExecPruning() can
1991 : : * use the same index to retrieve the pruning results.
1992 : : */
1993 : : void
408 amitlan@postgresql.o 1994 : 708213 : ExecDoInitialPruning(EState *estate)
1995 : : {
1996 : : ListCell *lc;
1997 : :
1998 [ + + + + : 708616 : foreach(lc, estate->es_part_prune_infos)
+ + ]
1999 : : {
2000 : 403 : PartitionPruneInfo *pruneinfo = lfirst_node(PartitionPruneInfo, lc);
2001 : : PartitionPruneState *prunestate;
2002 : 403 : Bitmapset *validsubplans = NULL;
401 2003 : 403 : Bitmapset *all_leafpart_rtis = NULL;
2004 : 403 : Bitmapset *validsubplan_rtis = NULL;
2005 : :
2006 : : /* Create and save the PartitionPruneState. */
2007 : 403 : prunestate = CreatePartitionPruneState(estate, pruneinfo,
2008 : : &all_leafpart_rtis);
408 2009 : 403 : estate->es_part_prune_states = lappend(estate->es_part_prune_states,
2010 : : prunestate);
2011 : :
2012 : : /*
2013 : : * Perform initial pruning steps, if any, and save the result
2014 : : * bitmapset or NULL as described in the header comment.
2015 : : */
2016 [ + + ]: 403 : if (prunestate->do_initial_prune)
401 2017 : 224 : validsubplans = ExecFindMatchingSubPlans(prunestate, true,
2018 : : &validsubplan_rtis);
2019 : : else
2020 : 179 : validsubplan_rtis = all_leafpart_rtis;
2021 : :
2022 : 403 : estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
2023 : : validsubplan_rtis);
408 2024 : 403 : estate->es_part_prune_results = lappend(estate->es_part_prune_results,
2025 : : validsubplans);
2026 : : }
2027 : 708213 : }
2028 : :
2029 : : /*
2030 : : * ExecInitPartitionExecPruning
2031 : : * Initialize the data structures needed for runtime "exec" partition
2032 : : * pruning and return the result of initial pruning, if available.
2033 : : *
2034 : : * 'relids' identifies the relation to which both the parent plan and the
2035 : : * PartitionPruneInfo given by 'part_prune_index' belong.
2036 : : *
2037 : : * On return, *initially_valid_subplans is assigned the set of indexes of
2038 : : * child subplans that must be initialized along with the parent plan node.
2039 : : * Initial pruning would have been performed by ExecDoInitialPruning(), if
2040 : : * necessary, and the bitmapset of surviving subplans' indexes would have
2041 : : * been stored as the part_prune_index'th element of
2042 : : * EState.es_part_prune_results.
2043 : : *
2044 : : * If subplans were indeed pruned during initial pruning, the subplan_map
2045 : : * arrays in the returned PartitionPruneState are re-sequenced to exclude those
2046 : : * subplans, but only if the maps will be needed for subsequent execution
2047 : : * pruning passes.
2048 : : */
2049 : : PartitionPruneState *
2050 : 405 : ExecInitPartitionExecPruning(PlanState *planstate,
2051 : : int n_total_subplans,
2052 : : int part_prune_index,
2053 : : Bitmapset *relids,
2054 : : Bitmapset **initially_valid_subplans)
2055 : : {
2056 : : PartitionPruneState *prunestate;
1440 alvherre@alvh.no-ip. 2057 : 405 : EState *estate = planstate->state;
2058 : : PartitionPruneInfo *pruneinfo;
2059 : :
2060 : : /* Obtain the pruneinfo we need. */
409 amitlan@postgresql.o 2061 : 405 : pruneinfo = list_nth_node(PartitionPruneInfo, estate->es_part_prune_infos,
2062 : : part_prune_index);
2063 : :
2064 : : /* Its relids better match the plan node's or the planner messed up. */
2065 [ - + ]: 405 : if (!bms_equal(relids, pruneinfo->relids))
409 amitlan@postgresql.o 2066 [ # # ]:UBC 0 : elog(ERROR, "wrong pruneinfo with relids=%s found at part_prune_index=%d contained in plan node with relids=%s",
2067 : : bmsToString(pruneinfo->relids), part_prune_index,
2068 : : bmsToString(relids));
2069 : :
2070 : : /*
2071 : : * The PartitionPruneState would have been created by
2072 : : * ExecDoInitialPruning() and stored as the part_prune_index'th element of
2073 : : * EState.es_part_prune_states.
2074 : : */
408 amitlan@postgresql.o 2075 :CBC 405 : prunestate = list_nth(estate->es_part_prune_states, part_prune_index);
2076 [ - + ]: 405 : Assert(prunestate != NULL);
2077 : :
2078 : : /* Use the result of initial pruning done by ExecDoInitialPruning(). */
1440 alvherre@alvh.no-ip. 2079 [ + + ]: 405 : if (prunestate->do_initial_prune)
408 amitlan@postgresql.o 2080 : 225 : *initially_valid_subplans = list_nth_node(Bitmapset,
2081 : : estate->es_part_prune_results,
2082 : : part_prune_index);
2083 : : else
2084 : : {
2085 : : /* No pruning, so we'll need to initialize all subplans */
1440 alvherre@alvh.no-ip. 2086 [ - + ]: 180 : Assert(n_total_subplans > 0);
2087 : 180 : *initially_valid_subplans = bms_add_range(NULL, 0,
2088 : : n_total_subplans - 1);
2089 : : }
2090 : :
2091 : : /*
2092 : : * The exec pruning state must also be initialized, if needed, before it
2093 : : * can be used for pruning during execution.
2094 : : *
2095 : : * This also re-sequences subplan indexes contained in prunestate to
2096 : : * account for any that were removed due to initial pruning; refer to the
2097 : : * condition in InitExecPartitionPruneContexts() that is used to determine
2098 : : * whether to do this. If no exec pruning needs to be done, we would thus
2099 : : * leave the maps to be in an invalid state, but that's ok since that data
2100 : : * won't be consulted again (cf initial Assert in
2101 : : * ExecFindMatchingSubPlans).
2102 : : */
408 amitlan@postgresql.o 2103 [ + + ]: 405 : if (prunestate->do_exec_prune)
2104 : 201 : InitExecPartitionPruneContexts(prunestate, planstate,
2105 : : *initially_valid_subplans,
2106 : : n_total_subplans);
2107 : :
1440 alvherre@alvh.no-ip. 2108 : 405 : return prunestate;
2109 : : }
2110 : :
2111 : : /*
2112 : : * CreatePartitionPruneState
2113 : : * Build the data structure required for calling ExecFindMatchingSubPlans
2114 : : *
2115 : : * This includes PartitionPruneContexts (stored in each
2116 : : * PartitionedRelPruningData corresponding to a PartitionedRelPruneInfo),
2117 : : * which hold the ExprStates needed to evaluate pruning expressions, and
2118 : : * mapping arrays to convert partition indexes from the pruning logic
2119 : : * into subplan indexes in the parent plan node's list of child subplans.
2120 : : *
2121 : : * 'pruneinfo' is a PartitionPruneInfo as generated by
2122 : : * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
2123 : : * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
2124 : : * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
2125 : : * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
2126 : : * system is needed to keep from confusing the different hierarchies when a
2127 : : * UNION ALL contains multiple partitioned tables as children. The data
2128 : : * stored in each PartitionedRelPruningData can be re-used each time we
2129 : : * re-evaluate which partitions match the pruning steps provided in each
2130 : : * PartitionedRelPruneInfo.
2131 : : *
2132 : : * Note that only the PartitionPruneContexts for initial pruning are
2133 : : * initialized here. Those required for exec pruning are initialized later in
2134 : : * ExecInitPartitionExecPruning(), as they depend on the availability of the
2135 : : * parent plan node's PlanState.
2136 : : *
2137 : : * If initial pruning steps are to be skipped (e.g., during EXPLAIN
2138 : : * (GENERIC_PLAN)), *all_leafpart_rtis will be populated with the RT indexes of
2139 : : * all leaf partitions whose scanning subnode is included in the parent plan
2140 : : * node's list of child plans. The caller must add these RT indexes to
2141 : : * estate->es_unpruned_relids.
2142 : : */
2143 : : static PartitionPruneState *
401 amitlan@postgresql.o 2144 : 403 : CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo,
2145 : : Bitmapset **all_leafpart_rtis)
2146 : : {
2147 : : PartitionPruneState *prunestate;
2148 : : int n_part_hierarchies;
2149 : : ListCell *lc;
2150 : : int i;
2151 : :
2152 : : /*
2153 : : * Expression context that will be used by partkey_datum_from_expr() to
2154 : : * evaluate expressions for comparison against partition bounds.
2155 : : */
408 2156 : 403 : ExprContext *econtext = CreateExprContext(estate);
2157 : :
2158 : : /* For data reading, executor always includes detached partitions */
2565 rhaas@postgresql.org 2159 [ + + ]: 403 : if (estate->es_partition_directory == NULL)
2160 : 379 : estate->es_partition_directory =
1788 alvherre@alvh.no-ip. 2161 : 379 : CreatePartitionDirectory(estate->es_query_cxt, false);
2162 : :
1440 2163 : 403 : n_part_hierarchies = list_length(pruneinfo->prune_infos);
2783 tgl@sss.pgh.pa.us 2164 [ - + ]: 403 : Assert(n_part_hierarchies > 0);
2165 : :
2166 : : /*
2167 : : * Allocate the data structure
2168 : : */
2169 : : prunestate = (PartitionPruneState *)
2170 : 403 : palloc(offsetof(PartitionPruneState, partprunedata) +
2171 : : sizeof(PartitionPruningData *) * n_part_hierarchies);
2172 : :
2173 : : /* Save ExprContext for use during InitExecPartitionPruneContexts(). */
408 amitlan@postgresql.o 2174 : 403 : prunestate->econtext = econtext;
2783 tgl@sss.pgh.pa.us 2175 : 403 : prunestate->execparamids = NULL;
2176 : : /* other_subplans can change at runtime, so we need our own copy */
1440 alvherre@alvh.no-ip. 2177 : 403 : prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2835 tgl@sss.pgh.pa.us 2178 : 403 : prunestate->do_initial_prune = false; /* may be set below */
2179 : 403 : prunestate->do_exec_prune = false; /* may be set below */
2783 2180 : 403 : prunestate->num_partprunedata = n_part_hierarchies;
2181 : :
2182 : : /*
2183 : : * Create a short-term memory context which we'll use when making calls to
2184 : : * the partition pruning functions. This avoids possible memory leaks,
2185 : : * since the pruning functions call comparison functions that aren't under
2186 : : * our control.
2187 : : */
2899 alvherre@alvh.no-ip. 2188 : 403 : prunestate->prune_context =
2189 : 403 : AllocSetContextCreate(CurrentMemoryContext,
2190 : : "Partition Prune",
2191 : : ALLOCSET_DEFAULT_SIZES);
2192 : :
2193 : 403 : i = 0;
1440 2194 [ + - + + : 818 : foreach(lc, pruneinfo->prune_infos)
+ + ]
2195 : : {
2783 tgl@sss.pgh.pa.us 2196 : 415 : List *partrelpruneinfos = lfirst_node(List, lc);
2197 : 415 : int npartrelpruneinfos = list_length(partrelpruneinfos);
2198 : : PartitionPruningData *prunedata;
2199 : : ListCell *lc2;
2200 : : int j;
2201 : :
2202 : : prunedata = (PartitionPruningData *)
2203 : 415 : palloc(offsetof(PartitionPruningData, partrelprunedata) +
2204 : 415 : npartrelpruneinfos * sizeof(PartitionedRelPruningData));
2205 : 415 : prunestate->partprunedata[i] = prunedata;
2206 : 415 : prunedata->num_partrelprunedata = npartrelpruneinfos;
2207 : :
2208 : 415 : j = 0;
2209 [ + - + + : 1235 : foreach(lc2, partrelpruneinfos)
+ + ]
2210 : : {
2211 : 820 : PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
2212 : 820 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2213 : : Relation partrel;
2214 : : PartitionDesc partdesc;
2215 : : PartitionKey partkey;
2216 : :
2217 : : /*
2218 : : * We can rely on the copies of the partitioned table's partition
2219 : : * key and partition descriptor appearing in its relcache entry,
2220 : : * because that entry will be held open and locked for the
2221 : : * duration of this executor run.
2222 : : */
361 amitlan@postgresql.o 2223 : 820 : partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false);
2224 : :
2225 : : /* Remember for InitExecPartitionPruneContexts(). */
408 2226 : 820 : pprune->partrel = partrel;
2227 : :
2719 tgl@sss.pgh.pa.us 2228 : 820 : partkey = RelationGetPartitionKey(partrel);
2565 rhaas@postgresql.org 2229 : 820 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2230 : : partrel);
2231 : :
2232 : : /*
2233 : : * Initialize the subplan_map and subpart_map.
2234 : : *
2235 : : * The set of partitions that exist now might not be the same that
2236 : : * existed when the plan was made. The normal case is that it is;
2237 : : * optimize for that case with a quick comparison, and just copy
2238 : : * the subplan_map and make subpart_map, leafpart_rti_map point to
2239 : : * the ones in PruneInfo.
2240 : : *
2241 : : * For the case where they aren't identical, we could have more
2242 : : * partitions on either side; or even exactly the same number of
2243 : : * them on both but the set of OIDs doesn't match fully. Handle
2244 : : * this by creating new subplan_map and subpart_map arrays that
2245 : : * corresponds to the ones in the PruneInfo where the new
2246 : : * partition descriptor's OIDs match. Any that don't match can be
2247 : : * set to -1, as if they were pruned. By construction, both
2248 : : * arrays are in partition bounds order.
2249 : : */
2494 tgl@sss.pgh.pa.us 2250 : 820 : pprune->nparts = partdesc->nparts;
94 michael@paquier.xyz 2251 :GNC 820 : pprune->subplan_map = palloc_array(int, partdesc->nparts);
2252 : :
627 alvherre@alvh.no-ip. 2253 [ + + ]:CBC 820 : if (partdesc->nparts == pinfo->nparts &&
2254 : 819 : memcmp(partdesc->oids, pinfo->relid_map,
2255 [ + + ]: 819 : sizeof(int) * partdesc->nparts) == 0)
2256 : : {
2565 rhaas@postgresql.org 2257 : 758 : pprune->subpart_map = pinfo->subpart_map;
401 amitlan@postgresql.o 2258 : 758 : pprune->leafpart_rti_map = pinfo->leafpart_rti_map;
2565 rhaas@postgresql.org 2259 : 758 : memcpy(pprune->subplan_map, pinfo->subplan_map,
2260 : 758 : sizeof(int) * pinfo->nparts);
2261 : : }
2262 : : else
2263 : : {
2489 tgl@sss.pgh.pa.us 2264 : 62 : int pd_idx = 0;
2265 : : int pp_idx;
2266 : :
2267 : : /*
2268 : : * When the partition arrays are not identical, there could be
2269 : : * some new ones but it's also possible that one was removed;
2270 : : * we cope with both situations by walking the arrays and
2271 : : * discarding those that don't match.
2272 : : *
2273 : : * If the number of partitions on both sides match, it's still
2274 : : * possible that one partition has been detached and another
2275 : : * attached. Cope with that by creating a map that skips any
2276 : : * mismatches.
2277 : : */
94 michael@paquier.xyz 2278 :GNC 62 : pprune->subpart_map = palloc_array(int, partdesc->nparts);
2279 : 62 : pprune->leafpart_rti_map = palloc_array(int, partdesc->nparts);
2280 : :
2048 tgl@sss.pgh.pa.us 2281 [ + + ]:CBC 264 : for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2282 : : {
2283 : : /* Skip any InvalidOid relid_map entries */
2284 [ + + ]: 312 : while (pd_idx < pinfo->nparts &&
2285 [ + + ]: 252 : !OidIsValid(pinfo->relid_map[pd_idx]))
2286 : 110 : pd_idx++;
2287 : :
627 alvherre@alvh.no-ip. 2288 : 202 : recheck:
2048 tgl@sss.pgh.pa.us 2289 [ + + ]: 202 : if (pd_idx < pinfo->nparts &&
2290 [ + + ]: 142 : pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2291 : : {
2292 : : /* match... */
2565 rhaas@postgresql.org 2293 : 91 : pprune->subplan_map[pp_idx] =
2294 : 91 : pinfo->subplan_map[pd_idx];
2295 : 91 : pprune->subpart_map[pp_idx] =
2048 tgl@sss.pgh.pa.us 2296 : 91 : pinfo->subpart_map[pd_idx];
401 amitlan@postgresql.o 2297 : 91 : pprune->leafpart_rti_map[pp_idx] =
2298 : 91 : pinfo->leafpart_rti_map[pd_idx];
2048 tgl@sss.pgh.pa.us 2299 : 91 : pd_idx++;
627 alvherre@alvh.no-ip. 2300 : 91 : continue;
2301 : : }
2302 : :
2303 : : /*
2304 : : * There isn't an exact match in the corresponding
2305 : : * positions of both arrays. Peek ahead in
2306 : : * pinfo->relid_map to see if we have a match for the
2307 : : * current partition in partdesc. Normally if a match
2308 : : * exists it's just one element ahead, and it means the
2309 : : * planner saw one extra partition that we no longer see
2310 : : * now (its concurrent detach finished just in between);
2311 : : * so we skip that one by updating pd_idx to the new
2312 : : * location and jumping above. We can then continue to
2313 : : * match the rest of the elements after skipping the OID
2314 : : * with no match; no future matches are tried for the
2315 : : * element that was skipped, because we know the arrays to
2316 : : * be in the same order.
2317 : : *
2318 : : * If we don't see a match anywhere in the rest of the
2319 : : * pinfo->relid_map array, that means we see an element
2320 : : * now that the planner didn't see, so mark that one as
2321 : : * pruned and move on.
2322 : : */
2323 [ + + ]: 144 : for (int pd_idx2 = pd_idx + 1; pd_idx2 < pinfo->nparts; pd_idx2++)
2324 : : {
2325 [ - + ]: 33 : if (pd_idx2 >= pinfo->nparts)
627 alvherre@alvh.no-ip. 2326 :UBC 0 : break;
627 alvherre@alvh.no-ip. 2327 [ - + ]:CBC 33 : if (pinfo->relid_map[pd_idx2] == partdesc->oids[pp_idx])
2328 : : {
627 alvherre@alvh.no-ip. 2329 :UBC 0 : pd_idx = pd_idx2;
2330 : 0 : goto recheck;
2331 : : }
2332 : : }
2333 : :
627 alvherre@alvh.no-ip. 2334 :CBC 111 : pprune->subpart_map[pp_idx] = -1;
2335 : 111 : pprune->subplan_map[pp_idx] = -1;
401 amitlan@postgresql.o 2336 : 111 : pprune->leafpart_rti_map[pp_idx] = 0;
2337 : : }
2338 : : }
2339 : :
2340 : : /* present_parts is also subject to later modification */
2494 tgl@sss.pgh.pa.us 2341 : 820 : pprune->present_parts = bms_copy(pinfo->present_parts);
2342 : :
2343 : : /*
2344 : : * Only initial_context is initialized here. exec_context is
2345 : : * initialized during ExecInitPartitionExecPruning() when the
2346 : : * parent plan's PlanState is available.
2347 : : *
2348 : : * Note that we must skip execution-time (both "init" and "exec")
2349 : : * partition pruning in EXPLAIN (GENERIC_PLAN), since parameter
2350 : : * values may be missing.
2351 : : */
2352 : 820 : pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
1087 2353 [ + + ]: 820 : if (pinfo->initial_pruning_steps &&
2354 [ + + ]: 278 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2355 : : {
1440 alvherre@alvh.no-ip. 2356 : 275 : InitPartitionPruneContext(&pprune->initial_context,
2357 : : pprune->initial_pruning_steps,
2358 : : partdesc, partkey, NULL,
2359 : : econtext);
2360 : : /* Record whether initial pruning is needed at any level */
2494 tgl@sss.pgh.pa.us 2361 : 275 : prunestate->do_initial_prune = true;
2362 : : }
2363 : 820 : pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
1087 2364 [ + + ]: 820 : if (pinfo->exec_pruning_steps &&
2365 [ + - ]: 257 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2366 : : {
2367 : : /* Record whether exec pruning is needed at any level */
2494 2368 : 257 : prunestate->do_exec_prune = true;
2369 : : }
2370 : :
2371 : : /*
2372 : : * Accumulate the IDs of all PARAM_EXEC Params affecting the
2373 : : * partitioning decisions at this plan node.
2374 : : */
2783 2375 : 1640 : prunestate->execparamids = bms_add_members(prunestate->execparamids,
2376 : 820 : pinfo->execparamids);
2377 : :
2378 : : /*
2379 : : * Return all leaf partition indexes if we're skipping pruning in
2380 : : * the EXPLAIN (GENERIC_PLAN) case.
2381 : : */
401 amitlan@postgresql.o 2382 [ + + + + ]: 820 : if (pinfo->initial_pruning_steps && !prunestate->do_initial_prune)
2383 : : {
2384 : 3 : int part_index = -1;
2385 : :
2386 : 9 : while ((part_index = bms_next_member(pprune->present_parts,
2387 [ + + ]: 9 : part_index)) >= 0)
2388 : : {
2389 : 6 : Index rtindex = pprune->leafpart_rti_map[part_index];
2390 : :
2391 [ + - ]: 6 : if (rtindex)
2392 : 6 : *all_leafpart_rtis = bms_add_member(*all_leafpart_rtis,
2393 : : rtindex);
2394 : : }
2395 : : }
2396 : :
2783 tgl@sss.pgh.pa.us 2397 : 820 : j++;
2398 : : }
2899 alvherre@alvh.no-ip. 2399 : 415 : i++;
2400 : : }
2401 : :
2402 : 403 : return prunestate;
2403 : : }
2404 : :
2405 : : /*
2406 : : * Initialize a PartitionPruneContext for the given list of pruning steps.
 : : *
 : : * 'partdesc' and 'partkey' describe the partitioned table these steps
 : : * apply to. 'planstate' may be NULL: in that case, pruning_steps is
 : : * known not to contain any expressions that depend on the parent plan,
 : : * and values of any EXTERN parameters must have been made available by
 : : * the caller via 'econtext' (see the ExecInitExprWithParams() call
 : : * below).
2407 : : */
2408 : : static void
1440 2409 : 533 : InitPartitionPruneContext(PartitionPruneContext *context,
2410 : : List *pruning_steps,
2411 : : PartitionDesc partdesc,
2412 : : PartitionKey partkey,
2413 : : PlanState *planstate,
2414 : : ExprContext *econtext)
2415 : : {
2416 : : int n_steps;
2417 : : int partnatts;
2418 : : ListCell *lc;
2419 : :
2494 tgl@sss.pgh.pa.us 2420 : 533 : n_steps = list_length(pruning_steps);
2421 : :
2422 : 533 : context->strategy = partkey->strategy;
2423 : 533 : context->partnatts = partnatts = partkey->partnatts;
2424 : 533 : context->nparts = partdesc->nparts;
2425 : 533 : context->boundinfo = partdesc->boundinfo;
2426 : 533 : context->partcollation = partkey->partcollation;
2427 : 533 : context->partsupfunc = partkey->partsupfunc;
2428 : :
2429 : : /* We'll look up type-specific support functions as needed */
94 michael@paquier.xyz 2430 :GNC 533 : context->stepcmpfuncs = palloc0_array(FmgrInfo, n_steps * partnatts);
2431 : :
2494 tgl@sss.pgh.pa.us 2432 :CBC 533 : context->ppccontext = CurrentMemoryContext;
2433 : 533 : context->planstate = planstate;
1440 alvherre@alvh.no-ip. 2434 : 533 : context->exprcontext = econtext;
2435 : :
 : : /*
 : : * Initialize expression state for each expression we need. Like
 : : * stepcmpfuncs above, exprstates is a logically two-dimensional
 : : * array, addressed by (step_id, partition key column) through
 : : * PruneCxtStateIdx(); entries that need no ExprState stay zeroed.
 : : */
94 michael@paquier.xyz 2437 :GNC 533 : context->exprstates = palloc0_array(ExprState *, n_steps * partnatts);
2494 tgl@sss.pgh.pa.us 2438 [ + - + + :CBC 1397 : foreach(lc, pruning_steps)
+ + ]
2439 : : {
2440 : 864 : PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
884 drowley@postgresql.o 2441 : 864 : ListCell *lc2 = list_head(step->exprs);
2442 : : int keyno;
2443 : :
2444 : : /* not needed for other step kinds */
2494 tgl@sss.pgh.pa.us 2445 [ + + ]: 864 : if (!IsA(step, PartitionPruneStepOp))
2446 : 143 : continue;
2447 : :
2448 [ - + ]: 721 : Assert(list_length(step->exprs) <= partnatts);
2449 : :
884 drowley@postgresql.o 2450 [ + + ]: 1517 : for (keyno = 0; keyno < partnatts; keyno++)
2451 : : {
 : : /* keys listed in step->nullkeys have no expression to evaluate */
2452 [ + + ]: 796 : if (bms_is_member(keyno, step->nullkeys))
2453 : 3 : continue;
2454 : :
2455 [ + + ]: 793 : if (lc2 != NULL)
2456 : : {
2457 : 745 : Expr *expr = lfirst(lc2);
2458 : :
2459 : : /* not needed for Consts */
2460 [ + + ]: 745 : if (!IsA(expr, Const))
2461 : : {
2462 : 698 : int stateidx = PruneCxtStateIdx(partnatts,
2463 : : step->step.step_id,
2464 : : keyno);
2465 : :
2466 : : /*
2467 : : * When planstate is NULL, pruning_steps is known not to
2468 : : * contain any expressions that depend on the parent plan.
2469 : : * Information of any available EXTERN parameters must be
2470 : : * passed explicitly in that case, which the caller must
2471 : : * have made available via econtext.
2472 : : */
2473 [ + + ]: 698 : if (planstate == NULL)
2474 : 407 : context->exprstates[stateidx] =
2475 : 407 : ExecInitExprWithParams(expr,
2476 : : econtext->ecxt_param_list_info);
2477 : : else
2478 : 291 : context->exprstates[stateidx] =
2479 : 291 : ExecInitExpr(expr, context->planstate);
2480 : : }
2481 : 745 : lc2 = lnext(step->exprs, lc2);
2482 : : }
2483 : : }
2484 : : }
2494 tgl@sss.pgh.pa.us 2485 : 533 : }
2486 : :
2487 : : /*
2488 : : * InitExecPartitionPruneContexts
2489 : : * Initialize exec pruning contexts deferred by CreatePartitionPruneState()
2490 : : *
2491 : : * This function finalizes exec pruning setup for a PartitionPruneState by
2492 : : * initializing contexts for pruning steps that require the parent plan's
2493 : : * PlanState. It iterates over PartitionPruningData entries and sets up the
2494 : : * necessary execution contexts for pruning during query execution.
2495 : : *
2496 : : * Also fix the mapping of partition indexes to subplan indexes contained in
2497 : : * prunestate by considering the new list of subplans that survived initial
2498 : : * pruning.
2499 : : *
2500 : : * Current values of the indexes present in PartitionPruneState count all the
2501 : : * subplans that would be present before initial pruning was done. If initial
2502 : : * pruning got rid of some of the subplans, any subsequent pruning passes will
2503 : : * be looking at a different set of target subplans to choose from than those
2504 : : * in the pre-initial-pruning set, so the maps in PartitionPruneState
2505 : : * containing those indexes must be updated to reflect the new indexes of
2506 : : * subplans in the post-initial-pruning set.
2507 : : */
2508 : : static void
408 amitlan@postgresql.o 2509 : 201 : InitExecPartitionPruneContexts(PartitionPruneState *prunestate,
2510 : : PlanState *parent_plan,
2511 : : Bitmapset *initially_valid_subplans,
2512 : : int n_total_subplans)
2513 : : {
2514 : : EState *estate;
2515 : 201 : int *new_subplan_indexes = NULL;
2516 : : Bitmapset *new_other_subplans;
2517 : : int i;
2518 : : int newidx;
2519 : 201 : bool fix_subplan_map = false;
2520 : :
2521 [ - + ]: 201 : Assert(prunestate->do_exec_prune);
2522 [ - + ]: 201 : Assert(parent_plan != NULL);
 : : /* the EState's partition directory supplies partdescs below */
2523 : 201 : estate = parent_plan->state;
2524 : :
2525 : : /*
2526 : : * No need to fix subplans maps if initial pruning didn't eliminate any
2527 : : * subplans.
2528 : : */
2529 [ + + ]: 201 : if (bms_num_members(initially_valid_subplans) < n_total_subplans)
2530 : : {
2531 : 24 : fix_subplan_map = true;
2532 : :
2533 : : /*
2534 : : * First we must build a temporary array which maps old subplan
2535 : : * indexes to new ones. For convenience of initialization, we use
2536 : : * 1-based indexes in this array and leave pruned items as 0.
2537 : : */
94 michael@paquier.xyz 2538 :GNC 24 : new_subplan_indexes = palloc0_array(int, n_total_subplans);
408 amitlan@postgresql.o 2539 :CBC 24 : newidx = 1;
2540 : 24 : i = -1;
2541 [ + + ]: 93 : while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2542 : : {
2543 [ - + ]: 69 : Assert(i < n_total_subplans);
2544 : 69 : new_subplan_indexes[i] = newidx++;
2545 : : }
2546 : : }
2547 : :
2548 : : /*
2549 : : * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2550 : : * subplan indexes. We must also recompute its present_parts bitmap.
2551 : : */
1440 alvherre@alvh.no-ip. 2552 [ + + ]: 414 : for (i = 0; i < prunestate->num_partprunedata; i++)
2553 : : {
2554 : 213 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2555 : : int j;
2556 : :
2557 : : /*
2558 : : * Within each hierarchy, we perform this loop in back-to-front order
2559 : : * so that we determine present_parts for the lowest-level partitioned
2560 : : * tables first. This way we can tell whether a sub-partitioned
2561 : : * table's partitions were entirely pruned so we can exclude it from
2562 : : * the current level's present_parts.
2563 : : */
2564 [ + + ]: 654 : for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2565 : : {
2566 : 441 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2567 : 441 : int nparts = pprune->nparts;
2568 : : int k;
2569 : :
2570 : : /* Initialize PartitionPruneContext for exec pruning, if needed. */
408 amitlan@postgresql.o 2571 [ + + ]: 441 : if (pprune->exec_pruning_steps != NIL)
2572 : : {
2573 : : PartitionKey partkey;
2574 : : PartitionDesc partdesc;
2575 : :
2576 : : /*
2577 : : * See the comment in CreatePartitionPruneState() regarding
2578 : : * the usage of partdesc and partkey.
2579 : : */
2580 : 258 : partkey = RelationGetPartitionKey(pprune->partrel);
2581 : 258 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
2582 : : pprune->partrel);
2583 : :
2584 : 258 : InitPartitionPruneContext(&pprune->exec_context,
2585 : : pprune->exec_pruning_steps,
2586 : : partdesc, partkey, parent_plan,
2587 : : prunestate->econtext);
2588 : : }
2589 : :
2590 [ + + ]: 441 : if (!fix_subplan_map)
2591 : 345 : continue;
2592 : :
2593 : : /* We just rebuild present_parts from scratch */
1440 alvherre@alvh.no-ip. 2594 : 96 : bms_free(pprune->present_parts);
2595 : 96 : pprune->present_parts = NULL;
2596 : :
2597 [ + + ]: 354 : for (k = 0; k < nparts; k++)
2598 : : {
2599 : 258 : int oldidx = pprune->subplan_map[k];
2600 : : int subidx;
2601 : :
2602 : : /*
2603 : : * If this partition existed as a subplan then change the old
2604 : : * subplan index to the new subplan index. The new index may
2605 : : * become -1 if the partition was pruned above, or it may just
2606 : : * come earlier in the subplan list due to some subplans being
2607 : : * removed earlier in the list. If it's a subpartition, add
2608 : : * it to present_parts unless it's entirely pruned.
2609 : : */
2610 [ + + ]: 258 : if (oldidx >= 0)
2611 : : {
2612 [ - + ]: 198 : Assert(oldidx < n_total_subplans);
2613 : 198 : pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2614 : :
2615 [ + + ]: 198 : if (new_subplan_indexes[oldidx] > 0)
2616 : 57 : pprune->present_parts =
2617 : 57 : bms_add_member(pprune->present_parts, k);
2618 : : }
2619 [ + - ]: 60 : else if ((subidx = pprune->subpart_map[k]) >= 0)
2620 : : {
2621 : : PartitionedRelPruningData *subprune;
2622 : :
2623 : 60 : subprune = &prunedata->partrelprunedata[subidx];
2624 : :
2625 [ + + ]: 60 : if (!bms_is_empty(subprune->present_parts))
2626 : 24 : pprune->present_parts =
2627 : 24 : bms_add_member(pprune->present_parts, k);
2628 : : }
2629 : : }
2630 : : }
2631 : : }
2632 : :
2633 : : /*
2634 : : * If we fixed subplan maps, we must also recompute the other_subplans
2635 : : * set, since indexes in it may change.
2636 : : */
408 amitlan@postgresql.o 2637 [ + + ]: 201 : if (fix_subplan_map)
2638 : : {
2639 : 24 : new_other_subplans = NULL;
2640 : 24 : i = -1;
2641 [ + + ]: 36 : while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2642 : 12 : new_other_subplans = bms_add_member(new_other_subplans,
2643 : 12 : new_subplan_indexes[i] - 1);
2644 : :
2645 : 24 : bms_free(prunestate->other_subplans);
2646 : 24 : prunestate->other_subplans = new_other_subplans;
2647 : :
2648 : 24 : pfree(new_subplan_indexes);
2649 : : }
2899 alvherre@alvh.no-ip. 2650 : 201 : }
2651 : :
2652 : : /*
2653 : : * ExecFindMatchingSubPlans
2654 : : * Determine which subplans match the pruning steps detailed in
2655 : : * 'prunestate' for the current comparison expression values.
2656 : : *
2657 : : * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2658 : : * differentiates the initial executor-time pruning step from later
2659 : : * runtime pruning.
2660 : : *
2661 : : * The caller must pass a non-NULL validsubplan_rtis during initial pruning
2662 : : * to collect the RT indexes of leaf partitions whose subnodes will be
2663 : : * executed. These RT indexes are later added to EState.es_unpruned_relids.
2664 : : */
2665 : : Bitmapset *
1440 2666 : 1949 : ExecFindMatchingSubPlans(PartitionPruneState *prunestate,
2667 : : bool initial_prune,
2668 : : Bitmapset **validsubplan_rtis)
2669 : : {
2899 2670 : 1949 : Bitmapset *result = NULL;
2671 : : MemoryContext oldcontext;
2672 : : int i;
2673 : :
2674 : : /*
2675 : : * Either we're here on the initial prune done during pruning
2676 : : * initialization, or we're at a point where PARAM_EXEC Params can be
2677 : : * evaluated *and* there are steps in which to do so.
2678 : : */
1440 2679 [ + + - + ]: 1949 : Assert(initial_prune || prunestate->do_exec_prune);
401 amitlan@postgresql.o 2680 [ + + - + ]: 1949 : Assert(validsubplan_rtis != NULL || !initial_prune);
2681 : :
2682 : : /*
2683 : : * Switch to a temp context to avoid leaking memory in the executor's
2684 : : * query-lifespan memory context.
2685 : : */
2899 alvherre@alvh.no-ip. 2686 : 1949 : oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2687 : :
2688 : : /*
2689 : : * For each hierarchy, do the pruning tests, and add nondeletable
2690 : : * subplans' indexes to "result".
2691 : : */
2783 tgl@sss.pgh.pa.us 2692 [ + + ]: 3919 : for (i = 0; i < prunestate->num_partprunedata; i++)
2693 : : {
1440 alvherre@alvh.no-ip. 2694 : 1970 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2695 : : PartitionedRelPruningData *pprune;
2696 : :
2697 : : /*
2698 : : * We pass the zeroth item, belonging to the root table of the
2699 : : * hierarchy, and find_matching_subplans_recurse() takes care of
2700 : : * recursing to other (lower-level) parents as needed.
2701 : : */
2783 tgl@sss.pgh.pa.us 2702 : 1970 : pprune = &prunedata->partrelprunedata[0];
1440 alvherre@alvh.no-ip. 2703 : 1970 : find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2704 : : &result, validsubplan_rtis);
2705 : :
2706 : : /*
2707 : : * Expression eval may have used space in ExprContext too. Avoid
2708 : : * accessing exec_context during initial pruning, as it is not valid
2709 : : * at that stage.
2710 : : */
408 amitlan@postgresql.o 2711 [ + + + + ]: 1970 : if (!initial_prune && pprune->exec_pruning_steps)
1440 alvherre@alvh.no-ip. 2712 : 1698 : ResetExprContext(pprune->exec_context.exprcontext);
2713 : : }
2714 : :
2715 : : /* Add in any subplans that partition pruning didn't account for */
2677 tgl@sss.pgh.pa.us 2716 : 1949 : result = bms_add_members(result, prunestate->other_subplans);
2717 : :
2899 alvherre@alvh.no-ip. 2718 : 1949 : MemoryContextSwitchTo(oldcontext);
2719 : :
2720 : : /* Copy result out of the temp context before we reset it */
2721 : 1949 : result = bms_copy(result);
 : : /* ... and likewise the collected RT index set, if we made one */
401 amitlan@postgresql.o 2722 [ + + ]: 1949 : if (validsubplan_rtis)
2723 : 224 : *validsubplan_rtis = bms_copy(*validsubplan_rtis);
2724 : :
2899 alvherre@alvh.no-ip. 2725 : 1949 : MemoryContextReset(prunestate->prune_context);
2726 : :
2727 : 1949 : return result;
2728 : : }
2729 : :
2730 : : /*
2731 : : * find_matching_subplans_recurse
2732 : : * Recursive worker function for ExecFindMatchingSubPlans
2733 : : *
2734 : : * Adds valid (non-prunable) subplan IDs to *validsubplans. If
2735 : : * *validsubplan_rtis is non-NULL, it also adds the RT indexes of their
2736 : : * corresponding partitions, but only if they are leaf partitions.
2737 : : */
2738 : : static void
2783 tgl@sss.pgh.pa.us 2739 : 2177 : find_matching_subplans_recurse(PartitionPruningData *prunedata,
2740 : : PartitionedRelPruningData *pprune,
2741 : : bool initial_prune,
2742 : : Bitmapset *validsubplans,
2743 : : Bitmapset **validsubplan_rtis)
2744 : : {
2745 : : Bitmapset *partset;
2746 : : int i;
2747 : :
2748 : : /* Guard against stack overflow due to overly deep partition hierarchy. */
2899 alvherre@alvh.no-ip. 2749 : 2177 : check_stack_depth();
2750 : :
2751 : : /*
2752 : : * Prune as appropriate, if we have pruning steps matching the current
2753 : : * execution context. Otherwise just include all partitions at this
2754 : : * level.
2755 : : */
2494 tgl@sss.pgh.pa.us 2756 [ + + + + ]: 2177 : if (initial_prune && pprune->initial_pruning_steps)
2757 : 266 : partset = get_matching_partitions(&pprune->initial_context,
2758 : : pprune->initial_pruning_steps);
2759 [ + + + + ]: 1911 : else if (!initial_prune && pprune->exec_pruning_steps)
2760 : 1740 : partset = get_matching_partitions(&pprune->exec_context,
2761 : : pprune->exec_pruning_steps);
2762 : : else
 : : /* note: partset aliases present_parts here, so it's read-only */
2899 alvherre@alvh.no-ip. 2763 : 171 : partset = pprune->present_parts;
2764 : :
2765 : : /* Translate partset into subplan indexes */
2766 : 2177 : i = -1;
2767 [ + + ]: 3082 : while ((i = bms_next_member(partset, i)) >= 0)
2768 : : {
2835 tgl@sss.pgh.pa.us 2769 [ + + ]: 905 : if (pprune->subplan_map[i] >= 0)
2770 : : {
2899 alvherre@alvh.no-ip. 2771 : 1394 : *validsubplans = bms_add_member(*validsubplans,
2835 tgl@sss.pgh.pa.us 2772 : 697 : pprune->subplan_map[i]);
2773 : :
2774 : : /*
2775 : : * Only report leaf partitions. Non-leaf partitions may appear
2776 : : * here when they use an unflattened Append or MergeAppend.
2777 : : */
383 amitlan@postgresql.o 2778 [ + + + + ]: 697 : if (validsubplan_rtis && pprune->leafpart_rti_map[i])
401 2779 : 337 : *validsubplan_rtis = bms_add_member(*validsubplan_rtis,
2780 : 337 : pprune->leafpart_rti_map[i]);
2781 : : }
2782 : : else
2783 : : {
2899 alvherre@alvh.no-ip. 2784 : 208 : int partidx = pprune->subpart_map[i];
2785 : :
2835 tgl@sss.pgh.pa.us 2786 [ + + ]: 208 : if (partidx >= 0)
2783 2787 : 207 : find_matching_subplans_recurse(prunedata,
2788 : : &prunedata->partrelprunedata[partidx],
2789 : : initial_prune, validsubplans,
2790 : : validsubplan_rtis);
2791 : : else
2792 : : {
2793 : : /*
2794 : : * We get here if the planner already pruned all the sub-
2795 : : * partitions for this partition. Silently ignore this
2796 : : * partition in this case. The end result is the same: we
2797 : : * would have pruned all partitions just the same, but we
2798 : : * don't have any pruning steps to execute to verify this.
2799 : : */
2800 : : }
2801 : : }
2802 : : }
2899 alvherre@alvh.no-ip. 2803 : 2177 : }
|