Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * analyzejoins.c
4 : : * Routines for simplifying joins after initial query analysis
5 : : *
6 : : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : : * certain optimizations cannot be performed at that stage for lack of
8 : : * detailed information about the query. The routines here are invoked
9 : : * after initsplan.c has done its work, and can do additional join removal
10 : : * and simplification steps based on the information extracted. The penalty
11 : : * is that we have to work harder to clean up after ourselves when we modify
12 : : * the query, since the derived data structures have to be updated too.
13 : : *
14 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
15 : : * Portions Copyright (c) 1994, Regents of the University of California
16 : : *
17 : : *
18 : : * IDENTIFICATION
19 : : * src/backend/optimizer/plan/analyzejoins.c
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "postgres.h"
24 : :
25 : : #include "catalog/pg_class.h"
26 : : #include "nodes/nodeFuncs.h"
27 : : #include "optimizer/joininfo.h"
28 : : #include "optimizer/optimizer.h"
29 : : #include "optimizer/pathnode.h"
30 : : #include "optimizer/paths.h"
31 : : #include "optimizer/placeholder.h"
32 : : #include "optimizer/planmain.h"
33 : : #include "optimizer/restrictinfo.h"
34 : : #include "rewrite/rewriteManip.h"
35 : : #include "utils/lsyscache.h"
36 : :
37 : : /*
38 : : * Utility structure. A sorting procedure is needed to simplify the search
39 : : * of SJE-candidate baserels referencing the same database relation. Having
40 : : * collected all baserels from the query jointree, the planner sorts them
41 : : * according to the reloid value, groups them with the next pass and attempts
42 : : * to remove self-joins.
43 : : *
44 : : * Preliminary sorting prevents quadratic behavior that can be harmful in the
45 : : * case of numerous joins.
46 : : */
47 : : typedef struct
48 : : {
49 : : int relid;
50 : : Oid reloid;
51 : : } SelfJoinCandidate;
52 : :
53 : : bool enable_self_join_elimination;
54 : :
55 : : /* local functions */
56 : : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
57 : : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
58 : : SpecialJoinInfo *sjinfo);
59 : : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
60 : : int relid, int ojrelid);
61 : : static void remove_rel_from_eclass(EquivalenceClass *ec,
62 : : SpecialJoinInfo *sjinfo,
63 : : int relid, int subst);
64 : : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
65 : : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
66 : : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
67 : : List *clause_list, List **extra_clauses);
68 : : static Oid distinct_col_search(int colno, List *colnos, List *opids);
69 : : static bool is_innerrel_unique_for(PlannerInfo *root,
70 : : Relids joinrelids,
71 : : Relids outerrelids,
72 : : RelOptInfo *innerrel,
73 : : JoinType jointype,
74 : : List *restrictlist,
75 : : List **extra_clauses);
76 : : static int self_join_candidates_cmp(const void *a, const void *b);
77 : : static bool replace_relid_callback(Node *node,
78 : : ChangeVarNodes_context *context);
79 : :
80 : :
81 : : /*
82 : : * remove_useless_joins
83 : : * Check for relations that don't actually need to be joined at all,
84 : : * and remove them from the query.
85 : : *
86 : : * We are passed the current joinlist and return the updated list. Other
87 : : * data structures that have to be updated are accessible via "root".
88 : : */
89 : : List *
5692 tgl@sss.pgh.pa.us 90 :CBC 162951 : remove_useless_joins(PlannerInfo *root, List *joinlist)
91 : : {
92 : : ListCell *lc;
93 : :
94 : : /*
95 : : * We are only interested in relations that are left-joined to, so we can
96 : : * scan the join_info_list to find them easily.
97 : : */
98 : 168275 : restart:
99 [ + + + + : 189807 : foreach(lc, root->join_info_list)
+ + ]
100 : : {
101 : 26856 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
102 : : int innerrelid;
103 : : int nremoved;
104 : :
105 : : /* Skip if not removable */
106 [ + + ]: 26856 : if (!join_is_removable(root, sjinfo))
107 : 21532 : continue;
108 : :
109 : : /*
110 : : * Currently, join_is_removable can only succeed when the sjinfo's
111 : : * righthand is a single baserel. Remove that rel from the query and
112 : : * joinlist.
113 : : */
114 : 5324 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
115 : :
256 akorotkov@postgresql 116 : 5324 : remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
117 : :
118 : : /* We verify that exactly one reference gets removed from joinlist */
5692 tgl@sss.pgh.pa.us 119 : 5324 : nremoved = 0;
120 : 5324 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
121 [ - + ]: 5324 : if (nremoved != 1)
5692 tgl@sss.pgh.pa.us 122 [ # # ]:UBC 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
123 : :
124 : : /*
125 : : * We can delete this SpecialJoinInfo from the list too, since it's no
126 : : * longer of interest. (Since we'll restart the foreach loop
127 : : * immediately, we don't bother with foreach_delete_current.)
128 : : */
2296 tgl@sss.pgh.pa.us 129 :CBC 5324 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
130 : :
131 : : /*
132 : : * Restart the scan. This is necessary to ensure we find all
133 : : * removable joins independently of ordering of the join_info_list
134 : : * (note that removal of attr_needed bits may make a join appear
135 : : * removable that did not before).
136 : : */
5692 137 : 5324 : goto restart;
138 : : }
139 : :
140 : 162951 : return joinlist;
141 : : }
142 : :
143 : : /*
144 : : * join_is_removable
145 : : * Check whether we need not perform this special join at all, because
146 : : * it will just duplicate its left input.
147 : : *
148 : : * This is true for a left join for which the join condition cannot match
149 : : * more than one inner-side row. (There are other possibly interesting
150 : : * cases, but we don't have the infrastructure to prove them.) We also
151 : : * have to check that the inner side doesn't generate any variables needed
152 : : * above the join.
153 : : */
154 : : static bool
155 : 26856 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
156 : : {
157 : : int innerrelid;
158 : : RelOptInfo *innerrel;
159 : : Relids inputrelids;
160 : : Relids joinrelids;
161 : 26856 : List *clause_list = NIL;
162 : : ListCell *l;
163 : : int attroff;
164 : :
165 : : /*
166 : : * Must be a left join to a single baserel, else we aren't going to be
167 : : * able to do anything with it.
168 : : */
1001 169 [ + + ]: 26856 : if (sjinfo->jointype != JOIN_LEFT)
3986 170 : 4970 : return false;
171 : :
172 [ + + ]: 21886 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
5692 173 : 653 : return false;
174 : :
175 : : /*
176 : : * Never try to eliminate a left join to the query result rel. Although
177 : : * the case is syntactically impossible in standard SQL, MERGE will build
178 : : * a join tree that looks exactly like that.
179 : : */
980 180 [ + + ]: 21233 : if (innerrelid == root->parse->resultRelation)
181 : 391 : return false;
182 : :
5692 183 : 20842 : innerrel = find_base_rel(root, innerrelid);
184 : :
185 : : /*
186 : : * Before we go to the effort of checking whether any innerrel variables
187 : : * are needed above the join, make a quick check to eliminate cases in
188 : : * which we will surely be unable to prove uniqueness of the innerrel.
189 : : */
3490 190 [ + + ]: 20842 : if (!rel_supports_distinctness(root, innerrel))
191 : 1539 : return false;
192 : :
193 : : /* Compute the relid set for the join we are considering */
892 194 : 19303 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
996 195 [ - + ]: 19303 : Assert(sjinfo->ojrelid != 0);
196 : 19303 : joinrelids = bms_copy(inputrelids);
197 : 19303 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
198 : :
199 : : /*
200 : : * We can't remove the join if any inner-rel attributes are used above the
201 : : * join. Here, "above" the join includes pushed-down conditions, so we
202 : : * should reject if attr_needed includes the OJ's own relid; therefore,
203 : : * compare to inputrelids not joinrelids.
204 : : *
205 : : * As a micro-optimization, it seems better to start with max_attr and
206 : : * count down rather than starting with min_attr and counting up, on the
207 : : * theory that the system attributes are somewhat less likely to be wanted
208 : : * and should be tested last.
209 : : */
5692 210 : 19303 : for (attroff = innerrel->max_attr - innerrel->min_attr;
211 [ + + ]: 176842 : attroff >= 0;
212 : 157539 : attroff--)
213 : : {
996 214 [ + + ]: 171433 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
5692 215 : 13894 : return false;
216 : : }
217 : :
218 : : /*
219 : : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
220 : : * that will be used above the join. The PHV case is a little bit more
221 : : * complicated, because PHVs may have been assigned a ph_eval_at location
222 : : * that includes the innerrel, yet their contained expression might not
223 : : * actually reference the innerrel (it could be just a constant, for
224 : : * instance). If such a PHV is due to be evaluated above the join then it
225 : : * needn't prevent join removal.
226 : : */
227 [ + + + + : 5507 : foreach(l, root->placeholder_list)
+ + ]
228 : : {
229 : 116 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
230 : :
4454 231 [ - + ]: 116 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
232 : 18 : return false; /* it references innerrel laterally */
5511 233 [ + + ]: 116 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
234 : 27 : continue; /* it definitely doesn't reference innerrel */
994 235 [ + + ]: 89 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
236 : 3 : continue; /* PHV is not used above the join */
237 [ + + ]: 86 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
238 : 15 : return false; /* it has to be evaluated below the join */
239 : :
240 : : /*
241 : : * We need to be sure there will still be a place to evaluate the PHV
242 : : * if we remove the join, ie that ph_eval_at wouldn't become empty.
243 : : */
244 [ + + ]: 71 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
4454 245 : 3 : return false; /* there isn't any other place to eval PHV */
246 : : /* Check contained expression last, since this is a bit expensive */
1740 247 [ - + ]: 68 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
5511 248 : 68 : innerrel->relids))
994 tgl@sss.pgh.pa.us 249 :UBC 0 : return false; /* contained expression references innerrel */
250 : : }
251 : :
252 : : /*
253 : : * Search for mergejoinable clauses that constrain the inner rel against
254 : : * either the outer rel or a pseudoconstant. If an operator is
255 : : * mergejoinable then it behaves like equality for some btree opclass, so
256 : : * it's what we want. The mergejoinability test also eliminates clauses
257 : : * containing volatile functions, which we couldn't depend on.
258 : : */
5692 tgl@sss.pgh.pa.us 259 [ + + + + :CBC 10866 : foreach(l, innerrel->joininfo)
+ + ]
260 : : {
261 : 5475 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
262 : :
263 : : /*
264 : : * If the current join commutes with some other outer join(s) via
265 : : * outer join identity 3, there will be multiple clones of its join
266 : : * clauses in the joininfo list. We want to consider only the
267 : : * has_clone form of such clauses. Processing more than one form
268 : : * would be wasteful, and also some of the others would confuse the
269 : : * RINFO_IS_PUSHED_DOWN test below.
270 : : */
1001 271 [ + + ]: 5475 : if (restrictinfo->is_clone)
272 : 50 : continue; /* ignore it */
273 : :
274 : : /*
275 : : * If it's not a join clause for this outer join, we can't use it.
276 : : * Note that if the clause is pushed-down, then it is logically from
277 : : * above the outer join, even if it references no other rels (it might
278 : : * be from WHERE, for example).
279 : : */
2747 280 [ + + + + ]: 5425 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
996 281 : 61 : continue; /* ignore; not useful here */
282 : :
283 : : /* Ignore if it's not a mergejoinable clause */
5692 284 [ + + ]: 5364 : if (!restrictinfo->can_join ||
285 [ - + ]: 5316 : restrictinfo->mergeopfamilies == NIL)
286 : 48 : continue; /* not mergejoinable */
287 : :
288 : : /*
289 : : * Check if the clause has the form "outer op inner" or "inner op
290 : : * outer", and if so mark which side is inner.
291 : : */
292 [ + + ]: 5316 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
293 : : innerrel->relids))
294 : 3 : continue; /* no good for these input relations */
295 : :
296 : : /* OK, add to list */
297 : 5313 : clause_list = lappend(clause_list, restrictinfo);
298 : : }
299 : :
300 : : /*
301 : : * Now that we have the relevant equality join clauses, try to prove the
302 : : * innerrel distinct.
303 : : */
256 akorotkov@postgresql 304 [ + + ]: 5391 : if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
3490 tgl@sss.pgh.pa.us 305 : 5324 : return true;
306 : :
307 : : /*
308 : : * Some day it would be nice to check for other methods of establishing
309 : : * distinctness.
310 : : */
5692 311 : 67 : return false;
312 : : }
313 : :
314 : :
315 : : /*
316 : : * Remove the target rel->relid and references to the target join from the
317 : : * planner's data structures, having determined that there is no need
318 : : * to include them in the query. Optionally replace them with subst if subst
319 : : * is non-negative.
320 : : *
321 : : * This function updates only parts needed for both left-join removal and
322 : : * self-join removal.
323 : : */
324 : : static void
256 akorotkov@postgresql 325 : 5624 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
326 : : int subst, SpecialJoinInfo *sjinfo,
327 : : Relids joinrelids)
328 : : {
329 : 5624 : int relid = rel->relid;
330 : : Index rti;
331 : : ListCell *l;
332 : :
333 : : /*
334 : : * Update all_baserels and related relid sets.
335 : : */
336 : 5624 : root->all_baserels = adjust_relid_set(root->all_baserels, relid, subst);
337 : 5624 : root->all_query_rels = adjust_relid_set(root->all_query_rels, relid, subst);
338 : :
246 339 [ + + ]: 5624 : if (sjinfo != NULL)
340 : : {
341 : 10648 : root->outer_join_rels = bms_del_member(root->outer_join_rels,
342 : 5324 : sjinfo->ojrelid);
343 : 5324 : root->all_query_rels = bms_del_member(root->all_query_rels,
344 : 5324 : sjinfo->ojrelid);
345 : : }
346 : :
347 : : /*
348 : : * Likewise remove references from SpecialJoinInfo data structures.
349 : : *
350 : : * This is relevant in case the outer join we're deleting is nested inside
351 : : * other outer joins: the upper joins' relid sets have to be adjusted. The
352 : : * RHS of the target outer join will be made empty here, but that's OK
353 : : * since caller will delete that SpecialJoinInfo entirely.
354 : : */
5636 tgl@sss.pgh.pa.us 355 [ + + + + : 13075 : foreach(l, root->join_info_list)
+ + ]
356 : : {
885 357 : 7451 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
358 : :
359 : : /*
360 : : * initsplan.c is fairly cavalier about allowing SpecialJoinInfos'
361 : : * lefthand/righthand relid sets to be shared with other data
362 : : * structures. Ensure that we don't modify the original relid sets.
363 : : * (The commute_xxx sets are always per-SpecialJoinInfo though.)
364 : : */
536 365 : 7451 : sjinf->min_lefthand = bms_copy(sjinf->min_lefthand);
366 : 7451 : sjinf->min_righthand = bms_copy(sjinf->min_righthand);
367 : 7451 : sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
368 : 7451 : sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
369 : : /* Now remove relid from the sets: */
256 akorotkov@postgresql 370 : 7451 : sjinf->min_lefthand = adjust_relid_set(sjinf->min_lefthand, relid, subst);
371 : 7451 : sjinf->min_righthand = adjust_relid_set(sjinf->min_righthand, relid, subst);
372 : 7451 : sjinf->syn_lefthand = adjust_relid_set(sjinf->syn_lefthand, relid, subst);
373 : 7451 : sjinf->syn_righthand = adjust_relid_set(sjinf->syn_righthand, relid, subst);
374 : :
375 [ + + ]: 7451 : if (sjinfo != NULL)
376 : : {
246 377 [ - + ]: 7400 : Assert(subst <= 0);
378 : :
379 : : /* Remove sjinfo->ojrelid bits from the sets: */
380 : 14800 : sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand,
381 : 7400 : sjinfo->ojrelid);
382 : 14800 : sjinf->min_righthand = bms_del_member(sjinf->min_righthand,
383 : 7400 : sjinfo->ojrelid);
384 : 14800 : sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand,
385 : 7400 : sjinfo->ojrelid);
386 : 14800 : sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand,
387 : 7400 : sjinfo->ojrelid);
388 : : /* relid cannot appear in these fields, but ojrelid can: */
389 : 14800 : sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l,
390 : 7400 : sjinfo->ojrelid);
391 : 14800 : sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r,
392 : 7400 : sjinfo->ojrelid);
393 : 14800 : sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l,
394 : 7400 : sjinfo->ojrelid);
395 : 7400 : sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r,
396 : 7400 : sjinfo->ojrelid);
397 : : }
398 : : else
399 : : {
400 [ - + ]: 51 : Assert(subst > 0);
401 : :
173 402 : 51 : ChangeVarNodesExtended((Node *) sjinf->semi_rhs_exprs, relid, subst,
403 : : 0, replace_relid_callback);
404 : : }
405 : : }
406 : :
407 : : /*
408 : : * Likewise remove references from PlaceHolderVar data structures,
409 : : * removing any no-longer-needed placeholders entirely. We remove PHV
410 : : * only for left-join removal. With self-join elimination, PHVs already
411 : : * get moved to the remaining relation, where they might still be needed.
412 : : * It might also happen that we skip the removal of some PHVs that could
413 : : * be removed. However, the overhead of extra PHVs is small compared to
414 : : * the complexity of analysis needed to remove them.
415 : : *
416 : : * Removal is a bit trickier than it might seem: we can remove PHVs that
417 : : * are used at the target rel and/or in the join qual, but not those that
418 : : * are used at join partner rels or above the join. It's not that easy to
419 : : * distinguish PHVs used at partner rels from those used in the join qual,
420 : : * since they will both have ph_needed sets that are subsets of
421 : : * joinrelids. However, a PHV used at a partner rel could not have the
422 : : * target rel in ph_eval_at, so we check that while deciding whether to
423 : : * remove or just update the PHV. There is no corresponding test in
424 : : * join_is_removable because it doesn't need to distinguish those cases.
425 : : */
2296 tgl@sss.pgh.pa.us 426 [ + + + + : 5734 : foreach(l, root->placeholder_list)
+ + ]
427 : : {
5692 428 : 110 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
429 : :
256 akorotkov@postgresql 430 [ + + - + ]: 110 : Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
182 431 [ + + + + ]: 199 : if (sjinfo != NULL &&
432 [ + + ]: 104 : bms_is_subset(phinfo->ph_needed, joinrelids) &&
872 tgl@sss.pgh.pa.us 433 : 15 : bms_is_member(relid, phinfo->ph_eval_at) &&
182 akorotkov@postgresql 434 [ + + ]: 6 : !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
435 : : {
436 : : /*
437 : : * This code shouldn't be executed if one relation is substituted
438 : : * with another: in this case, the placeholder may be employed in
439 : : * a filter inside the scan node the SJE removes.
440 : : */
2296 tgl@sss.pgh.pa.us 441 : 3 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
442 : : l);
1167 443 : 3 : root->placeholder_array[phinfo->phid] = NULL;
444 : : }
445 : : else
446 : : {
992 447 : 107 : PlaceHolderVar *phv = phinfo->ph_var;
448 : :
256 akorotkov@postgresql 449 : 107 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at, relid, subst);
246 450 [ + + ]: 107 : if (sjinfo != NULL)
451 : 86 : phinfo->ph_eval_at = adjust_relid_set(phinfo->ph_eval_at,
452 : 86 : sjinfo->ojrelid, subst);
992 tgl@sss.pgh.pa.us 453 [ - + ]: 107 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
454 : : /* Reduce ph_needed to contain only "relation 0"; see below */
395 455 [ + + ]: 107 : if (bms_is_member(0, phinfo->ph_needed))
456 : 52 : phinfo->ph_needed = bms_make_singleton(0);
457 : : else
458 : 55 : phinfo->ph_needed = NULL;
459 : :
256 akorotkov@postgresql 460 : 107 : phinfo->ph_lateral = adjust_relid_set(phinfo->ph_lateral, relid, subst);
461 : :
462 : : /*
463 : : * ph_lateral might contain rels mentioned in ph_eval_at after the
464 : : * replacement, remove them.
465 : : */
466 : 107 : phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at);
467 : : /* ph_lateral might or might not be empty */
468 : :
469 : 107 : phv->phrels = adjust_relid_set(phv->phrels, relid, subst);
246 470 [ + + ]: 107 : if (sjinfo != NULL)
471 : 86 : phv->phrels = adjust_relid_set(phv->phrels,
472 : 86 : sjinfo->ojrelid, subst);
992 tgl@sss.pgh.pa.us 473 [ - + ]: 107 : Assert(!bms_is_empty(phv->phrels));
474 : :
173 akorotkov@postgresql 475 : 107 : ChangeVarNodesExtended((Node *) phv->phexpr, relid, subst, 0,
476 : : replace_relid_callback);
477 : :
992 tgl@sss.pgh.pa.us 478 [ - + ]: 107 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
479 : : }
480 : : }
481 : :
482 : : /*
483 : : * Likewise remove references from EquivalenceClasses.
484 : : */
256 akorotkov@postgresql 485 [ + - + + : 29921 : foreach(l, root->eq_classes)
+ + ]
486 : : {
487 : 24297 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
488 : :
489 [ + + + + ]: 24297 : if (bms_is_member(relid, ec->ec_relids) ||
246 490 [ - + ]: 16275 : (sjinfo == NULL || bms_is_member(sjinfo->ojrelid, ec->ec_relids)))
491 : 8022 : remove_rel_from_eclass(ec, sjinfo, relid, subst);
492 : : }
493 : :
494 : : /*
495 : : * Finally, we must recompute per-Var attr_needed and per-PlaceHolderVar
496 : : * ph_needed relid sets. These have to be known accurately, else we may
497 : : * fail to remove other now-removable outer joins. And our removal of the
498 : : * join clause(s) for this outer join may mean that Vars that were
499 : : * formerly needed no longer are. So we have to do this honestly by
500 : : * repeating the construction of those relid sets. We can cheat to one
501 : : * small extent: we can avoid re-examining the targetlist and HAVING qual
502 : : * by preserving "relation 0" bits from the existing relid sets. This is
503 : : * safe because we'd never remove such references.
504 : : *
505 : : * So, start by removing all other bits from attr_needed sets and
506 : : * lateral_vars lists. (We already did this above for ph_needed.)
507 : : */
256 508 [ + + ]: 35919 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
509 : : {
510 : 30295 : RelOptInfo *otherrel = root->simple_rel_array[rti];
511 : : int attroff;
512 : :
513 : : /* there may be empty slots corresponding to non-baserel RTEs */
514 [ + + ]: 30295 : if (otherrel == NULL)
515 : 14966 : continue;
516 : :
517 [ - + ]: 15329 : Assert(otherrel->relid == rti); /* sanity check on array */
518 : :
519 : 15329 : for (attroff = otherrel->max_attr - otherrel->min_attr;
520 [ + + ]: 342814 : attroff >= 0;
521 : 327485 : attroff--)
522 : : {
523 [ + + ]: 327485 : if (bms_is_member(0, otherrel->attr_needed[attroff]))
524 : 23064 : otherrel->attr_needed[attroff] = bms_make_singleton(0);
525 : : else
526 : 304421 : otherrel->attr_needed[attroff] = NULL;
527 : : }
528 : :
529 [ + + ]: 15329 : if (subst > 0)
173 530 : 748 : ChangeVarNodesExtended((Node *) otherrel->lateral_vars, relid,
531 : : subst, 0, replace_relid_callback);
532 : : }
256 533 : 5624 : }
534 : :
535 : : /*
536 : : * Remove the target relid and references to the target join from the
537 : : * planner's data structures, having determined that there is no need
538 : : * to include them in the query.
539 : : *
540 : : * We are not terribly thorough here. We only bother to update parts of
541 : : * the planner's data structures that will actually be consulted later.
542 : : */
543 : : static void
544 : 5324 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
545 : : SpecialJoinInfo *sjinfo)
546 : : {
547 : 5324 : RelOptInfo *rel = find_base_rel(root, relid);
548 : 5324 : int ojrelid = sjinfo->ojrelid;
549 : : Relids joinrelids;
550 : : Relids join_plus_commute;
551 : : List *joininfos;
552 : : ListCell *l;
553 : :
554 : : /* Compute the relid set for the join we are considering */
555 : 5324 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
556 [ - + ]: 5324 : Assert(ojrelid != 0);
557 : 5324 : joinrelids = bms_add_member(joinrelids, ojrelid);
558 : :
559 : 5324 : remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
560 : :
561 : : /*
562 : : * Remove any joinquals referencing the rel from the joininfo lists.
563 : : *
564 : : * In some cases, a joinqual has to be put back after deleting its
565 : : * reference to the target rel. This can occur for pseudoconstant and
566 : : * outerjoin-delayed quals, which can get marked as requiring the rel in
567 : : * order to force them to be evaluated at or above the join. We can't
568 : : * just discard them, though. Only quals that logically belonged to the
569 : : * outer join being discarded should be removed from the query.
570 : : *
571 : : * We might encounter a qual that is a clone of a deletable qual with some
572 : : * outer-join relids added (see deconstruct_distribute_oj_quals). To
573 : : * ensure we get rid of such clones as well, add the relids of all OJs
574 : : * commutable with this one to the set we test against for
575 : : * pushed-down-ness.
576 : : */
885 tgl@sss.pgh.pa.us 577 : 5324 : join_plus_commute = bms_union(joinrelids,
578 : 5324 : sjinfo->commute_above_r);
579 : 5324 : join_plus_commute = bms_add_members(join_plus_commute,
580 : 5324 : sjinfo->commute_below_l);
581 : :
582 : : /*
583 : : * We must make a copy of the rel's old joininfo list before starting the
584 : : * loop, because otherwise remove_join_clause_from_rels would destroy the
585 : : * list while we're scanning it.
586 : : */
5522 587 : 5324 : joininfos = list_copy(rel->joininfo);
588 [ + + + + : 10744 : foreach(l, joininfos)
+ + ]
589 : : {
590 : 5420 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
591 : :
592 : 5420 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
593 : :
885 594 [ + + + + ]: 5420 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
595 : : {
596 : : /*
597 : : * There might be references to relid or ojrelid in the
598 : : * RestrictInfo's relid sets, as a consequence of PHVs having had
599 : : * ph_eval_at sets that include those. We already checked above
600 : : * that any such PHV is safe (and updated its ph_eval_at), so we
601 : : * can just drop those references.
602 : : */
990 603 : 61 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
604 : :
605 : : /*
606 : : * Cross-check that the clause itself does not reference the
607 : : * target rel or join.
608 : : */
609 : : #ifdef USE_ASSERT_CHECKING
610 : : {
885 611 : 61 : Relids clause_varnos = pull_varnos(root,
612 : 61 : (Node *) rinfo->clause);
613 : :
614 [ - + ]: 61 : Assert(!bms_is_member(relid, clause_varnos));
615 [ - + ]: 61 : Assert(!bms_is_member(ojrelid, clause_varnos));
616 : : }
617 : : #endif
618 : : /* Now throw it back into the joininfo lists */
5522 619 : 61 : distribute_restrictinfo_to_rels(root, rinfo);
620 : : }
621 : : }
622 : :
623 : : /*
624 : : * There may be references to the rel in root->fkey_list, but if so,
625 : : * match_foreign_keys_to_quals() will get rid of them.
626 : : */
627 : :
628 : : /*
629 : : * Now remove the rel from the baserel array to prevent it from being
630 : : * referenced again. (We can't do this earlier because
631 : : * remove_join_clause_from_rels will touch it.)
632 : : */
987 633 : 5324 : root->simple_rel_array[relid] = NULL;
64 akorotkov@postgresql 634 : 5324 : root->simple_rte_array[relid] = NULL;
635 : :
636 : : /* And nuke the RelOptInfo, just in case there's another access path */
987 tgl@sss.pgh.pa.us 637 : 5324 : pfree(rel);
638 : :
639 : : /*
640 : : * Now repeat construction of attr_needed bits coming from all other
641 : : * sources.
642 : : */
395 643 : 5324 : rebuild_placeholder_attr_needed(root);
644 : 5324 : rebuild_joinclause_attr_needed(root);
645 : 5324 : rebuild_eclass_attr_needed(root);
646 : 5324 : rebuild_lateral_attr_needed(root);
5692 647 : 5324 : }
648 : :
649 : : /*
650 : : * Remove any references to relid or ojrelid from the RestrictInfo.
651 : : *
652 : : * We only bother to clean out bits in clause_relids and required_relids,
653 : : * not nullingrel bits in contained Vars and PHVs. (This might have to be
654 : : * improved sometime.) However, if the RestrictInfo contains an OR clause
655 : : * we have to also clean up the sub-clauses.
656 : : */
657 : : static void
990 658 : 2264 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
659 : : {
660 : : /*
661 : : * initsplan.c is fairly cavalier about allowing RestrictInfos to share
662 : : * relid sets with other RestrictInfos, and SpecialJoinInfos too. Make
663 : : * sure this RestrictInfo has its own relid sets before we modify them.
664 : : * (In present usage, clause_relids is probably not shared, but
665 : : * required_relids could be; let's not assume anything.)
666 : : */
667 : 2264 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
668 : 2264 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
669 : 2264 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
670 : : /* Likewise for required_relids */
671 : 2264 : rinfo->required_relids = bms_copy(rinfo->required_relids);
672 : 2264 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
673 : 2264 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
674 : :
675 : : /* If it's an OR, recurse to clean up sub-clauses */
676 [ + + ]: 2264 : if (restriction_is_or_clause(rinfo))
677 : : {
678 : : ListCell *lc;
679 : :
680 [ - + ]: 3 : Assert(is_orclause(rinfo->orclause));
681 [ + - + + : 9 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
+ + ]
682 : : {
683 : 6 : Node *orarg = (Node *) lfirst(lc);
684 : :
685 : : /* OR arguments should be ANDs or sub-RestrictInfos */
686 [ - + ]: 6 : if (is_andclause(orarg))
687 : : {
990 tgl@sss.pgh.pa.us 688 :UBC 0 : List *andargs = ((BoolExpr *) orarg)->args;
689 : : ListCell *lc2;
690 : :
691 [ # # # # : 0 : foreach(lc2, andargs)
# # ]
692 : : {
693 : 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
694 : :
695 : 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
696 : : }
697 : : }
698 : : else
699 : : {
990 tgl@sss.pgh.pa.us 700 :CBC 6 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
701 : :
702 : 6 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
703 : : }
704 : : }
705 : : }
706 : 2264 : }
707 : :
708 : : /*
709 : : * Remove any references to relid or sjinfo->ojrelid (if sjinfo != NULL)
710 : : * from the EquivalenceClass.
711 : : *
712 : : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
713 : : * any nullingrel bits in contained Vars and PHVs. (This might have to be
714 : : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
715 : : * that implied join equalities will be generated at the appropriate join
716 : : * level(s).
717 : : */
718 : : static void
246 akorotkov@postgresql 719 : 8022 : remove_rel_from_eclass(EquivalenceClass *ec, SpecialJoinInfo *sjinfo,
720 : : int relid, int subst)
721 : : {
722 : : ListCell *lc;
723 : :
724 : : /* Fix up the EC's overall relids */
256 725 : 8022 : ec->ec_relids = adjust_relid_set(ec->ec_relids, relid, subst);
246 726 [ + + ]: 8022 : if (sjinfo != NULL)
727 : 7488 : ec->ec_relids = adjust_relid_set(ec->ec_relids,
728 : 7488 : sjinfo->ojrelid, subst);
729 : :
730 : : /*
731 : : * We don't expect any EC child members to exist at this point. Ensure
732 : : * that's the case, otherwise, we might be getting asked to do something
733 : : * this function hasn't been coded for.
734 : : */
202 drowley@postgresql.o 735 [ - + ]: 8022 : Assert(ec->ec_childmembers == NULL);
736 : :
737 : : /*
738 : : * Fix up the member expressions. Any non-const member that ends with
739 : : * empty em_relids must be a Var or PHV of the removed relation. We don't
740 : : * need it anymore, so we can drop it.
741 : : */
865 tgl@sss.pgh.pa.us 742 [ + + + + : 18632 : foreach(lc, ec->ec_members)
+ + ]
743 : : {
744 : 10610 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
745 : :
746 [ + + + + ]: 10610 : if (bms_is_member(relid, cur_em->em_relids) ||
246 akorotkov@postgresql 747 [ - + ]: 2197 : (sjinfo != NULL && bms_is_member(sjinfo->ojrelid,
748 : 2197 : cur_em->em_relids)))
749 : : {
865 tgl@sss.pgh.pa.us 750 [ - + ]: 7488 : Assert(!cur_em->em_is_const);
256 akorotkov@postgresql 751 : 7488 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
246 752 [ + - ]: 7488 : if (sjinfo != NULL)
753 : 7488 : cur_em->em_relids = adjust_relid_set(cur_em->em_relids,
754 : 7488 : sjinfo->ojrelid, subst);
865 tgl@sss.pgh.pa.us 755 [ + + ]: 7488 : if (bms_is_empty(cur_em->em_relids))
756 : 7482 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
757 : : }
758 : : }
759 : :
760 : : /* Fix up the source clauses, in case we can re-use them later */
761 [ + + + + : 10747 : foreach(lc, ec->ec_sources)
+ + ]
762 : : {
763 : 2725 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
764 : :
246 akorotkov@postgresql 765 [ + + ]: 2725 : if (sjinfo == NULL)
173 766 : 528 : ChangeVarNodesExtended((Node *) rinfo, relid, subst, 0,
767 : : replace_relid_callback);
768 : : else
246 769 : 2197 : remove_rel_from_restrictinfo(rinfo, relid, sjinfo->ojrelid);
770 : : }
771 : :
772 : : /*
773 : : * Rather than expend code on fixing up any already-derived clauses, just
774 : : * drop them. (At this point, any such clauses would be base restriction
775 : : * clauses, which we'd not need anymore anyway.)
776 : : */
206 amitlan@postgresql.o 777 : 8022 : ec_clear_derived_clauses(ec);
865 tgl@sss.pgh.pa.us 778 : 8022 : }
779 : :
780 : : /*
781 : : * Remove any occurrences of the target relid from a joinlist structure.
782 : : *
783 : : * It's easiest to build a whole new list structure, so we handle it that
784 : : * way. Efficiency is not a big deal here.
785 : : *
786 : : * *nremoved is incremented by the number of occurrences removed (there
787 : : * should be exactly one, but the caller checks that).
788 : : */
789 : : static List *
5692 790 : 5756 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
791 : : {
792 : 5756 : List *result = NIL;
793 : : ListCell *jl;
794 : :
795 [ + - + + : 21217 : foreach(jl, joinlist)
+ + ]
796 : : {
797 : 15461 : Node *jlnode = (Node *) lfirst(jl);
798 : :
799 [ + + ]: 15461 : if (IsA(jlnode, RangeTblRef))
800 : : {
801 : 15329 : int varno = ((RangeTblRef *) jlnode)->rtindex;
802 : :
803 [ + + ]: 15329 : if (varno == relid)
804 : 5624 : (*nremoved)++;
805 : : else
806 : 9705 : result = lappend(result, jlnode);
807 : : }
808 [ + - ]: 132 : else if (IsA(jlnode, List))
809 : : {
810 : : /* Recurse to handle subproblem */
811 : : List *sublist;
812 : :
813 : 132 : sublist = remove_rel_from_joinlist((List *) jlnode,
814 : : relid, nremoved);
815 : : /* Avoid including empty sub-lists in the result */
816 [ + - ]: 132 : if (sublist)
817 : 132 : result = lappend(result, sublist);
818 : : }
819 : : else
820 : : {
5692 tgl@sss.pgh.pa.us 821 [ # # ]:UBC 0 : elog(ERROR, "unrecognized joinlist node type: %d",
822 : : (int) nodeTag(jlnode));
823 : : }
824 : : }
825 : :
5692 tgl@sss.pgh.pa.us 826 :CBC 5756 : return result;
827 : : }
828 : :
829 : :
830 : : /*
831 : : * reduce_unique_semijoins
832 : : * Check for semijoins that can be simplified to plain inner joins
833 : : * because the inner relation is provably unique for the join clauses.
834 : : *
835 : : * Ideally this would happen during reduce_outer_joins, but we don't have
836 : : * enough information at that point.
837 : : *
838 : : * To perform the strength reduction when applicable, we need only delete
839 : : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
840 : : * bother fixing the join type attributed to it in the query jointree,
841 : : * since that won't be consulted again.)
842 : : */
843 : : void
3101 844 : 162951 : reduce_unique_semijoins(PlannerInfo *root)
845 : : {
846 : : ListCell *lc;
847 : :
848 : : /*
849 : : * Scan the join_info_list to find semijoins.
850 : : */
2296 851 [ + + + + : 184261 : foreach(lc, root->join_info_list)
+ + ]
852 : : {
3101 853 : 21310 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
854 : : int innerrelid;
855 : : RelOptInfo *innerrel;
856 : : Relids joinrelids;
857 : : List *restrictlist;
858 : :
859 : : /*
860 : : * Must be a semijoin to a single baserel, else we aren't going to be
861 : : * able to do anything with it.
862 : : */
1001 863 [ + + ]: 21310 : if (sjinfo->jointype != JOIN_SEMI)
3101 864 : 21119 : continue;
865 : :
866 [ + + ]: 2502 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
867 : 91 : continue;
868 : :
869 : 2411 : innerrel = find_base_rel(root, innerrelid);
870 : :
871 : : /*
872 : : * Before we trouble to run generate_join_implied_equalities, make a
873 : : * quick check to eliminate cases in which we will surely be unable to
874 : : * prove uniqueness of the innerrel.
875 : : */
876 [ + + ]: 2411 : if (!rel_supports_distinctness(root, innerrel))
877 : 513 : continue;
878 : :
879 : : /* Compute the relid set for the join we are considering */
880 : 1898 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
1001 881 [ - + ]: 1898 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
882 : :
883 : : /*
884 : : * Since we're only considering a single-rel RHS, any join clauses it
885 : : * has must be clauses linking it to the semijoin's min_lefthand. We
886 : : * can also consider EC-derived join clauses.
887 : : */
888 : : restrictlist =
3101 889 : 1898 : list_concat(generate_join_implied_equalities(root,
890 : : joinrelids,
891 : : sjinfo->min_lefthand,
892 : : innerrel,
893 : : NULL),
894 : 1898 : innerrel->joininfo);
895 : :
896 : : /* Test whether the innerrel is unique for those clauses. */
2747 897 [ + + ]: 1898 : if (!innerrel_is_unique(root,
898 : : joinrelids, sjinfo->min_lefthand, innerrel,
899 : : JOIN_SEMI, restrictlist, true))
3101 900 : 1707 : continue;
901 : :
902 : : /* OK, remove the SpecialJoinInfo from the list. */
2296 903 : 191 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
904 : : }
3101 905 : 162951 : }
906 : :
907 : :
908 : : /*
909 : : * rel_supports_distinctness
910 : : * Could the relation possibly be proven distinct on some set of columns?
911 : : *
912 : : * This is effectively a pre-checking function for rel_is_distinct_for().
913 : : * It must return true if rel_is_distinct_for() could possibly return true
914 : : * with this rel, but it should not expend a lot of cycles. The idea is
915 : : * that callers can avoid doing possibly-expensive processing to compute
916 : : * rel_is_distinct_for()'s argument lists if the call could not possibly
917 : : * succeed.
918 : : */
919 : : static bool
3490 920 : 343176 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
921 : : {
922 : : /* We only know about baserels ... */
923 [ + + ]: 343176 : if (rel->reloptkind != RELOPT_BASEREL)
924 : 125602 : return false;
925 [ + + ]: 217574 : if (rel->rtekind == RTE_RELATION)
926 : : {
927 : : /*
928 : : * For a plain relation, we only know how to prove uniqueness by
929 : : * reference to unique indexes. Make sure there's at least one
930 : : * suitable unique index. It must be immediately enforced, and not a
931 : : * partial index. (Keep these conditions in sync with
932 : : * relation_has_unique_index_for!)
933 : : */
934 : : ListCell *lc;
935 : :
936 [ + + + + : 270125 : foreach(lc, rel->indexlist)
+ + ]
937 : : {
938 : 236524 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
939 : :
861 drowley@postgresql.o 940 [ + + + - : 236524 : if (ind->unique && ind->immediate && ind->indpred == NIL)
+ + ]
3490 tgl@sss.pgh.pa.us 941 : 165827 : return true;
942 : : }
943 : : }
944 [ + + ]: 18146 : else if (rel->rtekind == RTE_SUBQUERY)
945 : : {
946 : 5268 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
947 : :
948 : : /* Check if the subquery has any qualities that support distinctness */
949 [ + + ]: 5268 : if (query_supports_distinctness(subquery))
950 : 4360 : return true;
951 : : }
952 : : /* We have no proof rules for any other rtekinds. */
953 : 47387 : return false;
954 : : }
955 : :
956 : : /*
957 : : * rel_is_distinct_for
958 : : * Does the relation return only distinct rows according to clause_list?
959 : : *
960 : : * clause_list is a list of join restriction clauses involving this rel and
961 : : * some other one. Return true if no two rows emitted by this rel could
962 : : * possibly join to the same row of the other rel.
963 : : *
964 : : * The caller must have already determined that each condition is a
965 : : * mergejoinable equality with an expression in this relation on one side, and
966 : : * an expression not involving this relation on the other. The transient
967 : : * outer_is_left flag is used to identify which side references this relation:
968 : : * left side if outer_is_left is false, right side if it is true.
969 : : *
970 : : * Note that the passed-in clause_list may be destructively modified! This
971 : : * is OK for current uses, because the clause_list is built by the caller for
972 : : * the sole purpose of passing to this function.
973 : : *
974 : : * (*extra_clauses) to be set to the right sides of baserestrictinfo clauses,
975 : : * looking like "x = const" if distinctness is derived from such clauses, not
976 : : * joininfo clauses. Pass NULL to the extra_clauses if this value is not
977 : : * needed.
978 : : */
979 : : static bool
256 akorotkov@postgresql 980 : 111411 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
981 : : List **extra_clauses)
982 : : {
983 : : /*
984 : : * We could skip a couple of tests here if we assume all callers checked
985 : : * rel_supports_distinctness first, but it doesn't seem worth taking any
986 : : * risk for.
987 : : */
3490 tgl@sss.pgh.pa.us 988 [ - + ]: 111411 : if (rel->reloptkind != RELOPT_BASEREL)
3490 tgl@sss.pgh.pa.us 989 :UBC 0 : return false;
3490 tgl@sss.pgh.pa.us 990 [ + + ]:CBC 111411 : if (rel->rtekind == RTE_RELATION)
991 : : {
992 : : /*
993 : : * Examine the indexes to see if we have a matching unique index.
994 : : * relation_has_unique_index_for automatically adds any usable
995 : : * restriction clauses for the rel, so we needn't do that here.
996 : : */
69 rguo@postgresql.org 997 [ + + ]:GNC 108899 : if (relation_has_unique_index_for(root, rel, clause_list, extra_clauses))
3490 tgl@sss.pgh.pa.us 998 :CBC 61620 : return true;
999 : : }
1000 [ + - ]: 2512 : else if (rel->rtekind == RTE_SUBQUERY)
1001 : : {
1002 : 2512 : Index relid = rel->relid;
1003 : 2512 : Query *subquery = root->simple_rte_array[relid]->subquery;
1004 : 2512 : List *colnos = NIL;
1005 : 2512 : List *opids = NIL;
1006 : : ListCell *l;
1007 : :
1008 : : /*
1009 : : * Build the argument lists for query_is_distinct_for: a list of
1010 : : * output column numbers that the query needs to be distinct over, and
1011 : : * a list of equality operators that the output columns need to be
1012 : : * distinct according to.
1013 : : *
1014 : : * (XXX we are not considering restriction clauses attached to the
1015 : : * subquery; is that worth doing?)
1016 : : */
1017 [ + + + + : 4934 : foreach(l, clause_list)
+ + ]
1018 : : {
3122 1019 : 2422 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
1020 : : Oid op;
1021 : : Var *var;
1022 : :
1023 : : /*
1024 : : * Get the equality operator we need uniqueness according to.
1025 : : * (This might be a cross-type operator and thus not exactly the
1026 : : * same operator the subquery would consider; that's all right
1027 : : * since query_is_distinct_for can resolve such cases.) The
1028 : : * caller's mergejoinability test should have selected only
1029 : : * OpExprs.
1030 : : */
3170 peter_e@gmx.net 1031 : 2422 : op = castNode(OpExpr, rinfo->clause)->opno;
1032 : :
1033 : : /* caller identified the inner side for us */
3490 tgl@sss.pgh.pa.us 1034 [ + + ]: 2422 : if (rinfo->outer_is_left)
1035 : 2247 : var = (Var *) get_rightop(rinfo->clause);
1036 : : else
1037 : 175 : var = (Var *) get_leftop(rinfo->clause);
1038 : :
1039 : : /*
1040 : : * We may ignore any RelabelType node above the operand. (There
1041 : : * won't be more than one, since eval_const_expressions() has been
1042 : : * applied already.)
1043 : : */
2962 1044 [ + - + + ]: 2422 : if (var && IsA(var, RelabelType))
1045 : 1579 : var = (Var *) ((RelabelType *) var)->arg;
1046 : :
1047 : : /*
1048 : : * If inner side isn't a Var referencing a subquery output column,
1049 : : * this clause doesn't help us.
1050 : : */
3490 1051 [ + - + + ]: 2422 : if (!var || !IsA(var, Var) ||
1052 [ + - - + ]: 2416 : var->varno != relid || var->varlevelsup != 0)
1053 : 6 : continue;
1054 : :
1055 : 2416 : colnos = lappend_int(colnos, var->varattno);
1056 : 2416 : opids = lappend_oid(opids, op);
1057 : : }
1058 : :
1059 [ + + ]: 2512 : if (query_is_distinct_for(subquery, colnos, opids))
1060 : 220 : return true;
1061 : : }
1062 : 49571 : return false;
1063 : : }
1064 : :
1065 : :
1066 : : /*
1067 : : * query_supports_distinctness - could the query possibly be proven distinct
1068 : : * on some set of output columns?
1069 : : *
1070 : : * This is effectively a pre-checking function for query_is_distinct_for().
1071 : : * It must return true if query_is_distinct_for() could possibly return true
1072 : : * with this query, but it should not expend a lot of cycles. The idea is
1073 : : * that callers can avoid doing possibly-expensive processing to compute
1074 : : * query_is_distinct_for()'s argument lists if the call could not possibly
1075 : : * succeed.
1076 : : */
1077 : : bool
4122 1078 : 5268 : query_supports_distinctness(Query *query)
1079 : : {
1080 : : /* SRFs break distinctness except with DISTINCT, see below */
2893 1081 [ + + + - ]: 5268 : if (query->hasTargetSRFs && query->distinctClause == NIL)
3331 1082 : 508 : return false;
1083 : :
1084 : : /* check for features we can prove distinctness with */
4122 1085 [ + + ]: 4760 : if (query->distinctClause != NIL ||
1086 [ + + ]: 4685 : query->groupClause != NIL ||
3817 andres@anarazel.de 1087 [ + - ]: 4592 : query->groupingSets != NIL ||
4122 tgl@sss.pgh.pa.us 1088 [ + + ]: 4592 : query->hasAggs ||
1089 [ + - ]: 4384 : query->havingQual ||
1090 [ + + ]: 4384 : query->setOperations)
1091 : 4360 : return true;
1092 : :
1093 : 400 : return false;
1094 : : }
1095 : :
1096 : : /*
1097 : : * query_is_distinct_for - does query never return duplicates of the
1098 : : * specified columns?
1099 : : *
1100 : : * query is a not-yet-planned subquery (in current usage, it's always from
1101 : : * a subquery RTE, which the planner avoids scribbling on).
1102 : : *
1103 : : * colnos is an integer list of output column numbers (resno's). We are
1104 : : * interested in whether rows consisting of just these columns are certain
1105 : : * to be distinct. "Distinctness" is defined according to whether the
1106 : : * corresponding upper-level equality operators listed in opids would think
1107 : : * the values are distinct. (Note: the opids entries could be cross-type
1108 : : * operators, and thus not exactly the equality operators that the subquery
1109 : : * would use itself. We use equality_ops_are_compatible() to check
1110 : : * compatibility. That looks at opfamily membership for index AMs that have
1111 : : * declared that they support consistent equality semantics within an
1112 : : * opfamily, and so should give trustworthy answers for all operators that we
1113 : : * might need to deal with here.)
1114 : : */
1115 : : bool
1116 : 2512 : query_is_distinct_for(Query *query, List *colnos, List *opids)
1117 : : {
1118 : : ListCell *l;
1119 : : Oid opid;
1120 : :
1121 [ - + ]: 2512 : Assert(list_length(colnos) == list_length(opids));
1122 : :
1123 : : /*
1124 : : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1125 : : * columns in the DISTINCT clause appear in colnos and operator semantics
1126 : : * match. This is true even if there are SRFs in the DISTINCT columns or
1127 : : * elsewhere in the tlist.
1128 : : */
1129 [ + + ]: 2512 : if (query->distinctClause)
1130 : : {
1131 [ + - + + : 84 : foreach(l, query->distinctClause)
+ + ]
1132 : : {
1133 : 63 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1134 : 63 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1135 : : query->targetList);
1136 : :
1137 : 63 : opid = distinct_col_search(tle->resno, colnos, opids);
1138 [ + + ]: 63 : if (!OidIsValid(opid) ||
1139 [ + - ]: 30 : !equality_ops_are_compatible(opid, sgc->eqop))
1140 : : break; /* exit early if no match */
1141 : : }
1142 [ + + ]: 54 : if (l == NULL) /* had matches for all? */
1143 : 21 : return true;
1144 : : }
1145 : :
1146 : : /*
1147 : : * Otherwise, a set-returning function in the query's targetlist can
1148 : : * result in returning duplicate rows, despite any grouping that might
1149 : : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1150 : : * columns, it would be safe because they'd be expanded before grouping.
1151 : : * But it doesn't currently seem worth the effort to check for that.)
1152 : : */
2893 1153 [ - + ]: 2491 : if (query->hasTargetSRFs)
2893 tgl@sss.pgh.pa.us 1154 :UBC 0 : return false;
1155 : :
1156 : : /*
1157 : : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1158 : : * the grouped columns appear in colnos and operator semantics match.
1159 : : */
3817 andres@anarazel.de 1160 [ + + + - ]:CBC 2491 : if (query->groupClause && !query->groupingSets)
1161 : : {
4122 tgl@sss.pgh.pa.us 1162 [ + - + + : 114 : foreach(l, query->groupClause)
+ + ]
1163 : : {
1164 : 79 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1165 : 79 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1166 : : query->targetList);
1167 : :
1168 : 79 : opid = distinct_col_search(tle->resno, colnos, opids);
1169 [ + + ]: 79 : if (!OidIsValid(opid) ||
1170 [ + - ]: 56 : !equality_ops_are_compatible(opid, sgc->eqop))
1171 : : break; /* exit early if no match */
1172 : : }
1173 [ + + ]: 58 : if (l == NULL) /* had matches for all? */
1174 : 35 : return true;
1175 : : }
3817 andres@anarazel.de 1176 [ - + ]: 2433 : else if (query->groupingSets)
1177 : : {
1178 : : /*
1179 : : * If we have grouping sets with expressions, we probably don't have
1180 : : * uniqueness and analysis would be hard. Punt.
1181 : : */
3817 andres@anarazel.de 1182 [ # # ]:UBC 0 : if (query->groupClause)
1183 : 0 : return false;
1184 : :
1185 : : /*
1186 : : * If we have no groupClause (therefore no grouping expressions), we
1187 : : * might have one or many empty grouping sets. If there's just one,
1188 : : * then we're returning only one row and are certainly unique. But
1189 : : * otherwise, we know we're certainly not unique.
1190 : : */
1191 [ # # ]: 0 : if (list_length(query->groupingSets) == 1 &&
3810 bruce@momjian.us 1192 [ # # ]: 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
3817 andres@anarazel.de 1193 : 0 : return true;
1194 : : else
1195 : 0 : return false;
1196 : : }
1197 : : else
1198 : : {
1199 : : /*
1200 : : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1201 : : * result is at most one row so it's surely unique, for any operators.
1202 : : */
4122 tgl@sss.pgh.pa.us 1203 [ + + - + ]:CBC 2433 : if (query->hasAggs || query->havingQual)
1204 : 122 : return true;
1205 : : }
1206 : :
1207 : : /*
1208 : : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1209 : : * except with ALL.
1210 : : */
1211 [ + + ]: 2334 : if (query->setOperations)
1212 : : {
3170 peter_e@gmx.net 1213 : 2278 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1214 : :
4122 tgl@sss.pgh.pa.us 1215 [ - + ]: 2278 : Assert(topop->op != SETOP_NONE);
1216 : :
1217 [ + + ]: 2278 : if (!topop->all)
1218 : : {
1219 : : ListCell *lg;
1220 : :
1221 : : /* We're good if all the nonjunk output columns are in colnos */
1222 : 69 : lg = list_head(topop->groupClauses);
1223 [ + - + + : 114 : foreach(l, query->targetList)
+ + ]
1224 : : {
1225 : 72 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1226 : : SortGroupClause *sgc;
1227 : :
1228 [ - + ]: 72 : if (tle->resjunk)
4122 tgl@sss.pgh.pa.us 1229 :UBC 0 : continue; /* ignore resjunk columns */
1230 : :
1231 : : /* non-resjunk columns should have grouping clauses */
4122 tgl@sss.pgh.pa.us 1232 [ - + ]:CBC 72 : Assert(lg != NULL);
1233 : 72 : sgc = (SortGroupClause *) lfirst(lg);
2296 1234 : 72 : lg = lnext(topop->groupClauses, lg);
1235 : :
4122 1236 : 72 : opid = distinct_col_search(tle->resno, colnos, opids);
1237 [ + + ]: 72 : if (!OidIsValid(opid) ||
1238 [ + - ]: 45 : !equality_ops_are_compatible(opid, sgc->eqop))
1239 : : break; /* exit early if no match */
1240 : : }
1241 [ + + ]: 69 : if (l == NULL) /* had matches for all? */
1242 : 42 : return true;
1243 : : }
1244 : : }
1245 : :
1246 : : /*
1247 : : * XXX Are there any other cases in which we can easily see the result
1248 : : * must be distinct?
1249 : : *
1250 : : * If you do add more smarts to this function, be sure to update
1251 : : * query_supports_distinctness() to match.
1252 : : */
1253 : :
1254 : 2292 : return false;
1255 : : }
1256 : :
1257 : : /*
1258 : : * distinct_col_search - subroutine for query_is_distinct_for
1259 : : *
1260 : : * If colno is in colnos, return the corresponding element of opids,
1261 : : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1262 : : * but if it does, we arbitrarily select the first match.)
1263 : : */
1264 : : static Oid
1265 : 214 : distinct_col_search(int colno, List *colnos, List *opids)
1266 : : {
1267 : : ListCell *lc1,
1268 : : *lc2;
1269 : :
1270 [ + - + + : 311 : forboth(lc1, colnos, lc2, opids)
+ - + + +
+ + - +
+ ]
1271 : : {
1272 [ + + ]: 228 : if (colno == lfirst_int(lc1))
1273 : 131 : return lfirst_oid(lc2);
1274 : : }
1275 : 83 : return InvalidOid;
1276 : : }
1277 : :
1278 : :
1279 : : /*
1280 : : * innerrel_is_unique
1281 : : * Check if the innerrel provably contains at most one tuple matching any
1282 : : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1283 : : *
1284 : : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1285 : : * identify the outerrel by its Relids. This asymmetry supports use of this
1286 : : * function before joinrels have been built. (The caller is expected to
1287 : : * also supply the joinrelids, just to save recalculating that.)
1288 : : *
1289 : : * The proof must be made based only on clauses that will be "joinquals"
1290 : : * rather than "otherquals" at execution. For an inner join there's no
1291 : : * difference; but if the join is outer, we must ignore pushed-down quals,
1292 : : * as those will become "otherquals". Note that this means the answer might
1293 : : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1294 : : * answer without regard to that, callers must take care not to call this
1295 : : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1296 : : *
1297 : : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1298 : : * is a frontend that is mainly concerned with caching the answers.
1299 : : * In particular, the force_cache argument allows overriding the internal
1300 : : * heuristic about whether to cache negative answers; it should be "true"
1301 : : * if making an inquiry that is not part of the normal bottom-up join search
1302 : : * sequence.
1303 : : */
1304 : : bool
3125 1305 : 371508 : innerrel_is_unique(PlannerInfo *root,
1306 : : Relids joinrelids,
1307 : : Relids outerrelids,
1308 : : RelOptInfo *innerrel,
1309 : : JoinType jointype,
1310 : : List *restrictlist,
1311 : : bool force_cache)
1312 : : {
256 akorotkov@postgresql 1313 : 371508 : return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1314 : : jointype, restrictlist, force_cache, NULL);
1315 : : }
1316 : :
1317 : : /*
1318 : : * innerrel_is_unique_ext
1319 : : * Do the same as innerrel_is_unique(), but also set to (*extra_clauses)
1320 : : * additional clauses from a baserestrictinfo list used to prove the
1321 : : * uniqueness.
1322 : : *
1323 : : * A non-NULL extra_clauses indicates that we're checking for self-join and
1324 : : * correspondingly dealing with filtered clauses.
1325 : : */
1326 : : bool
1327 : 372529 : innerrel_is_unique_ext(PlannerInfo *root,
1328 : : Relids joinrelids,
1329 : : Relids outerrelids,
1330 : : RelOptInfo *innerrel,
1331 : : JoinType jointype,
1332 : : List *restrictlist,
1333 : : bool force_cache,
1334 : : List **extra_clauses)
1335 : : {
1336 : : MemoryContext old_context;
1337 : : ListCell *lc;
1338 : : UniqueRelInfo *uniqueRelInfo;
1339 : 372529 : List *outer_exprs = NIL;
1340 : 372529 : bool self_join = (extra_clauses != NULL);
1341 : :
1342 : : /* Certainly can't prove uniqueness when there are no joinclauses */
3125 tgl@sss.pgh.pa.us 1343 [ + + ]: 372529 : if (restrictlist == NIL)
1344 : 52606 : return false;
1345 : :
1346 : : /*
1347 : : * Make a quick check to eliminate cases in which we will surely be unable
1348 : : * to prove uniqueness of the innerrel.
1349 : : */
1350 [ + + ]: 319923 : if (!rel_supports_distinctness(root, innerrel))
1351 : 170937 : return false;
1352 : :
1353 : : /*
1354 : : * Query the cache to see if we've managed to prove that innerrel is
1355 : : * unique for any subset of this outerrel. For non-self-join search, we
1356 : : * don't need an exact match, as extra outerrels can't make the innerrel
1357 : : * any less unique (or more formally, the restrictlist for a join to a
1358 : : * superset outerrel must be a superset of the conditions we successfully
1359 : : * used before). For self-join search, we require an exact match of
1360 : : * outerrels because we need extra clauses to be valid for our case. Also,
1361 : : * for self-join checking we've filtered the clauses list. Thus, we can
1362 : : * match only the result cached for a self-join search for another
1363 : : * self-join check.
1364 : : */
1365 [ + + + + : 164847 : foreach(lc, innerrel->unique_for_rels)
+ + ]
1366 : : {
256 akorotkov@postgresql 1367 : 58647 : uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1368 : :
1369 [ + + + + : 58647 : if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
+ + ]
1370 [ + + ]: 37 : (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1371 [ + + ]: 28 : uniqueRelInfo->self_join))
1372 : : {
1373 [ + + ]: 42786 : if (extra_clauses)
1374 : 6 : *extra_clauses = uniqueRelInfo->extra_clauses;
3125 tgl@sss.pgh.pa.us 1375 : 42786 : return true; /* Success! */
1376 : : }
1377 : : }
1378 : :
1379 : : /*
1380 : : * Conversely, we may have already determined that this outerrel, or some
1381 : : * superset thereof, cannot prove this innerrel to be unique.
1382 : : */
1383 [ + + + + : 106478 : foreach(lc, innerrel->non_unique_for_rels)
+ + ]
1384 : : {
1385 : 458 : Relids unique_for_rels = (Relids) lfirst(lc);
1386 : :
3101 1387 [ + + ]: 458 : if (bms_is_subset(outerrelids, unique_for_rels))
3125 1388 : 180 : return false;
1389 : : }
1390 : :
1391 : : /* No cached information, so try to make the proof. */
2747 1392 [ + + + + ]: 106020 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1393 : : jointype, restrictlist,
1394 : : self_join ? &outer_exprs : NULL))
1395 : : {
1396 : : /*
1397 : : * Cache the positive result for future probes, being sure to keep it
1398 : : * in the planner_cxt even if we are working in GEQO.
1399 : : *
1400 : : * Note: one might consider trying to isolate the minimal subset of
1401 : : * the outerrels that proved the innerrel unique. But it's not worth
1402 : : * the trouble, because the planner builds up joinrels incrementally
1403 : : * and so we'll see the minimally sufficient outerrels before any
1404 : : * supersets of them anyway.
1405 : : */
3125 1406 : 56516 : old_context = MemoryContextSwitchTo(root->planner_cxt);
256 akorotkov@postgresql 1407 : 56516 : uniqueRelInfo = makeNode(UniqueRelInfo);
1408 : 56516 : uniqueRelInfo->outerrelids = bms_copy(outerrelids);
1409 : 56516 : uniqueRelInfo->self_join = self_join;
1410 : 56516 : uniqueRelInfo->extra_clauses = outer_exprs;
3125 tgl@sss.pgh.pa.us 1411 : 56516 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1412 : : uniqueRelInfo);
1413 : 56516 : MemoryContextSwitchTo(old_context);
1414 : :
256 akorotkov@postgresql 1415 [ + + ]: 56516 : if (extra_clauses)
1416 : 327 : *extra_clauses = outer_exprs;
3125 tgl@sss.pgh.pa.us 1417 : 56516 : return true; /* Success! */
1418 : : }
1419 : : else
1420 : : {
1421 : : /*
1422 : : * None of the join conditions for outerrel proved innerrel unique, so
1423 : : * we can safely reject this outerrel or any subset of it in future
1424 : : * checks.
1425 : : *
1426 : : * However, in normal planning mode, caching this knowledge is totally
1427 : : * pointless; it won't be queried again, because we build up joinrels
1428 : : * from smaller to larger. It's only useful when using GEQO or
1429 : : * another planner extension that attempts planning multiple times.
1430 : : *
1431 : : * Also, allow callers to override that heuristic and force caching;
1432 : : * that's useful for reduce_unique_semijoins, which calls here before
1433 : : * the normal join search starts.
1434 : : */
68 rhaas@postgresql.org 1435 [ + + - + ]:GNC 49504 : if (force_cache || root->assumeReplanning)
1436 : : {
3125 tgl@sss.pgh.pa.us 1437 :CBC 1887 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1438 : 1887 : innerrel->non_unique_for_rels =
1439 : 1887 : lappend(innerrel->non_unique_for_rels,
3101 1440 : 1887 : bms_copy(outerrelids));
3125 1441 : 1887 : MemoryContextSwitchTo(old_context);
1442 : : }
1443 : :
1444 : 49504 : return false;
1445 : : }
1446 : : }
1447 : :
1448 : : /*
1449 : : * is_innerrel_unique_for
1450 : : * Check if the innerrel provably contains at most one tuple matching any
1451 : : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1452 : : */
1453 : : static bool
1454 : 106020 : is_innerrel_unique_for(PlannerInfo *root,
1455 : : Relids joinrelids,
1456 : : Relids outerrelids,
1457 : : RelOptInfo *innerrel,
1458 : : JoinType jointype,
1459 : : List *restrictlist,
1460 : : List **extra_clauses)
1461 : : {
1462 : 106020 : List *clause_list = NIL;
1463 : : ListCell *lc;
1464 : :
1465 : : /*
1466 : : * Search for mergejoinable clauses that constrain the inner rel against
1467 : : * the outer rel. If an operator is mergejoinable then it behaves like
1468 : : * equality for some btree opclass, so it's what we want. The
1469 : : * mergejoinability test also eliminates clauses containing volatile
1470 : : * functions, which we couldn't depend on.
1471 : : */
1472 [ + - + + : 237992 : foreach(lc, restrictlist)
+ + ]
1473 : : {
1474 : 131972 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1475 : :
1476 : : /*
1477 : : * As noted above, if it's a pushed-down clause and we're at an outer
1478 : : * join, we can't use it.
1479 : : */
2747 1480 [ + + ]: 131972 : if (IS_OUTER_JOIN(jointype) &&
1481 [ + + - + ]: 53513 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
3125 1482 : 2162 : continue;
1483 : :
1484 : : /* Ignore if it's not a mergejoinable clause */
1485 [ + + ]: 129810 : if (!restrictinfo->can_join ||
1486 [ + + ]: 121570 : restrictinfo->mergeopfamilies == NIL)
1487 : 8731 : continue; /* not mergejoinable */
1488 : :
1489 : : /*
1490 : : * Check if the clause has the form "outer op inner" or "inner op
1491 : : * outer", and if so mark which side is inner.
1492 : : */
3101 1493 [ + + ]: 121079 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1494 : : innerrel->relids))
3125 1495 : 20 : continue; /* no good for these input relations */
1496 : :
1497 : : /* OK, add to the list */
1498 : 121059 : clause_list = lappend(clause_list, restrictinfo);
1499 : : }
1500 : :
1501 : : /* Let rel_is_distinct_for() do the hard work */
256 akorotkov@postgresql 1502 : 106020 : return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1503 : : }
1504 : :
1505 : : /*
1506 : : * Update EC members to point to the remaining relation instead of the removed
1507 : : * one, removing duplicates.
1508 : : *
1509 : : * Restriction clauses for base relations are already distributed to
1510 : : * the respective baserestrictinfo lists (see
1511 : : * generate_implied_equalities_for_column). The above code has already processed
1512 : : * this list and updated these clauses to reference the remaining
1513 : : * relation, so that we can skip them here based on their relids.
1514 : : *
1515 : : * Likewise, we have already processed the join clauses that join the
1516 : : * removed relation to the remaining one.
1517 : : *
1518 : : * Finally, there might be join clauses tying the removed relation to
1519 : : * some third relation. We can't just delete the source clauses and
1520 : : * regenerate them from the EC because the corresponding equality
1521 : : * operators might be missing (see the handling of ec_broken).
1522 : : * Therefore, we will update the references in the source clauses.
1523 : : *
1524 : : * Derived clauses can be generated again, so it is simpler just to
1525 : : * delete them.
1526 : : */
1527 : : static void
1528 : 462 : update_eclasses(EquivalenceClass *ec, int from, int to)
1529 : : {
1530 : 462 : List *new_members = NIL;
1531 : 462 : List *new_sources = NIL;
1532 : :
1533 : : /*
1534 : : * We don't expect any EC child members to exist at this point. Ensure
1535 : : * that's the case, otherwise, we might be getting asked to do something
1536 : : * this function hasn't been coded for.
1537 : : */
202 drowley@postgresql.o 1538 [ - + ]: 462 : Assert(ec->ec_childmembers == NULL);
1539 : :
256 akorotkov@postgresql 1540 [ + - + + : 1863 : foreach_node(EquivalenceMember, em, ec->ec_members)
+ + ]
1541 : : {
1542 : 939 : bool is_redundant = false;
1543 : :
1544 [ + + ]: 939 : if (!bms_is_member(from, em->em_relids))
1545 : : {
1546 : 468 : new_members = lappend(new_members, em);
1547 : 468 : continue;
1548 : : }
1549 : :
1550 : 471 : em->em_relids = adjust_relid_set(em->em_relids, from, to);
1551 : 471 : em->em_jdomain->jd_relids = adjust_relid_set(em->em_jdomain->jd_relids, from, to);
1552 : :
1553 : : /* We only process inner joins */
173 1554 : 471 : ChangeVarNodesExtended((Node *) em->em_expr, from, to, 0,
1555 : : replace_relid_callback);
1556 : :
256 1557 [ + + + + : 954 : foreach_node(EquivalenceMember, other, new_members)
+ + ]
1558 : : {
1559 [ + + ]: 152 : if (!equal(em->em_relids, other->em_relids))
1560 : 12 : continue;
1561 : :
1562 [ + - ]: 140 : if (equal(em->em_expr, other->em_expr))
1563 : : {
1564 : 140 : is_redundant = true;
1565 : 140 : break;
1566 : : }
1567 : : }
1568 : :
1569 [ + + ]: 471 : if (!is_redundant)
1570 : 331 : new_members = lappend(new_members, em);
1571 : : }
1572 : :
1573 : 462 : list_free(ec->ec_members);
1574 : 462 : ec->ec_members = new_members;
1575 : :
206 amitlan@postgresql.o 1576 : 462 : ec_clear_derived_clauses(ec);
1577 : :
1578 : : /* Update EC source expressions */
256 akorotkov@postgresql 1579 [ + + + + : 1401 : foreach_node(RestrictInfo, rinfo, ec->ec_sources)
+ + ]
1580 : : {
1581 : 477 : bool is_redundant = false;
1582 : :
1583 [ + + ]: 477 : if (!bms_is_member(from, rinfo->required_relids))
1584 : : {
1585 : 59 : new_sources = lappend(new_sources, rinfo);
1586 : 59 : continue;
1587 : : }
1588 : :
173 1589 : 418 : ChangeVarNodesExtended((Node *) rinfo, from, to, 0,
1590 : : replace_relid_callback);
1591 : :
1592 : : /*
1593 : : * After switching the clause to the remaining relation, check it for
1594 : : * redundancy with existing ones. We don't have to check for
1595 : : * redundancy with derived clauses, because we've just deleted them.
1596 : : */
256 1597 [ + + + + : 848 : foreach_node(RestrictInfo, other, new_sources)
+ + ]
1598 : : {
1599 [ + + ]: 18 : if (!equal(rinfo->clause_relids, other->clause_relids))
1600 : 12 : continue;
1601 : :
1602 [ + - ]: 6 : if (equal(rinfo->clause, other->clause))
1603 : : {
1604 : 6 : is_redundant = true;
1605 : 6 : break;
1606 : : }
1607 : : }
1608 : :
1609 [ + + ]: 418 : if (!is_redundant)
1610 : 412 : new_sources = lappend(new_sources, rinfo);
1611 : : }
1612 : :
1613 : 462 : list_free(ec->ec_sources);
1614 : 462 : ec->ec_sources = new_sources;
1615 : 462 : ec->ec_relids = adjust_relid_set(ec->ec_relids, from, to);
1616 : 462 : }
1617 : :
1618 : : /*
1619 : : * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1620 : : * which makes almost every RestrictInfo unique. This type of comparison is
1621 : : * useful when removing duplicates while moving RestrictInfo's from removed
1622 : : * relation to remaining relation during self-join elimination.
1623 : : *
1624 : : * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1625 : : * get rid of this function.
1626 : : */
1627 : : static bool
1628 : 256 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1629 : : {
1630 : 256 : int saved_rinfo_serial = a->rinfo_serial;
1631 : : bool result;
1632 : :
1633 : 256 : a->rinfo_serial = b->rinfo_serial;
1634 : 256 : result = equal(a, b);
1635 : 256 : a->rinfo_serial = saved_rinfo_serial;
1636 : :
1637 : 256 : return result;
1638 : : }
1639 : :
1640 : : /*
1641 : : * This function adds all non-redundant clauses to the keeping relation
1642 : : * during self-join elimination. That is a contradictory operation. On the
1643 : : * one hand, we reduce the length of the `restrict` lists, which can
1644 : : * impact planning or executing time. Additionally, we improve the
1645 : : * accuracy of cardinality estimation. On the other hand, it is one more
1646 : : * place that can make planning time much longer in specific cases. It
1647 : : * would have been better to avoid calling the equal() function here, but
1648 : : * it's the only way to detect duplicated inequality expressions.
1649 : : *
1650 : : * (*keep_rinfo_list) is given by pointer because it might be altered by
1651 : : * distribute_restrictinfo_to_rels().
1652 : : */
1653 : : static void
1654 : 600 : add_non_redundant_clauses(PlannerInfo *root,
1655 : : List *rinfo_candidates,
1656 : : List **keep_rinfo_list,
1657 : : Index removed_relid)
1658 : : {
1659 [ + + + + : 1664 : foreach_node(RestrictInfo, rinfo, rinfo_candidates)
+ + ]
1660 : : {
1661 : 464 : bool is_redundant = false;
1662 : :
1663 [ - + ]: 464 : Assert(!bms_is_member(removed_relid, rinfo->required_relids));
1664 : :
1665 [ + + + + : 1116 : foreach_node(RestrictInfo, src, (*keep_rinfo_list))
+ + ]
1666 : : {
1667 [ + + ]: 265 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1668 : : /* Can't compare trivially different clauses */
1669 : 6 : continue;
1670 : :
1671 [ + - ]: 259 : if (src == rinfo ||
1672 [ + + ]: 259 : (rinfo->parent_ec != NULL &&
1673 [ + + + + ]: 408 : src->parent_ec == rinfo->parent_ec) ||
1674 : 256 : restrict_infos_logically_equal(rinfo, src))
1675 : : {
1676 : 77 : is_redundant = true;
1677 : 77 : break;
1678 : : }
1679 : : }
1680 [ + + ]: 464 : if (!is_redundant)
1681 : 387 : distribute_restrictinfo_to_rels(root, rinfo);
1682 : : }
1683 : 600 : }
1684 : :
1685 : : /*
1686 : : * A custom callback for ChangeVarNodesExtended() providing
1687 : : * Self-join elimination (SJE) related functionality
1688 : : *
1689 : : * SJE needs to skip the RangeTblRef node
1690 : : * type. During SJE's last step, remove_rel_from_joinlist() removes
1691 : : * remaining RangeTblRefs with target relid. If ChangeVarNodes() replaces
1692 : : * the target relid before, remove_rel_from_joinlist() fails to identify
1693 : : * the nodes to delete.
1694 : : *
1695 : : * SJE also needs to change the relids within RestrictInfo's.
1696 : : */
1697 : : static bool
173 1698 : 20526 : replace_relid_callback(Node *node, ChangeVarNodes_context *context)
1699 : : {
1700 [ + + ]: 20526 : if (IsA(node, RangeTblRef))
1701 : : {
1702 : 772 : return true;
1703 : : }
1704 [ + + ]: 19754 : else if (IsA(node, RestrictInfo))
1705 : : {
1706 : 1422 : RestrictInfo *rinfo = (RestrictInfo *) node;
1707 : 1422 : int relid = -1;
1708 : 1422 : bool is_req_equal =
1709 : 1422 : (rinfo->required_relids == rinfo->clause_relids);
1710 : 1422 : bool clause_relids_is_multiple =
1711 : 1422 : (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
1712 : :
1713 : : /*
1714 : : * Recurse down into clauses if the target relation is present in
1715 : : * clause_relids or required_relids. We must check required_relids
1716 : : * because the relation not present in clause_relids might still be
1717 : : * present somewhere in orclause.
1718 : : */
1719 [ + + + + ]: 1980 : if (bms_is_member(context->rt_index, rinfo->clause_relids) ||
1720 : 558 : bms_is_member(context->rt_index, rinfo->required_relids))
1721 : : {
1722 : : Relids new_clause_relids;
1723 : :
1724 : 888 : ChangeVarNodesWalkExpression((Node *) rinfo->clause, context);
1725 : 888 : ChangeVarNodesWalkExpression((Node *) rinfo->orclause, context);
1726 : :
1727 : 888 : new_clause_relids = adjust_relid_set(rinfo->clause_relids,
1728 : : context->rt_index,
1729 : : context->new_index);
1730 : :
1731 : : /*
1732 : : * Incrementally adjust num_base_rels based on the change of
1733 : : * clause_relids, which could contain both base relids and
1734 : : * outer-join relids. This operation is legal until we remove
1735 : : * only baserels.
1736 : : */
1737 : 888 : rinfo->num_base_rels -= bms_num_members(rinfo->clause_relids) -
1738 : 888 : bms_num_members(new_clause_relids);
1739 : :
1740 : 888 : rinfo->clause_relids = new_clause_relids;
1741 : 888 : rinfo->left_relids =
1742 : 888 : adjust_relid_set(rinfo->left_relids, context->rt_index, context->new_index);
1743 : 888 : rinfo->right_relids =
1744 : 888 : adjust_relid_set(rinfo->right_relids, context->rt_index, context->new_index);
1745 : : }
1746 : :
1747 [ + + ]: 1422 : if (is_req_equal)
1748 : 12 : rinfo->required_relids = rinfo->clause_relids;
1749 : : else
1750 : 1410 : rinfo->required_relids =
1751 : 1410 : adjust_relid_set(rinfo->required_relids, context->rt_index, context->new_index);
1752 : :
1753 : 1422 : rinfo->outer_relids =
1754 : 1422 : adjust_relid_set(rinfo->outer_relids, context->rt_index, context->new_index);
1755 : 1422 : rinfo->incompatible_relids =
1756 : 1422 : adjust_relid_set(rinfo->incompatible_relids, context->rt_index, context->new_index);
1757 : :
1758 [ + + + + ]: 2418 : if (rinfo->mergeopfamilies &&
1759 [ + + ]: 1852 : bms_get_singleton_member(rinfo->clause_relids, &relid) &&
1760 : 636 : clause_relids_is_multiple &&
1761 [ + - + - ]: 636 : relid == context->new_index && IsA(rinfo->clause, OpExpr))
1762 : : {
1763 : : Expr *leftOp;
1764 : : Expr *rightOp;
1765 : :
1766 : 636 : leftOp = (Expr *) get_leftop(rinfo->clause);
1767 : 636 : rightOp = (Expr *) get_rightop(rinfo->clause);
1768 : :
1769 : : /*
1770 : : * For self-join elimination, changing varnos could transform
1771 : : * "t1.a = t2.a" into "t1.a = t1.a". That is always true as long
1772 : : * as "t1.a" is not null. We use qual() to check for such a case,
1773 : : * and then we replace the qual for a check for not null
1774 : : * (NullTest).
1775 : : */
1776 [ + - + + ]: 636 : if (leftOp != NULL && equal(leftOp, rightOp))
1777 : : {
1778 : 630 : NullTest *ntest = makeNode(NullTest);
1779 : :
1780 : 630 : ntest->arg = leftOp;
1781 : 630 : ntest->nulltesttype = IS_NOT_NULL;
1782 : 630 : ntest->argisrow = false;
1783 : 630 : ntest->location = -1;
1784 : 630 : rinfo->clause = (Expr *) ntest;
1785 : 630 : rinfo->mergeopfamilies = NIL;
1786 : 630 : rinfo->left_em = NULL;
1787 : 630 : rinfo->right_em = NULL;
1788 : : }
1789 [ - + ]: 636 : Assert(rinfo->orclause == NULL);
1790 : : }
1791 : 1422 : return true;
1792 : : }
1793 : :
1794 : 18332 : return false;
1795 : : }
1796 : :
1797 : : /*
1798 : : * Remove a relation after we have proven that it participates only in an
1799 : : * unneeded unique self-join.
1800 : : *
1801 : : * Replace any links in planner info structures.
1802 : : *
1803 : : * Transfer join and restriction clauses from the removed relation to the
1804 : : * remaining one. We change the Vars of the clause to point to the
1805 : : * remaining relation instead of the removed one. The clauses that require
1806 : : * a subset of joinrelids become restriction clauses of the remaining
1807 : : * relation, and others remain join clauses. We append them to
1808 : : * baserestrictinfo and joininfo, respectively, trying not to introduce
1809 : : * duplicates.
1810 : : *
1811 : : * We also have to process the 'joinclauses' list here, because it
1812 : : * contains EC-derived join clauses which must become filter clauses. It
1813 : : * is not enough to just correct the ECs because the EC-derived
1814 : : * restrictions are generated before join removal (see
1815 : : * generate_base_implied_equalities).
1816 : : *
1817 : : * NOTE: Remember to keep the code in sync with PlannerInfo to be sure all
1818 : : * cached relids and relid bitmapsets can be correctly cleaned during the
1819 : : * self-join elimination procedure.
1820 : : */
1821 : : static void
256 1822 : 300 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1823 : : RelOptInfo *toKeep, RelOptInfo *toRemove,
1824 : : List *restrictlist)
1825 : : {
1826 : : List *joininfos;
1827 : : ListCell *lc;
1828 : : int i;
1829 : 300 : List *jinfo_candidates = NIL;
1830 : 300 : List *binfo_candidates = NIL;
1831 : :
1832 [ - + ]: 300 : Assert(toKeep->relid > 0);
1833 [ - + ]: 300 : Assert(toRemove->relid > 0);
1834 : :
1835 : : /*
1836 : : * Replace the index of the removing table with the keeping one. The
1837 : : * technique of removing/distributing restrictinfo is used here to attach
1838 : : * just appeared (for keeping relation) join clauses and avoid adding
1839 : : * duplicates of those that already exist in the joininfo list.
1840 : : */
1841 : 300 : joininfos = list_copy(toRemove->joininfo);
1842 [ + + + + : 645 : foreach_node(RestrictInfo, rinfo, joininfos)
+ + ]
1843 : : {
1844 : 45 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
173 1845 : 45 : ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1846 : : 0, replace_relid_callback);
1847 : :
256 1848 [ + + ]: 45 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1849 : 36 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1850 : : else
1851 : 9 : binfo_candidates = lappend(binfo_candidates, rinfo);
1852 : : }
1853 : :
1854 : : /*
1855 : : * Concatenate restrictlist to the list of base restrictions of the
1856 : : * removing table just to simplify the replacement procedure: all of them
1857 : : * weren't connected to any keeping relations and need to be added to some
1858 : : * rels.
1859 : : */
1860 : 300 : toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1861 : : restrictlist);
1862 [ + - + + : 1019 : foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo)
+ + ]
1863 : : {
173 1864 : 419 : ChangeVarNodesExtended((Node *) rinfo, toRemove->relid, toKeep->relid,
1865 : : 0, replace_relid_callback);
1866 : :
256 1867 [ - + ]: 419 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
256 akorotkov@postgresql 1868 :UBC 0 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1869 : : else
256 akorotkov@postgresql 1870 :CBC 419 : binfo_candidates = lappend(binfo_candidates, rinfo);
1871 : : }
1872 : :
1873 : : /*
1874 : : * Now, add all non-redundant clauses to the keeping relation.
1875 : : */
1876 : 300 : add_non_redundant_clauses(root, binfo_candidates,
1877 : : &toKeep->baserestrictinfo, toRemove->relid);
1878 : 300 : add_non_redundant_clauses(root, jinfo_candidates,
1879 : : &toKeep->joininfo, toRemove->relid);
1880 : :
1881 : 300 : list_free(binfo_candidates);
1882 : 300 : list_free(jinfo_candidates);
1883 : :
1884 : : /*
1885 : : * Arrange equivalence classes, mentioned removing a table, with the
1886 : : * keeping one: varno of removing table should be replaced in members and
1887 : : * sources lists. Also, remove duplicated elements if this replacement
1888 : : * procedure created them.
1889 : : */
1890 : 300 : i = -1;
1891 [ + + ]: 762 : while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1892 : : {
1893 : 462 : EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1894 : :
1895 : 462 : update_eclasses(ec, toRemove->relid, toKeep->relid);
1896 : 462 : toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1897 : : }
1898 : :
1899 : : /*
1900 : : * Transfer the targetlist and attr_needed flags.
1901 : : */
1902 : :
1903 [ + - + + : 1198 : foreach(lc, toRemove->reltarget->exprs)
+ + ]
1904 : : {
1905 : 898 : Node *node = lfirst(lc);
1906 : :
173 1907 : 898 : ChangeVarNodesExtended(node, toRemove->relid, toKeep->relid, 0,
1908 : : replace_relid_callback);
256 1909 [ + + ]: 898 : if (!list_member(toKeep->reltarget->exprs, node))
1910 : 90 : toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1911 : : }
1912 : :
1913 [ + + ]: 3795 : for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1914 : : {
1915 : 3495 : int attno = i - toKeep->min_attr;
1916 : :
1917 : 6990 : toRemove->attr_needed[attno] = adjust_relid_set(toRemove->attr_needed[attno],
1918 : 3495 : toRemove->relid, toKeep->relid);
1919 : 3495 : toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1920 : 3495 : toRemove->attr_needed[attno]);
1921 : : }
1922 : :
1923 : : /*
1924 : : * If the removed relation has a row mark, transfer it to the remaining
1925 : : * one.
1926 : : *
1927 : : * If both rels have row marks, just keep the one corresponding to the
1928 : : * remaining relation because we verified earlier that they have the same
1929 : : * strength.
1930 : : */
1931 [ + + ]: 300 : if (rmark)
1932 : : {
1933 [ + - ]: 37 : if (kmark)
1934 : : {
1935 [ - + ]: 37 : Assert(kmark->markType == rmark->markType);
1936 : :
1937 : 37 : root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1938 : : }
1939 : : else
1940 : : {
1941 : : /* Shouldn't have inheritance children here. */
256 akorotkov@postgresql 1942 [ # # ]:UBC 0 : Assert(rmark->rti == rmark->prti);
1943 : :
1944 : 0 : rmark->rti = rmark->prti = toKeep->relid;
1945 : : }
1946 : : }
1947 : :
1948 : : /*
1949 : : * Replace varno in all the query structures, except nodes RangeTblRef
1950 : : * otherwise later remove_rel_from_joinlist will yield errors.
1951 : : */
173 akorotkov@postgresql 1952 :CBC 300 : ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid,
1953 : : 0, replace_relid_callback);
1954 : :
1955 : : /* Replace links in the planner info */
256 1956 : 300 : remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1957 : :
1958 : : /* At last, replace varno in root targetlist and HAVING clause */
173 1959 : 300 : ChangeVarNodesExtended((Node *) root->processed_tlist, toRemove->relid,
1960 : 300 : toKeep->relid, 0, replace_relid_callback);
1961 : 300 : ChangeVarNodesExtended((Node *) root->processed_groupClause,
1962 : 300 : toRemove->relid, toKeep->relid, 0,
1963 : : replace_relid_callback);
1964 : :
256 1965 : 300 : adjust_relid_set(root->all_result_relids, toRemove->relid, toKeep->relid);
1966 : 300 : adjust_relid_set(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1967 : :
1968 : : /*
1969 : : * There may be references to the rel in root->fkey_list, but if so,
1970 : : * match_foreign_keys_to_quals() will get rid of them.
1971 : : */
1972 : :
1973 : : /*
1974 : : * Finally, remove the rel from the baserel array to prevent it from being
1975 : : * referenced again. (We can't do this earlier because
1976 : : * remove_join_clause_from_rels will touch it.)
1977 : : */
1978 : 300 : root->simple_rel_array[toRemove->relid] = NULL;
64 1979 : 300 : root->simple_rte_array[toRemove->relid] = NULL;
1980 : :
1981 : : /* And nuke the RelOptInfo, just in case there's another access path. */
256 1982 : 300 : pfree(toRemove);
1983 : :
1984 : :
1985 : : /*
1986 : : * Now repeat construction of attr_needed bits coming from all other
1987 : : * sources.
1988 : : */
1989 : 300 : rebuild_placeholder_attr_needed(root);
1990 : 300 : rebuild_joinclause_attr_needed(root);
1991 : 300 : rebuild_eclass_attr_needed(root);
1992 : 300 : rebuild_lateral_attr_needed(root);
1993 : 300 : }
1994 : :
1995 : : /*
1996 : : * split_selfjoin_quals
1997 : : * Processes 'joinquals' by building two lists: one containing the quals
1998 : : * where the columns/exprs are on either side of the join match and
1999 : : * another one containing the remaining quals.
2000 : : *
2001 : : * 'joinquals' must only contain quals for a RTE_RELATION being joined to
2002 : : * itself.
2003 : : */
2004 : : static void
2005 : 1021 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
2006 : : List **otherjoinquals, int from, int to)
2007 : : {
2008 : 1021 : List *sjoinquals = NIL;
2009 : 1021 : List *ojoinquals = NIL;
2010 : :
2011 [ + - + + : 3130 : foreach_node(RestrictInfo, rinfo, joinquals)
+ + ]
2012 : : {
2013 : : OpExpr *expr;
2014 : : Node *leftexpr;
2015 : : Node *rightexpr;
2016 : :
2017 : : /* In general, clause looks like F(arg1) = G(arg2) */
2018 [ + - + - ]: 2176 : if (!rinfo->mergeopfamilies ||
2019 [ + - ]: 2176 : bms_num_members(rinfo->clause_relids) != 2 ||
2020 [ - + ]: 2176 : bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
2021 : 1088 : bms_membership(rinfo->right_relids) != BMS_SINGLETON)
2022 : : {
256 akorotkov@postgresql 2023 :UBC 0 : ojoinquals = lappend(ojoinquals, rinfo);
2024 : 0 : continue;
2025 : : }
2026 : :
256 akorotkov@postgresql 2027 :CBC 1088 : expr = (OpExpr *) rinfo->clause;
2028 : :
2029 [ + - - + ]: 1088 : if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
2030 : : {
256 akorotkov@postgresql 2031 :UBC 0 : ojoinquals = lappend(ojoinquals, rinfo);
2032 : 0 : continue;
2033 : : }
2034 : :
256 akorotkov@postgresql 2035 :CBC 1088 : leftexpr = get_leftop(rinfo->clause);
2036 : 1088 : rightexpr = copyObject(get_rightop(rinfo->clause));
2037 : :
2038 [ + - + + ]: 1088 : if (leftexpr && IsA(leftexpr, RelabelType))
2039 : 6 : leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
2040 [ + - + + ]: 1088 : if (rightexpr && IsA(rightexpr, RelabelType))
2041 : 3 : rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
2042 : :
2043 : : /*
2044 : : * Quite an expensive operation, narrowing the use case. For example,
2045 : : * when we have cast of the same var to different (but compatible)
2046 : : * types.
2047 : : */
173 2048 : 1088 : ChangeVarNodesExtended(rightexpr,
2049 : 1088 : bms_singleton_member(rinfo->right_relids),
2050 : 1088 : bms_singleton_member(rinfo->left_relids), 0,
2051 : : replace_relid_callback);
2052 : :
256 2053 [ + + ]: 1088 : if (equal(leftexpr, rightexpr))
2054 : 836 : sjoinquals = lappend(sjoinquals, rinfo);
2055 : : else
2056 : 252 : ojoinquals = lappend(ojoinquals, rinfo);
2057 : : }
2058 : :
2059 : 1021 : *selfjoinquals = sjoinquals;
2060 : 1021 : *otherjoinquals = ojoinquals;
2061 : 1021 : }
2062 : :
2063 : : /*
2064 : : * Check for a case when uniqueness is at least partly derived from a
2065 : : * baserestrictinfo clause. In this case, we have a chance to return only
2066 : : * one row (if such clauses on both sides of SJ are equal) or nothing (if they
2067 : : * are different).
2068 : : */
2069 : : static bool
2070 : 333 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
2071 : : Index relid)
2072 : : {
2073 [ + + + + : 675 : foreach_node(RestrictInfo, rinfo, uclauses)
+ + ]
2074 : : {
2075 : : Expr *clause;
2076 : : Node *iclause;
2077 : : Node *c1;
2078 : 75 : bool matched = false;
2079 : :
2080 [ + - - + ]: 75 : Assert(outer->relid > 0 && relid > 0);
2081 : :
2082 : : /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
2083 [ - + ]: 75 : Assert(bms_is_empty(rinfo->left_relids) ^
2084 : : bms_is_empty(rinfo->right_relids));
2085 : :
2086 : 75 : clause = (Expr *) copyObject(rinfo->clause);
173 2087 : 75 : ChangeVarNodesExtended((Node *) clause, relid, outer->relid, 0,
2088 : : replace_relid_callback);
2089 : :
256 2090 [ + + ]: 75 : iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
2091 : 72 : get_leftop(clause);
2092 [ + + ]: 75 : c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
2093 : 72 : get_rightop(clause);
2094 : :
2095 : : /*
2096 : : * Compare these left and right sides with the corresponding sides of
2097 : : * the outer's filters. If no one is detected - return immediately.
2098 : : */
2099 [ + + + + : 204 : foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo)
+ + ]
2100 : : {
2101 : : Node *oclause;
2102 : : Node *c2;
2103 : :
2104 [ + + ]: 96 : if (orinfo->mergeopfamilies == NIL)
2105 : : /* Don't consider clauses that aren't similar to 'F(X)=G(Y)' */
2106 : 30 : continue;
2107 : :
2108 [ - + ]: 66 : Assert(is_opclause(orinfo->clause));
2109 : :
2110 : 132 : oclause = bms_is_empty(orinfo->left_relids) ?
2111 [ + + ]: 66 : get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
2112 : 132 : c2 = (bms_is_empty(orinfo->left_relids) ?
2113 [ + + ]: 66 : get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
2114 : :
2115 [ + + + + ]: 66 : if (equal(iclause, oclause) && equal(c1, c2))
2116 : : {
2117 : 42 : matched = true;
2118 : 42 : break;
2119 : : }
2120 : : }
2121 : :
2122 [ + + ]: 75 : if (!matched)
2123 : 33 : return false;
2124 : : }
2125 : :
2126 : 300 : return true;
2127 : : }
2128 : :
2129 : : /*
2130 : : * Find and remove unique self-joins in a group of base relations that have
2131 : : * the same Oid.
2132 : : *
2133 : : * Returns a set of relids that were removed.
2134 : : */
2135 : : static Relids
2136 : 5516 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
2137 : : {
2138 : 5516 : Relids result = NULL;
2139 : : int k; /* Index of kept relation */
2140 : 5516 : int r = -1; /* Index of removed relation */
2141 : :
2142 [ + + ]: 17200 : while ((r = bms_next_member(relids, r)) > 0)
2143 : : {
62 2144 : 11684 : RelOptInfo *rrel = root->simple_rel_array[r];
2145 : :
256 2146 : 11684 : k = r;
2147 : :
2148 [ + + ]: 18337 : while ((k = bms_next_member(relids, k)) > 0)
2149 : : {
2150 : 6953 : Relids joinrelids = NULL;
62 2151 : 6953 : RelOptInfo *krel = root->simple_rel_array[k];
2152 : : List *restrictlist;
2153 : : List *selfjoinquals;
2154 : : List *otherjoinquals;
2155 : : ListCell *lc;
256 2156 : 6953 : bool jinfo_check = true;
62 2157 : 6953 : PlanRowMark *kmark = NULL;
2158 : 6953 : PlanRowMark *rmark = NULL;
256 2159 : 6953 : List *uclauses = NIL;
2160 : :
2161 : : /* A sanity check: the relations have the same Oid. */
2162 [ - + ]: 6953 : Assert(root->simple_rte_array[k]->relid ==
2163 : : root->simple_rte_array[r]->relid);
2164 : :
2165 : : /*
2166 : : * It is impossible to eliminate the join of two relations if they
2167 : : * belong to different rules of order. Otherwise, the planner
2168 : : * can't find any variants of the correct query plan.
2169 : : */
2170 [ + + + + : 8600 : foreach(lc, root->join_info_list)
+ + ]
2171 : : {
2172 : 5480 : SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
2173 : :
2174 [ + + ]: 10960 : if ((bms_is_member(k, info->syn_lefthand) ^
2175 [ + + ]: 7779 : bms_is_member(r, info->syn_lefthand)) ||
2176 : 2299 : (bms_is_member(k, info->syn_righthand) ^
2177 : 2299 : bms_is_member(r, info->syn_righthand)))
2178 : : {
2179 : 3833 : jinfo_check = false;
2180 : 3833 : break;
2181 : : }
2182 : : }
2183 [ + + ]: 6953 : if (!jinfo_check)
2184 : 6653 : continue;
2185 : :
2186 : : /*
2187 : : * Check Row Marks equivalence. We can't remove the join if the
2188 : : * relations have row marks of different strength (e.g., one is
2189 : : * locked FOR UPDATE, and another just has ROW_MARK_REFERENCE for
2190 : : * EvalPlanQual rechecking).
2191 : : */
2192 [ + + + - : 3224 : foreach(lc, root->rowMarks)
+ + ]
2193 : : {
2194 : 190 : PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2195 : :
64 2196 [ + + ]: 190 : if (rowMark->rti == r)
2197 : : {
62 2198 [ - + ]: 86 : Assert(rmark == NULL);
2199 : 86 : rmark = rowMark;
2200 : : }
64 2201 [ + + ]: 104 : else if (rowMark->rti == k)
2202 : : {
62 2203 [ - + ]: 86 : Assert(kmark == NULL);
2204 : 86 : kmark = rowMark;
2205 : : }
2206 : :
2207 [ + + + - ]: 190 : if (kmark && rmark)
256 2208 : 86 : break;
2209 : : }
62 2210 [ + + + - : 3120 : if (kmark && rmark && kmark->markType != rmark->markType)
+ + ]
256 2211 : 26 : continue;
2212 : :
2213 : : /*
2214 : : * We only deal with base rels here, so their relids bitset
2215 : : * contains only one member -- their relid.
2216 : : */
2217 : 3094 : joinrelids = bms_add_member(joinrelids, r);
2218 : 3094 : joinrelids = bms_add_member(joinrelids, k);
2219 : :
2220 : : /*
2221 : : * PHVs should not impose any constraints on removing self-joins.
2222 : : */
2223 : :
2224 : : /*
2225 : : * At this stage, joininfo lists of inner and outer can contain
2226 : : * only clauses required for a superior outer join that can't
2227 : : * influence this optimization. So, we can avoid to call the
2228 : : * build_joinrel_restrictlist() routine.
2229 : : */
2230 : 3094 : restrictlist = generate_join_implied_equalities(root, joinrelids,
2231 : : rrel->relids,
2232 : : krel, NULL);
2233 [ + + ]: 3094 : if (restrictlist == NIL)
2234 : 2073 : continue;
2235 : :
2236 : : /*
2237 : : * Process restrictlist to separate the self-join quals from the
2238 : : * other quals. e.g., "x = x" goes to selfjoinquals and "a = b" to
2239 : : * otherjoinquals.
2240 : : */
2241 : 1021 : split_selfjoin_quals(root, restrictlist, &selfjoinquals,
62 2242 : 1021 : &otherjoinquals, rrel->relid, krel->relid);
2243 : :
256 2244 [ - + ]: 1021 : Assert(list_length(restrictlist) ==
2245 : : (list_length(selfjoinquals) + list_length(otherjoinquals)));
2246 : :
2247 : : /*
2248 : : * To enable SJE for the only degenerate case without any self
2249 : : * join clauses at all, add baserestrictinfo to this list. The
2250 : : * degenerate case works only if both sides have the same clause.
2251 : : * So doesn't matter which side to add.
2252 : : */
62 2253 : 1021 : selfjoinquals = list_concat(selfjoinquals, krel->baserestrictinfo);
2254 : :
2255 : : /*
2256 : : * Determine if the rrel can duplicate outer rows. We must bypass
2257 : : * the unique rel cache here since we're possibly using a subset
2258 : : * of join quals. We can use 'force_cache' == true when all join
2259 : : * quals are self-join quals. Otherwise, we could end up putting
2260 : : * false negatives in the cache.
2261 : : */
2262 [ + + ]: 1021 : if (!innerrel_is_unique_ext(root, joinrelids, rrel->relids,
2263 : : krel, JOIN_INNER, selfjoinquals,
256 2264 : 1021 : list_length(otherjoinquals) == 0,
2265 : : &uclauses))
2266 : 688 : continue;
2267 : :
2268 : : /*
2269 : : * 'uclauses' is the copy of outer->baserestrictinfo that are
2270 : : * associated with an index. We proved by matching selfjoinquals
2271 : : * to a unique index that the outer relation has at most one
2272 : : * matching row for each inner row. Sometimes that is not enough.
2273 : : * e.g. "WHERE s1.b = s2.b AND s1.a = 1 AND s2.a = 2" when the
2274 : : * unique index is (a,b). Having non-empty uclauses, we must
2275 : : * validate that the inner baserestrictinfo contains the same
2276 : : * expressions, or we won't match the same row on each side of the
2277 : : * join.
2278 : : */
62 2279 [ + + ]: 333 : if (!match_unique_clauses(root, rrel, uclauses, krel->relid))
256 2280 : 33 : continue;
2281 : :
2282 : : /*
2283 : : * Remove rrel ReloptInfo from the planner structures and the
2284 : : * corresponding row mark.
2285 : : */
62 2286 : 300 : remove_self_join_rel(root, kmark, rmark, krel, rrel, restrictlist);
2287 : :
256 2288 : 300 : result = bms_add_member(result, r);
2289 : :
2290 : : /* We have removed the outer relation, try the next one. */
2291 : 300 : break;
2292 : : }
2293 : : }
2294 : :
2295 : 5516 : return result;
2296 : : }
2297 : :
2298 : : /*
2299 : : * Gather indexes of base relations from the joinlist and try to eliminate self
2300 : : * joins.
2301 : : */
2302 : : static Relids
2303 : 51980 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2304 : : {
2305 : : ListCell *jl;
2306 : 51980 : Relids relids = NULL;
2307 : 51980 : SelfJoinCandidate *candidates = NULL;
2308 : : int i;
2309 : : int j;
2310 : : int numRels;
2311 : :
2312 : : /* Collect indexes of base relations of the join tree */
2313 [ + - + + : 173386 : foreach(jl, joinlist)
+ + ]
2314 : : {
2315 : 121406 : Node *jlnode = (Node *) lfirst(jl);
2316 : :
2317 [ + + ]: 121406 : if (IsA(jlnode, RangeTblRef))
2318 : : {
2319 : 119717 : int varno = ((RangeTblRef *) jlnode)->rtindex;
2320 : 119717 : RangeTblEntry *rte = root->simple_rte_array[varno];
2321 : :
2322 : : /*
2323 : : * We only consider ordinary relations as candidates to be
2324 : : * removed, and these relations should not have TABLESAMPLE
2325 : : * clauses specified. Removing a relation with TABLESAMPLE clause
2326 : : * could potentially change the syntax of the query. Because of
2327 : : * UPDATE/DELETE EPQ mechanism, currently Query->resultRelation or
2328 : : * Query->mergeTargetRelation associated rel cannot be eliminated.
2329 : : */
2330 [ + + ]: 119717 : if (rte->rtekind == RTE_RELATION &&
2331 [ + + ]: 104540 : rte->relkind == RELKIND_RELATION &&
2332 [ + + ]: 101740 : rte->tablesample == NULL &&
2333 [ + + ]: 101728 : varno != root->parse->resultRelation &&
2334 [ + - ]: 100815 : varno != root->parse->mergeTargetRelation)
2335 : : {
2336 [ - + ]: 100815 : Assert(!bms_is_member(varno, relids));
2337 : 100815 : relids = bms_add_member(relids, varno);
2338 : : }
2339 : : }
2340 [ + - ]: 1689 : else if (IsA(jlnode, List))
2341 : : {
2342 : : /* Recursively go inside the sub-joinlist */
2343 : 1689 : toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2344 : : toRemove);
2345 : : }
2346 : : else
256 akorotkov@postgresql 2347 [ # # ]:UBC 0 : elog(ERROR, "unrecognized joinlist node type: %d",
2348 : : (int) nodeTag(jlnode));
2349 : : }
2350 : :
256 akorotkov@postgresql 2351 :CBC 51980 : numRels = bms_num_members(relids);
2352 : :
2353 : : /* Need at least two relations for the join */
2354 [ + + ]: 51980 : if (numRels < 2)
2355 : 15182 : return toRemove;
2356 : :
2357 : : /*
2358 : : * In order to find relations with the same oid we first build an array of
2359 : : * candidates and then sort it by oid.
2360 : : */
2361 : 36798 : candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
2362 : : numRels);
2363 : 36798 : i = -1;
2364 : 36798 : j = 0;
2365 [ + + ]: 126650 : while ((i = bms_next_member(relids, i)) >= 0)
2366 : : {
2367 : 89852 : candidates[j].relid = i;
2368 : 89852 : candidates[j].reloid = root->simple_rte_array[i]->relid;
2369 : 89852 : j++;
2370 : : }
2371 : :
2372 : 36798 : qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2373 : : self_join_candidates_cmp);
2374 : :
2375 : : /*
2376 : : * Iteratively form a group of relation indexes with the same oid and
2377 : : * launch the routine that detects self-joins in this group and removes
2378 : : * excessive range table entries.
2379 : : *
2380 : : * At the end of the iteration, exclude the group from the overall relids
2381 : : * list. So each next iteration of the cycle will involve less and less
2382 : : * value of relids.
2383 : : */
2384 : 36798 : i = 0;
2385 [ + + ]: 126650 : for (j = 1; j < numRels + 1; j++)
2386 : : {
2387 [ + + + + ]: 89852 : if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2388 : : {
2389 [ + + ]: 83726 : if (j - i >= 2)
2390 : : {
2391 : : /* Create a group of relation indexes with the same oid */
2392 : 5477 : Relids group = NULL;
2393 : : Relids removed;
2394 : :
2395 [ + + ]: 17080 : while (i < j)
2396 : : {
2397 : 11603 : group = bms_add_member(group, candidates[i].relid);
2398 : 11603 : i++;
2399 : : }
2400 : 5477 : relids = bms_del_members(relids, group);
2401 : :
2402 : : /*
2403 : : * Try to remove self-joins from a group of identical entries.
2404 : : * Make the next attempt iteratively - if something is deleted
2405 : : * from a group, changes in clauses and equivalence classes
2406 : : * can give us a chance to find more candidates.
2407 : : */
2408 : : do
2409 : : {
2410 [ - + ]: 5516 : Assert(!bms_overlap(group, toRemove));
2411 : 5516 : removed = remove_self_joins_one_group(root, group);
2412 : 5516 : toRemove = bms_add_members(toRemove, removed);
2413 : 5516 : group = bms_del_members(group, removed);
2414 [ + + + + ]: 5804 : } while (!bms_is_empty(removed) &&
2415 : 288 : bms_membership(group) == BMS_MULTIPLE);
2416 : 5477 : bms_free(removed);
2417 : 5477 : bms_free(group);
2418 : : }
2419 : : else
2420 : : {
2421 : : /* Single relation, just remove it from the set */
2422 : 78249 : relids = bms_del_member(relids, candidates[i].relid);
2423 : 78249 : i = j;
2424 : : }
2425 : : }
2426 : : }
2427 : :
2428 [ - + ]: 36798 : Assert(bms_is_empty(relids));
2429 : :
2430 : 36798 : return toRemove;
2431 : : }
2432 : :
2433 : : /*
2434 : : * Compare self-join candidates by their oids.
2435 : : */
2436 : : static int
2437 : 66117 : self_join_candidates_cmp(const void *a, const void *b)
2438 : : {
2439 : 66117 : const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2440 : 66117 : const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2441 : :
2442 [ + + ]: 66117 : if (ca->reloid != cb->reloid)
2443 [ + + ]: 59964 : return (ca->reloid < cb->reloid ? -1 : 1);
2444 : : else
2445 : 6153 : return 0;
2446 : : }
2447 : :
2448 : : /*
2449 : : * Find and remove useless self joins.
2450 : : *
2451 : : * Search for joins where a relation is joined to itself. If the join clause
2452 : : * for each tuple from one side of the join is proven to match the same
2453 : : * physical row (or nothing) on the other side, that self-join can be
2454 : : * eliminated from the query. Suitable join clauses are assumed to be in the
2455 : : * form of X = X, and can be replaced with NOT NULL clauses.
2456 : : *
2457 : : * For the sake of simplicity, we don't apply this optimization to special
2458 : : * joins. Here is a list of what we could do in some particular cases:
2459 : : * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2460 : : * and then removed normally.
2461 : : * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2462 : : * (IS NULL on join columns OR NOT inner quals)'.
2463 : : * 'a a1 left join a a2': could simplify to a scan like inner but without
2464 : : * NOT NULL conditions on join columns.
2465 : : * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2466 : : * can both remove rows and introduce duplicates.
2467 : : *
2468 : : * To search for removable joins, we order all the relations on their Oid,
2469 : : * go over each set with the same Oid, and consider each pair of relations
2470 : : * in this set.
2471 : : *
2472 : : * To remove the join, we mark one of the participating relations as dead
2473 : : * and rewrite all references to it to point to the remaining relation.
2474 : : * This includes modifying RestrictInfos, EquivalenceClasses, and
2475 : : * EquivalenceMembers. We also have to modify the row marks. The join clauses
2476 : : * of the removed relation become either restriction or join clauses, based on
2477 : : * whether they reference any relations not participating in the removed join.
2478 : : *
2479 : : * 'joinlist' is the top-level joinlist of the query. If it has any
2480 : : * references to the removed relations, we update them to point to the
2481 : : * remaining ones.
2482 : : */
2483 : : List *
2484 : 162951 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2485 : : {
2486 : 162951 : Relids toRemove = NULL;
2487 : 162951 : int relid = -1;
2488 : :
2489 [ + - + - : 325902 : if (!enable_self_join_elimination || joinlist == NIL ||
+ + ]
2490 [ + + ]: 276048 : (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2491 : 112660 : return joinlist;
2492 : :
2493 : : /*
2494 : : * Merge pairs of relations participated in self-join. Remove unnecessary
2495 : : * range table entries.
2496 : : */
2497 : 50291 : toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2498 : :
2499 [ + + ]: 50291 : if (unlikely(toRemove != NULL))
2500 : : {
2501 : : /* At the end, remove orphaned relation links */
2502 [ + + ]: 585 : while ((relid = bms_next_member(toRemove, relid)) >= 0)
2503 : : {
2504 : 300 : int nremoved = 0;
2505 : :
2506 : 300 : joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2507 [ - + ]: 300 : if (nremoved != 1)
256 akorotkov@postgresql 2508 [ # # ]:UBC 0 : elog(ERROR, "failed to find relation %d in joinlist", relid);
2509 : : }
2510 : : }
2511 : :
256 akorotkov@postgresql 2512 :CBC 50291 : return joinlist;
2513 : : }
|