Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * joinpath.c
4 : : * Routines to find all possible paths for processing a set of joins
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/optimizer/path/joinpath.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <math.h>
18 : :
19 : : #include "executor/executor.h"
20 : : #include "foreign/fdwapi.h"
21 : : #include "nodes/nodeFuncs.h"
22 : : #include "optimizer/cost.h"
23 : : #include "optimizer/optimizer.h"
24 : : #include "optimizer/pathnode.h"
25 : : #include "optimizer/paths.h"
26 : : #include "optimizer/placeholder.h"
27 : : #include "optimizer/planmain.h"
28 : : #include "optimizer/restrictinfo.h"
29 : : #include "utils/lsyscache.h"
30 : : #include "utils/typcache.h"
31 : :
32 : : /* Hook for plugins to get control in add_paths_to_joinrel() */
33 : : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
34 : :
35 : : /*
36 : : * Paths parameterized by a parent rel can be considered to be parameterized
37 : : * by any of its children, when we are performing partitionwise joins. These
38 : : * macros simplify checking for such cases. Beware multiple eval of args.
39 : : */
40 : : #define PATH_PARAM_BY_PARENT(path, rel) \
41 : : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
42 : : (rel)->top_parent_relids))
43 : : #define PATH_PARAM_BY_REL_SELF(path, rel) \
44 : : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
45 : :
46 : : #define PATH_PARAM_BY_REL(path, rel) \
47 : : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
48 : :
49 : : static void try_partial_mergejoin_path(PlannerInfo *root,
50 : : RelOptInfo *joinrel,
51 : : Path *outer_path,
52 : : Path *inner_path,
53 : : List *pathkeys,
54 : : List *mergeclauses,
55 : : List *outersortkeys,
56 : : List *innersortkeys,
57 : : JoinType jointype,
58 : : JoinPathExtraData *extra);
59 : : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
60 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
61 : : JoinType jointype, JoinPathExtraData *extra);
62 : : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
63 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
64 : : JoinType jointype, JoinPathExtraData *extra);
65 : : static void consider_parallel_nestloop(PlannerInfo *root,
66 : : RelOptInfo *joinrel,
67 : : RelOptInfo *outerrel,
68 : : RelOptInfo *innerrel,
69 : : JoinType jointype,
70 : : JoinPathExtraData *extra);
71 : : static void consider_parallel_mergejoin(PlannerInfo *root,
72 : : RelOptInfo *joinrel,
73 : : RelOptInfo *outerrel,
74 : : RelOptInfo *innerrel,
75 : : JoinType jointype,
76 : : JoinPathExtraData *extra,
77 : : Path *inner_cheapest_total);
78 : : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
79 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
80 : : JoinType jointype, JoinPathExtraData *extra);
81 : : static List *select_mergejoin_clauses(PlannerInfo *root,
82 : : RelOptInfo *joinrel,
83 : : RelOptInfo *outerrel,
84 : : RelOptInfo *innerrel,
85 : : List *restrictlist,
86 : : JoinType jointype,
87 : : bool *mergejoin_allowed);
88 : : static void generate_mergejoin_paths(PlannerInfo *root,
89 : : RelOptInfo *joinrel,
90 : : RelOptInfo *innerrel,
91 : : Path *outerpath,
92 : : JoinType jointype,
93 : : JoinPathExtraData *extra,
94 : : bool useallclauses,
95 : : Path *inner_cheapest_total,
96 : : List *merge_pathkeys,
97 : : bool is_partial);
98 : :
99 : :
100 : : /*
101 : : * add_paths_to_joinrel
102 : : * Given a join relation and two component rels from which it can be made,
103 : : * consider all possible paths that use the two component rels as outer
104 : : * and inner rel respectively. Add these paths to the join rel's pathlist
105 : : * if they survive comparison with other paths (and remove any existing
106 : : * paths that are dominated by these paths).
107 : : *
108 : : * Modifies the pathlist field of the joinrel node to contain the best
109 : : * paths found so far.
110 : : *
111 : : * jointype is not necessarily the same as sjinfo->jointype; it might be
112 : : * "flipped around" if we are considering joining the rels in the opposite
113 : : * direction from what's indicated in sjinfo.
114 : : *
115 : : * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and
116 : : * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been
117 : : * unique-ified and a regular inner join should then be applied. These values
118 : : * are not allowed to propagate outside this routine, however. Path cost
119 : : * estimation code, as well as match_unsorted_outer, may need to recognize that
120 : : * it's dealing with such a case --- the combination of nominal jointype INNER
121 : : * with sjinfo->jointype == JOIN_SEMI indicates that.
122 : : */
123 : : void
7398 tgl@sss.pgh.pa.us 124 :CBC 323888 : add_paths_to_joinrel(PlannerInfo *root,
125 : : RelOptInfo *joinrel,
126 : : RelOptInfo *outerrel,
127 : : RelOptInfo *innerrel,
128 : : JoinType jointype,
129 : : SpecialJoinInfo *sjinfo,
130 : : List *restrictlist)
131 : : {
18 rguo@postgresql.org 132 :GNC 323888 : JoinType save_jointype = jointype;
133 : : JoinPathExtraData extra;
5363 tgl@sss.pgh.pa.us 134 :CBC 323888 : bool mergejoin_allowed = true;
135 : : ListCell *lc;
136 : : Relids joinrelids;
137 : :
138 : : /*
139 : : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
140 : : * between child relations, even if there is a SpecialJoinInfo node for
141 : : * the join between the topmost parents. So, while calculating Relids set
142 : : * representing the restriction, consider relids of topmost parent of
143 : : * partitions.
144 : : */
2892 rhaas@postgresql.org 145 [ + + ]: 323888 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
146 : 6002 : joinrelids = joinrel->top_parent_relids;
147 : : else
148 : 317886 : joinrelids = joinrel->relids;
149 : :
3772 tgl@sss.pgh.pa.us 150 : 323888 : extra.restrictlist = restrictlist;
151 : 323888 : extra.mergeclause_list = NIL;
152 : 323888 : extra.sjinfo = sjinfo;
153 : 323888 : extra.param_source_rels = NULL;
154 : :
155 : : /*
156 : : * See if the inner relation is provably unique for this outer rel.
157 : : *
158 : : * We have some special cases: for JOIN_SEMI, it doesn't matter since the
159 : : * executor can make the equivalent optimization anyway. It also doesn't
160 : : * help enable use of Memoize, since a semijoin with a provably unique
161 : : * inner side should have been reduced to an inner join in that case.
162 : : * Therefore, we need not expend planner cycles on proofs. (For
163 : : * JOIN_ANTI, although it doesn't help the executor for the same reason,
164 : : * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
165 : : * considering a semijoin whose inner side is not provably unique (else
166 : : * reduce_unique_semijoins would've simplified it), so there's no point in
167 : : * calling innerrel_is_unique. However, if the LHS covers all of the
168 : : * semijoin's min_lefthand, then it's appropriate to set inner_unique
169 : : * because the unique relation produced by create_unique_paths will be
170 : : * unique relative to the LHS. (If we have an LHS that's only part of the
171 : : * min_lefthand, that is *not* true.) For JOIN_UNIQUE_OUTER, pass
172 : : * JOIN_INNER to avoid letting that value escape this module.
173 : : */
3074 174 [ + + + + ]: 323888 : switch (jointype)
175 : : {
176 : 3757 : case JOIN_SEMI:
177 : 3757 : extra.inner_unique = false; /* well, unproven */
178 : 3757 : break;
179 : 3099 : case JOIN_UNIQUE_INNER:
3050 180 : 6198 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
181 : 3099 : outerrel->relids);
3074 182 : 3099 : break;
183 : 3099 : case JOIN_UNIQUE_OUTER:
3050 184 : 3099 : extra.inner_unique = innerrel_is_unique(root,
185 : : joinrel->relids,
186 : : outerrel->relids,
187 : : innerrel,
188 : : JOIN_INNER,
189 : : restrictlist,
190 : : false);
3074 191 : 3099 : break;
192 : 313933 : default:
3050 193 : 313933 : extra.inner_unique = innerrel_is_unique(root,
194 : : joinrel->relids,
195 : : outerrel->relids,
196 : : innerrel,
197 : : jointype,
198 : : restrictlist,
199 : : false);
3074 200 : 313933 : break;
201 : : }
202 : :
203 : : /*
204 : : * If the outer or inner relation has been unique-ified, handle as a plain
205 : : * inner join.
206 : : */
18 rguo@postgresql.org 207 [ + + + + ]:GNC 323888 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
208 : 6198 : jointype = JOIN_INNER;
209 : :
210 : : /*
211 : : * Find potential mergejoin clauses. We can skip this if we are not
212 : : * interested in doing a mergejoin. However, mergejoin may be our only
213 : : * way of implementing a full outer join, so override enable_mergejoin if
214 : : * it's a full join.
215 : : */
9053 tgl@sss.pgh.pa.us 216 [ + + + + ]:CBC 323888 : if (enable_mergejoin || jointype == JOIN_FULL)
3772 217 : 322090 : extra.mergeclause_list = select_mergejoin_clauses(root,
218 : : joinrel,
219 : : outerrel,
220 : : innerrel,
221 : : restrictlist,
222 : : jointype,
223 : : &mergejoin_allowed);
224 : :
225 : : /*
226 : : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
227 : : * for cost estimation. These will be the same for all paths.
228 : : */
3074 229 [ + + + + : 323888 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
+ + ]
2696 230 : 100271 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
231 : : jointype, sjinfo, restrictlist,
232 : : &extra.semifactors);
233 : :
234 : : /*
235 : : * Decide whether it's sensible to generate parameterized paths for this
236 : : * joinrel, and if so, which relations such paths should require. There
237 : : * is usually no need to create a parameterized result path unless there
238 : : * is a join order restriction that prevents joining one of our input rels
239 : : * directly to the parameter source rel instead of joining to the other
240 : : * input rel. (But see allow_star_schema_join().) This restriction
241 : : * reduces the number of parameterized paths we have to deal with at
242 : : * higher join levels, without compromising the quality of the resulting
243 : : * plan. We express the restriction as a Relids set that must overlap the
244 : : * parameterization of any proposed join path. Note: param_source_rels
245 : : * should contain only baserels, not OJ relids, so starting from
246 : : * all_baserels not all_query_rels is correct.
247 : : */
4971 248 [ + + + + : 680052 : foreach(lc, root->join_info_list)
+ + ]
249 : : {
3209 250 : 356164 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
251 : :
252 : : /*
253 : : * SJ is relevant to this join if we have some part of its RHS
254 : : * (possibly not all of it), and haven't yet joined to its LHS. (This
255 : : * test is pretty simplistic, but should be sufficient considering the
256 : : * join has already been proven legal.) If the SJ is relevant, it
257 : : * presents constraints for joining to anything not in its RHS.
258 : : */
2892 rhaas@postgresql.org 259 [ + + ]: 356164 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
260 [ + + ]: 237318 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
3772 tgl@sss.pgh.pa.us 261 : 11012 : extra.param_source_rels = bms_join(extra.param_source_rels,
2999 262 : 11012 : bms_difference(root->all_baserels,
263 : 11012 : sjinfo2->min_righthand));
264 : :
265 : : /* full joins constrain both sides symmetrically */
3209 266 [ + + + + ]: 358616 : if (sjinfo2->jointype == JOIN_FULL &&
2892 rhaas@postgresql.org 267 : 2452 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
268 [ + + ]: 2424 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
3772 tgl@sss.pgh.pa.us 269 : 336 : extra.param_source_rels = bms_join(extra.param_source_rels,
2999 270 : 336 : bms_difference(root->all_baserels,
271 : 336 : sjinfo2->min_lefthand));
272 : : }
273 : :
274 : : /*
275 : : * However, when a LATERAL subquery is involved, there will simply not be
276 : : * any paths for the joinrel that aren't parameterized by whatever the
277 : : * subquery is parameterized by, unless its parameterization is resolved
278 : : * within the joinrel. So we might as well allow additional dependencies
279 : : * on whatever residual lateral dependencies the joinrel will have.
280 : : */
3561 281 : 647776 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
282 : 323888 : joinrel->lateral_relids);
283 : :
284 : : /*
285 : : * 1. Consider mergejoin paths where both relations must be explicitly
286 : : * sorted. Skip this if we can't mergejoin.
287 : : */
5363 288 [ + + ]: 323888 : if (mergejoin_allowed)
5364 289 : 315966 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
290 : : jointype, &extra);
291 : :
292 : : /*
293 : : * 2. Consider paths where the outer relation need not be explicitly
294 : : * sorted. This includes both nestloops and mergejoins where the outer
295 : : * path is already ordered. Again, skip this if we can't mergejoin.
296 : : * (That's okay because we know that nestloop can't handle
297 : : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
298 : : * the prohibited cases either.)
299 : : */
5363 300 [ + + ]: 323888 : if (mergejoin_allowed)
5364 301 : 315966 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
302 : : jointype, &extra);
303 : :
304 : : #ifdef NOT_USED
305 : :
306 : : /*
307 : : * 3. Consider paths where the inner relation need not be explicitly
308 : : * sorted. This includes mergejoins only (nestloops were already built in
309 : : * match_unsorted_outer).
310 : : *
311 : : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
312 : : * significant difference between the inner and outer side of a mergejoin,
313 : : * so match_unsorted_inner creates no paths that aren't equivalent to
314 : : * those made by match_unsorted_outer when add_paths_to_joinrel() is
315 : : * invoked with the two rels given in the other order.
316 : : */
317 : : if (mergejoin_allowed)
318 : : match_unsorted_inner(root, joinrel, outerrel, innerrel,
319 : : jointype, &extra);
320 : : #endif
321 : :
322 : : /*
323 : : * 4. Consider paths where both outer and inner relations must be hashed
324 : : * before being joined. As above, disregard enable_hashjoin for full
325 : : * joins, because there may be no other alternative.
326 : : */
327 [ + + + + ]: 323888 : if (enable_hashjoin || jointype == JOIN_FULL)
9335 328 : 321222 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
329 : : jointype, &extra);
330 : :
331 : : /*
332 : : * 5. If inner and outer relations are foreign tables (or joins) belonging
333 : : * to the same server and assigned to the same user to check access
334 : : * permissions as, give the FDW a chance to push down joins.
335 : : */
3781 rhaas@postgresql.org 336 [ + + ]: 323888 : if (joinrel->fdwroutine &&
753 efujita@postgresql.o 337 [ + + ]: 1354 : joinrel->fdwroutine->GetForeignJoinPaths)
3781 rhaas@postgresql.org 338 : 1352 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
339 : : outerrel, innerrel,
340 : : save_jointype, &extra);
341 : :
342 : : /*
343 : : * 6. Finally, give extensions a chance to manipulate the path list. They
344 : : * could add new paths (such as CustomPaths) by calling add_path(), or
345 : : * add_partial_path() if parallel aware. They could also delete or modify
346 : : * paths added by the core code.
347 : : */
753 efujita@postgresql.o 348 [ - + ]: 323888 : if (set_join_pathlist_hook)
3781 rhaas@postgresql.org 349 :UBC 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
350 : : save_jointype, &extra);
4971 tgl@sss.pgh.pa.us 351 :CBC 323888 : }
352 : :
353 : : /*
354 : : * We override the param_source_rels heuristic to accept nestloop paths in
355 : : * which the outer rel satisfies some but not all of the inner path's
356 : : * parameterization. This is necessary to get good plans for star-schema
357 : : * scenarios, in which a parameterized path for a large table may require
358 : : * parameters from multiple small tables that will not get joined directly to
359 : : * each other. We can handle that by stacking nestloops that have the small
360 : : * tables on the outside; but this breaks the rule the param_source_rels
361 : : * heuristic is based on, namely that parameters should not be passed down
362 : : * across joins unless there's a join-order-constraint-based reason to do so.
363 : : * So we ignore the param_source_rels restriction when this case applies.
364 : : *
365 : : * allow_star_schema_join() returns true if the param_source_rels restriction
366 : : * should be overridden, ie, it's okay to perform this join.
367 : : */
368 : : static inline bool
3686 369 : 137218 : allow_star_schema_join(PlannerInfo *root,
370 : : Relids outerrelids,
371 : : Relids inner_paramrels)
372 : : {
373 : : /*
374 : : * It's a star-schema case if the outer rel provides some but not all of
375 : : * the inner rel's parameterization.
376 : : */
2944 rhaas@postgresql.org 377 [ + + + + ]: 161087 : return (bms_overlap(inner_paramrels, outerrelids) &&
378 : 23869 : bms_nonempty_difference(inner_paramrels, outerrelids));
379 : : }
380 : :
381 : : /*
382 : : * If the parameterization is only partly satisfied by the outer rel,
383 : : * the unsatisfied part can't include any outer-join relids that could
384 : : * null rels of the satisfied part. That would imply that we're trying
385 : : * to use a clause involving a Var with nonempty varnullingrels at
386 : : * a join level where that value isn't yet computable.
387 : : *
388 : : * In practice, this test never finds a problem because earlier join order
389 : : * restrictions prevent us from attempting a join that would cause a problem.
390 : : * (That's unsurprising, because the code worked before we ever added
391 : : * outer-join relids to expression relids.) It still seems worth checking
392 : : * as a backstop, but we only do so in assert-enabled builds.
393 : : */
394 : : #ifdef USE_ASSERT_CHECKING
395 : : static inline bool
950 tgl@sss.pgh.pa.us 396 : 1344549 : have_unsafe_outer_join_ref(PlannerInfo *root,
397 : : Relids outerrelids,
398 : : Relids inner_paramrels)
399 : : {
400 : 1344549 : bool result = false;
401 : 1344549 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
936 402 : 1344549 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
403 : :
404 [ + + ]: 1344549 : if (bms_overlap(unsatisfied, root->outer_join_rels))
405 : : {
406 : : ListCell *lc;
407 : :
950 408 [ + - + + : 90 : foreach(lc, root->join_info_list)
+ + ]
409 : : {
410 : 60 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
411 : :
412 [ + + ]: 60 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
413 : 12 : continue; /* not relevant */
936 414 [ + - ]: 48 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
950 415 [ - + - - ]: 48 : (sjinfo->jointype == JOIN_FULL &&
936 tgl@sss.pgh.pa.us 416 :UBC 0 : bms_overlap(satisfied, sjinfo->min_lefthand)))
417 : : {
950 418 : 0 : result = true; /* doesn't work */
419 : 0 : break;
420 : : }
421 : : }
422 : : }
423 : :
424 : : /* Waste no memory when we reject a path here */
950 tgl@sss.pgh.pa.us 425 :CBC 1344549 : bms_free(unsatisfied);
936 426 : 1344549 : bms_free(satisfied);
427 : :
950 428 : 1344549 : return result;
429 : : }
430 : : #endif /* USE_ASSERT_CHECKING */
431 : :
432 : : /*
433 : : * paraminfo_get_equal_hashops
434 : : * Determine if the clauses in param_info and innerrel's lateral vars
435 : : * can be hashed.
436 : : * Returns true if hashing is possible, otherwise false.
437 : : *
438 : : * Additionally, on success we collect the outer expressions and the
439 : : * appropriate equality operators for each hashable parameter to innerrel.
440 : : * These are returned in parallel lists in *param_exprs and *operators.
441 : : * We also set *binary_mode to indicate whether strict binary matching is
442 : : * required.
443 : : */
444 : : static bool
1618 drowley@postgresql.o 445 : 182737 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
446 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
447 : : List *ph_lateral_vars, List **param_exprs,
448 : : List **operators, bool *binary_mode)
449 : :
450 : : {
451 : : List *lateral_vars;
452 : : ListCell *lc;
453 : :
454 : 182737 : *param_exprs = NIL;
455 : 182737 : *operators = NIL;
1382 456 : 182737 : *binary_mode = false;
457 : :
458 : : /* Add join clauses from param_info to the hash key */
1618 459 [ + - ]: 182737 : if (param_info != NULL)
460 : : {
461 : 182737 : List *clauses = param_info->ppi_clauses;
462 : :
463 [ + + + + : 348465 : foreach(lc, clauses)
+ + ]
464 : : {
465 : 201467 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
466 : : OpExpr *opexpr;
467 : : Node *expr;
468 : : Oid hasheqoperator;
469 : :
1398 470 : 201467 : opexpr = (OpExpr *) rinfo->clause;
471 : :
472 : : /*
473 : : * Bail if the rinfo is not compatible. We need a join OpExpr
474 : : * with 2 args.
475 : : */
476 [ + + + - ]: 201467 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
326 477 [ + + ]: 192607 : !clause_sides_match_join(rinfo, outerrel->relids,
478 : : innerrel->relids))
479 : : {
1618 480 : 35661 : list_free(*operators);
481 : 35661 : list_free(*param_exprs);
482 : 35739 : return false;
483 : : }
484 : :
485 [ + + ]: 165806 : if (rinfo->outer_is_left)
486 : : {
487 : 91964 : expr = (Node *) linitial(opexpr->args);
1398 488 : 91964 : hasheqoperator = rinfo->left_hasheqoperator;
489 : : }
490 : : else
491 : : {
1618 492 : 73842 : expr = (Node *) lsecond(opexpr->args);
1398 493 : 73842 : hasheqoperator = rinfo->right_hasheqoperator;
494 : : }
495 : :
496 : : /* can't do memoize if we can't hash the outer type */
497 [ + + ]: 165806 : if (!OidIsValid(hasheqoperator))
498 : : {
499 : 78 : list_free(*operators);
500 : 78 : list_free(*param_exprs);
501 : 78 : return false;
502 : : }
503 : :
504 : : /*
505 : : * 'expr' may already exist as a parameter from a previous item in
506 : : * ppi_clauses. No need to include it again, however we'd better
507 : : * ensure we do switch into binary mode if required. See below.
508 : : */
589 509 [ + + ]: 165728 : if (!list_member(*param_exprs, expr))
510 : : {
511 : 165725 : *operators = lappend_oid(*operators, hasheqoperator);
512 : 165725 : *param_exprs = lappend(*param_exprs, expr);
513 : : }
514 : :
515 : : /*
516 : : * When the join operator is not hashable then it's possible that
517 : : * the operator will be able to distinguish something that the
518 : : * hash equality operator could not. For example with floating
519 : : * point types -0.0 and +0.0 are classed as equal by the hash
520 : : * function and equality function, but some other operator may be
521 : : * able to tell those values apart. This means that we must put
522 : : * memoize into binary comparison mode so that it does bit-by-bit
523 : : * comparisons rather than a "logical" comparison as it would
524 : : * using the hash equality operator.
525 : : */
1382 526 [ + + ]: 165728 : if (!OidIsValid(rinfo->hashjoinoperator))
527 : 617 : *binary_mode = true;
528 : : }
529 : : }
530 : :
531 : : /* Now add any lateral vars to the cache key too */
418 rguo@postgresql.org 532 : 146998 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
533 [ + + + + : 149380 : foreach(lc, lateral_vars)
+ + ]
534 : : {
1618 drowley@postgresql.o 535 : 2481 : Node *expr = (Node *) lfirst(lc);
536 : : TypeCacheEntry *typentry;
537 : :
538 : : /* Reject if there are any volatile functions in lateral vars */
539 [ - + ]: 2481 : if (contain_volatile_functions(expr))
540 : : {
1618 drowley@postgresql.o 541 :UBC 0 : list_free(*operators);
542 : 0 : list_free(*param_exprs);
1618 drowley@postgresql.o 543 :CBC 99 : return false;
544 : : }
545 : :
546 : 2481 : typentry = lookup_type_cache(exprType(expr),
547 : : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
548 : :
549 : : /* can't use memoize without a valid hash proc and equals operator */
550 [ + + - + ]: 2481 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
551 : : {
552 : 99 : list_free(*operators);
553 : 99 : list_free(*param_exprs);
554 : 99 : return false;
555 : : }
556 : :
557 : : /*
558 : : * 'expr' may already exist as a parameter from the ppi_clauses. No
559 : : * need to include it again, however we'd better ensure we do switch
560 : : * into binary mode.
561 : : */
589 562 [ + + ]: 2382 : if (!list_member(*param_exprs, expr))
563 : : {
564 : 2157 : *operators = lappend_oid(*operators, typentry->eq_opr);
565 : 2157 : *param_exprs = lappend(*param_exprs, expr);
566 : : }
567 : :
568 : : /*
569 : : * We must go into binary mode as we don't have too much of an idea of
570 : : * how these lateral Vars are being used. See comment above when we
571 : : * set *binary_mode for the non-lateral Var case. This could be
572 : : * relaxed a bit if we had the RestrictInfos and knew the operators
573 : : * being used, however for cases like Vars that are arguments to
574 : : * functions we must operate in binary mode as we don't have
575 : : * visibility into what the function is doing with the Vars.
576 : : */
1382 577 : 2382 : *binary_mode = true;
578 : : }
579 : :
580 : : /* We're okay to use memoize */
1618 581 : 146899 : return true;
582 : : }
583 : :
584 : : /*
585 : : * extract_lateral_vars_from_PHVs
586 : : * Extract lateral references within PlaceHolderVars that are due to be
587 : : * evaluated at 'innerrelids'.
588 : : */
589 : : static List *
418 rguo@postgresql.org 590 : 605459 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
591 : : {
592 : 605459 : List *ph_lateral_vars = NIL;
593 : : ListCell *lc;
594 : :
595 : : /* Nothing would be found if the query contains no LATERAL RTEs */
596 [ + + ]: 605459 : if (!root->hasLateralRTEs)
597 : 595235 : return NIL;
598 : :
599 : : /*
600 : : * No need to consider PHVs that are due to be evaluated at joinrels,
601 : : * since we do not add Memoize nodes on top of joinrel paths.
602 : : */
603 [ + + ]: 10224 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
604 : 3259 : return NIL;
605 : :
606 [ + + + + : 8397 : foreach(lc, root->placeholder_list)
+ + ]
607 : : {
608 : 1432 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
609 : : List *vars;
610 : : ListCell *cell;
611 : :
612 : : /* PHV is uninteresting if no lateral refs */
613 [ + + ]: 1432 : if (phinfo->ph_lateral == NULL)
614 : 819 : continue;
615 : :
616 : : /* PHV is uninteresting if not due to be evaluated at innerrelids */
617 [ + + ]: 613 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
618 : 491 : continue;
619 : :
620 : : /*
621 : : * If the PHV does not reference any rels in innerrelids, use its
622 : : * contained expression as a cache key rather than extracting the
623 : : * Vars/PHVs from it and using those. This can be beneficial in cases
624 : : * where the expression results in fewer distinct values to cache
625 : : * tuples for.
626 : : */
627 [ + + ]: 122 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
628 : : innerrelids))
629 : : {
630 : 116 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
631 : 116 : continue;
632 : : }
633 : :
634 : : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
635 : 6 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
636 [ + - + + : 18 : foreach(cell, vars)
+ + ]
637 : : {
638 : 12 : Node *node = (Node *) lfirst(cell);
639 : :
640 [ + + ]: 12 : if (IsA(node, Var))
641 : : {
642 : 6 : Var *var = (Var *) node;
643 : :
644 [ - + ]: 6 : Assert(var->varlevelsup == 0);
645 : :
646 [ - + ]: 6 : if (bms_is_member(var->varno, phinfo->ph_lateral))
418 rguo@postgresql.org 647 :UBC 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
648 : : }
418 rguo@postgresql.org 649 [ + - ]:CBC 6 : else if (IsA(node, PlaceHolderVar))
650 : : {
651 : 6 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
652 : :
653 [ - + ]: 6 : Assert(phv->phlevelsup == 0);
654 : :
655 [ + - ]: 6 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
656 : 6 : phinfo->ph_lateral))
657 : 6 : ph_lateral_vars = lappend(ph_lateral_vars, node);
658 : : }
659 : : else
418 rguo@postgresql.org 660 :UBC 0 : Assert(false);
661 : : }
662 : :
418 rguo@postgresql.org 663 :CBC 6 : list_free(vars);
664 : : }
665 : :
666 : 6965 : return ph_lateral_vars;
667 : : }
668 : :
669 : : /*
670 : : * get_memoize_path
671 : : * If possible, make and return a Memoize path atop of 'inner_path'.
672 : : * Otherwise return NULL.
673 : : *
674 : : * Note that currently we do not add Memoize nodes on top of join relation
675 : : * paths. This is because the ParamPathInfos for join relation paths do not
676 : : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
677 : : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
678 : : * we do not have a way to extract cache keys from joinrels.
679 : : */
680 : : static Path *
1515 drowley@postgresql.o 681 : 858992 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
682 : : RelOptInfo *outerrel, Path *inner_path,
683 : : Path *outer_path, JoinType jointype,
684 : : JoinPathExtraData *extra)
685 : : {
686 : : List *param_exprs;
687 : : List *hash_operators;
688 : : ListCell *lc;
689 : : bool binary_mode;
690 : : List *ph_lateral_vars;
691 : :
692 : : /* Obviously not if it's disabled */
693 [ + + ]: 858992 : if (!enable_memoize)
1618 694 : 206 : return NULL;
695 : :
696 : : /*
697 : : * We can safely not bother with all this unless we expect to perform more
698 : : * than one inner scan. The first scan is always going to be a cache
699 : : * miss. This would likely fail later anyway based on costs, so this is
700 : : * really just to save some wasted effort.
701 : : */
702 [ + + ]: 858786 : if (outer_path->parent->rows < 2)
703 : 253327 : return NULL;
704 : :
705 : : /*
706 : : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
707 : : * evaluated at innerrel. These lateral Vars/PHVs could be used as
708 : : * memoize cache keys.
709 : : */
418 rguo@postgresql.org 710 : 605459 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
711 : :
712 : : /*
713 : : * We can only have a memoize node when there's some kind of cache key,
714 : : * either parameterized path clauses or lateral Vars. No cache key sounds
715 : : * more like something a Materialize node might be more useful for.
716 : : */
1618 drowley@postgresql.o 717 [ + + ]: 605459 : if ((inner_path->param_info == NULL ||
718 [ + + ]: 248954 : inner_path->param_info->ppi_clauses == NIL) &&
418 rguo@postgresql.org 719 [ + + + + ]: 369367 : innerrel->lateral_vars == NIL &&
720 : : ph_lateral_vars == NIL)
1618 drowley@postgresql.o 721 : 367712 : return NULL;
722 : :
723 : : /*
724 : : * Currently we don't do this for SEMI and ANTI joins, because nested loop
725 : : * SEMI/ANTI joins don't scan the inner node to completion, which means
726 : : * memoize cannot mark the cache entry as complete. Nor can we mark the
727 : : * cache entry as complete after fetching the first inner tuple, because
728 : : * if that tuple and the current outer tuple don't satisfy the join
729 : : * clauses, a second inner tuple that satisfies the parameters would find
730 : : * the cache entry already marked as complete. The only exception is when
731 : : * the inner relation is provably unique, as in that case, there won't be
732 : : * a second matching tuple and we can safely mark the cache entry as
733 : : * complete after fetching the first inner tuple. Note that in such
734 : : * cases, the SEMI join should have been reduced to an inner join by
735 : : * reduce_unique_semijoins.
736 : : */
65 rguo@postgresql.org 737 [ + + + + ]:GNC 237747 : if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
738 [ + + ]: 5054 : !extra->inner_unique)
1618 drowley@postgresql.o 739 :CBC 3182 : return NULL;
740 : :
741 : : /*
742 : : * Memoize normally marks cache entries as complete when it runs out of
743 : : * tuples to read from its subplan. However, with unique joins, Nested
744 : : * Loop will skip to the next outer tuple after finding the first matching
745 : : * inner tuple. This means that we may not read the inner side of the
746 : : * join to completion which leaves no opportunity to mark the cache entry
747 : : * as complete. To work around that, when the join is unique we
748 : : * automatically mark cache entries as complete after fetching the first
749 : : * tuple. This works when the entire join condition is parameterized.
750 : : * Otherwise, when the parameterization is only a subset of the join
751 : : * condition, we can't be sure which part of it causes the join to be
752 : : * unique. This means there are no guarantees that only 1 tuple will be
753 : : * read. We cannot mark the cache entry as complete after reading the
754 : : * first tuple without that guarantee. This means the scope of Memoize
755 : : * node's usefulness is limited to only outer rows that have no join
756 : : * partner as this is the only case where Nested Loop would exhaust the
757 : : * inner scan of a unique join. Since the scope is limited to that, we
758 : : * just don't bother making a memoize path in this case.
759 : : *
760 : : * Lateral vars needn't be considered here as they're not considered when
761 : : * determining if the join is unique.
762 : : */
143 rguo@postgresql.org 763 [ + + ]: 234565 : if (extra->inner_unique)
764 : : {
765 : : Bitmapset *ppi_serials;
766 : :
767 [ - + ]: 141022 : if (inner_path->param_info == NULL)
143 rguo@postgresql.org 768 :UBC 0 : return NULL;
769 : :
143 rguo@postgresql.org 770 :CBC 141022 : ppi_serials = inner_path->param_info->ppi_serials;
771 : :
772 [ + - + + : 338134 : foreach_node(RestrictInfo, rinfo, extra->restrictlist)
+ + ]
773 : : {
774 [ + + ]: 159732 : if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
775 : 51821 : return NULL;
776 : : }
777 : : }
778 : :
779 : : /*
780 : : * We can't use a memoize node if there are volatile functions in the
781 : : * inner rel's target list or restrict list. A cache hit could reduce the
782 : : * number of calls to these functions.
783 : : */
1618 drowley@postgresql.o 784 [ - + ]: 182744 : if (contain_volatile_functions((Node *) innerrel->reltarget))
1618 drowley@postgresql.o 785 :UBC 0 : return NULL;
786 : :
1618 drowley@postgresql.o 787 [ + + + + :CBC 301421 : foreach(lc, innerrel->baserestrictinfo)
+ + ]
788 : : {
789 : 118683 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
790 : :
791 [ + + ]: 118683 : if (contain_volatile_functions((Node *) rinfo))
792 : 6 : return NULL;
793 : : }
794 : :
795 : : /*
796 : : * Also check the parameterized path restrictinfos for volatile functions.
797 : : * Indexed functions must be immutable so shouldn't have any volatile
798 : : * functions, however, with a lateral join the inner scan may not be an
799 : : * index scan.
800 : : */
761 801 [ + - ]: 182738 : if (inner_path->param_info != NULL)
802 : : {
803 [ + + + + : 396677 : foreach(lc, inner_path->param_info->ppi_clauses)
+ + ]
804 : : {
805 : 213940 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
806 : :
807 [ + + ]: 213940 : if (contain_volatile_functions((Node *) rinfo))
808 : 1 : return NULL;
809 : : }
810 : : }
811 : :
812 : : /* Check if we have hash ops for each parameter to the path */
1618 813 [ + + ]: 182737 : if (paraminfo_get_equal_hashops(root,
814 : : inner_path->param_info,
1006 tgl@sss.pgh.pa.us 815 [ + + ]: 182737 : outerrel->top_parent ?
816 : : outerrel->top_parent : outerrel,
817 : : innerrel,
818 : : ph_lateral_vars,
819 : : ¶m_exprs,
820 : : &hash_operators,
821 : : &binary_mode))
822 : : {
1515 drowley@postgresql.o 823 : 146899 : return (Path *) create_memoize_path(root,
824 : : innerrel,
825 : : inner_path,
826 : : param_exprs,
827 : : hash_operators,
828 : 146899 : extra->inner_unique,
829 : : binary_mode,
830 : : outer_path->rows);
831 : : }
832 : :
1618 833 : 35838 : return NULL;
834 : : }
835 : :
836 : : /*
837 : : * try_nestloop_path
838 : : * Consider a nestloop join path; if it appears useful, push it into
839 : : * the joinrel's pathlist via add_path().
840 : : */
841 : : static void
4971 tgl@sss.pgh.pa.us 842 : 1480645 : try_nestloop_path(PlannerInfo *root,
843 : : RelOptInfo *joinrel,
844 : : Path *outer_path,
845 : : Path *inner_path,
846 : : List *pathkeys,
847 : : JoinType jointype,
848 : : JoinPathExtraData *extra)
849 : : {
850 : : Relids required_outer;
851 : : JoinCostWorkspace workspace;
2944 rhaas@postgresql.org 852 : 1480645 : RelOptInfo *innerrel = inner_path->parent;
853 : 1480645 : RelOptInfo *outerrel = outer_path->parent;
854 : : Relids innerrelids;
855 : : Relids outerrelids;
856 [ + + ]: 1480645 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
857 [ + + ]: 1480645 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
858 : :
859 : : /*
860 : : * If we are forming an outer join at this join, it's nonsensical to use
861 : : * an input path that uses the outer join as part of its parameterization.
862 : : * (This can happen despite our join order restrictions, since those apply
863 : : * to what is in an input relation not what its parameters are.)
864 : : */
800 tgl@sss.pgh.pa.us 865 [ + + + - ]: 1820214 : if (extra->sjinfo->ojrelid != 0 &&
866 [ + + ]: 679138 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
867 : 339569 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
868 : 136096 : return;
869 : :
870 : : /*
871 : : * Any parameterization of the input paths refers to topmost parents of
872 : : * the relevant relations, because reparameterize_path_by_child() hasn't
873 : : * been called yet. So we must consider topmost parents of the relations
874 : : * being joined, too, while determining parameterization of the result and
875 : : * checking for disallowed parameterization cases.
876 : : */
2892 rhaas@postgresql.org 877 [ + + ]: 1480585 : if (innerrel->top_parent_relids)
878 : 20690 : innerrelids = innerrel->top_parent_relids;
879 : : else
880 : 1459895 : innerrelids = innerrel->relids;
881 : :
882 [ + + ]: 1480585 : if (outerrel->top_parent_relids)
883 : 20690 : outerrelids = outerrel->top_parent_relids;
884 : : else
885 : 1459895 : outerrelids = outerrel->relids;
886 : :
887 : : /*
888 : : * Check to see if proposed path is still parameterized, and reject if the
889 : : * parameterization wouldn't be sensible --- unless allow_star_schema_join
890 : : * says to allow it anyway.
891 : : */
2944 892 : 1480585 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
893 : : innerrelids, inner_paramrels);
4971 tgl@sss.pgh.pa.us 894 [ + + ]: 1480585 : if (required_outer &&
78 895 [ + + ]: 155791 : !bms_overlap(required_outer, extra->param_source_rels) &&
896 [ + + ]: 137218 : !allow_star_schema_join(root, outerrelids, inner_paramrels))
897 : : {
898 : : /* Waste no memory when we reject a path here */
3686 899 : 136036 : bms_free(required_outer);
900 : 136036 : return;
901 : : }
902 : :
903 : : /* If we got past that, we shouldn't have any unsafe outer-join refs */
936 904 [ - + ]: 1344549 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
905 : :
906 : : /*
907 : : * If the inner path is parameterized, it is parameterized by the topmost
908 : : * parent of the outer rel, not the outer rel itself. We will need to
909 : : * translate the parameterization, if this path is chosen, during
910 : : * create_plan(). Here we just check whether we will be able to perform
911 : : * the translation, and if not avoid creating a nestloop path.
912 : : */
536 913 [ + + + - : 1344549 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+ + ]
914 [ - + ]: 6668 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
915 : : {
536 tgl@sss.pgh.pa.us 916 :UBC 0 : bms_free(required_outer);
917 : 0 : return;
918 : : }
919 : :
920 : : /*
921 : : * Do a precheck to quickly eliminate obviously-inferior paths. We
922 : : * calculate a cheap lower bound on the path's cost and then use
923 : : * add_path_precheck() to see if the path is clearly going to be dominated
924 : : * by some existing path for the joinrel. If not, do the full pushup with
925 : : * creating a fully valid path structure and submitting it to add_path().
926 : : * The latter two steps are expensive enough to make this two-phase
927 : : * methodology worthwhile.
928 : : */
4971 tgl@sss.pgh.pa.us 929 :CBC 1344549 : initial_cost_nestloop(root, &workspace, jointype,
930 : : outer_path, inner_path, extra);
931 : :
381 rhaas@postgresql.org 932 [ + + ]: 1344549 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
933 : : workspace.startup_cost, workspace.total_cost,
934 : : pathkeys, required_outer))
935 : : {
4971 tgl@sss.pgh.pa.us 936 : 659976 : add_path(joinrel, (Path *)
937 : 659976 : create_nestloop_path(root,
938 : : joinrel,
939 : : jointype,
940 : : &workspace,
941 : : extra,
942 : : outer_path,
943 : : inner_path,
944 : : extra->restrictlist,
945 : : pathkeys,
946 : : required_outer));
947 : : }
948 : : else
949 : : {
950 : : /* Waste no memory when we reject a path here */
951 : 684573 : bms_free(required_outer);
952 : : }
953 : : }
954 : :
955 : : /*
956 : : * try_partial_nestloop_path
957 : : * Consider a partial nestloop join path; if it appears useful, push it into
958 : : * the joinrel's partial_pathlist via add_partial_path().
959 : : */
960 : : static void
3517 rhaas@postgresql.org 961 : 22555 : try_partial_nestloop_path(PlannerInfo *root,
962 : : RelOptInfo *joinrel,
963 : : Path *outer_path,
964 : : Path *inner_path,
965 : : List *pathkeys,
966 : : JoinType jointype,
967 : : JoinPathExtraData *extra)
968 : : {
969 : : JoinCostWorkspace workspace;
970 : :
971 : : /*
972 : : * If the inner path is parameterized, the parameterization must be fully
973 : : * satisfied by the proposed outer path. Parameterized partial paths are
974 : : * not supported. The caller should already have verified that no lateral
975 : : * rels are required here.
976 : : */
977 [ - + ]: 22555 : Assert(bms_is_empty(joinrel->lateral_relids));
403 rguo@postgresql.org 978 [ - + - - ]: 22555 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
3517 rhaas@postgresql.org 979 [ + + ]: 22555 : if (inner_path->param_info != NULL)
980 : : {
981 : 7865 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
2892 982 : 7865 : RelOptInfo *outerrel = outer_path->parent;
983 : : Relids outerrelids;
984 : :
985 : : /*
986 : : * The inner and outer paths are parameterized, if at all, by the top
987 : : * level parents, not the child relations, so we must use those relids
988 : : * for our parameterization tests.
989 : : */
990 [ + + ]: 7865 : if (outerrel->top_parent_relids)
991 : 5796 : outerrelids = outerrel->top_parent_relids;
992 : : else
993 : 2069 : outerrelids = outerrel->relids;
994 : :
995 [ + + ]: 7865 : if (!bms_is_subset(inner_paramrels, outerrelids))
3517 996 : 15806 : return;
997 : : }
998 : :
999 : : /*
1000 : : * If the inner path is parameterized, it is parameterized by the topmost
1001 : : * parent of the outer rel, not the outer rel itself. We will need to
1002 : : * translate the parameterization, if this path is chosen, during
1003 : : * create_plan(). Here we just check whether we will be able to perform
1004 : : * the translation, and if not avoid creating a nestloop path.
1005 : : */
536 tgl@sss.pgh.pa.us 1006 [ + + + - : 21885 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+ + ]
1007 [ - + ]: 5310 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
536 tgl@sss.pgh.pa.us 1008 :UBC 0 : return;
1009 : :
1010 : : /*
1011 : : * Before creating a path, get a quick lower bound on what it is likely to
1012 : : * cost. Bail out right away if it looks terrible.
1013 : : */
3517 rhaas@postgresql.org 1014 :CBC 21885 : initial_cost_nestloop(root, &workspace, jointype,
1015 : : outer_path, inner_path, extra);
381 1016 [ + + ]: 21885 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1017 : : workspace.total_cost, pathkeys))
3517 1018 : 15136 : return;
1019 : :
1020 : : /* Might be good enough to be worth trying, so let's try it. */
1021 : 6749 : add_partial_path(joinrel, (Path *)
3440 tgl@sss.pgh.pa.us 1022 : 6749 : create_nestloop_path(root,
1023 : : joinrel,
1024 : : jointype,
1025 : : &workspace,
1026 : : extra,
1027 : : outer_path,
1028 : : inner_path,
1029 : : extra->restrictlist,
1030 : : pathkeys,
1031 : : NULL));
1032 : : }
1033 : :
1034 : : /*
1035 : : * try_mergejoin_path
1036 : : * Consider a merge join path; if it appears useful, push it into
1037 : : * the joinrel's pathlist via add_path().
1038 : : */
1039 : : static void
4971 1040 : 608288 : try_mergejoin_path(PlannerInfo *root,
1041 : : RelOptInfo *joinrel,
1042 : : Path *outer_path,
1043 : : Path *inner_path,
1044 : : List *pathkeys,
1045 : : List *mergeclauses,
1046 : : List *outersortkeys,
1047 : : List *innersortkeys,
1048 : : JoinType jointype,
1049 : : JoinPathExtraData *extra,
1050 : : bool is_partial)
1051 : : {
1052 : : Relids required_outer;
121 rguo@postgresql.org 1053 : 608288 : int outer_presorted_keys = 0;
1054 : : JoinCostWorkspace workspace;
1055 : :
3105 rhaas@postgresql.org 1056 [ + + ]: 608288 : if (is_partial)
1057 : : {
1058 : 3749 : try_partial_mergejoin_path(root,
1059 : : joinrel,
1060 : : outer_path,
1061 : : inner_path,
1062 : : pathkeys,
1063 : : mergeclauses,
1064 : : outersortkeys,
1065 : : innersortkeys,
1066 : : jointype,
1067 : : extra);
1068 : 22471 : return;
1069 : : }
1070 : :
1071 : : /*
1072 : : * If we are forming an outer join at this join, it's nonsensical to use
1073 : : * an input path that uses the outer join as part of its parameterization.
1074 : : * (This can happen despite our join order restrictions, since those apply
1075 : : * to what is in an input relation not what its parameters are.)
1076 : : */
800 tgl@sss.pgh.pa.us 1077 [ + + + - ]: 794149 : if (extra->sjinfo->ojrelid != 0 &&
1078 [ + + + + ]: 379220 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1079 [ + + ]: 189610 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1080 : 6 : return;
1081 : :
1082 : : /*
1083 : : * Check to see if proposed path is still parameterized, and reject if the
1084 : : * parameterization wouldn't be sensible.
1085 : : */
4971 1086 : 604533 : required_outer = calc_non_nestloop_required_outer(outer_path,
1087 : : inner_path);
1088 [ + + ]: 604533 : if (required_outer &&
3772 1089 [ + + ]: 20129 : !bms_overlap(required_outer, extra->param_source_rels))
1090 : : {
1091 : : /* Waste no memory when we reject a path here */
4971 1092 : 18716 : bms_free(required_outer);
1093 : 18716 : return;
1094 : : }
1095 : :
1096 : : /*
1097 : : * If the given paths are already well enough ordered, we can skip doing
1098 : : * an explicit sort.
1099 : : *
1100 : : * We need to determine the number of presorted keys of the outer path to
1101 : : * decide whether explicit incremental sort can be applied when
1102 : : * outersortkeys is not NIL. We do not need to do the same for the inner
1103 : : * path though, as incremental sort currently does not support
1104 : : * mark/restore.
1105 : : */
1106 [ + + + + ]: 872615 : if (outersortkeys &&
121 rguo@postgresql.org 1107 : 286798 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1108 : : &outer_presorted_keys))
4971 tgl@sss.pgh.pa.us 1109 : 8397 : outersortkeys = NIL;
1110 [ + + + + ]: 1059696 : if (innersortkeys &&
1111 : 473879 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1112 : 14486 : innersortkeys = NIL;
1113 : :
1114 : : /*
1115 : : * See comments in try_nestloop_path().
1116 : : */
1117 : 585817 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1118 : : outer_path, inner_path,
1119 : : outersortkeys, innersortkeys,
1120 : : outer_presorted_keys,
1121 : : extra);
1122 : :
381 rhaas@postgresql.org 1123 [ + + ]: 585817 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1124 : : workspace.startup_cost, workspace.total_cost,
1125 : : pathkeys, required_outer))
1126 : : {
4971 tgl@sss.pgh.pa.us 1127 : 152938 : add_path(joinrel, (Path *)
1128 : 152938 : create_mergejoin_path(root,
1129 : : joinrel,
1130 : : jointype,
1131 : : &workspace,
1132 : : extra,
1133 : : outer_path,
1134 : : inner_path,
1135 : : extra->restrictlist,
1136 : : pathkeys,
1137 : : required_outer,
1138 : : mergeclauses,
1139 : : outersortkeys,
1140 : : innersortkeys,
1141 : : outer_presorted_keys));
1142 : : }
1143 : : else
1144 : : {
1145 : : /* Waste no memory when we reject a path here */
1146 : 432879 : bms_free(required_outer);
1147 : : }
1148 : : }
1149 : :
1150 : : /*
1151 : : * try_partial_mergejoin_path
1152 : : * Consider a partial merge join path; if it appears useful, push it into
1153 : : * the joinrel's pathlist via add_partial_path().
1154 : : */
1155 : : static void
3105 rhaas@postgresql.org 1156 : 10495 : try_partial_mergejoin_path(PlannerInfo *root,
1157 : : RelOptInfo *joinrel,
1158 : : Path *outer_path,
1159 : : Path *inner_path,
1160 : : List *pathkeys,
1161 : : List *mergeclauses,
1162 : : List *outersortkeys,
1163 : : List *innersortkeys,
1164 : : JoinType jointype,
1165 : : JoinPathExtraData *extra)
1166 : : {
121 rguo@postgresql.org 1167 : 10495 : int outer_presorted_keys = 0;
1168 : : JoinCostWorkspace workspace;
1169 : :
1170 : : /*
1171 : : * See comments in try_partial_hashjoin_path().
1172 : : */
3105 rhaas@postgresql.org 1173 [ - + ]: 10495 : Assert(bms_is_empty(joinrel->lateral_relids));
403 rguo@postgresql.org 1174 [ - + - - ]: 10495 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1175 [ - + - - ]: 10495 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1176 : 5472 : return;
1177 : :
1178 : : /*
1179 : : * If the given paths are already well enough ordered, we can skip doing
1180 : : * an explicit sort.
1181 : : *
1182 : : * We need to determine the number of presorted keys of the outer path to
1183 : : * decide whether explicit incremental sort can be applied when
1184 : : * outersortkeys is not NIL. We do not need to do the same for the inner
1185 : : * path though, as incremental sort currently does not support
1186 : : * mark/restore.
1187 : : */
3105 rhaas@postgresql.org 1188 [ + + + + ]: 17241 : if (outersortkeys &&
121 rguo@postgresql.org 1189 : 6746 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1190 : : &outer_presorted_keys))
3105 rhaas@postgresql.org 1191 : 93 : outersortkeys = NIL;
1192 [ + + + + ]: 19366 : if (innersortkeys &&
1193 : 8871 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1194 : 306 : innersortkeys = NIL;
1195 : :
1196 : : /*
1197 : : * See comments in try_partial_nestloop_path().
1198 : : */
1199 : 10495 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1200 : : outer_path, inner_path,
1201 : : outersortkeys, innersortkeys,
1202 : : outer_presorted_keys,
1203 : : extra);
1204 : :
381 1205 [ + + ]: 10495 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1206 : : workspace.total_cost, pathkeys))
3105 1207 : 5472 : return;
1208 : :
1209 : : /* Might be good enough to be worth trying, so let's try it. */
1210 : 5023 : add_partial_path(joinrel, (Path *)
1211 : 5023 : create_mergejoin_path(root,
1212 : : joinrel,
1213 : : jointype,
1214 : : &workspace,
1215 : : extra,
1216 : : outer_path,
1217 : : inner_path,
1218 : : extra->restrictlist,
1219 : : pathkeys,
1220 : : NULL,
1221 : : mergeclauses,
1222 : : outersortkeys,
1223 : : innersortkeys,
1224 : : outer_presorted_keys));
1225 : : }
1226 : :
1227 : : /*
1228 : : * try_hashjoin_path
1229 : : * Consider a hash join path; if it appears useful, push it into
1230 : : * the joinrel's pathlist via add_path().
1231 : : */
1232 : : static void
4971 tgl@sss.pgh.pa.us 1233 : 364250 : try_hashjoin_path(PlannerInfo *root,
1234 : : RelOptInfo *joinrel,
1235 : : Path *outer_path,
1236 : : Path *inner_path,
1237 : : List *hashclauses,
1238 : : JoinType jointype,
1239 : : JoinPathExtraData *extra)
1240 : : {
1241 : : Relids required_outer;
1242 : : JoinCostWorkspace workspace;
1243 : :
1244 : : /*
1245 : : * If we are forming an outer join at this join, it's nonsensical to use
1246 : : * an input path that uses the outer join as part of its parameterization.
1247 : : * (This can happen despite our join order restrictions, since those apply
1248 : : * to what is in an input relation not what its parameters are.)
1249 : : */
800 1250 [ + + + + ]: 491283 : if (extra->sjinfo->ojrelid != 0 &&
1251 [ + + + + ]: 254051 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1252 [ + + ]: 127018 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1253 : 53892 : return;
1254 : :
1255 : : /*
1256 : : * Check to see if proposed path is still parameterized, and reject if the
1257 : : * parameterization wouldn't be sensible.
1258 : : */
4971 1259 : 364220 : required_outer = calc_non_nestloop_required_outer(outer_path,
1260 : : inner_path);
1261 [ + + ]: 364220 : if (required_outer &&
3772 1262 [ + + ]: 61107 : !bms_overlap(required_outer, extra->param_source_rels))
1263 : : {
1264 : : /* Waste no memory when we reject a path here */
4971 1265 : 53862 : bms_free(required_outer);
1266 : 53862 : return;
1267 : : }
1268 : :
1269 : : /*
1270 : : * See comments in try_nestloop_path(). Also note that hashjoin paths
1271 : : * never have any output pathkeys, per comments in create_hashjoin_path.
1272 : : */
1273 : 310358 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1274 : : outer_path, inner_path, extra, false);
1275 : :
381 rhaas@postgresql.org 1276 [ + + ]: 310358 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1277 : : workspace.startup_cost, workspace.total_cost,
1278 : : NIL, required_outer))
1279 : : {
4971 tgl@sss.pgh.pa.us 1280 : 135610 : add_path(joinrel, (Path *)
1281 : 135610 : create_hashjoin_path(root,
1282 : : joinrel,
1283 : : jointype,
1284 : : &workspace,
1285 : : extra,
1286 : : outer_path,
1287 : : inner_path,
1288 : : false, /* parallel_hash */
1289 : : extra->restrictlist,
1290 : : required_outer,
1291 : : hashclauses));
1292 : : }
1293 : : else
1294 : : {
1295 : : /* Waste no memory when we reject a path here */
1296 : 174748 : bms_free(required_outer);
1297 : : }
1298 : : }
1299 : :
1300 : : /*
1301 : : * try_partial_hashjoin_path
1302 : : * Consider a partial hashjoin join path; if it appears useful, push it into
1303 : : * the joinrel's partial_pathlist via add_partial_path().
1304 : : * The outer side is partial. If parallel_hash is true, then the inner path
1305 : : * must be partial and will be run in parallel to create one or more shared
1306 : : * hash tables; otherwise the inner path must be complete and a copy of it
1307 : : * is run in every process to create separate identical private hash tables.
1308 : : */
1309 : : static void
3517 rhaas@postgresql.org 1310 : 11179 : try_partial_hashjoin_path(PlannerInfo *root,
1311 : : RelOptInfo *joinrel,
1312 : : Path *outer_path,
1313 : : Path *inner_path,
1314 : : List *hashclauses,
1315 : : JoinType jointype,
1316 : : JoinPathExtraData *extra,
1317 : : bool parallel_hash)
1318 : : {
1319 : : JoinCostWorkspace workspace;
1320 : :
1321 : : /*
1322 : : * If the inner path is parameterized, we can't use a partial hashjoin.
1323 : : * Parameterized partial paths are not supported. The caller should
1324 : : * already have verified that no lateral rels are required here.
1325 : : */
1326 [ - + ]: 11179 : Assert(bms_is_empty(joinrel->lateral_relids));
403 rguo@postgresql.org 1327 [ - + - - ]: 11179 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1328 [ - + - - ]: 11179 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1329 : 5295 : return;
1330 : :
1331 : : /*
1332 : : * Before creating a path, get a quick lower bound on what it is likely to
1333 : : * cost. Bail out right away if it looks terrible.
1334 : : */
3517 rhaas@postgresql.org 1335 : 11179 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1336 : : outer_path, inner_path, extra, parallel_hash);
381 1337 [ + + ]: 11179 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1338 : : workspace.total_cost, NIL))
3517 1339 : 5295 : return;
1340 : :
1341 : : /* Might be good enough to be worth trying, so let's try it. */
1342 : 5884 : add_partial_path(joinrel, (Path *)
3440 tgl@sss.pgh.pa.us 1343 : 5884 : create_hashjoin_path(root,
1344 : : joinrel,
1345 : : jointype,
1346 : : &workspace,
1347 : : extra,
1348 : : outer_path,
1349 : : inner_path,
1350 : : parallel_hash,
1351 : : extra->restrictlist,
1352 : : NULL,
1353 : : hashclauses));
1354 : : }
1355 : :
1356 : : /*
1357 : : * sort_inner_and_outer
1358 : : * Create mergejoin join paths by explicitly sorting both the outer and
1359 : : * inner join relations on each available merge ordering.
1360 : : *
1361 : : * 'joinrel' is the join relation
1362 : : * 'outerrel' is the outer join relation
1363 : : * 'innerrel' is the inner join relation
1364 : : * 'jointype' is the type of join to do
1365 : : * 'extra' contains additional input values
1366 : : */
1367 : : static void
7398 1368 : 315966 : sort_inner_and_outer(PlannerInfo *root,
1369 : : RelOptInfo *joinrel,
1370 : : RelOptInfo *outerrel,
1371 : : RelOptInfo *innerrel,
1372 : : JoinType jointype,
1373 : : JoinPathExtraData *extra)
1374 : : {
1375 : : Path *outer_path;
1376 : : Path *inner_path;
3105 rhaas@postgresql.org 1377 : 315966 : Path *cheapest_partial_outer = NULL;
1378 : 315966 : Path *cheapest_safe_inner = NULL;
1379 : : List *all_pathkeys;
1380 : : ListCell *l;
1381 : :
1382 : : /* Nothing to do if there are no available mergejoin clauses */
403 rguo@postgresql.org 1383 [ + + ]: 315966 : if (extra->mergeclause_list == NIL)
1384 : 57757 : return;
1385 : :
1386 : : /*
1387 : : * We only consider the cheapest-total-cost input paths, since we are
1388 : : * assuming here that a sort is required. We will consider
1389 : : * cheapest-startup-cost input paths later, and only if they don't need a
1390 : : * sort.
1391 : : *
1392 : : * This function intentionally does not consider parameterized input
1393 : : * paths, except when the cheapest-total is parameterized. If we did so,
1394 : : * we'd have a combinatorial explosion of mergejoin paths of dubious
1395 : : * value. This interacts with decisions elsewhere that also discriminate
1396 : : * against mergejoins with parameterized inputs; see comments in
1397 : : * src/backend/optimizer/README.
1398 : : */
8265 tgl@sss.pgh.pa.us 1399 : 258209 : outer_path = outerrel->cheapest_total_path;
1400 : 258209 : inner_path = innerrel->cheapest_total_path;
1401 : :
1402 : : /*
1403 : : * If either cheapest-total path is parameterized by the other rel, we
1404 : : * can't use a mergejoin. (There's no use looking for alternative input
1405 : : * paths, since these should already be the least-parameterized available
1406 : : * paths.)
1407 : : */
4756 1408 [ + + + - : 258209 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
+ + + + +
- + + ]
1409 [ + + + - : 257763 : PATH_PARAM_BY_REL(inner_path, outerrel))
+ + + + +
- + + ]
4778 1410 : 910 : return;
1411 : :
1412 : : /*
1413 : : * If the joinrel is parallel-safe, we may be able to consider a partial
1414 : : * merge join. However, we can't handle JOIN_FULL, JOIN_RIGHT and
1415 : : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1416 : : * Also, the resulting path must not be parameterized.
1417 : : */
3105 rhaas@postgresql.org 1418 [ + + + + ]: 257299 : if (joinrel->consider_parallel &&
18 rguo@postgresql.org 1419 [ + + ]:GNC 227433 : jointype != JOIN_FULL &&
1420 [ + + ]: 186661 : jointype != JOIN_RIGHT &&
1421 : 185927 : jointype != JOIN_RIGHT_ANTI &&
3105 rhaas@postgresql.org 1422 [ + + ]:CBC 185927 : outerrel->partial_pathlist != NIL &&
1423 [ + - ]: 4960 : bms_is_empty(joinrel->lateral_relids))
1424 : : {
1425 : 4960 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1426 : :
1427 [ + + ]: 4960 : if (inner_path->parallel_safe)
1428 : 4900 : cheapest_safe_inner = inner_path;
1429 : : else
1430 : : cheapest_safe_inner =
1431 : 60 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1432 : : }
1433 : :
1434 : : /*
1435 : : * Each possible ordering of the available mergejoin clauses will generate
1436 : : * a differently-sorted result path at essentially the same cost. We have
1437 : : * no basis for choosing one over another at this level of joining, but
1438 : : * some sort orders may be more useful than others for higher-level
1439 : : * mergejoins, so it's worth considering multiple orderings.
1440 : : *
1441 : : * Actually, it's not quite true that every mergeclause ordering will
1442 : : * generate a different path order, because some of the clauses may be
1443 : : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1444 : : * what we do is convert the mergeclause list to a list of canonical
1445 : : * pathkeys, and then consider different orderings of the pathkeys.
1446 : : *
1447 : : * Generating a path for *every* permutation of the pathkeys doesn't seem
1448 : : * like a winning strategy; the cost in planning time is too high. For
1449 : : * now, we generate one path for each pathkey, listing that pathkey first
1450 : : * and the rest in random order. This should allow at least a one-clause
1451 : : * mergejoin without re-sorting against any other possible mergejoin
1452 : : * partner path. But if we've not guessed the right ordering of secondary
1453 : : * keys, we may end up evaluating clauses as qpquals when they could have
1454 : : * been done as mergeclauses. (In practice, it's rare that there's more
1455 : : * than two or three mergeclauses, so expending a huge amount of thought
1456 : : * on that is probably not worth it.)
1457 : : *
1458 : : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1459 : : * some heuristics behind it (see that function), so be sure to try it
1460 : : * exactly as-is as well as making variants.
1461 : : */
6804 tgl@sss.pgh.pa.us 1462 : 257299 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1463 : : extra->mergeclause_list,
1464 : : joinrel);
1465 : :
7773 neilc@samurai.com 1466 [ + - + + : 544097 : foreach(l, all_pathkeys)
+ + ]
1467 : : {
1213 tgl@sss.pgh.pa.us 1468 : 286798 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1469 : : List *cur_mergeclauses;
1470 : : List *outerkeys;
1471 : : List *innerkeys;
1472 : : List *merge_pathkeys;
1473 : :
1474 : : /* Make a pathkey list with this guy first */
7773 neilc@samurai.com 1475 [ + + ]: 286798 : if (l != list_head(all_pathkeys))
6804 tgl@sss.pgh.pa.us 1476 : 29499 : outerkeys = lcons(front_pathkey,
1477 : : list_delete_nth_cell(list_copy(all_pathkeys),
1478 : : foreach_current_index(l)));
1479 : : else
6505 bruce@momjian.us 1480 : 257299 : outerkeys = all_pathkeys; /* no work at first one... */
1481 : :
1482 : : /* Sort the mergeclauses into the corresponding ordering */
1483 : : cur_mergeclauses =
2752 tgl@sss.pgh.pa.us 1484 : 286798 : find_mergeclauses_for_outer_pathkeys(root,
1485 : : outerkeys,
1486 : : extra->mergeclause_list);
1487 : :
1488 : : /* Should have used them all... */
3772 1489 [ - + ]: 286798 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1490 : :
1491 : : /* Build sort pathkeys for the inner side */
6804 1492 : 286798 : innerkeys = make_inner_pathkeys_for_merge(root,
1493 : : cur_mergeclauses,
1494 : : outerkeys);
1495 : :
1496 : : /* Build pathkeys representing output sort order */
7531 1497 : 286798 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1498 : : outerkeys);
1499 : :
1500 : : /*
1501 : : * And now we can make the path.
1502 : : *
1503 : : * Note: it's possible that the cheapest paths will already be sorted
1504 : : * properly. try_mergejoin_path will detect that case and suppress an
1505 : : * explicit sort step, so we needn't do so here.
1506 : : */
4971 1507 : 286798 : try_mergejoin_path(root,
1508 : : joinrel,
1509 : : outer_path,
1510 : : inner_path,
1511 : : merge_pathkeys,
1512 : : cur_mergeclauses,
1513 : : outerkeys,
1514 : : innerkeys,
1515 : : jointype,
1516 : : extra,
1517 : : false);
1518 : :
1519 : : /*
1520 : : * If we have partial outer and parallel safe inner path then try
1521 : : * partial mergejoin path.
1522 : : */
3105 rhaas@postgresql.org 1523 [ + + + + ]: 286798 : if (cheapest_partial_outer && cheapest_safe_inner)
1524 : 6746 : try_partial_mergejoin_path(root,
1525 : : joinrel,
1526 : : cheapest_partial_outer,
1527 : : cheapest_safe_inner,
1528 : : merge_pathkeys,
1529 : : cur_mergeclauses,
1530 : : outerkeys,
1531 : : innerkeys,
1532 : : jointype,
1533 : : extra);
1534 : : }
1535 : : }
1536 : :
1537 : : /*
1538 : : * generate_mergejoin_paths
1539 : : * Creates possible mergejoin paths for input outerpath.
1540 : : *
1541 : : * We generate mergejoins if mergejoin clauses are available. We have
1542 : : * two ways to generate the inner path for a mergejoin: sort the cheapest
1543 : : * inner path, or use an inner path that is already suitably ordered for the
1544 : : * merge. If we have several mergeclauses, it could be that there is no inner
1545 : : * path (or only a very expensive one) for the full list of mergeclauses, but
1546 : : * better paths exist if we truncate the mergeclause list (thereby discarding
1547 : : * some sort key requirements). So, we consider truncations of the
1548 : : * mergeclause list as well as the full list. (Ideally we'd consider all
1549 : : * subsets of the mergeclause list, but that seems way too expensive.)
1550 : : */
1551 : : static void
3181 1552 : 613557 : generate_mergejoin_paths(PlannerInfo *root,
1553 : : RelOptInfo *joinrel,
1554 : : RelOptInfo *innerrel,
1555 : : Path *outerpath,
1556 : : JoinType jointype,
1557 : : JoinPathExtraData *extra,
1558 : : bool useallclauses,
1559 : : Path *inner_cheapest_total,
1560 : : List *merge_pathkeys,
1561 : : bool is_partial)
1562 : : {
1563 : : List *mergeclauses;
1564 : : List *innersortkeys;
1565 : : List *trialsortkeys;
1566 : : Path *cheapest_startup_inner;
1567 : : Path *cheapest_total_inner;
1568 : : int num_sortkeys;
1569 : : int sortkeycnt;
1570 : :
1571 : : /* Look for useful mergeclauses (if any) */
1572 : : mergeclauses =
2752 tgl@sss.pgh.pa.us 1573 : 613557 : find_mergeclauses_for_outer_pathkeys(root,
1574 : : outerpath->pathkeys,
1575 : : extra->mergeclause_list);
1576 : :
1577 : : /*
1578 : : * Done with this outer path if no chance for a mergejoin.
1579 : : *
1580 : : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1581 : : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1582 : : * but since mergejoin is our only join type that supports FULL JOIN
1583 : : * without any join clauses, it's necessary to generate a clauseless
1584 : : * mergejoin path instead.
1585 : : */
3181 rhaas@postgresql.org 1586 [ + + ]: 613557 : if (mergeclauses == NIL)
1587 : : {
1588 [ + + ]: 408681 : if (jointype == JOIN_FULL)
1589 : : /* okay to try for mergejoin */ ;
1590 : : else
1591 : 406989 : return;
1592 : : }
1593 [ + + + + ]: 250421 : if (useallclauses &&
1594 : 43853 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1595 : 6784 : return;
1596 : :
1597 : : /* Compute the required ordering of the inner path */
1598 : 199784 : innersortkeys = make_inner_pathkeys_for_merge(root,
1599 : : mergeclauses,
1600 : : outerpath->pathkeys);
1601 : :
1602 : : /*
1603 : : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1604 : : * a sort will be needed, only cheapest total cost matters. (But
1605 : : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1606 : : * already correctly sorted.)
1607 : : */
1608 : 199784 : try_mergejoin_path(root,
1609 : : joinrel,
1610 : : outerpath,
1611 : : inner_cheapest_total,
1612 : : merge_pathkeys,
1613 : : mergeclauses,
1614 : : NIL,
1615 : : innersortkeys,
1616 : : jointype,
1617 : : extra,
1618 : : is_partial);
1619 : :
1620 : : /*
1621 : : * Look for presorted inner paths that satisfy the innersortkey list ---
1622 : : * or any truncation thereof, if we are allowed to build a mergejoin using
1623 : : * a subset of the merge clauses. Here, we consider both cheap startup
1624 : : * cost and cheap total cost.
1625 : : *
1626 : : * Currently we do not consider parameterized inner paths here. This
1627 : : * interacts with decisions elsewhere that also discriminate against
1628 : : * mergejoins with parameterized inputs; see comments in
1629 : : * src/backend/optimizer/README.
1630 : : *
1631 : : * As we shorten the sortkey list, we should consider only paths that are
1632 : : * strictly cheaper than (in particular, not the same as) any path found
1633 : : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1634 : : * merge keys than a given path allows (treating the rest as plain
1635 : : * joinquals), which is unlikely to be a good idea. Also, eliminating
1636 : : * paths here on the basis of compare_path_costs is a lot cheaper than
1637 : : * building the mergejoin path only to throw it away.
1638 : : *
1639 : : * If inner_cheapest_total is well enough sorted to have not required a
1640 : : * sort in the path made above, we shouldn't make a duplicate path with
1641 : : * it, either. We handle that case with the same logic that handles the
1642 : : * previous consideration, by initializing the variables that track
1643 : : * cheapest-so-far properly. Note that we do NOT reject
1644 : : * inner_cheapest_total if we find it matches some shorter set of
1645 : : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1646 : : * sorting inner_cheapest_total, whereas we did sort it above, so the
1647 : : * plans being considered are different.
1648 : : */
1649 [ + + ]: 199784 : if (pathkeys_contained_in(innersortkeys,
1650 : : inner_cheapest_total->pathkeys))
1651 : : {
1652 : : /* inner_cheapest_total didn't require a sort */
1653 : 6676 : cheapest_startup_inner = inner_cheapest_total;
1654 : 6676 : cheapest_total_inner = inner_cheapest_total;
1655 : : }
1656 : : else
1657 : : {
1658 : : /* it did require a sort, at least for the full set of keys */
1659 : 193108 : cheapest_startup_inner = NULL;
1660 : 193108 : cheapest_total_inner = NULL;
1661 : : }
1662 : 199784 : num_sortkeys = list_length(innersortkeys);
1663 [ + + + + ]: 199784 : if (num_sortkeys > 1 && !useallclauses)
2999 tgl@sss.pgh.pa.us 1664 : 7802 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1665 : : else
3181 rhaas@postgresql.org 1666 : 191982 : trialsortkeys = innersortkeys; /* won't really truncate */
1667 : :
1668 [ + + ]: 370369 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1669 : : {
1670 : : Path *innerpath;
1671 : 207582 : List *newclauses = NIL;
1672 : :
1673 : : /*
1674 : : * Look for an inner path ordered well enough for the first
1675 : : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1676 : : * destructively, which is why we made a copy...
1677 : : */
1678 : 207582 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1679 : 207582 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1680 : : trialsortkeys,
1681 : : NULL,
1682 : : TOTAL_COST,
1683 : : is_partial);
1684 [ + + + + ]: 207582 : if (innerpath != NULL &&
1685 [ + + ]: 11981 : (cheapest_total_inner == NULL ||
1686 : 11981 : compare_path_costs(innerpath, cheapest_total_inner,
1687 : : TOTAL_COST) < 0))
1688 : : {
1689 : : /* Found a cheap (or even-cheaper) sorted path */
1690 : : /* Select the right mergeclauses, if we didn't already */
1691 [ + + ]: 120260 : if (sortkeycnt < num_sortkeys)
1692 : : {
1693 : : newclauses =
2752 tgl@sss.pgh.pa.us 1694 : 2745 : trim_mergeclauses_for_inner_pathkeys(root,
1695 : : mergeclauses,
1696 : : trialsortkeys);
3181 rhaas@postgresql.org 1697 [ - + ]: 2745 : Assert(newclauses != NIL);
1698 : : }
1699 : : else
1700 : 117515 : newclauses = mergeclauses;
1701 : 120260 : try_mergejoin_path(root,
1702 : : joinrel,
1703 : : outerpath,
1704 : : innerpath,
1705 : : merge_pathkeys,
1706 : : newclauses,
1707 : : NIL,
1708 : : NIL,
1709 : : jointype,
1710 : : extra,
1711 : : is_partial);
1712 : 120260 : cheapest_total_inner = innerpath;
1713 : : }
1714 : : /* Same on the basis of cheapest startup cost ... */
1715 : 207582 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1716 : : trialsortkeys,
1717 : : NULL,
1718 : : STARTUP_COST,
1719 : : is_partial);
1720 [ + + + + ]: 207582 : if (innerpath != NULL &&
1721 [ + + ]: 11981 : (cheapest_startup_inner == NULL ||
1722 : 11981 : compare_path_costs(innerpath, cheapest_startup_inner,
1723 : : STARTUP_COST) < 0))
1724 : : {
1725 : : /* Found a cheap (or even-cheaper) sorted path */
1726 [ + + ]: 118959 : if (innerpath != cheapest_total_inner)
1727 : : {
1728 : : /*
1729 : : * Avoid rebuilding clause list if we already made one; saves
1730 : : * memory in big join trees...
1731 : : */
1732 [ + + ]: 1446 : if (newclauses == NIL)
1733 : : {
1734 [ - + ]: 41 : if (sortkeycnt < num_sortkeys)
1735 : : {
1736 : : newclauses =
2752 tgl@sss.pgh.pa.us 1737 :LBC (1) : trim_mergeclauses_for_inner_pathkeys(root,
1738 : : mergeclauses,
1739 : : trialsortkeys);
3181 rhaas@postgresql.org 1740 [ # # ]: (1) : Assert(newclauses != NIL);
1741 : : }
1742 : : else
3181 rhaas@postgresql.org 1743 :CBC 41 : newclauses = mergeclauses;
1744 : : }
1745 : 1446 : try_mergejoin_path(root,
1746 : : joinrel,
1747 : : outerpath,
1748 : : innerpath,
1749 : : merge_pathkeys,
1750 : : newclauses,
1751 : : NIL,
1752 : : NIL,
1753 : : jointype,
1754 : : extra,
1755 : : is_partial);
1756 : : }
1757 : 118959 : cheapest_startup_inner = innerpath;
1758 : : }
1759 : :
1760 : : /*
1761 : : * Don't consider truncated sortkeys if we need all clauses.
1762 : : */
1763 [ + + ]: 207582 : if (useallclauses)
1764 : 36997 : break;
1765 : : }
1766 : : }
1767 : :
1768 : : /*
1769 : : * match_unsorted_outer
1770 : : * Creates possible join paths for processing a single join relation
1771 : : * 'joinrel' by employing either iterative substitution or
1772 : : * mergejoining on each of its possible outer paths (considering
1773 : : * only outer paths that are already ordered well enough for merging).
1774 : : *
1775 : : * We always generate a nestloop path for each available outer path.
1776 : : * In fact we may generate as many as five: one on the cheapest-total-cost
1777 : : * inner path, one on the same with materialization, one on the
1778 : : * cheapest-startup-cost inner path (if different), one on the
1779 : : * cheapest-total inner-indexscan path (if any), and one on the
1780 : : * cheapest-startup inner-indexscan path (if different).
1781 : : *
1782 : : * We also consider mergejoins if mergejoin clauses are available. See
1783 : : * detailed comments in generate_mergejoin_paths.
1784 : : *
1785 : : * 'joinrel' is the join relation
1786 : : * 'outerrel' is the outer join relation
1787 : : * 'innerrel' is the inner join relation
1788 : : * 'jointype' is the type of join to do
1789 : : * 'extra' contains additional input values
1790 : : */
1791 : : static void
7398 tgl@sss.pgh.pa.us 1792 : 315966 : match_unsorted_outer(PlannerInfo *root,
1793 : : RelOptInfo *joinrel,
1794 : : RelOptInfo *outerrel,
1795 : : RelOptInfo *innerrel,
1796 : : JoinType jointype,
1797 : : JoinPathExtraData *extra)
1798 : : {
1799 : : bool nestjoinOK;
1800 : : bool useallclauses;
8265 1801 : 315966 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
8316 1802 : 315966 : Path *matpath = NULL;
1803 : : ListCell *lc1;
1804 : :
1805 : : /*
1806 : : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1807 : : * join.
1808 : : */
428 rguo@postgresql.org 1809 [ + + ]: 315966 : if (jointype == JOIN_RIGHT_SEMI)
1810 : 51 : return;
1811 : :
1812 : : /*
1813 : : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1814 : : * are doing a right, right-anti or full mergejoin, we must use *all* the
1815 : : * mergeclauses as join clauses, else we will not have a valid plan.
1816 : : * (Although these two flags are currently inverses, keep them separate
1817 : : * for clarity and possible future changes.)
1818 : : */
9125 tgl@sss.pgh.pa.us 1819 [ + + - - ]: 315915 : switch (jointype)
1820 : : {
1821 : 268174 : case JOIN_INNER:
1822 : : case JOIN_LEFT:
1823 : : case JOIN_SEMI:
1824 : : case JOIN_ANTI:
1825 : 268174 : nestjoinOK = true;
8910 1826 : 268174 : useallclauses = false;
9125 1827 : 268174 : break;
8910 1828 : 47741 : case JOIN_RIGHT:
1829 : : case JOIN_RIGHT_ANTI:
1830 : : case JOIN_FULL:
9125 1831 : 47741 : nestjoinOK = false;
8910 1832 : 47741 : useallclauses = true;
1833 : 47741 : break;
8910 tgl@sss.pgh.pa.us 1834 :UBC 0 : default:
8079 1835 [ # # ]: 0 : elog(ERROR, "unrecognized join type: %d",
1836 : : (int) jointype);
1837 : : nestjoinOK = false; /* keep compiler quiet */
1838 : : useallclauses = false;
1839 : : break;
1840 : : }
1841 : :
1842 : : /*
1843 : : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1844 : : * we will consider it below as a member of cheapest_parameterized_paths,
1845 : : * but the other possibilities considered in this routine aren't usable.
1846 : : *
1847 : : * Furthermore, if the inner side is a unique-ified relation, we cannot
1848 : : * generate any valid paths here, because the inner rel's dependency on
1849 : : * the outer rel makes unique-ification meaningless.
1850 : : */
4756 tgl@sss.pgh.pa.us 1851 [ + + + - :CBC 315915 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+ + + + +
- + + ]
1852 : : {
1853 : 6087 : inner_cheapest_total = NULL;
1854 : :
18 rguo@postgresql.org 1855 [ + + + + :GNC 6087 : if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype))
+ - ]
4778 tgl@sss.pgh.pa.us 1856 :CBC 18 : return;
1857 : : }
1858 : :
18 rguo@postgresql.org 1859 [ + + ]:GNC 315897 : if (nestjoinOK)
1860 : : {
1861 : : /*
1862 : : * Consider materializing the cheapest inner path, unless
1863 : : * enable_material is off or the path in question materializes its
1864 : : * output anyway.
1865 : : */
4778 tgl@sss.pgh.pa.us 1866 [ + + + + ]:CBC 268156 : if (enable_material && inner_cheapest_total != NULL &&
5619 rhaas@postgresql.org 1867 [ + + ]: 261846 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1868 : : matpath = (Path *)
8265 tgl@sss.pgh.pa.us 1869 : 248929 : create_material_path(innerrel, inner_cheapest_total);
1870 : : }
1871 : :
4971 1872 [ + - + + : 1049842 : foreach(lc1, outerrel->pathlist)
+ + ]
1873 : : {
1874 : 733945 : Path *outerpath = (Path *) lfirst(lc1);
1875 : : List *merge_pathkeys;
1876 : :
1877 : : /*
1878 : : * We cannot use an outer path that is parameterized by the inner rel.
1879 : : */
4756 1880 [ + + + - : 733945 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ + + + +
- + + ]
4971 1881 : 121409 : continue;
1882 : :
1883 : : /*
1884 : : * The result will have this sort order (even if it is implemented as
1885 : : * a nestloop, and even if some of the mergeclauses are implemented by
1886 : : * qpquals rather than as true mergeclauses):
1887 : : */
7531 1888 : 612536 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1889 : : outerpath->pathkeys);
1890 : :
18 rguo@postgresql.org 1891 [ + + ]:GNC 612536 : if (nestjoinOK)
1892 : : {
1893 : : /*
1894 : : * Consider nestloop joins using this outer path and various
1895 : : * available paths for the inner relation. We consider the
1896 : : * cheapest-total paths for each available parameterization of the
1897 : : * inner relation, including the unparameterized case.
1898 : : */
1899 : : ListCell *lc2;
1900 : :
4971 tgl@sss.pgh.pa.us 1901 [ + - + + :CBC 1368860 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
1902 : : {
1903 : 846815 : Path *innerpath = (Path *) lfirst(lc2);
1904 : : Path *mpath;
1905 : :
1906 : 846815 : try_nestloop_path(root,
1907 : : joinrel,
1908 : : outerpath,
1909 : : innerpath,
1910 : : merge_pathkeys,
1911 : : jointype,
1912 : : extra);
1913 : :
1914 : : /*
1915 : : * Try generating a memoize path and see if that makes the
1916 : : * nested loop any cheaper.
1917 : : */
1515 drowley@postgresql.o 1918 : 846815 : mpath = get_memoize_path(root, innerrel, outerrel,
1919 : : innerpath, outerpath, jointype,
1920 : : extra);
1921 [ + + ]: 846815 : if (mpath != NULL)
1618 1922 : 143838 : try_nestloop_path(root,
1923 : : joinrel,
1924 : : outerpath,
1925 : : mpath,
1926 : : merge_pathkeys,
1927 : : jointype,
1928 : : extra);
1929 : : }
1930 : :
1931 : : /* Also consider materialized form of the cheapest inner path */
8316 tgl@sss.pgh.pa.us 1932 [ + + ]: 522045 : if (matpath != NULL)
4971 1933 : 489992 : try_nestloop_path(root,
1934 : : joinrel,
1935 : : outerpath,
1936 : : matpath,
1937 : : merge_pathkeys,
1938 : : jointype,
1939 : : extra);
1940 : : }
1941 : :
1942 : : /* Can't do anything else if inner rel is parameterized by outer */
4756 1943 [ + + ]: 612536 : if (inner_cheapest_total == NULL)
4778 1944 : 6550 : continue;
1945 : :
1946 : : /* Generate merge join paths */
3181 rhaas@postgresql.org 1947 : 605986 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1948 : : jointype, extra, useallclauses,
1949 : : inner_cheapest_total, merge_pathkeys,
1950 : : false);
1951 : : }
1952 : :
1953 : : /*
1954 : : * Consider partial nestloop and mergejoin plan if outerrel has any
1955 : : * partial path and the joinrel is parallel-safe. However, we can't
1956 : : * handle joins needing lateral rels, since partial paths must not be
1957 : : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1958 : : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1959 : : */
3105 1960 [ + + + + ]: 315897 : if (joinrel->consider_parallel &&
18 rguo@postgresql.org 1961 [ + + ]:GNC 264842 : jointype != JOIN_FULL &&
1962 [ + + ]: 223245 : jointype != JOIN_RIGHT &&
1963 : 222502 : jointype != JOIN_RIGHT_ANTI &&
3105 rhaas@postgresql.org 1964 [ + + ]:CBC 222502 : outerrel->partial_pathlist != NIL &&
3517 1965 [ + - ]: 5777 : bms_is_empty(joinrel->lateral_relids))
1966 : : {
3105 1967 [ + - ]: 5777 : if (nestjoinOK)
1968 : 5777 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1969 : : jointype, extra);
1970 : :
1971 : : /*
1972 : : * If inner_cheapest_total is NULL or non parallel-safe then find the
1973 : : * cheapest total parallel safe path.
1974 : : */
1975 [ + + ]: 5777 : if (inner_cheapest_total == NULL ||
1976 [ + + ]: 5573 : !inner_cheapest_total->parallel_safe)
1977 : : {
1978 : : inner_cheapest_total =
18 rguo@postgresql.org 1979 :GNC 402 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1980 : : }
1981 : :
3105 rhaas@postgresql.org 1982 [ + + ]:CBC 5777 : if (inner_cheapest_total)
1983 : 5561 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1984 : : jointype, extra,
1985 : : inner_cheapest_total);
1986 : : }
1987 : : }
1988 : :
1989 : : /*
1990 : : * consider_parallel_mergejoin
1991 : : * Try to build partial paths for a joinrel by joining a partial path
1992 : : * for the outer relation to a complete path for the inner relation.
1993 : : *
1994 : : * 'joinrel' is the join relation
1995 : : * 'outerrel' is the outer join relation
1996 : : * 'innerrel' is the inner join relation
1997 : : * 'jointype' is the type of join to do
1998 : : * 'extra' contains additional input values
1999 : : * 'inner_cheapest_total' cheapest total path for innerrel
2000 : : */
2001 : : static void
2002 : 5561 : consider_parallel_mergejoin(PlannerInfo *root,
2003 : : RelOptInfo *joinrel,
2004 : : RelOptInfo *outerrel,
2005 : : RelOptInfo *innerrel,
2006 : : JoinType jointype,
2007 : : JoinPathExtraData *extra,
2008 : : Path *inner_cheapest_total)
2009 : : {
2010 : : ListCell *lc1;
2011 : :
2012 : : /* generate merge join path for each partial outer path */
2013 [ + - + + : 13132 : foreach(lc1, outerrel->partial_pathlist)
+ + ]
2014 : : {
2015 : 7571 : Path *outerpath = (Path *) lfirst(lc1);
2016 : : List *merge_pathkeys;
2017 : :
2018 : : /*
2019 : : * Figure out what useful ordering any paths we create will have.
2020 : : */
2021 : 7571 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2022 : : outerpath->pathkeys);
2023 : :
2024 : 7571 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2025 : : extra, false, inner_cheapest_total,
2026 : : merge_pathkeys, true);
2027 : : }
3517 2028 : 5561 : }
2029 : :
2030 : : /*
2031 : : * consider_parallel_nestloop
2032 : : * Try to build partial paths for a joinrel by joining a partial path for the
2033 : : * outer relation to a complete path for the inner relation.
2034 : : *
2035 : : * 'joinrel' is the join relation
2036 : : * 'outerrel' is the outer join relation
2037 : : * 'innerrel' is the inner join relation
2038 : : * 'jointype' is the type of join to do
2039 : : * 'extra' contains additional input values
2040 : : */
2041 : : static void
2042 : 5777 : consider_parallel_nestloop(PlannerInfo *root,
2043 : : RelOptInfo *joinrel,
2044 : : RelOptInfo *outerrel,
2045 : : RelOptInfo *innerrel,
2046 : : JoinType jointype,
2047 : : JoinPathExtraData *extra)
2048 : : {
421 rguo@postgresql.org 2049 : 5777 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2050 : 5777 : Path *matpath = NULL;
2051 : : ListCell *lc1;
2052 : :
2053 : : /*
2054 : : * Consider materializing the cheapest inner path, unless: 1)
2055 : : * enable_material is off, 2) the cheapest inner path is not
2056 : : * parallel-safe, 3) the cheapest inner path is parameterized by the outer
2057 : : * rel, or 4) the cheapest inner path materializes its output anyway.
2058 : : */
18 rguo@postgresql.org 2059 [ + + + + ]:GNC 5777 : if (enable_material && inner_cheapest_total->parallel_safe &&
421 rguo@postgresql.org 2060 [ + + + - :CBC 5562 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
+ + + + +
- - + ]
2061 [ + + ]: 5367 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2062 : : {
2063 : : matpath = (Path *)
2064 : 5319 : create_material_path(innerrel, inner_cheapest_total);
2065 [ - + ]: 5319 : Assert(matpath->parallel_safe);
2066 : : }
2067 : :
3517 rhaas@postgresql.org 2068 [ + - + + : 13633 : foreach(lc1, outerrel->partial_pathlist)
+ + ]
2069 : : {
2070 : 7856 : Path *outerpath = (Path *) lfirst(lc1);
2071 : : List *pathkeys;
2072 : : ListCell *lc2;
2073 : :
2074 : : /* Figure out what useful ordering any paths we create will have. */
2075 : 7856 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2076 : : outerpath->pathkeys);
2077 : :
2078 : : /*
2079 : : * Try the cheapest parameterized paths; only those which will produce
2080 : : * an unparameterized path when joined to this outerrel will survive
2081 : : * try_partial_nestloop_path. The cheapest unparameterized path is
2082 : : * also in this list.
2083 : : */
2084 [ + - + + : 20258 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
2085 : : {
2086 : 12402 : Path *innerpath = (Path *) lfirst(lc2);
2087 : : Path *mpath;
2088 : :
2089 : : /* Can't join to an inner path that is not parallel-safe */
2090 [ + + ]: 12402 : if (!innerpath->parallel_safe)
2091 : 225 : continue;
2092 : :
2093 : 12177 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2094 : : pathkeys, jointype, extra);
2095 : :
2096 : : /*
2097 : : * Try generating a memoize path and see if that makes the nested
2098 : : * loop any cheaper.
2099 : : */
1515 drowley@postgresql.o 2100 : 12177 : mpath = get_memoize_path(root, innerrel, outerrel,
2101 : : innerpath, outerpath, jointype,
2102 : : extra);
2103 [ + + ]: 12177 : if (mpath != NULL)
2104 : 3061 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2105 : : pathkeys, jointype, extra);
2106 : : }
2107 : :
2108 : : /* Also consider materialized form of the cheapest inner path */
421 rguo@postgresql.org 2109 [ + + ]: 7856 : if (matpath != NULL)
2110 : 7317 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2111 : : pathkeys, jointype, extra);
2112 : : }
10651 scrappy@hub.org 2113 : 5777 : }
2114 : :
2115 : : /*
2116 : : * hash_inner_and_outer
2117 : : * Create hashjoin join paths by explicitly hashing both the outer and
2118 : : * inner keys of each available hash clause.
2119 : : *
2120 : : * 'joinrel' is the join relation
2121 : : * 'outerrel' is the outer join relation
2122 : : * 'innerrel' is the inner join relation
2123 : : * 'jointype' is the type of join to do
2124 : : * 'extra' contains additional input values
2125 : : */
2126 : : static void
7398 tgl@sss.pgh.pa.us 2127 : 321222 : hash_inner_and_outer(PlannerInfo *root,
2128 : : RelOptInfo *joinrel,
2129 : : RelOptInfo *outerrel,
2130 : : RelOptInfo *innerrel,
2131 : : JoinType jointype,
2132 : : JoinPathExtraData *extra)
2133 : : {
5364 2134 : 321222 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2135 : : List *hashclauses;
2136 : : ListCell *l;
2137 : :
2138 : : /*
2139 : : * We need to build only one hashclauses list for any given pair of outer
2140 : : * and inner relations; all of the hashable clauses will be used as keys.
2141 : : *
2142 : : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2143 : : * usable with this pair of sub-relations.
2144 : : */
8316 2145 : 321222 : hashclauses = NIL;
3772 2146 [ + + + + : 667368 : foreach(l, extra->restrictlist)
+ + ]
2147 : : {
7773 neilc@samurai.com 2148 : 346146 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2149 : :
2150 : : /*
2151 : : * If processing an outer join, only use its own join clauses for
2152 : : * hashing. For inner joins we need not be so picky.
2153 : : */
2696 tgl@sss.pgh.pa.us 2154 [ + + + + : 346146 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
- + ]
9125 2155 : 5146 : continue;
2156 : :
5832 2157 [ + + ]: 341000 : if (!restrictinfo->can_join ||
2158 [ + + ]: 303036 : restrictinfo->hashjoinoperator == InvalidOid)
2159 : 47068 : continue; /* not hashjoinable */
2160 : :
2161 : : /*
2162 : : * Check if clause has the form "outer op inner" or "inner op outer".
2163 : : */
326 drowley@postgresql.o 2164 [ + + ]: 293932 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2165 : : innerrel->relids))
9343 tgl@sss.pgh.pa.us 2166 : 108 : continue; /* no good for these input relations */
2167 : :
2168 : : /*
2169 : : * If clause has the form "inner op outer", check if its operator has
2170 : : * valid commutator. This is necessary because hashclauses in this
2171 : : * form will get commuted in createplan.c to put the outer var on the
2172 : : * left (see get_switched_clauses). This probably shouldn't ever
2173 : : * fail, since hashable operators ought to have commutators, but be
2174 : : * paranoid.
2175 : : *
2176 : : * The clause being hashjoinable indicates that it's an OpExpr.
2177 : : */
367 rguo@postgresql.org 2178 [ + + + + ]: 440736 : if (!restrictinfo->outer_is_left &&
2179 : 146912 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2180 : 3 : continue;
2181 : :
8316 tgl@sss.pgh.pa.us 2182 : 293821 : hashclauses = lappend(hashclauses, restrictinfo);
2183 : : }
2184 : :
2185 : : /* If we found any usable hashclauses, make paths */
2186 [ + + ]: 321222 : if (hashclauses)
2187 : : {
2188 : : /*
2189 : : * We consider both the cheapest-total-cost and cheapest-startup-cost
2190 : : * outer paths. There's no need to consider any but the
2191 : : * cheapest-total-cost inner path, however.
2192 : : */
8069 bruce@momjian.us 2193 : 263913 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2194 : 263913 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2195 : 263913 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2196 : : ListCell *lc1;
2197 : : ListCell *lc2;
2198 : :
2199 : : /*
2200 : : * If either cheapest-total path is parameterized by the other rel, we
2201 : : * can't use a hashjoin. (There's no use looking for alternative
2202 : : * input paths, since these should already be the least-parameterized
2203 : : * available paths.)
2204 : : */
4756 tgl@sss.pgh.pa.us 2205 [ + + + - : 263913 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
+ + + + +
- + + ]
2206 [ + + + - : 263461 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
+ + + + +
- + + ]
4778 2207 : 904 : return;
2208 : :
2209 : : /*
2210 : : * Consider the cheapest startup outer together with the cheapest
2211 : : * total inner, and then consider pairings of cheapest-total paths
2212 : : * including parameterized ones. There is no use in generating
2213 : : * parameterized paths on the basis of possibly cheap startup cost, so
2214 : : * this is sufficient.
2215 : : */
18 rguo@postgresql.org 2216 [ + + ]:GNC 263009 : if (cheapest_startup_outer != NULL)
4971 tgl@sss.pgh.pa.us 2217 :CBC 262652 : try_hashjoin_path(root,
2218 : : joinrel,
2219 : : cheapest_startup_outer,
2220 : : cheapest_total_inner,
2221 : : hashclauses,
2222 : : jointype,
2223 : : extra);
2224 : :
18 rguo@postgresql.org 2225 [ + - + + :GNC 670644 : foreach(lc1, outerrel->cheapest_parameterized_paths)
+ + ]
2226 : : {
2227 : 407635 : Path *outerpath = (Path *) lfirst(lc1);
2228 : :
2229 : : /*
2230 : : * We cannot use an outer path that is parameterized by the inner
2231 : : * rel.
2232 : : */
2233 [ + + + - : 407635 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ + + + +
- + + ]
2234 : 117927 : continue;
2235 : :
2236 [ + - + + : 746816 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
2237 : : {
2238 : 457108 : Path *innerpath = (Path *) lfirst(lc2);
2239 : :
2240 : : /*
2241 : : * We cannot use an inner path that is parameterized by the
2242 : : * outer rel, either.
2243 : : */
2244 [ + + + - : 457108 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
+ + + + +
- + + ]
4971 tgl@sss.pgh.pa.us 2245 :CBC 133591 : continue;
2246 : :
18 rguo@postgresql.org 2247 [ + + + + ]:GNC 323517 : if (outerpath == cheapest_startup_outer &&
2248 : : innerpath == cheapest_total_inner)
2249 : 221919 : continue; /* already tried it */
2250 : :
2251 : 101598 : try_hashjoin_path(root,
2252 : : joinrel,
2253 : : outerpath,
2254 : : innerpath,
2255 : : hashclauses,
2256 : : jointype,
2257 : : extra);
2258 : : }
2259 : : }
2260 : :
2261 : : /*
2262 : : * If the joinrel is parallel-safe, we may be able to consider a
2263 : : * partial hash join. However, the resulting path must not be
2264 : : * parameterized.
2265 : : */
3426 rhaas@postgresql.org 2266 [ + + + + ]:CBC 263009 : if (joinrel->consider_parallel &&
3517 2267 [ + + ]: 234098 : outerrel->partial_pathlist != NIL &&
2268 [ + - ]: 8685 : bms_is_empty(joinrel->lateral_relids))
2269 : : {
2270 : : Path *cheapest_partial_outer;
2817 andres@anarazel.de 2271 : 8685 : Path *cheapest_partial_inner = NULL;
3440 tgl@sss.pgh.pa.us 2272 : 8685 : Path *cheapest_safe_inner = NULL;
2273 : :
3517 rhaas@postgresql.org 2274 : 8685 : cheapest_partial_outer =
2275 : 8685 : (Path *) linitial(outerrel->partial_pathlist);
2276 : :
2277 : : /*
2278 : : * Can we use a partial inner plan too, so that we can build a
2279 : : * shared hash table in parallel?
2280 : : */
2272 tmunro@postgresql.or 2281 [ + + + + ]: 8685 : if (innerrel->partial_pathlist != NIL &&
2282 : : enable_parallel_hash)
2283 : : {
2817 andres@anarazel.de 2284 : 6372 : cheapest_partial_inner =
2285 : 6372 : (Path *) linitial(innerrel->partial_pathlist);
2286 : 6372 : try_partial_hashjoin_path(root, joinrel,
2287 : : cheapest_partial_outer,
2288 : : cheapest_partial_inner,
2289 : : hashclauses, jointype, extra,
2290 : : true /* parallel_hash */ );
2291 : : }
2292 : :
2293 : : /*
2294 : : * Normally, given that the joinrel is parallel-safe, the cheapest
2295 : : * total inner path will also be parallel-safe, but if not, we'll
2296 : : * have to search for the cheapest safe, unparameterized inner
2297 : : * path. If full, right, right-semi or right-anti join, we can't
2298 : : * use parallelism (building the hash table in each backend)
2299 : : * because no one process has all the match bits.
2300 : : */
18 rguo@postgresql.org 2301 [ + + + + ]:GNC 8685 : if (jointype == JOIN_FULL ||
2302 [ + + ]: 6777 : jointype == JOIN_RIGHT ||
2303 [ + + ]: 4984 : jointype == JOIN_RIGHT_SEMI ||
2304 : : jointype == JOIN_RIGHT_ANTI)
890 tmunro@postgresql.or 2305 :CBC 3866 : cheapest_safe_inner = NULL;
2306 [ + + ]: 4819 : else if (cheapest_total_inner->parallel_safe)
3517 rhaas@postgresql.org 2307 : 4693 : cheapest_safe_inner = cheapest_total_inner;
2308 : : else
2309 : : cheapest_safe_inner =
3105 2310 : 126 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2311 : :
3517 2312 [ + + ]: 8685 : if (cheapest_safe_inner != NULL)
2313 : 4807 : try_partial_hashjoin_path(root, joinrel,
2314 : : cheapest_partial_outer,
2315 : : cheapest_safe_inner,
2316 : : hashclauses, jointype, extra,
2317 : : false /* parallel_hash */ );
2318 : : }
2319 : : }
2320 : : }
2321 : :
2322 : : /*
2323 : : * select_mergejoin_clauses
2324 : : * Select mergejoin clauses that are usable for a particular join.
2325 : : * Returns a list of RestrictInfo nodes for those clauses.
2326 : : *
2327 : : * *mergejoin_allowed is normally set to true, but it is set to false if
2328 : : * this is a right-semi join, or this is a right/right-anti/full join and
2329 : : * there are nonmergejoinable join clauses. The executor's mergejoin
2330 : : * machinery cannot handle such cases, so we have to avoid generating a
2331 : : * mergejoin plan. (Note that this flag does NOT consider whether there are
2332 : : * actually any mergejoinable clauses. This is correct because in some
2333 : : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2334 : : * therefore not enough to distinguish safe from unsafe cases.)
2335 : : *
2336 : : * We also mark each selected RestrictInfo to show which side is currently
2337 : : * being considered as outer. These are transient markings that are only
2338 : : * good for the duration of the current add_paths_to_joinrel() call!
2339 : : *
2340 : : * We examine each restrictinfo clause known for the join to see
2341 : : * if it is mergejoinable and involves vars from the two sub-relations
2342 : : * currently of interest.
2343 : : */
2344 : : static List *
6450 tgl@sss.pgh.pa.us 2345 : 322090 : select_mergejoin_clauses(PlannerInfo *root,
2346 : : RelOptInfo *joinrel,
2347 : : RelOptInfo *outerrel,
2348 : : RelOptInfo *innerrel,
2349 : : List *restrictlist,
2350 : : JoinType jointype,
2351 : : bool *mergejoin_allowed)
2352 : : {
9518 2353 : 322090 : List *result_list = NIL;
9125 2354 : 322090 : bool isouterjoin = IS_OUTER_JOIN(jointype);
5363 2355 : 322090 : bool have_nonmergeable_joinclause = false;
2356 : : ListCell *l;
2357 : :
2358 : : /*
2359 : : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2360 : : * swapping inputs tends to be small here.
2361 : : */
428 rguo@postgresql.org 2362 [ + + ]: 322090 : if (jointype == JOIN_RIGHT_SEMI)
2363 : : {
2364 : 3706 : *mergejoin_allowed = false;
2365 : 3706 : return NIL;
2366 : : }
2367 : :
7773 neilc@samurai.com 2368 [ + + + + : 662332 : foreach(l, restrictlist)
+ + ]
2369 : : {
2370 : 343948 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2371 : :
2372 : : /*
2373 : : * If processing an outer join, only use its own join clauses in the
2374 : : * merge. For inner joins we can use pushed-down clauses too. (Note:
2375 : : * we don't set have_nonmergeable_joinclause here because pushed-down
2376 : : * clauses will become otherquals not joinquals.)
2377 : : */
2696 tgl@sss.pgh.pa.us 2378 [ + + + + : 343948 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
- + ]
7256 2379 : 5158 : continue;
2380 : :
2381 : : /* Check that clause is a mergeable operator clause */
7915 2382 [ + + ]: 338790 : if (!restrictinfo->can_join ||
6804 2383 [ + + ]: 300835 : restrictinfo->mergeopfamilies == NIL)
2384 : : {
2385 : : /*
2386 : : * The executor can handle extra joinquals that are constants, but
2387 : : * not anything else, when doing right/right-anti/full merge join.
2388 : : * (The reason to support constants is so we can do FULL JOIN ON
2389 : : * FALSE.)
2390 : : */
5723 2391 [ + - + + ]: 46326 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
5363 2392 : 41232 : have_nonmergeable_joinclause = true;
9343 2393 : 46326 : continue; /* not mergejoinable */
2394 : : }
2395 : :
2396 : : /*
2397 : : * Check if clause has the form "outer op inner" or "inner op outer".
2398 : : */
326 drowley@postgresql.o 2399 [ + + ]: 292464 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2400 : : innerrel->relids))
2401 : : {
5363 tgl@sss.pgh.pa.us 2402 : 504 : have_nonmergeable_joinclause = true;
8270 2403 : 504 : continue; /* no good for these input relations */
2404 : : }
2405 : :
2406 : : /*
2407 : : * If clause has the form "inner op outer", check if its operator has
2408 : : * valid commutator. This is necessary because mergejoin clauses in
2409 : : * this form will get commuted in createplan.c to put the outer var on
2410 : : * the left (see get_switched_clauses). This probably shouldn't ever
2411 : : * fail, since mergejoinable operators ought to have commutators, but
2412 : : * be paranoid.
2413 : : *
2414 : : * The clause being mergejoinable indicates that it's an OpExpr.
2415 : : */
367 rguo@postgresql.org 2416 [ + + + + ]: 436501 : if (!restrictinfo->outer_is_left &&
2417 : 144541 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2418 : : {
2419 : 18 : have_nonmergeable_joinclause = true;
2420 : 18 : continue;
2421 : : }
2422 : :
2423 : : /*
2424 : : * Insist that each side have a non-redundant eclass. This
2425 : : * restriction is needed because various bits of the planner expect
2426 : : * that each clause in a merge be associable with some pathkey in a
2427 : : * canonical pathkey list, but redundant eclasses can't appear in
2428 : : * canonical sort orderings. (XXX it might be worth relaxing this,
2429 : : * but not enough time to address it for 8.3.)
2430 : : */
5426 tgl@sss.pgh.pa.us 2431 : 291942 : update_mergeclause_eclasses(root, restrictinfo);
2432 : :
6450 2433 [ + + ]: 291942 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2434 [ + + ]: 291918 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2435 : : {
5363 2436 : 64 : have_nonmergeable_joinclause = true;
6450 2437 : 64 : continue; /* can't handle redundant eclasses */
2438 : : }
2439 : :
6804 2440 : 291878 : result_list = lappend(result_list, restrictinfo);
2441 : : }
2442 : :
2443 : : /*
2444 : : * Report whether mergejoin is allowed (see comment at top of function).
2445 : : */
5364 2446 [ + + ]: 318384 : switch (jointype)
2447 : : {
2448 : 51849 : case JOIN_RIGHT:
2449 : : case JOIN_RIGHT_ANTI:
2450 : : case JOIN_FULL:
5363 2451 : 51849 : *mergejoin_allowed = !have_nonmergeable_joinclause;
5364 2452 : 51849 : break;
2453 : 266535 : default:
5363 2454 : 266535 : *mergejoin_allowed = true;
5364 2455 : 266535 : break;
2456 : : }
2457 : :
9518 2458 : 318384 : return result_list;
2459 : : }
|