Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * clausesel.c
4 : : * Routines to compute clause selectivities
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/optimizer/path/clausesel.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "nodes/nodeFuncs.h"
18 : : #include "optimizer/clauses.h"
19 : : #include "optimizer/optimizer.h"
20 : : #include "optimizer/pathnode.h"
21 : : #include "optimizer/plancat.h"
22 : : #include "statistics/statistics.h"
23 : : #include "utils/fmgroids.h"
24 : : #include "utils/lsyscache.h"
25 : : #include "utils/selfuncs.h"
26 : :
27 : : /*
28 : : * Data structure for accumulating info about possible range-query
29 : : * clause pairs in clauselist_selectivity.
30 : : */
31 : : typedef struct RangeQueryClause
32 : : {
33 : : struct RangeQueryClause *next; /* next in linked list */
34 : : Node *var; /* The common variable of the clauses */
35 : : bool have_lobound; /* found a low-bound clause yet? */
36 : : bool have_hibound; /* found a high-bound clause yet? */
37 : : Selectivity lobound; /* Selectivity of a var > something clause */
38 : : Selectivity hibound; /* Selectivity of a var < something clause */
39 : : } RangeQueryClause;
40 : :
41 : : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
42 : : bool varonleft, bool isLTsel, Selectivity s2);
43 : : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
44 : : List *clauses);
45 : : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
46 : : List *clauses,
47 : : int varRelid,
48 : : JoinType jointype,
49 : : SpecialJoinInfo *sjinfo,
50 : : bool use_extended_stats);
51 : :
52 : : /****************************************************************************
53 : : * ROUTINES TO COMPUTE SELECTIVITIES
54 : : ****************************************************************************/
55 : :
56 : : /*
57 : : * clauselist_selectivity -
58 : : * Compute the selectivity of an implicitly-ANDed list of boolean
59 : : * expression clauses. The list can be empty, in which case 1.0
60 : : * must be returned. List elements may be either RestrictInfos
61 : : * or bare expression clauses --- the former is preferred since
62 : : * it allows caching of results.
63 : : *
64 : : * See clause_selectivity() for the meaning of the additional parameters.
65 : : *
66 : : * The basic approach is to apply extended statistics first, on as many
67 : : * clauses as possible, in order to capture cross-column dependencies etc.
68 : : * The remaining clauses are then estimated by taking the product of their
69 : : * selectivities, but that's only right if they have independent
70 : : * probabilities, and in reality they are often NOT independent even if they
71 : : * only refer to a single column. So, we want to be smarter where we can.
72 : : *
73 : : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
74 : : * are recognized as possible range query components if they are restriction
75 : : * opclauses whose operators have scalarltsel or a related function as their
76 : : * restriction selectivity estimator. We pair up clauses of this form that
77 : : * refer to the same variable. An unpairable clause of this kind is simply
78 : : * multiplied into the selectivity product in the normal way. But when we
79 : : * find a pair, we know that the selectivities represent the relative
80 : : * positions of the low and high bounds within the column's range, so instead
81 : : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
82 : : * losel - 1. (To visualize this, see that hisel is the fraction of the range
83 : : * below the high bound, while losel is the fraction above the low bound; so
84 : : * hisel can be interpreted directly as a 0..1 value but we need to convert
85 : : * losel to 1-losel before interpreting it as a value. Then the available
86 : : * range is 1-losel to hisel. However, this calculation double-excludes
87 : : * nulls, so really we need hisel + losel + null_frac - 1.)
88 : : *
89 : : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
90 : : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
91 : : * yields an impossible (negative) result.
92 : : *
93 : : * A free side-effect is that we can recognize redundant inequalities such
94 : : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
95 : : *
96 : : * Of course this is all very dependent on the behavior of the inequality
97 : : * selectivity functions; perhaps some day we can generalize the approach.
98 : : */
99 : : Selectivity
1979 dean.a.rasheed@gmail 100 :CBC 2383564 : clauselist_selectivity(PlannerInfo *root,
101 : : List *clauses,
102 : : int varRelid,
103 : : JoinType jointype,
104 : : SpecialJoinInfo *sjinfo)
105 : : {
106 : 2383564 : return clauselist_selectivity_ext(root, clauses, varRelid,
107 : : jointype, sjinfo, true);
108 : : }
109 : :
110 : : /*
111 : : * clauselist_selectivity_ext -
112 : : * Extended version of clauselist_selectivity(). If "use_extended_stats"
113 : : * is false, all extended statistics will be ignored, and only per-column
114 : : * statistics will be used.
115 : : */
116 : : Selectivity
117 : 2390023 : clauselist_selectivity_ext(PlannerInfo *root,
118 : : List *clauses,
119 : : int varRelid,
120 : : JoinType jointype,
121 : : SpecialJoinInfo *sjinfo,
122 : : bool use_extended_stats)
123 : : {
9519 bruce@momjian.us 124 : 2390023 : Selectivity s1 = 1.0;
125 : : RelOptInfo *rel;
1979 dean.a.rasheed@gmail 126 : 2390023 : Bitmapset *estimatedclauses = NULL;
9519 bruce@momjian.us 127 : 2390023 : RangeQueryClause *rqlist = NULL;
128 : : ListCell *l;
129 : : int listidx;
130 : :
131 : : /*
132 : : * If there's exactly one clause, just go directly to
133 : : * clause_selectivity_ext(). None of what we might do below is relevant.
134 : : */
1979 dean.a.rasheed@gmail 135 [ + + ]: 2390023 : if (list_length(clauses) == 1)
136 : 1186495 : return clause_selectivity_ext(root, (Node *) linitial(clauses),
137 : : varRelid, jointype, sjinfo,
138 : : use_extended_stats);
139 : :
140 : : /*
141 : : * Determine if these clauses reference a single relation. If so, and if
142 : : * it has extended statistics, try to apply those.
143 : : */
144 : 1203528 : rel = find_single_rel_for_clauses(root, clauses);
145 [ + + + + : 1203528 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ + + + ]
146 : : {
147 : : /*
148 : : * Estimate as many clauses as possible using extended statistics.
149 : : *
150 : : * 'estimatedclauses' is populated with the 0-based list position
151 : : * index of clauses estimated here, and that should be ignored below.
152 : : */
153 : 2040 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
154 : : jointype, sjinfo, rel,
155 : : &estimatedclauses, false);
156 : : }
157 : :
158 : : /*
159 : : * Apply normal selectivity estimates for remaining clauses. We'll be
160 : : * careful to skip any clauses which were already estimated above.
161 : : *
162 : : * Anything that doesn't look like a potential rangequery clause gets
163 : : * multiplied into s1 and forgotten. Anything that does gets inserted into
164 : : * an rqlist entry.
165 : : */
3317 simon@2ndQuadrant.co 166 : 1203528 : listidx = -1;
8014 neilc@samurai.com 167 [ + + + + : 1952200 : foreach(l, clauses)
+ + ]
168 : : {
169 : 748672 : Node *clause = (Node *) lfirst(l);
170 : : RestrictInfo *rinfo;
171 : : Selectivity s2;
172 : :
3317 simon@2ndQuadrant.co 173 : 748672 : listidx++;
174 : :
175 : : /*
176 : : * Skip this clause if it's already been estimated by some other
177 : : * statistics above.
178 : : */
179 [ + + ]: 748672 : if (bms_is_member(listidx, estimatedclauses))
180 : 3905 : continue;
181 : :
182 : : /* Compute the selectivity of this clause in isolation */
1979 dean.a.rasheed@gmail 183 : 744767 : s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
184 : : use_extended_stats);
185 : :
186 : : /*
187 : : * Check for being passed a RestrictInfo.
188 : : *
189 : : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
190 : : * 0.0; just use that rather than looking for range pairs.
191 : : */
8157 tgl@sss.pgh.pa.us 192 [ + + ]: 744767 : if (IsA(clause, RestrictInfo))
193 : : {
194 : 743757 : rinfo = (RestrictInfo *) clause;
7248 195 [ + + ]: 743757 : if (rinfo->pseudoconstant)
196 : : {
197 : 21430 : s1 = s1 * s2;
198 : 21430 : continue;
199 : : }
8157 200 : 722327 : clause = (Node *) rinfo->clause;
201 : : }
202 : : else
203 : 1010 : rinfo = NULL;
204 : :
205 : : /*
206 : : * See if it looks like a restriction clause with a pseudoconstant on
207 : : * one side. (Anything more complicated than that might not behave in
208 : : * the simple way we are expecting.) Most of the tests here can be
209 : : * done more efficiently with rinfo than without.
210 : : */
8010 neilc@samurai.com 211 [ + + + - ]: 723337 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
212 : : {
8545 tgl@sss.pgh.pa.us 213 : 623839 : OpExpr *expr = (OpExpr *) clause;
8157 214 : 623839 : bool varonleft = true;
215 : : bool ok;
216 : :
217 [ + + ]: 623839 : if (rinfo)
218 : : {
1191 219 [ + + + + ]: 975495 : ok = (rinfo->num_base_rels == 1) &&
8157 220 : 346732 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
7507 bruce@momjian.us 221 [ + + ]: 5926 : rinfo->right_relids) ||
8157 tgl@sss.pgh.pa.us 222 : 5926 : (varonleft = false,
7507 bruce@momjian.us 223 : 5926 : is_pseudo_constant_clause_relids(linitial(expr->args),
224 : : rinfo->left_relids)));
225 : : }
226 : : else
227 : : {
1930 tgl@sss.pgh.pa.us 228 [ + + - + ]: 1944 : ok = (NumRelids(root, clause) == 1) &&
8157 229 [ - - ]: 942 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
8157 tgl@sss.pgh.pa.us 230 :UBC 0 : (varonleft = false,
8014 neilc@samurai.com 231 : 0 : is_pseudo_constant_clause(linitial(expr->args))));
232 : : }
233 : :
8157 tgl@sss.pgh.pa.us 234 [ + + ]:CBC 623839 : if (ok)
235 : : {
236 : : /*
237 : : * If it's not a "<"/"<="/">"/">=" operator, just merge the
238 : : * selectivity in generically. But if it's the right oprrest,
239 : : * add the clause to rqlist for later processing.
240 : : */
241 [ + + + ]: 347137 : switch (get_oprrest(expr->opno))
242 : : {
243 : 12208 : case F_SCALARLTSEL:
244 : : case F_SCALARLESEL:
245 : 12208 : addRangeClause(&rqlist, clause,
246 : : varonleft, true, s2);
247 : 12208 : break;
248 : 32126 : case F_SCALARGTSEL:
249 : : case F_SCALARGESEL:
250 : 32126 : addRangeClause(&rqlist, clause,
251 : : varonleft, false, s2);
252 : 32126 : break;
253 : 302803 : default:
254 : : /* Just merge the selectivity in generically */
255 : 302803 : s1 = s1 * s2;
256 : 302803 : break;
257 : : }
258 : 347137 : continue; /* drop to loop bottom */
259 : : }
260 : : }
261 : :
262 : : /* Not the right form, so treat it generically. */
9613 263 : 376200 : s1 = s1 * s2;
264 : : }
265 : :
266 : : /*
267 : : * Now scan the rangequery pair list.
268 : : */
9598 269 [ + + ]: 1239367 : while (rqlist != NULL)
270 : : {
271 : : RangeQueryClause *rqnext;
272 : :
273 [ + + + + ]: 35839 : if (rqlist->have_lobound && rqlist->have_hibound)
274 : 7720 : {
275 : : /* Successfully matched a pair of range clauses */
276 : : Selectivity s2;
277 : :
278 : : /*
279 : : * Exact equality to the default value probably means the
280 : : * selectivity function punted. This is not airtight but should
281 : : * be good enough.
282 : : */
7847 283 [ + + ]: 7720 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
284 [ - + ]: 5686 : rqlist->lobound == DEFAULT_INEQ_SEL)
285 : : {
286 : 2034 : s2 = DEFAULT_RANGE_INEQ_SEL;
287 : : }
288 : : else
289 : : {
290 : 5686 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
291 : :
292 : : /* Adjust for double-exclusion of NULLs */
6822 293 : 5686 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
294 : : varRelid, jointype, sjinfo);
295 : :
296 : : /*
297 : : * A zero or slightly negative s2 should be converted into a
298 : : * small positive value; we probably are dealing with a very
299 : : * tight range and got a bogus result due to roundoff errors.
300 : : * However, if s2 is very negative, then we probably have
301 : : * default selectivity estimates on one or both sides of the
302 : : * range that we failed to recognize above for some reason.
303 : : */
7847 304 [ + + ]: 5686 : if (s2 <= 0.0)
305 : : {
306 [ + + ]: 687 : if (s2 < -0.01)
307 : : {
308 : : /*
309 : : * No data available --- use a default estimate that
310 : : * is small, but not real small.
311 : : */
312 : 10 : s2 = DEFAULT_RANGE_INEQ_SEL;
313 : : }
314 : : else
315 : : {
316 : : /*
317 : : * It's just roundoff error; use a small positive
318 : : * value
319 : : */
320 : 677 : s2 = 1.0e-10;
321 : : }
322 : : }
323 : : }
324 : : /* Merge in the selectivity of the pair of clauses */
9539 325 : 7720 : s1 *= s2;
326 : : }
327 : : else
328 : : {
329 : : /* Only found one of a pair, merge it in generically */
9598 330 [ + + ]: 28119 : if (rqlist->have_lobound)
331 : 23934 : s1 *= rqlist->lobound;
332 : : else
333 : 4185 : s1 *= rqlist->hibound;
334 : : }
335 : : /* release storage and advance */
336 : 35839 : rqnext = rqlist->next;
337 : 35839 : pfree(rqlist);
338 : 35839 : rqlist = rqnext;
339 : : }
340 : :
9613 341 : 1203528 : return s1;
342 : : }
343 : :
344 : : /*
345 : : * clauselist_selectivity_or -
346 : : * Compute the selectivity of an implicitly-ORed list of boolean
347 : : * expression clauses. The list can be empty, in which case 0.0
348 : : * must be returned. List elements may be either RestrictInfos
349 : : * or bare expression clauses --- the former is preferred since
350 : : * it allows caching of results.
351 : : *
352 : : * See clause_selectivity() for the meaning of the additional parameters.
353 : : *
354 : : * The basic approach is to apply extended statistics first, on as many
355 : : * clauses as possible, in order to capture cross-column dependencies etc.
356 : : * The remaining clauses are then estimated as if they were independent.
357 : : */
358 : : static Selectivity
1979 dean.a.rasheed@gmail 359 : 12578 : clauselist_selectivity_or(PlannerInfo *root,
360 : : List *clauses,
361 : : int varRelid,
362 : : JoinType jointype,
363 : : SpecialJoinInfo *sjinfo,
364 : : bool use_extended_stats)
365 : : {
366 : 12578 : Selectivity s1 = 0.0;
367 : : RelOptInfo *rel;
368 : 12578 : Bitmapset *estimatedclauses = NULL;
369 : : ListCell *lc;
370 : : int listidx;
371 : :
372 : : /*
373 : : * Determine if these clauses reference a single relation. If so, and if
374 : : * it has extended statistics, try to apply those.
375 : : */
376 : 12578 : rel = find_single_rel_for_clauses(root, clauses);
377 [ + + + + : 12578 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ + + + ]
378 : : {
379 : : /*
380 : : * Estimate as many clauses as possible using extended statistics.
381 : : *
382 : : * 'estimatedclauses' is populated with the 0-based list position
383 : : * index of clauses estimated here, and that should be ignored below.
384 : : */
385 : 130 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
386 : : jointype, sjinfo, rel,
387 : : &estimatedclauses, true);
388 : : }
389 : :
390 : : /*
391 : : * Estimate the remaining clauses as if they were independent.
392 : : *
393 : : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
394 : : * for the probable overlap of selected tuple sets.
395 : : *
396 : : * XXX is this too conservative?
397 : : */
398 : 12578 : listidx = -1;
399 [ + - + + : 41141 : foreach(lc, clauses)
+ + ]
400 : : {
401 : : Selectivity s2;
402 : :
403 : 28563 : listidx++;
404 : :
405 : : /*
406 : : * Skip this clause if it's already been estimated by some other
407 : : * statistics above.
408 : : */
409 [ + + ]: 28563 : if (bms_is_member(listidx, estimatedclauses))
410 : 200 : continue;
411 : :
412 : 28363 : s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
413 : : jointype, sjinfo, use_extended_stats);
414 : :
415 : 28363 : s1 = s1 + s2 - s1 * s2;
416 : : }
417 : :
418 : 12578 : return s1;
419 : : }
420 : :
421 : : /*
422 : : * addRangeClause --- add a new range clause for clauselist_selectivity
423 : : *
424 : : * Here is where we try to match up pairs of range-query clauses
425 : : */
426 : : static void
9598 tgl@sss.pgh.pa.us 427 : 44334 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
428 : : bool varonleft, bool isLTsel, Selectivity s2)
429 : : {
430 : : RangeQueryClause *rqelem;
431 : : Node *var;
432 : : bool is_lobound;
433 : :
9116 434 [ + + ]: 44334 : if (varonleft)
435 : : {
8511 436 : 44151 : var = get_leftop((Expr *) clause);
9519 bruce@momjian.us 437 : 44151 : is_lobound = !isLTsel; /* x < something is high bound */
438 : : }
439 : : else
440 : : {
8511 tgl@sss.pgh.pa.us 441 : 183 : var = get_rightop((Expr *) clause);
9598 442 : 183 : is_lobound = isLTsel; /* something < x is low bound */
443 : : }
444 : :
445 [ + + ]: 45515 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
446 : : {
447 : : /*
448 : : * We use full equal() here because the "var" might be a function of
449 : : * one or more attributes of the same relation...
450 : : */
9519 bruce@momjian.us 451 [ + + ]: 9676 : if (!equal(var, rqelem->var))
9598 tgl@sss.pgh.pa.us 452 : 1181 : continue;
453 : : /* Found the right group to put this clause in */
454 [ + + ]: 8495 : if (is_lobound)
455 : : {
9519 bruce@momjian.us 456 [ + + ]: 521 : if (!rqelem->have_lobound)
457 : : {
9598 tgl@sss.pgh.pa.us 458 : 186 : rqelem->have_lobound = true;
459 : 186 : rqelem->lobound = s2;
460 : : }
461 : : else
462 : : {
463 : :
464 : : /*------
465 : : * We have found two similar clauses, such as
466 : : * x < y AND x <= z.
467 : : * Keep only the more restrictive one.
468 : : *------
469 : : */
470 [ + + ]: 335 : if (rqelem->lobound > s2)
471 : 50 : rqelem->lobound = s2;
472 : : }
473 : : }
474 : : else
475 : : {
9519 bruce@momjian.us 476 [ + + ]: 7974 : if (!rqelem->have_hibound)
477 : : {
9598 tgl@sss.pgh.pa.us 478 : 7534 : rqelem->have_hibound = true;
479 : 7534 : rqelem->hibound = s2;
480 : : }
481 : : else
482 : : {
483 : :
484 : : /*------
485 : : * We have found two similar clauses, such as
486 : : * x > y AND x >= z.
487 : : * Keep only the more restrictive one.
488 : : *------
489 : : */
490 [ + + ]: 440 : if (rqelem->hibound > s2)
491 : 130 : rqelem->hibound = s2;
492 : : }
493 : : }
494 : 8495 : return;
495 : : }
496 : :
497 : : /* No matching var found, so make a new clause-pair data structure */
146 michael@paquier.xyz 498 :GNC 35839 : rqelem = palloc_object(RangeQueryClause);
9598 tgl@sss.pgh.pa.us 499 :CBC 35839 : rqelem->var = var;
500 [ + + ]: 35839 : if (is_lobound)
501 : : {
502 : 31468 : rqelem->have_lobound = true;
503 : 31468 : rqelem->have_hibound = false;
504 : 31468 : rqelem->lobound = s2;
505 : : }
506 : : else
507 : : {
508 : 4371 : rqelem->have_lobound = false;
509 : 4371 : rqelem->have_hibound = true;
510 : 4371 : rqelem->hibound = s2;
511 : : }
512 : 35839 : rqelem->next = *rqlist;
513 : 35839 : *rqlist = rqelem;
514 : : }
515 : :
516 : : /*
517 : : * find_single_rel_for_clauses
518 : : * Examine each clause in 'clauses' and determine if all clauses
519 : : * reference only a single relation. If so return that relation,
520 : : * otherwise return NULL.
521 : : */
522 : : static RelOptInfo *
3314 523 : 1218435 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
524 : : {
525 : 1218435 : int lastrelid = 0;
526 : : ListCell *l;
527 : :
3316 simon@2ndQuadrant.co 528 [ + + + + : 1599766 : foreach(l, clauses)
+ + ]
529 : : {
530 : 591631 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
531 : : int relid;
532 : :
533 : : /*
534 : : * If we have a list of bare clauses rather than RestrictInfos, we
535 : : * could pull out their relids the hard way with pull_varnos().
536 : : * However, currently the extended-stats machinery won't do anything
537 : : * with non-RestrictInfo clauses anyway, so there's no point in
538 : : * spending extra cycles; just fail if that's what we have.
539 : : *
540 : : * An exception to that rule is if we have a bare BoolExpr AND clause.
541 : : * We treat this as a special case because the restrictinfo machinery
542 : : * doesn't build RestrictInfos on top of AND clauses.
543 : : */
1974 dean.a.rasheed@gmail 544 [ + + ]: 591631 : if (is_andclause(rinfo))
545 : 1733 : {
546 : : RelOptInfo *rel;
547 : :
548 : 2329 : rel = find_single_rel_for_clauses(root,
549 : : ((BoolExpr *) rinfo)->args);
550 : :
551 [ + + ]: 2329 : if (rel == NULL)
552 : 210300 : return NULL;
553 [ + + ]: 1733 : if (lastrelid == 0)
554 : 871 : lastrelid = rel->relid;
555 [ - + ]: 862 : else if (rel->relid != lastrelid)
1974 dean.a.rasheed@gmail 556 :UBC 0 : return NULL;
557 : :
1974 dean.a.rasheed@gmail 558 :CBC 30149 : continue;
559 : : }
560 : :
3314 tgl@sss.pgh.pa.us 561 [ + + ]: 589302 : if (!IsA(rinfo, RestrictInfo))
562 : 883 : return NULL;
563 : :
564 [ + + ]: 588419 : if (bms_is_empty(rinfo->clause_relids))
565 : 28416 : continue; /* we can ignore variable-free clauses */
566 [ + + ]: 560003 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
567 : 205561 : return NULL; /* multiple relations in this clause */
568 [ + + ]: 354442 : if (lastrelid == 0)
569 : 179259 : lastrelid = relid; /* first clause referencing a relation */
570 [ + + ]: 175183 : else if (relid != lastrelid)
571 : 3260 : return NULL; /* relation not same as last one */
572 : : }
573 : :
3316 simon@2ndQuadrant.co 574 [ + + ]: 1008135 : if (lastrelid != 0)
575 : 139040 : return find_base_rel(root, lastrelid);
576 : :
3314 tgl@sss.pgh.pa.us 577 : 869095 : return NULL; /* no clauses */
578 : : }
579 : :
580 : : /*
581 : : * treat_as_join_clause -
582 : : * Decide whether an operator clause is to be handled by the
583 : : * restriction or join estimator. Subroutine for clause_selectivity().
584 : : */
585 : : static inline bool
1930 586 : 854941 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
587 : : int varRelid, SpecialJoinInfo *sjinfo)
588 : : {
6471 589 [ + + ]: 854941 : if (varRelid != 0)
590 : : {
591 : : /*
592 : : * Caller is forcing restriction mode (eg, because we are examining an
593 : : * inner indexscan qual).
594 : : */
595 : 345663 : return false;
596 : : }
597 [ + + ]: 509278 : else if (sjinfo == NULL)
598 : : {
599 : : /*
600 : : * It must be a restriction clause, since it's being evaluated at a
601 : : * scan node.
602 : : */
603 : 279870 : return false;
604 : : }
605 : : else
606 : : {
607 : : /*
608 : : * Otherwise, it's a join if there's more than one base relation used.
609 : : * We can optimize this calculation if an rinfo was passed.
610 : : *
611 : : * XXX Since we know the clause is being evaluated at a join, the
612 : : * only way it could be single-relation is if it was delayed by outer
613 : : * joins. We intentionally count only baserels here, not OJs that
614 : : * might be present in rinfo->clause_relids, so that we direct such
615 : : * cases to the restriction qual estimators not join estimators.
616 : : * Eventually some notice should be taken of the possibility of
617 : : * injected nulls, but we'll likely want to do that in the restriction
618 : : * estimators rather than starting to treat such cases as join quals.
619 : : */
620 [ + + ]: 229408 : if (rinfo)
1191 621 : 229023 : return (rinfo->num_base_rels > 1);
622 : : else
1930 623 : 385 : return (NumRelids(root, clause) > 1);
624 : : }
625 : : }
626 : :
627 : :
628 : : /*
629 : : * clause_selectivity -
630 : : * Compute the selectivity of a general boolean expression clause.
631 : : *
632 : : * The clause can be either a RestrictInfo or a plain expression. If it's
633 : : * a RestrictInfo, we try to cache the selectivity for possible re-use,
634 : : * so passing RestrictInfos is preferred.
635 : : *
636 : : * varRelid is either 0 or a rangetable index.
637 : : *
638 : : * When varRelid is not 0, only variables belonging to that relation are
639 : : * considered in computing selectivity; other vars are treated as constants
640 : : * of unknown values. This is appropriate for estimating the selectivity of
641 : : * a join clause that is being used as a restriction clause in a scan of a
642 : : * nestloop join's inner relation --- varRelid should then be the ID of the
643 : : * inner relation.
644 : : *
645 : : * When varRelid is 0, all variables are treated as variables. This
646 : : * is appropriate for ordinary join clauses and restriction clauses.
647 : : *
648 : : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
649 : : * if the clause isn't a join clause.
650 : : *
651 : : * sjinfo is NULL for a non-join clause, otherwise it provides additional
652 : : * context information about the join being performed. There are some
653 : : * special cases:
654 : : * 1. For a special (not INNER) join, sjinfo is always a member of
655 : : * root->join_info_list.
656 : : * 2. For an INNER join, sjinfo is just a transient struct, and only the
657 : : * relids and jointype fields in it can be trusted.
658 : : * It is possible for jointype to be different from sjinfo->jointype.
659 : : * This indicates we are considering a variant join: either with
660 : : * the LHS and RHS switched, or with one input unique-ified.
661 : : *
662 : : * Note: when passing nonzero varRelid, it's normally appropriate to set
663 : : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
664 : : * join clause; because we aren't treating it as a join clause.
665 : : */
666 : : Selectivity
7639 667 : 687486 : clause_selectivity(PlannerInfo *root,
668 : : Node *clause,
669 : : int varRelid,
670 : : JoinType jointype,
671 : : SpecialJoinInfo *sjinfo)
672 : : {
1979 dean.a.rasheed@gmail 673 : 687486 : return clause_selectivity_ext(root, clause, varRelid,
674 : : jointype, sjinfo, true);
675 : : }
676 : :
677 : : /*
678 : : * clause_selectivity_ext -
679 : : * Extended version of clause_selectivity(). If "use_extended_stats" is
680 : : * false, all extended statistics will be ignored, and only per-column
681 : : * statistics will be used.
682 : : */
683 : : Selectivity
684 : 2659175 : clause_selectivity_ext(PlannerInfo *root,
685 : : Node *clause,
686 : : int varRelid,
687 : : JoinType jointype,
688 : : SpecialJoinInfo *sjinfo,
689 : : bool use_extended_stats)
690 : : {
6689 tgl@sss.pgh.pa.us 691 : 2659175 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
8157 692 : 2659175 : RestrictInfo *rinfo = NULL;
693 : 2659175 : bool cacheable = false;
694 : :
695 [ - + ]: 2659175 : if (clause == NULL) /* can this still happen? */
9782 tgl@sss.pgh.pa.us 696 :UBC 0 : return s1;
697 : :
8157 tgl@sss.pgh.pa.us 698 [ + + ]:CBC 2659175 : if (IsA(clause, RestrictInfo))
699 : : {
700 : 2638023 : rinfo = (RestrictInfo *) clause;
701 : :
702 : : /*
703 : : * If the clause is marked pseudoconstant, then it will be used as a
704 : : * gating qual and should not affect selectivity estimates; hence
705 : : * return 1.0. The only exception is that a constant FALSE may be
706 : : * taken as having selectivity 0.0, since it will surely mean no rows
707 : : * out of the plan. This case is simple enough that we need not
708 : : * bother caching the result.
709 : : */
7248 710 [ + + ]: 2638023 : if (rinfo->pseudoconstant)
711 : : {
7153 bruce@momjian.us 712 [ + + ]: 24335 : if (!IsA(rinfo->clause, Const))
6689 tgl@sss.pgh.pa.us 713 : 24141 : return (Selectivity) 1.0;
714 : : }
715 : :
716 : : /*
717 : : * If possible, cache the result of the selectivity calculation for
718 : : * the clause. We can cache if varRelid is zero or the clause
719 : : * contains only vars of that relid --- otherwise varRelid will affect
720 : : * the result, so mustn't cache. Outer join quals might be examined
721 : : * with either their join's actual jointype or JOIN_INNER, so we need
722 : : * two cache variables to remember both cases. Note: we assume the
723 : : * result won't change if we are switching the input relations or
724 : : * considering a unique-ified case, so we only need one cache variable
725 : : * for all non-JOIN_INNER cases.
726 : : */
8157 727 [ + + ]: 2613882 : if (varRelid == 0 ||
1191 728 [ + - ]: 956956 : rinfo->num_base_rels == 0 ||
729 [ + + + + ]: 1576104 : (rinfo->num_base_rels == 1 &&
730 : 619148 : bms_is_member(varRelid, rinfo->clause_relids)))
731 : : {
732 : : /* Cacheable --- do we already have the result? */
6297 733 [ + + ]: 2273882 : if (jointype == JOIN_INNER)
734 : : {
735 [ + + ]: 1974818 : if (rinfo->norm_selec >= 0)
736 : 1516904 : return rinfo->norm_selec;
737 : : }
738 : : else
739 : : {
740 [ + + ]: 299064 : if (rinfo->outer_selec >= 0)
741 : 187224 : return rinfo->outer_selec;
742 : : }
6473 743 : 569754 : cacheable = true;
744 : : }
745 : :
746 : : /*
747 : : * Proceed with examination of contained clause. If the clause is an
748 : : * OR-clause, we want to look at the variant with sub-RestrictInfos,
749 : : * so that per-subclause selectivities can be cached.
750 : : */
7511 751 [ + + ]: 909754 : if (rinfo->orclause)
752 : 12530 : clause = (Node *) rinfo->orclause;
753 : : else
754 : 897224 : clause = (Node *) rinfo->clause;
755 : : }
756 : :
9782 757 [ + + ]: 930906 : if (IsA(clause, Var))
758 : : {
9323 759 : 32740 : Var *var = (Var *) clause;
760 : :
761 : : /*
762 : : * We probably shouldn't ever see an uplevel Var here, but if we do,
763 : : * return the default selectivity...
764 : : */
765 [ + - + + ]: 32740 : if (var->varlevelsup == 0 &&
766 [ + + ]: 572 : (varRelid == 0 || varRelid == (int) var->varno))
767 : : {
768 : : /* Use the restriction selectivity function for a bool Var */
3876 769 : 32474 : s1 = boolvarsel(root, (Node *) var, varRelid);
770 : : }
771 : : }
9782 772 [ + + ]: 898166 : else if (IsA(clause, Const))
773 : : {
774 : : /* bool constant is pretty easy... */
7153 bruce@momjian.us 775 : 3087 : Const *con = (Const *) clause;
776 : :
7248 tgl@sss.pgh.pa.us 777 [ + + ]: 6094 : s1 = con->constisnull ? 0.0 :
778 [ + + ]: 3007 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
779 : : }
7998 780 [ + + ]: 895079 : else if (IsA(clause, Param))
781 : : {
782 : : /* see if we can replace the Param */
7015 783 : 25 : Node *subst = estimate_expression_value(root, clause);
784 : :
7998 785 [ - + ]: 25 : if (IsA(subst, Const))
786 : : {
787 : : /* bool constant is pretty easy... */
7153 bruce@momjian.us 788 :UBC 0 : Const *con = (Const *) subst;
789 : :
7248 tgl@sss.pgh.pa.us 790 [ # # ]: 0 : s1 = con->constisnull ? 0.0 :
791 [ # # ]: 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
792 : : }
793 : : else
794 : : {
795 : : /* XXX any way to do better than default? */
796 : : }
797 : : }
2653 tgl@sss.pgh.pa.us 798 [ + + ]:CBC 895054 : else if (is_notclause(clause))
799 : : {
800 : : /* inverse of the selectivity of the underlying clause */
1979 dean.a.rasheed@gmail 801 : 11864 : s1 = 1.0 - clause_selectivity_ext(root,
802 : 11864 : (Node *) get_notclausearg((Expr *) clause),
803 : : varRelid,
804 : : jointype,
805 : : sjinfo,
806 : : use_extended_stats);
807 : : }
2653 tgl@sss.pgh.pa.us 808 [ + + ]: 883190 : else if (is_andclause(clause))
809 : : {
810 : : /* share code with clauselist_selectivity() */
1979 dean.a.rasheed@gmail 811 : 3109 : s1 = clauselist_selectivity_ext(root,
812 : : ((BoolExpr *) clause)->args,
813 : : varRelid,
814 : : jointype,
815 : : sjinfo,
816 : : use_extended_stats);
817 : : }
2653 tgl@sss.pgh.pa.us 818 [ + + ]: 880081 : else if (is_orclause(clause))
819 : : {
820 : : /*
821 : : * Almost the same thing as clauselist_selectivity, but with the
822 : : * clauses connected by OR.
823 : : */
1979 dean.a.rasheed@gmail 824 : 12578 : s1 = clauselist_selectivity_or(root,
825 : : ((BoolExpr *) clause)->args,
826 : : varRelid,
827 : : jointype,
828 : : sjinfo,
829 : : use_extended_stats);
830 : : }
6689 tgl@sss.pgh.pa.us 831 [ + + + + ]: 867503 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
10467 bruce@momjian.us 832 : 825053 : {
5049 tgl@sss.pgh.pa.us 833 : 825073 : OpExpr *opclause = (OpExpr *) clause;
834 : 825073 : Oid opno = opclause->opno;
835 : :
1930 836 [ + + ]: 825073 : if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
837 : : {
838 : : /* Estimate selectivity for a join clause. */
8958 bruce@momjian.us 839 : 221031 : s1 = join_selectivity(root, opno,
840 : : opclause->args,
841 : : opclause->inputcollid,
842 : : jointype,
843 : : sjinfo);
844 : : }
845 : : else
846 : : {
847 : : /* Estimate selectivity for a restriction clause. */
848 : 604042 : s1 = restriction_selectivity(root, opno,
849 : : opclause->args,
850 : : opclause->inputcollid,
851 : : varRelid);
852 : : }
853 : :
854 : : /*
855 : : * DistinctExpr has the same representation as OpExpr, but the
856 : : * contained operator is "=" not "<>", so we must negate the result.
857 : : * This estimation method doesn't give the right behavior for nulls,
858 : : * but it's better than doing nothing.
859 : : */
6689 tgl@sss.pgh.pa.us 860 [ + + ]: 825053 : if (IsA(clause, DistinctExpr))
861 : 733 : s1 = 1.0 - s1;
862 : : }
2642 863 [ + + ]: 42430 : else if (is_funcclause(clause))
864 : : {
865 : 11409 : FuncExpr *funcclause = (FuncExpr *) clause;
866 : :
867 : : /* Try to get an estimate from the support function, if any */
868 : 11409 : s1 = function_selectivity(root,
869 : : funcclause->funcid,
870 : : funcclause->args,
871 : : funcclause->inputcollid,
1930 872 : 11409 : treat_as_join_clause(root, clause, rinfo,
873 : : varRelid, sjinfo),
874 : : varRelid,
875 : : jointype,
876 : : sjinfo);
877 : :
878 : : /* If no support, fall back on boolvarsel */
227 tgl@sss.pgh.pa.us 879 [ + + ]:GNC 11409 : if (s1 < 0)
880 : 11384 : s1 = boolvarsel(root, clause, varRelid);
881 : : }
7466 tgl@sss.pgh.pa.us 882 [ + + ]:CBC 31021 : else if (IsA(clause, ScalarArrayOpExpr))
883 : : {
884 : : /* Use node specific selectivity calculation function */
885 : 18459 : s1 = scalararraysel(root,
886 : : (ScalarArrayOpExpr *) clause,
1930 887 : 18459 : treat_as_join_clause(root, clause, rinfo,
888 : : varRelid, sjinfo),
889 : : varRelid,
890 : : jointype,
891 : : sjinfo);
892 : : }
7416 893 [ + + ]: 12562 : else if (IsA(clause, RowCompareExpr))
894 : : {
895 : : /* Use node specific selectivity calculation function */
896 : 240 : s1 = rowcomparesel(root,
897 : : (RowCompareExpr *) clause,
898 : : varRelid,
899 : : jointype,
900 : : sjinfo);
901 : : }
9080 902 [ + + ]: 12322 : else if (IsA(clause, NullTest))
903 : : {
904 : : /* Use node specific selectivity calculation function */
8599 905 : 8745 : s1 = nulltestsel(root,
906 : : ((NullTest *) clause)->nulltesttype,
8545 907 : 8745 : (Node *) ((NullTest *) clause)->arg,
908 : : varRelid,
909 : : jointype,
910 : : sjinfo);
911 : : }
9080 912 [ + + ]: 3577 : else if (IsA(clause, BooleanTest))
913 : : {
914 : : /* Use node specific selectivity calculation function */
8599 915 : 791 : s1 = booltestsel(root,
916 : : ((BooleanTest *) clause)->booltesttype,
8545 917 : 791 : (Node *) ((BooleanTest *) clause)->arg,
918 : : varRelid,
919 : : jointype,
920 : : sjinfo);
921 : : }
6903 922 [ + + ]: 2786 : else if (IsA(clause, CurrentOfExpr))
923 : : {
924 : : /* CURRENT OF selects at most one row of its table */
925 : 345 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
926 : 345 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
927 : :
928 [ + + ]: 345 : if (crel->tuples > 0)
929 : 334 : s1 = 1.0 / crel->tuples;
930 : : }
9396 931 [ - + ]: 2441 : else if (IsA(clause, RelabelType))
932 : : {
933 : : /* Not sure this case is needed, but it can't hurt */
1979 dean.a.rasheed@gmail 934 :UBC 0 : s1 = clause_selectivity_ext(root,
935 : 0 : (Node *) ((RelabelType *) clause)->arg,
936 : : varRelid,
937 : : jointype,
938 : : sjinfo,
939 : : use_extended_stats);
940 : : }
8492 tgl@sss.pgh.pa.us 941 [ - + ]:CBC 2441 : else if (IsA(clause, CoerceToDomain))
942 : : {
943 : : /* Not sure this case is needed, but it can't hurt */
1979 dean.a.rasheed@gmail 944 :UBC 0 : s1 = clause_selectivity_ext(root,
945 : 0 : (Node *) ((CoerceToDomain *) clause)->arg,
946 : : varRelid,
947 : : jointype,
948 : : sjinfo,
949 : : use_extended_stats);
950 : : }
951 : : else
952 : : {
953 : : /*
954 : : * For anything else, see if we can consider it as a boolean variable.
955 : : * This only works if it's an immutable expression in Vars of a single
956 : : * relation; but there's no point in us checking that here because
957 : : * boolvarsel() will do it internally, and return a suitable default
958 : : * selectivity if not.
959 : : */
3876 tgl@sss.pgh.pa.us 960 :CBC 2441 : s1 = boolvarsel(root, clause, varRelid);
961 : : }
962 : :
963 : : /* Cache the result if possible */
8157 964 [ + + ]: 930886 : if (cacheable)
965 : : {
6297 966 [ + + ]: 569734 : if (jointype == JOIN_INNER)
967 : 457894 : rinfo->norm_selec = s1;
968 : : else
969 : 111840 : rinfo->outer_selec = s1;
970 : : }
971 : :
972 : : #ifdef SELECTIVITY_DEBUG
973 : : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
974 : : #endif /* SELECTIVITY_DEBUG */
975 : :
9782 976 : 930886 : return s1;
977 : : }
|