Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * dependencies.c
4 : : * POSTGRES functional dependencies
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : * IDENTIFICATION
10 : : * src/backend/statistics/dependencies.c
11 : : *
12 : : *-------------------------------------------------------------------------
13 : : */
14 : : #include "postgres.h"
15 : :
16 : : #include "access/htup_details.h"
17 : : #include "catalog/pg_statistic_ext.h"
18 : : #include "catalog/pg_statistic_ext_data.h"
19 : : #include "nodes/nodeFuncs.h"
20 : : #include "optimizer/clauses.h"
21 : : #include "optimizer/optimizer.h"
22 : : #include "parser/parsetree.h"
23 : : #include "statistics/extended_stats_internal.h"
24 : : #include "utils/fmgroids.h"
25 : : #include "utils/lsyscache.h"
26 : : #include "utils/memutils.h"
27 : : #include "utils/selfuncs.h"
28 : : #include "utils/syscache.h"
29 : : #include "utils/typcache.h"
30 : :
31 : : /* size of the struct header fields (magic, type, ndeps) */
32 : : #define SizeOfHeader (3 * sizeof(uint32))
33 : :
34 : : /* size of a serialized dependency (degree, natts, atts) */
35 : : #define SizeOfItem(natts) \
36 : : (sizeof(double) + sizeof(AttrNumber) * (1 + (natts)))
37 : :
38 : : /* minimal size of a dependency (with two attributes) */
39 : : #define MinSizeOfItem SizeOfItem(2)
40 : :
41 : : /* minimal size of dependencies, when all deps are minimal */
42 : : #define MinSizeOfItems(ndeps) \
43 : : (SizeOfHeader + (ndeps) * MinSizeOfItem)
44 : :
45 : : /*
46 : : * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
47 : : * k-permutations of n elements, except that the order does not matter for the
48 : : * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
49 : : */
50 : : typedef struct DependencyGeneratorData
51 : : {
52 : : int k; /* size of the dependency */
53 : : int n; /* number of possible attributes */
54 : : int current; /* next dependency to return (index) */
55 : : AttrNumber ndependencies; /* number of dependencies generated */
56 : : AttrNumber *dependencies; /* array of pre-generated dependencies */
57 : : } DependencyGeneratorData;
58 : :
59 : : typedef DependencyGeneratorData *DependencyGenerator;
60 : :
61 : : static void generate_dependencies_recurse(DependencyGenerator state,
62 : : int index, AttrNumber start, AttrNumber *current);
63 : : static void generate_dependencies(DependencyGenerator state);
64 : : static DependencyGenerator DependencyGenerator_init(int n, int k);
65 : : static void DependencyGenerator_free(DependencyGenerator state);
66 : : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
67 : : static double dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency);
68 : : static bool dependency_is_fully_matched(MVDependency *dependency,
69 : : Bitmapset *attnums);
70 : : static bool dependency_is_compatible_clause(Node *clause, Index relid,
71 : : AttrNumber *attnum);
72 : : static bool dependency_is_compatible_expression(Node *clause, Index relid,
73 : : List *statlist, Node **expr);
74 : : static MVDependency *find_strongest_dependency(MVDependencies **dependencies,
75 : : int ndependencies, Bitmapset *attnums);
76 : : static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
77 : : int varRelid, JoinType jointype,
78 : : SpecialJoinInfo *sjinfo,
79 : : MVDependency **dependencies,
80 : : int ndependencies,
81 : : AttrNumber *list_attnums,
82 : : Bitmapset **estimatedclauses);
83 : :
84 : : static void
3177 simon@2ndQuadrant.co 85 :CBC 417 : generate_dependencies_recurse(DependencyGenerator state, int index,
86 : : AttrNumber start, AttrNumber *current)
87 : : {
88 : : /*
89 : : * The generator handles the first (k-1) elements differently from the
90 : : * last element.
91 : : */
92 [ + + ]: 417 : if (index < (state->k - 1))
93 : : {
94 : : AttrNumber i;
95 : :
96 : : /*
97 : : * The first (k-1) values have to be in ascending order, which we
98 : : * generate recursively.
99 : : */
100 : :
101 [ + + ]: 489 : for (i = start; i < state->n; i++)
102 : : {
103 : 318 : current[index] = i;
104 : 318 : generate_dependencies_recurse(state, (index + 1), (i + 1), current);
105 : : }
106 : : }
107 : : else
108 : : {
109 : : int i;
110 : :
111 : : /*
112 : : * the last element is the implied value, which does not respect the
113 : : * ascending order. We just need to check that the value is not in the
114 : : * first (k-1) elements.
115 : : */
116 : :
117 [ + + ]: 882 : for (i = 0; i < state->n; i++)
118 : : {
119 : : int j;
120 : 636 : bool match = false;
121 : :
122 : 636 : current[index] = i;
123 : :
124 [ + + ]: 1098 : for (j = 0; j < index; j++)
125 : : {
126 [ + + ]: 780 : if (current[j] == i)
127 : : {
128 : 318 : match = true;
129 : 318 : break;
130 : : }
131 : : }
132 : :
133 : : /*
134 : : * If the value is not found in the first part of the dependency,
135 : : * we're done.
136 : : */
137 [ + + ]: 636 : if (!match)
138 : : {
139 : 636 : state->dependencies = (AttrNumber *) repalloc(state->dependencies,
3100 tgl@sss.pgh.pa.us 140 : 318 : state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
3177 simon@2ndQuadrant.co 141 : 318 : memcpy(&state->dependencies[(state->k * state->ndependencies)],
142 : 318 : current, state->k * sizeof(AttrNumber));
143 : 318 : state->ndependencies++;
144 : : }
145 : : }
146 : : }
147 : 417 : }
148 : :
149 : : /* generate all dependencies (k-permutations of n elements) */
150 : : static void
151 : 99 : generate_dependencies(DependencyGenerator state)
152 : : {
6 michael@paquier.xyz 153 :GNC 99 : AttrNumber *current = palloc0_array(AttrNumber, state->k);
154 : :
3177 simon@2ndQuadrant.co 155 :CBC 99 : generate_dependencies_recurse(state, 0, 0, current);
156 : :
157 : 99 : pfree(current);
158 : 99 : }
159 : :
160 : : /*
161 : : * initialize the DependencyGenerator of variations, and prebuild the variations
162 : : *
163 : : * This pre-builds all the variations. We could also generate them in
164 : : * DependencyGenerator_next(), but this seems simpler.
165 : : */
166 : : static DependencyGenerator
167 : 99 : DependencyGenerator_init(int n, int k)
168 : : {
169 : : DependencyGenerator state;
170 : :
171 [ + - - + ]: 99 : Assert((n >= k) && (k > 0));
172 : :
173 : : /* allocate the DependencyGenerator state */
6 michael@paquier.xyz 174 :GNC 99 : state = palloc0_object(DependencyGeneratorData);
175 : 99 : state->dependencies = palloc_array(AttrNumber, k);
176 : :
3177 simon@2ndQuadrant.co 177 :CBC 99 : state->ndependencies = 0;
178 : 99 : state->current = 0;
179 : 99 : state->k = k;
180 : 99 : state->n = n;
181 : :
182 : : /* now actually pre-generate all the variations */
183 : 99 : generate_dependencies(state);
184 : :
185 : 99 : return state;
186 : : }
187 : :
188 : : /* free the DependencyGenerator state */
189 : : static void
190 : 99 : DependencyGenerator_free(DependencyGenerator state)
191 : : {
192 : 99 : pfree(state->dependencies);
193 : 99 : pfree(state);
194 : 99 : }
195 : :
196 : : /* generate next combination */
197 : : static AttrNumber *
198 : 417 : DependencyGenerator_next(DependencyGenerator state)
199 : : {
200 [ + + ]: 417 : if (state->current == state->ndependencies)
201 : 99 : return NULL;
202 : :
203 : 318 : return &state->dependencies[state->k * state->current++];
204 : : }
205 : :
206 : :
207 : : /*
208 : : * validates functional dependency on the data
209 : : *
210 : : * An actual work horse of detecting functional dependencies. Given a variation
211 : : * of k attributes, it checks that the first (k-1) are sufficient to determine
212 : : * the last one.
213 : : */
214 : : static double
1726 tomas.vondra@postgre 215 : 318 : dependency_degree(StatsBuildData *data, int k, AttrNumber *dependency)
216 : : {
217 : : int i,
218 : : nitems;
219 : : MultiSortSupport mss;
220 : : SortItem *items;
221 : : AttrNumber *attnums_dep;
222 : :
223 : : /* counters valid within a group */
3177 simon@2ndQuadrant.co 224 : 318 : int group_size = 0;
225 : 318 : int n_violations = 0;
226 : :
227 : : /* total number of rows supporting (consistent with) the dependency */
228 : 318 : int n_supporting_rows = 0;
229 : :
230 : : /* Make sure we have at least two input attributes. */
231 [ - + ]: 318 : Assert(k >= 2);
232 : :
233 : : /* sort info for all attributes columns */
234 : 318 : mss = multi_sort_init(k);
235 : :
236 : : /*
237 : : * Translate the array of indexes to regular attnums for the dependency
238 : : * (we will need this to identify the columns in StatsBuildData).
239 : : */
6 michael@paquier.xyz 240 :GNC 318 : attnums_dep = palloc_array(AttrNumber, k);
2456 tomas.vondra@postgre 241 [ + + ]:CBC 1026 : for (i = 0; i < k; i++)
1726 242 : 708 : attnums_dep[i] = data->attnums[dependency[i]];
243 : :
244 : : /*
245 : : * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
246 : : *
247 : : * (a) sort the data lexicographically
248 : : *
249 : : * (b) split the data into groups by first (k-1) columns
250 : : *
251 : : * (c) for each group count different values in the last column
252 : : *
253 : : * We use the column data types' default sort operators and collations;
254 : : * perhaps at some point it'd be worth using column-specific collations?
255 : : */
256 : :
257 : : /* prepare the sort function for the dimensions */
3177 simon@2ndQuadrant.co 258 [ + + ]: 1026 : for (i = 0; i < k; i++)
259 : : {
1726 tomas.vondra@postgre 260 : 708 : VacAttrStats *colstat = data->stats[dependency[i]];
261 : : TypeCacheEntry *type;
262 : :
3177 simon@2ndQuadrant.co 263 : 708 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
264 [ - + ]: 708 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
3177 simon@2ndQuadrant.co 265 [ # # ]:UBC 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 : : colstat->attrtypid);
267 : :
268 : : /* prepare the sort function for this dimension */
2343 tomas.vondra@postgre 269 :CBC 708 : multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
270 : : }
271 : :
272 : : /*
273 : : * build an array of SortItem(s) sorted using the multi-sort support
274 : : *
275 : : * XXX This relies on all stats entries pointing to the same tuple
276 : : * descriptor. For now that assumption holds, but it might change in the
277 : : * future for example if we support statistics on multiple tables.
278 : : */
1726 279 : 318 : items = build_sorted_items(data, &nitems, mss, k, attnums_dep);
280 : :
281 : : /*
282 : : * Walk through the sorted array, split it into rows according to the
283 : : * first (k-1) columns. If there's a single value in the last column, we
284 : : * count the group as 'supporting' the functional dependency. Otherwise we
285 : : * count it as contradicting.
286 : : */
287 : :
288 : : /* start with the first row forming a group */
3177 simon@2ndQuadrant.co 289 : 318 : group_size = 1;
290 : :
291 : : /* loop 1 beyond the end of the array so that we count the final group */
2456 tomas.vondra@postgre 292 [ + + ]: 752939 : for (i = 1; i <= nitems; i++)
293 : : {
294 : : /*
295 : : * Check if the group ended, which may be either because we processed
296 : : * all the items (i==nitems), or because the i-th item is not equal to
297 : : * the preceding one.
298 : : */
299 [ + + + + ]: 1504924 : if (i == nitems ||
3100 tgl@sss.pgh.pa.us 300 : 752303 : multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
301 : : {
302 : : /*
303 : : * If no violations were found in the group then track the rows of
304 : : * the group as supporting the functional dependency.
305 : : */
3177 simon@2ndQuadrant.co 306 [ + + ]: 16314 : if (n_violations == 0)
307 : 9969 : n_supporting_rows += group_size;
308 : :
309 : : /* Reset counters for the new group */
310 : 16314 : n_violations = 0;
311 : 16314 : group_size = 1;
312 : 16314 : continue;
313 : : }
314 : : /* first columns match, but the last one does not (so contradicting) */
315 [ + + ]: 736307 : else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
316 : 30051 : n_violations++;
317 : :
318 : 736307 : group_size++;
319 : : }
320 : :
321 : : /* Compute the 'degree of validity' as (supporting/total). */
1726 tomas.vondra@postgre 322 : 318 : return (n_supporting_rows * 1.0 / data->numrows);
323 : : }
324 : :
325 : : /*
326 : : * detects functional dependencies between groups of columns
327 : : *
328 : : * Generates all possible subsets of columns (variations) and computes
329 : : * the degree of validity for each one. For example when creating statistics
330 : : * on three columns (a,b,c) there are 9 possible dependencies
331 : : *
332 : : * two columns three columns
333 : : * ----------- -------------
334 : : * (a) -> b (a,b) -> c
335 : : * (a) -> c (a,c) -> b
336 : : * (b) -> a (b,c) -> a
337 : : * (b) -> c
338 : : * (c) -> a
339 : : * (c) -> b
340 : : */
341 : : MVDependencies *
342 : 75 : statext_dependencies_build(StatsBuildData *data)
343 : : {
344 : : int i,
345 : : k;
346 : :
347 : : /* result */
3177 simon@2ndQuadrant.co 348 : 75 : MVDependencies *dependencies = NULL;
349 : : MemoryContext cxt;
350 : :
1726 tomas.vondra@postgre 351 [ - + ]: 75 : Assert(data->nattnums >= 2);
352 : :
353 : : /* tracks memory allocated by dependency_degree calls */
1547 354 : 75 : cxt = AllocSetContextCreate(CurrentMemoryContext,
355 : : "dependency_degree cxt",
356 : : ALLOCSET_DEFAULT_SIZES);
357 : :
358 : : /*
359 : : * We'll try build functional dependencies starting from the smallest ones
360 : : * covering just 2 columns, to the largest ones, covering all columns
361 : : * included in the statistics object. We start from the smallest ones
362 : : * because we want to be able to skip already implied ones.
363 : : */
1726 364 [ + + ]: 174 : for (k = 2; k <= data->nattnums; k++)
365 : : {
366 : : AttrNumber *dependency; /* array with k elements */
367 : :
368 : : /* prepare a DependencyGenerator of variation */
369 : 99 : DependencyGenerator DependencyGenerator = DependencyGenerator_init(data->nattnums, k);
370 : :
371 : : /* generate all possible variations of k values (out of n) */
3177 simon@2ndQuadrant.co 372 [ + + ]: 417 : while ((dependency = DependencyGenerator_next(DependencyGenerator)))
373 : : {
374 : : double degree;
375 : : MVDependency *d;
376 : : MemoryContext oldcxt;
377 : :
378 : : /* release memory used by dependency degree calculation */
1547 tomas.vondra@postgre 379 : 318 : oldcxt = MemoryContextSwitchTo(cxt);
380 : :
381 : : /* compute how valid the dependency seems */
1726 382 : 318 : degree = dependency_degree(data, k, dependency);
383 : :
1547 384 : 318 : MemoryContextSwitchTo(oldcxt);
385 : 318 : MemoryContextReset(cxt);
386 : :
387 : : /*
388 : : * if the dependency seems entirely invalid, don't store it
389 : : */
3177 simon@2ndQuadrant.co 390 [ + + ]: 318 : if (degree == 0.0)
391 : 126 : continue;
392 : :
393 : 192 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
3100 tgl@sss.pgh.pa.us 394 : 192 : + k * sizeof(AttrNumber));
395 : :
396 : : /* copy the dependency (and keep the indexes into stxkeys) */
3177 simon@2ndQuadrant.co 397 : 192 : d->degree = degree;
398 : 192 : d->nattributes = k;
399 [ + + ]: 612 : for (i = 0; i < k; i++)
1726 tomas.vondra@postgre 400 : 420 : d->attributes[i] = data->attnums[dependency[i]];
401 : :
402 : : /* initialize the list of dependencies */
3177 simon@2ndQuadrant.co 403 [ + + ]: 192 : if (dependencies == NULL)
404 : : {
6 michael@paquier.xyz 405 :GNC 66 : dependencies = palloc0_object(MVDependencies);
406 : :
3177 simon@2ndQuadrant.co 407 :CBC 66 : dependencies->magic = STATS_DEPS_MAGIC;
408 : 66 : dependencies->type = STATS_DEPS_TYPE_BASIC;
409 : 66 : dependencies->ndeps = 0;
410 : : }
411 : :
412 : 192 : dependencies->ndeps++;
413 : 192 : dependencies = (MVDependencies *) repalloc(dependencies,
414 : : offsetof(MVDependencies, deps)
2431 tomas.vondra@postgre 415 : 192 : + dependencies->ndeps * sizeof(MVDependency *));
416 : :
3177 simon@2ndQuadrant.co 417 : 192 : dependencies->deps[dependencies->ndeps - 1] = d;
418 : : }
419 : :
420 : : /*
421 : : * we're done with variations of k elements, so free the
422 : : * DependencyGenerator
423 : : */
424 : 99 : DependencyGenerator_free(DependencyGenerator);
425 : : }
426 : :
1547 tomas.vondra@postgre 427 : 75 : MemoryContextDelete(cxt);
428 : :
3177 simon@2ndQuadrant.co 429 : 75 : return dependencies;
430 : : }
431 : :
432 : :
433 : : /*
434 : : * Serialize list of dependencies into a bytea value.
435 : : */
436 : : bytea *
3135 bruce@momjian.us 437 : 87 : statext_dependencies_serialize(MVDependencies *dependencies)
438 : : {
439 : : int i;
440 : : bytea *output;
441 : : char *tmp;
442 : : Size len;
443 : :
444 : : /* we need to store ndeps, with a number of attributes for each one */
2431 tomas.vondra@postgre 445 : 87 : len = VARHDRSZ + SizeOfHeader;
446 : :
447 : : /* and also include space for the actual attribute numbers and degrees */
3177 simon@2ndQuadrant.co 448 [ + + ]: 327 : for (i = 0; i < dependencies->ndeps; i++)
2431 tomas.vondra@postgre 449 : 240 : len += SizeOfItem(dependencies->deps[i]->nattributes);
450 : :
3177 simon@2ndQuadrant.co 451 : 87 : output = (bytea *) palloc0(len);
452 : 87 : SET_VARSIZE(output, len);
453 : :
454 : 87 : tmp = VARDATA(output);
455 : :
456 : : /* Store the base struct values (magic, type, ndeps) */
457 : 87 : memcpy(tmp, &dependencies->magic, sizeof(uint32));
458 : 87 : tmp += sizeof(uint32);
459 : 87 : memcpy(tmp, &dependencies->type, sizeof(uint32));
460 : 87 : tmp += sizeof(uint32);
461 : 87 : memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
462 : 87 : tmp += sizeof(uint32);
463 : :
464 : : /* store number of attributes and attribute numbers for each dependency */
465 [ + + ]: 327 : for (i = 0; i < dependencies->ndeps; i++)
466 : : {
467 : 240 : MVDependency *d = dependencies->deps[i];
468 : :
2431 tomas.vondra@postgre 469 : 240 : memcpy(tmp, &d->degree, sizeof(double));
470 : 240 : tmp += sizeof(double);
471 : :
472 : 240 : memcpy(tmp, &d->nattributes, sizeof(AttrNumber));
473 : 240 : tmp += sizeof(AttrNumber);
474 : :
3177 simon@2ndQuadrant.co 475 : 240 : memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
476 : 240 : tmp += sizeof(AttrNumber) * d->nattributes;
477 : :
478 : : /* protect against overflow */
479 [ - + ]: 240 : Assert(tmp <= ((char *) output + len));
480 : : }
481 : :
482 : : /* make sure we've produced exactly the right amount of data */
2431 tomas.vondra@postgre 483 [ - + ]: 87 : Assert(tmp == ((char *) output + len));
484 : :
3177 simon@2ndQuadrant.co 485 : 87 : return output;
486 : : }
487 : :
488 : : /*
489 : : * Reads serialized dependencies into MVDependencies structure.
490 : : */
491 : : MVDependencies *
492 : 830 : statext_dependencies_deserialize(bytea *data)
493 : : {
494 : : int i;
495 : : Size min_expected_size;
496 : : MVDependencies *dependencies;
497 : : char *tmp;
498 : :
499 [ - + ]: 830 : if (data == NULL)
3177 simon@2ndQuadrant.co 500 :UBC 0 : return NULL;
501 : :
2431 tomas.vondra@postgre 502 [ - + - - :CBC 830 : if (VARSIZE_ANY_EXHDR(data) < SizeOfHeader)
- - - - +
- - + ]
1026 peter@eisentraut.org 503 [ # # # # :UBC 0 : elog(ERROR, "invalid MVDependencies size %zu (expected at least %zu)",
# # # # #
# # # ]
504 : : VARSIZE_ANY_EXHDR(data), SizeOfHeader);
505 : :
506 : : /* read the MVDependencies header */
6 michael@paquier.xyz 507 :GNC 830 : dependencies = palloc0_object(MVDependencies);
508 : :
509 : : /* initialize pointer to the data part (skip the varlena header) */
3177 simon@2ndQuadrant.co 510 [ + - ]:CBC 830 : tmp = VARDATA_ANY(data);
511 : :
512 : : /* read the header fields and perform basic sanity checks */
513 : 830 : memcpy(&dependencies->magic, tmp, sizeof(uint32));
514 : 830 : tmp += sizeof(uint32);
515 : 830 : memcpy(&dependencies->type, tmp, sizeof(uint32));
516 : 830 : tmp += sizeof(uint32);
517 : 830 : memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
518 : 830 : tmp += sizeof(uint32);
519 : :
520 [ - + ]: 830 : if (dependencies->magic != STATS_DEPS_MAGIC)
3177 simon@2ndQuadrant.co 521 [ # # ]:UBC 0 : elog(ERROR, "invalid dependency magic %d (expected %d)",
522 : : dependencies->magic, STATS_DEPS_MAGIC);
523 : :
3177 simon@2ndQuadrant.co 524 [ - + ]:CBC 830 : if (dependencies->type != STATS_DEPS_TYPE_BASIC)
3177 simon@2ndQuadrant.co 525 [ # # ]:UBC 0 : elog(ERROR, "invalid dependency type %d (expected %d)",
526 : : dependencies->type, STATS_DEPS_TYPE_BASIC);
527 : :
3177 simon@2ndQuadrant.co 528 [ - + ]:CBC 830 : if (dependencies->ndeps == 0)
2392 tomas.vondra@postgre 529 [ # # ]:UBC 0 : elog(ERROR, "invalid zero-length item array in MVDependencies");
530 : :
531 : : /* what minimum bytea size do we expect for those parameters */
2431 tomas.vondra@postgre 532 :CBC 830 : min_expected_size = SizeOfItem(dependencies->ndeps);
533 : :
3177 simon@2ndQuadrant.co 534 [ - + - - : 830 : if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
- - - - +
- - + ]
1026 peter@eisentraut.org 535 [ # # # # :UBC 0 : elog(ERROR, "invalid dependencies size %zu (expected at least %zu)",
# # # # #
# # # ]
536 : : VARSIZE_ANY_EXHDR(data), min_expected_size);
537 : :
538 : : /* allocate space for the MCV items */
3177 simon@2ndQuadrant.co 539 :CBC 830 : dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
3100 tgl@sss.pgh.pa.us 540 : 830 : + (dependencies->ndeps * sizeof(MVDependency *)));
541 : :
3177 simon@2ndQuadrant.co 542 [ + + ]: 4863 : for (i = 0; i < dependencies->ndeps; i++)
543 : : {
544 : : double degree;
545 : : AttrNumber k;
546 : : MVDependency *d;
547 : :
548 : : /* degree of validity */
549 : 4033 : memcpy(°ree, tmp, sizeof(double));
550 : 4033 : tmp += sizeof(double);
551 : :
552 : : /* number of attributes */
553 : 4033 : memcpy(&k, tmp, sizeof(AttrNumber));
554 : 4033 : tmp += sizeof(AttrNumber);
555 : :
556 : : /* is the number of attributes valid? */
557 [ + - - + ]: 4033 : Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
558 : :
559 : : /* now that we know the number of attributes, allocate the dependency */
560 : 4033 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
3100 tgl@sss.pgh.pa.us 561 : 4033 : + (k * sizeof(AttrNumber)));
562 : :
3177 simon@2ndQuadrant.co 563 : 4033 : d->degree = degree;
564 : 4033 : d->nattributes = k;
565 : :
566 : : /* copy attribute numbers */
567 : 4033 : memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
568 : 4033 : tmp += sizeof(AttrNumber) * d->nattributes;
569 : :
570 : 4033 : dependencies->deps[i] = d;
571 : :
572 : : /* still within the bytea */
573 [ - + - - : 4033 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
- - - - +
- - + ]
574 : : }
575 : :
576 : : /* we should have consumed the whole bytea exactly */
577 [ - + - - : 830 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
- - - - +
- - + ]
578 : :
579 : 830 : return dependencies;
580 : : }
581 : :
582 : : /*
583 : : * dependency_is_fully_matched
584 : : * checks that a functional dependency is fully matched given clauses on
585 : : * attributes (assuming the clauses are suitable equality clauses)
586 : : */
587 : : static bool
3135 bruce@momjian.us 588 : 3078 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
589 : : {
590 : : int j;
591 : :
592 : : /*
593 : : * Check that the dependency actually is fully covered by clauses. We have
594 : : * to translate all attribute numbers, as those are referenced
595 : : */
3177 simon@2ndQuadrant.co 596 [ + + ]: 7821 : for (j = 0; j < dependency->nattributes; j++)
597 : : {
598 : 6279 : int attnum = dependency->attributes[j];
599 : :
600 [ + + ]: 6279 : if (!bms_is_member(attnum, attnums))
601 : 1536 : return false;
602 : : }
603 : :
604 : 1542 : return true;
605 : : }
606 : :
607 : : /*
608 : : * statext_dependencies_load
609 : : * Load the functional dependencies for the indicated pg_statistic_ext tuple
610 : : */
611 : : MVDependencies *
1430 tomas.vondra@postgre 612 : 804 : statext_dependencies_load(Oid mvoid, bool inh)
613 : : {
614 : : MVDependencies *result;
615 : : bool isnull;
616 : : Datum deps;
617 : : HeapTuple htup;
618 : :
619 : 804 : htup = SearchSysCache2(STATEXTDATASTXOID,
620 : : ObjectIdGetDatum(mvoid),
621 : : BoolGetDatum(inh));
3177 simon@2ndQuadrant.co 622 [ - + ]: 804 : if (!HeapTupleIsValid(htup))
3138 tgl@sss.pgh.pa.us 623 [ # # ]:UBC 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
624 : :
2378 tomas.vondra@postgre 625 :CBC 804 : deps = SysCacheGetAttr(STATEXTDATASTXOID, htup,
626 : : Anum_pg_statistic_ext_data_stxddependencies, &isnull);
2785 tgl@sss.pgh.pa.us 627 [ - + ]: 804 : if (isnull)
2785 tgl@sss.pgh.pa.us 628 [ # # ]:UBC 0 : elog(ERROR,
629 : : "requested statistics kind \"%c\" is not yet built for statistics object %u",
630 : : STATS_EXT_DEPENDENCIES, mvoid);
631 : :
2785 tgl@sss.pgh.pa.us 632 :CBC 804 : result = statext_dependencies_deserialize(DatumGetByteaPP(deps));
633 : :
3177 simon@2ndQuadrant.co 634 : 804 : ReleaseSysCache(htup);
635 : :
2785 tgl@sss.pgh.pa.us 636 : 804 : return result;
637 : : }
638 : :
639 : : /*
640 : : * dependency_is_compatible_clause
641 : : * Determines if the clause is compatible with functional dependencies
642 : : *
643 : : * Only clauses that have the form of equality to a pseudoconstant, or can be
644 : : * interpreted that way, are currently accepted. Furthermore the variable
645 : : * part of the clause must be a simple Var belonging to the specified
646 : : * relation, whose attribute number we return in *attnum on success.
647 : : */
648 : : static bool
3177 simon@2ndQuadrant.co 649 : 1980 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
650 : : {
651 : : Var *var;
652 : : Node *clause_expr;
653 : :
2099 tomas.vondra@postgre 654 [ + + ]: 1980 : if (IsA(clause, RestrictInfo))
655 : : {
656 : 1920 : RestrictInfo *rinfo = (RestrictInfo *) clause;
657 : :
658 : : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
659 [ + + ]: 1920 : if (rinfo->pseudoconstant)
660 : 3 : return false;
661 : :
662 : : /* Clauses referencing multiple, or no, varnos are incompatible */
663 [ - + ]: 1917 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
2099 tomas.vondra@postgre 664 :UBC 0 : return false;
665 : :
2099 tomas.vondra@postgre 666 :CBC 1917 : clause = (Node *) rinfo->clause;
667 : : }
668 : :
669 [ + + ]: 1977 : if (is_opclause(clause))
670 : : {
671 : : /* If it's an opclause, check for Var = Const or Const = Var. */
672 : 723 : OpExpr *expr = (OpExpr *) clause;
673 : :
674 : : /* Only expressions with two arguments are candidates. */
3177 simon@2ndQuadrant.co 675 [ - + ]: 723 : if (list_length(expr->args) != 2)
3177 simon@2ndQuadrant.co 676 :UBC 0 : return false;
677 : :
678 : : /* Make sure non-selected argument is a pseudoconstant. */
2934 tgl@sss.pgh.pa.us 679 [ + - ]:CBC 723 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1726 tomas.vondra@postgre 680 : 723 : clause_expr = linitial(expr->args);
2934 tgl@sss.pgh.pa.us 681 [ # # ]:UBC 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1726 tomas.vondra@postgre 682 : 0 : clause_expr = lsecond(expr->args);
683 : : else
3177 simon@2ndQuadrant.co 684 : 0 : return false;
685 : :
686 : : /*
687 : : * If it's not an "=" operator, just ignore the clause, as it's not
688 : : * compatible with functional dependencies.
689 : : *
690 : : * This uses the function for estimating selectivity, not the operator
691 : : * directly (a bit awkward, but well ...).
692 : : *
693 : : * XXX this is pretty dubious; probably it'd be better to check btree
694 : : * or hash opclass membership, so as not to be fooled by custom
695 : : * selectivity functions, and to be more consistent with decisions
696 : : * elsewhere in the planner.
697 : : */
3177 simon@2ndQuadrant.co 698 [ + + ]:CBC 723 : if (get_oprrest(expr->opno) != F_EQSEL)
699 : 18 : return false;
700 : :
701 : : /* OK to proceed with checking "var" */
702 : : }
2099 tomas.vondra@postgre 703 [ + + ]: 1254 : else if (IsA(clause, ScalarArrayOpExpr))
704 : : {
705 : : /* If it's a scalar array operator, check for Var IN Const. */
2042 tgl@sss.pgh.pa.us 706 : 1218 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
707 : :
708 : : /*
709 : : * Reject ALL() variant, we only care about ANY/IN.
710 : : *
711 : : * XXX Maybe we should check if all the values are the same, and allow
712 : : * ALL in that case? Doesn't seem very practical, though.
713 : : */
2103 tomas.vondra@postgre 714 [ + + ]: 1218 : if (!expr->useOr)
715 : 18 : return false;
716 : :
717 : : /* Only expressions with two arguments are candidates. */
718 [ - + ]: 1200 : if (list_length(expr->args) != 2)
2103 tomas.vondra@postgre 719 :UBC 0 : return false;
720 : :
721 : : /*
722 : : * We know it's always (Var IN Const), so we assume the var is the
723 : : * first argument, and pseudoconstant is the second one.
724 : : */
2103 tomas.vondra@postgre 725 [ - + ]:CBC 1200 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
2103 tomas.vondra@postgre 726 :UBC 0 : return false;
727 : :
1726 tomas.vondra@postgre 728 :CBC 1200 : clause_expr = linitial(expr->args);
729 : :
730 : : /*
731 : : * If it's not an "=" operator, just ignore the clause, as it's not
732 : : * compatible with functional dependencies. The operator is identified
733 : : * simply by looking at which function it uses to estimate
734 : : * selectivity. That's a bit strange, but it's what other similar
735 : : * places do.
736 : : */
2103 737 [ + + ]: 1200 : if (get_oprrest(expr->opno) != F_EQSEL)
738 : 90 : return false;
739 : :
740 : : /* OK to proceed with checking "var" */
741 : : }
2099 742 [ + - ]: 36 : else if (is_orclause(clause))
743 : : {
1726 744 : 36 : BoolExpr *bool_expr = (BoolExpr *) clause;
745 : : ListCell *lc;
746 : :
747 : : /* start with no attribute number */
2099 748 : 36 : *attnum = InvalidAttrNumber;
749 : :
1726 750 [ + - + + : 75 : foreach(lc, bool_expr->args)
+ + ]
751 : : {
752 : : AttrNumber clause_attnum;
753 : :
754 : : /*
755 : : * Had we found incompatible clause in the arguments, treat the
756 : : * whole clause as incompatible.
757 : : */
2099 758 [ + + ]: 60 : if (!dependency_is_compatible_clause((Node *) lfirst(lc),
759 : : relid, &clause_attnum))
760 : 21 : return false;
761 : :
762 [ + + ]: 42 : if (*attnum == InvalidAttrNumber)
763 : 18 : *attnum = clause_attnum;
764 : :
765 : : /* ensure all the variables are the same (same attnum) */
766 [ + + ]: 42 : if (*attnum != clause_attnum)
767 : 3 : return false;
768 : : }
769 : :
770 : : /* the Var is already checked by the recursive call */
771 : 15 : return true;
772 : : }
2099 tomas.vondra@postgre 773 [ # # ]:UBC 0 : else if (is_notclause(clause))
774 : : {
775 : : /*
776 : : * "NOT x" can be interpreted as "x = false", so get the argument and
777 : : * proceed with seeing if it's a suitable Var.
778 : : */
1726 779 : 0 : clause_expr = (Node *) get_notclausearg(clause);
780 : : }
781 : : else
782 : : {
783 : : /*
784 : : * A boolean expression "x" can be interpreted as "x = true", so
785 : : * proceed with seeing if it's a suitable Var.
786 : : */
14 peter@eisentraut.org 787 :UNC 0 : clause_expr = clause;
788 : : }
789 : :
790 : : /*
791 : : * We may ignore any RelabelType node above the operand. (There won't be
792 : : * more than one, since eval_const_expressions has been applied already.)
793 : : */
1726 tomas.vondra@postgre 794 [ - + ]:CBC 1815 : if (IsA(clause_expr, RelabelType))
1726 tomas.vondra@postgre 795 :UBC 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
796 : :
797 : : /* We only support plain Vars for now */
1726 tomas.vondra@postgre 798 [ + + ]:CBC 1815 : if (!IsA(clause_expr, Var))
2934 tgl@sss.pgh.pa.us 799 : 144 : return false;
800 : :
801 : : /* OK, we know we have a Var */
1726 tomas.vondra@postgre 802 : 1671 : var = (Var *) clause_expr;
803 : :
804 : : /* Ensure Var is from the correct relation */
2934 tgl@sss.pgh.pa.us 805 [ - + ]: 1671 : if (var->varno != relid)
2934 tgl@sss.pgh.pa.us 806 :UBC 0 : return false;
807 : :
808 : : /* We also better ensure the Var is from the current level */
2934 tgl@sss.pgh.pa.us 809 [ - + ]:CBC 1671 : if (var->varlevelsup != 0)
2934 tgl@sss.pgh.pa.us 810 :UBC 0 : return false;
811 : :
812 : : /* Also ignore system attributes (we don't allow stats on those) */
2934 tgl@sss.pgh.pa.us 813 [ - + ]:CBC 1671 : if (!AttrNumberIsForUserDefinedAttr(var->varattno))
2934 tgl@sss.pgh.pa.us 814 :UBC 0 : return false;
815 : :
2934 tgl@sss.pgh.pa.us 816 :CBC 1671 : *attnum = var->varattno;
817 : 1671 : return true;
818 : : }
819 : :
820 : : /*
821 : : * find_strongest_dependency
822 : : * find the strongest dependency on the attributes
823 : : *
824 : : * When applying functional dependencies, we start with the strongest
825 : : * dependencies. That is, we select the dependency that:
826 : : *
827 : : * (a) has all attributes covered by equality clauses
828 : : *
829 : : * (b) has the most attributes
830 : : *
831 : : * (c) has the highest degree of validity
832 : : *
833 : : * This guarantees that we eliminate the most redundant conditions first
834 : : * (see the comment in dependencies_clauselist_selectivity).
835 : : */
836 : : static MVDependency *
2164 tomas.vondra@postgre 837 : 1743 : find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
838 : : Bitmapset *attnums)
839 : : {
840 : : int i,
841 : : j;
3177 simon@2ndQuadrant.co 842 : 1743 : MVDependency *strongest = NULL;
843 : :
844 : : /* number of attnums in clauses */
845 : 1743 : int nattnums = bms_num_members(attnums);
846 : :
847 : : /*
848 : : * Iterate over the MVDependency items and find the strongest one from the
849 : : * fully-matched dependencies. We do the cheap checks first, before
850 : : * matching it against the attnums.
851 : : */
2164 tomas.vondra@postgre 852 [ + + ]: 3504 : for (i = 0; i < ndependencies; i++)
853 : : {
854 [ + + ]: 10140 : for (j = 0; j < dependencies[i]->ndeps; j++)
855 : : {
856 : 8379 : MVDependency *dependency = dependencies[i]->deps[j];
857 : :
858 : : /*
859 : : * Skip dependencies referencing more attributes than available
860 : : * clauses, as those can't be fully matched.
861 : : */
862 [ + + ]: 8379 : if (dependency->nattributes > nattnums)
3177 simon@2ndQuadrant.co 863 : 5301 : continue;
864 : :
2164 tomas.vondra@postgre 865 [ + + ]: 3078 : if (strongest)
866 : : {
867 : : /* skip dependencies on fewer attributes than the strongest. */
868 [ - + ]: 1968 : if (dependency->nattributes < strongest->nattributes)
2164 tomas.vondra@postgre 869 :UBC 0 : continue;
870 : :
871 : : /* also skip weaker dependencies when attribute count matches */
2164 tomas.vondra@postgre 872 [ + + ]:CBC 1968 : if (strongest->nattributes == dependency->nattributes &&
873 [ - + ]: 1827 : strongest->degree > dependency->degree)
2164 tomas.vondra@postgre 874 :UBC 0 : continue;
875 : : }
876 : :
877 : : /*
878 : : * this dependency is stronger, but we must still check that it's
879 : : * fully matched to these attnums. We perform this check last as
880 : : * it's slightly more expensive than the previous checks.
881 : : */
2164 tomas.vondra@postgre 882 [ + + ]:CBC 3078 : if (dependency_is_fully_matched(dependency, attnums))
883 : 1542 : strongest = dependency; /* save new best match */
884 : : }
885 : : }
886 : :
3177 simon@2ndQuadrant.co 887 : 1743 : return strongest;
888 : : }
889 : :
890 : : /*
891 : : * clauselist_apply_dependencies
892 : : * Apply the specified functional dependencies to a list of clauses and
893 : : * return the estimated selectivity of the clauses that are compatible
894 : : * with any of the given dependencies.
895 : : *
896 : : * This will estimate all not-already-estimated clauses that are compatible
897 : : * with functional dependencies, and which have an attribute mentioned by any
898 : : * of the given dependencies (either as an implying or implied attribute).
899 : : *
900 : : * Given (lists of) clauses on attributes (a,b) and a functional dependency
901 : : * (a=>b), the per-column selectivities P(a) and P(b) are notionally combined
902 : : * using the formula
903 : : *
904 : : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
905 : : *
906 : : * where 'f' is the degree of dependency. This reflects the fact that we
907 : : * expect a fraction f of all rows to be consistent with the dependency
908 : : * (a=>b), and so have a selectivity of P(a), while the remaining rows are
909 : : * treated as independent.
910 : : *
911 : : * In practice, we use a slightly modified version of this formula, which uses
912 : : * a selectivity of Min(P(a), P(b)) for the dependent rows, since the result
913 : : * should obviously not exceed either column's individual selectivity. I.e.,
914 : : * we actually combine selectivities using the formula
915 : : *
916 : : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
917 : : *
918 : : * This can make quite a difference if the specific values matching the
919 : : * clauses are not consistent with the functional dependency.
920 : : */
921 : : static Selectivity
2089 dean.a.rasheed@gmail 922 : 798 : clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
923 : : int varRelid, JoinType jointype,
924 : : SpecialJoinInfo *sjinfo,
925 : : MVDependency **dependencies, int ndependencies,
926 : : AttrNumber *list_attnums,
927 : : Bitmapset **estimatedclauses)
928 : : {
929 : : Bitmapset *attnums;
930 : : int i;
931 : : int j;
932 : : int nattrs;
933 : : Selectivity *attr_sel;
934 : : int attidx;
935 : : int listidx;
936 : : ListCell *l;
937 : : Selectivity s1;
938 : :
939 : : /*
940 : : * Extract the attnums of all implying and implied attributes from all the
941 : : * given dependencies. Each of these attributes is expected to have at
942 : : * least 1 not-already-estimated compatible clause that we will estimate
943 : : * here.
944 : : */
945 : 798 : attnums = NULL;
946 [ + + ]: 1743 : for (i = 0; i < ndependencies; i++)
947 : : {
948 [ + + ]: 2976 : for (j = 0; j < dependencies[i]->nattributes; j++)
949 : : {
950 : 2031 : AttrNumber attnum = dependencies[i]->attributes[j];
951 : :
952 : 2031 : attnums = bms_add_member(attnums, attnum);
953 : : }
954 : : }
955 : :
956 : : /*
957 : : * Compute per-column selectivity estimates for each of these attributes,
958 : : * and mark all the corresponding clauses as estimated.
959 : : */
960 : 798 : nattrs = bms_num_members(attnums);
6 michael@paquier.xyz 961 :GNC 798 : attr_sel = palloc_array(Selectivity, nattrs);
962 : :
2089 dean.a.rasheed@gmail 963 :CBC 798 : attidx = 0;
964 : 798 : i = -1;
965 [ + + ]: 2547 : while ((i = bms_next_member(attnums, i)) >= 0)
966 : : {
967 : 1749 : List *attr_clauses = NIL;
968 : : Selectivity simple_sel;
969 : :
970 : 1749 : listidx = -1;
971 [ + - + + : 5718 : foreach(l, clauses)
+ + ]
972 : : {
973 : 3969 : Node *clause = (Node *) lfirst(l);
974 : :
975 : 3969 : listidx++;
976 [ + + ]: 3969 : if (list_attnums[listidx] == i)
977 : : {
978 : 1749 : attr_clauses = lappend(attr_clauses, clause);
979 : 1749 : *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
980 : : }
981 : : }
982 : :
1839 983 : 1749 : simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
984 : : jointype, sjinfo, false);
2089 985 : 1749 : attr_sel[attidx++] = simple_sel;
986 : : }
987 : :
988 : : /*
989 : : * Now combine these selectivities using the dependency information. For
990 : : * chains of dependencies such as a -> b -> c, the b -> c dependency will
991 : : * come before the a -> b dependency in the array, so we traverse the
992 : : * array backwards to ensure such chains are computed in the right order.
993 : : *
994 : : * As explained above, pairs of selectivities are combined using the
995 : : * formula
996 : : *
997 : : * P(a,b) = f * Min(P(a), P(b)) + (1-f) * P(a) * P(b)
998 : : *
999 : : * to ensure that the combined selectivity is never greater than either
1000 : : * individual selectivity.
1001 : : *
1002 : : * Where multiple dependencies apply (e.g., a -> b -> c), we use
1003 : : * conditional probabilities to compute the overall result as follows:
1004 : : *
1005 : : * P(a,b,c) = P(c|a,b) * P(a,b) = P(c|a,b) * P(b|a) * P(a)
1006 : : *
1007 : : * so we replace the selectivities of all implied attributes with
1008 : : * conditional probabilities, that are conditional on all their implying
1009 : : * attributes. The selectivities of all other non-implied attributes are
1010 : : * left as they are.
1011 : : */
1012 [ + + ]: 1743 : for (i = ndependencies - 1; i >= 0; i--)
1013 : : {
1014 : 945 : MVDependency *dependency = dependencies[i];
1015 : : AttrNumber attnum;
1016 : : Selectivity s2;
1017 : : double f;
1018 : :
1019 : : /* Selectivity of all the implying attributes */
1020 : 945 : s1 = 1.0;
1021 [ + + ]: 2031 : for (j = 0; j < dependency->nattributes - 1; j++)
1022 : : {
1023 : 1086 : attnum = dependency->attributes[j];
1024 : 1086 : attidx = bms_member_index(attnums, attnum);
1025 : 1086 : s1 *= attr_sel[attidx];
1026 : : }
1027 : :
1028 : : /* Original selectivity of the implied attribute */
1029 : 945 : attnum = dependency->attributes[j];
1030 : 945 : attidx = bms_member_index(attnums, attnum);
1031 : 945 : s2 = attr_sel[attidx];
1032 : :
1033 : : /*
1034 : : * Replace s2 with the conditional probability s2 given s1, computed
1035 : : * using the formula P(b|a) = P(a,b) / P(a), which simplifies to
1036 : : *
1037 : : * P(b|a) = f * Min(P(a), P(b)) / P(a) + (1-f) * P(b)
1038 : : *
1039 : : * where P(a) = s1, the selectivity of the implying attributes, and
1040 : : * P(b) = s2, the selectivity of the implied attribute.
1041 : : */
1042 : 945 : f = dependency->degree;
1043 : :
1044 [ + + ]: 945 : if (s1 <= s2)
1045 : 891 : attr_sel[attidx] = f + (1 - f) * s2;
1046 : : else
1047 : 54 : attr_sel[attidx] = f * s2 / s1 + (1 - f) * s2;
1048 : : }
1049 : :
1050 : : /*
1051 : : * The overall selectivity of all the clauses on all these attributes is
1052 : : * then the product of all the original (non-implied) probabilities and
1053 : : * the new conditional (implied) probabilities.
1054 : : */
1055 : 798 : s1 = 1.0;
1056 [ + + ]: 2547 : for (i = 0; i < nattrs; i++)
1057 : 1749 : s1 *= attr_sel[i];
1058 : :
1059 [ - + - + ]: 798 : CLAMP_PROBABILITY(s1);
1060 : :
1061 : 798 : pfree(attr_sel);
1062 : 798 : bms_free(attnums);
1063 : :
1064 : 798 : return s1;
1065 : : }
1066 : :
1067 : : /*
1068 : : * dependency_is_compatible_expression
1069 : : * Determines if the expression is compatible with functional dependencies
1070 : : *
1071 : : * Similar to dependency_is_compatible_clause, but doesn't enforce that the
1072 : : * expression is a simple Var. On success, return the matching statistics
1073 : : * expression into *expr.
1074 : : */
1075 : : static bool
1726 tomas.vondra@postgre 1076 : 321 : dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, Node **expr)
1077 : : {
1078 : : ListCell *lc,
1079 : : *lc2;
1080 : : Node *clause_expr;
1081 : :
1082 [ + + ]: 321 : if (IsA(clause, RestrictInfo))
1083 : : {
1084 : 276 : RestrictInfo *rinfo = (RestrictInfo *) clause;
1085 : :
1086 : : /* Pseudoconstants are not interesting (they couldn't contain a Var) */
1087 [ + + ]: 276 : if (rinfo->pseudoconstant)
1088 : 3 : return false;
1089 : :
1090 : : /* Clauses referencing multiple, or no, varnos are incompatible */
1091 [ - + ]: 273 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1726 tomas.vondra@postgre 1092 :UBC 0 : return false;
1093 : :
1726 tomas.vondra@postgre 1094 :CBC 273 : clause = (Node *) rinfo->clause;
1095 : : }
1096 : :
1097 [ + + ]: 318 : if (is_opclause(clause))
1098 : : {
1099 : : /* If it's an opclause, check for Var = Const or Const = Var. */
1100 : 102 : OpExpr *expr = (OpExpr *) clause;
1101 : :
1102 : : /* Only expressions with two arguments are candidates. */
1103 [ - + ]: 102 : if (list_length(expr->args) != 2)
1726 tomas.vondra@postgre 1104 :UBC 0 : return false;
1105 : :
1106 : : /* Make sure non-selected argument is a pseudoconstant. */
1726 tomas.vondra@postgre 1107 [ + - ]:CBC 102 : if (is_pseudo_constant_clause(lsecond(expr->args)))
1108 : 102 : clause_expr = linitial(expr->args);
1726 tomas.vondra@postgre 1109 [ # # ]:UBC 0 : else if (is_pseudo_constant_clause(linitial(expr->args)))
1110 : 0 : clause_expr = lsecond(expr->args);
1111 : : else
1112 : 0 : return false;
1113 : :
1114 : : /*
1115 : : * If it's not an "=" operator, just ignore the clause, as it's not
1116 : : * compatible with functional dependencies.
1117 : : *
1118 : : * This uses the function for estimating selectivity, not the operator
1119 : : * directly (a bit awkward, but well ...).
1120 : : *
1121 : : * XXX this is pretty dubious; probably it'd be better to check btree
1122 : : * or hash opclass membership, so as not to be fooled by custom
1123 : : * selectivity functions, and to be more consistent with decisions
1124 : : * elsewhere in the planner.
1125 : : */
1726 tomas.vondra@postgre 1126 [ + + ]:CBC 102 : if (get_oprrest(expr->opno) != F_EQSEL)
1127 : 18 : return false;
1128 : :
1129 : : /* OK to proceed with checking "var" */
1130 : : }
1131 [ + + ]: 216 : else if (IsA(clause, ScalarArrayOpExpr))
1132 : : {
1133 : : /* If it's a scalar array operator, check for Var IN Const. */
1134 : 195 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1135 : :
1136 : : /*
1137 : : * Reject ALL() variant, we only care about ANY/IN.
1138 : : *
1139 : : * FIXME Maybe we should check if all the values are the same, and
1140 : : * allow ALL in that case? Doesn't seem very practical, though.
1141 : : */
1142 [ + + ]: 195 : if (!expr->useOr)
1143 : 18 : return false;
1144 : :
1145 : : /* Only expressions with two arguments are candidates. */
1146 [ - + ]: 177 : if (list_length(expr->args) != 2)
1726 tomas.vondra@postgre 1147 :UBC 0 : return false;
1148 : :
1149 : : /*
1150 : : * We know it's always (Var IN Const), so we assume the var is the
1151 : : * first argument, and pseudoconstant is the second one.
1152 : : */
1726 tomas.vondra@postgre 1153 [ - + ]:CBC 177 : if (!is_pseudo_constant_clause(lsecond(expr->args)))
1726 tomas.vondra@postgre 1154 :UBC 0 : return false;
1155 : :
1726 tomas.vondra@postgre 1156 :CBC 177 : clause_expr = linitial(expr->args);
1157 : :
1158 : : /*
1159 : : * If it's not an "=" operator, just ignore the clause, as it's not
1160 : : * compatible with functional dependencies. The operator is identified
1161 : : * simply by looking at which function it uses to estimate
1162 : : * selectivity. That's a bit strange, but it's what other similar
1163 : : * places do.
1164 : : */
1165 [ + + ]: 177 : if (get_oprrest(expr->opno) != F_EQSEL)
1166 : 90 : return false;
1167 : :
1168 : : /* OK to proceed with checking "var" */
1169 : : }
1170 [ + - ]: 21 : else if (is_orclause(clause))
1171 : : {
1172 : 21 : BoolExpr *bool_expr = (BoolExpr *) clause;
1173 : :
1174 : : /* start with no expression (we'll use the first match) */
1175 : 21 : *expr = NULL;
1176 : :
1177 [ + - + + : 60 : foreach(lc, bool_expr->args)
+ + ]
1178 : : {
1179 : 45 : Node *or_expr = NULL;
1180 : :
1181 : : /*
1182 : : * Had we found incompatible expression in the arguments, treat
1183 : : * the whole expression as incompatible.
1184 : : */
1185 [ + + ]: 45 : if (!dependency_is_compatible_expression((Node *) lfirst(lc), relid,
1186 : : statlist, &or_expr))
1187 : 6 : return false;
1188 : :
1189 [ + + ]: 42 : if (*expr == NULL)
1190 : 18 : *expr = or_expr;
1191 : :
1192 : : /* ensure all the expressions are the same */
1193 [ + + ]: 42 : if (!equal(or_expr, *expr))
1194 : 3 : return false;
1195 : : }
1196 : :
1197 : : /* the expression is already checked by the recursive call */
1198 : 15 : return true;
1199 : : }
1726 tomas.vondra@postgre 1200 [ # # ]:UBC 0 : else if (is_notclause(clause))
1201 : : {
1202 : : /*
1203 : : * "NOT x" can be interpreted as "x = false", so get the argument and
1204 : : * proceed with seeing if it's a suitable Var.
1205 : : */
1206 : 0 : clause_expr = (Node *) get_notclausearg(clause);
1207 : : }
1208 : : else
1209 : : {
1210 : : /*
1211 : : * A boolean expression "x" can be interpreted as "x = true", so
1212 : : * proceed with seeing if it's a suitable Var.
1213 : : */
14 peter@eisentraut.org 1214 :UNC 0 : clause_expr = clause;
1215 : : }
1216 : :
1217 : : /*
1218 : : * We may ignore any RelabelType node above the operand. (There won't be
1219 : : * more than one, since eval_const_expressions has been applied already.)
1220 : : */
1726 tomas.vondra@postgre 1221 [ - + ]:CBC 171 : if (IsA(clause_expr, RelabelType))
1726 tomas.vondra@postgre 1222 :UBC 0 : clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
1223 : :
1224 : : /*
1225 : : * Search for a matching statistics expression.
1226 : : */
1726 tomas.vondra@postgre 1227 [ + - + + :CBC 174 : foreach(lc, statlist)
+ + ]
1228 : : {
1229 : 171 : StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
1230 : :
1231 : : /* ignore stats without dependencies */
1232 [ - + ]: 171 : if (info->kind != STATS_EXT_DEPENDENCIES)
1726 tomas.vondra@postgre 1233 :UBC 0 : continue;
1234 : :
1726 tomas.vondra@postgre 1235 [ + + + - :CBC 279 : foreach(lc2, info->exprs)
+ + ]
1236 : : {
1237 : 276 : Node *stat_expr = (Node *) lfirst(lc2);
1238 : :
1239 [ + + ]: 276 : if (equal(clause_expr, stat_expr))
1240 : : {
1241 : 168 : *expr = stat_expr;
1242 : 168 : return true;
1243 : : }
1244 : : }
1245 : : }
1246 : :
1247 : 3 : return false;
1248 : : }
1249 : :
1250 : : /*
1251 : : * dependencies_clauselist_selectivity
1252 : : * Return the estimated selectivity of (a subset of) the given clauses
1253 : : * using functional dependency statistics, or 1.0 if no useful functional
1254 : : * dependency statistic exists.
1255 : : *
1256 : : * 'estimatedclauses' is an input/output argument that gets a bit set
1257 : : * corresponding to the (zero-based) list index of each clause that is included
1258 : : * in the estimated selectivity.
1259 : : *
1260 : : * Given equality clauses on attributes (a,b) we find the strongest dependency
1261 : : * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
1262 : : * dependency, we then combine the per-clause selectivities using the formula
1263 : : *
1264 : : * P(a,b) = f * P(a) + (1-f) * P(a) * P(b)
1265 : : *
1266 : : * where 'f' is the degree of the dependency. (Actually we use a slightly
1267 : : * modified version of this formula -- see clauselist_apply_dependencies()).
1268 : : *
1269 : : * With clauses on more than two attributes, the dependencies are applied
1270 : : * recursively, starting with the widest/strongest dependencies. For example
1271 : : * P(a,b,c) is first split like this:
1272 : : *
1273 : : * P(a,b,c) = f * P(a,b) + (1-f) * P(a,b) * P(c)
1274 : : *
1275 : : * assuming (a,b=>c) is the strongest dependency.
1276 : : */
1277 : : Selectivity
3177 simon@2ndQuadrant.co 1278 : 1218 : dependencies_clauselist_selectivity(PlannerInfo *root,
1279 : : List *clauses,
1280 : : int varRelid,
1281 : : JoinType jointype,
1282 : : SpecialJoinInfo *sjinfo,
1283 : : RelOptInfo *rel,
1284 : : Bitmapset **estimatedclauses)
1285 : : {
1286 : 1218 : Selectivity s1 = 1.0;
1287 : : ListCell *l;
1288 : 1218 : Bitmapset *clauses_attnums = NULL;
1289 : : AttrNumber *list_attnums;
1290 : : int listidx;
1291 : : MVDependencies **func_dependencies;
1292 : : int nfunc_dependencies;
1293 : : int total_ndeps;
1294 : : MVDependency **dependencies;
1295 : : int ndependencies;
1296 : : int i;
1297 : : AttrNumber attnum_offset;
1431 tomas.vondra@postgre 1298 [ + - ]: 1218 : RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
1299 : :
1300 : : /* unique expressions */
1301 : : Node **unique_exprs;
1302 : : int unique_exprs_cnt;
1303 : :
1304 : : /* check if there's any stats that might be useful for us. */
3177 simon@2ndQuadrant.co 1305 [ + + ]: 1218 : if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
1306 : 324 : return 1.0;
1307 : :
6 michael@paquier.xyz 1308 :GNC 894 : list_attnums = palloc_array(AttrNumber, list_length(clauses));
1309 : :
1310 : : /*
1311 : : * We allocate space as if every clause was a unique expression, although
1312 : : * that's probably overkill. Some will be simple column references that
1313 : : * we'll translate to attnums, and there might be duplicates. But it's
1314 : : * easier and cheaper to just do one allocation than repalloc later.
1315 : : */
1316 : 894 : unique_exprs = palloc_array(Node *, list_length(clauses));
1726 tomas.vondra@postgre 1317 :CBC 894 : unique_exprs_cnt = 0;
1318 : :
1319 : : /*
1320 : : * Pre-process the clauses list to extract the attnums seen in each item.
1321 : : * We need to determine if there's any clauses which will be useful for
1322 : : * dependency selectivity estimations. Along the way we'll record all of
1323 : : * the attnums for each clause in a list which we'll reference later so we
1324 : : * don't need to repeat the same work again. We'll also keep track of all
1325 : : * attnums seen.
1326 : : *
1327 : : * We also skip clauses that we already estimated using different types of
1328 : : * statistics (we treat them as incompatible).
1329 : : *
1330 : : * To handle expressions, we assign them negative attnums, as if it was a
1331 : : * system attribute (this is fine, as we only allow extended stats on user
1332 : : * attributes). And then we offset everything by the number of
1333 : : * expressions, so that we can store the values in a bitmapset.
1334 : : */
3177 simon@2ndQuadrant.co 1335 : 894 : listidx = 0;
1336 [ + - + + : 2835 : foreach(l, clauses)
+ + ]
1337 : : {
1338 : 1941 : Node *clause = (Node *) lfirst(l);
1339 : : AttrNumber attnum;
1726 tomas.vondra@postgre 1340 : 1941 : Node *expr = NULL;
1341 : :
1342 : : /* ignore clause by default */
1343 : 1941 : list_attnums[listidx] = InvalidAttrNumber;
1344 : :
1345 [ + + ]: 1941 : if (!bms_is_member(listidx, *estimatedclauses))
1346 : : {
1347 : : /*
1348 : : * If it's a simple column reference, just extract the attnum. If
1349 : : * it's an expression, assign a negative attnum as if it was a
1350 : : * system attribute.
1351 : : */
1352 [ + + ]: 1920 : if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
1353 : : {
1354 : 1644 : list_attnums[listidx] = attnum;
1355 : : }
1356 [ + + ]: 276 : else if (dependency_is_compatible_expression(clause, rel->relid,
1357 : : rel->statlist,
1358 : : &expr))
1359 : : {
1360 : : /* special attnum assigned to this expression */
1361 : 141 : attnum = InvalidAttrNumber;
1362 : :
1363 [ - + ]: 141 : Assert(expr != NULL);
1364 : :
1365 : : /* If the expression is duplicate, use the same attnum. */
1366 [ + + ]: 237 : for (i = 0; i < unique_exprs_cnt; i++)
1367 : : {
1368 [ - + ]: 96 : if (equal(unique_exprs[i], expr))
1369 : : {
1370 : : /* negative attribute number to expression */
1726 tomas.vondra@postgre 1371 :UBC 0 : attnum = -(i + 1);
1372 : 0 : break;
1373 : : }
1374 : : }
1375 : :
1376 : : /* not found in the list, so add it */
1726 tomas.vondra@postgre 1377 [ + - ]:CBC 141 : if (attnum == InvalidAttrNumber)
1378 : : {
1379 : 141 : unique_exprs[unique_exprs_cnt++] = expr;
1380 : :
1381 : : /* after incrementing the value, to get -1, -2, ... */
1382 : 141 : attnum = (-unique_exprs_cnt);
1383 : : }
1384 : :
1385 : : /* remember which attnum was assigned to this clause */
1386 : 141 : list_attnums[listidx] = attnum;
1387 : : }
1388 : : }
1389 : :
3177 simon@2ndQuadrant.co 1390 : 1941 : listidx++;
1391 : : }
1392 : :
1726 tomas.vondra@postgre 1393 [ - + ]: 894 : Assert(listidx == list_length(clauses));
1394 : :
1395 : : /*
1396 : : * How much we need to offset the attnums? If there are no expressions,
1397 : : * then no offset is needed. Otherwise we need to offset enough for the
1398 : : * lowest value (-unique_exprs_cnt) to become 1.
1399 : : */
1400 [ + + ]: 894 : if (unique_exprs_cnt > 0)
1401 : 66 : attnum_offset = (unique_exprs_cnt + 1);
1402 : : else
1403 : 828 : attnum_offset = 0;
1404 : :
1405 : : /*
1406 : : * Now that we know how many expressions there are, we can offset the
1407 : : * values just enough to build the bitmapset.
1408 : : */
1409 [ + + ]: 2835 : for (i = 0; i < list_length(clauses); i++)
1410 : : {
1411 : : AttrNumber attnum;
1412 : :
1413 : : /* ignore incompatible or already estimated clauses */
1414 [ + + ]: 1941 : if (list_attnums[i] == InvalidAttrNumber)
1415 : 156 : continue;
1416 : :
1417 : : /* make sure the attnum is in the expected range */
1418 [ - + ]: 1785 : Assert(list_attnums[i] >= (-unique_exprs_cnt));
1419 [ - + ]: 1785 : Assert(list_attnums[i] <= MaxHeapAttributeNumber);
1420 : :
1421 : : /* make sure the attnum is positive (valid AttrNumber) */
1422 : 1785 : attnum = list_attnums[i] + attnum_offset;
1423 : :
1424 : : /*
1425 : : * Either it's a regular attribute, or it's an expression, in which
1426 : : * case we must not have seen it before (expressions are unique).
1427 : : *
1428 : : * XXX Check whether it's a regular attribute has to be done using the
1429 : : * original attnum, while the second check has to use the value with
1430 : : * an offset.
1431 : : */
1432 [ + + - + ]: 1785 : Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) ||
1433 : : !bms_is_member(attnum, clauses_attnums));
1434 : :
1435 : : /*
1436 : : * Remember the offset attnum, both for attributes and expressions.
1437 : : * We'll pass list_attnums to clauselist_apply_dependencies, which
1438 : : * uses it to identify clauses in a bitmap. We could also pass the
1439 : : * offset, but this is more convenient.
1440 : : */
1441 : 1785 : list_attnums[i] = attnum;
1442 : :
1443 : 1785 : clauses_attnums = bms_add_member(clauses_attnums, attnum);
1444 : : }
1445 : :
1446 : : /*
1447 : : * If there's not at least two distinct attnums and expressions, then
1448 : : * reject the whole list of clauses. We must return 1.0 so the calling
1449 : : * function's selectivity is unaffected.
1450 : : */
1938 drowley@postgresql.o 1451 [ + + ]: 894 : if (bms_membership(clauses_attnums) != BMS_MULTIPLE)
1452 : : {
2089 dean.a.rasheed@gmail 1453 : 96 : bms_free(clauses_attnums);
3177 simon@2ndQuadrant.co 1454 : 96 : pfree(list_attnums);
1455 : 96 : return 1.0;
1456 : : }
1457 : :
1458 : : /*
1459 : : * Load all functional dependencies matching at least two parameters. We
1460 : : * can simply consider all dependencies at once, without having to search
1461 : : * for the best statistics object.
1462 : : *
1463 : : * To not waste cycles and memory, we deserialize dependencies only for
1464 : : * statistics that match at least two attributes. The array is allocated
1465 : : * with the assumption that all objects match - we could grow the array to
1466 : : * make it just the right size, but it's likely wasteful anyway thanks to
1467 : : * moving the freed chunks to freelists etc.
1468 : : */
6 michael@paquier.xyz 1469 :GNC 798 : func_dependencies = palloc_array(MVDependencies *, list_length(rel->statlist));
2089 dean.a.rasheed@gmail 1470 :CBC 798 : nfunc_dependencies = 0;
1471 : 798 : total_ndeps = 0;
1472 : :
1473 [ + - + + : 1665 : foreach(l, rel->statlist)
+ + ]
1474 : : {
1475 : 867 : StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
1476 : : int nmatched;
1477 : : int nexprs;
1478 : : int k;
1479 : : MVDependencies *deps;
1480 : :
1481 : : /* skip statistics that are not of the correct type */
2164 tomas.vondra@postgre 1482 [ + + ]: 867 : if (stat->kind != STATS_EXT_DEPENDENCIES)
1483 : 54 : continue;
1484 : :
1485 : : /* skip statistics with mismatching stxdinherit value */
1430 1486 [ - + ]: 813 : if (stat->inherit != rte->inh)
1430 tomas.vondra@postgre 1487 :UBC 0 : continue;
1488 : :
1489 : : /*
1490 : : * Count matching attributes - we have to undo the attnum offsets. The
1491 : : * input attribute numbers are not offset (expressions are not
1492 : : * included in stat->keys, so it's not necessary). But we need to
1493 : : * offset it before checking against clauses_attnums.
1494 : : */
1726 tomas.vondra@postgre 1495 :CBC 813 : nmatched = 0;
1496 : 813 : k = -1;
1497 [ + + ]: 3060 : while ((k = bms_next_member(stat->keys, k)) >= 0)
1498 : : {
1499 : 2247 : AttrNumber attnum = (AttrNumber) k;
1500 : :
1501 : : /* skip expressions */
1502 [ - + ]: 2247 : if (!AttrNumberIsForUserDefinedAttr(attnum))
1726 tomas.vondra@postgre 1503 :UBC 0 : continue;
1504 : :
1505 : : /* apply the same offset as above */
1726 tomas.vondra@postgre 1506 :CBC 2247 : attnum += attnum_offset;
1507 : :
1508 [ + + ]: 2247 : if (bms_is_member(attnum, clauses_attnums))
1509 : 1620 : nmatched++;
1510 : : }
1511 : :
1512 : : /* count matching expressions */
1513 : 813 : nexprs = 0;
1514 [ + + ]: 942 : for (i = 0; i < unique_exprs_cnt; i++)
1515 : : {
1516 : : ListCell *lc;
1517 : :
1518 [ + - + + : 516 : foreach(lc, stat->exprs)
+ + ]
1519 : : {
1520 : 387 : Node *stat_expr = (Node *) lfirst(lc);
1521 : :
1522 : : /* try to match it */
1523 [ + + ]: 387 : if (equal(stat_expr, unique_exprs[i]))
1524 : 129 : nexprs++;
1525 : : }
1526 : : }
1527 : :
1528 : : /*
1529 : : * Skip objects matching fewer than two attributes/expressions from
1530 : : * clauses.
1531 : : */
1532 [ + + ]: 813 : if (nmatched + nexprs < 2)
2164 1533 : 9 : continue;
1534 : :
1430 1535 : 804 : deps = statext_dependencies_load(stat->statOid, rte->inh);
1536 : :
1537 : : /*
1538 : : * The expressions may be represented by different attnums in the
1539 : : * stats, we need to remap them to be consistent with the clauses.
1540 : : * That will make the later steps (e.g. picking the strongest item and
1541 : : * so on) much simpler and cheaper, because it won't need to care
1542 : : * about the offset at all.
1543 : : *
1544 : : * When we're at it, we can ignore dependencies that are not fully
1545 : : * matched by clauses (i.e. referencing attributes or expressions that
1546 : : * are not in the clauses).
1547 : : *
1548 : : * We have to do this for all statistics, as long as there are any
1549 : : * expressions - we need to shift the attnums in all dependencies.
1550 : : *
1551 : : * XXX Maybe we should do this always, because it also eliminates some
1552 : : * of the dependencies early. It might be cheaper than having to walk
1553 : : * the longer list in find_strongest_dependency later, especially as
1554 : : * we need to do that repeatedly?
1555 : : *
1556 : : * XXX We have to do this even when there are no expressions in
1557 : : * clauses, otherwise find_strongest_dependency may fail for stats
1558 : : * with expressions (due to lookup of negative value in bitmap). So we
1559 : : * need to at least filter out those dependencies. Maybe we could do
1560 : : * it in a cheaper way (if there are no expr clauses, we can just
1561 : : * discard all negative attnums without any lookups).
1562 : : */
1726 1563 [ + + - + ]: 804 : if (unique_exprs_cnt > 0 || stat->exprs != NIL)
1564 : : {
1565 : 54 : int ndeps = 0;
1566 : :
1567 [ + + ]: 324 : for (i = 0; i < deps->ndeps; i++)
1568 : : {
1569 : 270 : bool skip = false;
1570 : 270 : MVDependency *dep = deps->deps[i];
1571 : : int j;
1572 : :
1573 [ + + ]: 753 : for (j = 0; j < dep->nattributes; j++)
1574 : : {
1575 : : int idx;
1576 : : Node *expr;
1577 : 615 : AttrNumber unique_attnum = InvalidAttrNumber;
1578 : : AttrNumber attnum;
1579 : :
1580 : : /* undo the per-statistics offset */
1581 : 615 : attnum = dep->attributes[j];
1582 : :
1583 : : /*
1584 : : * For regular attributes we can simply check if it
1585 : : * matches any clause. If there's no matching clause, we
1586 : : * can just ignore it. We need to offset the attnum
1587 : : * though.
1588 : : */
1589 [ - + ]: 615 : if (AttrNumberIsForUserDefinedAttr(attnum))
1590 : : {
1726 tomas.vondra@postgre 1591 :UBC 0 : dep->attributes[j] = attnum + attnum_offset;
1592 : :
1593 [ # # ]: 0 : if (!bms_is_member(dep->attributes[j], clauses_attnums))
1594 : : {
1595 : 0 : skip = true;
1596 : 0 : break;
1597 : : }
1598 : :
1599 : 0 : continue;
1600 : : }
1601 : :
1602 : : /*
1603 : : * the attnum should be a valid system attnum (-1, -2,
1604 : : * ...)
1605 : : */
1726 tomas.vondra@postgre 1606 [ - + ]:CBC 615 : Assert(AttributeNumberIsValid(attnum));
1607 : :
1608 : : /*
1609 : : * For expressions, we need to do two translations. First
1610 : : * we have to translate the negative attnum to index in
1611 : : * the list of expressions (in the statistics object).
1612 : : * Then we need to see if there's a matching clause. The
1613 : : * index of the unique expression determines the attnum
1614 : : * (and we offset it).
1615 : : */
1616 : 615 : idx = -(1 + attnum);
1617 : :
1618 : : /* Is the expression index is valid? */
1619 [ + - - + ]: 615 : Assert((idx >= 0) && (idx < list_length(stat->exprs)));
1620 : :
1621 : 615 : expr = (Node *) list_nth(stat->exprs, idx);
1622 : :
1623 : : /* try to find the expression in the unique list */
1168 drowley@postgresql.o 1624 [ + + ]: 1230 : for (int m = 0; m < unique_exprs_cnt; m++)
1625 : : {
1626 : : /*
1627 : : * found a matching unique expression, use the attnum
1628 : : * (derived from index of the unique expression)
1629 : : */
1630 [ + + ]: 1098 : if (equal(unique_exprs[m], expr))
1631 : : {
1632 : 483 : unique_attnum = -(m + 1) + attnum_offset;
1726 tomas.vondra@postgre 1633 : 483 : break;
1634 : : }
1635 : : }
1636 : :
1637 : : /*
1638 : : * Found no matching expression, so we can simply skip
1639 : : * this dependency, because there's no chance it will be
1640 : : * fully covered.
1641 : : */
1642 [ + + ]: 615 : if (unique_attnum == InvalidAttrNumber)
1643 : : {
1644 : 132 : skip = true;
1645 : 132 : break;
1646 : : }
1647 : :
1648 : : /* otherwise remap it to the new attnum */
1649 : 483 : dep->attributes[j] = unique_attnum;
1650 : : }
1651 : :
1652 : : /* if found a matching dependency, keep it */
1653 [ + + ]: 270 : if (!skip)
1654 : : {
1655 : : /* maybe we've skipped something earlier, so move it */
1656 [ - + ]: 138 : if (ndeps != i)
1726 tomas.vondra@postgre 1657 :UBC 0 : deps->deps[ndeps] = deps->deps[i];
1658 : :
1726 tomas.vondra@postgre 1659 :CBC 138 : ndeps++;
1660 : : }
1661 : : }
1662 : :
1663 : 54 : deps->ndeps = ndeps;
1664 : : }
1665 : :
1666 : : /*
1667 : : * It's possible we've removed all dependencies, in which case we
1668 : : * don't bother adding it to the list.
1669 : : */
1670 [ + - ]: 804 : if (deps->ndeps > 0)
1671 : : {
1672 : 804 : func_dependencies[nfunc_dependencies] = deps;
1673 : 804 : total_ndeps += deps->ndeps;
1674 : 804 : nfunc_dependencies++;
1675 : : }
1676 : : }
1677 : :
1678 : : /* if no matching stats could be found then we've nothing to do */
2089 dean.a.rasheed@gmail 1679 [ - + ]: 798 : if (nfunc_dependencies == 0)
1680 : : {
2089 dean.a.rasheed@gmail 1681 :UBC 0 : pfree(func_dependencies);
1682 : 0 : bms_free(clauses_attnums);
3177 simon@2ndQuadrant.co 1683 : 0 : pfree(list_attnums);
1726 tomas.vondra@postgre 1684 : 0 : pfree(unique_exprs);
3177 simon@2ndQuadrant.co 1685 : 0 : return 1.0;
1686 : : }
1687 : :
1688 : : /*
1689 : : * Work out which dependencies we can apply, starting with the
1690 : : * widest/strongest ones, and proceeding to smaller/weaker ones.
1691 : : */
6 michael@paquier.xyz 1692 :GNC 798 : dependencies = palloc_array(MVDependency *, total_ndeps);
2089 dean.a.rasheed@gmail 1693 :CBC 798 : ndependencies = 0;
1694 : :
1695 : : while (true)
3177 simon@2ndQuadrant.co 1696 : 945 : {
1697 : : MVDependency *dependency;
1698 : : AttrNumber attnum;
1699 : :
1700 : : /* the widest/strongest dependency, fully matched by clauses */
2089 dean.a.rasheed@gmail 1701 : 1743 : dependency = find_strongest_dependency(func_dependencies,
1702 : : nfunc_dependencies,
1703 : : clauses_attnums);
3177 simon@2ndQuadrant.co 1704 [ + + ]: 1743 : if (!dependency)
1705 : 798 : break;
1706 : :
2089 dean.a.rasheed@gmail 1707 : 945 : dependencies[ndependencies++] = dependency;
1708 : :
1709 : : /* Ignore dependencies using this implied attribute in later loops */
1710 : 945 : attnum = dependency->attributes[dependency->nattributes - 1];
1711 : 945 : clauses_attnums = bms_del_member(clauses_attnums, attnum);
1712 : : }
1713 : :
1714 : : /*
1715 : : * If we found applicable dependencies, use them to estimate all
1716 : : * compatible clauses on attributes that they refer to.
1717 : : */
1718 [ + - ]: 798 : if (ndependencies != 0)
1719 : 798 : s1 = clauselist_apply_dependencies(root, clauses, varRelid, jointype,
1720 : : sjinfo, dependencies, ndependencies,
1721 : : list_attnums, estimatedclauses);
1722 : :
1723 : : /* free deserialized functional dependencies (and then the array) */
1724 [ + + ]: 1602 : for (i = 0; i < nfunc_dependencies; i++)
1725 : 804 : pfree(func_dependencies[i]);
1726 : :
3177 simon@2ndQuadrant.co 1727 : 798 : pfree(dependencies);
2089 dean.a.rasheed@gmail 1728 : 798 : pfree(func_dependencies);
1729 : 798 : bms_free(clauses_attnums);
3177 simon@2ndQuadrant.co 1730 : 798 : pfree(list_attnums);
1726 tomas.vondra@postgre 1731 : 798 : pfree(unique_exprs);
1732 : :
3177 simon@2ndQuadrant.co 1733 : 798 : return s1;
1734 : : }
|