Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pgpa_identifier.c
4 : : * create appropriate identifiers for range table entries
5 : : *
6 : : * The goal of this module is to be able to produce identifiers for range
7 : : * table entries that are unique, understandable to human beings, and
8 : : * able to be reconstructed during future planning cycles. As an
9 : : * exception, we do not care about, or want to produce, identifiers for
10 : : * RTE_JOIN entries. This is because (1) we would end up with a ton of
11 : : * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
12 : : * RTEs; and (3) we intend to refer to joins by their constituent members
13 : : * rather than by reference to the join RTE.
14 : : *
15 : : * In general, we construct identifiers of the following form:
16 : : *
17 : : * alias_name#occurrence_number/child_table_name@subquery_name
18 : : *
19 : : * However, occurrence_number is omitted when it is the first occurrence
20 : : * within the same subquery, child_table_name is omitted for relations that
21 : : * are not child tables, and subquery_name is omitted for the topmost
22 : : * query level. Whenever an item is omitted, the preceding punctuation mark
23 : : * is also omitted. Identifier-style escaping is applied to alias_name and
24 : : * subquery_name. In generated advice, child table names are always
25 : : * schema-qualified, but users can supply advice where the schema name is
26 : : * not mentioned. Identifier-style escaping is applied to the schema and to
27 : : * the relation name separately.
28 : : *
29 : : * The upshot of all of these rules is that in simple cases, the relation
30 : : * identifier is textually identical to the alias name, making life easier
31 : : * for users. However, even in complex cases, every relation identifier
32 : : * for a given query will be unique (or at least we hope so: if not, this
33 : : * code is buggy and the identifier format might need to be rethought).
34 : : *
35 : : * A key goal of this system is that we want to be able to reconstruct the
36 : : * same identifiers during a future planning cycle for the same query, so
37 : : * that if a certain behavior is specified for a certain identifier, we can
38 : : * properly identify the RTI for which that behavior is mandated. In order
39 : : * for this to work, subquery names must be unique and known before the
40 : : * subquery is planned, and the remainder of the identifier must not depend
41 : : * on any part of the query outside of the current subquery level. In
42 : : * particular, occurrence_number must be calculated relative to the range
43 : : * table for the relevant subquery, not the final flattened range table.
44 : : *
45 : : * NB: All of this code must use rt_fetch(), not planner_rt_fetch()!
46 : : * Join removal and self-join elimination remove rels from the arrays
47 : : * that planner_rt_fetch() uses; using rt_fetch() is necessary to get
48 : : * stable results.
49 : : *
50 : : * Copyright (c) 2016-2026, PostgreSQL Global Development Group
51 : : *
52 : : * contrib/pg_plan_advice/pgpa_identifier.c
53 : : *
54 : : *-------------------------------------------------------------------------
55 : : */
56 : :
57 : : #include "postgres.h"
58 : :
59 : : #include "pgpa_identifier.h"
60 : :
61 : : #include "parser/parsetree.h"
62 : : #include "utils/builtins.h"
63 : : #include "utils/lsyscache.h"
64 : :
65 : : static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
66 : : List *appinfos);
67 : : static int pgpa_occurrence_number(List *rtable, Index *top_rti_map,
68 : : SubPlanRTInfo *rtinfo, Index rti);
69 : :
70 : : /*
71 : : * Create a range table identifier from scratch.
72 : : *
73 : : * This function leaves the caller to do all the heavy lifting, so it's
74 : : * generally better to use one of the functions below instead.
75 : : *
76 : : * See the file header comments for more details on the format of an
77 : : * identifier.
78 : : */
79 : : const char *
3 rhaas@postgresql.org 80 :GNC 1150 : pgpa_identifier_string(const pgpa_identifier *rid)
81 : : {
82 : : const char *result;
83 : :
84 [ - + ]: 1150 : Assert(rid->alias_name != NULL);
85 : 1150 : result = quote_identifier(rid->alias_name);
86 : :
87 [ - + ]: 1150 : Assert(rid->occurrence >= 0);
88 [ + + ]: 1150 : if (rid->occurrence > 1)
89 : 16 : result = psprintf("%s#%d", result, rid->occurrence);
90 : :
91 [ + + ]: 1150 : if (rid->partrel != NULL)
92 : : {
93 [ + + ]: 190 : if (rid->partnsp == NULL)
94 : 7 : result = psprintf("%s/%s", result,
95 : 7 : quote_identifier(rid->partrel));
96 : : else
97 : 183 : result = psprintf("%s/%s.%s", result,
98 : 183 : quote_identifier(rid->partnsp),
99 : 183 : quote_identifier(rid->partrel));
100 : : }
101 : :
102 [ + + ]: 1150 : if (rid->plan_name != NULL)
103 : 34 : result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
104 : :
105 : 1150 : return result;
106 : : }
107 : :
108 : : /*
109 : : * Compute a relation identifier for a particular RTI.
110 : : *
111 : : * The caller provides root and rti, and gets the necessary details back via
112 : : * the remaining parameters.
113 : : */
114 : : void
115 : 1425 : pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
116 : : pgpa_identifier *rid)
117 : : {
118 : 1425 : Index top_rti = rti;
119 : 1425 : int occurrence = 1;
120 : : RangeTblEntry *rte;
121 : : RangeTblEntry *top_rte;
122 : 1425 : char *partnsp = NULL;
123 : 1425 : char *partrel = NULL;
124 : :
125 : : /*
126 : : * If this is a child RTE, find the topmost parent that is still of type
127 : : * RTE_RELATION. We do this because we identify children of partitioned
128 : : * tables by the name of the child table, but subqueries can also have
129 : : * child rels and we don't care about those here.
130 : : */
131 : : for (;;)
132 : 286 : {
133 : : AppendRelInfo *appinfo;
134 : : RangeTblEntry *parent_rte;
135 : :
136 : : /* append_rel_array can be NULL if there are no children */
137 [ + + ]: 1711 : if (root->append_rel_array == NULL ||
138 [ + + ]: 684 : (appinfo = root->append_rel_array[top_rti]) == NULL)
139 : : break;
140 : :
141 : 286 : parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
142 [ - + ]: 286 : if (parent_rte->rtekind != RTE_RELATION)
3 rhaas@postgresql.org 143 :UNC 0 : break;
144 : :
3 rhaas@postgresql.org 145 :GNC 286 : top_rti = appinfo->parent_relid;
146 : : }
147 : :
148 : : /* Get the range table entries for the RTI and top RTI. */
149 : 1425 : rte = rt_fetch(rti, root->parse->rtable);
150 : 1425 : top_rte = rt_fetch(top_rti, root->parse->rtable);
151 [ - + ]: 1425 : Assert(rte->rtekind != RTE_JOIN);
152 [ - + ]: 1425 : Assert(top_rte->rtekind != RTE_JOIN);
153 : :
154 : : /* Work out the correct occurrence number. */
155 [ + + ]: 2709 : for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti)
156 : : {
157 : : RangeTblEntry *prior_rte;
158 : : AppendRelInfo *appinfo;
159 : :
160 : : /*
161 : : * If this is a child rel of a parent that is a relation, skip it.
162 : : *
163 : : * Such range table entries are disambiguated by mentioning the schema
164 : : * and name of the table, not by counting them as separate occurrences
165 : : * of the same table.
166 : : *
167 : : * NB: append_rel_array can be NULL if there are no children
168 : : */
169 [ + + ]: 1284 : if (root->append_rel_array != NULL &&
170 [ - + ]: 390 : (appinfo = root->append_rel_array[prior_rti]) != NULL)
171 : : {
172 : : RangeTblEntry *parent_rte;
173 : :
3 rhaas@postgresql.org 174 :UNC 0 : parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
175 [ # # ]: 0 : if (parent_rte->rtekind == RTE_RELATION)
176 : 0 : continue;
177 : : }
178 : :
179 : : /* Skip NULL entries and joins. */
3 rhaas@postgresql.org 180 :GNC 1284 : prior_rte = rt_fetch(prior_rti, root->parse->rtable);
181 [ + - + + ]: 1284 : if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
182 : 122 : continue;
183 : :
184 : : /* Skip if the alias name differs. */
185 [ + + ]: 1162 : if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
186 : 1149 : continue;
187 : :
188 : : /* Looks like a true duplicate. */
189 : 13 : ++occurrence;
190 : : }
191 : :
192 : : /* If this is a child table, get the schema and relation names. */
193 [ + + ]: 1425 : if (rti != top_rti)
194 : : {
195 : 286 : partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
196 : 286 : partrel = get_rel_name(rte->relid);
197 : : }
198 : :
199 : : /* OK, we have all the answers we need. Return them to the caller. */
200 : 1425 : rid->alias_name = top_rte->eref->aliasname;
201 : 1425 : rid->occurrence = occurrence;
202 : 1425 : rid->partnsp = partnsp;
203 : 1425 : rid->partrel = partrel;
204 : 1425 : rid->plan_name = root->plan_name;
205 : 1425 : }
206 : :
207 : : /*
208 : : * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN
209 : : * RTIs that may be present.
210 : : *
211 : : * RTE_JOIN entries are excluded because they cannot be mentioned by plan
212 : : * advice.
213 : : *
214 : : * The caller is responsible for making sure that the tkeys array is large
215 : : * enough to store the results.
216 : : *
217 : : * The return value is the number of identifiers computed.
218 : : */
219 : : int
220 : 718 : pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids,
221 : : pgpa_identifier *rids)
222 : : {
223 : 718 : int count = 0;
224 : 718 : int rti = -1;
225 : :
226 [ + + ]: 1561 : while ((rti = bms_next_member(relids, rti)) >= 0)
227 : : {
228 : 843 : RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
229 : :
230 [ + + ]: 843 : if (rte->rtekind == RTE_JOIN)
231 : 13 : continue;
232 : 830 : pgpa_compute_identifier_by_rti(root, rti, &rids[count++]);
233 : : }
234 : :
235 [ - + ]: 718 : Assert(count > 0);
236 : 718 : return count;
237 : : }
238 : :
239 : : /*
240 : : * Create an array of range table identifiers for all the non-NULL,
241 : : * non-RTE_JOIN entries in the PlannedStmt's range table.
242 : : */
243 : : pgpa_identifier *
244 : 306 : pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
245 : : {
246 : 306 : Index rtable_length = list_length(pstmt->rtable);
247 : 306 : pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length);
248 : : Index *top_rti_map;
249 : 306 : int rtinfoindex = 0;
250 : 306 : SubPlanRTInfo *rtinfo = NULL;
251 : 306 : SubPlanRTInfo *nextrtinfo = NULL;
252 : :
253 : : /*
254 : : * Account for relations added by inheritance expansion of partitioned
255 : : * tables.
256 : : */
257 : 306 : top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
258 : : pstmt->appendRelations);
259 : :
260 : : /*
261 : : * When we begin iterating, we're processing the portion of the range
262 : : * table that originated from the top-level PlannerInfo, so subrtinfo is
263 : : * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose
264 : : * portion of the range table we are processing. nextrtinfo is always the
265 : : * SubPlanRTInfo that follows the current one, if any, so when we're
266 : : * processing the top-level query's portion of the range table, the next
267 : : * SubPlanRTInfo is the very first one.
268 : : */
269 [ + + ]: 306 : if (pstmt->subrtinfos != NULL)
270 : 18 : nextrtinfo = linitial(pstmt->subrtinfos);
271 : :
272 : : /* Main loop over the range table. */
273 [ + + ]: 1138 : for (Index rti = 1; rti <= rtable_length; rti++)
274 : : {
275 : : const char *plan_name;
276 : : Index top_rti;
277 : : RangeTblEntry *rte;
278 : : RangeTblEntry *top_rte;
279 : 832 : char *partnsp = NULL;
280 : 832 : char *partrel = NULL;
281 : : int occurrence;
282 : : pgpa_identifier *rid;
283 : :
284 : : /*
285 : : * Advance to the next SubPlanRTInfo, if it's time to do that.
286 : : *
287 : : * This loop probably shouldn't ever iterate more than once, because
288 : : * that would imply that a subquery was planned but added nothing to
289 : : * the range table; but let's be defensive and assume it can happen.
290 : : */
291 [ + + + + ]: 850 : while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
292 : : {
293 : 18 : rtinfo = nextrtinfo;
294 [ + - ]: 18 : if (++rtinfoindex >= list_length(pstmt->subrtinfos))
295 : 18 : nextrtinfo = NULL;
296 : : else
3 rhaas@postgresql.org 297 :UNC 0 : nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
298 : : }
299 : :
300 : : /* Fetch the range table entry, if any. */
3 rhaas@postgresql.org 301 :GNC 832 : rte = rt_fetch(rti, pstmt->rtable);
302 : :
303 : : /*
304 : : * We can't and don't need to identify null entries, and we don't want
305 : : * to identify join entries.
306 : : */
307 [ + - + + ]: 832 : if (rte == NULL || rte->rtekind == RTE_JOIN)
308 : 140 : continue;
309 : :
310 : : /*
311 : : * If this is not a relation added by partitioned table expansion,
312 : : * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
313 : : * we need the information for the top RTI/RTE, and must also fetch
314 : : * the partition schema and name.
315 : : */
316 : 692 : top_rti = top_rti_map[rti - 1];
317 [ + + ]: 692 : if (rti == top_rti)
318 : 574 : top_rte = rte;
319 : : else
320 : : {
321 : 118 : top_rte = rt_fetch(top_rti, pstmt->rtable);
322 : : partnsp =
323 : 118 : get_namespace_name_or_temp(get_rel_namespace(rte->relid));
324 : 118 : partrel = get_rel_name(rte->relid);
325 : : }
326 : :
327 : : /* Compute the correct occurrence number. */
328 : 692 : occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
329 : : rtinfo, top_rti);
330 : :
331 : : /* Get the name of the current plan (NULL for toplevel query). */
332 [ + + ]: 692 : plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
333 : :
334 : : /* Save all the details we've derived. */
335 : 692 : rid = &result[rti - 1];
336 : 692 : rid->alias_name = top_rte->eref->aliasname;
337 : 692 : rid->occurrence = occurrence;
338 : 692 : rid->partnsp = partnsp;
339 : 692 : rid->partrel = partrel;
340 : 692 : rid->plan_name = plan_name;
341 : : }
342 : :
343 : 306 : return result;
344 : : }
345 : :
346 : : /*
347 : : * Search for a pgpa_identifier in the array of identifiers computed for the
348 : : * range table. If exactly one match is found, return the matching RTI; else
349 : : * return 0.
350 : : */
351 : : Index
352 : 140 : pgpa_compute_rti_from_identifier(int rtable_length,
353 : : pgpa_identifier *rt_identifiers,
354 : : pgpa_identifier *rid)
355 : : {
356 : 140 : Index result = 0;
357 : :
358 [ + + ]: 705 : for (Index rti = 1; rti <= rtable_length; ++rti)
359 : : {
360 : 565 : pgpa_identifier *rti_rid = &rt_identifiers[rti - 1];
361 : :
362 : : /* If there's no identifier for this RTI, skip it. */
363 [ + + ]: 565 : if (rti_rid->alias_name == NULL)
364 : 103 : continue;
365 : :
366 : : /*
367 : : * If it matches, return this RTI. As usual, an omitted partition
368 : : * schema matches anything, but partition and plan names must either
369 : : * match exactly or be omitted on both sides.
370 : : */
371 [ + + ]: 462 : if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 &&
372 [ + + ]: 197 : rid->occurrence == rti_rid->occurrence &&
373 [ + + + + ]: 193 : (rid->partnsp == NULL || rti_rid->partnsp == NULL ||
374 [ + - + + ]: 202 : strcmp(rid->partnsp, rti_rid->partnsp) == 0) &&
375 [ + - ]: 333 : strings_equal_or_both_null(rid->partrel, rti_rid->partrel) &&
376 : 140 : strings_equal_or_both_null(rid->plan_name, rti_rid->plan_name))
377 : : {
378 [ - + ]: 140 : if (result != 0)
379 : : {
380 : : /* Multiple matches were found. */
3 rhaas@postgresql.org 381 :UNC 0 : return 0;
382 : : }
3 rhaas@postgresql.org 383 :GNC 140 : result = rti;
384 : : }
385 : : }
386 : :
387 : 140 : return result;
388 : : }
389 : :
390 : : /*
391 : : * Build a mapping from each RTI to the RTI whose alias_name will be used to
392 : : * construct the range table identifier.
393 : : *
394 : : * For child relations, this is the topmost parent that is still of type
395 : : * RTE_RELATION. For other relations, it's just the original RTI.
396 : : *
397 : : * Since we're eventually going to need this information for every RTI in
398 : : * the range table, it's best to compute all the answers in a single pass over
399 : : * the AppendRelInfo list. Otherwise, we might end up searching through that
400 : : * list repeatedly for entries of interest.
401 : : *
402 : : * Note that the returned array is uses zero-based indexing, while RTIs use
403 : : * 1-based indexing, so subtract 1 from the RTI before looking it up in the
404 : : * array.
405 : : */
406 : : static Index *
407 : 306 : pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
408 : : {
409 : 306 : Index *top_rti_map = palloc0_array(Index, rtable_length);
410 : :
411 : : /* Initially, make every RTI point to itself. */
412 [ + + ]: 1138 : for (Index rti = 1; rti <= rtable_length; ++rti)
413 : 832 : top_rti_map[rti - 1] = rti;
414 : :
415 : : /* Update the map for each AppendRelInfo object. */
416 [ + + + + : 730 : foreach_node(AppendRelInfo, appinfo, appinfos)
+ + ]
417 : : {
418 : 118 : Index parent_rti = appinfo->parent_relid;
419 : 118 : RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable);
420 : :
421 : : /* If the parent is not RTE_RELATION, ignore this entry. */
422 [ - + ]: 118 : if (parent_rte->rtekind != RTE_RELATION)
3 rhaas@postgresql.org 423 :UNC 0 : continue;
424 : :
425 : : /*
426 : : * Map the child to wherever we mapped the parent. Parents always
427 : : * precede their children in the AppendRelInfo list, so this should
428 : : * work out.
429 : : */
3 rhaas@postgresql.org 430 :GNC 118 : top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1];
431 : : }
432 : :
433 : 306 : return top_rti_map;
434 : : }
435 : :
436 : : /*
437 : : * Find the occurrence number of a certain relation within a certain subquery.
438 : : *
439 : : * The same alias name can occur multiple times within a subquery, but we want
440 : : * to disambiguate by giving different occurrences different integer indexes.
441 : : * However, child tables are disambiguated by including the table name rather
442 : : * than by incrementing the occurrence number; and joins are not named and so
443 : : * shouldn't increment the occurrence number either.
444 : : */
445 : : static int
446 : 692 : pgpa_occurrence_number(List *rtable, Index *top_rti_map,
447 : : SubPlanRTInfo *rtinfo, Index rti)
448 : : {
449 [ + + ]: 692 : Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
450 : 692 : int occurrence = 1;
451 : 692 : RangeTblEntry *rte = rt_fetch(rti, rtable);
452 : :
453 [ + + ]: 1174 : for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
454 : : {
455 : : RangeTblEntry *prior_rte;
456 : :
457 : : /*
458 : : * If this is a child rel of a parent that is a relation, skip it.
459 : : *
460 : : * Such range table entries are disambiguated by mentioning the schema
461 : : * and name of the table, not by counting them as separate occurrences
462 : : * of the same table.
463 : : */
464 [ - + ]: 482 : if (top_rti_map[prior_rti - 1] != prior_rti)
3 rhaas@postgresql.org 465 :UNC 0 : continue;
466 : :
467 : : /* Skip joins. */
3 rhaas@postgresql.org 468 :GNC 482 : prior_rte = rt_fetch(prior_rti, rtable);
469 [ + + ]: 482 : if (prior_rte->rtekind == RTE_JOIN)
470 : 28 : continue;
471 : :
472 : : /* Skip if the alias name differs. */
473 [ + + ]: 454 : if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
474 : 446 : continue;
475 : :
476 : : /* Looks like a true duplicate. */
477 : 8 : ++occurrence;
478 : : }
479 : :
480 : 692 : return occurrence;
481 : : }
|