Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * parse_collate.c
4 : : * Routines for assigning collation information.
5 : : *
6 : : * We choose to handle collation analysis in a post-pass over the output
7 : : * of expression parse analysis. This is because we need more state to
8 : : * perform this processing than is needed in the finished tree. If we
9 : : * did it on-the-fly while building the tree, all that state would have
10 : : * to be kept in expression node trees permanently. This way, the extra
11 : : * storage is just local variables in this recursive routine.
12 : : *
13 : : * The info that is actually saved in the finished tree is:
14 : : * 1. The output collation of each expression node, or InvalidOid if it
15 : : * returns a noncollatable data type. This can also be InvalidOid if the
16 : : * result type is collatable but the collation is indeterminate.
17 : : * 2. The collation to be used in executing each function. InvalidOid means
18 : : * that there are no collatable inputs or their collation is indeterminate.
19 : : * This value is only stored in node types that might call collation-using
20 : : * functions.
21 : : *
22 : : * You might think we could get away with storing only one collation per
23 : : * node, but the two concepts really need to be kept distinct. Otherwise
24 : : * it's too confusing when a function produces a collatable output type but
25 : : * has no collatable inputs or produces noncollatable output from collatable
26 : : * inputs.
27 : : *
28 : : * Cases with indeterminate collation might result in an error being thrown
29 : : * at runtime. If we knew exactly which functions require collation
30 : : * information, we could throw those errors at parse time instead.
31 : : *
32 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
33 : : * Portions Copyright (c) 1994, Regents of the University of California
34 : : *
35 : : *
36 : : * IDENTIFICATION
37 : : * src/backend/parser/parse_collate.c
38 : : *
39 : : *-------------------------------------------------------------------------
40 : : */
41 : : #include "postgres.h"
42 : :
43 : : #include "catalog/pg_aggregate.h"
44 : : #include "catalog/pg_collation.h"
45 : : #include "nodes/makefuncs.h"
46 : : #include "nodes/nodeFuncs.h"
47 : : #include "parser/parse_collate.h"
48 : : #include "utils/lsyscache.h"
49 : :
50 : :
/*
 * Collation strength (the SQL standard calls this "derivation").  Order is
 * chosen to allow comparisons to work usefully.  Note: the standard doesn't
 * seem to distinguish between NONE and CONFLICT.
 *
 * The declaration order is significant and must not be changed:
 * merge_collation_state() compares strengths numerically so that a stronger
 * derivation overrides a weaker one, and assign_collations_walker() tests
 * "strength > COLLATE_NONE" to learn whether any child supplied a collation.
 * In particular, EXPLICIT sorts above CONFLICT so that an explicit COLLATE
 * clause can still override an earlier conflict of implicit collations.
 */
typedef enum
{
	COLLATE_NONE,				/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT,			/* collation was derived implicitly */
	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT,			/* collation was derived explicitly */
} CollateStrength;
63 : :
/*
 * Working state for one level of the collation-assignment tree walk.
 *
 * Each recursion level of assign_collations_walker() owns one of these as a
 * local variable; children merge their results into it through
 * merge_collation_state().  None of this state is stored in the finished
 * expression tree.
 */
typedef struct
{
	ParseState *pstate;			/* parse state (for error reporting) */
	Oid			collation;		/* OID of current collation, if any */
	CollateStrength strength;	/* strength of current collation choice */
	int			location;		/* location of expr that set collation */
	/* Remaining fields are only valid when strength == COLLATE_CONFLICT */
	Oid			collation2;		/* OID of conflicting collation */
	int			location2;		/* location of expr that set collation2 */
} assign_collations_context;
74 : :
/* Forward declarations of the file-local routines */
static bool assign_query_collations_walker(Node *node, ParseState *pstate);
static bool assign_collations_walker(Node *node,
									 assign_collations_context *context);
static void merge_collation_state(Oid collation,
								  CollateStrength strength,
								  int location,
								  Oid collation2,
								  int location2,
								  assign_collations_context *context);
static void assign_aggregate_collations(Aggref *aggref,
										assign_collations_context *loccontext);
static void assign_ordered_set_collations(Aggref *aggref,
										  assign_collations_context *loccontext);
static void assign_hypothetical_collations(Aggref *aggref,
										   assign_collations_context *loccontext);
90 : :
91 : :
92 : : /*
93 : : * assign_query_collations()
94 : : * Mark all expressions in the given Query with collation information.
95 : : *
96 : : * This should be applied to each Query after completion of parse analysis
97 : : * for expressions. Note that we do not recurse into sub-Queries, since
98 : : * those should have been processed when built.
99 : : */
100 : : void
5526 tgl@sss.pgh.pa.us 101 :CBC 376229 : assign_query_collations(ParseState *pstate, Query *query)
102 : : {
103 : : /*
104 : : * We just use query_tree_walker() to visit all the contained expressions.
105 : : * We can skip the rangetable and CTE subqueries, though, since RTEs and
106 : : * subqueries had better have been processed already (else Vars referring
107 : : * to them would not get created with the right collation).
108 : : */
109 : 376229 : (void) query_tree_walker(query,
110 : : assign_query_collations_walker,
111 : : pstate,
112 : : QTW_IGNORE_RANGE_TABLE |
113 : : QTW_IGNORE_CTE_SUBQUERIES);
114 : 376201 : }
115 : :
116 : : /*
117 : : * Walker for assign_query_collations
118 : : *
119 : : * Each expression found by query_tree_walker is processed independently.
120 : : * Note that query_tree_walker may pass us a whole List, such as the
121 : : * targetlist, in which case each subexpression must be processed
122 : : * independently --- we don't want to bleat if two different targetentries
123 : : * have different collations.
124 : : */
125 : : static bool
126 : 4518556 : assign_query_collations_walker(Node *node, ParseState *pstate)
127 : : {
128 : : /* Need do nothing for empty subexpressions */
129 [ + + ]: 4518556 : if (node == NULL)
130 : 3755427 : return false;
131 : :
132 : : /*
133 : : * We don't want to recurse into a set-operations tree; it's already been
134 : : * fully processed in transformSetOperationStmt.
135 : : */
136 [ + + ]: 763129 : if (IsA(node, SetOperationStmt))
137 : 8541 : return false;
138 : :
139 [ + + ]: 754588 : if (IsA(node, List))
140 : 369331 : assign_list_collations(pstate, (List *) node);
141 : : else
142 : 385257 : assign_expr_collations(pstate, node);
143 : :
144 : 754560 : return false;
145 : : }
146 : :
147 : : /*
148 : : * assign_list_collations()
149 : : * Mark all nodes in the list of expressions with collation information.
150 : : *
151 : : * The list member expressions are processed independently; they do not have
152 : : * to share a common collation.
153 : : */
154 : : void
155 : 414124 : assign_list_collations(ParseState *pstate, List *exprs)
156 : : {
157 : : ListCell *lc;
158 : :
159 [ + + + + : 1493569 : foreach(lc, exprs)
+ + ]
160 : : {
5504 bruce@momjian.us 161 : 1079465 : Node *node = (Node *) lfirst(lc);
162 : :
5526 tgl@sss.pgh.pa.us 163 : 1079465 : assign_expr_collations(pstate, node);
164 : : }
165 : 414104 : }
166 : :
167 : : /*
168 : : * assign_expr_collations()
169 : : * Mark all nodes in the given expression tree with collation information.
170 : : *
171 : : * This is exported for the benefit of various utility commands that process
172 : : * expressions without building a complete Query. It should be applied after
173 : : * calling transformExpr() plus any expression-modifying operations such as
174 : : * coerce_to_boolean().
175 : : */
176 : : void
177 : 1561463 : assign_expr_collations(ParseState *pstate, Node *expr)
178 : : {
179 : : assign_collations_context context;
180 : :
181 : : /* initialize context for tree walk */
182 : 1561463 : context.pstate = pstate;
183 : 1561463 : context.collation = InvalidOid;
184 : 1561463 : context.strength = COLLATE_NONE;
185 : 1561463 : context.location = -1;
186 : :
187 : : /* and away we go */
188 : 1561463 : (void) assign_collations_walker(expr, &context);
189 : 1561431 : }
190 : :
191 : : /*
192 : : * select_common_collation()
193 : : * Identify a common collation for a list of expressions.
194 : : *
195 : : * The expressions should all return the same datatype, else this is not
196 : : * terribly meaningful.
197 : : *
198 : : * none_ok means that it is permitted to return InvalidOid, indicating that
199 : : * no common collation could be identified, even for collatable datatypes.
200 : : * Otherwise, an error is thrown for conflict of implicit collations.
201 : : *
202 : : * In theory, none_ok = true reflects the rules of SQL standard clause "Result
203 : : * of data type combinations", none_ok = false reflects the rules of clause
204 : : * "Collation determination" (in some cases invoked via "Grouping
205 : : * operations").
206 : : */
207 : : Oid
208 : 49278 : select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
209 : : {
210 : : assign_collations_context context;
211 : :
212 : : /* initialize context for tree walk */
213 : 49278 : context.pstate = pstate;
214 : 49278 : context.collation = InvalidOid;
215 : 49278 : context.strength = COLLATE_NONE;
216 : 49278 : context.location = -1;
217 : :
218 : : /* and away we go */
219 : 49278 : (void) assign_collations_walker((Node *) exprs, &context);
220 : :
221 : : /* deal with collation conflict */
222 [ + + ]: 49278 : if (context.strength == COLLATE_CONFLICT)
223 : : {
224 [ + + ]: 48 : if (none_ok)
225 : 24 : return InvalidOid;
226 [ + - ]: 24 : ereport(ERROR,
227 : : (errcode(ERRCODE_COLLATION_MISMATCH),
228 : : errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
229 : : get_collation_name(context.collation),
230 : : get_collation_name(context.collation2)),
231 : : errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232 : : parser_errposition(context.pstate, context.location2)));
233 : : }
234 : :
235 : : /*
236 : : * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237 : : * that's okay because it must mean none of the expressions returned
238 : : * collatable datatypes.
239 : : */
240 : 49230 : return context.collation;
241 : : }
242 : :
/*
 * assign_collations_walker()
 *		Recursive guts of collation processing.
 *
 * Nodes with no children (eg, Vars, Consts, Params) must have been marked
 * when built.  All upper-level nodes are marked here.
 *
 * Note: if this is invoked directly on a List, it will attempt to infer a
 * common collation for all the list members.  In particular, it will throw
 * error if there are conflicting explicit collations for different members.
 *
 * "node" is the subtree to process and may be NULL, in which case nothing is
 * done.  "context" is the state of the parent level; this node's resulting
 * collation, strength and location are folded into it by
 * merge_collation_state() just before returning.  Node types that exit early
 * with "return false" inside the switch (RowExpr, RowCompareExpr, the
 * jointree/sort-clause group, and a Query with no usable first column)
 * contribute nothing to the parent.
 *
 * The result is always false, so a tree walk driven by this function is
 * never terminated early.
 *
 * Errors are raised here for a sort/group TargetEntry whose collation is in
 * conflict and for an unrecognized aggkind; conflicting explicit collations
 * are reported from merge_collation_state().
 */
static bool
assign_collations_walker(Node *node, assign_collations_context *context)
{
	assign_collations_context loccontext;
	Oid			collation;
	CollateStrength strength;
	int			location;

	/* Need do nothing for empty subexpressions */
	if (node == NULL)
		return false;

	/*
	 * Prepare for recursion.  For most node types, though not all, the first
	 * thing we do is recurse to process all nodes below this one.  Each level
	 * of the tree has its own local context.
	 */
	loccontext.pstate = context->pstate;
	loccontext.collation = InvalidOid;
	loccontext.strength = COLLATE_NONE;
	loccontext.location = -1;
	/* Set these fields just to suppress uninitialized-value warnings: */
	loccontext.collation2 = InvalidOid;
	loccontext.location2 = -1;

	/*
	 * Recurse if appropriate, then determine the collation for this node.
	 *
	 * Note: the general cases are at the bottom of the switch, after various
	 * special cases.
	 *
	 * Every case that reaches the end of the switch via "break" must have
	 * set collation, strength and location, since they are passed to
	 * merge_collation_state() below.
	 */
	switch (nodeTag(node))
	{
		case T_CollateExpr:
			{
				/*
				 * COLLATE sets an explicitly derived collation, regardless of
				 * what the child state is.  But we must recurse to set up
				 * collation info below here.
				 */
				CollateExpr *expr = (CollateExpr *) node;

				(void) expression_tree_walker(node,
											  assign_collations_walker,
											  &loccontext);

				collation = expr->collOid;
				Assert(OidIsValid(collation));
				strength = COLLATE_EXPLICIT;
				location = expr->location;
			}
			break;
		case T_FieldSelect:
			{
				/*
				 * For FieldSelect, the result has the field's declared
				 * collation, independently of what happened in the arguments.
				 * (The immediate argument must be composite and thus not
				 * collatable, anyhow.)  The field's collation was already
				 * looked up and saved in the node.
				 */
				FieldSelect *expr = (FieldSelect *) node;

				/* ... but first, recurse */
				(void) expression_tree_walker(node,
											  assign_collations_walker,
											  &loccontext);

				if (OidIsValid(expr->resultcollid))
				{
					/* Node's result type is collatable. */
					/* Pass up field's collation as an implicit choice. */
					collation = expr->resultcollid;
					strength = COLLATE_IMPLICIT;
					location = exprLocation(node);
				}
				else
				{
					/* Node's result type isn't collatable. */
					collation = InvalidOid;
					strength = COLLATE_NONE;
					location = -1;	/* won't be used */
				}
			}
			break;
		case T_RowExpr:
			{
				/*
				 * RowExpr is a special case because the subexpressions are
				 * independent: we don't want to complain if some of them have
				 * incompatible explicit collations.
				 */
				RowExpr    *expr = (RowExpr *) node;

				assign_list_collations(context->pstate, expr->args);

				/*
				 * Since the result is always composite and therefore never
				 * has a collation, we can just stop here: this node has no
				 * impact on the collation of its parent.
				 */
				return false;	/* done */
			}
		case T_RowCompareExpr:
			{
				/*
				 * For RowCompare, we have to find the common collation of
				 * each pair of input columns and build a list.  If we can't
				 * find a common collation, we just put InvalidOid into the
				 * list, which may or may not cause an error at runtime.
				 */
				RowCompareExpr *expr = (RowCompareExpr *) node;
				List	   *colls = NIL;
				ListCell   *l;
				ListCell   *r;

				forboth(l, expr->largs, r, expr->rargs)
				{
					Node	   *le = (Node *) lfirst(l);
					Node	   *re = (Node *) lfirst(r);
					Oid			coll;

					/* none_ok = true: a conflict yields InvalidOid here */
					coll = select_common_collation(context->pstate,
												   list_make2(le, re),
												   true);
					colls = lappend_oid(colls, coll);
				}
				expr->inputcollids = colls;

				/*
				 * Since the result is always boolean and therefore never has
				 * a collation, we can just stop here: this node has no impact
				 * on the collation of its parent.
				 */
				return false;	/* done */
			}
		case T_CoerceToDomain:
			{
				/*
				 * If the domain declaration included a non-default COLLATE
				 * spec, then use that collation as the output collation of
				 * the coercion.  Otherwise allow the input collation to
				 * bubble up.  (The input should be of the domain's base type,
				 * therefore we don't need to worry about it not being
				 * collatable when the domain is.)
				 */
				CoerceToDomain *expr = (CoerceToDomain *) node;
				Oid			typcollation = get_typcollation(expr->resulttype);

				/* ... but first, recurse */
				(void) expression_tree_walker(node,
											  assign_collations_walker,
											  &loccontext);

				if (OidIsValid(typcollation))
				{
					/* Node's result type is collatable. */
					if (typcollation == DEFAULT_COLLATION_OID)
					{
						/* Collation state bubbles up from child. */
						collation = loccontext.collation;
						strength = loccontext.strength;
						location = loccontext.location;
					}
					else
					{
						/* Use domain's collation as an implicit choice. */
						collation = typcollation;
						strength = COLLATE_IMPLICIT;
						location = exprLocation(node);
					}
				}
				else
				{
					/* Node's result type isn't collatable. */
					collation = InvalidOid;
					strength = COLLATE_NONE;
					location = -1;	/* won't be used */
				}

				/*
				 * Save the state into the expression node.  We know it
				 * doesn't care about input collation.
				 */
				if (strength == COLLATE_CONFLICT)
					exprSetCollation(node, InvalidOid);
				else
					exprSetCollation(node, collation);
			}
			break;
		case T_TargetEntry:
			(void) expression_tree_walker(node,
										  assign_collations_walker,
										  &loccontext);

			/*
			 * TargetEntry can have only one child, and should bubble that
			 * state up to its parent.  We can't use the general-case code
			 * below because exprType and friends don't work on TargetEntry.
			 */
			collation = loccontext.collation;
			strength = loccontext.strength;
			location = loccontext.location;

			/*
			 * Throw error if the collation is indeterminate for a TargetEntry
			 * that is a sort/group target.  We prefer to do this now, instead
			 * of leaving the comparison functions to fail at runtime, because
			 * we can give a syntax error pointer to help locate the problem.
			 * There are some cases where there might not be a failure, for
			 * example if the planner chooses to use hash aggregation instead
			 * of sorting for grouping; but it seems better to predictably
			 * throw an error.  (Compare transformSetOperationTree, which will
			 * throw error for indeterminate collation of set-op columns, even
			 * though the planner might be able to implement the set-op
			 * without sorting.)
			 */
			if (strength == COLLATE_CONFLICT &&
				((TargetEntry *) node)->ressortgroupref != 0)
				ereport(ERROR,
						(errcode(ERRCODE_COLLATION_MISMATCH),
						 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
								get_collation_name(loccontext.collation),
								get_collation_name(loccontext.collation2)),
						 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
						 parser_errposition(context->pstate,
											loccontext.location2)));
			break;
		case T_InferenceElem:
		case T_RangeTblRef:
		case T_JoinExpr:
		case T_FromExpr:
		case T_OnConflictExpr:
		case T_ForPortionOfExpr:
		case T_SortGroupClause:
		case T_MergeAction:
			(void) expression_tree_walker(node,
										  assign_collations_walker,
										  &loccontext);

			/*
			 * When we're invoked on a query's jointree, we don't need to do
			 * anything with join nodes except recurse through them to process
			 * WHERE/ON expressions.  So just stop here.  Likewise, we don't
			 * need to do anything when invoked on sort/group lists.
			 */
			return false;
		case T_Query:
			{
				/*
				 * We get here when we're invoked on the Query belonging to a
				 * SubLink.  Act as though the Query returns its first output
				 * column, which indeed is what it does for EXPR_SUBLINK and
				 * ARRAY_SUBLINK cases.  In the cases where the SubLink
				 * returns boolean, this info will be ignored.  Special case:
				 * in EXISTS, the Query might return no columns, in which case
				 * we need do nothing.
				 *
				 * We needn't recurse, since the Query is already processed.
				 */
				Query	   *qtree = (Query *) node;
				TargetEntry *tent;

				if (qtree->targetList == NIL)
					return false;
				tent = linitial_node(TargetEntry, qtree->targetList);
				/* A junk first column likewise contributes nothing */
				if (tent->resjunk)
					return false;

				collation = exprCollation((Node *) tent->expr);
				/* collation doesn't change if it's converted to array */
				strength = COLLATE_IMPLICIT;
				location = exprLocation((Node *) tent->expr);
			}
			break;
		case T_List:
			(void) expression_tree_walker(node,
										  assign_collations_walker,
										  &loccontext);

			/*
			 * When processing a list, collation state just bubbles up from
			 * the list elements.
			 */
			collation = loccontext.collation;
			strength = loccontext.strength;
			location = loccontext.location;
			break;

		case T_Var:
		case T_Const:
		case T_Param:
		case T_CoerceToDomainValue:
		case T_CaseTestExpr:
		case T_SetToDefault:
		case T_CurrentOfExpr:
		case T_GraphPropertyRef:

			/*
			 * General case for childless expression nodes.  These should
			 * already have a collation assigned; it is not this function's
			 * responsibility to look into the catalogs for base-case
			 * information.
			 */
			collation = exprCollation(node);

			/*
			 * Note: in most cases, there will be an assigned collation
			 * whenever type_is_collatable(exprType(node)); but an exception
			 * occurs for a Var referencing a subquery output column for which
			 * a unique collation was not determinable.  That may lead to a
			 * runtime failure if a collation-sensitive function is applied to
			 * the Var.
			 */

			if (OidIsValid(collation))
				strength = COLLATE_IMPLICIT;
			else
				strength = COLLATE_NONE;
			location = exprLocation(node);
			break;

		default:
			{
				/*
				 * General case for most expression nodes with children. First
				 * recurse, then figure out what to assign to this node.
				 */
				Oid			typcollation;

				/*
				 * For most node types, we want to treat all the child
				 * expressions alike; but there are a few exceptions, hence
				 * this inner switch.
				 */
				switch (nodeTag(node))
				{
					case T_Aggref:
						{
							/*
							 * Aggref is messy enough that we give it its own
							 * function, in fact three of them.  The FILTER
							 * clause is independent of the rest of the
							 * aggregate, however, so it can be processed
							 * separately.
							 */
							Aggref	   *aggref = (Aggref *) node;

							switch (aggref->aggkind)
							{
								case AGGKIND_NORMAL:
									assign_aggregate_collations(aggref,
																&loccontext);
									break;
								case AGGKIND_ORDERED_SET:
									assign_ordered_set_collations(aggref,
																  &loccontext);
									break;
								case AGGKIND_HYPOTHETICAL:
									assign_hypothetical_collations(aggref,
																   &loccontext);
									break;
								default:
									elog(ERROR, "unrecognized aggkind: %d",
										 (int) aggref->aggkind);
							}

							/* Fresh context, so FILTER can't affect loccontext */
							assign_expr_collations(context->pstate,
												   (Node *) aggref->aggfilter);
						}
						break;
					case T_WindowFunc:
						{
							/*
							 * WindowFunc requires special processing only for
							 * its aggfilter clause, as for aggregates.
							 */
							WindowFunc *wfunc = (WindowFunc *) node;

							(void) assign_collations_walker((Node *) wfunc->args,
															&loccontext);

							assign_expr_collations(context->pstate,
												   (Node *) wfunc->aggfilter);
						}
						break;
					case T_CaseExpr:
						{
							/*
							 * CaseExpr is a special case because we do not
							 * want to recurse into the test expression (if
							 * any).  It was already marked with collations
							 * during transformCaseExpr, and furthermore its
							 * collation is not relevant to the result of the
							 * CASE --- only the output expressions are.
							 */
							CaseExpr   *expr = (CaseExpr *) node;
							ListCell   *lc;

							foreach(lc, expr->args)
							{
								CaseWhen   *when = lfirst_node(CaseWhen, lc);

								/*
								 * The condition expressions mustn't affect
								 * the CASE's result collation either; but
								 * since they are known to yield boolean, it's
								 * safe to recurse directly on them --- they
								 * won't change loccontext.
								 */
								(void) assign_collations_walker((Node *) when->expr,
																&loccontext);
								(void) assign_collations_walker((Node *) when->result,
																&loccontext);
							}
							(void) assign_collations_walker((Node *) expr->defresult,
															&loccontext);
						}
						break;
					case T_SubscriptingRef:
						{
							/*
							 * The subscripts are treated as independent
							 * expressions not contributing to the node's
							 * collation.  Only the container, and the source
							 * expression if any, contribute.  (This models
							 * the old behavior, in which the subscripts could
							 * be counted on to be integers and thus not
							 * contribute anything.)
							 */
							SubscriptingRef *sbsref = (SubscriptingRef *) node;

							assign_expr_collations(context->pstate,
												   (Node *) sbsref->refupperindexpr);
							assign_expr_collations(context->pstate,
												   (Node *) sbsref->reflowerindexpr);
							(void) assign_collations_walker((Node *) sbsref->refexpr,
															&loccontext);
							(void) assign_collations_walker((Node *) sbsref->refassgnexpr,
															&loccontext);
						}
						break;
					default:

						/*
						 * Normal case: all child expressions contribute
						 * equally to loccontext.
						 */
						(void) expression_tree_walker(node,
													  assign_collations_walker,
													  &loccontext);
						break;
				}

				/*
				 * Now figure out what collation to assign to this node.
				 */
				typcollation = get_typcollation(exprType(node));
				if (OidIsValid(typcollation))
				{
					/* Node's result is collatable; what about its input? */
					if (loccontext.strength > COLLATE_NONE)
					{
						/* Collation state bubbles up from children. */
						collation = loccontext.collation;
						strength = loccontext.strength;
						location = loccontext.location;
					}
					else
					{
						/*
						 * Collatable output produced without any collatable
						 * input.  Use the type's collation (which is usually
						 * DEFAULT_COLLATION_OID, but might be different for a
						 * domain).
						 */
						collation = typcollation;
						strength = COLLATE_IMPLICIT;
						location = exprLocation(node);
					}
				}
				else
				{
					/* Node's result type isn't collatable. */
					collation = InvalidOid;
					strength = COLLATE_NONE;
					location = -1;	/* won't be used */
				}

				/*
				 * Save the result collation into the expression node.  If the
				 * state is COLLATE_CONFLICT, we'll set the collation to
				 * InvalidOid, which might result in an error at runtime.
				 */
				if (strength == COLLATE_CONFLICT)
					exprSetCollation(node, InvalidOid);
				else
					exprSetCollation(node, collation);

				/*
				 * Likewise save the input collation, which is the one that
				 * any function called by this node should use.  Note this is
				 * taken from the children's merged state (loccontext), not
				 * from the output collation computed just above; the two can
				 * differ when the result type isn't collatable.
				 */
				if (loccontext.strength == COLLATE_CONFLICT)
					exprSetInputCollation(node, InvalidOid);
				else
					exprSetInputCollation(node, loccontext.collation);
			}
			break;
	}

	/*
	 * Now, merge my information into my parent's state.  The second
	 * collation and location are taken from loccontext; the callee uses them
	 * only when strength is COLLATE_CONFLICT.
	 */
	merge_collation_state(collation,
						  strength,
						  location,
						  loccontext.collation2,
						  loccontext.location2,
						  context);

	return false;
}
777 : :
778 : : /*
779 : : * Merge collation state of a subexpression into the context for its parent.
780 : : */
781 : : static void
782 : 4802963 : merge_collation_state(Oid collation,
783 : : CollateStrength strength,
784 : : int location,
785 : : Oid collation2,
786 : : int location2,
787 : : assign_collations_context *context)
788 : : {
789 : : /*
790 : : * If the collation strength for this node is different from what's
791 : : * already in *context, then this node either dominates or is dominated by
792 : : * earlier siblings.
793 : : */
5526 794 [ + + ]: 4802963 : if (strength > context->strength)
795 : : {
796 : : /* Override previous parent state */
797 : 1095697 : context->collation = collation;
798 : 1095697 : context->strength = strength;
799 : 1095697 : context->location = location;
800 : : /* Bubble up error info if applicable */
801 [ + + ]: 1095697 : if (strength == COLLATE_CONFLICT)
802 : : {
4516 803 : 76 : context->collation2 = collation2;
804 : 76 : context->location2 = location2;
805 : : }
806 : : }
5526 807 [ + + ]: 3707266 : else if (strength == context->strength)
808 : : {
809 : : /* Merge, or detect error if there's a collation conflict */
810 [ + + - + : 3629075 : switch (strength)
- ]
811 : : {
812 : 3472009 : case COLLATE_NONE:
813 : : /* Nothing + nothing is still nothing */
814 : 3472009 : break;
815 : 156994 : case COLLATE_IMPLICIT:
816 [ + + ]: 156994 : if (collation != context->collation)
817 : : {
818 : : /*
819 : : * Non-default implicit collation always beats default.
820 : : */
821 [ + + ]: 35562 : if (context->collation == DEFAULT_COLLATION_OID)
822 : : {
823 : : /* Override previous parent state */
824 : 9640 : context->collation = collation;
825 : 9640 : context->strength = strength;
826 : 9640 : context->location = location;
827 : : }
828 [ + + ]: 25922 : else if (collation != DEFAULT_COLLATION_OID)
829 : : {
830 : : /*
831 : : * Oops, we have a conflict. We cannot throw error
832 : : * here, since the conflict could be resolved by a
833 : : * later sibling CollateExpr, or the parent might not
834 : : * care about collation anyway. Return enough info to
835 : : * throw the error later, if needed.
836 : : */
837 : 120 : context->strength = COLLATE_CONFLICT;
838 : 120 : context->collation2 = collation;
839 : 120 : context->location2 = location;
840 : : }
841 : : }
842 : 156994 : break;
5526 tgl@sss.pgh.pa.us 843 :UBC 0 : case COLLATE_CONFLICT:
844 : : /* We're still conflicted ... */
845 : 0 : break;
5526 tgl@sss.pgh.pa.us 846 :CBC 72 : case COLLATE_EXPLICIT:
847 [ + + ]: 72 : if (collation != context->collation)
848 : : {
849 : : /*
850 : : * Oops, we have a conflict of explicit COLLATE clauses.
851 : : * Here we choose to throw error immediately; that is what
852 : : * the SQL standard says to do, and there's no good reason
853 : : * to be less strict.
854 : : */
855 [ + - ]: 16 : ereport(ERROR,
856 : : (errcode(ERRCODE_COLLATION_MISMATCH),
857 : : errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
858 : : get_collation_name(context->collation),
859 : : get_collation_name(collation)),
860 : : parser_errposition(context->pstate, location)));
861 : : }
862 : 56 : break;
863 : : }
864 : : }
4516 865 : 4802947 : }
866 : :
867 : : /*
868 : : * Aggref is a special case because expressions used only for ordering
869 : : * shouldn't be taken to conflict with each other or with regular args,
870 : : * indeed shouldn't affect the aggregate's result collation at all.
871 : : * We handle this by applying assign_expr_collations() to them rather than
872 : : * passing down our loccontext.
873 : : *
874 : : * Note that we recurse to each TargetEntry, not directly to its contained
875 : : * expression, so that the case above for T_TargetEntry will complain if we
876 : : * can't resolve a collation for an ORDER BY item (whether or not it is also
877 : : * a normal aggregate arg).
878 : : *
879 : : * We need not recurse into the aggorder or aggdistinct lists, because those
880 : : * contain only SortGroupClause nodes which we need not process.
881 : : */
882 : : static void
883 : 31137 : assign_aggregate_collations(Aggref *aggref,
884 : : assign_collations_context *loccontext)
885 : : {
886 : : ListCell *lc;
887 : :
888 : : /* Plain aggregates have no direct args */
889 [ - + ]: 31137 : Assert(aggref->aggdirectargs == NIL);
890 : :
891 : : /* Process aggregated args, holding resjunk ones at arm's length */
892 [ + + + + : 55429 : foreach(lc, aggref->args)
+ + ]
893 : : {
3312 894 : 24300 : TargetEntry *tle = lfirst_node(TargetEntry, lc);
895 : :
4516 896 [ + + ]: 24300 : if (tle->resjunk)
897 : 1074 : assign_expr_collations(loccontext->pstate, (Node *) tle);
898 : : else
899 : 23226 : (void) assign_collations_walker((Node *) tle, loccontext);
900 : : }
901 : 31129 : }
902 : :
903 : : /*
904 : : * For ordered-set aggregates, it's somewhat unclear how best to proceed.
905 : : * The spec-defined inverse distribution functions have only one sort column
906 : : * and don't return collatable types, but this is clearly too restrictive in
907 : : * the general case. Our solution is to consider that the aggregate's direct
908 : : * arguments contribute normally to determination of the aggregate's own
909 : : * collation, while aggregated arguments contribute only when the aggregate
910 : : * is designed to have exactly one aggregated argument (i.e., it has a single
911 : : * aggregated argument and is non-variadic). If it can have more than one
912 : : * aggregated argument, we process the aggregated arguments as independent
913 : : * sort columns. This avoids throwing error for something like
914 : : * agg(...) within group (order by x collate "foo", y collate "bar")
915 : : * while also guaranteeing that variadic aggregates don't change in behavior
916 : : * depending on how many sort columns a particular call happens to have.
917 : : *
918 : : * Otherwise this is much like the plain-aggregate case.
919 : : */
920 : : static void
921 : 118 : assign_ordered_set_collations(Aggref *aggref,
922 : : assign_collations_context *loccontext)
923 : : {
924 : : bool merge_sort_collations;
925 : : ListCell *lc;
926 : :
927 : : /* Merge sort collations to parent only if there can be only one */
928 [ + - + - ]: 236 : merge_sort_collations = (list_length(aggref->args) == 1 &&
3240 929 : 118 : get_func_variadictype(aggref->aggfnoid) == InvalidOid);
930 : :
931 : : /* Direct args, if any, are normal children of the Aggref node */
4516 932 : 118 : (void) assign_collations_walker((Node *) aggref->aggdirectargs,
933 : : loccontext);
934 : :
935 : : /* Process aggregated args appropriately */
936 [ + - + + : 236 : foreach(lc, aggref->args)
+ + ]
937 : : {
3312 938 : 118 : TargetEntry *tle = lfirst_node(TargetEntry, lc);
939 : :
4516 940 [ + - ]: 118 : if (merge_sort_collations)
941 : 118 : (void) assign_collations_walker((Node *) tle, loccontext);
942 : : else
4516 tgl@sss.pgh.pa.us 943 :UBC 0 : assign_expr_collations(loccontext->pstate, (Node *) tle);
944 : : }
4516 tgl@sss.pgh.pa.us 945 :CBC 118 : }
946 : :
947 : : /*
948 : : * Hypothetical-set aggregates are even more special: per spec, we need to
949 : : * unify the collations of each pair of hypothetical and aggregated args.
950 : : * And we need to force the choice of collation down into the sort column
951 : : * to ensure that the sort happens with the chosen collation. Other than
952 : : * that, the behavior is like regular ordered-set aggregates. Note that
953 : : * hypothetical direct arguments contribute to the aggregate collation
954 : : * only when their partner aggregated arguments do.
955 : : */
956 : : static void
957 : 82 : assign_hypothetical_collations(Aggref *aggref,
958 : : assign_collations_context *loccontext)
959 : : {
960 : 82 : ListCell *h_cell = list_head(aggref->aggdirectargs);
961 : 82 : ListCell *s_cell = list_head(aggref->args);
962 : : bool merge_sort_collations;
963 : : int extra_args;
964 : :
965 : : /* Merge sort collations to parent only if there can be only one */
966 [ + + - + ]: 145 : merge_sort_collations = (list_length(aggref->args) == 1 &&
3240 967 : 63 : get_func_variadictype(aggref->aggfnoid) == InvalidOid);
968 : :
969 : : /* Process any non-hypothetical direct args */
4516 970 : 82 : extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
971 [ - + ]: 82 : Assert(extra_args >= 0);
972 [ - + ]: 82 : while (extra_args-- > 0)
973 : : {
4516 tgl@sss.pgh.pa.us 974 :UBC 0 : (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
2486 975 : 0 : h_cell = lnext(aggref->aggdirectargs, h_cell);
976 : : }
977 : :
978 : : /* Scan hypothetical args and aggregated args in parallel */
4516 tgl@sss.pgh.pa.us 979 [ + + + - ]:CBC 195 : while (h_cell && s_cell)
980 : : {
981 : 117 : Node *h_arg = (Node *) lfirst(h_cell);
982 : 117 : TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
983 : : assign_collations_context paircontext;
984 : :
985 : : /*
986 : : * Assign collations internally in this pair of expressions, then
987 : : * choose a common collation for them. This should match
988 : : * select_common_collation(), but we can't use that function as-is
989 : : * because we need access to the whole collation state so we can
990 : : * bubble it up to the aggregate function's level.
991 : : */
992 : 117 : paircontext.pstate = loccontext->pstate;
993 : 117 : paircontext.collation = InvalidOid;
994 : 117 : paircontext.strength = COLLATE_NONE;
995 : 117 : paircontext.location = -1;
996 : : /* Set these fields just to suppress uninitialized-value warnings: */
997 : 117 : paircontext.collation2 = InvalidOid;
998 : 117 : paircontext.location2 = -1;
999 : :
1000 : 117 : (void) assign_collations_walker(h_arg, &paircontext);
1001 : 117 : (void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
1002 : :
1003 : : /* deal with collation conflict */
1004 [ - + ]: 113 : if (paircontext.strength == COLLATE_CONFLICT)
4516 tgl@sss.pgh.pa.us 1005 [ # # ]:UBC 0 : ereport(ERROR,
1006 : : (errcode(ERRCODE_COLLATION_MISMATCH),
1007 : : errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
1008 : : get_collation_name(paircontext.collation),
1009 : : get_collation_name(paircontext.collation2)),
1010 : : errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
1011 : : parser_errposition(paircontext.pstate,
1012 : : paircontext.location2)));
1013 : :
1014 : : /*
1015 : : * At this point paircontext.collation can be InvalidOid only if the
1016 : : * type is not collatable; no need to do anything in that case. If we
1017 : : * do have to change the sort column's collation, do it by inserting a
1018 : : * RelabelType node into the sort column TLE.
1019 : : *
1020 : : * XXX This is pretty grotty for a couple of reasons:
1021 : : * assign_collations_walker isn't supposed to be changing the
1022 : : * expression structure like this, and a parse-time change of
1023 : : * collation ought to be signaled by a CollateExpr not a RelabelType
1024 : : * (the use of RelabelType for collation marking is supposed to be a
1025 : : * planner/executor thing only). But we have no better alternative.
1026 : : * In particular, injecting a CollateExpr could result in the
1027 : : * expression being interpreted differently after dump/reload, since
1028 : : * we might be effectively promoting an implicit collation to
1029 : : * explicit. This kluge is relying on ruleutils.c not printing a
1030 : : * COLLATE clause for a RelabelType, and probably on some other
1031 : : * fragile behaviors.
1032 : : */
4516 tgl@sss.pgh.pa.us 1033 [ + + - + ]:CBC 132 : if (OidIsValid(paircontext.collation) &&
1034 : 19 : paircontext.collation != exprCollation((Node *) s_tle->expr))
1035 : : {
4516 tgl@sss.pgh.pa.us 1036 :UBC 0 : s_tle->expr = (Expr *)
1037 : 0 : makeRelabelType(s_tle->expr,
1038 : 0 : exprType((Node *) s_tle->expr),
1039 : 0 : exprTypmod((Node *) s_tle->expr),
1040 : : paircontext.collation,
1041 : : COERCE_IMPLICIT_CAST);
1042 : : }
1043 : :
1044 : : /*
1045 : : * If appropriate, merge this column's collation state up to the
1046 : : * aggregate function.
1047 : : */
4516 tgl@sss.pgh.pa.us 1048 [ - + ]:CBC 113 : if (merge_sort_collations)
4516 tgl@sss.pgh.pa.us 1049 :UBC 0 : merge_collation_state(paircontext.collation,
1050 : : paircontext.strength,
1051 : : paircontext.location,
1052 : : paircontext.collation2,
1053 : : paircontext.location2,
1054 : : loccontext);
1055 : :
2486 tgl@sss.pgh.pa.us 1056 :CBC 113 : h_cell = lnext(aggref->aggdirectargs, h_cell);
1057 : 113 : s_cell = lnext(aggref->args, s_cell);
1058 : : }
4516 1059 [ + - - + ]: 78 : Assert(h_cell == NULL && s_cell == NULL);
5526 1060 : 78 : }
|