Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * execGrouping.c
4 : : * executor utility routines for grouping, hashing, and aggregation
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/executor/execGrouping.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <math.h>
18 : :
19 : : #include "access/htup_details.h"
20 : : #include "access/parallel.h"
21 : : #include "common/hashfn.h"
22 : : #include "executor/executor.h"
23 : : #include "miscadmin.h"
24 : : #include "utils/lsyscache.h"
25 : :
26 : : static int TupleHashTableMatch(struct tuplehash_hash *tb, MinimalTuple tuple1, MinimalTuple tuple2);
27 : : static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
28 : : MinimalTuple tuple);
29 : : static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
30 : : TupleTableSlot *slot,
31 : : bool *isnew, uint32 hash);
32 : :
33 : : /*
34 : : * Define parameters for tuple hash table code generation. The interface is
35 : : * *also* declared in execnodes.h (to generate the types, which are externally
36 : : * visible).
37 : : */
38 : : #define SH_PREFIX tuplehash
39 : : #define SH_ELEMENT_TYPE TupleHashEntryData
40 : : #define SH_KEY_TYPE MinimalTuple
41 : : #define SH_KEY firstTuple
42 : : #define SH_HASH_KEY(tb, key) TupleHashTableHash_internal(tb, key)
43 : : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
44 : : #define SH_SCOPE extern
45 : : #define SH_STORE_HASH
46 : : #define SH_GET_HASH(tb, a) a->hash
47 : : #define SH_DEFINE
48 : : #include "lib/simplehash.h"
49 : :
50 : :
51 : : /*****************************************************************************
52 : : * Utility routines for grouping tuples together
53 : : *****************************************************************************/
54 : :
55 : : /*
56 : : * execTuplesMatchPrepare
57 : : * Build expression that can be evaluated using ExecQual(), returning
58 : : * whether an ExprContext's inner/outer tuples are NOT DISTINCT
59 : : */
60 : : ExprState *
2861 andres@anarazel.de 61 :CBC 6027 : execTuplesMatchPrepare(TupleDesc desc,
62 : : int numCols,
63 : : const AttrNumber *keyColIdx,
64 : : const Oid *eqOperators,
65 : : const Oid *collations,
66 : : PlanState *parent)
67 : : {
68 : : Oid *eqFunctions;
69 : : int i;
70 : : ExprState *expr;
71 : :
72 [ + + ]: 6027 : if (numCols == 0)
73 : 27 : return NULL;
74 : :
434 michael@paquier.xyz 75 : 6000 : eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
76 : :
77 : : /* lookup equality functions */
8376 tgl@sss.pgh.pa.us 78 [ + + ]: 16329 : for (i = 0; i < numCols; i++)
2861 andres@anarazel.de 79 : 10329 : eqFunctions[i] = get_opcode(eqOperators[i]);
80 : :
81 : : /* build actual expression */
2588 82 : 6000 : expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
83 : : numCols, keyColIdx, eqFunctions, collations,
84 : : parent);
85 : :
2861 86 : 6000 : return expr;
87 : : }
88 : :
89 : : /*
90 : : * execTuplesHashPrepare
91 : : * Look up the equality and hashing functions needed for a TupleHashTable.
92 : : *
93 : : * This is similar to execTuplesMatchPrepare, but we also need to find the
94 : : * hash functions associated with the equality operators. *eqFunctions and
95 : : * *hashFunctions receive the palloc'd result arrays.
96 : : *
97 : : * Note: we expect that the given operators are not cross-type comparisons.
98 : : */
99 : : void
6915 tgl@sss.pgh.pa.us 100 : 4161 : execTuplesHashPrepare(int numCols,
101 : : const Oid *eqOperators,
102 : : Oid **eqFuncOids,
103 : : FmgrInfo **hashFunctions)
104 : : {
105 : : int i;
106 : :
2861 andres@anarazel.de 107 : 4161 : *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
6915 tgl@sss.pgh.pa.us 108 : 4161 : *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
109 : :
8213 110 [ + + ]: 10999 : for (i = 0; i < numCols; i++)
111 : : {
6915 112 : 6838 : Oid eq_opr = eqOperators[i];
113 : : Oid eq_function;
114 : : Oid left_hash_function;
115 : : Oid right_hash_function;
116 : :
117 : 6838 : eq_function = get_opcode(eq_opr);
6895 118 [ - + ]: 6838 : if (!get_op_hash_functions(eq_opr,
119 : : &left_hash_function, &right_hash_function))
8184 tgl@sss.pgh.pa.us 120 [ # # ]:UBC 0 : elog(ERROR, "could not find hash function for hash operator %u",
121 : : eq_opr);
122 : : /* We're not supporting cross-type cases here */
6895 tgl@sss.pgh.pa.us 123 [ - + ]:CBC 6838 : Assert(left_hash_function == right_hash_function);
2861 andres@anarazel.de 124 : 6838 : (*eqFuncOids)[i] = eq_function;
6895 tgl@sss.pgh.pa.us 125 : 6838 : fmgr_info(right_hash_function, &(*hashFunctions)[i]);
126 : : }
8376 127 : 4161 : }
128 : :
129 : :
130 : : /*****************************************************************************
131 : : * Utility routines for all-in-memory hash tables
132 : : *
133 : : * These routines build hash tables for grouping tuples together (eg, for
134 : : * hash aggregation). There is one entry for each not-distinct set of tuples
135 : : * presented.
136 : : *****************************************************************************/
137 : :
138 : : /*
139 : : * Construct an empty TupleHashTable
140 : : *
141 : : * parent: PlanState node that will own this hash table
142 : : * inputDesc: tuple descriptor for input tuples
143 : : * inputOps: slot ops for input tuples, or NULL if unknown or not fixed
144 : : * numCols: number of columns to be compared (length of next 4 arrays)
145 : : * keyColIdx: indexes of tuple columns to compare
146 : : * eqfuncoids: OIDs of equality comparison functions to use
147 : : * hashfunctions: FmgrInfos of datatype-specific hashing functions to use
148 : : * collations: collations to use in comparisons
149 : : * nelements: initial estimate of hashtable size
150 : : * additionalsize: size of data that may be stored along with the hash entry
151 : : * metacxt: memory context for long-lived data and the simplehash table
152 : : * tuplescxt: memory context in which to store the hashed tuples themselves
153 : : * tempcxt: short-lived context for evaluation hash and comparison functions
154 : : * use_variable_hash_iv: if true, adjust hash IV per-parallel-worker
155 : : *
156 : : * The hashfunctions array may be made with execTuplesHashPrepare(). Note they
157 : : * are not cross-type functions, but expect to see the table datatype(s)
158 : : * on both sides.
159 : : *
160 : : * Note that the keyColIdx, hashfunctions, and collations arrays must be
161 : : * allocated in storage that will live as long as the hashtable does.
162 : : *
163 : : * The metacxt and tuplescxt are separate because it's usually desirable for
164 : : * tuplescxt to be a BumpContext to avoid memory wastage, while metacxt must
165 : : * support pfree in case the simplehash table needs to be enlarged. (We could
166 : : * simplify the API of TupleHashTables by managing the tuplescxt internally.
167 : : * But that would be disadvantageous to nodeAgg.c and nodeSubplan.c, which use
168 : : * a single tuplescxt for multiple TupleHashTables that are reset together.)
169 : : *
170 : : * LookupTupleHashEntry, FindTupleHashEntry, and related functions may leak
171 : : * memory in the tempcxt. It is caller's responsibility to reset that context
172 : : * reasonably often, typically once per tuple. (We do it that way, rather
173 : : * than managing an extra context within the hashtable, because in many cases
174 : : * the caller can specify a tempcxt that it needs to reset per-tuple anyway.)
175 : : *
176 : : * We don't currently provide DestroyTupleHashTable functionality; the hash
177 : : * table will be cleaned up at destruction of the metacxt. (Some callers
178 : : * bother to delete the tuplescxt explicitly, though it'd be sufficient to
179 : : * ensure it's a child of the metacxt.) There's not much point in working
180 : : * harder than this so long as the expression-evaluation infrastructure
181 : : * behaves similarly.
182 : : */
183 : : TupleHashTable
362 184 : 3712 : BuildTupleHashTable(PlanState *parent,
185 : : TupleDesc inputDesc,
186 : : const TupleTableSlotOps *inputOps,
187 : : int numCols,
188 : : AttrNumber *keyColIdx,
189 : : const Oid *eqfuncoids,
190 : : FmgrInfo *hashfunctions,
191 : : Oid *collations,
192 : : double nelements,
193 : : Size additionalsize,
194 : : MemoryContext metacxt,
195 : : MemoryContext tuplescxt,
196 : : MemoryContext tempcxt,
197 : : bool use_variable_hash_iv)
198 : : {
199 : : TupleHashTable hashtable;
200 : : uint32 nbuckets;
201 : : MemoryContext oldcontext;
370 drowley@postgresql.o 202 : 3712 : uint32 hash_iv = 0;
203 : :
204 : : /*
205 : : * tuplehash_create requires a uint32 element count, so we had better
206 : : * clamp the given nelements to fit in that. As long as we have to do
207 : : * that, we might as well protect against completely insane input like
208 : : * zero or NaN. But it is not our job here to enforce issues like staying
209 : : * within hash_mem: the caller should have done that, and we don't have
210 : : * enough info to second-guess.
211 : : */
44 tgl@sss.pgh.pa.us 212 [ + - - + ]:GNC 3712 : if (isnan(nelements) || nelements <= 0)
44 tgl@sss.pgh.pa.us 213 :UNC 0 : nbuckets = 1;
44 tgl@sss.pgh.pa.us 214 [ - + ]:GNC 3712 : else if (nelements >= PG_UINT32_MAX)
44 tgl@sss.pgh.pa.us 215 :UNC 0 : nbuckets = PG_UINT32_MAX;
216 : : else
44 tgl@sss.pgh.pa.us 217 :GNC 3712 : nbuckets = (uint32) nelements;
218 : :
219 : : /* tuplescxt must be separate, else ResetTupleHashTable breaks things */
47 220 [ - + ]: 3712 : Assert(metacxt != tuplescxt);
221 : :
222 : : /* ensure additionalsize is maxalign'ed */
267 jdavis@postgresql.or 223 : 3712 : additionalsize = MAXALIGN(additionalsize);
224 : :
2502 andres@anarazel.de 225 :CBC 3712 : oldcontext = MemoryContextSwitchTo(metacxt);
226 : :
6 michael@paquier.xyz 227 :GNC 3712 : hashtable = palloc_object(TupleHashTableData);
228 : :
8376 tgl@sss.pgh.pa.us 229 :CBC 3712 : hashtable->numCols = numCols;
230 : 3712 : hashtable->keyColIdx = keyColIdx;
2461 peter@eisentraut.org 231 : 3712 : hashtable->tab_collations = collations;
47 tgl@sss.pgh.pa.us 232 :GNC 3712 : hashtable->tuplescxt = tuplescxt;
8376 tgl@sss.pgh.pa.us 233 :CBC 3712 : hashtable->tempcxt = tempcxt;
267 jdavis@postgresql.or 234 : 3712 : hashtable->additionalsize = additionalsize;
7367 bruce@momjian.us 235 : 3712 : hashtable->tableslot = NULL; /* will be made on first lookup */
7580 tgl@sss.pgh.pa.us 236 : 3712 : hashtable->inputslot = NULL;
370 drowley@postgresql.o 237 : 3712 : hashtable->in_hash_expr = NULL;
2861 andres@anarazel.de 238 : 3712 : hashtable->cur_eq_func = NULL;
239 : :
240 : : /*
241 : : * If parallelism is in use, even if the leader backend is performing the
242 : : * scan itself, we don't want to create the hashtable exactly the same way
243 : : * in all workers. As hashtables are iterated over in keyspace-order,
244 : : * doing so in all processes in the same way is likely to lead to
245 : : * "unbalanced" hashtables when the table size initially is
246 : : * underestimated.
247 : : */
3287 rhaas@postgresql.org 248 [ + + ]: 3712 : if (use_variable_hash_iv)
370 drowley@postgresql.o 249 : 499 : hash_iv = murmurhash32(ParallelWorkerNumber);
250 : :
2502 andres@anarazel.de 251 : 3712 : hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
252 : :
253 : : /*
254 : : * We copy the input tuple descriptor just for safety --- we assume all
255 : : * input tuples will have equivalent descriptors.
256 : : */
2588 257 : 3712 : hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
258 : : &TTSOpsMinimalTuple);
259 : :
260 : : /* build hash ExprState for all columns */
370 drowley@postgresql.o 261 [ + - ]: 3712 : hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
262 : : inputOps,
263 : : hashfunctions,
264 : : collations,
265 : : numCols,
266 : : keyColIdx,
267 : : parent,
268 : : hash_iv);
269 : :
270 : : /* build comparator for all columns */
2861 andres@anarazel.de 271 [ + - ]: 3712 : hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
272 : : inputOps,
273 : : &TTSOpsMinimalTuple,
274 : : numCols,
275 : : keyColIdx, eqfuncoids, collations,
276 : : parent);
277 : :
278 : : /*
279 : : * While not pretty, it's ok to not shut down this context, but instead
280 : : * rely on the containing memory context being reset, as
281 : : * ExecBuildGroupingEqual() only builds a very simple expression calling
282 : : * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
283 : : */
2502 284 : 3712 : hashtable->exprcontext = CreateStandaloneExprContext();
285 : :
286 : 3712 : MemoryContextSwitchTo(oldcontext);
287 : :
8376 tgl@sss.pgh.pa.us 288 : 3712 : return hashtable;
289 : : }
290 : :
291 : : /*
292 : : * Reset contents of the hashtable to be empty, preserving all the non-content
293 : : * state.
294 : : *
295 : : * Note: in usages where several TupleHashTables share a tuplescxt, all must
296 : : * be reset together, as the first one's reset call will destroy all their
297 : : * data. The additional reset calls for the rest will redundantly reset the
298 : : * tuplescxt. But because of mcxt.c's isReset flag, that's cheap enough that
299 : : * we need not avoid it.
300 : : */
301 : : void
2502 andres@anarazel.de 302 : 97493 : ResetTupleHashTable(TupleHashTable hashtable)
303 : : {
304 : 97493 : tuplehash_reset(hashtable->hashtab);
47 tgl@sss.pgh.pa.us 305 :GNC 97493 : MemoryContextReset(hashtable->tuplescxt);
2502 andres@anarazel.de 306 : 97493 : }
307 : :
308 : : /*
309 : : * Estimate the amount of space needed for a TupleHashTable with nentries
310 : : * entries, if the tuples have average data width tupleWidth and the caller
311 : : * requires additionalsize extra space per entry.
312 : : *
313 : : * Return SIZE_MAX if it'd overflow size_t.
314 : : *
315 : : * nentries is "double" because this is meant for use by the planner,
316 : : * which typically works with double rowcount estimates. So we'd need to
317 : : * clamp to integer somewhere and that might as well be here. We do expect
318 : : * the value not to be NaN or negative, else the result will be garbage.
319 : : */
320 : : Size
44 tgl@sss.pgh.pa.us 321 : 2411 : EstimateTupleHashTableSpace(double nentries,
322 : : Size tupleWidth,
323 : : Size additionalsize)
324 : : {
325 : : Size sh_space;
326 : : double tuples_space;
327 : :
328 : : /* First estimate the space needed for the simplehash table */
329 : 2411 : sh_space = tuplehash_estimate_space(nentries);
330 : :
331 : : /* Give up if that's already too big */
332 [ + + ]: 2411 : if (sh_space >= SIZE_MAX)
333 : 3 : return sh_space;
334 : :
335 : : /*
336 : : * Compute space needed for hashed tuples with additional data. nentries
337 : : * must be somewhat sane, so it should be safe to compute this product.
338 : : *
339 : : * We assume that the hashed tuples will be kept in a BumpContext so that
340 : : * there is not additional per-tuple overhead.
341 : : *
342 : : * (Note that this is only accurate if MEMORY_CONTEXT_CHECKING is off,
343 : : * else bump.c will add a MemoryChunk header to each tuple. However, it
344 : : * seems undesirable for debug builds to make different planning choices
345 : : * than production builds, so we assume the production behavior always.)
346 : : */
347 : 2408 : tuples_space = nentries * (MAXALIGN(SizeofMinimalTupleHeader) +
348 : 2408 : MAXALIGN(tupleWidth) +
349 : 2408 : MAXALIGN(additionalsize));
350 : :
351 : : /*
352 : : * Check for size_t overflow. This coding is trickier than it may appear,
353 : : * because on 64-bit machines SIZE_MAX cannot be represented exactly as a
354 : : * double. We must cast it explicitly to suppress compiler warnings about
355 : : * an inexact conversion, and we must trust that any double value that
356 : : * compares strictly less than "(double) SIZE_MAX" will cast to a
357 : : * representable size_t value.
358 : : */
359 [ - + ]: 2408 : if (sh_space + tuples_space >= (double) SIZE_MAX)
44 tgl@sss.pgh.pa.us 360 :UNC 0 : return SIZE_MAX;
361 : :
362 : : /* We don't bother estimating size of the miscellaneous overhead data */
44 tgl@sss.pgh.pa.us 363 :GNC 2408 : return (Size) (sh_space + tuples_space);
44 tgl@sss.pgh.pa.us 364 :ECB (97316) : }
365 : :
366 : : /*
367 : : * Find or create a hashtable entry for the tuple group containing the
368 : : * given tuple. The tuple must be the same type as the hashtable entries.
369 : : *
370 : : * If isnew is NULL, we do not create new entries; we return NULL if no
371 : : * match is found.
372 : : *
373 : : * If hash is not NULL, we set it to the calculated hash value. This allows
374 : : * callers access to the hash value even if no entry is returned.
375 : : *
376 : : * If isnew isn't NULL, then a new entry is created if no existing entry
377 : : * matches. On return, *isnew is true if the entry is newly created,
378 : : * false if it existed already. The additional data in the new entry has
379 : : * been zeroed.
380 : : */
381 : : TupleHashEntry
8376 tgl@sss.pgh.pa.us 382 :CBC 4103443 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
383 : : bool *isnew, uint32 *hash)
384 : : {
385 : : TupleHashEntry entry;
386 : : MemoryContext oldContext;
387 : : uint32 local_hash;
388 : :
389 : : /* Need to run the hash functions in short-lived context */
390 : 4103443 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
391 : :
392 : : /* set up data needed by hash and match functions */
7580 393 : 4103443 : hashtable->inputslot = slot;
370 drowley@postgresql.o 394 : 4103443 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
2861 andres@anarazel.de 395 : 4103443 : hashtable->cur_eq_func = hashtable->tab_eq_func;
396 : :
1969 jdavis@postgresql.or 397 : 4103443 : local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
398 : 4103440 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
399 : :
400 [ + + ]: 4103440 : if (hash != NULL)
401 : 3520925 : *hash = local_hash;
402 : :
403 [ + + - + ]: 4103440 : Assert(entry == NULL || entry->hash == local_hash);
404 : :
2140 405 : 4103440 : MemoryContextSwitchTo(oldContext);
406 : :
407 : 4103440 : return entry;
408 : : }
409 : :
410 : : /*
411 : : * Compute the hash value for a tuple
412 : : */
413 : : uint32
2136 jdavis@postgresql.or 414 :UBC 0 : TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
415 : : {
416 : : MemoryContext oldContext;
417 : : uint32 hash;
418 : :
419 : 0 : hashtable->inputslot = slot;
370 drowley@postgresql.o 420 : 0 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
421 : :
422 : : /* Need to run the hash functions in short-lived context */
2136 jdavis@postgresql.or 423 : 0 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
424 : :
425 : 0 : hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
426 : :
427 : 0 : MemoryContextSwitchTo(oldContext);
428 : :
429 : 0 : return hash;
430 : : }
431 : :
432 : : /*
433 : : * A variant of LookupTupleHashEntry for callers that have already computed
434 : : * the hash value.
435 : : */
436 : : TupleHashEntry
2140 jdavis@postgresql.or 437 :CBC 592812 : LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
438 : : bool *isnew, uint32 hash)
439 : : {
440 : : TupleHashEntry entry;
441 : : MemoryContext oldContext;
442 : :
443 : : /* Need to run the hash functions in short-lived context */
444 : 592812 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
445 : :
446 : : /* set up data needed by hash and match functions */
447 : 592812 : hashtable->inputslot = slot;
370 drowley@postgresql.o 448 : 592812 : hashtable->in_hash_expr = hashtable->tab_hash_expr;
2140 jdavis@postgresql.or 449 : 592812 : hashtable->cur_eq_func = hashtable->tab_eq_func;
450 : :
451 : 592812 : entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
1969 452 [ + + - + ]: 592812 : Assert(entry == NULL || entry->hash == hash);
453 : :
8155 tgl@sss.pgh.pa.us 454 : 592812 : MemoryContextSwitchTo(oldContext);
455 : :
456 : 592812 : return entry;
457 : : }
458 : :
459 : : /*
460 : : * Search for a hashtable entry matching the given tuple. No entry is
461 : : * created if there's not a match. This is similar to the non-creating
462 : : * case of LookupTupleHashEntry, except that it supports cross-type
463 : : * comparisons, in which the given tuple is not of the same type as the
464 : : * table entries. The caller must provide the hash ExprState to use for
465 : : * the input tuple, as well as the equality ExprState, since these may be
466 : : * different from the table's internal functions.
467 : : */
468 : : TupleHashEntry
6888 469 : 513657 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
470 : : ExprState *eqcomp,
471 : : ExprState *hashexpr)
472 : : {
473 : : TupleHashEntry entry;
474 : : MemoryContext oldContext;
475 : : MinimalTuple key;
476 : :
477 : : /* Need to run the hash functions in short-lived context */
478 : 513657 : oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
479 : :
480 : : /* Set up data needed by hash and match functions */
481 : 513657 : hashtable->inputslot = slot;
370 drowley@postgresql.o 482 : 513657 : hashtable->in_hash_expr = hashexpr;
2861 andres@anarazel.de 483 : 513657 : hashtable->cur_eq_func = eqcomp;
484 : :
485 : : /* Search the hash table */
3350 486 : 513657 : key = NULL; /* flag to reference inputslot */
487 : 513657 : entry = tuplehash_lookup(hashtable->hashtab, key);
6888 tgl@sss.pgh.pa.us 488 : 513657 : MemoryContextSwitchTo(oldContext);
489 : :
490 : 513657 : return entry;
491 : : }
492 : :
493 : : /*
494 : : * If tuple is NULL, use the input slot instead. This convention avoids the
495 : : * need to materialize virtual input tuples unless they actually need to get
496 : : * copied into the table.
497 : : *
498 : : * Also, the caller must select an appropriate memory context for running
499 : : * the hash functions.
500 : : */
501 : : static uint32
2136 jdavis@postgresql.or 502 : 4617100 : TupleHashTableHash_internal(struct tuplehash_hash *tb,
503 : : MinimalTuple tuple)
504 : : {
3338 peter_e@gmx.net 505 : 4617100 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
506 : : uint32 hashkey;
507 : : TupleTableSlot *slot;
508 : : bool isnull;
509 : :
7580 tgl@sss.pgh.pa.us 510 [ + - ]: 4617100 : if (tuple == NULL)
511 : : {
512 : : /* Process the current input tuple for the table */
370 drowley@postgresql.o 513 : 4617100 : hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
514 : 4617100 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
515 : : hashtable->exprcontext,
516 : : &isnull));
517 : : }
518 : : else
519 : : {
520 : : /*
521 : : * Process a tuple already stored in the table.
522 : : *
523 : : * (this case never actually occurs due to the way simplehash.h is
524 : : * used, as the hash-value is stored in the entries)
525 : : */
370 drowley@postgresql.o 526 :UBC 0 : slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
7111 tgl@sss.pgh.pa.us 527 : 0 : ExecStoreMinimalTuple(tuple, slot, false);
370 drowley@postgresql.o 528 : 0 : hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
529 : : hashtable->exprcontext,
530 : : &isnull));
531 : : }
532 : :
533 : : /*
534 : : * The hashing done above, even with an initial value, doesn't tend to
535 : : * result in good hash perturbation. Running the value produced above
536 : : * through murmurhash32 leads to near perfect hash perturbation.
537 : : */
2878 andres@anarazel.de 538 :CBC 4617097 : return murmurhash32(hashkey);
539 : : }
540 : :
541 : : /*
542 : : * Does the work of LookupTupleHashEntry and LookupTupleHashEntryHash. Useful
543 : : * so that we can avoid switching the memory context multiple times for
544 : : * LookupTupleHashEntry.
545 : : *
546 : : * NB: This function may or may not change the memory context. Caller is
547 : : * expected to change it back.
548 : : */
549 : : static inline TupleHashEntry
2140 jdavis@postgresql.or 550 : 4696252 : LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
551 : : bool *isnew, uint32 hash)
552 : : {
553 : : TupleHashEntryData *entry;
554 : : bool found;
555 : : MinimalTuple key;
556 : :
557 : 4696252 : key = NULL; /* flag to reference inputslot */
558 : :
559 [ + + ]: 4696252 : if (isnew)
560 : : {
561 : 3842609 : entry = tuplehash_insert_hash(hashtable->hashtab, key, hash, &found);
562 : :
563 [ + + ]: 3842609 : if (found)
564 : : {
565 : : /* found pre-existing entry */
566 : 3328704 : *isnew = false;
567 : : }
568 : : else
569 : : {
570 : : /* created new entry */
571 : 513905 : *isnew = true;
572 : :
47 tgl@sss.pgh.pa.us 573 :GNC 513905 : MemoryContextSwitchTo(hashtable->tuplescxt);
574 : :
575 : : /*
576 : : * Copy the first tuple into the tuples context, and request
577 : : * additionalsize extra bytes before the allocation.
578 : : *
579 : : * The caller can get a pointer to the additional data with
580 : : * TupleHashEntryGetAdditional(), and store arbitrary data there.
581 : : * Placing both the tuple and additional data in the same
582 : : * allocation avoids the need to store an extra pointer in
583 : : * TupleHashEntryData or allocate an additional chunk.
584 : : */
267 jdavis@postgresql.or 585 :CBC 513905 : entry->firstTuple = ExecCopySlotMinimalTupleExtra(slot,
586 : : hashtable->additionalsize);
587 : : }
588 : : }
589 : : else
590 : : {
2140 591 : 853643 : entry = tuplehash_lookup_hash(hashtable->hashtab, key, hash);
592 : : }
593 : :
594 : 4696252 : return entry;
595 : : }
596 : :
597 : : /*
598 : : * See whether two tuples (presumably of the same hash value) match
599 : : */
600 : : static int
47 peter@eisentraut.org 601 :GNC 3612450 : TupleHashTableMatch(struct tuplehash_hash *tb, MinimalTuple tuple1, MinimalTuple tuple2)
602 : : {
603 : : TupleTableSlot *slot1;
604 : : TupleTableSlot *slot2;
3338 peter_e@gmx.net 605 :CBC 3612450 : TupleHashTable hashtable = (TupleHashTable) tb->private_data;
2861 andres@anarazel.de 606 : 3612450 : ExprContext *econtext = hashtable->exprcontext;
607 : :
608 : : /*
609 : : * We assume that simplehash.h will only ever call us with the first
610 : : * argument being an actual table entry, and the second argument being
611 : : * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
612 : : * could be supported too, but is not currently required.
613 : : */
7580 tgl@sss.pgh.pa.us 614 [ - + ]: 3612450 : Assert(tuple1 != NULL);
615 : 3612450 : slot1 = hashtable->tableslot;
7111 616 : 3612450 : ExecStoreMinimalTuple(tuple1, slot1, false);
7580 617 [ - + ]: 3612450 : Assert(tuple2 == NULL);
618 : 3612450 : slot2 = hashtable->inputslot;
619 : :
620 : : /* For crosstype comparisons, the inputslot must be first */
2861 andres@anarazel.de 621 : 3612450 : econtext->ecxt_innertuple = slot2;
622 : 3612450 : econtext->ecxt_outertuple = slot1;
623 : 3612450 : return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
624 : : }
|