Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * dynahash.c
4 : : * dynamic chained hash tables
5 : : *
6 : : * dynahash.c supports both local-to-a-backend hash tables and hash tables in
7 : : * shared memory. For shared hash tables, it is the caller's responsibility
8 : : * to provide appropriate access interlocking. The simplest convention is
9 : : * that a single LWLock protects the whole hash table. Searches (HASH_FIND or
10 : : * hash_seq_search) need only shared lock, but any update requires exclusive
11 : : * lock. For heavily-used shared tables, the single-lock approach creates a
12 : : * concurrency bottleneck, so we also support "partitioned" locking wherein
13 : : * there are multiple LWLocks guarding distinct subsets of the table. To use
14 : : * a hash table in partitioned mode, the HASH_PARTITION flag must be given
15 : : * to hash_create. This prevents any attempt to split buckets on-the-fly.
16 : : * Therefore, each hash bucket chain operates independently, and no fields
17 : : * of the hash header change after init except nentries and freeList.
18 : : * (A partitioned table uses multiple copies of those fields, guarded by
19 : : * spinlocks, for additional concurrency.)
20 : : * This lets any subset of the hash buckets be treated as a separately
21 : : * lockable partition. We expect callers to use the low-order bits of a
22 : : * lookup key's hash value as a partition number --- this will work because
23 : : * of the way calc_bucket() maps hash values to bucket numbers.
24 : : *
25 : : * The memory allocator function should match malloc's semantics of returning
26 : : * NULL on failure. (This is essential for hash tables in shared memory.
27 : : * For hash tables in local memory, we used to use palloc() which will throw
28 : : * error on failure; but we no longer do, so it's untested whether this
29 : : * module will still cope with that behavior.)
30 : : *
31 : : * dynahash.c provides support for these types of lookup keys:
32 : : *
33 : : * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
34 : : * compared as though by strcmp(). This is selected by specifying the
35 : : * HASH_STRINGS flag to hash_create.
36 : : *
37 : : * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
38 : : * (Caller must ensure there are no undefined padding bits in the keys!)
39 : : * This is selected by specifying the HASH_BLOBS flag to hash_create.
40 : : *
41 : : * 3. More complex key behavior can be selected by specifying user-supplied
42 : : * hashing, comparison, and/or key-copying functions. At least a hashing
43 : : * function must be supplied; comparison defaults to memcmp() and key copying
44 : : * to memcpy() when a user-defined hashing function is selected.
45 : : *
46 : : * Compared to simplehash, dynahash has the following benefits:
47 : : *
48 : : * - It supports partitioning, which is useful for shared memory access using
49 : : * locks.
50 : : * - Shared memory hashes are allocated in a fixed size area at startup and
51 : : * are discoverable by name from other processes.
52 : : * - Because entries don't need to be moved in the case of hash conflicts,
53 : : * dynahash has better performance for large entries.
54 : : * - Guarantees stable pointers to entries.
55 : : *
56 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
57 : : * Portions Copyright (c) 1994, Regents of the University of California
58 : : *
59 : : *
60 : : * IDENTIFICATION
61 : : * src/backend/utils/hash/dynahash.c
62 : : *
63 : : *-------------------------------------------------------------------------
64 : : */
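/*
 * A minimal sketch of the partitioned-locking convention described above;
 * my_locks, MY_NUM_PARTITIONS, and MyEntry are hypothetical caller-side
 * names, while the dynahash and LWLock calls are real:
 *
 *		uint32		hashcode = get_hash_value(htab, &key);
 *		LWLock	   *lock = &my_locks[hashcode % MY_NUM_PARTITIONS];
 *		MyEntry    *entry;
 *
 *		LWLockAcquire(lock, LW_SHARED);		(LW_EXCLUSIVE for updates)
 *		entry = (MyEntry *) hash_search_with_hash_value(htab, &key, hashcode,
 *														HASH_FIND, NULL);
 *		...
 *		LWLockRelease(lock);
 */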
65 : :
66 : : /*
67 : : * Original comments:
68 : : *
69 : : * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
70 : : * Coded into C, with minor code improvements, and with hsearch(3) interface,
71 : : * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
72 : : * also, hcreate/hdestroy routines added to simulate hsearch(3).
73 : : *
74 : : * These routines simulate hsearch(3) and family, with the important
75 : : * difference that the hash table is dynamic - can grow indefinitely
76 : : * beyond its original size (as supplied to hcreate()).
77 : : *
78 : : * Performance appears to be comparable to that of hsearch(3).
79 : : * The 'source-code' options referred to in hsearch(3)'s 'man' page
80 : : * are not implemented; otherwise functionality is identical.
81 : : *
82 : : * Compilation controls:
83 : : * HASH_STATISTICS causes some usage statistics to be maintained, which can be
84 : : * logged by calling hash_stats().
85 : : *
86 : : * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
87 : : * concatenation property, in probably unnecessary code 'optimization'.
88 : : *
89 : : * Modified margo@postgres.berkeley.edu February 1990
90 : : * added multiple table interface
91 : : * Modified by sullivan@postgres.berkeley.edu April 1990
92 : : * changed ctl structure for shared memory
93 : : */
94 : :
95 : : #include "postgres.h"
96 : :
97 : : #include <limits.h>
98 : :
99 : : #include "access/xact.h"
100 : : #include "common/hashfn.h"
101 : : #include "lib/ilist.h"
102 : : #include "port/pg_bitutils.h"
103 : : #include "storage/shmem.h"
104 : : #include "storage/spin.h"
105 : : #include "utils/memutils.h"
106 : :
107 : :
108 : : /*
109 : : * Constants
110 : : *
111 : : * A hash table has a top-level "directory", each of whose entries points
112 : : * to a "segment" of ssize bucket headers. The maximum number of hash
113 : : * buckets is thus dsize * ssize (but dsize may be expansible). Of course,
114 : : * the number of records in the table can be larger, but we don't want a
115 : : * whole lot of records per bucket or performance goes down.
116 : : *
117 : : * In a hash table allocated in shared memory, the directory cannot be
118 : : * expanded because it must stay at a fixed address. The directory size
119 : : * should be selected using hash_select_dirsize (and you'd better have
120 : : * a good idea of the maximum number of entries!). For non-shared hash
121 : : * tables, the initial directory size can be left at the default.
122 : : */
123 : : #define DEF_SEGSIZE 256
124 : : #define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
125 : : #define DEF_DIRSIZE 256
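/*
 * With these defaults, the initial directory addresses up to
 * DEF_DIRSIZE * DEF_SEGSIZE = 256 * 256 = 65536 buckets before the
 * directory itself must be doubled by dir_realloc (which is possible
 * only for non-shared tables).
 */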
126 : :
127 : : /* Number of freelists to be used for a partitioned hash table. */
128 : : #define NUM_FREELISTS 32
129 : :
130 : : /* A hash bucket is a linked list of HASHELEMENTs */
131 : : typedef HASHELEMENT *HASHBUCKET;
132 : :
133 : : /* A hash segment is an array of bucket headers */
134 : : typedef HASHBUCKET *HASHSEGMENT;
135 : :
136 : : /*
137 : : * Per-freelist data.
138 : : *
139 : : * In a partitioned hash table, each freelist is associated with a specific
140 : : * set of hashcodes, as determined by the FREELIST_IDX() macro below.
141 : : * nentries tracks the number of live hashtable entries having those hashcodes
142 : : * (NOT the number of entries in the freelist, as you might expect).
143 : : *
144 : : * The coverage of a freelist might be more or less than one partition, so it
145 : : * needs its own lock rather than relying on caller locking. Relying on that
146 : : * wouldn't work even if the coverage was the same, because of the occasional
147 : : * need to "borrow" entries from another freelist; see get_hash_entry().
148 : : *
149 : : * Using an array of FreeListData instead of separate arrays of mutexes,
150 : : * nentries and freeLists helps to reduce sharing of cache lines between
151 : : * different mutexes.
152 : : */
153 : : typedef struct
154 : : {
155 : : slock_t mutex; /* spinlock for this freelist */
156 : : int64 nentries; /* number of entries in associated buckets */
157 : : HASHELEMENT *freeList; /* chain of free elements */
158 : : } FreeListData;
159 : :
160 : : /*
161 : : * Header structure for a hash table --- contains all changeable info
162 : : *
163 : : * In a shared-memory hash table, the HASHHDR is in shared memory, while
164 : : * each backend has a local HTAB struct. For a non-shared table, there isn't
165 : : * any functional difference between HASHHDR and HTAB, but we separate them
166 : : * anyway to share code between shared and non-shared tables.
167 : : */
168 : : struct HASHHDR
169 : : {
170 : : /*
171 : : * The freelist can become a point of contention in high-concurrency hash
172 : : * tables, so we use an array of freelists, each with its own mutex and
173 : : * nentries count, instead of just a single one. Although the freelists
174 : : * normally operate independently, we will scavenge entries from freelists
175 : : * other than a hashcode's default freelist when necessary.
176 : : *
177 : : * If the hash table is not partitioned, only freeList[0] is used and its
178 : : * spinlock is not used at all; callers' locking is assumed sufficient.
179 : : */
180 : : FreeListData freeList[NUM_FREELISTS];
181 : :
182 : : /* These fields can change, but not in a partitioned table */
183 : : /* Also, dsize can't change in a shared table, even if unpartitioned */
184 : : int64 dsize; /* directory size */
185 : : int64 nsegs; /* number of allocated segments (<= dsize) */
186 : : uint32 max_bucket; /* ID of maximum bucket in use */
187 : : uint32 high_mask; /* mask to modulo into entire table */
188 : : uint32 low_mask; /* mask to modulo into lower half of table */
189 : :
190 : : /* These fields are fixed at hashtable creation */
191 : : Size keysize; /* hash key length in bytes */
192 : : Size entrysize; /* total user element size in bytes */
193 : : int64 num_partitions; /* # partitions (must be power of 2), or 0 */
194 : : int64 max_dsize; /* 'dsize' limit if directory is fixed size */
195 : : int64 ssize; /* segment size --- must be power of 2 */
196 : : int sshift; /* segment shift = log2(ssize) */
197 : : int nelem_alloc; /* number of entries to allocate at once */
198 : : bool isfixed; /* if true, don't enlarge */
199 : :
200 : : #ifdef HASH_STATISTICS
201 : :
202 : : /*
203 : : * Count statistics here. NB: stats code doesn't bother with mutex, so
204 : : * counts could be corrupted a bit in a partitioned table.
205 : : */
206 : : uint64 accesses;
207 : : uint64 collisions;
208 : : uint64 expansions;
209 : : #endif
210 : : };
211 : :
212 : : #define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
213 : :
214 : : #define FREELIST_IDX(hctl, hashcode) \
215 : : (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
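/*
 * For example, in a partitioned table a hashcode of 42 selects freelist
 * 42 % NUM_FREELISTS = 10; in an unpartitioned table every hashcode
 * selects freelist 0.
 */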
216 : :
217 : : /*
218 : : * Top control structure for a hashtable --- in a shared table, each backend
219 : : * has its own copy (OK since no fields change at runtime)
220 : : */
221 : : struct HTAB
222 : : {
223 : : HASHHDR *hctl; /* => shared control information */
224 : : HASHSEGMENT *dir; /* directory of segment starts */
225 : : HashValueFunc hash; /* hash function */
226 : : HashCompareFunc match; /* key comparison function */
227 : : HashCopyFunc keycopy; /* key copying function */
228 : : HashAllocFunc alloc; /* memory allocator */
229 : : MemoryContext hcxt; /* memory context if default allocator used */
230 : : char *tabname; /* table name (for error messages) */
231 : : bool isshared; /* true if table is in shared memory */
232 : :
233 : : /* freezing a shared table isn't allowed, so we can keep state here */
234 : : bool frozen; /* true = no more inserts allowed */
235 : :
236 : : /* We keep local copies of these fixed values to reduce contention */
237 : : Size keysize; /* hash key length in bytes */
238 : : int64 ssize; /* segment size --- must be power of 2 */
239 : : int sshift; /* segment shift = log2(ssize) */
240 : :
241 : : /*
242 : : * In a USE_VALGRIND build, non-shared hashtables keep an slist chain of
243 : : * all the element blocks they have allocated. This pacifies Valgrind,
244 : : * which would otherwise often claim that the element blocks are "possibly
245 : : * lost" for lack of any non-interior pointers to their starts.
246 : : */
247 : : #ifdef USE_VALGRIND
248 : : slist_head element_blocks;
249 : : #endif
250 : : };
251 : :
252 : : /*
253 : : * Key (also entry) part of a HASHELEMENT
254 : : */
255 : : #define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
256 : :
257 : : /*
258 : : * Obtain element pointer given pointer to key
259 : : */
260 : : #define ELEMENT_FROM_KEY(key) \
261 : : ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
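/*
 * That is, each element is laid out as
 *
 *		| HASHELEMENT (padded to MAXALIGN) | user entry, starting with key |
 *
 * so ELEMENTKEY and ELEMENT_FROM_KEY convert between the two views with
 * pointer arithmetic alone.
 */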
262 : :
263 : : /*
264 : : * Fast MOD arithmetic, assuming that y is a power of 2 !
265 : : */
266 : : #define MOD(x,y) ((x) & ((y)-1))
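/* For example, MOD(13, 8) = 13 & 7 = 5, the same as 13 % 8. */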
267 : :
268 : : /*
269 : : * Private function prototypes
270 : : */
271 : : static void *DynaHashAlloc(Size size);
272 : : static HASHSEGMENT seg_alloc(HTAB *hashp);
273 : : static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
274 : : static bool dir_realloc(HTAB *hashp);
275 : : static bool expand_table(HTAB *hashp);
276 : : static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
277 : : static void hdefault(HTAB *hashp);
278 : : static int choose_nelem_alloc(Size entrysize);
279 : : static bool init_htab(HTAB *hashp, int64 nelem);
280 : : pg_noreturn static void hash_corrupted(HTAB *hashp);
281 : : static uint32 hash_initial_lookup(HTAB *hashp, uint32 hashvalue,
282 : : HASHBUCKET **bucketptr);
283 : : static int my_log2(int64 num);
284 : : static int64 next_pow2_int64(int64 num);
285 : : static int next_pow2_int(int64 num);
286 : : static void register_seq_scan(HTAB *hashp);
287 : : static void deregister_seq_scan(HTAB *hashp);
288 : : static bool has_seq_scans(HTAB *hashp);
289 : :
290 : :
291 : : /*
292 : : * memory allocation support
293 : : */
294 : : static MemoryContext CurrentDynaHashCxt = NULL;
295 : :
296 : : static void *
9253 tgl@sss.pgh.pa.us 297 :CBC 1439531 : DynaHashAlloc(Size size)
298 : : {
8894 JanWieck@Yahoo.com 299 [ + - - + - - - - - - ]: 1439531 : Assert(MemoryContextIsValid(CurrentDynaHashCxt));
1110 tgl@sss.pgh.pa.us 300 : 1439531 : return MemoryContextAllocExtended(CurrentDynaHashCxt, size,
301 : : MCXT_ALLOC_NO_OOM);
302 : : }
303 : :
304 : :
305 : : /*
306 : : * HashCompareFunc for string keys
307 : : *
308 : : * Because we copy keys with strlcpy(), they will be truncated at keysize-1
309 : : * bytes, so we can only compare that many ... hence strncmp is almost but
310 : : * not quite the right thing.
311 : : */
312 : : static int
6971 313 : 483393 : string_compare(const char *key1, const char *key2, Size keysize)
314 : : {
315 : 483393 : return strncmp(key1, key2, keysize - 1);
316 : : }
317 : :
318 : :
319 : : /************************** CREATE ROUTINES **********************/
320 : :
321 : : /*
322 : : * hash_create -- create a new dynamic hash table
323 : : *
324 : : * tabname: a name for the table (for debugging purposes)
325 : : * nelem: maximum number of elements expected
326 : : * *info: additional table parameters, as indicated by flags
327 : : * flags: bitmask indicating which parameters to take from *info
328 : : *
329 : : * The flags value *must* include HASH_ELEM. (Formerly, this was nominally
330 : : * optional, but the default keysize and entrysize values were useless.)
331 : : * The flags value must also include exactly one of HASH_STRINGS, HASH_BLOBS,
332 : : * or HASH_FUNCTION, to define the key hashing semantics (C strings,
333 : : * binary blobs, or custom, respectively). Callers specifying a custom
334 : : * hash function will likely also want to use HASH_COMPARE, and perhaps
335 : : * also HASH_KEYCOPY, to control key comparison and copying.
336 : : * Another often-used flag is HASH_CONTEXT, to allocate the hash table
337 : : * under info->hcxt rather than under TopMemoryContext; the default
338 : : * behavior is only suitable for session-lifespan hash tables.
339 : : * Other flags bits are special-purpose and seldom used, except for those
340 : : * associated with shared-memory hash tables, for which see ShmemInitHash().
341 : : *
342 : : * Fields in *info are read only when the associated flags bit is set.
343 : : * It is not necessary to initialize other fields of *info.
344 : : * Neither tabname nor *info need persist after the hash_create() call.
345 : : *
346 : : * Note: It is deprecated for callers of hash_create() to explicitly specify
347 : : * string_hash, tag_hash, uint32_hash, or oid_hash. Just set HASH_STRINGS or
348 : : * HASH_BLOBS. Use HASH_FUNCTION only when you want something other than
349 : : * one of these.
350 : : *
351 : : * Note: for a shared-memory hashtable, nelem needs to be a pretty good
352 : : * estimate, since we can't expand the table on the fly. But an unshared
353 : : * hashtable can be expanded on-the-fly, so it's better for nelem to be
354 : : * on the small side and let the table grow if it's exceeded. An overly
355 : : * large nelem will penalize hash_seq_search speed without buying much.
356 : : */
357 : : HTAB *
67 michael@paquier.xyz 358 :GNC 340055 : hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
359 : : {
360 : : HTAB *hashp;
361 : : HASHHDR *hctl;
362 : :
363 : : /*
364 : : * Hash tables now allocate space for key and data, but you have to say
365 : : * how much space to allocate.
366 : : */
1778 tgl@sss.pgh.pa.us 367 [ - + ]:CBC 340055 : Assert(flags & HASH_ELEM);
368 [ - + ]: 340055 : Assert(info->keysize > 0);
369 [ - + ]: 340055 : Assert(info->entrysize >= info->keysize);
370 : :
371 : : /*
372 : : * For shared hash tables, we have a local hash header (HTAB struct) that
373 : : * we allocate in TopMemoryContext; all else is in shared memory.
374 : : *
375 : : * For non-shared hash tables, everything including the hash header is in
376 : : * a memory context created specially for the hash table --- this makes
377 : : * hash_destroy very simple. The memory context is made a child of either
378 : : * a context specified by the caller, or TopMemoryContext if nothing is
379 : : * specified.
380 : : */
7480 381 [ + + ]: 340055 : if (flags & HASH_SHARED_MEM)
382 : : {
383 : : /* Set up to allocate the hash header */
384 : 9448 : CurrentDynaHashCxt = TopMemoryContext;
385 : : }
386 : : else
387 : : {
388 : : /* Create the hash table's private memory context */
389 [ + + ]: 330607 : if (flags & HASH_CONTEXT)
390 : 196516 : CurrentDynaHashCxt = info->hcxt;
391 : : else
392 : 134091 : CurrentDynaHashCxt = TopMemoryContext;
2772 393 : 330607 : CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
394 : : "dynahash",
395 : : ALLOCSET_DEFAULT_SIZES);
396 : : }
397 : :
398 : : /* Initialize the hash header, plus a copy of the table name */
188 399 : 340055 : hashp = (HTAB *) MemoryContextAlloc(CurrentDynaHashCxt,
400 : 340055 : sizeof(HTAB) + strlen(tabname) + 1);
10267 bruce@momjian.us 401 [ + - + - + - + - + + ]: 4420715 : MemSet(hashp, 0, sizeof(HTAB));
402 : :
7480 tgl@sss.pgh.pa.us 403 : 340055 : hashp->tabname = (char *) (hashp + 1);
8789 404 : 340055 : strcpy(hashp->tabname, tabname);
405 : :
406 : : /* If we have a private context, label it with hashtable's name */
2772 407 [ + + ]: 340055 : if (!(flags & HASH_SHARED_MEM))
408 : 330607 : MemoryContextSetIdentifier(CurrentDynaHashCxt, hashp->tabname);
409 : :
410 : : /*
411 : : * Select the appropriate hash function (see comments at head of file).
412 : : */
10278 bruce@momjian.us 413 [ + + ]: 340055 : if (flags & HASH_FUNCTION)
414 : : {
1778 tgl@sss.pgh.pa.us 415 [ - + ]: 13422 : Assert(!(flags & (HASH_BLOBS | HASH_STRINGS)));
10278 bruce@momjian.us 416 : 13422 : hashp->hash = info->hash;
417 : : }
3967 tgl@sss.pgh.pa.us 418 [ + + ]: 326633 : else if (flags & HASH_BLOBS)
419 : : {
1778 420 [ - + ]: 277561 : Assert(!(flags & HASH_STRINGS));
421 : : /* We can optimize hashing for common key sizes */
3967 422 [ + + ]: 277561 : if (info->keysize == sizeof(uint32))
423 : 196178 : hashp->hash = uint32_hash;
424 : : else
425 : 81383 : hashp->hash = tag_hash;
426 : : }
427 : : else
428 : : {
429 : : /*
430 : : * string_hash used to be considered the default hash method, and in a
431 : : * non-assert build it effectively still is. But we now consider it
432 : : * an assertion error to not say HASH_STRINGS explicitly. To help
433 : : * catch mistaken usage of HASH_STRINGS, we also insist on a
434 : : * reasonably long string length: if the keysize is only 4 or 8 bytes,
435 : : * it's almost certainly an integer or pointer not a string.
436 : : */
1778 437 [ - + ]: 49072 : Assert(flags & HASH_STRINGS);
438 [ - + ]: 49072 : Assert(info->keysize > 8);
439 : :
440 : 49072 : hashp->hash = string_hash;
441 : : }
442 : :
443 : : /*
444 : : * If you don't specify a match function, it defaults to string_compare if
445 : : * you used string_hash, and to memcmp otherwise.
446 : : *
447 : : * Note: explicitly specifying string_hash is deprecated, because this
448 : : * might not work for callers in loadable modules on some platforms due to
449 : : * referencing a trampoline instead of the string_hash function proper.
450 : : * Specify HASH_STRINGS instead.
451 : : */
8106 452 [ + + ]: 340055 : if (flags & HASH_COMPARE)
453 : 7155 : hashp->match = info->match;
454 [ + + ]: 332900 : else if (hashp->hash == string_hash)
6971 455 : 49072 : hashp->match = (HashCompareFunc) string_compare;
456 : : else
8106 457 : 283828 : hashp->match = memcmp;
458 : :
459 : : /*
460 : : * Similarly, the key-copying function defaults to strlcpy or memcpy.
461 : : */
7437 462 [ - + ]: 340055 : if (flags & HASH_KEYCOPY)
7437 tgl@sss.pgh.pa.us 463 :UBC 0 : hashp->keycopy = info->keycopy;
7437 tgl@sss.pgh.pa.us 464 [ + + ]:CBC 340055 : else if (hashp->hash == string_hash)
465 : : {
466 : : /*
467 : : * The signature of keycopy is meant for memcpy(), which returns
468 : : * void*, but strlcpy() returns size_t. Since we never use the return
469 : : * value of keycopy, and size_t is pretty much always the same size as
470 : : * void *, this should be safe. The extra cast in the middle is to
471 : : * avoid warnings from -Wcast-function-type.
472 : : */
1932 peter@eisentraut.org 473 : 49072 : hashp->keycopy = (HashCopyFunc) (pg_funcptr_t) strlcpy;
474 : : }
475 : : else
7437 tgl@sss.pgh.pa.us 476 : 290983 : hashp->keycopy = memcpy;
477 : :
478 : : /* And select the entry allocation function, too. */
7480 479 [ + + ]: 340055 : if (flags & HASH_ALLOC)
480 : 9448 : hashp->alloc = info->alloc;
481 : : else
482 : 330607 : hashp->alloc = DynaHashAlloc;
483 : :
10278 bruce@momjian.us 484 [ + + ]: 340055 : if (flags & HASH_SHARED_MEM)
485 : : {
486 : : /*
487 : : * ctl structure and directory are preallocated for shared memory
488 : : * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
489 : : * well.
490 : : */
8793 tgl@sss.pgh.pa.us 491 : 9448 : hashp->hctl = info->hctl;
7038 492 : 9448 : hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
8894 JanWieck@Yahoo.com 493 : 9448 : hashp->hcxt = NULL;
8789 tgl@sss.pgh.pa.us 494 : 9448 : hashp->isshared = true;
495 : :
496 : : /* hash table already exists, we're just attaching to it */
10278 bruce@momjian.us 497 [ - + ]: 9448 : if (flags & HASH_ATTACH)
498 : : {
499 : : /* make local copies of some heavily-used values */
7038 tgl@sss.pgh.pa.us 500 :UBC 0 : hctl = hashp->hctl;
501 : 0 : hashp->keysize = hctl->keysize;
502 : 0 : hashp->ssize = hctl->ssize;
503 : 0 : hashp->sshift = hctl->sshift;
504 : :
9919 bruce@momjian.us 505 : 0 : return hashp;
506 : : }
507 : : }
508 : : else
509 : : {
510 : : /* setup hash table defaults */
9745 tgl@sss.pgh.pa.us 511 :CBC 330607 : hashp->hctl = NULL;
10278 bruce@momjian.us 512 : 330607 : hashp->dir = NULL;
8106 tgl@sss.pgh.pa.us 513 : 330607 : hashp->hcxt = CurrentDynaHashCxt;
8789 514 : 330607 : hashp->isshared = false;
515 : : }
516 : :
10278 bruce@momjian.us 517 [ + + ]: 340055 : if (!hashp->hctl)
518 : : {
207 tomas.vondra@postgre 519 : 330607 : hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
10278 bruce@momjian.us 520 [ - + ]: 330607 : if (!hashp->hctl)
7673 neilc@samurai.com 521 [ # # ]:UBC 0 : ereport(ERROR,
522 : : (errcode(ERRCODE_OUT_OF_MEMORY),
523 : : errmsg("out of memory")));
524 : : }
525 : :
6760 tgl@sss.pgh.pa.us 526 :CBC 340055 : hashp->frozen = false;
527 : :
7676 neilc@samurai.com 528 : 340055 : hdefault(hashp);
529 : :
10278 bruce@momjian.us 530 : 340055 : hctl = hashp->hctl;
531 : :
7038 tgl@sss.pgh.pa.us 532 [ + + ]: 340055 : if (flags & HASH_PARTITION)
533 : : {
534 : : /* Doesn't make sense to partition a local hash table */
535 [ - + ]: 5245 : Assert(flags & HASH_SHARED_MEM);
536 : :
537 : : /*
538 : : * The number of partitions had better be a power of 2. Also, it must
539 : : * be less than INT_MAX (see init_htab()), so call the int version of
540 : : * next_pow2.
541 : : */
4704 542 [ - + ]: 5245 : Assert(info->num_partitions == next_pow2_int(info->num_partitions));
543 : :
7038 544 : 5245 : hctl->num_partitions = info->num_partitions;
545 : : }
546 : :
10278 bruce@momjian.us 547 [ - + ]: 340055 : if (flags & HASH_SEGMENT)
548 : : {
10278 bruce@momjian.us 549 :UBC 0 : hctl->ssize = info->ssize;
550 : 0 : hctl->sshift = my_log2(info->ssize);
551 : : /* ssize had better be a power of 2 */
9647 tgl@sss.pgh.pa.us 552 [ # # ]: 0 : Assert(hctl->ssize == (1L << hctl->sshift));
553 : : }
554 : :
555 : : /*
556 : : * SHM hash tables have fixed directory size passed by the caller.
557 : : */
10278 bruce@momjian.us 558 [ + + ]:CBC 340055 : if (flags & HASH_DIRSIZE)
559 : : {
9745 tgl@sss.pgh.pa.us 560 : 9448 : hctl->max_dsize = info->max_dsize;
561 : 9448 : hctl->dsize = info->dsize;
562 : : }
563 : :
564 : : /* remember the entry sizes, too */
1778 565 : 340055 : hctl->keysize = info->keysize;
566 : 340055 : hctl->entrysize = info->entrysize;
567 : :
568 : : /* make local copies of heavily-used constant fields */
7038 569 : 340055 : hashp->keysize = hctl->keysize;
570 : 340055 : hashp->ssize = hctl->ssize;
571 : 340055 : hashp->sshift = hctl->sshift;
572 : :
573 : : /* Build the hash directory structure */
8793 574 [ - + ]: 340055 : if (!init_htab(hashp, nelem))
6622 tgl@sss.pgh.pa.us 575 [ # # ]:UBC 0 : elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
576 : :
577 : : /*
578 : : * For a shared hash table, preallocate the requested number of elements.
579 : : * This reduces problems with run-time out-of-shared-memory conditions.
580 : : *
581 : : * For a non-shared hash table, preallocate the requested number of
582 : : * elements if it's less than our chosen nelem_alloc. This avoids wasting
583 : : * space if the caller correctly estimates a small table size.
584 : : */
7065 tgl@sss.pgh.pa.us 585 [ + + ]:CBC 340055 : if ((flags & HASH_SHARED_MEM) ||
586 [ + + ]: 330607 : nelem < hctl->nelem_alloc)
587 : : {
588 : : int i,
589 : : freelist_partitions,
590 : : nelem_alloc,
591 : : nelem_alloc_first;
592 : :
593 : : /*
594 : : * If hash table is partitioned, give each freelist an equal share of
595 : : * the initial allocation. Otherwise only freeList[0] is used.
596 : : */
3506 rhaas@postgresql.org 597 [ + + ]: 120601 : if (IS_PARTITIONED(hashp->hctl))
598 : 5245 : freelist_partitions = NUM_FREELISTS;
599 : : else
600 : 115356 : freelist_partitions = 1;
601 : :
602 : 120601 : nelem_alloc = nelem / freelist_partitions;
3020 tgl@sss.pgh.pa.us 603 [ - + ]: 120601 : if (nelem_alloc <= 0)
3506 rhaas@postgresql.org 604 :UBC 0 : nelem_alloc = 1;
605 : :
606 : : /*
607 : : * Make sure we'll allocate all the requested elements; freeList[0]
608 : : * gets the excess if the request isn't divisible by NUM_FREELISTS.
609 : : */
3506 rhaas@postgresql.org 610 [ + + ]:CBC 120601 : if (nelem_alloc * freelist_partitions < nelem)
611 : 52 : nelem_alloc_first =
612 : 52 : nelem - nelem_alloc * (freelist_partitions - 1);
613 : : else
614 : 120549 : nelem_alloc_first = nelem_alloc;
615 : :
616 [ + + ]: 403797 : for (i = 0; i < freelist_partitions; i++)
617 : : {
618 [ + + ]: 283196 : int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
619 : :
207 tomas.vondra@postgre 620 [ - + ]: 283196 : if (!element_alloc(hashp, temp, i))
207 tomas.vondra@postgre 621 [ # # ]:UBC 0 : ereport(ERROR,
622 : : (errcode(ERRCODE_OUT_OF_MEMORY),
623 : : errmsg("out of memory")));
624 : : }
625 : : }
626 : :
627 : : /* Set isfixed if requested, but not till after we build initial entries */
5314 heikki.linnakangas@i 628 [ + + ]:CBC 340055 : if (flags & HASH_FIXED_SIZE)
95 tgl@sss.pgh.pa.us 629 :GNC 4196 : hctl->isfixed = true;
630 : :
9919 bruce@momjian.us 631 :CBC 340055 : return hashp;
632 : : }
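/*
 * A minimal usage sketch for hash_create. MyEntry and htab are
 * hypothetical caller-side names; the key must be the first field of the
 * entry type:
 *
 *		typedef struct
 *		{
 *			Oid			key;
 *			int			count;
 *		} MyEntry;
 *
 *		HASHCTL		ctl;
 *		HTAB	   *htab;
 *
 *		ctl.keysize = sizeof(Oid);
 *		ctl.entrysize = sizeof(MyEntry);
 *		htab = hash_create("my hypothetical table", 128, &ctl,
 *						   HASH_ELEM | HASH_BLOBS);
 */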
633 : :
634 : : /*
635 : : * Set default HASHHDR parameters.
636 : : */
637 : : static void
10277 638 : 340055 : hdefault(HTAB *hashp)
639 : : {
8769 640 : 340055 : HASHHDR *hctl = hashp->hctl;
641 : :
8793 tgl@sss.pgh.pa.us 642 [ + - + - + - + - + + ]: 36725940 : MemSet(hctl, 0, sizeof(HASHHDR));
643 : :
7038 644 : 340055 : hctl->dsize = DEF_DIRSIZE;
10278 bruce@momjian.us 645 : 340055 : hctl->nsegs = 0;
646 : :
7038 tgl@sss.pgh.pa.us 647 : 340055 : hctl->num_partitions = 0; /* not partitioned */
648 : :
649 : : /* table has no fixed maximum size */
10278 bruce@momjian.us 650 : 340055 : hctl->max_dsize = NO_MAX_DSIZE;
651 : :
7038 tgl@sss.pgh.pa.us 652 : 340055 : hctl->ssize = DEF_SEGSIZE;
653 : 340055 : hctl->sshift = DEF_SEGSIZE_SHIFT;
654 : :
95 tgl@sss.pgh.pa.us 655 :GNC 340055 : hctl->isfixed = false; /* can be enlarged */
656 : :
657 : : #ifdef HASH_STATISTICS
658 : : hctl->accesses = hctl->collisions = hctl->expansions = 0;
659 : : #endif
10703 scrappy@hub.org 660 :CBC 340055 : }
661 : :
662 : : /*
663 : : * Given the user-specified entry size, choose nelem_alloc, ie, how many
664 : : * elements to add to the hash table when we need more.
665 : : */
666 : : static int
7065 tgl@sss.pgh.pa.us 667 : 357603 : choose_nelem_alloc(Size entrysize)
668 : : {
669 : : int nelem_alloc;
670 : : Size elementSize;
671 : : Size allocSize;
672 : :
673 : : /* Each element has a HASHELEMENT header plus user data. */
674 : : /* NB: this had better match element_alloc() */
675 : 357603 : elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
676 : :
677 : : /*
678 : : * The idea here is to choose nelem_alloc at least 32, but round up so
679 : : * that the allocation request will be a power of 2 or just less. This
680 : : * makes little difference for hash tables in shared memory, but for hash
681 : : * tables managed by palloc, the allocation request will be rounded up to
682 : : * a power of 2 anyway. If we fail to take this into account, we'll waste
683 : : * as much as half the allocated space.
684 : : */
685 : 357603 : allocSize = 32 * 4; /* assume elementSize at least 8 */
686 : : do
687 : : {
688 : 1366363 : allocSize <<= 1;
689 : 1366363 : nelem_alloc = allocSize / elementSize;
690 [ + + ]: 1366363 : } while (nelem_alloc < 32);
691 : :
692 : 357603 : return nelem_alloc;
693 : : }
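/*
 * Worked example: if elementSize comes out to 64 bytes, allocSize runs
 * through 256, 512, 1024, 2048; at 2048 we get nelem_alloc = 2048 / 64 = 32
 * and stop, so the allocation request is a power of 2 or just less, here
 * exactly 2048 bytes for 32 elements.
 */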
694 : :
695 : : /*
696 : : * Compute derived fields of hctl and build the initial directory/segment
697 : : * arrays
698 : : */
699 : : static bool
67 michael@paquier.xyz 700 :GNC 340055 : init_htab(HTAB *hashp, int64 nelem)
701 : : {
8769 bruce@momjian.us 702 :CBC 340055 : HASHHDR *hctl = hashp->hctl;
703 : : HASHSEGMENT *segp;
704 : : int nbuckets;
705 : : int nsegs;
706 : : int i;
707 : :
708 : : /*
709 : : * initialize mutexes if it's a partitioned table
710 : : */
7038 tgl@sss.pgh.pa.us 711 [ + + ]: 340055 : if (IS_PARTITIONED(hctl))
3506 rhaas@postgresql.org 712 [ + + ]: 173085 : for (i = 0; i < NUM_FREELISTS; i++)
713 : 167840 : SpinLockInit(&(hctl->freeList[i].mutex));
714 : :
715 : : /*
716 : : * Allocate space for the next greater power of two number of buckets,
717 : : * assuming a desired maximum load factor of 1.
718 : : */
207 tomas.vondra@postgre 719 : 340055 : nbuckets = next_pow2_int(nelem);
720 : :
721 : : /*
722 : : * In a partitioned table, nbuckets must be at least equal to
723 : : * num_partitions; were it less, keys with apparently different partition
724 : : * numbers would map to the same bucket, breaking partition independence.
725 : : * (Normally nbuckets will be much bigger; this is just a safety check.)
726 : : */
727 [ - + ]: 340055 : while (nbuckets < hctl->num_partitions)
207 tomas.vondra@postgre 728 :UBC 0 : nbuckets <<= 1;
729 : :
10278 bruce@momjian.us 730 :CBC 340055 : hctl->max_bucket = hctl->low_mask = nbuckets - 1;
731 : 340055 : hctl->high_mask = (nbuckets << 1) - 1;
732 : :
733 : : /*
734 : : * Figure number of directory segments needed, round up to a power of 2
735 : : */
207 tomas.vondra@postgre 736 : 340055 : nsegs = (nbuckets - 1) / hctl->ssize + 1;
737 : 340055 : nsegs = next_pow2_int(nsegs);
738 : :
739 : : /*
740 : : * Make sure directory is big enough. If pre-allocated directory is too
741 : : * small, choke (caller screwed up).
742 : : */
9745 tgl@sss.pgh.pa.us 743 [ - + ]: 340055 : if (nsegs > hctl->dsize)
744 : : {
9745 tgl@sss.pgh.pa.us 745 [ # # ]:UBC 0 : if (!(hashp->dir))
746 : 0 : hctl->dsize = nsegs;
747 : : else
8793 748 : 0 : return false;
749 : : }
750 : :
751 : : /* Allocate a directory */
10278 bruce@momjian.us 752 [ + + ]:CBC 340055 : if (!(hashp->dir))
753 : : {
8894 JanWieck@Yahoo.com 754 : 330607 : CurrentDynaHashCxt = hashp->hcxt;
207 tomas.vondra@postgre 755 : 330607 : hashp->dir = (HASHSEGMENT *)
756 : 330607 : hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
757 [ - + ]: 330607 : if (!hashp->dir)
207 tomas.vondra@postgre 758 :UBC 0 : return false;
759 : : }
760 : :
761 : : /* Allocate initial segments */
10278 bruce@momjian.us 762 [ + + ]:CBC 967588 : for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
763 : : {
207 tomas.vondra@postgre 764 : 627533 : *segp = seg_alloc(hashp);
765 [ - + ]: 627533 : if (*segp == NULL)
207 tomas.vondra@postgre 766 :UBC 0 : return false;
767 : : }
768 : :
769 : : /* Choose number of entries to allocate at a time */
207 tomas.vondra@postgre 770 :CBC 340055 : hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
771 : :
8793 tgl@sss.pgh.pa.us 772 : 340055 : return true;
773 : : }
774 : :
775 : : /*
776 : : * Estimate the space needed for a hashtable containing the given number
777 : : * of entries of given size.
778 : : * NOTE: this is used to estimate the footprint of hashtables in shared
779 : : * memory; therefore it does not count HTAB which is in local memory.
780 : : * NB: assumes that all hash structure parameters have default values!
781 : : */
782 : : Size
67 michael@paquier.xyz 783 :GNC 17548 : hash_estimate_size(int64 num_entries, Size entrysize)
784 : : {
785 : : Size size;
786 : : int64 nBuckets,
787 : : nSegments,
788 : : nDirEntries,
789 : : nElementAllocs,
790 : : elementSize,
791 : : elementAllocCnt;
792 : :
793 : : /* estimate number of buckets wanted */
794 : 17548 : nBuckets = next_pow2_int64(num_entries);
795 : : /* # of segments needed for nBuckets */
796 : 17548 : nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
797 : : /* directory entries */
9733 tgl@sss.pgh.pa.us 798 :CBC 17548 : nDirEntries = DEF_DIRSIZE;
799 [ - + ]: 17548 : while (nDirEntries < nSegments)
9733 tgl@sss.pgh.pa.us 800 :UBC 0 : nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
801 : :
802 : : /* fixed control info */
7374 tgl@sss.pgh.pa.us 803 :CBC 17548 : size = MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
804 : : /* directory */
805 : 17548 : size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
806 : : /* segments */
807 : 17548 : size = add_size(size, mul_size(nSegments,
808 : : MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
809 : : /* elements --- allocated in groups of choose_nelem_alloc() entries */
7065 810 : 17548 : elementAllocCnt = choose_nelem_alloc(entrysize);
7429 811 : 17548 : nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
7065 812 : 17548 : elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
7374 813 : 17548 : size = add_size(size,
814 : : mul_size(nElementAllocs,
815 : : mul_size(elementAllocCnt, elementSize)));
816 : :
9745 817 : 17548 : return size;
818 : : }
819 : :
820 : : /*
821 : : * Select an appropriate directory size for a hashtable with the given
822 : : * maximum number of entries.
823 : : * This is only needed for hashtables in shared memory, whose directories
824 : : * cannot be expanded dynamically.
825 : : * NB: assumes that all hash structure parameters have default values!
826 : : *
827 : : * XXX this had better agree with the behavior of init_htab()...
828 : : */
829 : : int64
67 michael@paquier.xyz 830 :GNC 9448 : hash_select_dirsize(int64 num_entries)
831 : : {
832 : : int64 nBuckets,
833 : : nSegments,
834 : : nDirEntries;
835 : :
836 : : /* estimate number of buckets wanted */
837 : 9448 : nBuckets = next_pow2_int64(num_entries);
838 : : /* # of segments needed for nBuckets */
839 : 9448 : nSegments = next_pow2_int64((nBuckets - 1) / DEF_SEGSIZE + 1);
840 : : /* directory entries */
9376 tgl@sss.pgh.pa.us 841 :CBC 9448 : nDirEntries = DEF_DIRSIZE;
842 [ - + ]: 9448 : while (nDirEntries < nSegments)
9376 tgl@sss.pgh.pa.us 843 :UBC 0 : nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
844 : :
9376 tgl@sss.pgh.pa.us 845 :CBC 9448 : return nDirEntries;
846 : : }
847 : :
848 : : /*
849 : : * Compute the required initial memory allocation for a shared-memory
850 : : * hashtable with the given parameters. We need space for the HASHHDR
851 : : * and for the (non expansible) directory.
852 : : */
853 : : Size
207 tomas.vondra@postgre 854 : 9448 : hash_get_shared_size(HASHCTL *info, int flags)
855 : : {
856 [ - + ]: 9448 : Assert(flags & HASH_DIRSIZE);
857 [ - + ]: 9448 : Assert(info->dsize == info->max_dsize);
858 : 9448 : return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
859 : : }
860 : :
861 : :
862 : : /********************** DESTROY ROUTINES ************************/
863 : :
864 : : void
10277 bruce@momjian.us 865 : 61246 : hash_destroy(HTAB *hashp)
866 : : {
10278 867 [ + - ]: 61246 : if (hashp != NULL)
868 : : {
869 : : /* allocation method must be one we know how to free, too */
7480 tgl@sss.pgh.pa.us 870 [ - + ]: 61246 : Assert(hashp->alloc == DynaHashAlloc);
871 : : /* so this hashtable must have its own context */
8894 JanWieck@Yahoo.com 872 [ - + ]: 61246 : Assert(hashp->hcxt != NULL);
873 : :
70 drowley@postgresql.o 874 :GNC 61246 : hash_stats(__func__, hashp);
875 : :
876 : : /*
877 : : * Free everything by destroying the hash table's memory context.
878 : : */
8894 JanWieck@Yahoo.com 879 :CBC 61246 : MemoryContextDelete(hashp->hcxt);
880 : : }
10703 scrappy@hub.org 881 : 61246 : }
882 : :
883 : : void
70 drowley@postgresql.o 884 :GNC 61246 : hash_stats(const char *caller, HTAB *hashp)
885 : : {
886 : : #ifdef HASH_STATISTICS
887 : : HASHHDR *hctl = hashp->hctl;
888 : :
889 : : elog(DEBUG4,
890 : : "hash_stats: Caller: %s Table Name: \"%s\" Accesses: " UINT64_FORMAT " Collisions: " UINT64_FORMAT " Expansions: " UINT64_FORMAT " Entries: " INT64_FORMAT " Key Size: %zu Max Bucket: %u Segment Count: " INT64_FORMAT,
891 : : caller != NULL ? caller : "(unknown)", hashp->tabname, hctl->accesses,
892 : : hctl->collisions, hctl->expansions, hash_get_num_entries(hashp),
893 : : hctl->keysize, hctl->max_bucket, hctl->nsegs);
894 : : #endif
10703 scrappy@hub.org 895 :CBC 61246 : }
896 : :
897 : : /*******************************SEARCH ROUTINES *****************************/
898 : :
899 : :
900 : : /*
901 : : * get_hash_value -- exported routine to calculate a key's hash value
902 : : *
903 : : * We export this because for partitioned tables, callers need to compute
904 : : * the partition number (from the low-order bits of the hash value) before
905 : : * searching.
906 : : */
907 : : uint32
7038 tgl@sss.pgh.pa.us 908 : 75022363 : get_hash_value(HTAB *hashp, const void *keyPtr)
909 : : {
910 : 75022363 : return hashp->hash(keyPtr, hashp->keysize);
911 : : }
912 : :
913 : : /* Convert a hash value to a bucket number */
914 : : static inline uint32
8106 915 : 181508368 : calc_bucket(HASHHDR *hctl, uint32 hash_val)
916 : : {
917 : : uint32 bucket;
918 : :
10278 bruce@momjian.us 919 : 181508368 : bucket = hash_val & hctl->high_mask;
920 [ + + ]: 181508368 : if (bucket > hctl->max_bucket)
921 : 87138078 : bucket = bucket & hctl->low_mask;
922 : :
8634 tgl@sss.pgh.pa.us 923 : 181508368 : return bucket;
924 : : }
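/*
 * Worked example: with max_bucket = 5, low_mask = 3, high_mask = 7 (six
 * buckets in use), a hash value of 14 gives 14 & 7 = 6; since 6 exceeds
 * max_bucket, it is remapped to 6 & 3 = 2, the not-yet-split bucket that
 * still holds those entries. Once expand_table creates bucket 6, the same
 * hash value maps directly to it.
 */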
925 : :
926 : : /*
927 : : * hash_search -- look up key in table and perform action
928 : : * hash_search_with_hash_value -- same, with key's hash value already computed
929 : : *
930 : : * action is one of:
931 : : * HASH_FIND: look up key in table
932 : : * HASH_ENTER: look up key in table, creating entry if not present
933 : : * HASH_ENTER_NULL: same, but return NULL if out of memory
934 : : * HASH_REMOVE: look up key in table, remove entry if present
935 : : *
936 : : * Return value is a pointer to the element found/entered/removed if any,
937 : : * or NULL if no match was found. (NB: in the case of the REMOVE action,
938 : : * the result is a dangling pointer that shouldn't be dereferenced!)
939 : : *
940 : : * HASH_ENTER will normally ereport a generic "out of memory" error if
941 : : * it is unable to create a new entry. The HASH_ENTER_NULL operation is
942 : : * the same except it will return NULL if out of memory.
943 : : *
944 : : * If foundPtr isn't NULL, then *foundPtr is set true if we found an
945 : : * existing entry in the table, false otherwise. This is needed in the
946 : : * HASH_ENTER case, but is redundant with the return value otherwise.
947 : : *
948 : : * For hash_search_with_hash_value, the hashvalue parameter must have been
949 : : * calculated with get_hash_value().
950 : : */
951 : : void *
10277 bruce@momjian.us 952 : 112654615 : hash_search(HTAB *hashp,
953 : : const void *keyPtr,
954 : : HASHACTION action,
955 : : bool *foundPtr)
956 : : {
7038 tgl@sss.pgh.pa.us 957 : 112654615 : return hash_search_with_hash_value(hashp,
958 : : keyPtr,
959 : 112654615 : hashp->hash(keyPtr, hashp->keysize),
960 : : action,
961 : : foundPtr);
962 : : }
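/*
 * A minimal caller sketch for HASH_ENTER (MyEntry is the hypothetical
 * entry type from the hash_create sketch above; its key field must come
 * first):
 *
 *		bool		found;
 *		MyEntry    *entry;
 *
 *		entry = (MyEntry *) hash_search(htab, &key, HASH_ENTER, &found);
 *		if (!found)
 *			entry->count = 0;	(initialize all non-key fields here)
 *		entry->count++;
 */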
963 : :
964 : : void *
965 : 180149476 : hash_search_with_hash_value(HTAB *hashp,
966 : : const void *keyPtr,
967 : : uint32 hashvalue,
968 : : HASHACTION action,
969 : : bool *foundPtr)
970 : : {
8769 bruce@momjian.us 971 : 180149476 : HASHHDR *hctl = hashp->hctl;
3020 tgl@sss.pgh.pa.us 972 [ + + ]: 180149476 : int freelist_idx = FREELIST_IDX(hctl, hashvalue);
973 : : Size keysize;
974 : : HASHBUCKET currBucket;
975 : : HASHBUCKET *prevBucketPtr;
976 : : HashCompareFunc match;
977 : :
978 : : #ifdef HASH_STATISTICS
979 : : hctl->accesses++;
980 : : #endif
981 : :
982 : : /*
983 : : * If inserting, check if it is time to split a bucket.
984 : : *
985 : : * NOTE: failure to expand table is not a fatal error, it just means we
986 : : * have to run at higher fill factor than we wanted. However, if we're
987 : : * using the palloc allocator then it will throw error anyway on
988 : : * out-of-memory, so we must do this before modifying the table.
989 : : */
4757 990 [ + + + + ]: 180149476 : if (action == HASH_ENTER || action == HASH_ENTER_NULL)
991 : : {
992 : : /*
993 : : * Can't split if running in partitioned mode, nor if frozen, nor if
994 : : * table is the subject of any active hash_seq_search scans.
995 : : */
67 michael@paquier.xyz 996 [ + + ]:GNC 46417967 : if (hctl->freeList[0].nentries > (int64) hctl->max_bucket &&
1865 tmunro@postgresql.or 997 [ + - + - ]:CBC 361761 : !IS_PARTITIONED(hctl) && !hashp->frozen &&
4757 tgl@sss.pgh.pa.us 998 [ + + ]: 361761 : !has_seq_scans(hashp))
999 : 361300 : (void) expand_table(hashp);
1000 : : }
1001 : :
1002 : : /*
1003 : : * Do the initial lookup
1004 : : */
592 michael@paquier.xyz 1005 : 180149476 : (void) hash_initial_lookup(hashp, hashvalue, &prevBucketPtr);
7457 tgl@sss.pgh.pa.us 1006 : 180149476 : currBucket = *prevBucketPtr;
1007 : :
1008 : : /*
1009 : : * Follow collision chain looking for matching key
1010 : : */
1011 : 180149476 : match = hashp->match; /* save one fetch in inner loop */
7038 1012 : 180149476 : keysize = hashp->keysize; /* ditto */
1013 : :
7457 1014 [ + + ]: 216667722 : while (currBucket != NULL)
1015 : : {
1016 [ + + + + ]: 321839248 : if (currBucket->hashvalue == hashvalue &&
1017 : 142663041 : match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
1018 : 142657961 : break;
1019 : 36518246 : prevBucketPtr = &(currBucket->link);
8793 1020 : 36518246 : currBucket = *prevBucketPtr;
1021 : : #ifdef HASH_STATISTICS
1022 : : hctl->collisions++;
1023 : : #endif
1024 : : }
1025 : :
8789 1026 [ + + ]: 180149476 : if (foundPtr)
1027 : 47759519 : *foundPtr = (bool) (currBucket != NULL);
1028 : :
1029 : : /*
1030 : : * OK, now what?
1031 : : */
10278 bruce@momjian.us 1032 [ + + + - ]: 180149476 : switch (action)
1033 : : {
8789 tgl@sss.pgh.pa.us 1034 : 110245040 : case HASH_FIND:
8793 1035 [ + + ]: 110245040 : if (currBucket != NULL)
334 peter@eisentraut.org 1036 : 100752538 : return ELEMENTKEY(currBucket);
8789 tgl@sss.pgh.pa.us 1037 : 9492502 : return NULL;
1038 : :
10277 bruce@momjian.us 1039 : 23486469 : case HASH_REMOVE:
8793 tgl@sss.pgh.pa.us 1040 [ + + ]: 23486469 : if (currBucket != NULL)
1041 : : {
1042 : : /* if partitioned, must lock to touch nentries and freeList */
3665 rhaas@postgresql.org 1043 [ + + ]: 23482918 : if (IS_PARTITIONED(hctl))
3506 1044 [ + + ]: 5078610 : SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
1045 : :
1046 : : /* delete the record from the appropriate nentries counter. */
1047 [ - + ]: 23482918 : Assert(hctl->freeList[freelist_idx].nentries > 0);
1048 : 23482918 : hctl->freeList[freelist_idx].nentries--;
1049 : :
1050 : : /* remove record from hash bucket's chain. */
8793 tgl@sss.pgh.pa.us 1051 : 23482918 : *prevBucketPtr = currBucket->link;
1052 : :
1053 : : /* add the record to the appropriate freelist. */
3506 rhaas@postgresql.org 1054 : 23482918 : currBucket->link = hctl->freeList[freelist_idx].freeList;
1055 : 23482918 : hctl->freeList[freelist_idx].freeList = currBucket;
1056 : :
3665 1057 [ + + ]: 23482918 : if (IS_PARTITIONED(hctl))
3506 1058 : 5078610 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1059 : :
1060 : : /*
1061 : : * better hope the caller is synchronizing access to this
1062 : : * element, because someone else is going to reuse it the next
1063 : : * time something is added to the table
1064 : : */
334 peter@eisentraut.org 1065 : 23482918 : return ELEMENTKEY(currBucket);
1066 : : }
8789 tgl@sss.pgh.pa.us 1067 : 3551 : return NULL;
1068 : :
1069 : 46417967 : case HASH_ENTER:
1070 : : case HASH_ENTER_NULL:
1071 : : /* Return existing element if found, else create one */
8793 1072 [ + + ]: 46417967 : if (currBucket != NULL)
334 peter@eisentraut.org 1073 : 18422505 : return ELEMENTKEY(currBucket);
1074 : :
1075 : : /* disallow inserts if frozen */
6760 tgl@sss.pgh.pa.us 1076 [ - + ]: 27995462 : if (hashp->frozen)
6622 tgl@sss.pgh.pa.us 1077 [ # # ]:UBC 0 : elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
1078 : : hashp->tabname);
1079 : :
3506 rhaas@postgresql.org 1080 :CBC 27995462 : currBucket = get_hash_entry(hashp, freelist_idx);
8789 tgl@sss.pgh.pa.us 1081 [ - + ]: 27995462 : if (currBucket == NULL)
1082 : : {
1083 : : /* out of memory */
7038 tgl@sss.pgh.pa.us 1084 [ # # ]:UBC 0 : if (action == HASH_ENTER_NULL)
1085 : 0 : return NULL;
1086 : : /* report a generic message */
1087 [ # # ]: 0 : if (hashp->isshared)
1088 [ # # ]: 0 : ereport(ERROR,
1089 : : (errcode(ERRCODE_OUT_OF_MEMORY),
1090 : : errmsg("out of shared memory")));
1091 : : else
1092 [ # # ]: 0 : ereport(ERROR,
1093 : : (errcode(ERRCODE_OUT_OF_MEMORY),
1094 : : errmsg("out of memory")));
1095 : : }
1096 : :
1097 : : /* link into hashbucket chain */
8789 tgl@sss.pgh.pa.us 1098 :CBC 27995462 : *prevBucketPtr = currBucket;
1099 : 27995462 : currBucket->link = NULL;
1100 : :
1101 : : /* copy key into record */
8106 1102 : 27995462 : currBucket->hashvalue = hashvalue;
7437 1103 : 27995462 : hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
1104 : :
1105 : : /*
1106 : : * Caller is expected to fill the data field on return. DO NOT
1107 : : * insert any code that could possibly throw error here, as doing
1108 : : * so would leave the table entry incomplete and hence corrupt the
1109 : : * caller's data structure.
1110 : : */
1111 : :
334 peter@eisentraut.org 1112 : 27995462 : return ELEMENTKEY(currBucket);
1113 : : }
1114 : :
8131 tgl@sss.pgh.pa.us 1115 [ # # ]:UBC 0 : elog(ERROR, "unrecognized hash action code: %d", (int) action);
1116 : :
1117 : : return NULL; /* keep compiler quiet */
1118 : : }
1119 : :
1120 : : /*
1121 : : * hash_update_hash_key -- change the hash key of an existing table entry
1122 : : *
1123 : : * This is equivalent to removing the entry, making a new entry, and copying
1124 : : * over its data, except that the entry never goes to the table's freelist.
1125 : : * Therefore this cannot suffer an out-of-memory failure, even if there are
1126 : : * other processes operating in other partitions of the hashtable.
1127 : : *
1128 : : * Returns true if successful, false if the requested new hash key is already
1129 : : * present. Throws error if the specified entry pointer isn't actually a
1130 : : * table member.
1131 : : *
1132 : : * NB: currently, there is no special case for old and new hash keys being
1133 : : * identical, which means we'll report false for that situation. This is
1134 : : * preferable for existing uses.
1135 : : *
1136 : : * NB: for a partitioned hashtable, caller must hold lock on both relevant
1137 : : * partitions, if the new hash key would belong to a different partition.
1138 : : */
1139 : : bool
4671 tgl@sss.pgh.pa.us 1140 :CBC 680 : hash_update_hash_key(HTAB *hashp,
1141 : : void *existingEntry,
1142 : : const void *newKeyPtr)
1143 : : {
1144 : 680 : HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
1145 : : uint32 newhashvalue;
1146 : : Size keysize;
1147 : : uint32 bucket;
1148 : : uint32 newbucket;
1149 : : HASHBUCKET currBucket;
1150 : : HASHBUCKET *prevBucketPtr;
1151 : : HASHBUCKET *oldPrevPtr;
1152 : : HashCompareFunc match;
1153 : :
1154 : : #ifdef HASH_STATISTICS
1155 : : HASHHDR *hctl = hashp->hctl;
1156 : :
1157 : : hctl->accesses++;
1158 : : #endif
1159 : :
1160 : : /* disallow updates if frozen */
1161 [ - + ]: 680 : if (hashp->frozen)
4671 tgl@sss.pgh.pa.us 1162 [ # # ]:UBC 0 : elog(ERROR, "cannot update in frozen hashtable \"%s\"",
1163 : : hashp->tabname);
1164 : :
1165 : : /*
1166 : : * Lookup the existing element using its saved hash value. We need to do
1167 : : * this to be able to unlink it from its hash chain, but as a side benefit
1168 : : * we can verify the validity of the passed existingEntry pointer.
1169 : : */
592 michael@paquier.xyz 1170 :CBC 680 : bucket = hash_initial_lookup(hashp, existingElement->hashvalue,
1171 : : &prevBucketPtr);
4671 tgl@sss.pgh.pa.us 1172 : 680 : currBucket = *prevBucketPtr;
1173 : :
1174 [ + - ]: 685 : while (currBucket != NULL)
1175 : : {
1176 [ + + ]: 685 : if (currBucket == existingElement)
1177 : 680 : break;
1178 : 5 : prevBucketPtr = &(currBucket->link);
1179 : 5 : currBucket = *prevBucketPtr;
1180 : : }
1181 : :
1182 [ - + ]: 680 : if (currBucket == NULL)
4671 tgl@sss.pgh.pa.us 1183 [ # # ]:UBC 0 : elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
1184 : : hashp->tabname);
1185 : :
4671 tgl@sss.pgh.pa.us 1186 :CBC 680 : oldPrevPtr = prevBucketPtr;
1187 : :
1188 : : /*
1189 : : * Now perform the equivalent of a HASH_ENTER operation to locate the hash
1190 : : * chain we want to put the entry into.
1191 : : */
1192 : 680 : newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
592 michael@paquier.xyz 1193 : 680 : newbucket = hash_initial_lookup(hashp, newhashvalue, &prevBucketPtr);
4671 tgl@sss.pgh.pa.us 1194 : 680 : currBucket = *prevBucketPtr;
1195 : :
1196 : : /*
1197 : : * Follow collision chain looking for matching key
1198 : : */
1199 : 680 : match = hashp->match; /* save one fetch in inner loop */
1200 : 680 : keysize = hashp->keysize; /* ditto */
1201 : :
1202 [ + + ]: 783 : while (currBucket != NULL)
1203 : : {
1204 [ - + - - ]: 103 : if (currBucket->hashvalue == newhashvalue &&
4671 tgl@sss.pgh.pa.us 1205 :UBC 0 : match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
1206 : 0 : break;
4671 tgl@sss.pgh.pa.us 1207 :CBC 103 : prevBucketPtr = &(currBucket->link);
1208 : 103 : currBucket = *prevBucketPtr;
1209 : : #ifdef HASH_STATISTICS
1210 : : hctl->collisions++;
1211 : : #endif
1212 : : }
1213 : :
1214 [ - + ]: 680 : if (currBucket != NULL)
4671 tgl@sss.pgh.pa.us 1215 :UBC 0 : return false; /* collision with an existing entry */
1216 : :
4671 tgl@sss.pgh.pa.us 1217 :CBC 680 : currBucket = existingElement;
1218 : :
1219 : : /*
1220 : : * If old and new hash values belong to the same bucket, we need not
1221 : : * change any chain links, and indeed should not since this simplistic
1222 : : * update will corrupt the list if currBucket is the last element. (We
1223 : : * cannot fall out earlier, however, since we need to scan the bucket to
1224 : : * check for duplicate keys.)
1225 : : */
4670 1226 [ + + ]: 680 : if (bucket != newbucket)
1227 : : {
1228 : : /* OK to remove record from old hash bucket's chain. */
1229 : 584 : *oldPrevPtr = currBucket->link;
1230 : :
1231 : : /* link into new hashbucket chain */
1232 : 584 : *prevBucketPtr = currBucket;
1233 : 584 : currBucket->link = NULL;
1234 : : }
1235 : :
1236 : : /* copy new key into record */
4671 1237 : 680 : currBucket->hashvalue = newhashvalue;
1238 : 680 : hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
1239 : :
1240 : : /* rest of record is untouched */
1241 : :
1242 : 680 : return true;
1243 : : }
1244 : :
1245 : : /*
1246 : : * Allocate a new hashtable entry if possible; return NULL if out of memory.
1247 : : * (Or, if the underlying space allocator throws error for out-of-memory,
1248 : : * we won't return at all.)
1249 : : */
1250 : : static HASHBUCKET
3506 rhaas@postgresql.org 1251 : 27995462 : get_hash_entry(HTAB *hashp, int freelist_idx)
1252 : : {
1253 : 27995462 : HASHHDR *hctl = hashp->hctl;
1254 : : HASHBUCKET newElement;
1255 : :
1256 : : for (;;)
1257 : : {
1258 : : /* if partitioned, must lock to touch nentries and freeList */
3665 1259 [ + + ]: 28287301 : if (IS_PARTITIONED(hctl))
3506 1260 [ + + ]: 5682085 : SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1261 : :
1262 : : /* try to get an entry from the freelist */
1263 : 28287301 : newElement = hctl->freeList[freelist_idx].freeList;
1264 : :
7038 tgl@sss.pgh.pa.us 1265 [ + + ]: 28287301 : if (newElement != NULL)
1266 : 27963905 : break;
1267 : :
3665 rhaas@postgresql.org 1268 [ + + ]: 323396 : if (IS_PARTITIONED(hctl))
3506 1269 : 31557 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1270 : :
1271 : : /*
1272 : : * No free elements in this freelist. In a partitioned table, there
1273 : : * might be entries in other freelists, but to reduce contention we
1274 : : * prefer to first try to get another chunk of buckets from the main
1275 : : * shmem allocator. If that fails, though, we *MUST* root through all
1276 : : * the other freelists before giving up. There are multiple callers
1277 : : * that assume that they can allocate every element in the initially
1278 : : * requested table size, or that deleting an element guarantees they
1279 : : * can insert a new element, even if shared memory is entirely full.
1280 : : * Failing because the needed element is in a different freelist is
1281 : : * not acceptable.
1282 : : */
207 tomas.vondra@postgre 1283 [ + + ]: 323396 : if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
1284 : : {
1285 : : int borrow_from_idx;
1286 : :
3506 rhaas@postgresql.org 1287 [ - + ]:GBC 31557 : if (!IS_PARTITIONED(hctl))
3506 rhaas@postgresql.org 1288 :UBC 0 : return NULL; /* out of memory */
1289 : :
1290 : : /* try to borrow element from another freelist */
3506 rhaas@postgresql.org 1291 :GBC 31557 : borrow_from_idx = freelist_idx;
1292 : : for (;;)
1293 : : {
1294 : 34841 : borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
1295 [ - + ]: 34841 : if (borrow_from_idx == freelist_idx)
3020 tgl@sss.pgh.pa.us 1296 :UBC 0 : break; /* examined all freelists, fail */
1297 : :
3506 rhaas@postgresql.org 1298 [ - + ]:GBC 34841 : SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
1299 : 34841 : newElement = hctl->freeList[borrow_from_idx].freeList;
1300 : :
1301 [ + + ]: 34841 : if (newElement != NULL)
1302 : : {
1303 : 31557 : hctl->freeList[borrow_from_idx].freeList = newElement->link;
1304 : 31557 : SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1305 : :
1306 : : /* careful: count the new element in its proper freelist */
1307 [ - + ]: 31557 : SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1308 : 31557 : hctl->freeList[freelist_idx].nentries++;
1309 : 31557 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1310 : :
3020 tgl@sss.pgh.pa.us 1311 : 31557 : return newElement;
1312 : : }
1313 : :
3506 rhaas@postgresql.org 1314 : 3284 : SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1315 : : }
1316 : :
1317 : : /* no elements available to borrow either, so out of memory */
3020 tgl@sss.pgh.pa.us 1318 :UBC 0 : return NULL;
1319 : : }
1320 : : }
1321 : :
1322 : : /* remove entry from freelist, bump nentries */
3506 rhaas@postgresql.org 1323 :CBC 27963905 : hctl->freeList[freelist_idx].freeList = newElement->link;
1324 : 27963905 : hctl->freeList[freelist_idx].nentries++;
1325 : :
3665 1326 [ + + ]: 27963905 : if (IS_PARTITIONED(hctl))
3506 1327 : 5650528 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1328 : :
7038 tgl@sss.pgh.pa.us 1329 : 27963905 : return newElement;
1330 : : }
1331 : :
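/*
 * Editorial sketch (not part of dynahash.c): the caller-side invariant that
 * the freelist-borrowing loop in get_hash_entry() above preserves.  The
 * table, keys, and function name here are hypothetical.  Removing an entry
 * must guarantee that a subsequent insertion can succeed, even if shared
 * memory is otherwise exhausted and the freed element landed on a different
 * freelist.
 */
static void
replace_entry_sketch(HTAB *tab, const void *old_key, const void *new_key)
{
	bool		found;
	void	   *ent;

	/* returns one element to some freelist */
	(void) hash_search(tab, old_key, HASH_REMOVE, NULL);

	/* must find a free element again, wherever the freed one ended up */
	ent = hash_search(tab, new_key, HASH_ENTER, &found);
	Assert(ent != NULL);
}
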
1332 : : /*
1333 : : * hash_get_num_entries -- get the number of entries in a hashtable
1334 : : */
1335 : : int64
1336 : 62036 : hash_get_num_entries(HTAB *hashp)
1337 : : {
1338 : : int i;
67 michael@paquier.xyz 1339 :GNC 62036 : int64 sum = hashp->hctl->freeList[0].nentries;
1340 : :
1341 : : /*
1342 : : * We currently don't bother acquiring the mutexes; it's only
1343 : : * sensible to call this function if you've got a lock on all partitions of
1344 : : * the table.
1345 : : */
3020 tgl@sss.pgh.pa.us 1346 [ + + ]:CBC 62036 : if (IS_PARTITIONED(hashp->hctl))
1347 : : {
1348 [ + + ]: 56992 : for (i = 1; i < NUM_FREELISTS; i++)
1349 : 55211 : sum += hashp->hctl->freeList[i].nentries;
1350 : : }
1351 : :
3506 rhaas@postgresql.org 1352 : 62036 : return sum;
1353 : : }
1354 : :
1355 : : /*
1356 : : * hash_seq_init/_search/_term
1357 : : * Sequentially search through the hash table, returning
1358 : : * all the elements one by one; NULL is returned when no more remain.
1359 : : *
1360 : : * hash_seq_term should be called if and only if the scan is abandoned before
1361 : : * completion; if hash_seq_search returns NULL then it has already done the
1362 : : * end-of-scan cleanup.
1363 : : *
1364 : : * NOTE: caller may delete the returned element before continuing the scan.
1365 : : * However, deleting any other element while the scan is in progress is
1366 : : * UNDEFINED (it might be the one that curEntry is pointing at!). Also,
1367 : : * if elements are added to the table while the scan is in progress, it is
1368 : : * unspecified whether they will be visited by the scan or not.
1369 : : *
1370 : : * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
1371 : : * worry about hash_seq_term cleanup, if the hashtable is first locked against
1372 : : * further insertions by calling hash_freeze.
1373 : : *
1374 : : * NOTE: to use this with a partitioned hashtable, caller had better hold
1375 : : * at least a shared lock on all partitions of the table throughout the scan!
1376 : : * We can cope with insertions or deletions by our own backend, but *not*
1377 : : * with concurrent insertions or deletions by another.
1378 : : */
1379 : : void
9065 tgl@sss.pgh.pa.us 1380 : 2042774 : hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
1381 : : {
1382 : 2042774 : status->hashp = hashp;
1383 : 2042774 : status->curBucket = 0;
8793 1384 : 2042774 : status->curEntry = NULL;
447 akorotkov@postgresql 1385 : 2042774 : status->hasHashvalue = false;
6760 tgl@sss.pgh.pa.us 1386 [ + - ]: 2042774 : if (!hashp->frozen)
1387 : 2042774 : register_seq_scan(hashp);
9065 1388 : 2042774 : }
1389 : :
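/*
 * Editorial sketch (not part of dynahash.c): the canonical scan loop implied
 * by the protocol described above.  "should_stop" is a hypothetical
 * predicate.  hash_seq_term is required only when the scan is abandoned
 * early; a NULL return has already done the end-of-scan cleanup.
 */
static void
seq_scan_sketch(HTAB *tab)
{
	HASH_SEQ_STATUS status;
	void	   *entry;

	hash_seq_init(&status, tab);
	while ((entry = hash_seq_search(&status)) != NULL)
	{
		if (should_stop(entry))
		{
			hash_seq_term(&status);	/* abandoning early: must terminate */
			return;
		}
		/* deleting "entry" itself here is allowed; deleting others is not */
	}
	/* no hash_seq_term here: the NULL result already cleaned up */
}
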
1390 : : /*
1391 : : * Same as above, but the scan is limited to entries with the given hash
1392 : : * value. See also hash_seq_search().
1393 : : *
1394 : : * NOTE: the default hash function doesn't match the syscache hash function.
1395 : : * Thus, if you're going to use this function in a syscache callback, make
1396 : : * sure you're using a custom hash function. See relatt_cache_syshash()
1397 : : * for an example.
1398 : : */
1399 : : void
447 akorotkov@postgresql 1400 : 853169 : hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
1401 : : uint32 hashvalue)
1402 : : {
1403 : : HASHBUCKET *bucketPtr;
1404 : :
1405 : 853169 : hash_seq_init(status, hashp);
1406 : :
1407 : 853169 : status->hasHashvalue = true;
1408 : 853169 : status->hashvalue = hashvalue;
1409 : :
1410 : 853169 : status->curBucket = hash_initial_lookup(hashp, hashvalue, &bucketPtr);
1411 : 853169 : status->curEntry = *bucketPtr;
1412 : 853169 : }
1413 : :
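/*
 * Editorial sketch (not part of dynahash.c): a scan limited to one hash
 * value, using dynahash's get_hash_value() to compute it.  Only entries
 * whose stored hash matches are returned, but distinct keys can share a
 * hash value, so the caller must still compare keys before trusting a
 * match.
 */
static void
hash_value_scan_sketch(HTAB *tab, const void *key)
{
	HASH_SEQ_STATUS status;
	void	   *entry;
	uint32		hashvalue = get_hash_value(tab, key);

	hash_seq_init_with_hash_value(&status, tab, hashvalue);
	while ((entry = hash_seq_search(&status)) != NULL)
	{
		/* stored hash matched; verify the key itself before using it */
	}
}
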
1414 : : void *
9065 tgl@sss.pgh.pa.us 1415 : 27726726 : hash_seq_search(HASH_SEQ_STATUS *status)
1416 : : {
1417 : : HTAB *hashp;
1418 : : HASHHDR *hctl;
1419 : : uint32 max_bucket;
1420 : : int64 ssize;
1421 : : int64 segment_num;
1422 : : int64 segment_ndx;
1423 : : HASHSEGMENT segp;
1424 : : uint32 curBucket;
1425 : : HASHELEMENT *curElem;
1426 : :
447 akorotkov@postgresql 1427 [ + + ]: 27726726 : if (status->hasHashvalue)
1428 : : {
1429 : : /*
1430 : : * Scan entries only in the current bucket because only this bucket
1431 : : * can contain entries with the given hash value.
1432 : : */
1433 [ + + ]: 962330 : while ((curElem = status->curEntry) != NULL)
1434 : : {
1435 : 109161 : status->curEntry = curElem->link;
1436 [ + + ]: 109161 : if (status->hashvalue != curElem->hashvalue)
1437 : 104471 : continue;
1438 : 4690 : return (void *) ELEMENTKEY(curElem);
1439 : : }
1440 : :
1441 : 853169 : hash_seq_term(status);
1442 : 853169 : return NULL;
1443 : : }
1444 : :
7480 tgl@sss.pgh.pa.us 1445 [ + + ]: 26868867 : if ((curElem = status->curEntry) != NULL)
1446 : : {
1447 : : /* Continuing scan of curBucket... */
1448 : 8051813 : status->curEntry = curElem->link;
7318 bruce@momjian.us 1449 [ + + ]: 8051813 : if (status->curEntry == NULL) /* end of this bucket */
7480 tgl@sss.pgh.pa.us 1450 : 5608994 : ++status->curBucket;
334 peter@eisentraut.org 1451 : 8051813 : return ELEMENTKEY(curElem);
1452 : : }
1453 : :
1454 : : /*
1455 : : * Search for next nonempty bucket starting at curBucket.
1456 : : */
7480 tgl@sss.pgh.pa.us 1457 : 18817054 : curBucket = status->curBucket;
1458 : 18817054 : hashp = status->hashp;
1459 : 18817054 : hctl = hashp->hctl;
7038 1460 : 18817054 : ssize = hashp->ssize;
7480 1461 : 18817054 : max_bucket = hctl->max_bucket;
1462 : :
1463 [ + + ]: 18817054 : if (curBucket > max_bucket)
1464 : : {
6760 1465 : 35698 : hash_seq_term(status);
7318 bruce@momjian.us 1466 : 35698 : return NULL; /* search is done */
1467 : : }
1468 : :
1469 : : /*
1470 : : * first find the right segment in the table directory.
1471 : : */
7038 tgl@sss.pgh.pa.us 1472 : 18781356 : segment_num = curBucket >> hashp->sshift;
7480 1473 : 18781356 : segment_ndx = MOD(curBucket, ssize);
1474 : :
1475 : 18781356 : segp = hashp->dir[segment_num];
1476 : :
1477 : : /*
1478 : : * Pick up the first item in this bucket's chain. If the chain is not
1479 : : * empty we can begin searching it. Otherwise we have to advance to find the
1480 : : * next nonempty bucket. We try to optimize that case since searching a
1481 : : * near-empty hashtable has to iterate this loop a lot.
1482 : : */
1483 [ + + ]: 89957964 : while ((curElem = segp[segment_ndx]) == NULL)
1484 : : {
1485 : : /* empty bucket, advance to next */
1486 [ + + ]: 72313756 : if (++curBucket > max_bucket)
1487 : : {
1488 : 1137148 : status->curBucket = curBucket;
6760 1489 : 1137148 : hash_seq_term(status);
7318 bruce@momjian.us 1490 : 1137148 : return NULL; /* search is done */
1491 : : }
7480 tgl@sss.pgh.pa.us 1492 [ + + ]: 71176608 : if (++segment_ndx >= ssize)
1493 : : {
1494 : 146403 : segment_num++;
1495 : 146403 : segment_ndx = 0;
1496 : 146403 : segp = hashp->dir[segment_num];
1497 : : }
1498 : : }
1499 : :
1500 : : /* Begin scan of curBucket... */
1501 : 17644208 : status->curEntry = curElem->link;
3051 1502 [ + + ]: 17644208 : if (status->curEntry == NULL) /* end of this bucket */
7480 1503 : 12035159 : ++curBucket;
1504 : 17644208 : status->curBucket = curBucket;
334 peter@eisentraut.org 1505 : 17644208 : return ELEMENTKEY(curElem);
1506 : : }
1507 : :
1508 : : void
6760 tgl@sss.pgh.pa.us 1509 : 2042764 : hash_seq_term(HASH_SEQ_STATUS *status)
1510 : : {
1511 [ + - ]: 2042764 : if (!status->hashp->frozen)
1512 : 2042764 : deregister_seq_scan(status->hashp);
1513 : 2042764 : }
1514 : :
1515 : : /*
1516 : : * hash_freeze
1517 : : * Freeze a hashtable against future insertions (deletions are
1518 : : * still allowed)
1519 : : *
1520 : : * The reason for doing this is that by preventing any more bucket splits,
1521 : : * we no longer need to worry about registering hash_seq_search scans,
1522 : : * and thus the caller need not be careful about ensuring hash_seq_term gets
1523 : : * called at the right times.
1524 : : *
1525 : : * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
1526 : : * with active scans (since hash_seq_term would then do the wrong thing).
1527 : : */
1528 : : void
6760 tgl@sss.pgh.pa.us 1529 :UBC 0 : hash_freeze(HTAB *hashp)
1530 : : {
1531 [ # # ]: 0 : if (hashp->isshared)
6622 1532 [ # # ]: 0 : elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
6760 1533 [ # # # # ]: 0 : if (!hashp->frozen && has_seq_scans(hashp))
6622 1534 [ # # ]: 0 : elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
1535 : : hashp->tabname);
6760 1536 : 0 : hashp->frozen = true;
1537 : 0 : }
1538 : :
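/*
 * Editorial sketch (not part of dynahash.c): once a local table is frozen,
 * scans are never registered, so they may be started and abandoned freely
 * with no hash_seq_term bookkeeping.
 */
static void
frozen_scan_sketch(HTAB *local_tab)
{
	HASH_SEQ_STATUS status;
	void	   *entry;

	hash_freeze(local_tab);		/* non-shared table with no active scans */
	hash_seq_init(&status, local_tab);	/* frozen: scan is not registered */
	while ((entry = hash_seq_search(&status)) != NULL)
	{
		break;	/* safe to abandon without calling hash_seq_term */
	}
}
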
1539 : :
1540 : : /********************************* UTILITIES ************************/
1541 : :
1542 : : /*
1543 : : * Expand the table by adding one more hash bucket.
1544 : : */
1545 : : static bool
10277 bruce@momjian.us 1546 :CBC 361300 : expand_table(HTAB *hashp)
1547 : : {
8769 1548 : 361300 : HASHHDR *hctl = hashp->hctl;
1549 : : HASHSEGMENT old_seg,
1550 : : new_seg;
1551 : : int64 old_bucket,
1552 : : new_bucket;
1553 : : int64 new_segnum,
1554 : : new_segndx;
1555 : : int64 old_segnum,
1556 : : old_segndx;
1557 : : HASHBUCKET *oldlink,
1558 : : *newlink;
1559 : : HASHBUCKET currElement,
1560 : : nextElement;
1561 : :
7038 tgl@sss.pgh.pa.us 1562 [ - + ]: 361300 : Assert(!IS_PARTITIONED(hctl));
1563 : :
1564 : : #ifdef HASH_STATISTICS
1565 : : hctl->expansions++;
1566 : : #endif
1567 : :
9745 1568 : 361300 : new_bucket = hctl->max_bucket + 1;
7038 1569 : 361300 : new_segnum = new_bucket >> hashp->sshift;
1570 : 361300 : new_segndx = MOD(new_bucket, hashp->ssize);
1571 : :
10278 bruce@momjian.us 1572 [ + + ]: 361300 : if (new_segnum >= hctl->nsegs)
1573 : : {
1574 : : /* Allocate new segment if necessary -- could fail if dir full */
1575 [ - + ]: 1157 : if (new_segnum >= hctl->dsize)
9653 bruce@momjian.us 1576 [ # # ]:UBC 0 : if (!dir_realloc(hashp))
8793 tgl@sss.pgh.pa.us 1577 : 0 : return false;
10278 bruce@momjian.us 1578 [ - + ]:CBC 1157 : if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
8793 tgl@sss.pgh.pa.us 1579 :UBC 0 : return false;
10278 bruce@momjian.us 1580 :CBC 1157 : hctl->nsegs++;
1581 : : }
1582 : :
1583 : : /* OK, we created a new bucket */
9745 tgl@sss.pgh.pa.us 1584 : 361300 : hctl->max_bucket++;
1585 : :
1586 : : /*
1587 : : * *Before* changing masks, find old bucket corresponding to same hash
1588 : : * values; values in that bucket may need to be relocated to the new bucket.
1589 : : * Note that new_bucket is certainly larger than low_mask at this point,
1590 : : * so we can skip the first step of the regular hash mask calc.
1591 : : */
9647 1592 : 361300 : old_bucket = (new_bucket & hctl->low_mask);
1593 : :
1594 : : /*
1595 : : * If we crossed a power of 2, readjust masks.
1596 : : */
8634 1597 [ + + ]: 361300 : if ((uint32) new_bucket > hctl->high_mask)
1598 : : {
10278 bruce@momjian.us 1599 : 2271 : hctl->low_mask = hctl->high_mask;
8634 tgl@sss.pgh.pa.us 1600 : 2271 : hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
1601 : : }
1602 : :
1603 : : /*
1604 : : * Relocate records to the new bucket. NOTE: because of the way the hash
1605 : : * masking is done in calc_bucket, only one old bucket can need to be
1606 : : * split at this point. With a different way of reducing the hash value,
1607 : : * that might not be true!
1608 : : */
7038 1609 : 361300 : old_segnum = old_bucket >> hashp->sshift;
1610 : 361300 : old_segndx = MOD(old_bucket, hashp->ssize);
1611 : :
8793 1612 : 361300 : old_seg = hashp->dir[old_segnum];
1613 : 361300 : new_seg = hashp->dir[new_segnum];
1614 : :
1615 : 361300 : oldlink = &old_seg[old_segndx];
1616 : 361300 : newlink = &new_seg[new_segndx];
1617 : :
1618 : 361300 : for (currElement = *oldlink;
1619 [ + + ]: 865663 : currElement != NULL;
1620 : 504363 : currElement = nextElement)
1621 : : {
1622 : 504363 : nextElement = currElement->link;
67 michael@paquier.xyz 1623 [ + + ]:GNC 504363 : if ((int64) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
1624 : : {
8793 tgl@sss.pgh.pa.us 1625 :CBC 248134 : *oldlink = currElement;
1626 : 248134 : oldlink = &currElement->link;
1627 : : }
1628 : : else
1629 : : {
1630 : 256229 : *newlink = currElement;
1631 : 256229 : newlink = &currElement->link;
1632 : : }
1633 : : }
1634 : : /* don't forget to terminate the rebuilt hash chains... */
1635 : 361300 : *oldlink = NULL;
1636 : 361300 : *newlink = NULL;
1637 : :
1638 : 361300 : return true;
1639 : : }
1640 : :
1641 : :
1642 : : static bool
10277 bruce@momjian.us 1643 :UBC 0 : dir_realloc(HTAB *hashp)
1644 : : {
1645 : : HASHSEGMENT *p;
1646 : : HASHSEGMENT *old_p;
1647 : : int64 new_dsize;
1648 : : int64 old_dirsize;
1649 : : int64 new_dirsize;
1650 : :
10278 1651 [ # # ]: 0 : if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
8793 tgl@sss.pgh.pa.us 1652 : 0 : return false;
1653 : :
1654 : : /* Reallocate directory */
9745 1655 : 0 : new_dsize = hashp->hctl->dsize << 1;
8793 1656 : 0 : old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
1657 : 0 : new_dirsize = new_dsize * sizeof(HASHSEGMENT);
1658 : :
1659 : 0 : old_p = hashp->dir;
8894 JanWieck@Yahoo.com 1660 : 0 : CurrentDynaHashCxt = hashp->hcxt;
8793 tgl@sss.pgh.pa.us 1661 : 0 : p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
1662 : :
10278 bruce@momjian.us 1663 [ # # ]: 0 : if (p != NULL)
1664 : : {
8793 tgl@sss.pgh.pa.us 1665 : 0 : memcpy(p, old_p, old_dirsize);
1666 [ # # # # : 0 : MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
# # # # #
# ]
1667 : 0 : hashp->dir = p;
9745 1668 : 0 : hashp->hctl->dsize = new_dsize;
1669 : :
1670 : : /* XXX assume the allocator is palloc, so we know how to free */
7480 1671 [ # # ]: 0 : Assert(hashp->alloc == DynaHashAlloc);
1672 : 0 : pfree(old_p);
1673 : :
8793 1674 : 0 : return true;
1675 : : }
1676 : :
1677 : 0 : return false;
1678 : : }
1679 : :
1680 : :
1681 : : static HASHSEGMENT
10277 bruce@momjian.us 1682 :CBC 628690 : seg_alloc(HTAB *hashp)
1683 : : {
1684 : : HASHSEGMENT segp;
1685 : :
8894 JanWieck@Yahoo.com 1686 : 628690 : CurrentDynaHashCxt = hashp->hcxt;
7038 tgl@sss.pgh.pa.us 1687 : 628690 : segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
1688 : :
10278 bruce@momjian.us 1689 [ - + ]: 628690 : if (!segp)
8793 tgl@sss.pgh.pa.us 1690 :UBC 0 : return NULL;
1691 : :
7038 tgl@sss.pgh.pa.us 1692 [ + - + - :CBC 628690 : MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
+ - - + -
- ]
1693 : :
8793 1694 : 628690 : return segp;
1695 : : }
1696 : :
1697 : : /*
1698 : : * allocate some new elements and link them into the indicated free list
1699 : : */
1700 : : static bool
207 tomas.vondra@postgre 1701 : 606592 : element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1702 : : {
3506 rhaas@postgresql.org 1703 : 606592 : HASHHDR *hctl = hashp->hctl;
1704 : : Size elementSize;
1705 : : Size requestSize;
1706 : : char *allocedBlock;
1707 : : HASHELEMENT *firstElement;
1708 : : HASHELEMENT *tmpElement;
1709 : : HASHELEMENT *prevElement;
1710 : : int i;
1711 : :
95 tgl@sss.pgh.pa.us 1712 [ + + ]:GNC 606592 : if (hctl->isfixed)
207 tomas.vondra@postgre 1713 :GBC 31557 : return false;
1714 : :
1715 : : /* Each element has a HASHELEMENT header plus user data. */
207 tomas.vondra@postgre 1716 :CBC 575035 : elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1717 : :
87 tgl@sss.pgh.pa.us 1718 :GNC 575035 : requestSize = nelem * elementSize;
1719 : :
1720 : : /* Add space for slist_node list link if we need one. */
1721 : : #ifdef USE_VALGRIND
1722 : : if (!hashp->isshared)
1723 : : requestSize += MAXALIGN(sizeof(slist_node));
1724 : : #endif
1725 : :
1726 : : /* Allocate the memory. */
8894 JanWieck@Yahoo.com 1727 :CBC 575035 : CurrentDynaHashCxt = hashp->hcxt;
87 tgl@sss.pgh.pa.us 1728 :GNC 575035 : allocedBlock = hashp->alloc(requestSize);
1729 : :
1730 [ - + ]: 575035 : if (!allocedBlock)
207 tomas.vondra@postgre 1731 :UBC 0 : return false;
1732 : :
1733 : : /*
1734 : : * If USE_VALGRIND, each allocated block of elements of a non-shared
1735 : : * hashtable is chained into a list, so that Valgrind won't think it's
1736 : : * been leaked.
1737 : : */
1738 : : #ifdef USE_VALGRIND
1739 : : if (hashp->isshared)
1740 : : firstElement = (HASHELEMENT *) allocedBlock;
1741 : : else
1742 : : {
1743 : : slist_push_head(&hashp->element_blocks, (slist_node *) allocedBlock);
1744 : : firstElement = (HASHELEMENT *) (allocedBlock + MAXALIGN(sizeof(slist_node)));
1745 : : }
1746 : : #else
87 tgl@sss.pgh.pa.us 1747 :GNC 575035 : firstElement = (HASHELEMENT *) allocedBlock;
1748 : : #endif
1749 : :
1750 : : /* prepare to link all the new entries into the freelist */
7038 tgl@sss.pgh.pa.us 1751 :CBC 575035 : prevElement = NULL;
1752 : 575035 : tmpElement = firstElement;
7700 1753 [ + + ]: 51745670 : for (i = 0; i < nelem; i++)
1754 : : {
7038 1755 : 51170635 : tmpElement->link = prevElement;
1756 : 51170635 : prevElement = tmpElement;
8793 1757 : 51170635 : tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
1758 : : }
1759 : :
1760 : : /* if partitioned, must lock to touch freeList */
3665 rhaas@postgresql.org 1761 [ + + ]: 575035 : if (IS_PARTITIONED(hctl))
3506 1762 [ - + ]: 167840 : SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1763 : :
1764 : : /* freelist could be nonempty if two backends did this concurrently */
1765 : 575035 : firstElement->link = hctl->freeList[freelist_idx].freeList;
1766 : 575035 : hctl->freeList[freelist_idx].freeList = prevElement;
1767 : :
3665 1768 [ + + ]: 575035 : if (IS_PARTITIONED(hctl))
3506 1769 : 167840 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1770 : :
207 tomas.vondra@postgre 1771 : 575035 : return true;
1772 : : }
1773 : :
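/*
 * Editorial note (not part of dynahash.c): each element allocated above is
 * laid out as
 *
 *     | HASHELEMENT header | MAXALIGN padding | key | rest of user entry |
 *
 * ELEMENTKEY() returns the address just past the MAXALIGN'd header; that
 * address serves as both the key pointer and the entry pointer returned to
 * callers, which is why entrysize must include keysize.
 */
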
1774 : : /*
1775 : : * Do the initial lookup for the given hash value: return the bucket
1776 : : * number, and set *bucketptr to point at the bucket's chain.
1777 : : */
1778 : : static inline uint32
592 michael@paquier.xyz 1779 : 181004005 : hash_initial_lookup(HTAB *hashp, uint32 hashvalue, HASHBUCKET **bucketptr)
1780 : : {
1781 : 181004005 : HASHHDR *hctl = hashp->hctl;
1782 : : HASHSEGMENT segp;
1783 : : int64 segment_num;
1784 : : int64 segment_ndx;
1785 : : uint32 bucket;
1786 : :
1787 : 181004005 : bucket = calc_bucket(hctl, hashvalue);
1788 : :
1789 : 181004005 : segment_num = bucket >> hashp->sshift;
1790 : 181004005 : segment_ndx = MOD(bucket, hashp->ssize);
1791 : :
1792 : 181004005 : segp = hashp->dir[segment_num];
1793 : :
1794 [ - + ]: 181004005 : if (segp == NULL)
592 michael@paquier.xyz 1795 :UBC 0 : hash_corrupted(hashp);
1796 : :
592 michael@paquier.xyz 1797 :CBC 181004005 : *bucketptr = &segp[segment_ndx];
1798 : 181004005 : return bucket;
1799 : : }
1800 : :
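/*
 * Editorial worked example (not part of dynahash.c): with the default
 * segment size ssize = 256 (sshift = 8), bucket 1000 is found at
 * dir[1000 >> 8] = dir[3], slot MOD(1000, 256) = 232.  The directory's
 * extra level of indirection lets the table grow by adding segments
 * without ever relocating existing buckets or entries.
 */
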
1801 : : /* complain when we have detected a corrupted hashtable */
1802 : : static void
8789 tgl@sss.pgh.pa.us 1803 :UBC 0 : hash_corrupted(HTAB *hashp)
1804 : : {
1805 : : /*
1806 : : * If the corruption is in a shared hashtable, we'd better force a
1807 : : * systemwide restart. Otherwise, just shut down this one backend.
1808 : : */
1809 [ # # ]: 0 : if (hashp->isshared)
8131 1810 [ # # ]: 0 : elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
1811 : : else
1812 [ # # ]: 0 : elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
1813 : : }
1814 : :
1815 : : /* calculate ceil(log base 2) of num */
1816 : : static int
67 michael@paquier.xyz 1817 :GNC 739347 : my_log2(int64 num)
1818 : : {
1819 : : /*
1820 : : * guard against too-large input, which would be invalid for
1821 : : * pg_ceil_log2_*()
1822 : : */
1823 [ - + ]: 739347 : if (num > PG_INT64_MAX / 2)
67 michael@paquier.xyz 1824 :UNC 0 : num = PG_INT64_MAX / 2;
1825 : :
2029 drowley@postgresql.o 1826 :CBC 739347 : return pg_ceil_log2_64(num);
1827 : : }
1828 : :
1829 : : /* calculate first power of 2 >= num, bounded to what will fit in an int64 */
1830 : : static int64
67 michael@paquier.xyz 1831 :GNC 53992 : next_pow2_int64(int64 num)
1832 : : {
1833 : : /* my_log2's internal range check is sufficient */
4704 tgl@sss.pgh.pa.us 1834 :CBC 53992 : return (int64) 1 << my_log2(num); /* shift an int64, not a long */
1835 : : }
1836 : :
1837 : : /* calculate first power of 2 >= num, bounded to what will fit in an int */
1838 : : static int
67 michael@paquier.xyz 1839 :GNC 685355 : next_pow2_int(int64 num)
1840 : : {
4704 tgl@sss.pgh.pa.us 1841 [ - + ]:CBC 685355 : if (num > INT_MAX / 2)
4704 tgl@sss.pgh.pa.us 1842 :UBC 0 : num = INT_MAX / 2;
4704 tgl@sss.pgh.pa.us 1843 :CBC 685355 : return 1 << my_log2(num);
1844 : : }
1845 : :
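/*
 * Editorial worked example (not part of dynahash.c): my_log2(1000) = 10,
 * since 2^10 = 1024 is the first power of 2 >= 1000; hence
 * next_pow2_int(1000) = 1024.  At the bottom end, my_log2(1) = 0 and
 * next_pow2_int64(1) = 1.
 */
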
1846 : :
1847 : : /************************* SEQ SCAN TRACKING ************************/
1848 : :
1849 : : /*
1850 : : * We track active hash_seq_search scans here. The need for this mechanism
1851 : : * comes from the fact that a scan will get confused if a bucket split occurs
1852 : : * while it's in progress: it might visit entries twice, or even miss some
1853 : : * entirely (if it's partway through the same bucket that splits). Hence
1854 : : * we want to inhibit bucket splits if there are any active scans on the
1855 : : * table being inserted into. This is a fairly rare case in current usage,
1856 : : * so just postponing the split until the next insertion seems sufficient.
1857 : : *
1858 : : * Given present usages of the function, only a few scans are likely to be
1859 : : * open concurrently; so a finite-size stack of open scans seems sufficient,
1860 : : * and we don't worry that linear search is too slow. Note that we do
1861 : : * allow multiple scans of the same hashtable to be open concurrently.
1862 : : *
1863 : : * This mechanism can support concurrent scan and insertion in a shared
1864 : : * hashtable if it's the same backend doing both. It would fail otherwise,
1865 : : * but locking reasons seem to preclude any such scenario anyway, so we don't
1866 : : * worry.
1867 : : *
1868 : : * This arrangement is reasonably robust if a transient hashtable is deleted
1869 : : * without notifying us. The absolute worst case is we might inhibit splits
1870 : : * in another table created later at exactly the same address. We will give
1871 : : * a warning at transaction end for reference leaks, so any bugs leading to
1872 : : * lack of notification should be easy to catch.
1873 : : */
1874 : :
1875 : : #define MAX_SEQ_SCANS 100
1876 : :
1877 : : static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
1878 : : static int seq_scan_level[MAX_SEQ_SCANS]; /* subtransaction nest level */
1879 : : static int num_seq_scans = 0;
1880 : :
1881 : :
1882 : : /* Register a table as having an active hash_seq_search scan */
1883 : : static void
6760 1884 : 2042774 : register_seq_scan(HTAB *hashp)
1885 : : {
1886 [ - + ]: 2042774 : if (num_seq_scans >= MAX_SEQ_SCANS)
6622 tgl@sss.pgh.pa.us 1887 [ # # ]:UBC 0 : elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
1888 : : hashp->tabname);
6760 tgl@sss.pgh.pa.us 1889 :CBC 2042774 : seq_scan_tables[num_seq_scans] = hashp;
1890 : 2042774 : seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
1891 : 2042774 : num_seq_scans++;
1892 : 2042774 : }
1893 : :
1894 : : /* Deregister an active scan */
1895 : : static void
1896 : 2042764 : deregister_seq_scan(HTAB *hashp)
1897 : : {
1898 : : int i;
1899 : :
1900 : : /* Search backward since it's most likely at the stack top */
1901 [ + - ]: 2042764 : for (i = num_seq_scans - 1; i >= 0; i--)
1902 : : {
1903 [ + - ]: 2042764 : if (seq_scan_tables[i] == hashp)
1904 : : {
1905 : 2042764 : seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1906 : 2042764 : seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1907 : 2042764 : num_seq_scans--;
1908 : 2042764 : return;
1909 : : }
1910 : : }
6760 tgl@sss.pgh.pa.us 1911 [ # # ]:UBC 0 : elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
1912 : : hashp->tabname);
1913 : : }
1914 : :
1915 : : /* Check if a table has any active scan */
1916 : : static bool
6760 tgl@sss.pgh.pa.us 1917 :CBC 361761 : has_seq_scans(HTAB *hashp)
1918 : : {
1919 : : int i;
1920 : :
1921 [ + + ]: 361934 : for (i = 0; i < num_seq_scans; i++)
1922 : : {
1923 [ + + ]: 634 : if (seq_scan_tables[i] == hashp)
6760 tgl@sss.pgh.pa.us 1924 :GBC 461 : return true;
1925 : : }
6760 tgl@sss.pgh.pa.us 1926 :CBC 361300 : return false;
1927 : : }
1928 : :
1929 : : /* Clean up any open scans at end of transaction */
1930 : : void
1931 : 322454 : AtEOXact_HashTables(bool isCommit)
1932 : : {
1933 : : /*
1934 : : * During abort cleanup, open scans are expected; just silently clean 'em
1935 : : * out. An open scan at commit means someone forgot a hash_seq_term()
1936 : : * call, so complain.
1937 : : *
1938 : : * Note: it's tempting to try to print the tabname here, but refrain for
1939 : : * fear of touching deallocated memory. This isn't a user-facing message
1940 : : * anyway, so it needn't be pretty.
1941 : : */
1942 [ + + ]: 322454 : if (isCommit)
1943 : : {
1944 : : int i;
1945 : :
1946 [ - + ]: 297153 : for (i = 0; i < num_seq_scans; i++)
1947 : : {
6760 tgl@sss.pgh.pa.us 1948 [ # # ]:UBC 0 : elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1949 : : seq_scan_tables[i]);
1950 : : }
1951 : : }
6760 tgl@sss.pgh.pa.us 1952 :CBC 322454 : num_seq_scans = 0;
1953 : 322454 : }
1954 : :
1955 : : /* Clean up any open scans at end of subtransaction */
1956 : : void
1957 : 9106 : AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1958 : : {
1959 : : int i;
1960 : :
1961 : : /*
1962 : : * Search backward to make cleanup easy. Note we must check all entries,
1963 : : * not only those at the end of the array, because the deletion technique
1964 : : * doesn't keep them in order.
1965 : : */
1966 [ - + ]: 9106 : for (i = num_seq_scans - 1; i >= 0; i--)
1967 : : {
6760 tgl@sss.pgh.pa.us 1968 [ # # ]:UBC 0 : if (seq_scan_level[i] >= nestDepth)
1969 : : {
1970 [ # # ]: 0 : if (isCommit)
1971 [ # # ]: 0 : elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1972 : : seq_scan_tables[i]);
1973 : 0 : seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1974 : 0 : seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1975 : 0 : num_seq_scans--;
1976 : : }
1977 : : }
6760 tgl@sss.pgh.pa.us 1978 :CBC 9106 : }
|