Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * simplehash.h
3 : : *
4 : : * When included this file generates a "templated" (by way of macros)
5 : : * open-addressing hash table implementation specialized to user-defined
6 : : * types.
7 : : *
8 : : * It's probably not worthwhile to generate such a specialized implementation
9 : : * for hash tables that aren't performance or space sensitive.
10 : : *
11 : : * Compared to dynahash, simplehash has the following benefits:
12 : : *
13 : : * - Due to the "templated" code generation has known structure sizes and no
14 : : * indirect function calls (which show up substantially in dynahash
15 : : * profiles). These features considerably increase speed for small
16 : : * entries.
17 : : * - Open addressing has better CPU cache behavior than dynahash's chained
18 : : * hashtables.
19 : : * - The generated interface is type-safe and easier to use than dynahash,
20 : : * though at the cost of more complex setup.
21 : : * - Allocates memory in a MemoryContext or another allocator with a
22 : : * malloc/free style interface (which isn't easily usable in a shared
23 : : * memory context)
24 : : * - Does not require the overhead of a separate memory context.
25 : : *
26 : : * Usage notes:
27 : : *
28 : : * To generate a hash-table and associated functions for a use case several
29 : : * macros have to be #define'ed before this file is included. Including
30 : : * the file #undef's all those, so a new hash table can be generated
31 : : * afterwards.
32 : : * The relevant parameters are:
33 : : * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
34 : : * will result in hash table type 'foo_hash' and functions like
35 : : * 'foo_insert'/'foo_lookup' and so forth.
36 : : * - SH_ELEMENT_TYPE - type of the contained elements
37 : : * - SH_KEY_TYPE - type of the hashtable's key
38 : : * - SH_DECLARE - if defined function prototypes and type declarations are
39 : : * generated
40 : : * - SH_DEFINE - if defined function definitions are generated
41 : : * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
42 : : * declarations reside
43 : : * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
44 : : * use this to allocate bytes. The allocator must zero the returned space.
45 : : * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
46 : : * are defined, so you can supply your own
47 : : * The following parameters are only relevant when SH_DEFINE is defined:
48 : : * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
49 : : * - SH_EQUAL(table, a, b) - compare two table keys
50 : : * - SH_HASH_KEY(table, key) - generate hash for the key
51 : : * - SH_STORE_HASH - if defined the hash is stored in the elements
52 : : * - SH_GET_HASH(tb, a) - return the field to store the hash in
53 : : *
54 : : * The element type is required to contain a "status" member that can store
55 : : * the range of values defined in the SH_STATUS enum.
56 : : *
57 : : * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
58 : : * the hash table implementation needs to compare hashes to move elements
59 : : * (particularly when growing the hash), it's preferable, if possible, to
60 : : * store the element's hash in the element's data type. If the hash is so
61 : : * stored, the hash table will also compare hashes before calling SH_EQUAL
62 : : * when comparing two keys.
63 : : *
64 : : * For convenience the hash table create functions accept a void pointer
65 : : * that will be stored in the hash table type's member private_data. This
66 : : * allows callbacks to reference caller provided data.
67 : : *
68 : : * For examples of usage look at tidbitmap.c (file local definition) and
69 : : * execnodes.h/execGrouping.c (exposed declaration, file local
70 : : * implementation).
71 : : *
72 : : * Hash table design:
73 : : *
74 : : * The hash table design chosen is a variant of linear open-addressing. The
75 : : * reason for doing so is that linear addressing is CPU cache & pipeline
76 : : * friendly. The biggest disadvantage of simple linear addressing schemes
77 : : * are highly variable lookup times due to clustering, and deletions
78 : : * leaving a lot of tombstones around. To address these issues a variant
79 : : * of "robin hood" hashing is employed. Robin hood hashing optimizes
80 : : * chaining lengths by moving elements close to their optimal bucket
81 : : * ("rich" elements), out of the way if a to-be-inserted element is further
82 : : * away from its optimal position (i.e. it's "poor"). While that can make
83 : : * insertions slower, the average lookup performance is a lot better, and
84 : : * higher fill factors can be used in a still performant manner. To avoid
85 : : * tombstones - which normally solve the issue that a deleted node's
86 : : * presence is relevant to determine whether a lookup needs to continue
87 : : * looking or is done - buckets following a deleted element are shifted
88 : : * backwards, unless they're empty or already at their optimal position.
89 : : *
90 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
91 : : * Portions Copyright (c) 1994, Regents of the University of California
92 : : *
93 : : * src/include/lib/simplehash.h
94 : : */
95 : :
96 : :
97 : : /* helpers */
98 : : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
99 : : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
100 : : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
101 : :
102 : : /* name macros for: */
103 : :
104 : : /* type declarations */
105 : : #define SH_TYPE SH_MAKE_NAME(hash)
106 : : #define SH_STATUS SH_MAKE_NAME(status)
107 : : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
108 : : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
109 : : #define SH_ITERATOR SH_MAKE_NAME(iterator)
110 : :
111 : : /* function declarations */
112 : : #define SH_CREATE SH_MAKE_NAME(create)
113 : : #define SH_DESTROY SH_MAKE_NAME(destroy)
114 : : #define SH_RESET SH_MAKE_NAME(reset)
115 : : #define SH_INSERT SH_MAKE_NAME(insert)
116 : : #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
117 : : #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
118 : : #define SH_DELETE SH_MAKE_NAME(delete)
119 : : #define SH_LOOKUP SH_MAKE_NAME(lookup)
120 : : #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
121 : : #define SH_GROW SH_MAKE_NAME(grow)
122 : : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
123 : : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
124 : : #define SH_ITERATE SH_MAKE_NAME(iterate)
125 : : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
126 : : #define SH_FREE SH_MAKE_NAME(free)
127 : : #define SH_ESTIMATE_SPACE SH_MAKE_NAME(estimate_space)
128 : : #define SH_STAT SH_MAKE_NAME(stat)
129 : :
130 : : /* internal helper functions (no externally visible prototypes) */
131 : : #define SH_COMPUTE_SIZE SH_MAKE_NAME(compute_size)
132 : : #define SH_UPDATE_PARAMETERS SH_MAKE_NAME(update_parameters)
133 : : #define SH_NEXT SH_MAKE_NAME(next)
134 : : #define SH_PREV SH_MAKE_NAME(prev)
135 : : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
136 : : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
137 : : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
138 : : #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
139 : : #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
140 : :
141 : : /* generate forward declarations necessary to use the hash table */
142 : : #ifdef SH_DECLARE
143 : :
144 : : /* type definitions */
145 : : typedef struct SH_TYPE
146 : : {
147 : : /*
148 : : * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
149 : : * tables. Note that the maximum number of elements is lower
150 : : * (SH_MAX_FILLFACTOR)
151 : : */
152 : : uint64 size;
153 : :
154 : : /* how many elements have valid contents */
155 : : uint32 members;
156 : :
157 : : /* mask for bucket and size calculations, based on size */
158 : : uint32 sizemask;
159 : :
160 : : /* boundary after which to grow hashtable */
161 : : uint32 grow_threshold;
162 : :
163 : : /* hash buckets */
164 : : SH_ELEMENT_TYPE *data;
165 : :
166 : : #ifndef SH_RAW_ALLOCATOR
167 : : /* memory context to use for allocations */
168 : : MemoryContext ctx;
169 : : #endif
170 : :
171 : : /* user defined data, useful for callbacks */
172 : : void *private_data;
173 : : } SH_TYPE;
174 : :
175 : : typedef enum SH_STATUS
176 : : {
177 : : SH_STATUS_EMPTY = 0x00,
178 : : SH_STATUS_IN_USE = 0x01
179 : : } SH_STATUS;
180 : :
181 : : typedef struct SH_ITERATOR
182 : : {
183 : : uint32 cur; /* current element */
184 : : uint32 end;
185 : : bool done; /* iterator exhausted? */
186 : : } SH_ITERATOR;
187 : :
188 : : /* externally visible function prototypes */
189 : : #ifdef SH_RAW_ALLOCATOR
190 : : /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
191 : : SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
192 : : #else
193 : : /*
194 : : * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
195 : : * void *private_data)
196 : : */
197 : : SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
198 : : void *private_data);
199 : : #endif
200 : :
201 : : /* void <prefix>_destroy(<prefix>_hash *tb) */
202 : : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
203 : :
204 : : /* void <prefix>_reset(<prefix>_hash *tb) */
205 : : SH_SCOPE void SH_RESET(SH_TYPE * tb);
206 : :
207 : : /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
208 : : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
209 : :
210 : : /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
211 : : SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
212 : :
213 : : /*
214 : : * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
215 : : * bool *found)
216 : : */
217 : : SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
218 : : uint32 hash, bool *found);
219 : :
220 : : /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
221 : : SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
222 : :
223 : : /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
224 : : SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
225 : : uint32 hash);
226 : :
227 : : /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
228 : : SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
229 : :
230 : : /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
231 : : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
232 : :
233 : : /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
234 : : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
235 : :
236 : : /*
237 : : * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
238 : : * uint32 at)
239 : : */
240 : : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
241 : :
242 : : /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
243 : : SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
244 : :
245 : : /* size_t <prefix>_estimate_space(double nentries) */
246 : : SH_SCOPE size_t SH_ESTIMATE_SPACE(double nentries);
247 : :
248 : : /* void <prefix>_stat(<prefix>_hash *tb) */
249 : : SH_SCOPE void SH_STAT(SH_TYPE * tb);
250 : :
251 : : #endif /* SH_DECLARE */
252 : :
253 : :
254 : : /* generate implementation of the hash table */
255 : : #ifdef SH_DEFINE
256 : :
257 : : #include "port/pg_bitutils.h"
258 : :
259 : : #ifndef SH_RAW_ALLOCATOR
260 : : #include "utils/memutils.h"
261 : : #endif
262 : :
263 : : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
264 : : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
265 : :
266 : : /* normal fillfactor, unless already close to maximum */
267 : : #ifndef SH_FILLFACTOR
268 : : #define SH_FILLFACTOR (0.9)
269 : : #endif
270 : : /* increase fillfactor if we otherwise would error out */
271 : : #define SH_MAX_FILLFACTOR (0.98)
272 : : /* grow if actual and optimal location bigger than */
273 : : #ifndef SH_GROW_MAX_DIB
274 : : #define SH_GROW_MAX_DIB 25
275 : : #endif
276 : : /* grow if more than elements to move when inserting */
277 : : #ifndef SH_GROW_MAX_MOVE
278 : : #define SH_GROW_MAX_MOVE 150
279 : : #endif
280 : : #ifndef SH_GROW_MIN_FILLFACTOR
281 : : /* but do not grow due to SH_GROW_MAX_* if below */
282 : : #define SH_GROW_MIN_FILLFACTOR 0.1
283 : : #endif
284 : :
285 : : #ifdef SH_STORE_HASH
286 : : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
287 : : #else
288 : : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
289 : : #endif
290 : :
291 : : /*
292 : : * Wrap the following definitions in include guards, to avoid multiple
293 : : * definition errors if this header is included more than once. The rest of
294 : : * the file deliberately has no include guards, because it can be included
295 : : * with different parameters to define functions and types with non-colliding
296 : : * names.
297 : : */
298 : : #ifndef SIMPLEHASH_H
299 : : #define SIMPLEHASH_H
300 : :
301 : : #ifdef FRONTEND
302 : : #define sh_error(...) pg_fatal(__VA_ARGS__)
303 : : #define sh_log(...) pg_log_info(__VA_ARGS__)
304 : : #else
305 : : #define sh_error(...) elog(ERROR, __VA_ARGS__)
306 : : #define sh_log(...) elog(LOG, __VA_ARGS__)
307 : : #endif
308 : :
309 : : #endif
310 : :
311 : : /*
312 : : * Compute allocation size for hashtable. Result can be passed to
313 : : * SH_UPDATE_PARAMETERS. (Keep SH_ESTIMATE_SPACE in sync with this!)
314 : : */
315 : : static inline uint64
900 jdavis@postgresql.or 316 :CBC 447880 : SH_COMPUTE_SIZE(uint64 newsize)
317 : : {
318 : : uint64 size;
319 : :
320 : : /* supporting zero sized hashes would complicate matters */
3490 andres@anarazel.de 321 : 447880 : size = Max(newsize, 2);
322 : :
323 : : /* round up size to the next power of 2, that's how bucketing works */
2218 drowley@postgresql.o 324 : 447880 : size = pg_nextpower2_64(size);
3490 andres@anarazel.de 325 [ - + ]: 447880 : Assert(size <= SH_MAX_SIZE);
326 : :
327 : : /*
328 : : * Verify that allocation of ->data is possible on this platform, without
329 : : * overflowing Size.
330 : : */
1656 tgl@sss.pgh.pa.us 331 [ - + ]: 447880 : if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
2331 rhaas@postgresql.org 332 [ # # ]:UBC 0 : sh_error("hash table too large");
333 : :
900 jdavis@postgresql.or 334 :CBC 447880 : return size;
335 : : }
336 : :
337 : : /*
338 : : * Update sizing parameters for hashtable. Called when creating and growing
339 : : * the hashtable.
340 : : */
341 : : static inline void
342 : 223940 : SH_UPDATE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
343 : : {
344 : 223940 : uint64 size = SH_COMPUTE_SIZE(newsize);
345 : :
346 : : /* now set size */
3490 andres@anarazel.de 347 : 223940 : tb->size = size;
1726 drowley@postgresql.o 348 : 223940 : tb->sizemask = (uint32) (size - 1);
349 : :
350 : : /*
351 : : * Compute the next threshold at which we need to grow the hash table
352 : : * again.
353 : : */
3490 andres@anarazel.de 354 [ - + ]: 223940 : if (tb->size == SH_MAX_SIZE)
3490 andres@anarazel.de 355 :UBC 0 : tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
356 : : else
3490 andres@anarazel.de 357 :CBC 223940 : tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
358 : 223940 : }
359 : :
360 : : /* return the optimal bucket for the hash */
361 : : static inline uint32
3275 bruce@momjian.us 362 : 36020809 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
363 : : {
3490 andres@anarazel.de 364 : 36020809 : return hash & tb->sizemask;
365 : : }
366 : :
367 : : /* return next bucket after the current, handling wraparound */
368 : : static inline uint32
3275 bruce@momjian.us 369 : 16487565 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
370 : : {
3490 andres@anarazel.de 371 : 16487565 : curelem = (curelem + 1) & tb->sizemask;
372 : :
373 [ - + ]: 16487565 : Assert(curelem != startelem);
374 : :
375 : 16487565 : return curelem;
376 : : }
377 : :
378 : : /* return bucket before the current, handling wraparound */
379 : : static inline uint32
3275 bruce@momjian.us 380 : 2162766 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
381 : : {
3490 andres@anarazel.de 382 : 2162766 : curelem = (curelem - 1) & tb->sizemask;
383 : :
384 [ - + ]: 2162766 : Assert(curelem != startelem);
385 : :
386 : 2162766 : return curelem;
387 : : }
388 : :
389 : : /* return distance between bucket and its optimal position */
390 : : static inline uint32
3275 bruce@momjian.us 391 : 6379610 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
392 : : {
3490 andres@anarazel.de 393 [ + + ]: 6379610 : if (optimal <= bucket)
394 : 6346010 : return bucket - optimal;
395 : : else
396 : 33600 : return (tb->size + bucket) - optimal;
397 : : }
398 : :
399 : : static inline uint32
3275 bruce@momjian.us 400 : 7388541 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
401 : : {
402 : : #ifdef SH_STORE_HASH
3490 andres@anarazel.de 403 : 3461046 : return SH_GET_HASH(tb, entry);
404 : : #else
405 : 3927495 : return SH_HASH_KEY(tb, entry->SH_KEY);
406 : : #endif
407 : : }
408 : :
409 : : /* default memory allocator function */
410 : : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
411 : : static inline void SH_FREE(SH_TYPE * type, void *pointer);
412 : :
413 : : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
414 : :
415 : : /* default memory allocator function */
416 : : static inline void *
3275 bruce@momjian.us 417 : 218110 : SH_ALLOCATE(SH_TYPE * type, Size size)
418 : : {
419 : : #ifdef SH_RAW_ALLOCATOR
2331 rhaas@postgresql.org 420 : 514 : return SH_RAW_ALLOCATOR(size);
421 : : #else
3374 422 : 217596 : return MemoryContextAllocExtended(type->ctx, size,
423 : : MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
424 : : #endif
425 : : }
426 : :
427 : : /* default memory free function */
428 : : static inline void
3275 bruce@momjian.us 429 : 31893 : SH_FREE(SH_TYPE * type, void *pointer)
430 : : {
3374 rhaas@postgresql.org 431 : 31893 : pfree(pointer);
432 : 31893 : }
433 : :
434 : : #endif
435 : :
436 : : /*
437 : : * Create a hash table with enough space for `nelements` distinct members.
438 : : * Memory for the hash table is allocated from the passed-in context. If
439 : : * desired, the array of elements can be allocated using a passed-in allocator;
440 : : * this could be useful in order to place the array of elements in a shared
441 : : * memory, or in a context that will outlive the rest of the hash table.
442 : : * Memory other than for the array of elements will still be allocated from
443 : : * the passed-in context.
444 : : */
445 : : #ifdef SH_RAW_ALLOCATOR
446 : : SH_SCOPE SH_TYPE *
2331 447 : 509 : SH_CREATE(uint32 nelements, void *private_data)
448 : : #else
449 : : SH_SCOPE SH_TYPE *
3372 450 : 220612 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
451 : : #endif
452 : : {
453 : : SH_TYPE *tb;
454 : : uint64 size;
455 : :
456 : : #ifdef SH_RAW_ALLOCATOR
1279 tgl@sss.pgh.pa.us 457 : 509 : tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
458 : : #else
459 : 220612 : tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
3490 andres@anarazel.de 460 : 220612 : tb->ctx = ctx;
461 : : #endif
3372 rhaas@postgresql.org 462 : 221121 : tb->private_data = private_data;
463 : :
464 : : /* increase nelements by fillfactor, want to store nelements elements */
3490 andres@anarazel.de 465 [ - + ]: 221121 : size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
466 : :
900 jdavis@postgresql.or 467 : 221121 : size = SH_COMPUTE_SIZE(size);
468 : :
469 : 221121 : tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * size);
470 : :
471 : 221121 : SH_UPDATE_PARAMETERS(tb, size);
3490 andres@anarazel.de 472 : 221121 : return tb;
473 : : }
474 : :
475 : : /* destroy a previously created hash table */
476 : : SH_SCOPE void
3275 bruce@momjian.us 477 : 34904 : SH_DESTROY(SH_TYPE * tb)
478 : : {
3374 rhaas@postgresql.org 479 : 34904 : SH_FREE(tb, tb->data);
3490 andres@anarazel.de 480 : 34904 : pfree(tb);
481 : 34904 : }
482 : :
483 : : /* reset the contents of a previously created hash table */
484 : : SH_SCOPE void
2642 485 : 129216 : SH_RESET(SH_TYPE * tb)
486 : : {
487 : 129216 : memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
488 : 129216 : tb->members = 0;
489 : 129216 : }
490 : :
491 : : /*
492 : : * Grow a hash table to at least `newsize` buckets.
493 : : *
494 : : * Usually this will automatically be called by insertions/deletions, when
495 : : * necessary. But resizing to the exact input size can be advantageous
496 : : * performance-wise, when known at some point.
497 : : */
498 : : SH_SCOPE void
1726 drowley@postgresql.o 499 : 2819 : SH_GROW(SH_TYPE * tb, uint64 newsize)
500 : : {
3490 andres@anarazel.de 501 : 2819 : uint64 oldsize = tb->size;
502 : 2819 : SH_ELEMENT_TYPE *olddata = tb->data;
503 : : SH_ELEMENT_TYPE *newdata;
504 : : uint32 i;
505 : 2819 : uint32 startelem = 0;
506 : : uint32 copyelem;
507 : :
2218 drowley@postgresql.o 508 [ - + ]: 2819 : Assert(oldsize == pg_nextpower2_64(oldsize));
3490 andres@anarazel.de 509 [ - + ]: 2819 : Assert(oldsize != SH_MAX_SIZE);
510 [ - + ]: 2819 : Assert(oldsize < newsize);
511 : :
900 jdavis@postgresql.or 512 : 2819 : newsize = SH_COMPUTE_SIZE(newsize);
513 : :
514 : 2819 : tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * newsize);
515 : :
516 : : /*
517 : : * Update parameters for new table after allocation succeeds to avoid
518 : : * inconsistent state on OOM.
519 : : */
520 : 2819 : SH_UPDATE_PARAMETERS(tb, newsize);
521 : :
3490 andres@anarazel.de 522 : 2819 : newdata = tb->data;
523 : :
524 : : /*
525 : : * Copy entries from the old data to newdata. We theoretically could use
526 : : * SH_INSERT here, to avoid code duplication, but that's more general than
527 : : * we need. We neither want tb->members increased, nor do we need to do
528 : : * deal with deleted elements, nor do we need to compare keys. So a
529 : : * special-cased implementation is lot faster. As resizing can be time
530 : : * consuming and frequent, that's worthwhile to optimize.
531 : : *
532 : : * To be able to simply move entries over, we have to start not at the
533 : : * first bucket (i.e olddata[0]), but find the first bucket that's either
534 : : * empty, or is occupied by an entry at its optimal position. Such a
535 : : * bucket has to exist in any table with a load factor under 1, as not all
536 : : * buckets are occupied, i.e. there always has to be an empty bucket. By
537 : : * starting at such a bucket we can move the entries to the larger table,
538 : : * without having to deal with conflicts.
539 : : */
540 : :
541 : : /* search for the first element in the hash that's not wrapped around */
542 [ + - ]: 27375 : for (i = 0; i < oldsize; i++)
543 : : {
544 : 27375 : SH_ELEMENT_TYPE *oldentry = &olddata[i];
545 : : uint32 hash;
546 : : uint32 optimal;
547 : :
548 [ + + ]: 27375 : if (oldentry->status != SH_STATUS_IN_USE)
549 : : {
550 : 1484 : startelem = i;
551 : 1484 : break;
552 : : }
553 : :
554 : 25891 : hash = SH_ENTRY_HASH(tb, oldentry);
555 : 25891 : optimal = SH_INITIAL_BUCKET(tb, hash);
556 : :
557 [ + + ]: 25891 : if (optimal == i)
558 : : {
559 : 1335 : startelem = i;
560 : 1335 : break;
561 : : }
562 : : }
563 : :
564 : : /* and copy all elements in the old table */
565 : 2819 : copyelem = startelem;
566 [ + + ]: 712107 : for (i = 0; i < oldsize; i++)
567 : : {
568 : 709288 : SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
569 : :
570 [ + + ]: 709288 : if (oldentry->status == SH_STATUS_IN_USE)
571 : : {
572 : : uint32 hash;
573 : : uint32 startelem2;
574 : : uint32 curelem;
575 : : SH_ELEMENT_TYPE *newentry;
576 : :
577 : 620358 : hash = SH_ENTRY_HASH(tb, oldentry);
1308 drowley@postgresql.o 578 : 620358 : startelem2 = SH_INITIAL_BUCKET(tb, hash);
579 : 620358 : curelem = startelem2;
580 : :
581 : : /* find empty element to put data into */
582 : : while (true)
583 : : {
3490 andres@anarazel.de 584 : 856431 : newentry = &newdata[curelem];
585 : :
586 [ + + ]: 856431 : if (newentry->status == SH_STATUS_EMPTY)
587 : : {
588 : 620358 : break;
589 : : }
590 : :
1308 drowley@postgresql.o 591 : 236073 : curelem = SH_NEXT(tb, curelem, startelem2);
592 : : }
593 : :
594 : : /* copy entry to new slot */
3490 andres@anarazel.de 595 : 620358 : memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
596 : : }
597 : :
598 : : /* can't use SH_NEXT here, would use new size */
599 : 709288 : copyelem++;
600 [ + + ]: 709288 : if (copyelem >= oldsize)
601 : : {
602 : 2819 : copyelem = 0;
603 : : }
604 : : }
605 : :
3374 rhaas@postgresql.org 606 : 2819 : SH_FREE(tb, olddata);
3490 andres@anarazel.de 607 : 2819 : }
608 : :
609 : : /*
610 : : * This is a separate static inline function, so it can be reliably be inlined
611 : : * into its wrapper functions even if SH_SCOPE is extern.
612 : : */
613 : : static inline SH_ELEMENT_TYPE *
2469 jdavis@postgresql.or 614 : 16228489 : SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
615 : : {
616 : : uint32 startelem;
617 : : uint32 curelem;
618 : : SH_ELEMENT_TYPE *data;
619 : : uint32 insertdist;
620 : :
3450 andres@anarazel.de 621 : 129 : restart:
622 : 16228618 : insertdist = 0;
623 : :
624 : : /*
625 : : * We do the grow check even if the key is actually present, to avoid
626 : : * doing the check inside the loop. This also lets us avoid having to
627 : : * re-find our position in the hashtable after resizing.
628 : : *
629 : : * Note that this also reached when resizing the table due to
630 : : * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
631 : : */
3490 632 [ + + ]: 16228618 : if (unlikely(tb->members >= tb->grow_threshold))
633 : : {
1656 tgl@sss.pgh.pa.us 634 [ - + ]: 2819 : if (unlikely(tb->size == SH_MAX_SIZE))
2331 rhaas@postgresql.org 635 [ # # ]:UBC 0 : sh_error("hash table size exceeded");
636 : :
637 : : /*
638 : : * When optimizing, it can be very useful to print these out.
639 : : */
640 : : /* SH_STAT(tb); */
3490 andres@anarazel.de 641 :CBC 2819 : SH_GROW(tb, tb->size * 2);
642 : : /* SH_STAT(tb); */
643 : : }
644 : :
645 : : /* perform insert, start bucket search at optimal location */
646 : 16228618 : data = tb->data;
647 : 16228618 : startelem = SH_INITIAL_BUCKET(tb, hash);
648 : 16228618 : curelem = startelem;
649 : : while (true)
650 : 6023662 : {
651 : : uint32 curdist;
652 : : uint32 curhash;
653 : : uint32 curoptimal;
654 : 22252280 : SH_ELEMENT_TYPE *entry = &data[curelem];
655 : :
656 : : /* any empty bucket can directly be used */
657 [ + + ]: 22252280 : if (entry->status == SH_STATUS_EMPTY)
658 : : {
659 : 3499114 : tb->members++;
660 : 3499114 : entry->SH_KEY = key;
661 : : #ifdef SH_STORE_HASH
662 : 1931408 : SH_GET_HASH(tb, entry) = hash;
663 : : #endif
664 : 3499114 : entry->status = SH_STATUS_IN_USE;
665 : 3499114 : *found = false;
666 : 3499114 : return entry;
667 : : }
668 : :
669 : : /*
670 : : * If the bucket is not empty, we either found a match (in which case
671 : : * we're done), or we have to decide whether to skip over or move the
672 : : * colliding entry. When the colliding element's distance to its
673 : : * optimal position is smaller than the to-be-inserted entry's, we
674 : : * shift the colliding entry (and its followers) forward by one.
675 : : */
676 : :
677 [ + + + + ]: 18753166 : if (SH_COMPARE_KEYS(tb, hash, key, entry))
[ + + + + ]
[ + + + +
+ - ]
678 : : {
679 [ - + ]: 12373556 : Assert(entry->status == SH_STATUS_IN_USE);
680 : 12373556 : *found = true;
681 : 12373556 : return entry;
682 : : }
683 : :
684 : 6379610 : curhash = SH_ENTRY_HASH(tb, entry);
685 : 6379610 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
686 : 6379610 : curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
687 : :
688 [ + + ]: 6379610 : if (insertdist > curdist)
689 : : {
690 : 355948 : SH_ELEMENT_TYPE *lastentry = entry;
691 : 355948 : uint32 emptyelem = curelem;
692 : : uint32 moveelem;
3450 693 : 355948 : int32 emptydist = 0;
694 : :
695 : : /* find next empty bucket */
696 : : while (true)
3490 697 : 1826297 : {
698 : : SH_ELEMENT_TYPE *emptyentry;
699 : :
700 : 2182245 : emptyelem = SH_NEXT(tb, emptyelem, startelem);
701 : 2182245 : emptyentry = &data[emptyelem];
702 : :
703 [ + + ]: 2182245 : if (emptyentry->status == SH_STATUS_EMPTY)
704 : : {
705 : 355819 : lastentry = emptyentry;
706 : 355819 : break;
707 : : }
708 : :
709 : : /*
710 : : * To avoid negative consequences from overly imbalanced
711 : : * hashtables, grow the hashtable if collisions would require
712 : : * us to move a lot of entries. The most likely cause of such
713 : : * imbalance is filling a (currently) small table, from a
714 : : * currently big one, in hash-table order. Don't grow if the
715 : : * hashtable would be too empty, to prevent quick space
716 : : * explosion for some weird edge cases.
717 : : */
3018 718 [ + + ]: 1826426 : if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
719 [ + - ]: 129 : ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
720 : : {
3450 721 : 129 : tb->grow_threshold = 0;
722 : 129 : goto restart;
723 : : }
724 : : }
725 : :
726 : : /* shift forward, starting at last occupied element */
727 : :
728 : : /*
729 : : * TODO: This could be optimized to be one memcpy in many cases,
730 : : * excepting wrapping around at the end of ->data. Hasn't shown up
731 : : * in profiles so far though.
732 : : */
3490 733 : 355819 : moveelem = emptyelem;
734 [ + + ]: 2518585 : while (moveelem != curelem)
735 : : {
736 : : SH_ELEMENT_TYPE *moveentry;
737 : :
738 : 2162766 : moveelem = SH_PREV(tb, moveelem, startelem);
739 : 2162766 : moveentry = &data[moveelem];
740 : :
741 : 2162766 : memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
742 : 2162766 : lastentry = moveentry;
743 : : }
744 : :
745 : : /* and fill the now empty spot */
746 : 355819 : tb->members++;
747 : :
748 : 355819 : entry->SH_KEY = key;
749 : : #ifdef SH_STORE_HASH
750 : 243515 : SH_GET_HASH(tb, entry) = hash;
751 : : #endif
752 : 355819 : entry->status = SH_STATUS_IN_USE;
753 : 355819 : *found = false;
754 : 355819 : return entry;
755 : : }
756 : :
757 : 6023662 : curelem = SH_NEXT(tb, curelem, startelem);
758 : 6023662 : insertdist++;
759 : :
760 : : /*
761 : : * To avoid negative consequences from overly imbalanced hashtables,
762 : : * grow the hashtable if collisions lead to large runs. The most
763 : : * likely cause of such imbalance is filling a (currently) small
764 : : * table, from a currently big one, in hash-table order. Don't grow
765 : : * if the hashtable would be too empty, to prevent quick space
766 : : * explosion for some weird edge cases.
767 : : */
3018 768 [ - + ]: 6023662 : if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
3018 andres@anarazel.de 769 [ # # ]:UBC 0 : ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
770 : : {
3450 771 : 0 : tb->grow_threshold = 0;
772 : 0 : goto restart;
773 : : }
774 : : }
775 : : }
776 : :
777 : : /*
778 : : * Insert the key into the hash-table, set *found to true if the key already
779 : : * exists, false otherwise. Returns the hash-table entry in either case.
780 : : */
781 : : SH_SCOPE SH_ELEMENT_TYPE *
2469 jdavis@postgresql.or 782 :CBC 11016036 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
783 : : {
2182 tgl@sss.pgh.pa.us 784 : 11016036 : uint32 hash = SH_HASH_KEY(tb, key);
785 : :
2469 jdavis@postgresql.or 786 : 11016036 : return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
787 : : }
788 : :
789 : : /*
790 : : * Insert the key into the hash-table using an already-calculated hash. Set
791 : : * *found to true if the key already exists, false otherwise. Returns the
792 : : * hash-table entry in either case.
793 : : */
794 : : SH_SCOPE SH_ELEMENT_TYPE *
795 : 5212453 : SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
796 : : {
797 : 5212453 : return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
798 : : }
799 : :
800 : : /*
801 : : * This is a separate static inline function, so it can be reliably be inlined
802 : : * into its wrapper functions even if SH_SCOPE is extern.
803 : : */
804 : : static inline SH_ELEMENT_TYPE *
805 : 11324371 : SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
806 : : {
3490 andres@anarazel.de 807 : 11324371 : const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
808 : 11324371 : uint32 curelem = startelem;
809 : :
810 : : while (true)
811 : 6475832 : {
812 : 17800203 : SH_ELEMENT_TYPE *entry = &tb->data[curelem];
813 : :
814 [ + + ]: 17800203 : if (entry->status == SH_STATUS_EMPTY)
815 : : {
816 : 3152828 : return NULL;
817 : : }
818 : :
819 [ - + ]: 14647375 : Assert(entry->status == SH_STATUS_IN_USE);
820 : :
821 [ + + ]: 14647375 : if (SH_COMPARE_KEYS(tb, hash, key, entry))
[ + + + - ]
[ + + + +
+ - ]
822 : 8171543 : return entry;
823 : :
824 : : /*
825 : : * TODO: we could stop search based on distance. If the current
826 : : * buckets's distance-from-optimal is smaller than what we've skipped
827 : : * already, the entry doesn't exist. Probably only do so if
828 : : * SH_STORE_HASH is defined, to avoid re-computing hashes?
829 : : */
830 : :
831 : 6475832 : curelem = SH_NEXT(tb, curelem, startelem);
832 : : }
833 : : }
834 : :
835 : : /*
836 : : * Lookup entry in hash table. Returns NULL if key not present.
837 : : */
838 : : SH_SCOPE SH_ELEMENT_TYPE *
2469 jdavis@postgresql.or 839 : 10274554 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
840 : : {
2182 tgl@sss.pgh.pa.us 841 : 10274554 : uint32 hash = SH_HASH_KEY(tb, key);
842 : :
2469 jdavis@postgresql.or 843 : 10274554 : return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
844 : : }
845 : :
846 : : /*
847 : : * Lookup entry in hash table using an already-calculated hash.
848 : : *
849 : : * Returns NULL if key not present.
850 : : */
851 : : SH_SCOPE SH_ELEMENT_TYPE *
852 : 1049817 : SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
853 : : {
854 : 1049817 : return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
855 : : }
856 : :
857 : : /*
858 : : * Delete entry from hash table by key. Returns whether to-be-deleted key was
859 : : * present.
860 : : */
861 : : SH_SCOPE bool
3275 bruce@momjian.us 862 : 1079279 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
863 : : {
3490 andres@anarazel.de 864 : 1079279 : uint32 hash = SH_HASH_KEY(tb, key);
865 : 1079279 : uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
866 : 1079279 : uint32 curelem = startelem;
867 : :
868 : : while (true)
869 : 289204 : {
870 : 1368483 : SH_ELEMENT_TYPE *entry = &tb->data[curelem];
871 : :
872 [ + + ]: 1368483 : if (entry->status == SH_STATUS_EMPTY)
873 : 97098 : return false;
874 : :
875 [ + - ]: 2512335 : if (entry->status == SH_STATUS_IN_USE &&
[ + - + + ]
876 [ + + ]: 1271385 : SH_COMPARE_KEYS(tb, hash, key, entry))
[ # # # # ]
877 : : {
878 : 982181 : SH_ELEMENT_TYPE *lastentry = entry;
879 : :
880 : 982181 : tb->members--;
881 : :
882 : : /*
883 : : * Backward shift following elements till either an empty element
884 : : * or an element at its optimal position is encountered.
885 : : *
886 : : * While that sounds expensive, the average chain length is short,
887 : : * and deletions would otherwise require tombstones.
888 : : */
889 : : while (true)
890 : 86192 : {
891 : : SH_ELEMENT_TYPE *curentry;
892 : : uint32 curhash;
893 : : uint32 curoptimal;
894 : :
895 : 1068373 : curelem = SH_NEXT(tb, curelem, startelem);
896 : 1068373 : curentry = &tb->data[curelem];
897 : :
898 [ + + ]: 1068373 : if (curentry->status != SH_STATUS_IN_USE)
899 : : {
900 : 931720 : lastentry->status = SH_STATUS_EMPTY;
901 : 931720 : break;
902 : : }
903 : :
904 : 136653 : curhash = SH_ENTRY_HASH(tb, curentry);
905 : 136653 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
906 : :
907 : : /* current is at optimal position, done */
908 [ + + ]: 136653 : if (curoptimal == curelem)
909 : : {
910 : 50461 : lastentry->status = SH_STATUS_EMPTY;
911 : 50461 : break;
912 : : }
913 : :
914 : : /* shift */
915 : 86192 : memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
916 : :
917 : 86192 : lastentry = curentry;
918 : : }
919 : :
920 : 982181 : return true;
921 : : }
922 : :
923 : : /* TODO: return false; if distance too big */
924 : :
925 : 289204 : curelem = SH_NEXT(tb, curelem, startelem);
926 : : }
927 : : }
928 : :
929 : : /*
930 : : * Delete entry from hash table by entry pointer
931 : : */
932 : : SH_SCOPE void
1862 drowley@postgresql.o 933 : 190570 : SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
934 : : {
935 : 190570 : SH_ELEMENT_TYPE *lastentry = entry;
936 : 190570 : uint32 hash = SH_ENTRY_HASH(tb, entry);
937 : 190570 : uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
938 : : uint32 curelem;
939 : :
940 : : /* Calculate the index of 'entry' */
941 : 190570 : curelem = entry - &tb->data[0];
942 : :
943 : 190570 : tb->members--;
944 : :
945 : : /*
946 : : * Backward shift following elements till either an empty element or an
947 : : * element at its optimal position is encountered.
948 : : *
949 : : * While that sounds expensive, the average chain length is short, and
950 : : * deletions would otherwise require tombstones.
951 : : */
952 : : while (true)
953 : 21606 : {
954 : : SH_ELEMENT_TYPE *curentry;
955 : : uint32 curhash;
956 : : uint32 curoptimal;
957 : :
958 : 212176 : curelem = SH_NEXT(tb, curelem, startelem);
959 : 212176 : curentry = &tb->data[curelem];
960 : :
961 [ + + ]: 212176 : if (curentry->status != SH_STATUS_IN_USE)
962 : : {
963 : 176717 : lastentry->status = SH_STATUS_EMPTY;
964 : 176717 : break;
965 : : }
966 : :
967 : 35459 : curhash = SH_ENTRY_HASH(tb, curentry);
968 : 35459 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
969 : :
970 : : /* current is at optimal position, done */
971 [ + + ]: 35459 : if (curoptimal == curelem)
972 : : {
973 : 13853 : lastentry->status = SH_STATUS_EMPTY;
974 : 13853 : break;
975 : : }
976 : :
977 : : /* shift */
978 : 21606 : memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
979 : :
980 : 21606 : lastentry = curentry;
981 : : }
982 : 190570 : }
983 : :
984 : : /*
985 : : * Initialize iterator.
986 : : */
987 : : SH_SCOPE void
3275 bruce@momjian.us 988 : 322717 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
989 : : {
3490 andres@anarazel.de 990 : 322717 : uint64 startelem = PG_UINT64_MAX;
991 : :
992 : : /*
993 : : * Search for the first empty element. As deletions during iterations are
994 : : * supported, we want to start/end at an element that cannot be affected
995 : : * by elements being shifted.
996 : : */
1034 997 [ + - ]: 369093 : for (uint32 i = 0; i < tb->size; i++)
998 : : {
3490 999 : 369093 : SH_ELEMENT_TYPE *entry = &tb->data[i];
1000 : :
1001 [ + + ]: 369093 : if (entry->status != SH_STATUS_IN_USE)
1002 : : {
1003 : 322717 : startelem = i;
1004 : 322717 : break;
1005 : : }
1006 : : }
1007 : :
1008 : : /* we should have found an empty element */
1009 [ - + ]: 322717 : Assert(startelem < SH_MAX_SIZE);
1010 : :
1011 : : /*
1012 : : * Iterate backwards, that allows the current element to be deleted, even
1013 : : * if there are backward shifts
1014 : : */
1015 : 322717 : iter->cur = startelem;
1016 : 322717 : iter->end = iter->cur;
1017 : 322717 : iter->done = false;
1018 : 322717 : }
1019 : :
1020 : : /*
1021 : : * Initialize iterator to a specific bucket. That's really only useful for
1022 : : * cases where callers are partially iterating over the hashspace, and that
1023 : : * iteration deletes and inserts elements based on visited entries. Doing that
1024 : : * repeatedly could lead to an unbalanced keyspace when always starting at the
1025 : : * same position.
1026 : : */
1027 : : SH_SCOPE void
3275 bruce@momjian.us 1028 : 24 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
1029 : : {
1030 : : /*
1031 : : * Iterate backwards, that allows the current element to be deleted, even
1032 : : * if there are backward shifts.
1033 : : */
3240 tgl@sss.pgh.pa.us 1034 : 24 : iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
3490 andres@anarazel.de 1035 : 24 : iter->end = iter->cur;
1036 : 24 : iter->done = false;
1037 : 24 : }
1038 : :
1039 : : /*
1040 : : * Iterate over all entries in the hash-table. Return the next occupied entry,
1041 : : * or NULL if done.
1042 : : *
1043 : : * During iteration the current entry in the hash table may be deleted,
1044 : : * without leading to elements being skipped or returned twice. Additionally
1045 : : * the rest of the table may be modified (i.e. there can be insertions or
1046 : : * deletions), but if so, there's neither a guarantee that all nodes are
1047 : : * visited at least once, nor a guarantee that a node is visited at most once.
1048 : : */
1049 : : SH_SCOPE SH_ELEMENT_TYPE *
3275 bruce@momjian.us 1050 : 2819093 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
1051 : : {
1052 : : /* validate sanity of the given iterator */
199 drowley@postgresql.o 1053 [ - + ]: 2819093 : Assert(iter->cur < tb->size);
1054 [ - + ]: 2819093 : Assert(iter->end < tb->size);
1055 : :
3490 andres@anarazel.de 1056 [ + + ]: 37927506 : while (!iter->done)
1057 : : {
1058 : : SH_ELEMENT_TYPE *elem;
1059 : :
1060 : 37604926 : elem = &tb->data[iter->cur];
1061 : :
1062 : : /* next element in backward direction */
1063 : 37604926 : iter->cur = (iter->cur - 1) & tb->sizemask;
1064 : :
1065 [ + + ]: 37604926 : if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
1066 : 322606 : iter->done = true;
1067 [ + + ]: 37604926 : if (elem->status == SH_STATUS_IN_USE)
1068 : : {
1069 : 2496513 : return elem;
1070 : : }
1071 : : }
1072 : :
1073 : 322580 : return NULL;
1074 : : }
1075 : :
1076 : : /*
1077 : : * Estimate the amount of space needed for a hashtable with nentries entries.
1078 : : * Return SIZE_MAX if that's too many entries.
1079 : : *
1080 : : * nentries is "double" because this is meant for use by the planner,
1081 : : * which typically works with double rowcount estimates. So we'd need to
1082 : : * clamp to integer somewhere and that might as well be here. We do expect
1083 : : * the value not to be NaN or negative, else the result will be garbage.
1084 : : */
1085 : : SH_SCOPE size_t
184 tgl@sss.pgh.pa.us 1086 :GNC 3964 : SH_ESTIMATE_SPACE(double nentries)
1087 : : {
1088 : : uint64 size;
1089 : : uint64 space;
1090 : :
1091 : : /* scale request by SH_FILLFACTOR, as SH_CREATE does */
1092 : 3964 : nentries = nentries / SH_FILLFACTOR;
1093 : :
1094 : : /* fail if we'd overrun SH_MAX_SIZE entries */
1095 [ + + ]: 3964 : if (nentries >= SH_MAX_SIZE)
1096 : 5 : return SIZE_MAX;
1097 : :
1098 : : /* should be safe to convert to uint64 */
1099 : 3959 : size = (uint64) nentries;
1100 : :
1101 : : /* supporting zero sized hashes would complicate matters */
1102 : 3959 : size = Max(size, 2);
1103 : :
1104 : : /* round up size to the next power of 2, that's how bucketing works */
1105 : 3959 : size = pg_nextpower2_64(size);
1106 : :
1107 : : /* calculate space needed for ->data */
1108 : 3959 : space = ((uint64) sizeof(SH_ELEMENT_TYPE)) * size;
1109 : :
1110 : : /* verify that allocation of ->data is possible on this platform */
1111 [ - + ]: 3959 : if (space >= SIZE_MAX / 2)
184 tgl@sss.pgh.pa.us 1112 :UNC 0 : return SIZE_MAX;
1113 : :
184 tgl@sss.pgh.pa.us 1114 :GNC 3959 : return (size_t) space + sizeof(SH_TYPE);
1115 : : }
1116 : :
1117 : : /*
1118 : : * Report some statistics about the state of the hashtable. For
1119 : : * debugging/profiling purposes only.
1120 : : */
1121 : : SH_SCOPE void
3275 bruce@momjian.us 1122 :UBC 0 : SH_STAT(SH_TYPE * tb)
1123 : : {
3490 andres@anarazel.de 1124 : 0 : uint32 max_chain_length = 0;
1125 : 0 : uint32 total_chain_length = 0;
1126 : : double avg_chain_length;
1127 : : double fillfactor;
1128 : : uint32 i;
1129 : :
1279 tgl@sss.pgh.pa.us 1130 : 0 : uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32));
3490 andres@anarazel.de 1131 : 0 : uint32 total_collisions = 0;
1132 : 0 : uint32 max_collisions = 0;
1133 : : double avg_collisions;
1134 : :
1135 [ # # ]: 0 : for (i = 0; i < tb->size; i++)
1136 : : {
1137 : : uint32 hash;
1138 : : uint32 optimal;
1139 : : uint32 dist;
1140 : : SH_ELEMENT_TYPE *elem;
1141 : :
1142 : 0 : elem = &tb->data[i];
1143 : :
1144 [ # # ]: 0 : if (elem->status != SH_STATUS_IN_USE)
1145 : 0 : continue;
1146 : :
1147 : 0 : hash = SH_ENTRY_HASH(tb, elem);
1148 : 0 : optimal = SH_INITIAL_BUCKET(tb, hash);
1149 : 0 : dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
1150 : :
1151 [ # # ]: 0 : if (dist > max_chain_length)
1152 : 0 : max_chain_length = dist;
1153 : 0 : total_chain_length += dist;
1154 : :
1155 : 0 : collisions[optimal]++;
1156 : : }
1157 : :
1158 [ # # ]: 0 : for (i = 0; i < tb->size; i++)
1159 : : {
1160 : 0 : uint32 curcoll = collisions[i];
1161 : :
1162 [ # # ]: 0 : if (curcoll == 0)
1163 : 0 : continue;
1164 : :
1165 : : /* single contained element is not a collision */
1166 : 0 : curcoll--;
1167 : 0 : total_collisions += curcoll;
1168 [ # # ]: 0 : if (curcoll > max_collisions)
1169 : 0 : max_collisions = curcoll;
1170 : : }
1171 : :
1172 : : /* large enough to be worth freeing, even if just used for debugging */
758 1173 : 0 : pfree(collisions);
1174 : :
3490 1175 [ # # ]: 0 : if (tb->members > 0)
1176 : : {
1177 : 0 : fillfactor = tb->members / ((double) tb->size);
1178 : 0 : avg_chain_length = ((double) total_chain_length) / tb->members;
1179 : 0 : avg_collisions = ((double) total_collisions) / tb->members;
1180 : : }
1181 : : else
1182 : : {
1183 : 0 : fillfactor = 0;
1184 : 0 : avg_chain_length = 0;
1185 : 0 : avg_collisions = 0;
1186 : : }
1187 : :
1679 peter@eisentraut.org 1188 [ # # ]: 0 : sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
1189 : : tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
1190 : : total_collisions, max_collisions, avg_collisions);
3490 andres@anarazel.de 1191 : 0 : }
1192 : :
1193 : : #endif /* SH_DEFINE */
1194 : :
1195 : :
1196 : : /* undefine external parameters, so next hash table can be defined */
1197 : : #undef SH_PREFIX
1198 : : #undef SH_KEY_TYPE
1199 : : #undef SH_KEY
1200 : : #undef SH_ELEMENT_TYPE
1201 : : #undef SH_HASH_KEY
1202 : : #undef SH_SCOPE
1203 : : #undef SH_DECLARE
1204 : : #undef SH_DEFINE
1205 : : #undef SH_GET_HASH
1206 : : #undef SH_STORE_HASH
1207 : : #undef SH_USE_NONDEFAULT_ALLOCATOR
1208 : : #undef SH_EQUAL
1209 : :
1210 : : /* undefine locally declared macros */
1211 : : #undef SH_MAKE_PREFIX
1212 : : #undef SH_MAKE_NAME
1213 : : #undef SH_MAKE_NAME_
1214 : : #undef SH_FILLFACTOR
1215 : : #undef SH_MAX_FILLFACTOR
1216 : : #undef SH_GROW_MAX_DIB
1217 : : #undef SH_GROW_MAX_MOVE
1218 : : #undef SH_GROW_MIN_FILLFACTOR
1219 : : #undef SH_MAX_SIZE
1220 : :
1221 : : /* types */
1222 : : #undef SH_TYPE
1223 : : #undef SH_STATUS
1224 : : #undef SH_STATUS_EMPTY
1225 : : #undef SH_STATUS_IN_USE
1226 : : #undef SH_ITERATOR
1227 : :
1228 : : /* external function names */
1229 : : #undef SH_CREATE
1230 : : #undef SH_DESTROY
1231 : : #undef SH_RESET
1232 : : #undef SH_INSERT
1233 : : #undef SH_INSERT_HASH
1234 : : #undef SH_DELETE_ITEM
1235 : : #undef SH_DELETE
1236 : : #undef SH_LOOKUP
1237 : : #undef SH_LOOKUP_HASH
1238 : : #undef SH_GROW
1239 : : #undef SH_START_ITERATE
1240 : : #undef SH_START_ITERATE_AT
1241 : : #undef SH_ITERATE
1242 : : #undef SH_ALLOCATE
1243 : : #undef SH_FREE
1244 : : #undef SH_ESTIMATE_SPACE
1245 : : #undef SH_STAT
1246 : :
1247 : : /* internal function names */
1248 : : #undef SH_COMPUTE_SIZE
1249 : : #undef SH_UPDATE_PARAMETERS
1250 : : #undef SH_COMPARE_KEYS
1251 : : #undef SH_INITIAL_BUCKET
1252 : : #undef SH_NEXT
1253 : : #undef SH_PREV
1254 : : #undef SH_DISTANCE_FROM_OPTIMAL
1255 : : #undef SH_ENTRY_HASH
1256 : : #undef SH_INSERT_HASH_INTERNAL
1257 : : #undef SH_LOOKUP_HASH_INTERNAL
|