Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * simplehash.h
3 : : *
4 : : * When included this file generates a "templated" (by way of macros)
5 : : * open-addressing hash table implementation specialized to user-defined
6 : : * types.
7 : : *
8 : : * It's probably not worthwhile to generate such a specialized implementation
9 : : * for hash tables that aren't performance or space sensitive.
10 : : *
11 : : * Compared to dynahash, simplehash has the following benefits:
12 : : *
13 : : * - Due to the "templated" code generation has known structure sizes and no
14 : : * indirect function calls (which show up substantially in dynahash
15 : : * profiles). These features considerably increase speed for small
16 : : * entries.
17 : : * - Open addressing has better CPU cache behavior than dynahash's chained
18 : : * hashtables.
19 : : * - The generated interface is type-safe and easier to use than dynahash,
20 : : * though at the cost of more complex setup.
21 : : * - Allocates memory in a MemoryContext or another allocator with a
22 : : * malloc/free style interface (which isn't easily usable in a shared
23 : : * memory context)
24 : : * - Does not require the overhead of a separate memory context.
25 : : *
26 : : * Usage notes:
27 : : *
28 : : * To generate a hash-table and associated functions for a use case several
29 : : * macros have to be #define'ed before this file is included. Including
30 : : * the file #undef's all those, so a new hash table can be generated
31 : : * afterwards.
32 : : * The relevant parameters are:
33 : : * - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
34 : : * will result in hash table type 'foo_hash' and functions like
35 : : * 'foo_insert'/'foo_lookup' and so forth.
36 : : * - SH_ELEMENT_TYPE - type of the contained elements
37 : : * - SH_KEY_TYPE - type of the hashtable's key
38 : : * - SH_DECLARE - if defined function prototypes and type declarations are
39 : : * generated
40 : : * - SH_DEFINE - if defined function definitions are generated
41 : : * - SH_SCOPE - in which scope (e.g. extern, static inline) do function
42 : : * declarations reside
43 : : * - SH_RAW_ALLOCATOR - if defined, memory contexts are not used; instead,
44 : : * use this to allocate bytes. The allocator must zero the returned space.
45 : : * - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
46 : : * are defined, so you can supply your own
47 : : * The following parameters are only relevant when SH_DEFINE is defined:
48 : : * - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
49 : : * - SH_EQUAL(table, a, b) - compare two table keys
50 : : * - SH_HASH_KEY(table, key) - generate hash for the key
51 : : * - SH_STORE_HASH - if defined the hash is stored in the elements
52 : : * - SH_GET_HASH(tb, a) - return the field to store the hash in
53 : : *
54 : : * The element type is required to contain a "status" member that can store
55 : : * the range of values defined in the SH_STATUS enum.
56 : : *
57 : : * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
58 : : * the hash table implementation needs to compare hashes to move elements
59 : : * (particularly when growing the hash), it's preferable, if possible, to
60 : : * store the element's hash in the element's data type. If the hash is so
61 : : * stored, the hash table will also compare hashes before calling SH_EQUAL
62 : : * when comparing two keys.
63 : : *
64 : : * For convenience the hash table create functions accept a void pointer
65 : : * that will be stored in the hash table type's member private_data. This
66 : : * allows callbacks to reference caller provided data.
67 : : *
68 : : * For examples of usage look at tidbitmap.c (file local definition) and
69 : : * execnodes.h/execGrouping.c (exposed declaration, file local
70 : : * implementation).
71 : : *
72 : : * Hash table design:
73 : : *
74 : : * The hash table design chosen is a variant of linear open-addressing. The
75 : : * reason for doing so is that linear addressing is CPU cache & pipeline
76 : : * friendly. The biggest disadvantage of simple linear addressing schemes
77 : : * are highly variable lookup times due to clustering, and deletions
78 : : * leaving a lot of tombstones around. To address these issues a variant
79 : : * of "robin hood" hashing is employed. Robin hood hashing optimizes
80 : : * chaining lengths by moving elements close to their optimal bucket
81 : : * ("rich" elements), out of the way if a to-be-inserted element is further
82 : : * away from its optimal position (i.e. it's "poor"). While that can make
83 : : * insertions slower, the average lookup performance is a lot better, and
84 : : * higher fill factors can be used in a still performant manner. To avoid
85 : : * tombstones - which normally solve the issue that a deleted node's
86 : : * presence is relevant to determine whether a lookup needs to continue
87 : : * looking or is done - buckets following a deleted element are shifted
88 : : * backwards, unless they're empty or already at their optimal position.
89 : : *
90 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
91 : : * Portions Copyright (c) 1994, Regents of the University of California
92 : : *
93 : : * src/include/lib/simplehash.h
94 : : */
95 : :
96 : : #include "port/pg_bitutils.h"
97 : :
98 : : /* helpers */
99 : : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
100 : : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
101 : : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
102 : :
103 : : /* name macros for: */
104 : :
105 : : /* type declarations */
106 : : #define SH_TYPE SH_MAKE_NAME(hash)
107 : : #define SH_STATUS SH_MAKE_NAME(status)
108 : : #define SH_STATUS_EMPTY SH_MAKE_NAME(SH_EMPTY)
109 : : #define SH_STATUS_IN_USE SH_MAKE_NAME(SH_IN_USE)
110 : : #define SH_ITERATOR SH_MAKE_NAME(iterator)
111 : :
112 : : /* function declarations */
113 : : #define SH_CREATE SH_MAKE_NAME(create)
114 : : #define SH_DESTROY SH_MAKE_NAME(destroy)
115 : : #define SH_RESET SH_MAKE_NAME(reset)
116 : : #define SH_INSERT SH_MAKE_NAME(insert)
117 : : #define SH_INSERT_HASH SH_MAKE_NAME(insert_hash)
118 : : #define SH_DELETE_ITEM SH_MAKE_NAME(delete_item)
119 : : #define SH_DELETE SH_MAKE_NAME(delete)
120 : : #define SH_LOOKUP SH_MAKE_NAME(lookup)
121 : : #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash)
122 : : #define SH_GROW SH_MAKE_NAME(grow)
123 : : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
124 : : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
125 : : #define SH_ITERATE SH_MAKE_NAME(iterate)
126 : : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
127 : : #define SH_FREE SH_MAKE_NAME(free)
128 : : #define SH_ESTIMATE_SPACE SH_MAKE_NAME(estimate_space)
129 : : #define SH_STAT SH_MAKE_NAME(stat)
130 : :
131 : : /* internal helper functions (no externally visible prototypes) */
132 : : #define SH_COMPUTE_SIZE SH_MAKE_NAME(compute_size)
133 : : #define SH_UPDATE_PARAMETERS SH_MAKE_NAME(update_parameters)
134 : : #define SH_NEXT SH_MAKE_NAME(next)
135 : : #define SH_PREV SH_MAKE_NAME(prev)
136 : : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
137 : : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
138 : : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
139 : : #define SH_INSERT_HASH_INTERNAL SH_MAKE_NAME(insert_hash_internal)
140 : : #define SH_LOOKUP_HASH_INTERNAL SH_MAKE_NAME(lookup_hash_internal)
141 : :
142 : : /* generate forward declarations necessary to use the hash table */
143 : : #ifdef SH_DECLARE
144 : :
145 : : /* type definitions */
146 : : typedef struct SH_TYPE
147 : : {
148 : : /*
149 : : * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
150 : : * tables. Note that the maximum number of elements is lower
151 : : * (SH_MAX_FILLFACTOR)
152 : : */
153 : : uint64 size;
154 : :
155 : : /* how many elements have valid contents */
156 : : uint32 members;
157 : :
158 : : /* mask for bucket and size calculations, based on size */
159 : : uint32 sizemask;
160 : :
161 : : /* boundary after which to grow hashtable */
162 : : uint32 grow_threshold;
163 : :
164 : : /* hash buckets */
165 : : SH_ELEMENT_TYPE *data;
166 : :
167 : : #ifndef SH_RAW_ALLOCATOR
168 : : /* memory context to use for allocations */
169 : : MemoryContext ctx;
170 : : #endif
171 : :
172 : : /* user defined data, useful for callbacks */
173 : : void *private_data;
174 : : } SH_TYPE;
175 : :
176 : : typedef enum SH_STATUS
177 : : {
178 : : SH_STATUS_EMPTY = 0x00,
179 : : SH_STATUS_IN_USE = 0x01
180 : : } SH_STATUS;
181 : :
182 : : typedef struct SH_ITERATOR
183 : : {
184 : : uint32 cur; /* current element */
185 : : uint32 end;
186 : : bool done; /* iterator exhausted? */
187 : : } SH_ITERATOR;
188 : :
189 : : /* externally visible function prototypes */
190 : : #ifdef SH_RAW_ALLOCATOR
191 : : /* <prefix>_hash <prefix>_create(uint32 nelements, void *private_data) */
192 : : SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
193 : : #else
194 : : /*
195 : : * <prefix>_hash <prefix>_create(MemoryContext ctx, uint32 nelements,
196 : : * void *private_data)
197 : : */
198 : : SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
199 : : void *private_data);
200 : : #endif
201 : :
202 : : /* void <prefix>_destroy(<prefix>_hash *tb) */
203 : : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
204 : :
205 : : /* void <prefix>_reset(<prefix>_hash *tb) */
206 : : SH_SCOPE void SH_RESET(SH_TYPE * tb);
207 : :
208 : : /* void <prefix>_grow(<prefix>_hash *tb, uint64 newsize) */
209 : : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint64 newsize);
210 : :
211 : : /* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
212 : : SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
213 : :
214 : : /*
215 : : * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
216 : : * bool *found)
217 : : */
218 : : SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
219 : : uint32 hash, bool *found);
220 : :
221 : : /* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
222 : : SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
223 : :
224 : : /* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
225 : : SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
226 : : uint32 hash);
227 : :
228 : : /* void <prefix>_delete_item(<prefix>_hash *tb, <element> *entry) */
229 : : SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry);
230 : :
231 : : /* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
232 : : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
233 : :
234 : : /* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
235 : : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
236 : :
237 : : /*
238 : : * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
239 : : * uint32 at)
240 : : */
241 : : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
242 : :
243 : : /* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
244 : : SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
245 : :
246 : : /* size_t <prefix>_estimate_space(double nentries) */
247 : : SH_SCOPE size_t SH_ESTIMATE_SPACE(double nentries);
248 : :
249 : : /* void <prefix>_stat(<prefix>_hash *tb) */
250 : : SH_SCOPE void SH_STAT(SH_TYPE * tb);
251 : :
252 : : #endif /* SH_DECLARE */
253 : :
254 : :
255 : : /* generate implementation of the hash table */
256 : : #ifdef SH_DEFINE
257 : :
258 : : #ifndef SH_RAW_ALLOCATOR
259 : : #include "utils/memutils.h"
260 : : #endif
261 : :
262 : : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
263 : : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
264 : :
265 : : /* normal fillfactor, unless already close to maximum */
266 : : #ifndef SH_FILLFACTOR
267 : : #define SH_FILLFACTOR (0.9)
268 : : #endif
269 : : /* increase fillfactor if we otherwise would error out */
270 : : #define SH_MAX_FILLFACTOR (0.98)
271 : : /* grow if actual and optimal location bigger than */
272 : : #ifndef SH_GROW_MAX_DIB
273 : : #define SH_GROW_MAX_DIB 25
274 : : #endif
275 : : /* grow if more than elements to move when inserting */
276 : : #ifndef SH_GROW_MAX_MOVE
277 : : #define SH_GROW_MAX_MOVE 150
278 : : #endif
279 : : #ifndef SH_GROW_MIN_FILLFACTOR
280 : : /* but do not grow due to SH_GROW_MAX_* if below */
281 : : #define SH_GROW_MIN_FILLFACTOR 0.1
282 : : #endif
283 : :
284 : : #ifdef SH_STORE_HASH
285 : : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
286 : : #else
287 : : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
288 : : #endif
289 : :
290 : : /*
291 : : * Wrap the following definitions in include guards, to avoid multiple
292 : : * definition errors if this header is included more than once. The rest of
293 : : * the file deliberately has no include guards, because it can be included
294 : : * with different parameters to define functions and types with non-colliding
295 : : * names.
296 : : */
297 : : #ifndef SIMPLEHASH_H
298 : : #define SIMPLEHASH_H
299 : :
300 : : #ifdef FRONTEND
301 : : #define sh_error(...) pg_fatal(__VA_ARGS__)
302 : : #define sh_log(...) pg_log_info(__VA_ARGS__)
303 : : #else
304 : : #define sh_error(...) elog(ERROR, __VA_ARGS__)
305 : : #define sh_log(...) elog(LOG, __VA_ARGS__)
306 : : #endif
307 : :
308 : : #endif
309 : :
310 : : /*
311 : : * Compute allocation size for hashtable. Result can be passed to
312 : : * SH_UPDATE_PARAMETERS. (Keep SH_ESTIMATE_SPACE in sync with this!)
313 : : */
314 : : static inline uint64
760 jdavis@postgresql.or 315 :CBC 108408 : SH_COMPUTE_SIZE(uint64 newsize)
316 : : {
317 : : uint64 size;
318 : :
319 : : /* supporting zero sized hashes would complicate matters */
3350 andres@anarazel.de 320 : 108408 : size = Max(newsize, 2);
321 : :
322 : : /* round up size to the next power of 2, that's how bucketing works */
2078 drowley@postgresql.o 323 : 108408 : size = pg_nextpower2_64(size);
3350 andres@anarazel.de 324 [ - + ]: 108408 : Assert(size <= SH_MAX_SIZE);
325 : :
326 : : /*
327 : : * Verify that allocation of ->data is possible on this platform, without
328 : : * overflowing Size.
329 : : */
1516 tgl@sss.pgh.pa.us 330 [ - + ]: 108408 : if (unlikely((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= SIZE_MAX / 2))
2191 rhaas@postgresql.org 331 [ # # ]:UBC 0 : sh_error("hash table too large");
332 : :
760 jdavis@postgresql.or 333 :CBC 108408 : return size;
334 : : }
335 : :
336 : : /*
337 : : * Update sizing parameters for hashtable. Called when creating and growing
338 : : * the hashtable.
339 : : */
340 : : static inline void
341 : 54204 : SH_UPDATE_PARAMETERS(SH_TYPE * tb, uint64 newsize)
342 : : {
343 : 54204 : uint64 size = SH_COMPUTE_SIZE(newsize);
344 : :
345 : : /* now set size */
3350 andres@anarazel.de 346 : 54204 : tb->size = size;
1586 drowley@postgresql.o 347 : 54204 : tb->sizemask = (uint32) (size - 1);
348 : :
349 : : /*
350 : : * Compute the next threshold at which we need to grow the hash table
351 : : * again.
352 : : */
3350 andres@anarazel.de 353 [ - + ]: 54204 : if (tb->size == SH_MAX_SIZE)
3350 andres@anarazel.de 354 :UBC 0 : tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
355 : : else
3350 andres@anarazel.de 356 :CBC 54204 : tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
357 : 54204 : }
358 : :
359 : : /* return the optimal bucket for the hash */
360 : : static inline uint32
3135 bruce@momjian.us 361 : 25729685 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
362 : : {
3350 andres@anarazel.de 363 : 25729685 : return hash & tb->sizemask;
364 : : }
365 : :
366 : : /* return next bucket after the current, handling wraparound */
367 : : static inline uint32
3135 bruce@momjian.us 368 : 12213063 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
369 : : {
3350 andres@anarazel.de 370 : 12213063 : curelem = (curelem + 1) & tb->sizemask;
371 : :
372 [ - + ]: 12213063 : Assert(curelem != startelem);
373 : :
374 : 12213063 : return curelem;
375 : : }
376 : :
377 : : /* return bucket before the current, handling wraparound */
378 : : static inline uint32
3135 bruce@momjian.us 379 : 1587354 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
380 : : {
3350 andres@anarazel.de 381 : 1587354 : curelem = (curelem - 1) & tb->sizemask;
382 : :
383 [ - + ]: 1587354 : Assert(curelem != startelem);
384 : :
385 : 1587354 : return curelem;
386 : : }
387 : :
388 : : /* return distance between bucket and its optimal position */
389 : : static inline uint32
3135 bruce@momjian.us 390 : 4976823 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
391 : : {
3350 andres@anarazel.de 392 [ + + ]: 4976823 : if (optimal <= bucket)
393 : 4954405 : return bucket - optimal;
394 : : else
395 : 22418 : return (tb->size + bucket) - optimal;
396 : : }
397 : :
398 : : static inline uint32
3135 bruce@momjian.us 399 : 5560831 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
400 : : {
401 : : #ifdef SH_STORE_HASH
3350 andres@anarazel.de 402 : 2560493 : return SH_GET_HASH(tb, entry);
403 : : #else
404 : 3000338 : return SH_HASH_KEY(tb, entry->SH_KEY);
405 : : #endif
406 : : }
407 : :
408 : : /* default memory allocator function */
409 : : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
410 : : static inline void SH_FREE(SH_TYPE * type, void *pointer);
411 : :
412 : : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
413 : :
414 : : /* default memory allocator function */
415 : : static inline void *
3135 bruce@momjian.us 416 : 49437 : SH_ALLOCATE(SH_TYPE * type, Size size)
417 : : {
418 : : #ifdef SH_RAW_ALLOCATOR
2191 rhaas@postgresql.org 419 : 373 : return SH_RAW_ALLOCATOR(size);
420 : : #else
3234 421 : 49064 : return MemoryContextAllocExtended(type->ctx, size,
422 : : MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
423 : : #endif
424 : : }
425 : :
426 : : /* default memory free function */
427 : : static inline void
3135 bruce@momjian.us 428 : 24509 : SH_FREE(SH_TYPE * type, void *pointer)
429 : : {
3234 rhaas@postgresql.org 430 : 24509 : pfree(pointer);
431 : 24509 : }
432 : :
433 : : #endif
434 : :
435 : : /*
436 : : * Create a hash table with enough space for `nelements` distinct members.
437 : : * Memory for the hash table is allocated from the passed-in context. If
438 : : * desired, the array of elements can be allocated using a passed-in allocator;
439 : : * this could be useful in order to place the array of elements in a shared
440 : : * memory, or in a context that will outlive the rest of the hash table.
441 : : * Memory other than for the array of elements will still be allocated from
442 : : * the passed-in context.
443 : : */
444 : : #ifdef SH_RAW_ALLOCATOR
445 : : SH_SCOPE SH_TYPE *
2191 446 : 368 : SH_CREATE(uint32 nelements, void *private_data)
447 : : #else
448 : : SH_SCOPE SH_TYPE *
3232 449 : 51730 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
450 : : #endif
451 : : {
452 : : SH_TYPE *tb;
453 : : uint64 size;
454 : :
455 : : #ifdef SH_RAW_ALLOCATOR
1139 tgl@sss.pgh.pa.us 456 : 368 : tb = (SH_TYPE *) SH_RAW_ALLOCATOR(sizeof(SH_TYPE));
457 : : #else
458 : 51730 : tb = (SH_TYPE *) MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
3350 andres@anarazel.de 459 : 51730 : tb->ctx = ctx;
460 : : #endif
3232 rhaas@postgresql.org 461 : 52098 : tb->private_data = private_data;
462 : :
463 : : /* increase nelements by fillfactor, want to store nelements elements */
3350 andres@anarazel.de 464 [ - + ]: 52098 : size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
465 : :
760 jdavis@postgresql.or 466 : 52098 : size = SH_COMPUTE_SIZE(size);
467 : :
468 : 52098 : tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * size);
469 : :
470 : 52098 : SH_UPDATE_PARAMETERS(tb, size);
3350 andres@anarazel.de 471 : 52098 : return tb;
472 : : }
473 : :
474 : : /* destroy a previously created hash table */
475 : : SH_SCOPE void
3135 bruce@momjian.us 476 : 27170 : SH_DESTROY(SH_TYPE * tb)
477 : : {
3234 rhaas@postgresql.org 478 : 27170 : SH_FREE(tb, tb->data);
3350 andres@anarazel.de 479 : 27170 : pfree(tb);
480 : 27170 : }
481 : :
482 : : /* reset the contents of a previously created hash table */
483 : : SH_SCOPE void
2502 484 : 97493 : SH_RESET(SH_TYPE * tb)
485 : : {
486 : 97493 : memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
487 : 97493 : tb->members = 0;
488 : 97493 : }
489 : :
490 : : /*
491 : : * Grow a hash table to at least `newsize` buckets.
492 : : *
493 : : * Usually this will automatically be called by insertions/deletions, when
494 : : * necessary. But resizing to the exact input size can be advantageous
495 : : * performance-wise, when known at some point.
496 : : */
497 : : SH_SCOPE void
1586 drowley@postgresql.o 498 : 2106 : SH_GROW(SH_TYPE * tb, uint64 newsize)
499 : : {
3350 andres@anarazel.de 500 : 2106 : uint64 oldsize = tb->size;
501 : 2106 : SH_ELEMENT_TYPE *olddata = tb->data;
502 : : SH_ELEMENT_TYPE *newdata;
503 : : uint32 i;
504 : 2106 : uint32 startelem = 0;
505 : : uint32 copyelem;
506 : :
2078 drowley@postgresql.o 507 [ - + ]: 2106 : Assert(oldsize == pg_nextpower2_64(oldsize));
3350 andres@anarazel.de 508 [ - + ]: 2106 : Assert(oldsize != SH_MAX_SIZE);
509 [ - + ]: 2106 : Assert(oldsize < newsize);
510 : :
760 jdavis@postgresql.or 511 : 2106 : newsize = SH_COMPUTE_SIZE(newsize);
512 : :
513 : 2106 : tb->data = (SH_ELEMENT_TYPE *) SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * newsize);
514 : :
515 : : /*
516 : : * Update parameters for new table after allocation succeeds to avoid
517 : : * inconsistent state on OOM.
518 : : */
519 : 2106 : SH_UPDATE_PARAMETERS(tb, newsize);
520 : :
3350 andres@anarazel.de 521 : 2106 : newdata = tb->data;
522 : :
523 : : /*
524 : : * Copy entries from the old data to newdata. We theoretically could use
525 : : * SH_INSERT here, to avoid code duplication, but that's more general than
526 : : * we need. We neither want tb->members increased, nor do we need to do
527 : : * deal with deleted elements, nor do we need to compare keys. So a
528 : : * special-cased implementation is lot faster. As resizing can be time
529 : : * consuming and frequent, that's worthwhile to optimize.
530 : : *
531 : : * To be able to simply move entries over, we have to start not at the
532 : : * first bucket (i.e olddata[0]), but find the first bucket that's either
533 : : * empty, or is occupied by an entry at its optimal position. Such a
534 : : * bucket has to exist in any table with a load factor under 1, as not all
535 : : * buckets are occupied, i.e. there always has to be an empty bucket. By
536 : : * starting at such a bucket we can move the entries to the larger table,
537 : : * without having to deal with conflicts.
538 : : */
539 : :
540 : : /* search for the first element in the hash that's not wrapped around */
541 [ + - ]: 20807 : for (i = 0; i < oldsize; i++)
542 : : {
543 : 20807 : SH_ELEMENT_TYPE *oldentry = &olddata[i];
544 : : uint32 hash;
545 : : uint32 optimal;
546 : :
547 [ + + ]: 20807 : if (oldentry->status != SH_STATUS_IN_USE)
548 : : {
549 : 1096 : startelem = i;
550 : 1096 : break;
551 : : }
552 : :
553 : 19711 : hash = SH_ENTRY_HASH(tb, oldentry);
554 : 19711 : optimal = SH_INITIAL_BUCKET(tb, hash);
555 : :
556 [ + + ]: 19711 : if (optimal == i)
557 : : {
558 : 1010 : startelem = i;
559 : 1010 : break;
560 : : }
561 : : }
562 : :
563 : : /* and copy all elements in the old table */
564 : 2106 : copyelem = startelem;
565 [ + + ]: 527796 : for (i = 0; i < oldsize; i++)
566 : : {
567 : 525690 : SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
568 : :
569 [ + + ]: 525690 : if (oldentry->status == SH_STATUS_IN_USE)
570 : : {
571 : : uint32 hash;
572 : : uint32 startelem2;
573 : : uint32 curelem;
574 : : SH_ELEMENT_TYPE *newentry;
575 : :
576 : 459528 : hash = SH_ENTRY_HASH(tb, oldentry);
1168 drowley@postgresql.o 577 : 459528 : startelem2 = SH_INITIAL_BUCKET(tb, hash);
578 : 459528 : curelem = startelem2;
579 : :
580 : : /* find empty element to put data into */
581 : : while (true)
582 : : {
3350 andres@anarazel.de 583 : 635146 : newentry = &newdata[curelem];
584 : :
585 [ + + ]: 635146 : if (newentry->status == SH_STATUS_EMPTY)
586 : : {
587 : 459528 : break;
588 : : }
589 : :
1168 drowley@postgresql.o 590 : 175618 : curelem = SH_NEXT(tb, curelem, startelem2);
591 : : }
592 : :
593 : : /* copy entry to new slot */
3350 andres@anarazel.de 594 : 459528 : memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
595 : : }
596 : :
597 : : /* can't use SH_NEXT here, would use new size */
598 : 525690 : copyelem++;
599 [ + + ]: 525690 : if (copyelem >= oldsize)
600 : : {
601 : 2106 : copyelem = 0;
602 : : }
603 : : }
604 : :
3234 rhaas@postgresql.org 605 : 2106 : SH_FREE(tb, olddata);
3350 andres@anarazel.de 606 : 2106 : }
607 : :
608 : : /*
609 : : * This is a separate static inline function, so it can be reliably be inlined
610 : : * into its wrapper functions even if SH_SCOPE is extern.
611 : : */
612 : : static inline SH_ELEMENT_TYPE *
2329 jdavis@postgresql.or 613 : 12402379 : SH_INSERT_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
614 : : {
615 : : uint32 startelem;
616 : : uint32 curelem;
617 : : SH_ELEMENT_TYPE *data;
618 : : uint32 insertdist;
619 : :
3310 andres@anarazel.de 620 : 104 : restart:
621 : 12402483 : insertdist = 0;
622 : :
623 : : /*
624 : : * We do the grow check even if the key is actually present, to avoid
625 : : * doing the check inside the loop. This also lets us avoid having to
626 : : * re-find our position in the hashtable after resizing.
627 : : *
628 : : * Note that this also reached when resizing the table due to
629 : : * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
630 : : */
3350 631 [ + + ]: 12402483 : if (unlikely(tb->members >= tb->grow_threshold))
632 : : {
1516 tgl@sss.pgh.pa.us 633 [ - + ]: 2106 : if (unlikely(tb->size == SH_MAX_SIZE))
2191 rhaas@postgresql.org 634 [ # # ]:UBC 0 : sh_error("hash table size exceeded");
635 : :
636 : : /*
637 : : * When optimizing, it can be very useful to print these out.
638 : : */
639 : : /* SH_STAT(tb); */
3350 andres@anarazel.de 640 :CBC 2106 : SH_GROW(tb, tb->size * 2);
641 : : /* SH_STAT(tb); */
642 : : }
643 : :
644 : : /* perform insert, start bucket search at optimal location */
645 : 12402483 : data = tb->data;
646 : 12402483 : startelem = SH_INITIAL_BUCKET(tb, hash);
647 : 12402483 : curelem = startelem;
648 : : while (true)
649 : 4716008 : {
650 : : uint32 curdist;
651 : : uint32 curhash;
652 : : uint32 curoptimal;
653 : 17118491 : SH_ELEMENT_TYPE *entry = &data[curelem];
654 : :
655 : : /* any empty bucket can directly be used */
656 [ + + ]: 17118491 : if (entry->status == SH_STATUS_EMPTY)
657 : : {
658 : 2439521 : tb->members++;
659 : 2439521 : entry->SH_KEY = key;
660 : : #ifdef SH_STORE_HASH
661 : 1406499 : SH_GET_HASH(tb, entry) = hash;
662 : : #endif
663 : 2439521 : entry->status = SH_STATUS_IN_USE;
664 : 2439521 : *found = false;
665 : 2439521 : return entry;
666 : : }
667 : :
668 : : /*
669 : : * If the bucket is not empty, we either found a match (in which case
670 : : * we're done), or we have to decide whether to skip over or move the
671 : : * colliding entry. When the colliding element's distance to its
672 : : * optimal position is smaller than the to-be-inserted entry's, we
673 : : * shift the colliding entry (and its followers) forward by one.
674 : : */
675 : :
676 [ + + ]: 14678970 : if (SH_COMPARE_KEYS(tb, hash, key, entry))
[ + + + + ]
[ + + + -
+ - ]
677 : : {
678 [ - + ]: 9702147 : Assert(entry->status == SH_STATUS_IN_USE);
679 : 9702147 : *found = true;
680 : 9702147 : return entry;
681 : : }
682 : :
683 : 4976823 : curhash = SH_ENTRY_HASH(tb, entry);
684 : 4976823 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
685 : 4976823 : curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
686 : :
687 [ + + ]: 4976823 : if (insertdist > curdist)
688 : : {
689 : 260815 : SH_ELEMENT_TYPE *lastentry = entry;
690 : 260815 : uint32 emptyelem = curelem;
691 : : uint32 moveelem;
3310 692 : 260815 : int32 emptydist = 0;
693 : :
694 : : /* find next empty bucket */
695 : : while (true)
3350 696 : 1342243 : {
697 : : SH_ELEMENT_TYPE *emptyentry;
698 : :
699 : 1603058 : emptyelem = SH_NEXT(tb, emptyelem, startelem);
700 : 1603058 : emptyentry = &data[emptyelem];
701 : :
702 [ + + ]: 1603058 : if (emptyentry->status == SH_STATUS_EMPTY)
703 : : {
704 : 260711 : lastentry = emptyentry;
705 : 260711 : break;
706 : : }
707 : :
708 : : /*
709 : : * To avoid negative consequences from overly imbalanced
710 : : * hashtables, grow the hashtable if collisions would require
711 : : * us to move a lot of entries. The most likely cause of such
712 : : * imbalance is filling a (currently) small table, from a
713 : : * currently big one, in hash-table order. Don't grow if the
714 : : * hashtable would be too empty, to prevent quick space
715 : : * explosion for some weird edge cases.
716 : : */
2878 717 [ + + ]: 1342347 : if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
718 [ + - ]: 104 : ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
719 : : {
3310 720 : 104 : tb->grow_threshold = 0;
721 : 104 : goto restart;
722 : : }
723 : : }
724 : :
725 : : /* shift forward, starting at last occupied element */
726 : :
727 : : /*
728 : : * TODO: This could be optimized to be one memcpy in many cases,
729 : : * excepting wrapping around at the end of ->data. Hasn't shown up
730 : : * in profiles so far though.
731 : : */
3350 732 : 260711 : moveelem = emptyelem;
733 [ + + ]: 1848065 : while (moveelem != curelem)
734 : : {
735 : : SH_ELEMENT_TYPE *moveentry;
736 : :
737 : 1587354 : moveelem = SH_PREV(tb, moveelem, startelem);
738 : 1587354 : moveentry = &data[moveelem];
739 : :
740 : 1587354 : memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
741 : 1587354 : lastentry = moveentry;
742 : : }
743 : :
744 : : /* and fill the now empty spot */
745 : 260711 : tb->members++;
746 : :
747 : 260711 : entry->SH_KEY = key;
748 : : #ifdef SH_STORE_HASH
749 : 176743 : SH_GET_HASH(tb, entry) = hash;
750 : : #endif
751 : 260711 : entry->status = SH_STATUS_IN_USE;
752 : 260711 : *found = false;
753 : 260711 : return entry;
754 : : }
755 : :
756 : 4716008 : curelem = SH_NEXT(tb, curelem, startelem);
757 : 4716008 : insertdist++;
758 : :
759 : : /*
760 : : * To avoid negative consequences from overly imbalanced hashtables,
761 : : * grow the hashtable if collisions lead to large runs. The most
762 : : * likely cause of such imbalance is filling a (currently) small
763 : : * table, from a currently big one, in hash-table order. Don't grow
764 : : * if the hashtable would be too empty, to prevent quick space
765 : : * explosion for some weird edge cases.
766 : : */
2878 767 [ - + ]: 4716008 : if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
2878 andres@anarazel.de 768 [ # # ]:UBC 0 : ((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
769 : : {
3310 770 : 0 : tb->grow_threshold = 0;
771 : 0 : goto restart;
772 : : }
773 : : }
774 : : }
775 : :
776 : : /*
777 : : * Insert the key into the hash-table, set *found to true if the key already
778 : : * exists, false otherwise. Returns the hash-table entry in either case.
779 : : */
780 : : SH_SCOPE SH_ELEMENT_TYPE *
2329 jdavis@postgresql.or 781 :CBC 8559770 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
782 : : {
2042 tgl@sss.pgh.pa.us 783 : 8559770 : uint32 hash = SH_HASH_KEY(tb, key);
784 : :
2329 jdavis@postgresql.or 785 : 8559770 : return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
786 : : }
787 : :
788 : : /*
789 : : * Insert the key into the hash-table using an already-calculated hash. Set
790 : : * *found to true if the key already exists, false otherwise. Returns the
791 : : * hash-table entry in either case.
792 : : */
793 : : SH_SCOPE SH_ELEMENT_TYPE *
794 : 3842609 : SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash, bool *found)
795 : : {
796 : 3842609 : return SH_INSERT_HASH_INTERNAL(tb, key, hash, found);
797 : : }
798 : :
799 : : /*
800 : : * This is a separate static inline function, so it can be reliably be inlined
801 : : * into its wrapper functions even if SH_SCOPE is extern.
802 : : */
803 : : static inline SH_ELEMENT_TYPE *
804 : 6893616 : SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
805 : : {
3350 andres@anarazel.de 806 : 6893616 : const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
807 : 6893616 : uint32 curelem = startelem;
808 : :
809 : : while (true)
810 : 4631842 : {
811 : 11525458 : SH_ELEMENT_TYPE *entry = &tb->data[curelem];
812 : :
813 [ + + ]: 11525458 : if (entry->status == SH_STATUS_EMPTY)
814 : : {
815 : 2143082 : return NULL;
816 : : }
817 : :
818 [ - + ]: 9382376 : Assert(entry->status == SH_STATUS_IN_USE);
819 : :
820 [ + + ]: 9382376 : if (SH_COMPARE_KEYS(tb, hash, key, entry))
[ + + + - ]
[ + + + -
+ - ]
821 : 4750534 : return entry;
822 : :
823 : : /*
824 : : * TODO: we could stop search based on distance. If the current
825 : : * buckets's distance-from-optimal is smaller than what we've skipped
826 : : * already, the entry doesn't exist. Probably only do so if
827 : : * SH_STORE_HASH is defined, to avoid re-computing hashes?
828 : : */
829 : :
830 : 4631842 : curelem = SH_NEXT(tb, curelem, startelem);
831 : : }
832 : : }
833 : :
834 : : /*
835 : : * Lookup entry in hash table. Returns NULL if key not present.
836 : : */
837 : : SH_SCOPE SH_ELEMENT_TYPE *
2329 jdavis@postgresql.or 838 : 6039973 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
839 : : {
2042 tgl@sss.pgh.pa.us 840 : 6039973 : uint32 hash = SH_HASH_KEY(tb, key);
841 : :
2329 jdavis@postgresql.or 842 : 6039973 : return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
843 : : }
844 : :
845 : : /*
846 : : * Lookup entry in hash table using an already-calculated hash.
847 : : *
848 : : * Returns NULL if key not present.
849 : : */
850 : : SH_SCOPE SH_ELEMENT_TYPE *
851 : 853643 : SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
852 : : {
853 : 853643 : return SH_LOOKUP_HASH_INTERNAL(tb, key, hash);
854 : : }
855 : :
856 : : /*
857 : : * Delete entry from hash table by key. Returns whether to-be-deleted key was
858 : : * present.
859 : : */
860 : : SH_SCOPE bool
3135 bruce@momjian.us 861 : 872755 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
862 : : {
3350 andres@anarazel.de 863 : 872755 : uint32 hash = SH_HASH_KEY(tb, key);
864 : 872755 : uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
865 : 872755 : uint32 curelem = startelem;
866 : :
867 : : while (true)
868 : 220424 : {
869 : 1093179 : SH_ELEMENT_TYPE *entry = &tb->data[curelem];
870 : :
871 [ + + ]: 1093179 : if (entry->status == SH_STATUS_EMPTY)
872 : 72821 : return false;
873 : :
874 [ + - ]: 2017911 : if (entry->status == SH_STATUS_IN_USE &&
[ + - + + ]
875 [ + + ]: 1020358 : SH_COMPARE_KEYS(tb, hash, key, entry))
[ # # # # ]
876 : : {
877 : 799934 : SH_ELEMENT_TYPE *lastentry = entry;
878 : :
879 : 799934 : tb->members--;
880 : :
881 : : /*
882 : : * Backward shift following elements till either an empty element
883 : : * or an element at its optimal position is encountered.
884 : : *
885 : : * While that sounds expensive, the average chain length is short,
886 : : * and deletions would otherwise require tombstones.
887 : : */
888 : : while (true)
889 : 62597 : {
890 : : SH_ELEMENT_TYPE *curentry;
891 : : uint32 curhash;
892 : : uint32 curoptimal;
893 : :
894 : 862531 : curelem = SH_NEXT(tb, curelem, startelem);
895 : 862531 : curentry = &tb->data[curelem];
896 : :
897 [ + + ]: 862531 : if (curentry->status != SH_STATUS_IN_USE)
898 : : {
899 : 761923 : lastentry->status = SH_STATUS_EMPTY;
900 : 761923 : break;
901 : : }
902 : :
903 : 100608 : curhash = SH_ENTRY_HASH(tb, curentry);
904 : 100608 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
905 : :
906 : : /* current is at optimal position, done */
907 [ + + ]: 100608 : if (curoptimal == curelem)
908 : : {
909 : 38011 : lastentry->status = SH_STATUS_EMPTY;
910 : 38011 : break;
911 : : }
912 : :
913 : : /* shift */
914 : 62597 : memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
915 : :
916 : 62597 : lastentry = curentry;
917 : : }
918 : :
919 : 799934 : return true;
920 : : }
921 : :
922 : : /* TODO: return false; if distance too big */
923 : :
924 : 220424 : curelem = SH_NEXT(tb, curelem, startelem);
925 : : }
926 : : }
927 : :
928 : : /*
929 : : * Delete entry from hash table by entry pointer
930 : : */
931 : : SH_SCOPE void
1722 drowley@postgresql.o 932 : 1194 : SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
933 : : {
934 : 1194 : SH_ELEMENT_TYPE *lastentry = entry;
935 : 1194 : uint32 hash = SH_ENTRY_HASH(tb, entry);
936 : 1194 : uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
937 : : uint32 curelem;
938 : :
939 : : /* Calculate the index of 'entry' */
940 : 1194 : curelem = entry - &tb->data[0];
941 : :
942 : 1194 : tb->members--;
943 : :
944 : : /*
945 : : * Backward shift following elements till either an empty element or an
946 : : * element at its optimal position is encountered.
947 : : *
948 : : * While that sounds expensive, the average chain length is short, and
949 : : * deletions would otherwise require tombstones.
950 : : */
951 : : while (true)
952 : 2388 : {
953 : : SH_ELEMENT_TYPE *curentry;
954 : : uint32 curhash;
955 : : uint32 curoptimal;
956 : :
957 : 3582 : curelem = SH_NEXT(tb, curelem, startelem);
958 : 3582 : curentry = &tb->data[curelem];
959 : :
960 [ + + ]: 3582 : if (curentry->status != SH_STATUS_IN_USE)
961 : : {
962 : 615 : lastentry->status = SH_STATUS_EMPTY;
963 : 615 : break;
964 : : }
965 : :
966 : 2967 : curhash = SH_ENTRY_HASH(tb, curentry);
967 : 2967 : curoptimal = SH_INITIAL_BUCKET(tb, curhash);
968 : :
969 : : /* current is at optimal position, done */
970 [ + + ]: 2967 : if (curoptimal == curelem)
971 : : {
972 : 579 : lastentry->status = SH_STATUS_EMPTY;
973 : 579 : break;
974 : : }
975 : :
976 : : /* shift */
977 : 2388 : memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
978 : :
979 : 2388 : lastentry = curentry;
980 : : }
981 : 1194 : }
982 : :
983 : : /*
984 : : * Initialize iterator.
985 : : */
986 : : SH_SCOPE void
3135 bruce@momjian.us 987 : 101517 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
988 : : {
3350 andres@anarazel.de 989 : 101517 : uint64 startelem = PG_UINT64_MAX;
990 : :
991 : : /*
992 : : * Search for the first empty element. As deletions during iterations are
993 : : * supported, we want to start/end at an element that cannot be affected
994 : : * by elements being shifted.
995 : : */
894 996 [ + - ]: 141005 : for (uint32 i = 0; i < tb->size; i++)
997 : : {
3350 998 : 141005 : SH_ELEMENT_TYPE *entry = &tb->data[i];
999 : :
1000 [ + + ]: 141005 : if (entry->status != SH_STATUS_IN_USE)
1001 : : {
1002 : 101517 : startelem = i;
1003 : 101517 : break;
1004 : : }
1005 : : }
1006 : :
1007 : : /* we should have found an empty element */
1008 [ - + ]: 101517 : Assert(startelem < SH_MAX_SIZE);
1009 : :
1010 : : /*
1011 : : * Iterate backwards, that allows the current element to be deleted, even
1012 : : * if there are backward shifts
1013 : : */
1014 : 101517 : iter->cur = startelem;
1015 : 101517 : iter->end = iter->cur;
1016 : 101517 : iter->done = false;
1017 : 101517 : }
1018 : :
1019 : : /*
1020 : : * Initialize iterator to a specific bucket. That's really only useful for
1021 : : * cases where callers are partially iterating over the hashspace, and that
1022 : : * iteration deletes and inserts elements based on visited entries. Doing that
1023 : : * repeatedly could lead to an unbalanced keyspace when always starting at the
1024 : : * same position.
1025 : : */
1026 : : SH_SCOPE void
3135 bruce@momjian.us 1027 : 18 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
1028 : : {
1029 : : /*
1030 : : * Iterate backwards, that allows the current element to be deleted, even
1031 : : * if there are backward shifts.
1032 : : */
3100 tgl@sss.pgh.pa.us 1033 : 18 : iter->cur = at & tb->sizemask; /* ensure at is within a valid range */
3350 andres@anarazel.de 1034 : 18 : iter->end = iter->cur;
1035 : 18 : iter->done = false;
1036 : 18 : }
1037 : :
1038 : : /*
1039 : : * Iterate over all entries in the hash-table. Return the next occupied entry,
1040 : : * or NULL if done.
1041 : : *
1042 : : * During iteration the current entry in the hash table may be deleted,
1043 : : * without leading to elements being skipped or returned twice. Additionally
1044 : : * the rest of the table may be modified (i.e. there can be insertions or
1045 : : * deletions), but if so, there's neither a guarantee that all nodes are
1046 : : * visited at least once, nor a guarantee that a node is visited at most once.
1047 : : */
1048 : : SH_SCOPE SH_ELEMENT_TYPE *
3135 bruce@momjian.us 1049 : 2002087 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
1050 : : {
1051 : : /* validate sanity of the given iterator */
59 drowley@postgresql.o 1052 [ - + ]: 2002087 : Assert(iter->cur < tb->size);
1053 [ - + ]: 2002087 : Assert(iter->end < tb->size);
1054 : :
3350 andres@anarazel.de 1055 [ + + ]: 11013561 : while (!iter->done)
1056 : : {
1057 : : SH_ELEMENT_TYPE *elem;
1058 : :
1059 : 10912105 : elem = &tb->data[iter->cur];
1060 : :
1061 : : /* next element in backward direction */
1062 : 10912105 : iter->cur = (iter->cur - 1) & tb->sizemask;
1063 : :
1064 [ + + ]: 10912105 : if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
1065 : 101456 : iter->done = true;
1066 [ + + ]: 10912105 : if (elem->status == SH_STATUS_IN_USE)
1067 : : {
1068 : 1900631 : return elem;
1069 : : }
1070 : : }
1071 : :
1072 : 101456 : return NULL;
1073 : : }
1074 : :
1075 : : /*
1076 : : * Estimate the amount of space needed for a hashtable with nentries entries.
1077 : : * Return SIZE_MAX if that's too many entries.
1078 : : *
1079 : : * nentries is "double" because this is meant for use by the planner,
1080 : : * which typically works with double rowcount estimates. So we'd need to
1081 : : * clamp to integer somewhere and that might as well be here. We do expect
1082 : : * the value not to be NaN or negative, else the result will be garbage.
1083 : : */
1084 : : SH_SCOPE size_t
44 tgl@sss.pgh.pa.us 1085 :GNC 2411 : SH_ESTIMATE_SPACE(double nentries)
1086 : : {
1087 : : uint64 size;
1088 : : uint64 space;
1089 : :
1090 : : /* scale request by SH_FILLFACTOR, as SH_CREATE does */
1091 : 2411 : nentries = nentries / SH_FILLFACTOR;
1092 : :
1093 : : /* fail if we'd overrun SH_MAX_SIZE entries */
1094 [ + + ]: 2411 : if (nentries >= SH_MAX_SIZE)
1095 : 3 : return SIZE_MAX;
1096 : :
1097 : : /* should be safe to convert to uint64 */
1098 : 2408 : size = (uint64) nentries;
1099 : :
1100 : : /* supporting zero sized hashes would complicate matters */
1101 : 2408 : size = Max(size, 2);
1102 : :
1103 : : /* round up size to the next power of 2, that's how bucketing works */
1104 : 2408 : size = pg_nextpower2_64(size);
1105 : :
1106 : : /* calculate space needed for ->data */
1107 : 2408 : space = ((uint64) sizeof(SH_ELEMENT_TYPE)) * size;
1108 : :
1109 : : /* verify that allocation of ->data is possible on this platform */
1110 [ - + ]: 2408 : if (space >= SIZE_MAX / 2)
44 tgl@sss.pgh.pa.us 1111 :UNC 0 : return SIZE_MAX;
1112 : :
44 tgl@sss.pgh.pa.us 1113 :GNC 2408 : return (size_t) space + sizeof(SH_TYPE);
1114 : : }
1115 : :
1116 : : /*
1117 : : * Report some statistics about the state of the hashtable. For
1118 : : * debugging/profiling purposes only.
1119 : : */
1120 : : SH_SCOPE void
3135 bruce@momjian.us 1121 :UBC 0 : SH_STAT(SH_TYPE * tb)
1122 : : {
3350 andres@anarazel.de 1123 : 0 : uint32 max_chain_length = 0;
1124 : 0 : uint32 total_chain_length = 0;
1125 : : double avg_chain_length;
1126 : : double fillfactor;
1127 : : uint32 i;
1128 : :
1139 tgl@sss.pgh.pa.us 1129 : 0 : uint32 *collisions = (uint32 *) palloc0(tb->size * sizeof(uint32));
3350 andres@anarazel.de 1130 : 0 : uint32 total_collisions = 0;
1131 : 0 : uint32 max_collisions = 0;
1132 : : double avg_collisions;
1133 : :
1134 [ # # ]: 0 : for (i = 0; i < tb->size; i++)
1135 : : {
1136 : : uint32 hash;
1137 : : uint32 optimal;
1138 : : uint32 dist;
1139 : : SH_ELEMENT_TYPE *elem;
1140 : :
1141 : 0 : elem = &tb->data[i];
1142 : :
1143 [ # # ]: 0 : if (elem->status != SH_STATUS_IN_USE)
1144 : 0 : continue;
1145 : :
1146 : 0 : hash = SH_ENTRY_HASH(tb, elem);
1147 : 0 : optimal = SH_INITIAL_BUCKET(tb, hash);
1148 : 0 : dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
1149 : :
1150 [ # # ]: 0 : if (dist > max_chain_length)
1151 : 0 : max_chain_length = dist;
1152 : 0 : total_chain_length += dist;
1153 : :
1154 : 0 : collisions[optimal]++;
1155 : : }
1156 : :
1157 [ # # ]: 0 : for (i = 0; i < tb->size; i++)
1158 : : {
1159 : 0 : uint32 curcoll = collisions[i];
1160 : :
1161 [ # # ]: 0 : if (curcoll == 0)
1162 : 0 : continue;
1163 : :
1164 : : /* single contained element is not a collision */
1165 : 0 : curcoll--;
1166 : 0 : total_collisions += curcoll;
1167 [ # # ]: 0 : if (curcoll > max_collisions)
1168 : 0 : max_collisions = curcoll;
1169 : : }
1170 : :
1171 : : /* large enough to be worth freeing, even if just used for debugging */
618 1172 : 0 : pfree(collisions);
1173 : :
3350 1174 [ # # ]: 0 : if (tb->members > 0)
1175 : : {
1176 : 0 : fillfactor = tb->members / ((double) tb->size);
1177 : 0 : avg_chain_length = ((double) total_chain_length) / tb->members;
1178 : 0 : avg_collisions = ((double) total_collisions) / tb->members;
1179 : : }
1180 : : else
1181 : : {
1182 : 0 : fillfactor = 0;
1183 : 0 : avg_chain_length = 0;
1184 : 0 : avg_collisions = 0;
1185 : : }
1186 : :
1539 peter@eisentraut.org 1187 [ # # ]: 0 : sh_log("size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
1188 : : tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
1189 : : total_collisions, max_collisions, avg_collisions);
3350 andres@anarazel.de 1190 : 0 : }
1191 : :
1192 : : #endif /* SH_DEFINE */
1193 : :
1194 : :
1195 : : /* undefine external parameters, so next hash table can be defined */
1196 : : #undef SH_PREFIX
1197 : : #undef SH_KEY_TYPE
1198 : : #undef SH_KEY
1199 : : #undef SH_ELEMENT_TYPE
1200 : : #undef SH_HASH_KEY
1201 : : #undef SH_SCOPE
1202 : : #undef SH_DECLARE
1203 : : #undef SH_DEFINE
1204 : : #undef SH_GET_HASH
1205 : : #undef SH_STORE_HASH
1206 : : #undef SH_USE_NONDEFAULT_ALLOCATOR
1207 : : #undef SH_EQUAL
1208 : :
1209 : : /* undefine locally declared macros */
1210 : : #undef SH_MAKE_PREFIX
1211 : : #undef SH_MAKE_NAME
1212 : : #undef SH_MAKE_NAME_
1213 : : #undef SH_FILLFACTOR
1214 : : #undef SH_MAX_FILLFACTOR
1215 : : #undef SH_GROW_MAX_DIB
1216 : : #undef SH_GROW_MAX_MOVE
1217 : : #undef SH_GROW_MIN_FILLFACTOR
1218 : : #undef SH_MAX_SIZE
1219 : :
1220 : : /* types */
1221 : : #undef SH_TYPE
1222 : : #undef SH_STATUS
1223 : : #undef SH_STATUS_EMPTY
1224 : : #undef SH_STATUS_IN_USE
1225 : : #undef SH_ITERATOR
1226 : :
1227 : : /* external function names */
1228 : : #undef SH_CREATE
1229 : : #undef SH_DESTROY
1230 : : #undef SH_RESET
1231 : : #undef SH_INSERT
1232 : : #undef SH_INSERT_HASH
1233 : : #undef SH_DELETE_ITEM
1234 : : #undef SH_DELETE
1235 : : #undef SH_LOOKUP
1236 : : #undef SH_LOOKUP_HASH
1237 : : #undef SH_GROW
1238 : : #undef SH_START_ITERATE
1239 : : #undef SH_START_ITERATE_AT
1240 : : #undef SH_ITERATE
1241 : : #undef SH_ALLOCATE
1242 : : #undef SH_FREE
1243 : : #undef SH_ESTIMATE_SPACE
1244 : : #undef SH_STAT
1245 : :
1246 : : /* internal function names */
1247 : : #undef SH_COMPUTE_SIZE
1248 : : #undef SH_UPDATE_PARAMETERS
1249 : : #undef SH_COMPARE_KEYS
1250 : : #undef SH_INITIAL_BUCKET
1251 : : #undef SH_NEXT
1252 : : #undef SH_PREV
1253 : : #undef SH_DISTANCE_FROM_OPTIMAL
1254 : : #undef SH_ENTRY_HASH
1255 : : #undef SH_INSERT_HASH_INTERNAL
1256 : : #undef SH_LOOKUP_HASH_INTERNAL
|