Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * shmem.c
4 : : * create shared memory and initialize shared memory data structures.
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/ipc/shmem.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : /*
16 : : * POSTGRES processes share one or more regions of shared memory.
17 : : * The shared memory is created by a postmaster and is inherited
18 : : * by each backend via fork() (or, in some ports, via other OS-specific
19 : : * methods). The routines in this file are used for allocating and
20 : : * binding to shared memory data structures.
21 : : *
22 : : * This module provides facilities to allocate fixed-size structures in shared
23 : : * memory, for things like variables shared between all backend processes.
24 : : * Each such structure has a string name to identify it, specified when it is
25 : : * requested. shmem_hash.c provides a shared hash table implementation on top
26 : : * of that.
27 : : *
28 : : * Shared memory areas should usually not be allocated after postmaster
29 : : * startup, although we do allow small allocations later for the benefit of
30 : : * extension modules that are loaded after startup. Despite that allowance,
31 : : * extensions that need shared memory should be added in
32 : : * shared_preload_libraries, because the allowance is quite small and there is
33 : : * no guarantee that any memory is available after startup.
34 : : *
35 : : * Nowadays, there is also another way to allocate shared memory called
36 : : * Dynamic Shared Memory. See dsm.c for that facility. One big difference
37 : : * between traditional shared memory handled by shmem.c and dynamic shared
38 : : * memory is that traditional shared memory areas are mapped to the same
39 : : * address in all processes, so you can use normal pointers in shared memory
40 : : * structs. With Dynamic Shared Memory, you must use offsets or DSA pointers
41 : : * instead.
42 : : *
43 : : * Shared memory managed by shmem.c can never be freed, once allocated. Each
44 : : * hash table has its own free list, so hash buckets can be reused when an
45 : : * item is deleted.
46 : : *
47 : : * Usage
48 : : * -----
49 : : *
50 : : * To allocate shared memory, you need to register a set of callback functions
51 : : * which handle the lifecycle of the allocation. In the request_fn callback,
52 : : * call ShmemRequestStruct() with the desired name and size. When the area is
53 : : * later allocated or attached to, the global variable pointed to by the .ptr
54 : : * option is set to the shared memory location of the allocation. The init_fn
55 : : * callback can perform additional initialization.
56 : : *
57 : : * typedef struct MyShmemData {
58 : : * ...
59 : : * } MyShmemData;
60 : : *
61 : : * static MyShmemData *MyShmem;
62 : : *
63 : : * static void my_shmem_request(void *arg);
64 : : * static void my_shmem_init(void *arg);
65 : : *
66 : : * const ShmemCallbacks MyShmemCallbacks = {
67 : : * .request_fn = my_shmem_request,
68 : : * .init_fn = my_shmem_init,
69 : : * };
70 : : *
71 : : * static void
72 : : * my_shmem_request(void *arg)
73 : : * {
74 : : * ShmemRequestStruct(.name = "My shmem area",
75 : : * .size = sizeof(MyShmemData),
76 : : * .ptr = (void **) &MyShmem,
77 : : * );
78 : : * }
79 : : *
80 : : * In builtin PostgreSQL code, add the callbacks to the list in
81 : : * src/include/storage/subsystemlist.h. In an add-in module, you can register
82 : : * the callbacks by calling RegisterShmemCallbacks(&MyShmemCallbacks) in the
83 : : * extension's _PG_init() function.
84 : : *
85 : : * Lifecycle
86 : : * ---------
87 : : *
88 : : * Initializing shared memory happens in multiple phases. In the first phase,
89 : : * during postmaster startup, all the request_fn callbacks are called. Only
90 : : * after all the request_fn callbacks have been called and all the shmem areas
91 : : * have been requested by the ShmemRequestStruct() calls we know how much
92 : : * shared memory we need in total. After that, postmaster allocates global
93 : : * shared memory segment, and calls all the init_fn callbacks to initialize
94 : : * all the requested shmem areas.
95 : : *
96 : : * In standard Unix-ish environments, individual backends do not need to
97 : : * re-establish their local pointers into shared memory, because they inherit
98 : : * correct values of those variables via fork() from the postmaster. However,
99 : : * this does not work in the EXEC_BACKEND case. In ports using EXEC_BACKEND,
100 : : * backend startup also calls the shmem_request callbacks to re-establish the
101 : : * knowledge about each shared memory area, sets the pointer variables
102 : : * (*options->ptr), and calls the attach_fn callback, if any, for additional
103 : : * per-backend setup.
104 : : *
105 : : * Legacy ShmemInitStruct()/ShmemInitHash() functions
106 : : * --------------------------------------------------
107 : : *
108 : : * ShmemInitStruct()/ShmemInitHash() is another way of registering shmem
109 : : * areas. It pre-dates the ShmemRequestStruct()/ShmemRequestHash() functions,
110 : : * and should not be used in new code, but as of this writing it is still
111 : : * widely used in extensions.
112 : : *
113 : : * To allocate a shmem area with ShmemInitStruct(), you need to separately
114 : : * register the size needed for the area by calling RequestAddinShmemSpace()
115 : : * from the extension's shmem_request_hook, and allocate the area by calling
116 : : * ShmemInitStruct() from the extension's shmem_startup_hook. There are no
117 : : * init/attach callbacks. Instead, the caller of ShmemInitStruct() must check
118 : : * the return status of ShmemInitStruct() and initialize the struct if it was
119 : : * not previously initialized.
120 : : *
121 : : * Calling ShmemAlloc() directly
122 : : * -----------------------------
123 : : *
124 : : * There's a more low-level way of allocating shared memory too: you can call
125 : : * ShmemAlloc() directly. It's used to implement the higher level mechanisms,
126 : : * and should generally not be called directly.
127 : : */
128 : :
129 : : #include "postgres.h"
130 : :
131 : : #include <unistd.h>
132 : :
133 : : #include "access/slru.h"
134 : : #include "common/int.h"
135 : : #include "fmgr.h"
136 : : #include "funcapi.h"
137 : : #include "miscadmin.h"
138 : : #include "port/pg_bitutils.h"
139 : : #include "port/pg_numa.h"
140 : : #include "storage/lwlock.h"
141 : : #include "storage/pg_shmem.h"
142 : : #include "storage/shmem.h"
143 : : #include "storage/shmem_internal.h"
144 : : #include "storage/spin.h"
145 : : #include "utils/builtins.h"
146 : : #include "utils/tuplestore.h"
147 : :
148 : : /*
149 : : * Registered callbacks.
150 : : *
151 : : * During postmaster startup, we accumulate the callbacks from all subsystems
152 : : * in this list.
153 : : *
154 : : * This is in process private memory, although on Unix-like systems, we expect
155 : : * all the registrations to happen at postmaster startup time and be inherited
156 : : * by all the child processes via fork().
157 : : */
158 : : static List *registered_shmem_callbacks;
159 : :
160 : : /*
161 : : * In the shmem request phase, all the shmem areas requested with the
162 : : * ShmemRequest*() functions are accumulated here.
163 : : */
164 : : typedef struct
165 : : {
166 : : ShmemStructOpts *options;
167 : : ShmemRequestKind kind;
168 : : } ShmemRequest;
169 : :
170 : : static List *pending_shmem_requests;
171 : :
172 : : /*
173 : : * Per-process state machine, for sanity checking that we do things in the
174 : : * right order.
175 : : *
176 : : * Postmaster:
177 : : * INITIAL -> REQUESTING -> INITIALIZING -> DONE
178 : : *
179 : : * Backends in EXEC_BACKEND mode:
180 : : * INITIAL -> REQUESTING -> ATTACHING -> DONE
181 : : *
182 : : * Late request:
183 : : * DONE -> REQUESTING -> AFTER_STARTUP_ATTACH_OR_INIT -> DONE
184 : : */
185 : : enum shmem_request_state
186 : : {
187 : : /* Initial state */
188 : : SRS_INITIAL,
189 : :
190 : : /*
191 : : * When we start calling the shmem_request callbacks, we enter the
192 : : * SRS_REQUESTING phase. All ShmemRequestStruct calls happen in this
193 : : * state.
194 : : */
195 : : SRS_REQUESTING,
196 : :
197 : : /*
198 : : * Postmaster has finished all shmem requests, and is now initializing the
199 : : * shared memory segment. init_fn callbacks are called in this state.
200 : : */
201 : : SRS_INITIALIZING,
202 : :
203 : : /*
204 : : * A postmaster child process is starting up. attach_fn callbacks are
205 : : * called in this state.
206 : : */
207 : : SRS_ATTACHING,
208 : :
209 : : /* An after-startup allocation or attachment is in progress */
210 : : SRS_AFTER_STARTUP_ATTACH_OR_INIT,
211 : :
212 : : /* Normal state after shmem initialization / attachment */
213 : : SRS_DONE,
214 : : };
215 : : static enum shmem_request_state shmem_request_state = SRS_INITIAL;
216 : :
217 : : /*
218 : : * This is the first data structure stored in the shared memory segment, at
219 : : * the offset that PGShmemHeader->content_offset points to. Allocations by
220 : : * ShmemAlloc() are carved out of the space after this.
221 : : *
222 : : * For the base pointer and the total size of the shmem segment, we rely on
223 : : * the PGShmemHeader.
224 : : */
225 : : typedef struct ShmemAllocatorData
226 : : {
227 : : Size free_offset; /* offset to first free space from ShmemBase */
228 : :
229 : : /* protects 'free_offset' */
230 : : slock_t shmem_lock;
231 : :
232 : : HASHHDR *index; /* location of ShmemIndex */
233 : : size_t index_size; /* size of shmem region holding ShmemIndex */
234 : : LWLock index_lock; /* protects ShmemIndex */
235 : : } ShmemAllocatorData;
236 : :
237 : : #define ShmemIndexLock (&ShmemAllocator->index_lock)
238 : :
239 : : static void *ShmemAllocRaw(Size size, Size alignment, Size *allocated_size);
240 : :
241 : : /* shared memory global variables */
242 : :
243 : : static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
244 : : static void *ShmemBase; /* start address of shared memory */
245 : : static void *ShmemEnd; /* end+1 address of shared memory */
246 : :
247 : : static ShmemAllocatorData *ShmemAllocator;
248 : :
249 : : /*
250 : : * ShmemIndex is a global directory of shmem areas, itself also stored in the
251 : : * shared memory.
252 : : */
253 : : static HTAB *ShmemIndex;
254 : :
255 : : /* max size of data structure string name */
256 : : #define SHMEM_INDEX_KEYSIZE (48)
257 : :
258 : : /*
259 : : * # of additional entries to reserve in the shmem index table, for
260 : : * allocations after postmaster startup. (This is not a hard limit, the hash
261 : : * table can grow larger than that if there is shared memory available)
262 : : */
263 : : #define SHMEM_INDEX_ADDITIONAL_SIZE (128)
264 : :
265 : : /* this is a hash bucket in the shmem index table */
266 : : typedef struct
267 : : {
268 : : char key[SHMEM_INDEX_KEYSIZE]; /* string name */
269 : : void *location; /* location in shared mem */
270 : : Size size; /* # bytes requested for the structure */
271 : : Size allocated_size; /* # bytes actually allocated */
272 : : } ShmemIndexEnt;
273 : :
274 : : /* To get reliable results for NUMA inquiry we need to "touch pages" once */
275 : : static bool firstNumaTouch = true;
276 : :
277 : : static void CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks);
278 : : static void InitShmemIndexEntry(ShmemRequest *request);
279 : : static bool AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok);
280 : :
281 : : Datum pg_numa_available(PG_FUNCTION_ARGS);
282 : :
283 : : /*
284 : : * ShmemRequestStruct() --- request a named shared memory area
285 : : *
286 : : * Subsystems call this to register their shared memory needs. This is
287 : : * usually done early in postmaster startup, before the shared memory segment
288 : : * has been created, so that the size can be included in the estimate for
289 : : * total amount of shared memory needed. We set aside a small amount of
290 : : * memory for allocations that happen later, for the benefit of non-preloaded
291 : : * extensions, but that should not be relied upon.
292 : : *
293 : : * This does not yet allocate the memory, but merely registers the need for
294 : : * it. The actual allocation happens later in the postmaster startup
295 : : * sequence.
296 : : *
297 : : * This must be called from a shmem_request callback function, registered with
298 : : * RegisterShmemCallbacks(). This enforces a coding pattern that works the
299 : : * same in normal Unix systems and with EXEC_BACKEND. On Unix systems, the
300 : : * shmem_request callbacks are called once, early in postmaster startup, and
301 : : * the child processes inherit the struct descriptors and any other
302 : : * per-process state from the postmaster. In EXEC_BACKEND mode, shmem_request
303 : : * callbacks are *also* called in each backend, at backend startup, to
304 : : * re-establish the struct descriptors. By calling the same function in both
305 : : * cases, we ensure that all the shmem areas are registered the same way in
306 : : * all processes.
307 : : *
308 : : * 'options' defines the name and size of the area, and any other optional
309 : : * features. Leave unused options as zeros. The options are copied to
310 : : * longer-lived memory, so it doesn't need to live after the
311 : : * ShmemRequestStruct() call and can point to a local variable in the calling
312 : : * function. The 'name' must point to a long-lived string though, only the
313 : : * pointer to it is copied.
314 : : */
315 : : void
29 heikki.linnakangas@i 316 :GNC 79610 : ShmemRequestStructWithOpts(const ShmemStructOpts *options)
317 : : {
318 : : ShmemStructOpts *options_copy;
319 : :
320 : 79610 : options_copy = MemoryContextAlloc(TopMemoryContext,
321 : : sizeof(ShmemStructOpts));
322 : 79610 : memcpy(options_copy, options, sizeof(ShmemStructOpts));
323 : :
324 : 79610 : ShmemRequestInternal(options_copy, SHMEM_KIND_STRUCT);
29 heikki.linnakangas@i 325 :CBC 79610 : }
326 : :
327 : : /*
328 : : * Internal workhorse of ShmemRequestStruct() and ShmemRequestHash().
329 : : *
330 : : * Note: Unlike in the public ShmemRequestStruct() and ShmemRequestHash()
331 : : * functions, 'options' is *not* copied. It must be allocated in
332 : : * TopMemoryContext by the caller, and will be freed after the init/attach
333 : : * callbacks have been called. This allows ShmemRequestHash() to pass a
334 : : * pointer to the extended ShmemHashOpts struct instead.
335 : : */
void
ShmemRequestInternal(ShmemStructOpts *options, ShmemRequestKind kind)
{
	ShmemRequest *request;

	/* Check the options: a name is mandatory, it keys the shmem index */
	if (options->name == NULL)
		elog(ERROR, "shared memory request is missing 'name' option");

	if (IsUnderPostmaster)
	{
		/*
		 * In a child process the area may already exist, so the special
		 * SHMEM_ATTACH_UNKNOWN_SIZE value is accepted in addition to a
		 * positive size.
		 */
		if (options->size <= 0 && options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
			elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
				 options->size, options->name);
	}
	else
	{
		/*
		 * During postmaster/standalone startup the area doesn't exist yet,
		 * so the caller must supply a real, positive size.
		 */
		if (options->size == SHMEM_ATTACH_UNKNOWN_SIZE)
			elog(ERROR, "SHMEM_ATTACH_UNKNOWN_SIZE cannot be used during startup");
		if (options->size <= 0)
			elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
				 options->size, options->name);
	}

	/* Alignment, if given, must be a power of two */
	if (options->alignment != 0 && pg_nextpower2_size_t(options->alignment) != options->alignment)
		elog(ERROR, "invalid alignment %zu for shared memory request for \"%s\"",
			 options->alignment, options->name);

	/* Check that we're in the right state */
	if (shmem_request_state != SRS_REQUESTING)
		elog(ERROR, "ShmemRequestStruct can only be called from a shmem_request callback");

	/* Check that it's not already registered in this process */
	foreach_ptr(ShmemRequest, existing, pending_shmem_requests)
	{
		if (strcmp(existing->options->name, options->name) == 0)
			ereport(ERROR,
					(errmsg("shared memory struct \"%s\" is already registered",
							options->name)));
	}

	/*
	 * Request looks valid, remember it.  Note that 'options' is NOT copied
	 * here; per this function's contract the caller allocated it in
	 * TopMemoryContext and it is freed after init/attach.
	 */
	request = palloc(sizeof(ShmemRequest));
	request->options = options;
	request->kind = kind;
	pending_shmem_requests = lappend(pending_shmem_requests, request);
}
383 : :
384 : : /*
385 : : * ShmemGetRequestedSize() --- estimate the total size of all registered shared
386 : : * memory structures.
387 : : *
388 : : * This is called at postmaster startup, before the shared memory segment has
389 : : * been created.
390 : : */
391 : : size_t
392 : 2311 : ShmemGetRequestedSize(void)
393 : : {
394 : : size_t size;
395 : :
396 : : /* memory needed for the ShmemIndex */
397 : 2311 : size = hash_estimate_size(list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE,
398 : : sizeof(ShmemIndexEnt));
399 : 2311 : size = CACHELINEALIGN(size);
400 : :
401 : : /* memory needed for all the requested areas */
402 [ + - + + : 187208 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
403 : : {
404 : 182586 : size_t alignment = request->options->alignment;
405 : :
406 : : /* pad the start address for alignment like ShmemAllocRaw() does */
407 [ + + ]: 182586 : if (alignment < PG_CACHE_LINE_SIZE)
408 : 175653 : alignment = PG_CACHE_LINE_SIZE;
409 : 182586 : size = TYPEALIGN(alignment, size);
410 : :
411 : 182586 : size = add_size(size, request->options->size);
412 : : }
413 : :
414 : 2311 : return size;
415 : : }
416 : :
417 : : /*
418 : : * ShmemInitRequested() --- allocate and initialize requested shared memory
419 : : * structures.
420 : : *
421 : : * This is called once at postmaster startup, after the shared memory segment
422 : : * has been created.
423 : : */
424 : : void
425 : 1241 : ShmemInitRequested(void)
426 : : {
427 : : /* should be called only by the postmaster or a standalone backend */
428 [ - + ]: 1241 : Assert(!IsUnderPostmaster);
429 [ - + ]: 1241 : Assert(shmem_request_state == SRS_INITIALIZING);
430 : :
431 : : /*
432 : : * Initialize the ShmemIndex entries and perform basic initialization of
433 : : * all the requested memory areas. There are no concurrent processes yet,
434 : : * so no need for locking.
435 : : */
436 [ + - + + : 100527 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
437 : : {
438 : 98045 : InitShmemIndexEntry(request);
439 : 98045 : pfree(request->options);
440 : : }
441 : 1241 : list_free_deep(pending_shmem_requests);
442 : 1241 : pending_shmem_requests = NIL;
443 : :
444 : : /*
445 : : * Call the subsystem-specific init callbacks to finish initialization of
446 : : * all the areas.
447 : : */
448 [ + - + + : 57115 : foreach_ptr(const ShmemCallbacks, callbacks, registered_shmem_callbacks)
+ + ]
449 : : {
450 [ + + ]: 54633 : if (callbacks->init_fn)
451 : 49665 : callbacks->init_fn(callbacks->opaque_arg);
452 : : }
453 : :
454 : 1241 : shmem_request_state = SRS_DONE;
455 : 1241 : }
456 : :
457 : : /*
458 : : * Re-establish process private state related to shmem areas.
459 : : *
460 : : * This is called at backend startup in EXEC_BACKEND mode, in every backend.
461 : : */
#ifdef EXEC_BACKEND
void
ShmemAttachRequested(void)
{
	/* Must be initializing a (non-standalone) backend */
	Assert(IsUnderPostmaster);
	Assert(ShmemAllocator->index != NULL);
	Assert(shmem_request_state == SRS_REQUESTING);
	shmem_request_state = SRS_ATTACHING;

	LWLockAcquire(ShmemIndexLock, LW_SHARED);

	/*
	 * Look up every requested area in the shmem index and re-establish the
	 * process-local pointers to it.  The option structs are not needed
	 * after that.
	 */
	foreach_ptr(ShmemRequest, req, pending_shmem_requests)
	{
		AttachShmemIndexEntry(req, false);
		pfree(req->options);
	}
	list_free_deep(pending_shmem_requests);
	pending_shmem_requests = NIL;

	/* Give each subsystem a chance to do per-backend setup */
	foreach_ptr(const ShmemCallbacks, cb, registered_shmem_callbacks)
	{
		if (cb->attach_fn)
			cb->attach_fn(cb->opaque_arg);
	}

	LWLockRelease(ShmemIndexLock);

	shmem_request_state = SRS_DONE;
}
#endif
501 : :
502 : : /*
503 : : * Insert requested shmem area into the shared memory index and initialize it.
504 : : *
505 : : * Note that this only does performs basic initialization depending on
506 : : * ShmemRequestKind, like setting the global pointer variable to the area for
507 : : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
508 : : * This does *not* call the subsystem-specific init callbacks. That's done
509 : : * later after all the shmem areas have been initialized or attached to.
510 : : */
511 : : static void
512 : 98046 : InitShmemIndexEntry(ShmemRequest *request)
513 : : {
514 : 98046 : const char *name = request->options->name;
515 : : ShmemIndexEnt *index_entry;
516 : : bool found;
517 : : size_t allocated_size;
518 : : void *structPtr;
519 : :
520 : : /* look it up in the shmem index */
521 : : index_entry = (ShmemIndexEnt *)
522 : 98046 : hash_search(ShmemIndex, name, HASH_ENTER_NULL, &found);
523 [ - + ]: 98046 : if (found)
29 heikki.linnakangas@i 524 [ # # ]:UNC 0 : elog(ERROR, "shared memory struct \"%s\" is already initialized", name);
29 heikki.linnakangas@i 525 [ - + ]:GNC 98046 : if (!index_entry)
526 : : {
527 : : /* tried to add it to the hash table, but there was no space */
29 heikki.linnakangas@i 528 [ # # ]:UNC 0 : ereport(ERROR,
529 : : (errcode(ERRCODE_OUT_OF_MEMORY),
530 : : errmsg("could not create ShmemIndex entry for data structure \"%s\"",
531 : : name)));
532 : : }
533 : :
534 : : /*
535 : : * We inserted the entry to the shared memory index. Allocate requested
536 : : * amount of shared memory for it, and initialize the index entry.
537 : : */
29 heikki.linnakangas@i 538 :GNC 98046 : structPtr = ShmemAllocRaw(request->options->size,
539 : 98046 : request->options->alignment,
540 : : &allocated_size);
541 [ - + ]: 98046 : if (structPtr == NULL)
542 : : {
543 : : /* out of memory; remove the failed ShmemIndex entry */
29 heikki.linnakangas@i 544 :UNC 0 : hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
545 [ # # ]: 0 : ereport(ERROR,
546 : : (errcode(ERRCODE_OUT_OF_MEMORY),
547 : : errmsg("not enough shared memory for data structure"
548 : : " \"%s\" (%zd bytes requested)",
549 : : name, request->options->size)));
550 : : }
29 heikki.linnakangas@i 551 :GNC 98046 : index_entry->size = request->options->size;
552 : 98046 : index_entry->allocated_size = allocated_size;
553 : 98046 : index_entry->location = structPtr;
554 : :
555 : : /* Initialize depending on the kind of shmem area it is */
556 [ + + + - ]: 98046 : switch (request->kind)
557 : : {
558 : 79416 : case SHMEM_KIND_STRUCT:
559 [ + - ]: 79416 : if (request->options->ptr)
560 : 79416 : *(request->options->ptr) = index_entry->location;
561 : 79416 : break;
562 : 9939 : case SHMEM_KIND_HASH:
563 : 9939 : shmem_hash_init(structPtr, request->options);
564 : 9939 : break;
565 : 8691 : case SHMEM_KIND_SLRU:
566 : 8691 : shmem_slru_init(structPtr, request->options);
567 : 8691 : break;
568 : : }
569 : 98046 : }
570 : :
571 : : /*
572 : : * Look up a named shmem area in the shared memory index and attach to it.
573 : : *
574 : : * Note that this only performs the basic attachment actions depending on
575 : : * ShmemRequestKind, like setting the global pointer variable to the area for
576 : : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
577 : : * This does *not* call the subsystem-specific attach callbacks. That's done
578 : : * later after all the shmem areas have been initialized or attached to.
579 : : */
580 : : static bool
581 : 2 : AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok)
582 : : {
583 : 2 : const char *name = request->options->name;
584 : : ShmemIndexEnt *index_entry;
585 : :
586 : : /* Look it up in the shmem index */
587 : : index_entry = (ShmemIndexEnt *)
588 : 2 : hash_search(ShmemIndex, name, HASH_FIND, NULL);
589 [ - + ]: 2 : if (!index_entry)
590 : : {
29 heikki.linnakangas@i 591 [ # # ]:UNC 0 : if (!missing_ok)
592 [ # # ]: 0 : ereport(ERROR,
593 : : (errmsg("could not find ShmemIndex entry for data structure \"%s\"",
594 : : request->options->name)));
595 : 0 : return false;
596 : : }
597 : :
598 : : /* Check that the size in the index matches the request */
29 heikki.linnakangas@i 599 [ - + ]:GNC 2 : if (index_entry->size != request->options->size &&
29 heikki.linnakangas@i 600 [ # # ]:UNC 0 : request->options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
601 : : {
602 [ # # ]: 0 : ereport(ERROR,
603 : : (errmsg("shared memory struct \"%s\" was created with"
604 : : " different size: existing %zu, requested %zd",
605 : : name, index_entry->size, request->options->size)));
606 : : }
607 : :
608 : : /*
609 : : * Re-establish the caller's pointer variable, or do other actions to
610 : : * attach depending on the kind of shmem area it is.
611 : : */
29 heikki.linnakangas@i 612 [ + - - - ]:GNC 2 : switch (request->kind)
613 : : {
614 : 2 : case SHMEM_KIND_STRUCT:
615 [ + - ]: 2 : if (request->options->ptr)
616 : 2 : *(request->options->ptr) = index_entry->location;
617 : 2 : break;
29 heikki.linnakangas@i 618 :UNC 0 : case SHMEM_KIND_HASH:
619 : 0 : shmem_hash_attach(index_entry->location, request->options);
620 : 0 : break;
621 : 0 : case SHMEM_KIND_SLRU:
622 : 0 : shmem_slru_attach(index_entry->location, request->options);
623 : 0 : break;
624 : : }
625 : :
29 heikki.linnakangas@i 626 :GNC 2 : return true;
627 : : }
628 : :
629 : : /*
630 : : * InitShmemAllocator() --- set up basic pointers to shared memory.
631 : : *
632 : : * Called at postmaster or stand-alone backend startup, to initialize the
633 : : * allocator's data structure in the shared memory segment. In EXEC_BACKEND,
634 : : * this is also called at backend startup, to set up pointers to the
635 : : * already-initialized data structure.
636 : : */
void
InitShmemAllocator(PGShmemHeader *seghdr)
{
	Size		offset;
	int64		hash_nelems;
	HASHCTL		info;
	int			hash_flags;

#ifndef EXEC_BACKEND
	/* Without EXEC_BACKEND, only postmaster/standalone ever gets here */
	Assert(!IsUnderPostmaster);
#endif
	Assert(seghdr != NULL);

	/*
	 * Advance the state machine.  A child backend (EXEC_BACKEND) merely
	 * attaches; the creating process moves from REQUESTING to INITIALIZING.
	 */
	if (IsUnderPostmaster)
	{
		Assert(shmem_request_state == SRS_INITIAL);
	}
	else
	{
		Assert(shmem_request_state == SRS_REQUESTING);
		shmem_request_state = SRS_INITIALIZING;
	}

	/*
	 * We assume the pointer and offset are MAXALIGN.  Not a hard requirement,
	 * but it's true today and keeps the math below simpler.
	 */
	Assert(seghdr == (void *) MAXALIGN(seghdr));
	Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));

	/*
	 * Allocations after this point should go through ShmemAlloc, which
	 * expects to allocate everything on cache line boundaries.  Make sure the
	 * first allocation begins on a cache line boundary.
	 */
	offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
	if (offset > seghdr->totalsize)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of shared memory (%zu bytes requested)",
						offset)));

	/*
	 * In postmaster or stand-alone backend, initialize the shared memory
	 * allocator so that we can allocate shared memory for ShmemIndex using
	 * ShmemAlloc().  In a regular backend just set up the pointers required
	 * by ShmemAlloc().
	 */
	ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
	if (!IsUnderPostmaster)
	{
		SpinLockInit(&ShmemAllocator->shmem_lock);
		ShmemAllocator->free_offset = offset;
		LWLockInitialize(&ShmemAllocator->index_lock, LWTRANCHE_SHMEM_INDEX);
	}

	/* Process-local pointers used by ShmemAlloc() and address checks */
	ShmemSegHdr = seghdr;
	ShmemBase = seghdr;
	ShmemEnd = (char *) ShmemBase + seghdr->totalsize;

	/*
	 * Create (or attach to) the shared memory index of shmem areas.
	 *
	 * This is the same initialization as ShmemInitHash() does, but we cannot
	 * use ShmemInitHash() here because it relies on ShmemIndex being already
	 * initialized.
	 */
	hash_nelems = list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE;

	info.keysize = SHMEM_INDEX_KEYSIZE;
	info.entrysize = sizeof(ShmemIndexEnt);
	hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE;

	/* Only the creating process allocates; backends reuse the stored location */
	if (!IsUnderPostmaster)
	{
		ShmemAllocator->index_size = hash_estimate_size(hash_nelems, info.entrysize);
		ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size);
	}
	ShmemIndex = shmem_hash_create(ShmemAllocator->index,
								   ShmemAllocator->index_size,
								   IsUnderPostmaster,
								   "ShmemIndex", hash_nelems,
								   &info, hash_flags);
	Assert(ShmemIndex != NULL);

	/*
	 * Add an entry for ShmemIndex itself into ShmemIndex, so that it's
	 * visible in the pg_shmem_allocations view
	 */
	if (!IsUnderPostmaster)
	{
		bool		found;
		ShmemIndexEnt *result = (ShmemIndexEnt *)
			hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found);

		Assert(!found);
		result->size = ShmemAllocator->index_size;
		result->allocated_size = ShmemAllocator->index_size;
		result->location = ShmemAllocator->index;
	}
}
738 : :
739 : : /*
740 : : * Reset state on postmaster crash restart.
741 : : */
742 : : void
29 heikki.linnakangas@i 743 : 5 : ResetShmemAllocator(void)
744 : : {
745 [ - + ]: 5 : Assert(!IsUnderPostmaster);
746 : 5 : shmem_request_state = SRS_INITIAL;
747 : :
748 : 5 : pending_shmem_requests = NIL;
749 : :
750 : : /*
751 : : * Note that we don't clear the registered callbacks. We will need to
752 : : * call them again as we restart
753 : : */
29 heikki.linnakangas@i 754 :CBC 5 : }
755 : :
756 : : /*
757 : : * ShmemAlloc -- allocate max-aligned chunk from shared memory
758 : : *
759 : : * Throws error if request cannot be satisfied.
760 : : *
761 : : * Assumes ShmemSegHdr is initialized.
762 : : */
763 : : void *
7142 tgl@sss.pgh.pa.us 764 : 1241 : ShmemAlloc(Size size)
765 : : {
766 : : void *newSpace;
767 : : Size allocated_size;
768 : :
29 heikki.linnakangas@i 769 :GNC 1241 : newSpace = ShmemAllocRaw(size, 0, &allocated_size);
3533 tgl@sss.pgh.pa.us 770 [ - + ]:CBC 1241 : if (!newSpace)
3533 tgl@sss.pgh.pa.us 771 [ # # ]:UBC 0 : ereport(ERROR,
772 : : (errcode(ERRCODE_OUT_OF_MEMORY),
773 : : errmsg("out of shared memory (%zu bytes requested)",
774 : : size)));
3533 tgl@sss.pgh.pa.us 775 :CBC 1241 : return newSpace;
776 : : }
777 : :
778 : : /*
779 : : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
780 : : *
781 : : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
782 : : */
783 : : void *
3533 tgl@sss.pgh.pa.us 784 :LBC (437199) : ShmemAllocNoError(Size size)
785 : : {
786 : : Size allocated_size;
787 : :
29 heikki.linnakangas@i 788 :UNC 0 : return ShmemAllocRaw(size, 0, &allocated_size);
789 : : }
790 : :
791 : : /*
792 : : * ShmemAllocRaw -- allocate align chunk and return allocated size
793 : : *
794 : : * Also sets *allocated_size to the number of bytes allocated, which will
795 : : * be equal to the number requested plus any padding we choose to add.
796 : : */
797 : : static void *
29 heikki.linnakangas@i 798 :GNC 99287 : ShmemAllocRaw(Size size, Size alignment, Size *allocated_size)
799 : : {
800 : : Size rawStart;
801 : : Size newStart;
802 : : Size newFree;
803 : : void *newSpace;
804 : :
805 : : /*
806 : : * Ensure all space is adequately aligned. We used to only MAXALIGN this
807 : : * space but experience has proved that on modern systems that is not good
808 : : * enough. Many parts of the system are very sensitive to critical data
809 : : * structures getting split across cache line boundaries. To avoid that,
810 : : * attempt to align the beginning of the allocation to a cache line
811 : : * boundary. The calling code will still need to be careful about how it
812 : : * uses the allocated space - e.g. by padding each element in an array of
813 : : * structures out to a power-of-two size - but without this, even that
814 : : * won't be sufficient.
815 : : */
816 [ + + ]: 99287 : if (alignment < PG_CACHE_LINE_SIZE)
817 : 95564 : alignment = PG_CACHE_LINE_SIZE;
818 : :
3854 rhaas@postgresql.org 819 [ - + ]:CBC 99287 : Assert(ShmemSegHdr != NULL);
820 : :
40 heikki.linnakangas@i 821 :GNC 99287 : SpinLockAcquire(&ShmemAllocator->shmem_lock);
822 : :
29 823 : 99287 : rawStart = ShmemAllocator->free_offset;
824 : 99287 : newStart = TYPEALIGN(alignment, rawStart);
825 : :
8262 tgl@sss.pgh.pa.us 826 :CBC 99287 : newFree = newStart + size;
3854 rhaas@postgresql.org 827 [ + - ]: 99287 : if (newFree <= ShmemSegHdr->totalsize)
828 : : {
523 peter@eisentraut.org 829 : 99287 : newSpace = (char *) ShmemBase + newStart;
95 heikki.linnakangas@i 830 :GNC 99287 : ShmemAllocator->free_offset = newFree;
831 : : }
832 : : else
10467 bruce@momjian.us 833 :UBC 0 : newSpace = NULL;
834 : :
40 heikki.linnakangas@i 835 :GNC 99287 : SpinLockRelease(&ShmemAllocator->shmem_lock);
836 : :
837 : : /* note this assert is okay with newSpace == NULL */
29 838 [ - + ]: 99287 : Assert(newSpace == (void *) TYPEALIGN(alignment, newSpace));
839 : :
840 : 99287 : *allocated_size = newFree - rawStart;
10108 bruce@momjian.us 841 :CBC 99287 : return newSpace;
842 : : }
843 : :
844 : : /*
845 : : * ShmemAddrIsValid -- test if an address refers to shared memory
846 : : *
847 : : * Returns true if the pointer points within the shared memory segment.
848 : : */
849 : : bool
5566 heikki.linnakangas@i 850 : 1692 : ShmemAddrIsValid(const void *addr)
851 : : {
6393 tgl@sss.pgh.pa.us 852 [ + - + - ]: 1692 : return (addr >= ShmemBase) && (addr < ShmemEnd);
853 : : }
854 : :
855 : : /*
856 : : * Register callbacks that define a shared memory area (or multiple areas).
857 : : *
858 : : * The system will call the callbacks at different stages of postmaster or
859 : : * backend startup, to allocate and initialize the area.
860 : : *
861 : : * This is normally called early during postmaster startup, but if the
862 : : * SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP is set, this can also be used after
863 : : * startup, although after startup there's no guarantee that there's enough
864 : : * shared memory available. When called after startup, this immediately calls
865 : : * the right callbacks depending on whether another backend had already
866 : : * initialized the area.
867 : : *
868 : : * Note: In EXEC_BACKEND mode, this needs to be called in every backend
869 : : * process. That's needed because we cannot pass down the callback function
870 : : * pointers from the postmaster process, because different processes may have
871 : : * loaded libraries to different addresses.
872 : : */
873 : : void
29 heikki.linnakangas@i 874 :GNC 55118 : RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
875 : : {
876 [ + + + - ]: 55118 : if (shmem_request_state == SRS_DONE && IsUnderPostmaster)
877 : : {
878 : : /*
879 : : * After-startup initialization or attachment. Call the appropriate
880 : : * callbacks immediately.
881 : : */
882 [ - + ]: 3 : if ((callbacks->flags & SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP) == 0)
29 heikki.linnakangas@i 883 [ # # ]:UNC 0 : elog(ERROR, "cannot request shared memory at this time");
884 : :
29 heikki.linnakangas@i 885 :GNC 3 : CallShmemCallbacksAfterStartup(callbacks);
886 : : }
887 : : else
888 : : {
889 : : /* Remember the callbacks for later */
890 : 55115 : registered_shmem_callbacks = lappend(registered_shmem_callbacks,
891 : : (void *) callbacks);
892 : : }
29 heikki.linnakangas@i 893 :CBC 55118 : }
894 : :
895 : : /*
896 : : * Register a shmem area (or multiple areas) after startup.
897 : : */
898 : : static void
29 heikki.linnakangas@i 899 :GNC 3 : CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks)
900 : : {
901 : : bool found_any;
902 : : bool notfound_any;
903 : :
904 [ - + ]: 3 : Assert(shmem_request_state == SRS_DONE);
905 : 3 : shmem_request_state = SRS_REQUESTING;
906 : :
907 : : /*
908 : : * Call the request callback first. The callback makes ShmemRequest*()
909 : : * calls for each shmem area, adding them to pending_shmem_requests.
910 : : */
911 [ - + ]: 3 : Assert(pending_shmem_requests == NIL);
912 [ + - ]: 3 : if (callbacks->request_fn)
913 : 3 : callbacks->request_fn(callbacks->opaque_arg);
914 : 3 : shmem_request_state = SRS_AFTER_STARTUP_ATTACH_OR_INIT;
915 : :
916 [ - + ]: 3 : if (pending_shmem_requests == NIL)
917 : : {
29 heikki.linnakangas@i 918 :UNC 0 : shmem_request_state = SRS_DONE;
919 : 0 : return;
920 : : }
921 : :
922 : : /* Hold ShmemIndexLock while we allocate all the shmem entries */
40 heikki.linnakangas@i 923 :GNC 3 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
924 : :
925 : : /*
926 : : * Check if the requested shared memory areas have already been
927 : : * initialized. We assume all the areas requested by the request callback
928 : : * to form a coherent unit such that they're all already initialized or
929 : : * none. Otherwise it would be ambiguous which callback, init or attach,
930 : : * to callback afterwards.
931 : : */
29 932 : 3 : found_any = notfound_any = false;
933 [ + - + + : 9 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
934 : : {
935 [ + + ]: 3 : if (hash_search(ShmemIndex, request->options->name, HASH_FIND, NULL))
936 : 2 : found_any = true;
937 : : else
938 : 1 : notfound_any = true;
939 : : }
940 [ + + - + ]: 3 : if (found_any && notfound_any)
29 heikki.linnakangas@i 941 [ # # ]:UNC 0 : elog(ERROR, "found some but not all");
942 : :
943 : : /*
944 : : * Allocate or attach all the shmem areas requested by the request_fn
945 : : * callback.
946 : : */
29 heikki.linnakangas@i 947 [ + - + + :GNC 9 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
948 : : {
949 [ + + ]: 3 : if (found_any)
950 : 2 : AttachShmemIndexEntry(request, false);
951 : : else
952 : 1 : InitShmemIndexEntry(request);
953 : :
954 : 3 : pfree(request->options);
955 : : }
956 : 3 : list_free_deep(pending_shmem_requests);
957 : 3 : pending_shmem_requests = NIL;
958 : :
959 : : /* Finish by calling the appropriate subsystem-specific callback */
960 [ + + ]: 3 : if (found_any)
961 : : {
962 [ + - ]: 2 : if (callbacks->attach_fn)
963 : 2 : callbacks->attach_fn(callbacks->opaque_arg);
964 : : }
965 : : else
966 : : {
967 [ + - ]: 1 : if (callbacks->init_fn)
968 : 1 : callbacks->init_fn(callbacks->opaque_arg);
969 : : }
970 : :
7426 tgl@sss.pgh.pa.us 971 : 3 : LWLockRelease(ShmemIndexLock);
29 heikki.linnakangas@i 972 : 3 : shmem_request_state = SRS_DONE;
973 : : }
974 : :
975 : : /*
976 : : * Call all shmem request callbacks.
977 : : */
978 : : void
979 : 1244 : ShmemCallRequestCallbacks(void)
980 : : {
981 : : ListCell *lc;
982 : :
983 [ - + ]: 1244 : Assert(shmem_request_state == SRS_INITIAL);
984 : 1244 : shmem_request_state = SRS_REQUESTING;
985 : :
986 [ + - + + : 56009 : foreach(lc, registered_shmem_callbacks)
+ + ]
987 : : {
988 : 54765 : const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
989 : :
990 [ + - ]: 54765 : if (callbacks->request_fn)
991 : 54765 : callbacks->request_fn(callbacks->opaque_arg);
992 : : }
10892 scrappy@hub.org 993 :GIC 1244 : }
994 : :
995 : : /*
996 : : * ShmemInitStruct -- Create/attach to a structure in shared memory.
997 : : *
998 : : * This is called during initialization to find or allocate
999 : : * a data structure in shared memory. If no other process
1000 : : * has created the structure, this routine allocates space
1001 : : * for it. If it exists already, a pointer to the existing
1002 : : * structure is returned.
1003 : : *
1004 : : * Returns: pointer to the object. *foundPtr is set true if the object was
1005 : : * already in the shmem index (hence, already initialized).
1006 : : *
1007 : : * Note: This is a legacy interface, kept for backwards compatibility with
1008 : : * extensions. Use ShmemRequestStruct() in new code!
1009 : : */
1010 : : void *
29 heikki.linnakangas@i 1011 :LBC (80247) : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
1012 : : {
29 heikki.linnakangas@i 1013 :UNC 0 : void *ptr = NULL;
1014 : 0 : ShmemStructOpts options = {
1015 : : .name = name,
1016 : : .size = size,
1017 : : .ptr = &ptr,
1018 : : };
1019 : 0 : ShmemRequest request = {&options, SHMEM_KIND_STRUCT};
1020 : :
1021 [ # # # # : 0 : Assert(shmem_request_state == SRS_DONE ||
# # ]
1022 : : shmem_request_state == SRS_INITIALIZING ||
1023 : : shmem_request_state == SRS_REQUESTING);
1024 : :
29 heikki.linnakangas@i 1025 :LBC (80247) : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
1026 : :
1027 : : /*
1028 : : * During postmaster startup, look up the existing entry if any.
1029 : : */
29 heikki.linnakangas@i 1030 :UNC 0 : *foundPtr = false;
1031 [ # # ]: 0 : if (IsUnderPostmaster)
1032 : 0 : *foundPtr = AttachShmemIndexEntry(&request, true);
1033 : :
1034 : : /* Initialize it if not found */
1035 [ # # ]: 0 : if (!*foundPtr)
1036 : 0 : InitShmemIndexEntry(&request);
1037 : :
29 heikki.linnakangas@i 1038 :LBC (79177) : LWLockRelease(ShmemIndexLock);
1039 : :
29 heikki.linnakangas@i 1040 [ # # ]:UNC 0 : Assert(ptr != NULL);
1041 : 0 : return ptr;
1042 : : }
1043 : :
1044 : : /*
1045 : : * Add two Size values, checking for overflow
1046 : : */
1047 : : Size
7563 tgl@sss.pgh.pa.us 1048 :CBC 504500 : add_size(Size s1, Size s2)
1049 : : {
1050 : : Size result;
1051 : :
162 jchampion@postgresql 1052 [ - + ]:GNC 504500 : if (pg_add_size_overflow(s1, s2, &result))
7563 tgl@sss.pgh.pa.us 1053 [ # # ]:UBC 0 : ereport(ERROR,
1054 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1055 : : errmsg("requested shared memory size overflows size_t")));
7563 tgl@sss.pgh.pa.us 1056 :CBC 504500 : return result;
1057 : : }
1058 : :
1059 : : /*
1060 : : * Multiply two Size values, checking for overflow
1061 : : */
1062 : : Size
1063 : 127272 : mul_size(Size s1, Size s2)
1064 : : {
1065 : : Size result;
1066 : :
162 jchampion@postgresql 1067 [ - + ]:GNC 127272 : if (pg_mul_size_overflow(s1, s2, &result))
7563 tgl@sss.pgh.pa.us 1068 [ # # ]:UBC 0 : ereport(ERROR,
1069 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1070 : : errmsg("requested shared memory size overflows size_t")));
7563 tgl@sss.pgh.pa.us 1071 :CBC 127272 : return result;
1072 : : }
1073 : :
1074 : : /* SQL SRF showing allocated shared memory */
1075 : : Datum
2308 rhaas@postgresql.org 1076 : 4 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
1077 : : {
1078 : : #define PG_GET_SHMEM_SIZES_COLS 4
1079 : 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1080 : : HASH_SEQ_STATUS hstat;
1081 : : ShmemIndexEnt *ent;
2182 tgl@sss.pgh.pa.us 1082 : 4 : Size named_allocated = 0;
1083 : : Datum values[PG_GET_SHMEM_SIZES_COLS];
1084 : : bool nulls[PG_GET_SHMEM_SIZES_COLS];
1085 : :
1295 michael@paquier.xyz 1086 : 4 : InitMaterializedSRF(fcinfo, 0);
1087 : :
2308 rhaas@postgresql.org 1088 : 4 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
1089 : :
1090 : 4 : hash_seq_init(&hstat, ShmemIndex);
1091 : :
1092 : : /* output all allocated entries */
1093 : 4 : memset(nulls, 0, sizeof(nulls));
1094 [ + + ]: 326 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
1095 : : {
1096 : 322 : values[0] = CStringGetTextDatum(ent->key);
1097 : 322 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
1098 : 322 : values[2] = Int64GetDatum(ent->size);
1099 : 322 : values[3] = Int64GetDatum(ent->allocated_size);
1100 : 322 : named_allocated += ent->allocated_size;
1101 : :
1520 michael@paquier.xyz 1102 : 322 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1103 : : values, nulls);
1104 : : }
1105 : :
1106 : : /* output shared memory allocated but not counted via the shmem index */
2308 rhaas@postgresql.org 1107 : 4 : values[0] = CStringGetTextDatum("<anonymous>");
1108 : 4 : nulls[1] = true;
95 heikki.linnakangas@i 1109 :GNC 4 : values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
2308 rhaas@postgresql.org 1110 :CBC 4 : values[3] = values[2];
1520 michael@paquier.xyz 1111 : 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1112 : :
1113 : : /* output as-of-yet unused shared memory */
2308 rhaas@postgresql.org 1114 : 4 : nulls[0] = true;
95 heikki.linnakangas@i 1115 :GNC 4 : values[1] = Int64GetDatum(ShmemAllocator->free_offset);
2308 rhaas@postgresql.org 1116 :CBC 4 : nulls[1] = false;
95 heikki.linnakangas@i 1117 :GNC 4 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
2308 rhaas@postgresql.org 1118 :CBC 4 : values[3] = values[2];
1520 michael@paquier.xyz 1119 : 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1120 : :
2308 rhaas@postgresql.org 1121 : 4 : LWLockRelease(ShmemIndexLock);
1122 : :
1123 : 4 : return (Datum) 0;
1124 : : }
1125 : :
/*
 * SQL SRF showing NUMA memory nodes for allocated shared memory
 *
 * Compared to pg_get_shmem_allocations(), this function does not return
 * information about shared anonymous allocations and unused shared memory.
 */
Datum
pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
{
#define PG_GET_SHMEM_NUMA_SIZES_COLS 3
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	HASH_SEQ_STATUS hstat;
	ShmemIndexEnt *ent;
	Datum		values[PG_GET_SHMEM_NUMA_SIZES_COLS];
	bool		nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
	Size		os_page_size;
	void	  **page_ptrs;		/* one pointer per OS page of an entry */
	int		   *pages_status;	/* per-page NUMA node id, or negative errno */
	uint64		shm_total_page_count,
				shm_ent_page_count,
				max_nodes;
	Size	   *nodes;			/* page counts per node; [max_nodes+1] = no node */

	if (pg_numa_init() == -1)
		elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");

	InitMaterializedSRF(fcinfo, 0);

	/* +2: slots 0..max_nodes for real nodes, plus one for "no node" pages */
	max_nodes = pg_numa_get_max_node();
	nodes = palloc_array(Size, max_nodes + 2);

	/*
	 * Shared memory allocations can vary in size and may not align with OS
	 * memory page boundaries, while NUMA queries work on pages.
	 *
	 * To correctly map each allocation to NUMA nodes, we need to: 1.
	 * Determine the OS memory page size. 2. Align each allocation's start/end
	 * addresses to page boundaries. 3. Query NUMA node information for all
	 * pages spanning the allocation.
	 */
	os_page_size = pg_get_shmem_pagesize();

	/*
	 * Allocate memory for page pointers and status based on total shared
	 * memory size. This simplified approach allocates enough space for all
	 * pages in shared memory rather than calculating the exact requirements
	 * for each segment.
	 *
	 * Add 1, because we don't know how exactly the segments align to OS
	 * pages, so the allocation might use one more memory page. In practice
	 * this is not very likely, and moreover we have more entries, each of
	 * them using only fraction of the total pages.
	 */
	shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
	page_ptrs = palloc0_array(void *, shm_total_page_count);
	pages_status = palloc_array(int, shm_total_page_count);

	if (firstNumaTouch)
		elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");

	LWLockAcquire(ShmemIndexLock, LW_SHARED);

	hash_seq_init(&hstat, ShmemIndex);

	/* output all allocated entries */
	while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
	{
		int			i;
		char	   *startptr,
				   *endptr;
		Size		total_len;

		/*
		 * Calculate the range of OS pages used by this segment. The segment
		 * may start / end half-way through a page, we want to count these
		 * pages too. So we align the start/end pointers down/up, and then
		 * calculate the number of pages from that.
		 */
		startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
		endptr = (char *) TYPEALIGN(os_page_size,
									(char *) ent->location + ent->allocated_size);
		total_len = (endptr - startptr);

		shm_ent_page_count = total_len / os_page_size;

		/*
		 * If we ever get 0xff (-1) back from kernel inquiry, then we probably
		 * have a bug in mapping buffers to OS pages.
		 */
		memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);

		/*
		 * Setup page_ptrs[] with pointers to all OS pages for this segment,
		 * and get the NUMA status using pg_numa_query_pages.
		 *
		 * In order to get reliable results we also need to touch memory
		 * pages, so that inquiry about NUMA memory node doesn't return -2
		 * (ENOENT, which indicates unmapped/unallocated pages).
		 */
		for (i = 0; i < shm_ent_page_count; i++)
		{
			page_ptrs[i] = startptr + (i * os_page_size);

			/* only the first call in this backend needs to fault pages in */
			if (firstNumaTouch)
				pg_numa_touch_mem_if_required(page_ptrs[i]);

			/* touching every page can take a while; stay cancelable */
			CHECK_FOR_INTERRUPTS();
		}

		if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
			elog(ERROR, "failed NUMA pages inquiry status: %m");

		/* Count number of NUMA nodes used for this shared memory entry */
		memset(nodes, 0, sizeof(Size) * (max_nodes + 2));

		for (i = 0; i < shm_ent_page_count; i++)
		{
			int			s = pages_status[i];

			/* Ensure we are adding only valid index to the array */
			if (s >= 0 && s <= max_nodes)
			{
				/* valid NUMA node */
				nodes[s]++;
				continue;
			}
			else if (s == -2)
			{
				/* -2 means ENOENT (e.g. page was moved to swap) */
				nodes[max_nodes + 1]++;
				continue;
			}

			elog(ERROR, "invalid NUMA node id outside of allowed range "
				 "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
		}

		/* no NULLs for regular nodes */
		memset(nulls, 0, sizeof(nulls));

		/*
		 * Add one entry for each NUMA node, including those without allocated
		 * memory for this segment.
		 */
		for (i = 0; i <= max_nodes; i++)
		{
			values[0] = CStringGetTextDatum(ent->key);
			values[1] = Int32GetDatum(i);
			values[2] = Int64GetDatum(nodes[i] * os_page_size);

			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
								 values, nulls);
		}

		/* The last entry is used for pages without a NUMA node. */
		nulls[1] = true;
		values[0] = CStringGetTextDatum(ent->key);
		values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
							 values, nulls);
	}

	LWLockRelease(ShmemIndexLock);
	/* remember that pages have been faulted in; later calls can skip that */
	firstNumaTouch = false;

	return (Datum) 0;
}
1294 : :
1295 : : /*
1296 : : * Determine the memory page size used for the shared memory segment.
1297 : : *
1298 : : * If the shared segment was allocated using huge pages, returns the size of
1299 : : * a huge page. Otherwise returns the size of regular memory page.
1300 : : *
1301 : : * This should be used only after the server is started.
1302 : : */
1303 : : Size
391 1304 : 8 : pg_get_shmem_pagesize(void)
1305 : : {
1306 : : Size os_page_size;
1307 : : #ifdef WIN32
1308 : : SYSTEM_INFO sysinfo;
1309 : :
1310 : : GetSystemInfo(&sysinfo);
1311 : : os_page_size = sysinfo.dwPageSize;
1312 : : #else
1313 : 8 : os_page_size = sysconf(_SC_PAGESIZE);
1314 : : #endif
1315 : :
1316 [ - + ]: 8 : Assert(IsUnderPostmaster);
1317 [ - + ]: 8 : Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
1318 : :
1319 [ - + ]: 8 : if (huge_pages_status == HUGE_PAGES_ON)
391 tomas.vondra@postgre 1320 :UBC 0 : GetHugePageSize(&os_page_size, NULL);
1321 : :
391 tomas.vondra@postgre 1322 :CBC 8 : return os_page_size;
1323 : : }
1324 : :
1325 : : Datum
1326 : 5 : pg_numa_available(PG_FUNCTION_ARGS)
1327 : : {
1328 : 5 : PG_RETURN_BOOL(pg_numa_init() != -1);
1329 : : }
|