Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * shmem.c
4 : : * create shared memory and initialize shared memory data structures.
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/ipc/shmem.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : /*
16 : : * POSTGRES processes share one or more regions of shared memory.
17 : : * The shared memory is created by a postmaster and is inherited
18 : : * by each backend via fork() (or, in some ports, via other OS-specific
19 : : * methods). The routines in this file are used for allocating and
20 : : * binding to shared memory data structures.
21 : : *
22 : : * This module provides facilities to allocate fixed-size structures in shared
23 : : * memory, for things like variables shared between all backend processes.
24 : : * Each such structure has a string name to identify it, specified when it is
25 : : * requested. shmem_hash.c provides a shared hash table implementation on top
26 : : * of that.
27 : : *
28 : : * Shared memory areas should usually not be allocated after postmaster
29 : : * startup, although we do allow small allocations later for the benefit of
30 : : * extension modules that are loaded after startup. Despite that allowance,
31 : : * extensions that need shared memory should be added in
32 : : * shared_preload_libraries, because the allowance is quite small and there is
33 : : * no guarantee that any memory is available after startup.
34 : : *
35 : : * Nowadays, there is also another way to allocate shared memory called
36 : : * Dynamic Shared Memory. See dsm.c for that facility. One big difference
37 : : * between traditional shared memory handled by shmem.c and dynamic shared
38 : : * memory is that traditional shared memory areas are mapped to the same
39 : : * address in all processes, so you can use normal pointers in shared memory
40 : : * structs. With Dynamic Shared Memory, you must use offsets or DSA pointers
41 : : * instead.
42 : : *
43 : : * Shared memory managed by shmem.c can never be freed, once allocated. Each
44 : : * hash table has its own free list, so hash buckets can be reused when an
45 : : * item is deleted.
46 : : *
47 : : * Usage
48 : : * -----
49 : : *
50 : : * To allocate shared memory, you need to register a set of callback functions
51 : : * which handle the lifecycle of the allocation. In the request_fn callback,
52 : : * call ShmemRequestStruct() with the desired name and size. When the area is
53 : : * later allocated or attached to, the global variable pointed to by the .ptr
54 : : * option is set to the shared memory location of the allocation. The init_fn
55 : : * callback can perform additional initialization.
56 : : *
57 : : * typedef struct MyShmemData {
58 : : * ...
59 : : * } MyShmemData;
60 : : *
61 : : * static MyShmemData *MyShmem;
62 : : *
63 : : * static void my_shmem_request(void *arg);
64 : : * static void my_shmem_init(void *arg);
65 : : *
66 : : * const ShmemCallbacks MyShmemCallbacks = {
67 : : * .request_fn = my_shmem_request,
68 : : * .init_fn = my_shmem_init,
69 : : * };
70 : : *
71 : : * static void
72 : : * my_shmem_request(void *arg)
73 : : * {
74 : : * ShmemRequestStruct(.name = "My shmem area",
75 : : * .size = sizeof(MyShmemData),
76 : : * .ptr = (void **) &MyShmem,
77 : : * );
78 : : * }
79 : : *
80 : : * In builtin PostgreSQL code, add the callbacks to the list in
81 : : * src/include/storage/subsystemlist.h. In an add-in module, you can register
82 : : * the callbacks by calling RegisterShmemCallbacks(&MyShmemCallbacks) in the
83 : : * extension's _PG_init() function.
84 : : *
85 : : * Lifecycle
86 : : * ---------
87 : : *
88 : : * Initializing shared memory happens in multiple phases. In the first phase,
89 : : * during postmaster startup, all the request_fn callbacks are called. Only
90 : : * after all the request_fn callbacks have been called and all the shmem areas
91 : : * have been requested by the ShmemRequestStruct() calls do we know how much
92 : : * shared memory we need in total. After that, the postmaster allocates the
93 : : * global shared memory segment, and calls all the init_fn callbacks to initialize
94 : : * all the requested shmem areas.
95 : : *
96 : : * In standard Unix-ish environments, individual backends do not need to
97 : : * re-establish their local pointers into shared memory, because they inherit
98 : : * correct values of those variables via fork() from the postmaster. However,
99 : : * this does not work in the EXEC_BACKEND case. In ports using EXEC_BACKEND,
100 : : * backend startup also calls the shmem_request callbacks to re-establish the
101 : : * knowledge about each shared memory area, sets the pointer variables
102 : : * (*options->ptr), and calls the attach_fn callback, if any, for additional
103 : : * per-backend setup.
104 : : *
105 : : * Legacy ShmemInitStruct()/ShmemInitHash() functions
106 : : * --------------------------------------------------
107 : : *
108 : : * ShmemInitStruct()/ShmemInitHash() is another way of registering shmem
109 : : * areas. It pre-dates the ShmemRequestStruct()/ShmemRequestHash() functions,
110 : : * and should not be used in new code, but as of this writing it is still
111 : : * widely used in extensions.
112 : : *
113 : : * To allocate a shmem area with ShmemInitStruct(), you need to separately
114 : : * register the size needed for the area by calling RequestAddinShmemSpace()
115 : : * from the extension's shmem_request_hook, and allocate the area by calling
116 : : * ShmemInitStruct() from the extension's shmem_startup_hook. There are no
117 : : * init/attach callbacks. Instead, the caller of ShmemInitStruct() must check
118 : : * the return status of ShmemInitStruct() and initialize the struct if it was
119 : : * not previously initialized.
120 : : *
121 : : * Calling ShmemAlloc() directly
122 : : * -----------------------------
123 : : *
124 : : * There's a more low-level way of allocating shared memory too: you can call
125 : : * ShmemAlloc() directly. It's used to implement the higher level mechanisms,
126 : : * and should generally not be called directly.
127 : : */
128 : :
129 : : #include "postgres.h"
130 : :
131 : : #include <unistd.h>
132 : :
133 : : #include "access/slru.h"
134 : : #include "fmgr.h"
135 : : #include "funcapi.h"
136 : : #include "miscadmin.h"
137 : : #include "port/pg_bitutils.h"
138 : : #include "port/pg_numa.h"
139 : : #include "storage/lwlock.h"
140 : : #include "storage/pg_shmem.h"
141 : : #include "storage/shmem.h"
142 : : #include "storage/shmem_internal.h"
143 : : #include "storage/spin.h"
144 : : #include "utils/builtins.h"
145 : : #include "utils/tuplestore.h"
146 : :
147 : : /*
148 : : * Registered callbacks.
149 : : *
150 : : * During postmaster startup, we accumulate the callbacks from all subsystems
151 : : * in this list.
152 : : *
153 : : * This is in process private memory, although on Unix-like systems, we expect
154 : : * all the registrations to happen at postmaster startup time and be inherited
155 : : * by all the child processes via fork().
156 : : */
157 : : static List *registered_shmem_callbacks;
158 : :
159 : : /*
160 : : * In the shmem request phase, all the shmem areas requested with the
161 : : * ShmemRequest*() functions are accumulated here.
162 : : */
163 : : typedef struct
164 : : {
165 : : ShmemStructOpts *options; /* name, size, alignment etc. of the area; pfree'd once the request has been initialized or attached */
166 : : ShmemRequestKind kind; /* selects the kind-specific init/attach step (struct, hash or SLRU) */
167 : : } ShmemRequest;
168 : :
169 : : static List *pending_shmem_requests;
170 : :
171 : : /*
172 : : * Per-process state machine, for sanity checking that we do things in the
173 : : * right order.
174 : : *
175 : : * Postmaster:
176 : : * INITIAL -> REQUESTING -> INITIALIZING -> DONE
177 : : *
178 : : * Backends in EXEC_BACKEND mode:
179 : : * INITIAL -> REQUESTING -> ATTACHING -> DONE
180 : : *
181 : : * Late request:
182 : : * DONE -> REQUESTING -> AFTER_STARTUP_ATTACH_OR_INIT -> DONE
183 : : */
184 : : enum shmem_request_state
185 : : {
186 : : /* Initial state; ResetShmemAllocator() also returns to this state */
187 : : SRS_INITIAL,
188 : :
189 : : /*
190 : : * When we start calling the shmem_request callbacks, we enter the
191 : : * SRS_REQUESTING phase. All ShmemRequestStruct calls happen in this
192 : : * state.
193 : : */
194 : : SRS_REQUESTING,
195 : :
196 : : /*
197 : : * Postmaster has finished all shmem requests, and is now initializing the
198 : : * shared memory segment. init_fn callbacks are called in this state.
199 : : */
200 : : SRS_INITIALIZING,
201 : :
202 : : /*
203 : : * A postmaster child process is starting up. attach_fn callbacks are
204 : : * called in this state.
205 : : */
206 : : SRS_ATTACHING,
207 : :
208 : : /* An after-startup allocation or attachment is in progress */
209 : : SRS_AFTER_STARTUP_ATTACH_OR_INIT,
210 : :
211 : : /*
 * Normal state after shmem initialization / attachment; set at the end of
 * ShmemInitRequested() and ShmemAttachRequested().
 */
212 : : SRS_DONE,
213 : : };
214 : : static enum shmem_request_state shmem_request_state = SRS_INITIAL; /* current state of this process */
215 : :
216 : : /*
217 : : * This is the first data structure stored in the shared memory segment, at
218 : : * the offset that PGShmemHeader->content_offset points to. Allocations by
219 : : * ShmemAlloc() are carved out of the space after this.
220 : : *
221 : : * For the base pointer and the total size of the shmem segment, we rely on
222 : : * the PGShmemHeader.
223 : : */
224 : : typedef struct ShmemAllocatorData
225 : : {
226 : : Size free_offset; /* offset to first free space from ShmemBase */
227 : :
228 : : /* spinlock that protects 'free_offset'; set up in InitShmemAllocator() */
229 : : slock_t shmem_lock;
230 : :
231 : : HASHHDR *index; /* location of ShmemIndex, obtained from ShmemAlloc() */
232 : : size_t index_size; /* size of shmem region holding ShmemIndex */
233 : : LWLock index_lock; /* protects ShmemIndex; accessed via ShmemIndexLock */
234 : : } ShmemAllocatorData;
235 : :
236 : : #define ShmemIndexLock (&ShmemAllocator->index_lock)
237 : :
238 : : static void *ShmemAllocRaw(Size size, Size alignment, Size *allocated_size);
239 : :
240 : : /* shared memory global variables */
241 : :
242 : : static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
243 : : static void *ShmemBase; /* start address of shared memory */
244 : : static void *ShmemEnd; /* end+1 address of shared memory */
245 : :
246 : : static ShmemAllocatorData *ShmemAllocator;
247 : :
248 : : /*
249 : : * ShmemIndex is a global directory of shmem areas, itself also stored in the
250 : : * shared memory.
251 : : */
252 : : static HTAB *ShmemIndex;
253 : :
254 : : /* max size of data structure string name */
255 : : #define SHMEM_INDEX_KEYSIZE (48)
256 : :
257 : : /*
258 : : * # of additional entries to reserve in the shmem index table, for
259 : : * allocations after postmaster startup. (This is not a hard limit, the hash
260 : : * table can grow larger than that if there is shared memory available)
261 : : */
262 : : #define SHMEM_INDEX_ADDITIONAL_SIZE (128)
263 : :
264 : : /* this is a hash bucket in the shmem index table */
265 : : typedef struct
266 : : {
267 : : char key[SHMEM_INDEX_KEYSIZE]; /* string name; this is the hash key */
268 : : void *location; /* location in shared mem */
269 : : Size size; /* # bytes requested for the structure */
270 : : Size allocated_size; /* # bytes actually allocated, as reported by the allocator */
271 : : } ShmemIndexEnt;
272 : :
273 : : /* To get reliable results for NUMA inquiry we need to "touch pages" once */
274 : : static bool firstNumaTouch = true;
275 : :
276 : : static void CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks);
277 : : static void InitShmemIndexEntry(ShmemRequest *request);
278 : : static bool AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok);
279 : :
280 : : Datum pg_numa_available(PG_FUNCTION_ARGS);
281 : :
282 : : /*
283 : : * ShmemRequestStruct() --- request a named shared memory area
284 : : *
285 : : * Subsystems call this to register their shared memory needs. This is
286 : : * usually done early in postmaster startup, before the shared memory segment
287 : : * has been created, so that the size can be included in the estimate for
288 : : * total amount of shared memory needed. We set aside a small amount of
289 : : * memory for allocations that happen later, for the benefit of non-preloaded
290 : : * extensions, but that should not be relied upon.
291 : : *
292 : : * This does not yet allocate the memory, but merely registers the need for
293 : : * it. The actual allocation happens later in the postmaster startup
294 : : * sequence.
295 : : *
296 : : * This must be called from a shmem_request callback function, registered with
297 : : * RegisterShmemCallbacks(). This enforces a coding pattern that works the
298 : : * same in normal Unix systems and with EXEC_BACKEND. On Unix systems, the
299 : : * shmem_request callbacks are called once, early in postmaster startup, and
300 : : * the child processes inherit the struct descriptors and any other
301 : : * per-process state from the postmaster. In EXEC_BACKEND mode, shmem_request
302 : : * callbacks are *also* called in each backend, at backend startup, to
303 : : * re-establish the struct descriptors. By calling the same function in both
304 : : * cases, we ensure that all the shmem areas are registered the same way in
305 : : * all processes.
306 : : *
307 : : * 'options' defines the name and size of the area, and any other optional
308 : : * features. Leave unused options as zeros. The options are copied to
309 : : * longer-lived memory, so it doesn't need to live after the
310 : : * ShmemRequestStruct() call and can point to a local variable in the calling
311 : : * function. The 'name' must point to a long-lived string though, only the
312 : : * pointer to it is copied.
313 : : */
314 : : void
54 heikki.linnakangas@i 315 :GNC 80058 : ShmemRequestStructWithOpts(const ShmemStructOpts *options)
316 : : {
317 : : ShmemStructOpts *options_copy;
318 : :
319 : 80058 : options_copy = MemoryContextAlloc(TopMemoryContext,
320 : : sizeof(ShmemStructOpts));
321 : 80058 : memcpy(options_copy, options, sizeof(ShmemStructOpts));
322 : :
323 : 80058 : ShmemRequestInternal(options_copy, SHMEM_KIND_STRUCT);
54 heikki.linnakangas@i 324 :CBC 80058 : }
325 : :
326 : : /*
327 : : * Internal workhorse of ShmemRequestStruct() and ShmemRequestHash().
328 : : *
329 : : * Note: Unlike in the public ShmemRequestStruct() and ShmemRequestHash()
330 : : * functions, 'options' is *not* copied. It must be allocated in
331 : : * TopMemoryContext by the caller, and will be freed after the init/attach
332 : : * callbacks have been called. This allows ShmemRequestHash() to pass a
333 : : * pointer to the extended ShmemHashOpts struct instead.
334 : : */
335 : : void
54 heikki.linnakangas@i 336 :GNC 98838 : ShmemRequestInternal(ShmemStructOpts *options, ShmemRequestKind kind)
337 : : {
338 : : ShmemRequest *request;
339 : :
340 : : /* Check the options */
341 [ - + ]: 98838 : if (options->name == NULL)
54 heikki.linnakangas@i 342 [ # # ]:UNC 0 : elog(ERROR, "shared memory request is missing 'name' option");
343 : :
54 heikki.linnakangas@i 344 [ + + ]:GNC 98838 : if (IsUnderPostmaster)
345 : : {
346 [ - + - - ]: 3 : if (options->size <= 0 && options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
54 heikki.linnakangas@i 347 [ # # ]:UNC 0 : elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
348 : : options->size, options->name);
349 : : }
350 : : else
351 : : {
54 heikki.linnakangas@i 352 [ - + ]:GNC 98835 : if (options->size == SHMEM_ATTACH_UNKNOWN_SIZE)
54 heikki.linnakangas@i 353 [ # # ]:UNC 0 : elog(ERROR, "SHMEM_ATTACH_UNKNOWN_SIZE cannot be used during startup");
54 heikki.linnakangas@i 354 [ - + ]:GNC 98835 : if (options->size <= 0)
54 heikki.linnakangas@i 355 [ # # ]:UNC 0 : elog(ERROR, "invalid size %zd for shared memory request for \"%s\"",
356 : : options->size, options->name);
357 : : }
358 : :
54 heikki.linnakangas@i 359 [ + + - + ]:GNC 98838 : if (options->alignment != 0 && pg_nextpower2_size_t(options->alignment) != options->alignment)
54 heikki.linnakangas@i 360 [ # # ]:UNC 0 : elog(ERROR, "invalid alignment %zu for shared memory request for \"%s\"",
361 : : options->alignment, options->name);
362 : :
363 : : /* Check that we're in the right state */
54 heikki.linnakangas@i 364 [ - + ]:GNC 98838 : if (shmem_request_state != SRS_REQUESTING)
54 heikki.linnakangas@i 365 [ # # ]:UNC 0 : elog(ERROR, "ShmemRequestStruct can only be called from a shmem_request callback");
366 : :
367 : : /* Check that it's not already registered in this process */
54 heikki.linnakangas@i 368 [ + + + + :GNC 4052532 : foreach_ptr(ShmemRequest, existing, pending_shmem_requests)
+ + ]
369 : : {
370 [ - + ]: 3854856 : if (strcmp(existing->options->name, options->name) == 0)
54 heikki.linnakangas@i 371 [ # # ]:UNC 0 : ereport(ERROR,
372 : : (errmsg("shared memory struct \"%s\" is already registered",
373 : : options->name)));
374 : : }
375 : :
376 : : /* Request looks valid, remember it */
54 heikki.linnakangas@i 377 :GNC 98838 : request = palloc(sizeof(ShmemRequest));
378 : 98838 : request->options = options;
379 : 98838 : request->kind = kind;
380 : 98838 : pending_shmem_requests = lappend(pending_shmem_requests, request);
381 : 98838 : }
382 : :
383 : : /*
384 : : * ShmemGetRequestedSize() --- estimate the total size of all registered shared
385 : : * memory structures.
386 : : *
387 : : * This is called at postmaster startup, before the shared memory segment has
388 : : * been created.
389 : : */
390 : : size_t
391 : 2325 : ShmemGetRequestedSize(void)
392 : : {
393 : : size_t size;
394 : :
395 : : /* memory needed for the ShmemIndex */
396 : 2325 : size = hash_estimate_size(list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE,
397 : : sizeof(ShmemIndexEnt));
398 : 2325 : size = CACHELINEALIGN(size);
399 : :
400 : : /* memory needed for all the requested areas */
401 [ + - + + : 188342 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
402 : : {
403 : 183692 : size_t alignment = request->options->alignment;
404 : :
405 : : /* pad the start address for alignment like ShmemAllocRaw() does */
406 [ + + ]: 183692 : if (alignment < PG_CACHE_LINE_SIZE)
407 : 176717 : alignment = PG_CACHE_LINE_SIZE;
408 : 183692 : size = TYPEALIGN(alignment, size);
409 : :
410 : 183692 : size = add_size(size, request->options->size);
411 : : }
412 : :
413 : 2325 : return size;
414 : : }
415 : :
416 : : /*
417 : : * ShmemInitRequested() --- allocate and initialize requested shared memory
418 : : * structures.
419 : : *
420 : : * This is called once at postmaster startup, after the shared memory segment
421 : : * has been created.
422 : : */
423 : : void
424 : 1248 : ShmemInitRequested(void)
425 : : {
426 : : /* should be called only by the postmaster or a standalone backend */
427 [ - + ]: 1248 : Assert(!IsUnderPostmaster);
428 [ - + ]: 1248 : Assert(shmem_request_state == SRS_INITIALIZING);
429 : :
430 : : /*
431 : : * Initialize the ShmemIndex entries and perform basic initialization of
432 : : * all the requested memory areas. There are no concurrent processes yet,
433 : : * so no need for locking.
434 : : */
435 [ + - + + : 101094 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
436 : : {
437 : 98598 : InitShmemIndexEntry(request);
438 : 98598 : pfree(request->options);
439 : : }
440 : 1248 : list_free_deep(pending_shmem_requests);
441 : 1248 : pending_shmem_requests = NIL;
442 : :
443 : : /*
444 : : * Call the subsystem-specific init callbacks to finish initialization of
445 : : * all the areas.
446 : : */
447 [ + - + + : 57437 : foreach_ptr(const ShmemCallbacks, callbacks, registered_shmem_callbacks)
+ + ]
448 : : {
449 [ + + ]: 54941 : if (callbacks->init_fn)
450 : 49945 : callbacks->init_fn(callbacks->opaque_arg);
451 : : }
452 : :
453 : 1248 : shmem_request_state = SRS_DONE;
454 : 1248 : }
455 : :
456 : : /*
457 : : * Re-establish process private state related to shmem areas.
458 : : *
459 : : * This is called at backend startup in EXEC_BACKEND mode, in every backend.
460 : : */
#ifdef EXEC_BACKEND
void
ShmemAttachRequested(void)
{
	/* Must be initializing a (non-standalone) backend */
	Assert(IsUnderPostmaster);
	Assert(ShmemAllocator->index != NULL);
	Assert(shmem_request_state == SRS_REQUESTING);

	shmem_request_state = SRS_ATTACHING;

	/*
	 * The index lock is taken in shared mode and held across both the attach
	 * loop and the attach_fn callbacks below.
	 */
	LWLockAcquire(ShmemIndexLock, LW_SHARED);

	/*
	 * Attach to every requested area.  A missing entry is an error here
	 * (missing_ok = false), so the return value carries no information.
	 */
	foreach_ptr(ShmemRequest, req, pending_shmem_requests)
	{
		(void) AttachShmemIndexEntry(req, false);
		pfree(req->options);
	}
	list_free_deep(pending_shmem_requests);
	pending_shmem_requests = NIL;

	/* Give each subsystem a chance to do its own per-backend setup */
	foreach_ptr(const ShmemCallbacks, cb, registered_shmem_callbacks)
	{
		if (cb->attach_fn == NULL)
			continue;
		cb->attach_fn(cb->opaque_arg);
	}

	LWLockRelease(ShmemIndexLock);

	shmem_request_state = SRS_DONE;
}
#endif
500 : :
501 : : /*
502 : : * Insert requested shmem area into the shared memory index and initialize it.
503 : : *
504 : : * Note that this only performs basic initialization depending on
505 : : * ShmemRequestKind, like setting the global pointer variable to the area for
506 : : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
507 : : * This does *not* call the subsystem-specific init callbacks. That's done
508 : : * later after all the shmem areas have been initialized or attached to.
509 : : */
510 : : static void
511 : 98599 : InitShmemIndexEntry(ShmemRequest *request)
512 : : {
513 : 98599 : const char *name = request->options->name;
514 : : ShmemIndexEnt *index_entry;
515 : : bool found;
516 : : size_t allocated_size;
517 : : void *structPtr;
518 : :
519 : : /* look it up in the shmem index */
520 : : index_entry = (ShmemIndexEnt *)
521 : 98599 : hash_search(ShmemIndex, name, HASH_ENTER_NULL, &found);
522 [ - + ]: 98599 : if (found)
54 heikki.linnakangas@i 523 [ # # ]:UNC 0 : elog(ERROR, "shared memory struct \"%s\" is already initialized", name);
54 heikki.linnakangas@i 524 [ - + ]:GNC 98599 : if (!index_entry)
525 : : {
526 : : /* tried to add it to the hash table, but there was no space */
54 heikki.linnakangas@i 527 [ # # ]:UNC 0 : ereport(ERROR,
528 : : (errcode(ERRCODE_OUT_OF_MEMORY),
529 : : errmsg("could not create ShmemIndex entry for data structure \"%s\"",
530 : : name)));
531 : : }
532 : :
533 : : /*
534 : : * We inserted the entry to the shared memory index. Allocate requested
535 : : * amount of shared memory for it, and initialize the index entry.
536 : : */
54 heikki.linnakangas@i 537 :GNC 98599 : structPtr = ShmemAllocRaw(request->options->size,
538 : 98599 : request->options->alignment,
539 : : &allocated_size);
540 [ - + ]: 98599 : if (structPtr == NULL)
541 : : {
542 : : /* out of memory; remove the failed ShmemIndex entry */
54 heikki.linnakangas@i 543 :UNC 0 : hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
544 [ # # ]: 0 : ereport(ERROR,
545 : : (errcode(ERRCODE_OUT_OF_MEMORY),
546 : : errmsg("not enough shared memory for data structure"
547 : : " \"%s\" (%zd bytes requested)",
548 : : name, request->options->size)));
549 : : }
54 heikki.linnakangas@i 550 :GNC 98599 : index_entry->size = request->options->size;
551 : 98599 : index_entry->allocated_size = allocated_size;
552 : 98599 : index_entry->location = structPtr;
553 : :
554 : : /* Initialize depending on the kind of shmem area it is */
555 [ + + + - ]: 98599 : switch (request->kind)
556 : : {
557 : 79864 : case SHMEM_KIND_STRUCT:
558 [ + - ]: 79864 : if (request->options->ptr)
559 : 79864 : *(request->options->ptr) = index_entry->location;
560 : 79864 : break;
561 : 9995 : case SHMEM_KIND_HASH:
562 : 9995 : shmem_hash_init(structPtr, request->options);
563 : 9995 : break;
564 : 8740 : case SHMEM_KIND_SLRU:
565 : 8740 : shmem_slru_init(structPtr, request->options);
566 : 8740 : break;
567 : : }
568 : 98599 : }
569 : :
570 : : /*
571 : : * Look up a named shmem area in the shared memory index and attach to it.
572 : : *
573 : : * Note that this only performs the basic attachment actions depending on
574 : : * ShmemRequestKind, like setting the global pointer variable to the area for
575 : : * SHMEM_KIND_STRUCT or setting up the backend-private HTAB control struct.
576 : : * This does *not* call the subsystem-specific attach callbacks. That's done
577 : : * later after all the shmem areas have been initialized or attached to.
578 : : */
579 : : static bool
580 : 2 : AttachShmemIndexEntry(ShmemRequest *request, bool missing_ok)
581 : : {
582 : 2 : const char *name = request->options->name;
583 : : ShmemIndexEnt *index_entry;
584 : :
585 : : /* Look it up in the shmem index */
586 : : index_entry = (ShmemIndexEnt *)
587 : 2 : hash_search(ShmemIndex, name, HASH_FIND, NULL);
588 [ - + ]: 2 : if (!index_entry)
589 : : {
54 heikki.linnakangas@i 590 [ # # ]:UNC 0 : if (!missing_ok)
591 [ # # ]: 0 : ereport(ERROR,
592 : : (errmsg("could not find ShmemIndex entry for data structure \"%s\"",
593 : : request->options->name)));
594 : 0 : return false;
595 : : }
596 : :
597 : : /* Check that the size in the index matches the request */
54 heikki.linnakangas@i 598 [ - + ]:GNC 2 : if (index_entry->size != request->options->size &&
54 heikki.linnakangas@i 599 [ # # ]:UNC 0 : request->options->size != SHMEM_ATTACH_UNKNOWN_SIZE)
600 : : {
601 [ # # ]: 0 : ereport(ERROR,
602 : : (errmsg("shared memory struct \"%s\" was created with"
603 : : " different size: existing %zu, requested %zd",
604 : : name, index_entry->size, request->options->size)));
605 : : }
606 : :
607 : : /*
608 : : * Re-establish the caller's pointer variable, or do other actions to
609 : : * attach depending on the kind of shmem area it is.
610 : : */
54 heikki.linnakangas@i 611 [ + - - - ]:GNC 2 : switch (request->kind)
612 : : {
613 : 2 : case SHMEM_KIND_STRUCT:
614 [ + - ]: 2 : if (request->options->ptr)
615 : 2 : *(request->options->ptr) = index_entry->location;
616 : 2 : break;
54 heikki.linnakangas@i 617 :UNC 0 : case SHMEM_KIND_HASH:
618 : 0 : shmem_hash_attach(index_entry->location, request->options);
619 : 0 : break;
620 : 0 : case SHMEM_KIND_SLRU:
621 : 0 : shmem_slru_attach(index_entry->location, request->options);
622 : 0 : break;
623 : : }
624 : :
54 heikki.linnakangas@i 625 :GNC 2 : return true;
626 : : }
627 : :
628 : : /*
629 : : * InitShmemAllocator() --- set up basic pointers to shared memory.
630 : : *
631 : : * Called at postmaster or stand-alone backend startup, to initialize the
632 : : * allocator's data structure in the shared memory segment. In EXEC_BACKEND,
633 : : * this is also called at backend startup, to set up pointers to the
634 : : * already-initialized data structure.
635 : : */
636 : : void
120 637 : 1248 : InitShmemAllocator(PGShmemHeader *seghdr)
638 : : {
639 : : Size offset;
640 : : int64 hash_nelems;
641 : : HASHCTL info;
642 : : int hash_flags;
643 : :
644 : : #ifndef EXEC_BACKEND
65 645 [ - + ]: 1248 : Assert(!IsUnderPostmaster);
646 : : #endif
120 647 [ - + ]: 1248 : Assert(seghdr != NULL);
648 : :
54 649 [ - + ]: 1248 : if (IsUnderPostmaster)
650 : : {
54 heikki.linnakangas@i 651 [ # # ]:UNC 0 : Assert(shmem_request_state == SRS_INITIAL);
652 : : }
653 : : else
654 : : {
54 heikki.linnakangas@i 655 [ - + ]:GNC 1248 : Assert(shmem_request_state == SRS_REQUESTING);
656 : 1248 : shmem_request_state = SRS_INITIALIZING;
657 : : }
658 : :
659 : : /*
660 : : * We assume the pointer and offset are MAXALIGN. Not a hard requirement,
661 : : * but it's true today and keeps the math below simpler.
662 : : */
120 663 [ - + ]: 1248 : Assert(seghdr == (void *) MAXALIGN(seghdr));
664 [ - + ]: 1248 : Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));
665 : :
666 : : /*
667 : : * Allocations after this point should go through ShmemAlloc, which
668 : : * expects to allocate everything on cache line boundaries. Make sure the
669 : : * first allocation begins on a cache line boundary.
670 : : */
65 671 : 1248 : offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
672 [ - + ]: 1248 : if (offset > seghdr->totalsize)
65 heikki.linnakangas@i 673 [ # # ]:UNC 0 : ereport(ERROR,
674 : : (errcode(ERRCODE_OUT_OF_MEMORY),
675 : : errmsg("out of shared memory (%zu bytes requested)",
676 : : offset)));
677 : :
678 : : /*
679 : : * In postmaster or stand-alone backend, initialize the shared memory
680 : : * allocator so that we can allocate shared memory for ShmemIndex using
681 : : * ShmemAlloc(). In a regular backend just set up the pointers required
682 : : * by ShmemAlloc().
683 : : */
65 heikki.linnakangas@i 684 :GNC 1248 : ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
685 [ + - ]: 1248 : if (!IsUnderPostmaster)
686 : : {
687 : 1248 : SpinLockInit(&ShmemAllocator->shmem_lock);
688 : 1248 : ShmemAllocator->free_offset = offset;
689 : 1248 : LWLockInitialize(&ShmemAllocator->index_lock, LWTRANCHE_SHMEM_INDEX);
690 : : }
691 : :
550 peter@eisentraut.org 692 : 1248 : ShmemSegHdr = seghdr;
693 : 1248 : ShmemBase = seghdr;
694 : 1248 : ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
695 : :
696 : : /*
697 : : * Create (or attach to) the shared memory index of shmem areas.
698 : : *
699 : : * This is the same initialization as ShmemInitHash() does, but we cannot
700 : : * use ShmemInitHash() here because it relies on ShmemIndex being already
701 : : * initialized.
702 : : */
54 heikki.linnakangas@i 703 : 1248 : hash_nelems = list_length(pending_shmem_requests) + SHMEM_INDEX_ADDITIONAL_SIZE;
704 : :
65 705 : 1248 : info.keysize = SHMEM_INDEX_KEYSIZE;
706 : 1248 : info.entrysize = sizeof(ShmemIndexEnt);
56 707 : 1248 : hash_flags = HASH_ELEM | HASH_STRINGS | HASH_FIXED_SIZE;
708 : :
65 709 [ + - ]: 1248 : if (!IsUnderPostmaster)
710 : : {
54 711 : 1248 : ShmemAllocator->index_size = hash_estimate_size(hash_nelems, info.entrysize);
56 712 : 1248 : ShmemAllocator->index = (HASHHDR *) ShmemAlloc(ShmemAllocator->index_size);
713 : : }
714 : 2496 : ShmemIndex = shmem_hash_create(ShmemAllocator->index,
715 : 1248 : ShmemAllocator->index_size,
716 : : IsUnderPostmaster,
717 : : "ShmemIndex", hash_nelems,
718 : : &info, hash_flags);
65 719 [ - + ]: 1248 : Assert(ShmemIndex != NULL);
720 : :
721 : : /*
722 : : * Add an entry for ShmemIndex itself into ShmemIndex, so that it's
723 : : * visible in the pg_shmem_allocations view
724 : : */
59 725 [ + - ]: 1248 : if (!IsUnderPostmaster)
726 : : {
727 : : bool found;
728 : : ShmemIndexEnt *result = (ShmemIndexEnt *)
729 : 1248 : hash_search(ShmemIndex, "ShmemIndex", HASH_ENTER, &found);
730 : :
731 [ - + ]: 1248 : Assert(!found);
56 732 : 1248 : result->size = ShmemAllocator->index_size;
733 : 1248 : result->allocated_size = ShmemAllocator->index_size;
59 734 : 1248 : result->location = ShmemAllocator->index;
735 : : }
10917 scrappy@hub.org 736 : 1248 : }
737 : :
738 : : /*
739 : : * Reset state on postmaster crash restart.
740 : : */
741 : : void
54 heikki.linnakangas@i 742 : 5 : ResetShmemAllocator(void)
743 : : {
744 [ - + ]: 5 : Assert(!IsUnderPostmaster);
745 : 5 : shmem_request_state = SRS_INITIAL;
746 : :
747 : 5 : pending_shmem_requests = NIL;
748 : :
749 : : /*
750 : : * Note that we don't clear the registered callbacks. We will need to
751 : : * call them again as we restart
752 : : */
54 heikki.linnakangas@i 753 :CBC 5 : }
754 : :
755 : : /*
756 : : * ShmemAlloc -- allocate max-aligned chunk from shared memory
757 : : *
758 : : * Throws error if request cannot be satisfied.
759 : : *
760 : : * Assumes ShmemSegHdr is initialized.
761 : : */
762 : : void *
7167 tgl@sss.pgh.pa.us 763 : 1248 : ShmemAlloc(Size size)
764 : : {
765 : : void *newSpace;
766 : : Size allocated_size;
767 : :
54 heikki.linnakangas@i 768 :GNC 1248 : newSpace = ShmemAllocRaw(size, 0, &allocated_size);
3558 tgl@sss.pgh.pa.us 769 [ - + ]:CBC 1248 : if (!newSpace)
3558 tgl@sss.pgh.pa.us 770 [ # # ]:UBC 0 : ereport(ERROR,
771 : : (errcode(ERRCODE_OUT_OF_MEMORY),
772 : : errmsg("out of shared memory (%zu bytes requested)",
773 : : size)));
3558 tgl@sss.pgh.pa.us 774 :CBC 1248 : return newSpace;
775 : : }
776 : :
777 : : /*
778 : : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
779 : : *
780 : : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
781 : : */
782 : : void *
3558 tgl@sss.pgh.pa.us 783 :LBC (438600) : ShmemAllocNoError(Size size)
784 : : {
785 : : Size allocated_size;
786 : :
54 heikki.linnakangas@i 787 :UNC 0 : return ShmemAllocRaw(size, 0, &allocated_size);
788 : : }
789 : :
790 : : /*
791 : : * ShmemAllocRaw -- allocate align chunk and return allocated size
792 : : *
793 : : * Also sets *allocated_size to the number of bytes allocated, which will
794 : : * be equal to the number requested plus any padding we choose to add.
795 : : */
796 : : static void *
54 heikki.linnakangas@i 797 :GNC 99847 : ShmemAllocRaw(Size size, Size alignment, Size *allocated_size)
798 : : {
799 : : Size rawStart;
800 : : Size newStart;
801 : : Size newFree;
802 : : void *newSpace;
803 : :
804 : : /*
805 : : * Ensure all space is adequately aligned. We used to only MAXALIGN this
806 : : * space but experience has proved that on modern systems that is not good
807 : : * enough. Many parts of the system are very sensitive to critical data
808 : : * structures getting split across cache line boundaries. To avoid that,
809 : : * attempt to align the beginning of the allocation to a cache line
810 : : * boundary. The calling code will still need to be careful about how it
811 : : * uses the allocated space - e.g. by padding each element in an array of
812 : : * structures out to a power-of-two size - but without this, even that
813 : : * won't be sufficient.
814 : : */
815 [ + + ]: 99847 : if (alignment < PG_CACHE_LINE_SIZE)
816 : 96103 : alignment = PG_CACHE_LINE_SIZE;
817 : :
3879 rhaas@postgresql.org 818 [ - + ]:CBC 99847 : Assert(ShmemSegHdr != NULL);
819 : :
65 heikki.linnakangas@i 820 :GNC 99847 : SpinLockAcquire(&ShmemAllocator->shmem_lock);
821 : :
54 822 : 99847 : rawStart = ShmemAllocator->free_offset;
823 : 99847 : newStart = TYPEALIGN(alignment, rawStart);
824 : :
8287 tgl@sss.pgh.pa.us 825 :CBC 99847 : newFree = newStart + size;
3879 rhaas@postgresql.org 826 [ + - ]: 99847 : if (newFree <= ShmemSegHdr->totalsize)
827 : : {
548 peter@eisentraut.org 828 : 99847 : newSpace = (char *) ShmemBase + newStart;
120 heikki.linnakangas@i 829 :GNC 99847 : ShmemAllocator->free_offset = newFree;
830 : : }
831 : : else
10492 bruce@momjian.us 832 :UBC 0 : newSpace = NULL;
833 : :
65 heikki.linnakangas@i 834 :GNC 99847 : SpinLockRelease(&ShmemAllocator->shmem_lock);
835 : :
836 : : /* note this assert is okay with newSpace == NULL */
54 837 [ - + ]: 99847 : Assert(newSpace == (void *) TYPEALIGN(alignment, newSpace));
838 : :
839 : 99847 : *allocated_size = newFree - rawStart;
10133 bruce@momjian.us 840 :CBC 99847 : return newSpace;
841 : : }
842 : :
843 : : /*
844 : : * ShmemAddrIsValid -- test if an address refers to shared memory
845 : : *
846 : : * Returns true if the pointer points within the shared memory segment.
847 : : */
848 : : bool
5591 heikki.linnakangas@i 849 : 1692 : ShmemAddrIsValid(const void *addr)
850 : : {
6418 tgl@sss.pgh.pa.us 851 [ + - + - ]: 1692 : return (addr >= ShmemBase) && (addr < ShmemEnd);
852 : : }
853 : :
854 : : /*
855 : : * Register callbacks that define a shared memory area (or multiple areas).
856 : : *
857 : : * The system will call the callbacks at different stages of postmaster or
858 : : * backend startup, to allocate and initialize the area.
859 : : *
860 : : * This is normally called early during postmaster startup, but if the
861 : : * SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP is set, this can also be used after
862 : : * startup, although after startup there's no guarantee that there's enough
863 : : * shared memory available. When called after startup, this immediately calls
864 : : * the right callbacks depending on whether another backend had already
865 : : * initialized the area.
866 : : *
867 : : * Note: In EXEC_BACKEND mode, this needs to be called in every backend
868 : : * process. That's needed because we cannot pass down the callback function
869 : : * pointers from the postmaster process, because different processes may have
870 : : * loaded libraries to different addresses.
871 : : */
872 : : void
54 heikki.linnakangas@i 873 :GNC 55426 : RegisterShmemCallbacks(const ShmemCallbacks *callbacks)
874 : : {
875 [ + + + - ]: 55426 : if (shmem_request_state == SRS_DONE && IsUnderPostmaster)
876 : : {
877 : : /*
878 : : * After-startup initialization or attachment. Call the appropriate
879 : : * callbacks immediately.
880 : : */
881 [ - + ]: 3 : if ((callbacks->flags & SHMEM_CALLBACKS_ALLOW_AFTER_STARTUP) == 0)
54 heikki.linnakangas@i 882 [ # # ]:UNC 0 : elog(ERROR, "cannot request shared memory at this time");
883 : :
54 heikki.linnakangas@i 884 :GNC 3 : CallShmemCallbacksAfterStartup(callbacks);
885 : : }
886 : : else
887 : : {
888 : : /* Remember the callbacks for later */
889 : 55423 : registered_shmem_callbacks = lappend(registered_shmem_callbacks,
890 : : (void *) callbacks);
891 : : }
54 heikki.linnakangas@i 892 :CBC 55426 : }
893 : :
894 : : /*
895 : : * Register a shmem area (or multiple areas) after startup.
896 : : */
897 : : static void
54 heikki.linnakangas@i 898 :GNC 3 : CallShmemCallbacksAfterStartup(const ShmemCallbacks *callbacks)
899 : : {
900 : : bool found_any;
901 : : bool notfound_any;
902 : :
903 [ - + ]: 3 : Assert(shmem_request_state == SRS_DONE);
904 : 3 : shmem_request_state = SRS_REQUESTING;
905 : :
906 : : /*
907 : : * Call the request callback first. The callback makes ShmemRequest*()
908 : : * calls for each shmem area, adding them to pending_shmem_requests.
909 : : */
910 [ - + ]: 3 : Assert(pending_shmem_requests == NIL);
911 [ + - ]: 3 : if (callbacks->request_fn)
912 : 3 : callbacks->request_fn(callbacks->opaque_arg);
913 : 3 : shmem_request_state = SRS_AFTER_STARTUP_ATTACH_OR_INIT;
914 : :
915 [ - + ]: 3 : if (pending_shmem_requests == NIL)
916 : : {
54 heikki.linnakangas@i 917 :UNC 0 : shmem_request_state = SRS_DONE;
918 : 0 : return;
919 : : }
920 : :
921 : : /* Hold ShmemIndexLock while we allocate all the shmem entries */
65 heikki.linnakangas@i 922 :GNC 3 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
923 : :
924 : : /*
925 : : * Check if the requested shared memory areas have already been
926 : : * initialized. We assume all the areas requested by the request callback
927 : : * to form a coherent unit such that they're all already initialized or
928 : : * none. Otherwise it would be ambiguous which callback, init or attach,
929 : : * to callback afterwards.
930 : : */
54 931 : 3 : found_any = notfound_any = false;
932 [ + - + + : 9 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
933 : : {
934 [ + + ]: 3 : if (hash_search(ShmemIndex, request->options->name, HASH_FIND, NULL))
935 : 2 : found_any = true;
936 : : else
937 : 1 : notfound_any = true;
938 : : }
939 [ + + - + ]: 3 : if (found_any && notfound_any)
54 heikki.linnakangas@i 940 [ # # ]:UNC 0 : elog(ERROR, "found some but not all");
941 : :
942 : : /*
943 : : * Allocate or attach all the shmem areas requested by the request_fn
944 : : * callback.
945 : : */
54 heikki.linnakangas@i 946 [ + - + + :GNC 9 : foreach_ptr(ShmemRequest, request, pending_shmem_requests)
+ + ]
947 : : {
948 [ + + ]: 3 : if (found_any)
949 : 2 : AttachShmemIndexEntry(request, false);
950 : : else
951 : 1 : InitShmemIndexEntry(request);
952 : :
953 : 3 : pfree(request->options);
954 : : }
955 : 3 : list_free_deep(pending_shmem_requests);
956 : 3 : pending_shmem_requests = NIL;
957 : :
958 : : /* Finish by calling the appropriate subsystem-specific callback */
959 [ + + ]: 3 : if (found_any)
960 : : {
961 [ + - ]: 2 : if (callbacks->attach_fn)
962 : 2 : callbacks->attach_fn(callbacks->opaque_arg);
963 : : }
964 : : else
965 : : {
966 [ + - ]: 1 : if (callbacks->init_fn)
967 : 1 : callbacks->init_fn(callbacks->opaque_arg);
968 : : }
969 : :
7451 tgl@sss.pgh.pa.us 970 : 3 : LWLockRelease(ShmemIndexLock);
54 heikki.linnakangas@i 971 : 3 : shmem_request_state = SRS_DONE;
972 : : }
973 : :
974 : : /*
975 : : * Call all shmem request callbacks.
976 : : */
977 : : void
978 : 1251 : ShmemCallRequestCallbacks(void)
979 : : {
980 : : ListCell *lc;
981 : :
982 [ - + ]: 1251 : Assert(shmem_request_state == SRS_INITIAL);
983 : 1251 : shmem_request_state = SRS_REQUESTING;
984 : :
985 [ + - + + : 56324 : foreach(lc, registered_shmem_callbacks)
+ + ]
986 : : {
987 : 55073 : const ShmemCallbacks *callbacks = (const ShmemCallbacks *) lfirst(lc);
988 : :
989 [ + - ]: 55073 : if (callbacks->request_fn)
990 : 55073 : callbacks->request_fn(callbacks->opaque_arg);
991 : : }
10917 scrappy@hub.org 992 :GIC 1251 : }
993 : :
994 : : /*
995 : : * ShmemInitStruct -- Create/attach to a structure in shared memory.
996 : : *
997 : : * This is called during initialization to find or allocate
998 : : * a data structure in shared memory. If no other process
999 : : * has created the structure, this routine allocates space
1000 : : * for it. If it exists already, a pointer to the existing
1001 : : * structure is returned.
1002 : : *
1003 : : * Returns: pointer to the object. *foundPtr is set true if the object was
1004 : : * already in the shmem index (hence, already initialized).
1005 : : *
1006 : : * Note: This is a legacy interface, kept for backwards compatibility with
1007 : : * extensions. Use ShmemRequestStruct() in new code!
1008 : : */
1009 : : void *
54 heikki.linnakangas@i 1010 :LBC (80547) : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
1011 : : {
54 heikki.linnakangas@i 1012 :UNC 0 : void *ptr = NULL;
1013 : 0 : ShmemStructOpts options = {
1014 : : .name = name,
1015 : : .size = size,
1016 : : .ptr = &ptr,
1017 : : };
1018 : 0 : ShmemRequest request = {&options, SHMEM_KIND_STRUCT};
1019 : :
1020 [ # # # # : 0 : Assert(shmem_request_state == SRS_DONE ||
# # ]
1021 : : shmem_request_state == SRS_INITIALIZING ||
1022 : : shmem_request_state == SRS_REQUESTING);
1023 : :
54 heikki.linnakangas@i 1024 :LBC (80547) : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
1025 : :
1026 : : /*
1027 : : * During postmaster startup, look up the existing entry if any.
1028 : : */
54 heikki.linnakangas@i 1029 :UNC 0 : *foundPtr = false;
1030 [ # # ]: 0 : if (IsUnderPostmaster)
1031 : 0 : *foundPtr = AttachShmemIndexEntry(&request, true);
1032 : :
1033 : : /* Initialize it if not found */
1034 [ # # ]: 0 : if (!*foundPtr)
1035 : 0 : InitShmemIndexEntry(&request);
1036 : :
54 heikki.linnakangas@i 1037 :LBC (79473) : LWLockRelease(ShmemIndexLock);
1038 : :
54 heikki.linnakangas@i 1039 [ # # ]:UNC 0 : Assert(ptr != NULL);
1040 : 0 : return ptr;
1041 : : }
1042 : :
1043 : : /* SQL SRF showing allocated shared memory */
1044 : : Datum
2333 rhaas@postgresql.org 1045 :CBC 4 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
1046 : : {
1047 : : #define PG_GET_SHMEM_SIZES_COLS 4
1048 : 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1049 : : HASH_SEQ_STATUS hstat;
1050 : : ShmemIndexEnt *ent;
2207 tgl@sss.pgh.pa.us 1051 : 4 : Size named_allocated = 0;
1052 : : Datum values[PG_GET_SHMEM_SIZES_COLS];
1053 : : bool nulls[PG_GET_SHMEM_SIZES_COLS];
1054 : :
1320 michael@paquier.xyz 1055 : 4 : InitMaterializedSRF(fcinfo, 0);
1056 : :
2333 rhaas@postgresql.org 1057 : 4 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
1058 : :
1059 : 4 : hash_seq_init(&hstat, ShmemIndex);
1060 : :
1061 : : /* output all allocated entries */
1062 : 4 : memset(nulls, 0, sizeof(nulls));
1063 [ + + ]: 326 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
1064 : : {
1065 : 322 : values[0] = CStringGetTextDatum(ent->key);
1066 : 322 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
1067 : 322 : values[2] = Int64GetDatum(ent->size);
1068 : 322 : values[3] = Int64GetDatum(ent->allocated_size);
1069 : 322 : named_allocated += ent->allocated_size;
1070 : :
1545 michael@paquier.xyz 1071 : 322 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1072 : : values, nulls);
1073 : : }
1074 : :
1075 : : /* output shared memory allocated but not counted via the shmem index */
2333 rhaas@postgresql.org 1076 : 4 : values[0] = CStringGetTextDatum("<anonymous>");
1077 : 4 : nulls[1] = true;
120 heikki.linnakangas@i 1078 :GNC 4 : values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
2333 rhaas@postgresql.org 1079 :CBC 4 : values[3] = values[2];
1545 michael@paquier.xyz 1080 : 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1081 : :
1082 : : /* output as-of-yet unused shared memory */
2333 rhaas@postgresql.org 1083 : 4 : nulls[0] = true;
120 heikki.linnakangas@i 1084 :GNC 4 : values[1] = Int64GetDatum(ShmemAllocator->free_offset);
2333 rhaas@postgresql.org 1085 :CBC 4 : nulls[1] = false;
120 heikki.linnakangas@i 1086 :GNC 4 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
2333 rhaas@postgresql.org 1087 :CBC 4 : values[3] = values[2];
1545 michael@paquier.xyz 1088 : 4 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
1089 : :
2333 rhaas@postgresql.org 1090 : 4 : LWLockRelease(ShmemIndexLock);
1091 : :
1092 : 4 : return (Datum) 0;
1093 : : }
1094 : :
1095 : : /*
1096 : : * SQL SRF showing NUMA memory nodes for allocated shared memory
1097 : : *
1098 : : * Compared to pg_get_shmem_allocations(), this function does not return
1099 : : * information about shared anonymous allocations and unused shared memory.
1100 : : */
1101 : : Datum
418 tomas.vondra@postgre 1102 : 4 : pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
1103 : : {
1104 : : #define PG_GET_SHMEM_NUMA_SIZES_COLS 3
1105 : 4 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1106 : : HASH_SEQ_STATUS hstat;
1107 : : ShmemIndexEnt *ent;
1108 : : Datum values[PG_GET_SHMEM_NUMA_SIZES_COLS];
1109 : : bool nulls[PG_GET_SHMEM_NUMA_SIZES_COLS];
1110 : : Size os_page_size;
1111 : : void **page_ptrs;
1112 : : int *pages_status;
1113 : : uint64 shm_total_page_count,
1114 : : shm_ent_page_count,
1115 : : max_nodes;
1116 : : Size *nodes;
1117 : :
1118 [ - + ]: 4 : if (pg_numa_init() == -1)
418 tomas.vondra@postgre 1119 [ # # ]:UBC 0 : elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");
1120 : :
418 tomas.vondra@postgre 1121 :CBC 4 : InitMaterializedSRF(fcinfo, 0);
1122 : :
1123 : 4 : max_nodes = pg_numa_get_max_node();
124 tomas.vondra@postgre 1124 :GNC 4 : nodes = palloc_array(Size, max_nodes + 2);
1125 : :
1126 : : /*
1127 : : * Shared memory allocations can vary in size and may not align with OS
1128 : : * memory page boundaries, while NUMA queries work on pages.
1129 : : *
1130 : : * To correctly map each allocation to NUMA nodes, we need to: 1.
1131 : : * Determine the OS memory page size. 2. Align each allocation's start/end
1132 : : * addresses to page boundaries. 3. Query NUMA node information for all
1133 : : * pages spanning the allocation.
1134 : : */
416 tomas.vondra@postgre 1135 :CBC 4 : os_page_size = pg_get_shmem_pagesize();
1136 : :
1137 : : /*
1138 : : * Allocate memory for page pointers and status based on total shared
1139 : : * memory size. This simplified approach allocates enough space for all
1140 : : * pages in shared memory rather than calculating the exact requirements
1141 : : * for each segment.
1142 : : *
1143 : : * Add 1, because we don't know how exactly the segments align to OS
1144 : : * pages, so the allocation might use one more memory page. In practice
1145 : : * this is not very likely, and moreover we have more entries, each of
1146 : : * them using only fraction of the total pages.
1147 : : */
418 1148 : 4 : shm_total_page_count = (ShmemSegHdr->totalsize / os_page_size) + 1;
171 michael@paquier.xyz 1149 :GNC 4 : page_ptrs = palloc0_array(void *, shm_total_page_count);
1150 : 4 : pages_status = palloc_array(int, shm_total_page_count);
1151 : :
418 tomas.vondra@postgre 1152 [ + - ]:CBC 4 : if (firstNumaTouch)
1153 [ - + ]: 4 : elog(DEBUG1, "NUMA: page-faulting shared memory segments for proper NUMA readouts");
1154 : :
1155 : 4 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
1156 : :
1157 : 4 : hash_seq_init(&hstat, ShmemIndex);
1158 : :
1159 : : /* output all allocated entries */
1160 [ + + ]: 326 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
1161 : : {
1162 : : int i;
1163 : : char *startptr,
1164 : : *endptr;
1165 : : Size total_len;
1166 : :
1167 : : /*
1168 : : * Calculate the range of OS pages used by this segment. The segment
1169 : : * may start / end half-way through a page, we want to count these
1170 : : * pages too. So we align the start/end pointers down/up, and then
1171 : : * calculate the number of pages from that.
1172 : : */
1173 : 322 : startptr = (char *) TYPEALIGN_DOWN(os_page_size, ent->location);
1174 : 322 : endptr = (char *) TYPEALIGN(os_page_size,
1175 : : (char *) ent->location + ent->allocated_size);
1176 : 322 : total_len = (endptr - startptr);
1177 : :
1178 : 322 : shm_ent_page_count = total_len / os_page_size;
1179 : :
1180 : : /*
1181 : : * If we ever get 0xff (-1) back from kernel inquiry, then we probably
1182 : : * have a bug in mapping buffers to OS pages.
1183 : : */
1184 : 322 : memset(pages_status, 0xff, sizeof(int) * shm_ent_page_count);
1185 : :
1186 : : /*
1187 : : * Setup page_ptrs[] with pointers to all OS pages for this segment,
1188 : : * and get the NUMA status using pg_numa_query_pages.
1189 : : *
1190 : : * In order to get reliable results we also need to touch memory
1191 : : * pages, so that inquiry about NUMA memory node doesn't return -2
1192 : : * (ENOENT, which indicates unmapped/unallocated pages).
1193 : : */
1194 [ + + ]: 123853 : for (i = 0; i < shm_ent_page_count; i++)
1195 : : {
1196 : 123531 : page_ptrs[i] = startptr + (i * os_page_size);
1197 : :
1198 [ + - ]: 123531 : if (firstNumaTouch)
333 1199 : 123531 : pg_numa_touch_mem_if_required(page_ptrs[i]);
1200 : :
418 1201 [ - + ]: 123531 : CHECK_FOR_INTERRUPTS();
1202 : : }
1203 : :
1204 [ - + ]: 322 : if (pg_numa_query_pages(0, shm_ent_page_count, page_ptrs, pages_status) == -1)
418 tomas.vondra@postgre 1205 [ # # ]:UBC 0 : elog(ERROR, "failed NUMA pages inquiry status: %m");
1206 : :
1207 : : /* Count number of NUMA nodes used for this shared memory entry */
124 tomas.vondra@postgre 1208 :CBC 322 : memset(nodes, 0, sizeof(Size) * (max_nodes + 2));
1209 : :
418 1210 [ + + ]: 123853 : for (i = 0; i < shm_ent_page_count; i++)
1211 : : {
1212 : 123531 : int s = pages_status[i];
1213 : :
1214 : : /* Ensure we are adding only valid index to the array */
124 1215 [ + - + - ]: 123531 : if (s >= 0 && s <= max_nodes)
1216 : : {
1217 : : /* valid NUMA node */
1218 : 123531 : nodes[s]++;
1219 : 123531 : continue;
1220 : : }
124 tomas.vondra@postgre 1221 [ # # ]:UBC 0 : else if (s == -2)
1222 : : {
1223 : : /* -2 means ENOENT (e.g. page was moved to swap) */
1224 : 0 : nodes[max_nodes + 1]++;
1225 : 0 : continue;
1226 : : }
1227 : :
1228 [ # # ]: 0 : elog(ERROR, "invalid NUMA node id outside of allowed range "
1229 : : "[0, " UINT64_FORMAT "]: %d", max_nodes, s);
1230 : : }
1231 : :
1232 : : /* no NULLs for regular nodes */
124 tomas.vondra@postgre 1233 :CBC 322 : memset(nulls, 0, sizeof(nulls));
1234 : :
1235 : : /*
1236 : : * Add one entry for each NUMA node, including those without allocated
1237 : : * memory for this segment.
1238 : : */
418 1239 [ + + ]: 644 : for (i = 0; i <= max_nodes; i++)
1240 : : {
1241 : 322 : values[0] = CStringGetTextDatum(ent->key);
295 peter@eisentraut.org 1242 :GNC 322 : values[1] = Int32GetDatum(i);
418 tomas.vondra@postgre 1243 :CBC 322 : values[2] = Int64GetDatum(nodes[i] * os_page_size);
1244 : :
1245 : 322 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1246 : : values, nulls);
1247 : : }
1248 : :
1249 : : /* The last entry is used for pages without a NUMA node. */
124 1250 : 322 : nulls[1] = true;
1251 : 322 : values[0] = CStringGetTextDatum(ent->key);
1252 : 322 : values[2] = Int64GetDatum(nodes[max_nodes + 1] * os_page_size);
1253 : :
1254 : 322 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
1255 : : values, nulls);
1256 : : }
1257 : :
418 1258 : 4 : LWLockRelease(ShmemIndexLock);
1259 : 4 : firstNumaTouch = false;
1260 : :
1261 : 4 : return (Datum) 0;
1262 : : }
1263 : :
1264 : : /*
1265 : : * Determine the memory page size used for the shared memory segment.
1266 : : *
1267 : : * If the shared segment was allocated using huge pages, returns the size of
1268 : : * a huge page. Otherwise returns the size of regular memory page.
1269 : : *
1270 : : * This should be used only after the server is started.
1271 : : */
1272 : : Size
416 1273 : 8 : pg_get_shmem_pagesize(void)
1274 : : {
1275 : : Size os_page_size;
1276 : : #ifdef WIN32
1277 : : SYSTEM_INFO sysinfo;
1278 : :
1279 : : GetSystemInfo(&sysinfo);
1280 : : os_page_size = sysinfo.dwPageSize;
1281 : : #else
1282 : 8 : os_page_size = sysconf(_SC_PAGESIZE);
1283 : : #endif
1284 : :
1285 [ - + ]: 8 : Assert(IsUnderPostmaster);
1286 [ - + ]: 8 : Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
1287 : :
1288 [ - + ]: 8 : if (huge_pages_status == HUGE_PAGES_ON)
416 tomas.vondra@postgre 1289 :UBC 0 : GetHugePageSize(&os_page_size, NULL);
1290 : :
416 tomas.vondra@postgre 1291 :CBC 8 : return os_page_size;
1292 : : }
1293 : :
1294 : : Datum
1295 : 5 : pg_numa_available(PG_FUNCTION_ARGS)
1296 : : {
1297 : 5 : PG_RETURN_BOOL(pg_numa_init() != -1);
1298 : : }
|