Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * dsm_impl.c
4 : : * manage dynamic shared memory segments
5 : : *
6 : : * This file provides low-level APIs for creating and destroying shared
7 : : * memory segments using several different possible techniques. We refer
8 : : * to these segments as dynamic because they can be created, altered, and
9 : : * destroyed at any point during the server life cycle. This is unlike
10 : : * the main shared memory segment, of which there is always exactly one
11 : : * and which is always mapped at a fixed address in every PostgreSQL
12 : : * background process.
13 : : *
14 : : * Because not all systems provide the same primitives in this area, nor
15 : : * do all primitives behave the same way on all systems, we provide
16 : : * several implementations of this facility. Many systems implement
17 : : * POSIX shared memory (shm_open etc.), which is well-suited to our needs
18 : : * in this area, with the exception that shared memory identifiers live
19 : : * in a flat system-wide namespace, raising the uncomfortable prospect of
20 : : * name collisions with other processes (including other copies of
21 : : * PostgreSQL) running on the same system. Some systems only support
22 : : * the older System V shared memory interface (shmget etc.) which is
23 : : * also usable; however, the default allocation limits are often quite
24 : : * small, and the namespace is even more restricted.
25 : : *
26 : : * We also provide an mmap-based shared memory implementation. This may
27 : : * be useful on systems that provide shared memory via a special-purpose
28 : : * filesystem; by opting for this implementation, the user can even
29 : : * control precisely where their shared memory segments are placed. It
30 : : * can also be used as a fallback for systems where shm_open and shmget
31 : : * are not available or can't be used for some reason. Of course,
32 : : * mapping a file residing on an actual spinning disk is a fairly poor
33 : : * approximation for shared memory because writeback may hurt performance
34 : : * substantially, but there should be few systems where we must make do
35 : : * with such poor tools.
36 : : *
37 : : * As ever, Windows requires its own implementation.
38 : : *
39 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
40 : : * Portions Copyright (c) 1994, Regents of the University of California
41 : : *
42 : : *
43 : : * IDENTIFICATION
44 : : * src/backend/storage/ipc/dsm_impl.c
45 : : *
46 : : *-------------------------------------------------------------------------
47 : : */
48 : :
49 : : #include "postgres.h"
50 : :
51 : : #include <fcntl.h>
52 : : #include <signal.h>
53 : : #include <unistd.h>
54 : : #ifndef WIN32
55 : : #include <sys/mman.h>
56 : : #include <sys/ipc.h>
57 : : #include <sys/shm.h>
58 : : #include <sys/stat.h>
59 : : #endif
60 : :
61 : : #include "common/file_perm.h"
62 : : #include "libpq/pqsignal.h"
63 : : #include "miscadmin.h"
64 : : #include "pgstat.h"
65 : : #include "portability/mem.h"
66 : : #include "postmaster/postmaster.h"
67 : : #include "storage/dsm_impl.h"
68 : : #include "storage/fd.h"
69 : : #include "utils/guc.h"
70 : : #include "utils/memutils.h"
71 : : #include "utils/wait_event.h"
72 : :
73 : : #ifdef USE_DSM_POSIX
74 : : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
75 : : void **impl_private, void **mapped_address,
76 : : Size *mapped_size, int elevel);
77 : : static int dsm_impl_posix_resize(int fd, off_t size);
78 : : #endif
79 : : #ifdef USE_DSM_SYSV
80 : : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
81 : : void **impl_private, void **mapped_address,
82 : : Size *mapped_size, int elevel);
83 : : #endif
84 : : #ifdef USE_DSM_WINDOWS
85 : : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
86 : : void **impl_private, void **mapped_address,
87 : : Size *mapped_size, int elevel);
88 : : #endif
89 : : #ifdef USE_DSM_MMAP
90 : : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
91 : : void **impl_private, void **mapped_address,
92 : : Size *mapped_size, int elevel);
93 : : #endif
94 : : static int errcode_for_dynamic_shared_memory(void);
95 : :
/*
 * Table pairing the name of each dynamic shared memory implementation with
 * its DSM_IMPL_* constant.  An implementation appears only when the matching
 * USE_DSM_* macro is defined at build time.  The table ends with an entry
 * whose name is NULL.
 *
 * NOTE(review): presumably this is the list of values accepted for
 * dynamic_shared_memory_type; confirm against the setting's definition.
 */
96 : : const struct config_enum_entry dynamic_shared_memory_options[] = {
97 : : #ifdef USE_DSM_POSIX
98 : : {"posix", DSM_IMPL_POSIX, false},
99 : : #endif
100 : : #ifdef USE_DSM_SYSV
101 : : {"sysv", DSM_IMPL_SYSV, false},
102 : : #endif
103 : : #ifdef USE_DSM_WINDOWS
104 : : {"windows", DSM_IMPL_WINDOWS, false},
105 : : #endif
106 : : #ifdef USE_DSM_MMAP
107 : : {"mmap", DSM_IMPL_MMAP, false},
108 : : #endif
109 : : {NULL, 0, false}
110 : : };
111 : :
112 : : /* Implementation selector: the DSM_IMPL_* value dsm_impl_op() switches on. */
113 : : int dynamic_shared_memory_type = DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE;
114 : :
115 : : /* Amount of space reserved for DSM segments in the main area. */
116 : : int min_dynamic_shared_memory;
117 : :
118 : : /* Size of buffer to be used for zero-filling (see dsm_impl_mmap). */
119 : : #define ZBUFFER_SIZE 8192
120 : :
/*
 * Prefix of the object names built by dsm_impl_windows(), which formats
 * them as "<prefix>.<handle>".
 */
121 : : #define SEGMENT_NAME_PREFIX "Global/PostgreSQL"
122 : :
123 : : /*------
124 : : * Perform a low-level shared memory operation in a platform-specific way,
125 : : * as dictated by the selected implementation. Each implementation is
126 : : * required to implement the following primitives.
127 : : *
128 : : * DSM_OP_CREATE. Create a segment whose size is the request_size and
129 : : * map it.
130 : : *
131 : : * DSM_OP_ATTACH. Map the existing segment at its current size; request_size must be 0.
132 : : *
133 : : * DSM_OP_DETACH. Unmap the segment.
134 : : *
135 : : * DSM_OP_DESTROY. Unmap the segment, if it is mapped. Destroy the
136 : : * segment.
137 : : *
138 : : * Arguments:
139 : : * op: The operation to be performed.
140 : : * handle: The handle of an existing object, or for DSM_OP_CREATE, the
141 : : * identifier for the new handle the caller wants created.
142 : : * request_size: For DSM_OP_CREATE, the requested size. Otherwise, 0.
143 : : * impl_private: Private, implementation-specific data. Will be a pointer
144 : : * to NULL for the first operation on a shared memory segment within this
145 : : * backend; thereafter, it will point to the value to which it was set
146 : : * on the previous call.
147 : : * mapped_address: Pointer to start of current mapping; pointer to NULL
148 : : * if none. Updated with new mapping address.
149 : : * mapped_size: Pointer to size of current mapping; pointer to 0 if none.
150 : : * Updated with new mapped size.
151 : : * elevel: Level at which to log errors.
152 : : *
153 : : * Return value: true on success, false on failure. When false is returned,
154 : : * a message should first be logged at the specified elevel, except in the
155 : : * case where DSM_OP_CREATE experiences a name collision, which should
156 : : * silently return false.
157 : : *-----
158 : : */
159 : : bool
4521 rhaas@postgresql.org 160 :CBC 52260 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
161 : : void **impl_private, void **mapped_address, Size *mapped_size,
162 : : int elevel)
163 : : {
2686 tmunro@postgresql.or 164 [ + + - + ]: 52260 : Assert(op == DSM_OP_CREATE || request_size == 0);
4540 rhaas@postgresql.org 165 [ + + + + : 52260 : Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
+ - - + ]
166 : : (*mapped_address == NULL && *mapped_size == 0));
167 : :
168 [ + - - - ]: 52260 : switch (dynamic_shared_memory_type)
169 : : {
170 : : #ifdef USE_DSM_POSIX
171 : 52260 : case DSM_IMPL_POSIX:
172 : 52260 : return dsm_impl_posix(op, handle, request_size, impl_private,
173 : : mapped_address, mapped_size, elevel);
174 : : #endif
175 : : #ifdef USE_DSM_SYSV
4540 rhaas@postgresql.org 176 :UBC 0 : case DSM_IMPL_SYSV:
177 : 0 : return dsm_impl_sysv(op, handle, request_size, impl_private,
178 : : mapped_address, mapped_size, elevel);
179 : : #endif
180 : : #ifdef USE_DSM_WINDOWS
181 : : case DSM_IMPL_WINDOWS:
182 : : return dsm_impl_windows(op, handle, request_size, impl_private,
183 : : mapped_address, mapped_size, elevel);
184 : : #endif
185 : : #ifdef USE_DSM_MMAP
186 : 0 : case DSM_IMPL_MMAP:
187 : 0 : return dsm_impl_mmap(op, handle, request_size, impl_private,
188 : : mapped_address, mapped_size, elevel);
189 : : #endif
4527 tgl@sss.pgh.pa.us 190 : 0 : default:
191 [ # # ]: 0 : elog(ERROR, "unexpected dynamic shared memory type: %d",
192 : : dynamic_shared_memory_type);
193 : : return false;
194 : : }
195 : : }
196 : :
197 : : #ifdef USE_DSM_POSIX
198 : : /*
199 : : * Operating system primitives to support POSIX shared memory.
200 : : *
201 : : * POSIX shared memory segments are created and attached using shm_open()
202 : : * and shm_unlink(); other operations, such as sizing or mapping the
203 : : * segment, are performed as if the shared memory segments were files.
204 : : *
205 : : * Indeed, on some platforms, they may be implemented that way. While
206 : : * POSIX shared memory segments seem intended to exist in a flat namespace,
207 : : * some operating systems may implement them as files, even going so far
208 : : * to treat a request for /xyz as a request to create a file by that name
209 : : * in the root directory. Users of such broken platforms should select
210 : : * a different shared memory implementation.
211 : : */
212 : : static bool
4521 rhaas@postgresql.org 213 :CBC 52260 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
214 : : void **impl_private, void **mapped_address, Size *mapped_size,
215 : : int elevel)
216 : : {
217 : : char name[64];
218 : : int flags;
219 : : int fd;
220 : : char *address;
221 : :
4540 222 : 52260 : snprintf(name, 64, "/PostgreSQL.%u", handle);
223 : :
224 : : /* Handle teardown cases. */
225 [ + + + + ]: 52260 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
226 : : {
227 [ + + ]: 27113 : if (*mapped_address != NULL
228 [ - + ]: 25148 : && munmap(*mapped_address, *mapped_size) != 0)
229 : : {
4540 rhaas@postgresql.org 230 [ # # ]:UBC 0 : ereport(elevel,
231 : : (errcode_for_dynamic_shared_memory(),
232 : : errmsg("could not unmap shared memory segment \"%s\": %m",
233 : : name)));
234 : 0 : return false;
235 : : }
4540 rhaas@postgresql.org 236 :CBC 27113 : *mapped_address = NULL;
237 : 27113 : *mapped_size = 0;
238 [ + + - + ]: 27113 : if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
239 : : {
4540 rhaas@postgresql.org 240 [ # # ]:UBC 0 : ereport(elevel,
241 : : (errcode_for_dynamic_shared_memory(),
242 : : errmsg("could not remove shared memory segment \"%s\": %m",
243 : : name)));
244 : 0 : return false;
245 : : }
4540 rhaas@postgresql.org 246 :CBC 27113 : return true;
247 : : }
248 : :
249 : : /*
250 : : * Create new segment or open an existing one for attach.
251 : : *
252 : : * Even though we will close the FD before returning, it seems desirable
253 : : * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
254 : : * failure. The fact that we won't hold the FD open long justifies using
255 : : * ReserveExternalFD rather than AcquireExternalFD, though.
256 : : */
2211 tgl@sss.pgh.pa.us 257 : 25147 : ReserveExternalFD();
258 : :
4540 rhaas@postgresql.org 259 [ + + ]: 25147 : flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
2899 sfrost@snowman.net 260 [ - + ]: 25147 : if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
261 : : {
2211 tgl@sss.pgh.pa.us 262 :UBC 0 : ReleaseExternalFD();
1353 tmunro@postgresql.or 263 [ # # # # ]: 0 : if (op == DSM_OP_ATTACH || errno != EEXIST)
4540 rhaas@postgresql.org 264 [ # # ]: 0 : ereport(elevel,
265 : : (errcode_for_dynamic_shared_memory(),
266 : : errmsg("could not open shared memory segment \"%s\": %m",
267 : : name)));
268 : 0 : return false;
269 : : }
270 : :
271 : : /*
272 : : * If we're attaching the segment, determine the current size; if we are
273 : : * creating the segment, set the size to the requested value.
274 : : */
4540 rhaas@postgresql.org 275 [ + + ]:CBC 25147 : if (op == DSM_OP_ATTACH)
276 : : {
277 : : struct stat st;
278 : :
279 [ - + ]: 22136 : if (fstat(fd, &st) != 0)
280 : : {
281 : : int save_errno;
282 : :
283 : : /* Back out what's already been done. */
4540 rhaas@postgresql.org 284 :UBC 0 : save_errno = errno;
285 : 0 : close(fd);
2211 tgl@sss.pgh.pa.us 286 : 0 : ReleaseExternalFD();
4540 rhaas@postgresql.org 287 : 0 : errno = save_errno;
288 : :
289 [ # # ]: 0 : ereport(elevel,
290 : : (errcode_for_dynamic_shared_memory(),
291 : : errmsg("could not stat shared memory segment \"%s\": %m",
292 : : name)));
293 : 0 : return false;
294 : : }
4540 rhaas@postgresql.org 295 :CBC 22136 : request_size = st.st_size;
296 : : }
2686 tmunro@postgresql.or 297 [ - + ]: 3011 : else if (dsm_impl_posix_resize(fd, request_size) != 0)
298 : : {
299 : : int save_errno;
300 : :
301 : : /* Back out what's already been done. */
4540 rhaas@postgresql.org 302 :UBC 0 : save_errno = errno;
303 : 0 : close(fd);
2211 tgl@sss.pgh.pa.us 304 : 0 : ReleaseExternalFD();
2686 tmunro@postgresql.or 305 : 0 : shm_unlink(name);
4540 rhaas@postgresql.org 306 : 0 : errno = save_errno;
307 : :
308 [ # # ]: 0 : ereport(elevel,
309 : : (errcode_for_dynamic_shared_memory(),
310 : : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
311 : : name, request_size)));
312 : 0 : return false;
313 : : }
314 : :
315 : : /* Map it. */
4331 bruce@momjian.us 316 :CBC 25147 : address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
317 : : MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
4540 rhaas@postgresql.org 318 [ - + ]: 25147 : if (address == MAP_FAILED)
319 : : {
320 : : int save_errno;
321 : :
322 : : /* Back out what's already been done. */
4540 rhaas@postgresql.org 323 :UBC 0 : save_errno = errno;
324 : 0 : close(fd);
2211 tgl@sss.pgh.pa.us 325 : 0 : ReleaseExternalFD();
4540 rhaas@postgresql.org 326 [ # # ]: 0 : if (op == DSM_OP_CREATE)
327 : 0 : shm_unlink(name);
328 : 0 : errno = save_errno;
329 : :
330 [ # # ]: 0 : ereport(elevel,
331 : : (errcode_for_dynamic_shared_memory(),
332 : : errmsg("could not map shared memory segment \"%s\": %m",
333 : : name)));
334 : 0 : return false;
335 : : }
4540 rhaas@postgresql.org 336 :CBC 25147 : *mapped_address = address;
337 : 25147 : *mapped_size = request_size;
338 : 25147 : close(fd);
2211 tgl@sss.pgh.pa.us 339 : 25147 : ReleaseExternalFD();
340 : :
4540 rhaas@postgresql.org 341 : 25147 : return true;
342 : : }
343 : :
344 : : /*
345 : : * Set the size of a virtual memory region associated with a file descriptor.
346 : : * If necessary, also ensure that virtual memory is actually allocated by the
347 : : * operating system, to avoid nasty surprises later.
348 : : *
349 : : * Returns non-zero if either truncation or allocation fails, and sets errno.
350 : : */
351 : : static int
3093 tgl@sss.pgh.pa.us 352 : 3011 : dsm_impl_posix_resize(int fd, off_t size)
353 : : {
354 : : int rc;
355 : : int save_errno;
356 : : sigset_t save_sigmask;
357 : :
358 : : /*
359 : : * Block all blockable signals, except SIGQUIT. posix_fallocate() can run
360 : : * for quite a long time, and is an all-or-nothing operation. If we
361 : : * allowed SIGUSR1 to interrupt us repeatedly (for example, due to
362 : : * recovery conflicts), the retry loop might never succeed.
363 : : */
1339 tmunro@postgresql.or 364 [ + + ]: 3011 : if (IsUnderPostmaster)
1338 365 : 1740 : sigprocmask(SIG_SETMASK, &BlockSig, &save_sigmask);
366 : :
1340 367 : 3011 : pgstat_report_wait_start(WAIT_EVENT_DSM_ALLOCATE);
368 : : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
369 : :
370 : : /*
371 : : * On Linux, a shm_open fd is backed by a tmpfs file. If we were to use
372 : : * ftruncate, the file would contain a hole. Accessing memory backed by a
373 : : * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
374 : : * is no more tmpfs space available. So we ask tmpfs to allocate pages
375 : : * here, so we can fail gracefully with ENOSPC now rather than risking
376 : : * SIGBUS later.
377 : : *
378 : : * We still use a traditional EINTR retry loop to handle SIGCONT.
379 : : * posix_fallocate() doesn't restart automatically, and we don't want this
380 : : * to fail if you attach a debugger.
381 : : */
382 : : do
383 : : {
384 : 3011 : rc = posix_fallocate(fd, 0, size);
385 [ - + ]: 3011 : } while (rc == EINTR);
386 : :
387 : : /*
388 : : * The caller expects errno to be set, but posix_fallocate() doesn't set
389 : : * it. Instead it returns error numbers directly. So set errno, even
390 : : * though we'll also return rc to indicate success or failure.
391 : : */
392 : 3011 : errno = rc;
393 : : #else
394 : : /* Extend the file to the requested size. */
395 : : do
396 : : {
397 : : rc = ftruncate(fd, size);
398 : : } while (rc < 0 && errno == EINTR);
399 : : #endif
400 : 3011 : pgstat_report_wait_end();
401 : :
1339 402 [ + + ]: 3011 : if (IsUnderPostmaster)
403 : : {
404 : 1740 : save_errno = errno;
1338 405 : 1740 : sigprocmask(SIG_SETMASK, &save_sigmask, NULL);
1339 406 : 1740 : errno = save_errno;
407 : : }
408 : :
3093 tgl@sss.pgh.pa.us 409 : 3011 : return rc;
410 : : }
411 : :
412 : : #endif /* USE_DSM_POSIX */
413 : :
414 : : #ifdef USE_DSM_SYSV
415 : : /*
416 : : * Operating system primitives to support System V shared memory.
417 : : *
418 : : * System V shared memory segments are manipulated using shmget(), shmat(),
419 : : * shmdt(), and shmctl(). As the default allocation limits for System V
420 : : * shared memory are usually quite low, the POSIX facilities may be
421 : : * preferable; but those are not supported everywhere.
422 : : */
423 : : static bool
4521 rhaas@postgresql.org 424 :UBC 0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
425 : : void **impl_private, void **mapped_address, Size *mapped_size,
426 : : int elevel)
427 : : {
428 : : key_t key;
429 : : int ident;
430 : : char *address;
431 : : char name[64];
432 : : int *ident_cache;
433 : :
434 : : /*
435 : : * POSIX shared memory and mmap-based shared memory identify segments with
436 : : * names. To avoid needless error message variation, we use the handle as
437 : : * the name.
438 : : */
4540 439 : 0 : snprintf(name, 64, "%u", handle);
440 : :
441 : : /*
442 : : * The System V shared memory namespace is very restricted; names are of
443 : : * type key_t, which is expected to be some sort of integer data type, but
444 : : * not necessarily the same one as dsm_handle. Since we use dsm_handle to
445 : : * identify shared memory segments across processes, this might seem like
446 : : * a problem, but it's really not. If dsm_handle is bigger than key_t,
447 : : * the cast below might truncate away some bits from the handle the
448 : : * user-provided, but it'll truncate exactly the same bits away in exactly
449 : : * the same fashion every time we use that handle, which is all that
450 : : * really matters. Conversely, if dsm_handle is smaller than key_t, we
451 : : * won't use the full range of available key space, but that's no big deal
452 : : * either.
453 : : *
454 : : * We do make sure that the key isn't negative, because that might not be
455 : : * portable.
456 : : */
457 : 0 : key = (key_t) handle;
4331 bruce@momjian.us 458 [ # # ]: 0 : if (key < 1) /* avoid compiler warning if type is unsigned */
4540 rhaas@postgresql.org 459 : 0 : key = -key;
460 : :
461 : : /*
462 : : * There's one special key, IPC_PRIVATE, which can't be used. If we end
463 : : * up with that value by chance during a create operation, just pretend it
464 : : * already exists, so that caller will retry. If we run into it anywhere
465 : : * else, the caller has passed a handle that doesn't correspond to
466 : : * anything we ever created, which should not happen.
467 : : */
468 [ # # ]: 0 : if (key == IPC_PRIVATE)
469 : : {
470 [ # # ]: 0 : if (op != DSM_OP_CREATE)
471 [ # # ]: 0 : elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
472 : 0 : errno = EEXIST;
473 : 0 : return false;
474 : : }
475 : :
476 : : /*
477 : : * Before we can do anything with a shared memory segment, we have to map
478 : : * the shared memory key to a shared memory identifier using shmget(). To
479 : : * avoid repeated lookups, we store the key using impl_private.
480 : : */
481 [ # # ]: 0 : if (*impl_private != NULL)
482 : : {
483 : 0 : ident_cache = *impl_private;
484 : 0 : ident = *ident_cache;
485 : : }
486 : : else
487 : : {
4331 bruce@momjian.us 488 : 0 : int flags = IPCProtection;
489 : : size_t segsize;
490 : :
491 : : /*
492 : : * Allocate the memory BEFORE acquiring the resource, so that we don't
493 : : * leak the resource if memory allocation fails.
494 : : */
4540 rhaas@postgresql.org 495 : 0 : ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
496 : :
497 : : /*
498 : : * When using shmget to find an existing segment, we must pass the
499 : : * size as 0. Passing a non-zero size which is greater than the
500 : : * actual size will result in EINVAL.
501 : : */
502 : 0 : segsize = 0;
503 : :
504 [ # # ]: 0 : if (op == DSM_OP_CREATE)
505 : : {
506 : 0 : flags |= IPC_CREAT | IPC_EXCL;
507 : 0 : segsize = request_size;
508 : : }
509 : :
510 [ # # ]: 0 : if ((ident = shmget(key, segsize, flags)) == -1)
511 : : {
1353 tmunro@postgresql.or 512 [ # # # # ]: 0 : if (op == DSM_OP_ATTACH || errno != EEXIST)
513 : : {
4331 bruce@momjian.us 514 : 0 : int save_errno = errno;
515 : :
4540 rhaas@postgresql.org 516 : 0 : pfree(ident_cache);
517 : 0 : errno = save_errno;
518 [ # # ]: 0 : ereport(elevel,
519 : : (errcode_for_dynamic_shared_memory(),
520 : : errmsg("could not get shared memory segment: %m")));
521 : : }
522 : 0 : return false;
523 : : }
524 : :
525 : 0 : *ident_cache = ident;
526 : 0 : *impl_private = ident_cache;
527 : : }
528 : :
529 : : /* Handle teardown cases. */
530 [ # # # # ]: 0 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
531 : : {
532 : 0 : pfree(ident_cache);
533 : 0 : *impl_private = NULL;
534 [ # # # # ]: 0 : if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
535 : : {
536 [ # # ]: 0 : ereport(elevel,
537 : : (errcode_for_dynamic_shared_memory(),
538 : : errmsg("could not unmap shared memory segment \"%s\": %m",
539 : : name)));
540 : 0 : return false;
541 : : }
542 : 0 : *mapped_address = NULL;
543 : 0 : *mapped_size = 0;
544 [ # # # # ]: 0 : if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
545 : : {
546 [ # # ]: 0 : ereport(elevel,
547 : : (errcode_for_dynamic_shared_memory(),
548 : : errmsg("could not remove shared memory segment \"%s\": %m",
549 : : name)));
550 : 0 : return false;
551 : : }
552 : 0 : return true;
553 : : }
554 : :
555 : : /* If we're attaching it, we must use IPC_STAT to determine the size. */
556 [ # # ]: 0 : if (op == DSM_OP_ATTACH)
557 : : {
558 : : struct shmid_ds shm;
559 : :
560 [ # # ]: 0 : if (shmctl(ident, IPC_STAT, &shm) != 0)
561 : : {
562 [ # # ]: 0 : ereport(elevel,
563 : : (errcode_for_dynamic_shared_memory(),
564 : : errmsg("could not stat shared memory segment \"%s\": %m",
565 : : name)));
566 : 0 : return false;
567 : : }
568 : 0 : request_size = shm.shm_segsz;
569 : : }
570 : :
571 : : /* Map it. */
572 : 0 : address = shmat(ident, NULL, PG_SHMAT_FLAGS);
573 [ # # ]: 0 : if (address == (void *) -1)
574 : : {
575 : : int save_errno;
576 : :
577 : : /* Back out what's already been done. */
578 : 0 : save_errno = errno;
579 [ # # ]: 0 : if (op == DSM_OP_CREATE)
580 : 0 : shmctl(ident, IPC_RMID, NULL);
581 : 0 : errno = save_errno;
582 : :
583 [ # # ]: 0 : ereport(elevel,
584 : : (errcode_for_dynamic_shared_memory(),
585 : : errmsg("could not map shared memory segment \"%s\": %m",
586 : : name)));
587 : 0 : return false;
588 : : }
589 : 0 : *mapped_address = address;
590 : 0 : *mapped_size = request_size;
591 : :
592 : 0 : return true;
593 : : }
594 : : #endif
595 : :
596 : : #ifdef USE_DSM_WINDOWS
597 : : /*
598 : : * Operating system primitives to support Windows shared memory.
599 : : *
600 : : * Windows shared memory implementation is done using file mapping
601 : : * which can be backed by either physical file or system paging file.
602 : : * Current implementation uses system paging file as other effects
603 : : * like performance are not clear for physical file and it is used in similar
604 : : * way for main shared memory in windows.
605 : : *
606 : : * A memory mapping object is a kernel object - they always get deleted when
607 : : * the last reference to them goes away, either explicitly via a CloseHandle or
608 : : * when the process containing the reference exits.
609 : : */
610 : : static bool
611 : : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
612 : : void **impl_private, void **mapped_address,
613 : : Size *mapped_size, int elevel)
614 : : {
615 : : char *address;
616 : : HANDLE hmap;
617 : : char name[64];
618 : : MEMORY_BASIC_INFORMATION info;
619 : :
620 : : /*
621 : : * Storing the shared memory segment in the Global\ namespace, can allow
622 : : * any process running in any session to access that file mapping object
623 : : * provided that the caller has the required access rights. But to avoid
624 : : * issues faced in main shared memory, we are using the naming convention
625 : : * similar to main shared memory. We can change here once issue mentioned
626 : : * in GetSharedMemName is resolved.
627 : : */
628 : : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
629 : :
630 : : /*
631 : : * Handle teardown cases. Since Windows automatically destroys the object
632 : : * when no references remain, we can treat it the same as detach.
633 : : */
634 : : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
635 : : {
636 : : if (*mapped_address != NULL
637 : : && UnmapViewOfFile(*mapped_address) == 0)
638 : : {
639 : : _dosmaperr(GetLastError());
640 : : ereport(elevel,
641 : : (errcode_for_dynamic_shared_memory(),
642 : : errmsg("could not unmap shared memory segment \"%s\": %m",
643 : : name)));
644 : : return false;
645 : : }
646 : : if (*impl_private != NULL
647 : : && CloseHandle(*impl_private) == 0)
648 : : {
649 : : _dosmaperr(GetLastError());
650 : : ereport(elevel,
651 : : (errcode_for_dynamic_shared_memory(),
652 : : errmsg("could not remove shared memory segment \"%s\": %m",
653 : : name)));
654 : : return false;
655 : : }
656 : :
657 : : *impl_private = NULL;
658 : : *mapped_address = NULL;
659 : : *mapped_size = 0;
660 : : return true;
661 : : }
662 : :
663 : : /* Create new segment or open an existing one for attach. */
664 : : if (op == DSM_OP_CREATE)
665 : : {
666 : : DWORD size_high;
667 : : DWORD size_low;
668 : : DWORD errcode;
669 : :
670 : : /* Shifts >= the width of the type are undefined. */
671 : : #ifdef _WIN64
672 : : size_high = request_size >> 32;
673 : : #else
674 : : size_high = 0;
675 : : #endif
676 : : size_low = (DWORD) request_size;
677 : :
678 : : /* CreateFileMapping might not clear the error code on success */
679 : : SetLastError(0);
680 : :
681 : : hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
682 : : NULL, /* Default security attrs */
683 : : PAGE_READWRITE, /* Memory is read/write */
684 : : size_high, /* Upper 32 bits of size */
685 : : size_low, /* Lower 32 bits of size */
686 : : name);
687 : :
688 : : errcode = GetLastError();
689 : : if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
690 : : {
691 : : /*
692 : : * On Windows, when the segment already exists, a handle for the
693 : : * existing segment is returned. We must close it before
694 : : * returning. However, if the existing segment is created by a
695 : : * service, then it returns ERROR_ACCESS_DENIED. We don't do
696 : : * _dosmaperr here, so errno won't be modified.
697 : : */
698 : : if (hmap)
699 : : CloseHandle(hmap);
700 : : return false;
701 : : }
702 : :
703 : : if (!hmap)
704 : : {
705 : : _dosmaperr(errcode);
706 : : ereport(elevel,
707 : : (errcode_for_dynamic_shared_memory(),
708 : : errmsg("could not create shared memory segment \"%s\": %m",
709 : : name)));
710 : : return false;
711 : : }
712 : : }
713 : : else
714 : : {
715 : : hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
716 : : FALSE, /* do not inherit the name */
717 : : name); /* name of mapping object */
718 : : if (!hmap)
719 : : {
720 : : _dosmaperr(GetLastError());
721 : : ereport(elevel,
722 : : (errcode_for_dynamic_shared_memory(),
723 : : errmsg("could not open shared memory segment \"%s\": %m",
724 : : name)));
725 : : return false;
726 : : }
727 : : }
728 : :
729 : : /* Map it. */
730 : : address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
731 : : 0, 0, 0);
732 : : if (!address)
733 : : {
734 : : int save_errno;
735 : :
736 : : _dosmaperr(GetLastError());
737 : : /* Back out what's already been done. */
738 : : save_errno = errno;
739 : : CloseHandle(hmap);
740 : : errno = save_errno;
741 : :
742 : : ereport(elevel,
743 : : (errcode_for_dynamic_shared_memory(),
744 : : errmsg("could not map shared memory segment \"%s\": %m",
745 : : name)));
746 : : return false;
747 : : }
748 : :
749 : : /*
750 : : * VirtualQuery gives size in page_size units, which is 4K for Windows. We
751 : : * need size only when we are attaching, but it's better to get the size
752 : : * when creating new segment to keep size consistent both for
753 : : * DSM_OP_CREATE and DSM_OP_ATTACH.
754 : : */
755 : : if (VirtualQuery(address, &info, sizeof(info)) == 0)
756 : : {
757 : : int save_errno;
758 : :
759 : : _dosmaperr(GetLastError());
760 : : /* Back out what's already been done. */
761 : : save_errno = errno;
762 : : UnmapViewOfFile(address);
763 : : CloseHandle(hmap);
764 : : errno = save_errno;
765 : :
766 : : ereport(elevel,
767 : : (errcode_for_dynamic_shared_memory(),
768 : : errmsg("could not stat shared memory segment \"%s\": %m",
769 : : name)));
770 : : return false;
771 : : }
772 : :
773 : : *mapped_address = address;
774 : : *mapped_size = info.RegionSize;
775 : : *impl_private = hmap;
776 : :
777 : : return true;
778 : : }
779 : : #endif
780 : :
781 : : #ifdef USE_DSM_MMAP
782 : : /*
783 : : * Operating system primitives to support mmap-based shared memory.
784 : : *
785 : : * Calling this "shared memory" is somewhat of a misnomer, because what
786 : : * we're really doing is creating a bunch of files and mapping them into
787 : : * our address space. The operating system may feel obliged to
788 : : * synchronize the contents to disk even if nothing is being paged out,
789 : : * which will not serve us well. The user can relocate the pg_dynshmem
790 : : * directory to a ramdisk to avoid this problem, if available.
791 : : */
792 : : static bool
4521 793 : 0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
794 : : void **impl_private, void **mapped_address, Size *mapped_size,
795 : : int elevel)
796 : : {
797 : : char name[64];
798 : : int flags;
799 : : int fd;
800 : : char *address;
801 : :
4540 802 : 0 : snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
803 : : handle);
804 : :
805 : : /* Handle teardown cases. */
806 [ # # # # ]: 0 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
807 : : {
808 [ # # ]: 0 : if (*mapped_address != NULL
809 [ # # ]: 0 : && munmap(*mapped_address, *mapped_size) != 0)
810 : : {
811 [ # # ]: 0 : ereport(elevel,
812 : : (errcode_for_dynamic_shared_memory(),
813 : : errmsg("could not unmap shared memory segment \"%s\": %m",
814 : : name)));
815 : 0 : return false;
816 : : }
817 : 0 : *mapped_address = NULL;
818 : 0 : *mapped_size = 0;
819 [ # # # # ]: 0 : if (op == DSM_OP_DESTROY && unlink(name) != 0)
820 : : {
821 [ # # ]: 0 : ereport(elevel,
822 : : (errcode_for_dynamic_shared_memory(),
823 : : errmsg("could not remove shared memory segment \"%s\": %m",
824 : : name)));
825 : 0 : return false;
826 : : }
827 : 0 : return true;
828 : : }
829 : :
830 : : /* Create new segment or open an existing one for attach. */
831 [ # # ]: 0 : flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
3095 peter_e@gmx.net 832 [ # # ]: 0 : if ((fd = OpenTransientFile(name, flags)) == -1)
833 : : {
1353 tmunro@postgresql.or 834 [ # # # # ]: 0 : if (op == DSM_OP_ATTACH || errno != EEXIST)
4540 rhaas@postgresql.org 835 [ # # ]: 0 : ereport(elevel,
836 : : (errcode_for_dynamic_shared_memory(),
837 : : errmsg("could not open shared memory segment \"%s\": %m",
838 : : name)));
839 : 0 : return false;
840 : : }
841 : :
842 : : /*
843 : : * If we're attaching the segment, determine the current size; if we are
844 : : * creating the segment, set the size to the requested value.
845 : : */
846 [ # # ]: 0 : if (op == DSM_OP_ATTACH)
847 : : {
848 : : struct stat st;
849 : :
850 [ # # ]: 0 : if (fstat(fd, &st) != 0)
851 : : {
852 : : int save_errno;
853 : :
854 : : /* Back out what's already been done. */
855 : 0 : save_errno = errno;
856 : 0 : CloseTransientFile(fd);
857 : 0 : errno = save_errno;
858 : :
859 [ # # ]: 0 : ereport(elevel,
860 : : (errcode_for_dynamic_shared_memory(),
861 : : errmsg("could not stat shared memory segment \"%s\": %m",
862 : : name)));
863 : 0 : return false;
864 : : }
865 : 0 : request_size = st.st_size;
866 : : }
867 : : else
868 : : {
869 : : /*
870 : : * Allocate a buffer full of zeros.
871 : : *
872 : : * Note: palloc zbuffer, instead of just using a local char array, to
873 : : * ensure it is reasonably well-aligned; this may save a few cycles
874 : : * transferring data to the kernel.
875 : : */
4331 bruce@momjian.us 876 : 0 : char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
761 heikki.linnakangas@i 877 : 0 : Size remaining = request_size;
4331 bruce@momjian.us 878 : 0 : bool success = true;
879 : :
880 : : /*
881 : : * Zero-fill the file. We have to do this the hard way to ensure that
882 : : * all the file space has really been allocated, so that we don't
883 : : * later seg fault when accessing the memory mapping. This is pretty
884 : : * pessimal.
885 : : */
4540 rhaas@postgresql.org 886 [ # # # # ]: 0 : while (success && remaining > 0)
887 : : {
4331 bruce@momjian.us 888 : 0 : Size goal = remaining;
889 : :
4540 rhaas@postgresql.org 890 [ # # ]: 0 : if (goal > ZBUFFER_SIZE)
891 : 0 : goal = ZBUFFER_SIZE;
3284 892 : 0 : pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
4540 893 [ # # ]: 0 : if (write(fd, zbuffer, goal) == goal)
894 : 0 : remaining -= goal;
895 : : else
896 : 0 : success = false;
3284 897 : 0 : pgstat_report_wait_end();
898 : : }
899 : :
4540 900 [ # # ]: 0 : if (!success)
901 : : {
902 : : int save_errno;
903 : :
904 : : /* Back out what's already been done. */
905 : 0 : save_errno = errno;
906 : 0 : CloseTransientFile(fd);
2686 tmunro@postgresql.or 907 : 0 : unlink(name);
4540 rhaas@postgresql.org 908 [ # # ]: 0 : errno = save_errno ? save_errno : ENOSPC;
909 : :
910 [ # # ]: 0 : ereport(elevel,
911 : : (errcode_for_dynamic_shared_memory(),
912 : : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
913 : : name, request_size)));
914 : 0 : return false;
915 : : }
916 : : }
917 : :
918 : : /* Map it. */
4331 bruce@momjian.us 919 : 0 : address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
920 : : MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
4540 rhaas@postgresql.org 921 [ # # ]: 0 : if (address == MAP_FAILED)
922 : : {
923 : : int save_errno;
924 : :
925 : : /* Back out what's already been done. */
926 : 0 : save_errno = errno;
927 : 0 : CloseTransientFile(fd);
928 [ # # ]: 0 : if (op == DSM_OP_CREATE)
929 : 0 : unlink(name);
930 : 0 : errno = save_errno;
931 : :
932 [ # # ]: 0 : ereport(elevel,
933 : : (errcode_for_dynamic_shared_memory(),
934 : : errmsg("could not map shared memory segment \"%s\": %m",
935 : : name)));
936 : 0 : return false;
937 : : }
938 : 0 : *mapped_address = address;
939 : 0 : *mapped_size = request_size;
940 : :
2444 peter@eisentraut.org 941 [ # # ]: 0 : if (CloseTransientFile(fd) != 0)
942 : : {
2563 michael@paquier.xyz 943 [ # # ]: 0 : ereport(elevel,
944 : : (errcode_for_file_access(),
945 : : errmsg("could not close shared memory segment \"%s\": %m",
946 : : name)));
947 : 0 : return false;
948 : : }
949 : :
4540 rhaas@postgresql.org 950 : 0 : return true;
951 : : }
952 : : #endif
953 : :
954 : : /*
955 : : * Implementation-specific actions that must be performed when a segment is to
956 : : * be preserved even when no backend has it attached.
957 : : *
958 : : * Except on Windows, we don't need to do anything at all. But since Windows
959 : : * cleans up segments automatically when no references remain, we duplicate
960 : : * the segment handle into the postmaster process. The postmaster needn't
961 : : * do anything to receive the handle; Windows transfers it automatically.
962 : : */
963 : : void
3491 rhaas@postgresql.org 964 :CBC 1256 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
965 : : void **impl_private_pm_handle)
966 : : {
4388 967 : 1256 : switch (dynamic_shared_memory_type)
968 : : {
969 : : #ifdef USE_DSM_WINDOWS
970 : : case DSM_IMPL_WINDOWS:
971 : : if (IsUnderPostmaster)
972 : : {
973 : : HANDLE hmap;
974 : :
975 : : if (!DuplicateHandle(GetCurrentProcess(), impl_private,
976 : : PostmasterHandle, &hmap, 0, FALSE,
977 : : DUPLICATE_SAME_ACCESS))
978 : : {
979 : : char name[64];
980 : :
981 : : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
982 : : _dosmaperr(GetLastError());
983 : : ereport(ERROR,
984 : : (errcode_for_dynamic_shared_memory(),
985 : : errmsg("could not duplicate handle for \"%s\": %m",
986 : : name)));
987 : : }
988 : :
989 : : /*
990 : : * Here, we remember the handle that we created in the
991 : : * postmaster process. This handle isn't actually usable in
992 : : * any process other than the postmaster, but that doesn't
993 : : * matter. We're just holding onto it so that, if the segment
994 : : * is unpinned, dsm_impl_unpin_segment can close it.
995 : : */
996 : : *impl_private_pm_handle = hmap;
997 : : }
998 : : break;
999 : : #endif
1000 : : default:
3491 1001 : 1256 : break;
1002 : : }
1003 : 1256 : }
1004 : :
1005 : : /*
1006 : : * Implementation-specific actions that must be performed when a segment is no
1007 : : * longer to be preserved, so that it will be cleaned up when all backends
1008 : : * have detached from it.
1009 : : *
1010 : : * Except on Windows, we don't need to do anything at all. For Windows, we
1011 : : * close the extra handle that dsm_impl_pin_segment created in the
1012 : : * postmaster's process space.
1013 : : */
1014 : : void
1015 : 192 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
1016 : : {
1017 : 192 : switch (dynamic_shared_memory_type)
1018 : : {
1019 : : #ifdef USE_DSM_WINDOWS
1020 : : case DSM_IMPL_WINDOWS:
1021 : : if (IsUnderPostmaster)
1022 : : {
1023 : : if (*impl_private &&
1024 : : !DuplicateHandle(PostmasterHandle, *impl_private,
1025 : : NULL, NULL, 0, FALSE,
1026 : : DUPLICATE_CLOSE_SOURCE))
1027 : : {
1028 : : char name[64];
1029 : :
1030 : : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1031 : : _dosmaperr(GetLastError());
1032 : : ereport(ERROR,
1033 : : (errcode_for_dynamic_shared_memory(),
1034 : : errmsg("could not duplicate handle for \"%s\": %m",
1035 : : name)));
1036 : : }
1037 : :
1038 : : *impl_private = NULL;
1039 : : }
1040 : : break;
1041 : : #endif
1042 : : default:
4388 1043 : 192 : break;
1044 : : }
1045 : 192 : }
1046 : :
1047 : : static int
3865 andres@anarazel.de 1048 :UBC 0 : errcode_for_dynamic_shared_memory(void)
1049 : : {
4540 rhaas@postgresql.org 1050 [ # # # # ]: 0 : if (errno == EFBIG || errno == ENOMEM)
2181 tgl@sss.pgh.pa.us 1051 : 0 : return errcode(ERRCODE_OUT_OF_MEMORY);
1052 : : else
1053 : 0 : return errcode_for_file_access();
1054 : : }
|