Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pmsignal.c
4 : : * routines for signaling between the postmaster and its child processes
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/ipc/pmsignal.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <signal.h>
18 : : #include <unistd.h>
19 : :
20 : : #ifdef HAVE_SYS_PRCTL_H
21 : : #include <sys/prctl.h>
22 : : #endif
23 : :
24 : : #include "miscadmin.h"
25 : : #include "postmaster/postmaster.h"
26 : : #include "replication/walsender.h"
27 : : #include "storage/ipc.h"
28 : : #include "storage/pmsignal.h"
29 : : #include "storage/shmem.h"
30 : : #include "storage/subsystems.h"
31 : : #include "utils/memutils.h"
32 : :
33 : :
34 : : /*
35 : : * The postmaster is signaled by its children by sending SIGUSR1. The
36 : : * specific reason is communicated via flags in shared memory. We keep
37 : : * a boolean flag for each possible "reason", so that different reasons
38 : : * can be signaled by different backends at the same time. (However,
39 : : * if the same reason is signaled more than once simultaneously, the
40 : : * postmaster will observe it only once.)
41 : : *
42 : : * The flags are actually declared as "volatile sig_atomic_t" for maximum
43 : : * portability. This should ensure that loads and stores of the flag
44 : : * values are atomic, allowing us to dispense with any explicit locking.
45 : : *
46 : : * In addition to the per-reason flags, we store a set of per-child-process
47 : : * flags that are currently used only for detecting whether a backend has
48 : : * exited without performing proper shutdown. The per-child-process flags
49 : : * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
50 : : * available for assignment. An ASSIGNED slot is associated with a postmaster
51 : : * child process, but either the process has not touched shared memory yet, or
52 : : * it has successfully cleaned up after itself. An ACTIVE slot means the
53 : : * process is actively using shared memory. The slots are assigned to child
54 : : * processes by postmaster, and pmchild.c is responsible for tracking which
55 : : * one goes with which PID.
56 : : *
57 : : * Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
58 : : * but carries the extra information that the child is a WAL sender.
59 : : * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
60 : : * start streaming the WAL (and they never go back to ACTIVE after that).
61 : : *
62 : : * We also have a shared-memory field that is used for communication in
63 : : * the opposite direction, from postmaster to children: it tells why the
64 : : * postmaster has broadcasted SIGQUIT signals, if indeed it has done so.
65 : : */
66 : :
/*
 * Possible states of one per-child slot.  The values are stored in
 * sig_atomic_t variables, so each of them must fit in that type.
 */
#define PM_CHILD_UNUSED		0
#define PM_CHILD_ASSIGNED	1
#define PM_CHILD_ACTIVE		2
#define PM_CHILD_WALSENDER	3
71 : :
72 : : /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
73 : : struct PMSignalData
74 : : {
75 : : /* per-reason flags for signaling the postmaster */
76 : : sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
77 : : /* global flags for signals from postmaster to children */
78 : : QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */
79 : : /* per-child-process flags */
80 : : int num_child_flags; /* # of entries in PMChildFlags[] */
81 : : sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
82 : : };
83 : :
84 : : /* PMSignalState pointer is valid in both postmaster and child processes */
85 : : NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
86 : :
87 : : static void PMSignalShmemRequest(void *);
88 : : static void PMSignalShmemInit(void *);
89 : :
90 : : const ShmemCallbacks PMSignalShmemCallbacks = {
91 : : .request_fn = PMSignalShmemRequest,
92 : : .init_fn = PMSignalShmemInit,
93 : : };
94 : :
95 : : /*
96 : : * Local copy of PMSignalState->num_child_flags, only valid in the
97 : : * postmaster. Postmaster keeps a local copy so that it doesn't need to
98 : : * trust the value in shared memory.
99 : : */
100 : : static int num_child_flags;
101 : :
/*
 * Support for being notified by a signal when the postmaster dies.
 */
#ifdef USE_POSTMASTER_DEATH_SIGNAL

/* Set whenever the postmaster might have died and must be checked for. */
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * postmaster_death_handler - handler for the postmaster death signal
 *
 * Only records that the postmaster may be gone.
 */
static void
postmaster_death_handler(SIGNAL_ARGS)
{
	postmaster_possibly_dead = true;
}

/*
 * Choose the signal number to use.  Which signals exist depends on the OS,
 * and SIGUSR1 and SIGUSR2 are already taken for other purposes.
 *
 * We currently assume that a suitable signal can always be found, which
 * seems reasonable for any platform modern enough to offer a parent-death
 * signaling mechanism.
 */
#ifdef SIGINFO
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#elif defined(SIGPWR)
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#else
#error "cannot find a signal to use for postmaster death"
#endif

#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
131 : :
132 : : static void MarkPostmasterChildInactive(int code, Datum arg);
133 : :
134 : : /*
135 : : * PMSignalShmemRequest - Register pmsignal.c's shared memory needs
136 : : */
137 : : static void
29 heikki.linnakangas@i 138 :GNC 1244 : PMSignalShmemRequest(void *arg)
139 : : {
140 : : size_t size;
141 : :
142 : 1244 : num_child_flags = MaxLivePostmasterChildren();
143 : :
144 : 1244 : size = add_size(offsetof(PMSignalData, PMChildFlags),
145 : : mul_size(num_child_flags, sizeof(sig_atomic_t)));
146 : 1244 : ShmemRequestStruct(.name = "PMSignalState",
147 : : .size = size,
148 : : .ptr = (void **) &PMSignalState,
149 : : );
6209 tgl@sss.pgh.pa.us 150 :GIC 1244 : }
151 : :
152 : : static void
29 heikki.linnakangas@i 153 :GNC 1241 : PMSignalShmemInit(void *arg)
154 : : {
155 [ - + ]: 1241 : Assert(PMSignalState);
156 [ - + ]: 1241 : Assert(num_child_flags > 0);
157 : 1241 : PMSignalState->num_child_flags = num_child_flags;
8948 tgl@sss.pgh.pa.us 158 :CBC 1241 : }
159 : :
160 : : /*
161 : : * SendPostmasterSignal - signal the postmaster from a child process
162 : : */
163 : : void
164 : 5200 : SendPostmasterSignal(PMSignalReason reason)
165 : : {
166 : : /* If called in a standalone backend, do nothing */
167 [ - + ]: 5200 : if (!IsUnderPostmaster)
8948 tgl@sss.pgh.pa.us 168 :UBC 0 : return;
169 : : /* Atomically set the proper flag */
6209 tgl@sss.pgh.pa.us 170 :CBC 5200 : PMSignalState->PMSignalFlags[reason] = true;
171 : : /* Send signal to postmaster */
8122 neilc@samurai.com 172 : 5200 : kill(PostmasterPid, SIGUSR1);
173 : : }
174 : :
175 : : /*
176 : : * CheckPostmasterSignal - check to see if a particular reason has been
177 : : * signaled, and clear the signal flag. Should be called by postmaster
178 : : * after receiving SIGUSR1.
179 : : */
180 : : bool
8948 tgl@sss.pgh.pa.us 181 : 44641 : CheckPostmasterSignal(PMSignalReason reason)
182 : : {
183 : : /* Careful here --- don't clear flag if we haven't seen it set */
6209 184 [ + + ]: 44641 : if (PMSignalState->PMSignalFlags[reason])
185 : : {
186 : 4635 : PMSignalState->PMSignalFlags[reason] = false;
8948 187 : 4635 : return true;
188 : : }
189 : 40006 : return false;
190 : : }
191 : :
192 : : /*
193 : : * SetQuitSignalReason - broadcast the reason for a system shutdown.
194 : : * Should be called by postmaster before sending SIGQUIT to children.
195 : : *
196 : : * Note: in a crash-and-restart scenario, the "reason" field gets cleared
197 : : * as a part of rebuilding shared memory; the postmaster need not do it
198 : : * explicitly.
199 : : */
200 : : void
1958 201 : 356 : SetQuitSignalReason(QuitSignalReason reason)
202 : : {
203 : 356 : PMSignalState->sigquit_reason = reason;
204 : 356 : }
205 : :
206 : : /*
207 : : * GetQuitSignalReason - obtain the reason for a system shutdown.
208 : : * Called by child processes when they receive SIGQUIT.
209 : : * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
210 : : */
211 : : QuitSignalReason
1958 tgl@sss.pgh.pa.us 212 :UBC 0 : GetQuitSignalReason(void)
213 : : {
214 : : /* This is called in signal handlers, so be extra paranoid. */
215 [ # # # # ]: 0 : if (!IsUnderPostmaster || PMSignalState == NULL)
216 : 0 : return PMQUIT_NOT_SENT;
217 : 0 : return PMSignalState->sigquit_reason;
218 : : }
219 : :
220 : :
221 : : /*
222 : : * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a
223 : : * new postmaster child process.
224 : : *
225 : : * Only the postmaster is allowed to execute this routine, so we need no
226 : : * special locking.
227 : : */
228 : : void
537 heikki.linnakangas@i 229 :CBC 25585 : MarkPostmasterChildSlotAssigned(int slot)
230 : : {
231 [ + - - + ]: 25585 : Assert(slot > 0 && slot <= num_child_flags);
232 : 25585 : slot--;
233 : :
234 [ - + ]: 25585 : if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED)
537 heikki.linnakangas@i 235 [ # # ]:UBC 0 : elog(FATAL, "postmaster child slot is already in use");
236 : :
537 heikki.linnakangas@i 237 :CBC 25585 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
6209 tgl@sss.pgh.pa.us 238 : 25585 : }
239 : :
240 : : /*
241 : : * MarkPostmasterChildSlotUnassigned - release a slot after death of a
242 : : * postmaster child process. This must be called in the postmaster process.
243 : : *
244 : : * Returns true if the slot had been in ASSIGNED state (the expected case),
245 : : * false otherwise (implying that the child failed to clean itself up).
246 : : */
247 : : bool
537 heikki.linnakangas@i 248 : 25584 : MarkPostmasterChildSlotUnassigned(int slot)
249 : : {
250 : : bool result;
251 : :
252 [ + - - + ]: 25584 : Assert(slot > 0 && slot <= num_child_flags);
6209 tgl@sss.pgh.pa.us 253 : 25584 : slot--;
254 : :
255 : : /*
256 : : * Note: the slot state might already be unused, because the logic in
257 : : * postmaster.c is such that this might get called twice when a child
258 : : * crashes. So we don't try to Assert anything about the state.
259 : : */
260 : 25584 : result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
261 : 25584 : PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
262 : 25584 : return result;
263 : : }
264 : :
265 : : /*
266 : : * IsPostmasterChildWalSender - check if given slot is in use by a
267 : : * walsender process. This is called only by the postmaster.
268 : : */
269 : : bool
5954 heikki.linnakangas@i 270 : 2205 : IsPostmasterChildWalSender(int slot)
271 : : {
537 272 [ + - - + ]: 2205 : Assert(slot > 0 && slot <= num_child_flags);
5954 273 : 2205 : slot--;
274 : :
275 [ + + ]: 2205 : if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
276 : 47 : return true;
277 : : else
278 : 2158 : return false;
279 : : }
280 : :
281 : : /*
282 : : * RegisterPostmasterChildActive - mark a postmaster child as about to begin
283 : : * actively using shared memory. This is called in the child process.
284 : : *
285 : : * This register an shmem exit hook to mark us as inactive again when the
286 : : * process exits normally.
287 : : */
288 : : void
574 289 : 22837 : RegisterPostmasterChildActive(void)
290 : : {
6209 tgl@sss.pgh.pa.us 291 : 22837 : int slot = MyPMChildSlot;
292 : :
293 [ + - - + ]: 22837 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
294 : 22837 : slot--;
295 [ - + ]: 22837 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
5589 heikki.linnakangas@i 296 : 22837 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE;
297 : :
298 : : /* Arrange to clean up at exit. */
574 299 : 22837 : on_shmem_exit(MarkPostmasterChildInactive, 0);
5589 300 : 22837 : }
301 : :
302 : : /*
303 : : * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender
304 : : * process. This is called in the child process, sometime after marking the
305 : : * child as active.
306 : : */
307 : : void
308 : 1281 : MarkPostmasterChildWalSender(void)
309 : : {
310 : 1281 : int slot = MyPMChildSlot;
311 : :
312 [ - + ]: 1281 : Assert(am_walsender);
313 : :
314 [ + - - + ]: 1281 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
315 : 1281 : slot--;
316 [ - + ]: 1281 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE);
317 : 1281 : PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER;
5954 318 : 1281 : }
319 : :
320 : : /*
321 : : * MarkPostmasterChildInactive - mark a postmaster child as done using
322 : : * shared memory. This is called in the child process.
323 : : */
324 : : static void
574 325 : 22837 : MarkPostmasterChildInactive(int code, Datum arg)
326 : : {
6209 tgl@sss.pgh.pa.us 327 : 22837 : int slot = MyPMChildSlot;
328 : :
329 [ + - - + ]: 22837 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
330 : 22837 : slot--;
5954 heikki.linnakangas@i 331 [ + + - + ]: 22837 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE ||
332 : : PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER);
6209 tgl@sss.pgh.pa.us 333 : 22837 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
334 : 22837 : }
335 : :
336 : :
337 : : /*
338 : : * PostmasterIsAliveInternal - check whether postmaster process is still alive
339 : : *
340 : : * This is the slow path of PostmasterIsAlive(), where the caller has already
341 : : * checked 'postmaster_possibly_dead'. (On platforms that don't support
342 : : * a signal for parent death, PostmasterIsAlive() is just an alias for this.)
343 : : */
344 : : bool
2855 tmunro@postgresql.or 345 : 300 : PostmasterIsAliveInternal(void)
346 : : {
347 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
348 : : /*
349 : : * Reset the flag before checking, so that we don't miss a signal if
350 : : * postmaster dies right after the check. If postmaster was indeed dead,
351 : : * we'll re-arm it before returning to caller.
352 : : */
353 : 300 : postmaster_possibly_dead = false;
354 : : #endif
355 : :
356 : : #ifndef WIN32
357 : : {
358 : : char c;
359 : : ssize_t rc;
360 : :
361 : 300 : rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
362 : :
363 : : /*
364 : : * In the usual case, the postmaster is still alive, and there is no
365 : : * data in the pipe.
366 : : */
367 [ + + - + : 300 : if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
- - ]
5614 rhaas@postgresql.org 368 : 279 : return true;
369 : : else
370 : : {
371 : : /*
372 : : * Postmaster is dead, or something went wrong with the read()
373 : : * call.
374 : : */
375 : :
376 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
2855 tmunro@postgresql.or 377 : 21 : postmaster_possibly_dead = true;
378 : : #endif
379 : :
380 [ - + ]: 21 : if (rc < 0)
2855 tmunro@postgresql.or 381 [ # # ]:UBC 0 : elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
2855 tmunro@postgresql.or 382 [ - + ]:CBC 21 : else if (rc > 0)
2855 tmunro@postgresql.or 383 [ # # ]:UBC 0 : elog(FATAL, "unexpected data in postmaster death monitoring pipe");
384 : :
2855 tmunro@postgresql.or 385 :CBC 21 : return false;
386 : : }
387 : : }
388 : :
389 : : #else /* WIN32 */
390 : : if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT)
391 : : return true;
392 : : else
393 : : {
394 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
395 : : postmaster_possibly_dead = true;
396 : : #endif
397 : : return false;
398 : : }
399 : : #endif /* WIN32 */
400 : : }
401 : :
/*
 * PostmasterDeathSignalInit - ask to be signaled when the postmaster dies,
 * where the platform makes that possible
 *
 * Does nothing unless USE_POSTMASTER_DEATH_SIGNAL is defined.  Failure to
 * arm the parent-death signal is reported with elog(ERROR).
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			signum = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking for the signal. */
	pqsignal(signum, postmaster_death_handler);

	/* Ask for the signal to be delivered when our parent exits. */
#ifdef PR_SET_PDEATHSIG
	if (prctl(PR_SET_PDEATHSIG, signum) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &signum) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent might already have gone away before the request took
	 * effect, so force the slow check the first time around.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
|