Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pmsignal.c
4 : : * routines for signaling between the postmaster and its child processes
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/ipc/pmsignal.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <signal.h>
18 : : #include <unistd.h>
19 : :
20 : : #ifdef HAVE_SYS_PRCTL_H
21 : : #include <sys/prctl.h>
22 : : #endif
23 : :
24 : : #include "miscadmin.h"
25 : : #include "postmaster/postmaster.h"
26 : : #include "replication/walsender.h"
27 : : #include "storage/ipc.h"
28 : : #include "storage/pmsignal.h"
29 : : #include "storage/shmem.h"
30 : : #include "utils/memutils.h"
31 : :
32 : :
33 : : /*
34 : : * The postmaster is signaled by its children by sending SIGUSR1. The
35 : : * specific reason is communicated via flags in shared memory. We keep
36 : : * a boolean flag for each possible "reason", so that different reasons
37 : : * can be signaled by different backends at the same time. (However,
38 : : * if the same reason is signaled more than once simultaneously, the
39 : : * postmaster will observe it only once.)
40 : : *
41 : : * The flags are actually declared as "volatile sig_atomic_t" for maximum
42 : : * portability. This should ensure that loads and stores of the flag
43 : : * values are atomic, allowing us to dispense with any explicit locking.
44 : : *
45 : : * In addition to the per-reason flags, we store a set of per-child-process
46 : : * flags that are currently used only for detecting whether a backend has
47 : : * exited without performing proper shutdown. The per-child-process flags
48 : : * have three possible states: UNUSED, ASSIGNED, ACTIVE. An UNUSED slot is
49 : : * available for assignment. An ASSIGNED slot is associated with a postmaster
50 : : * child process, but either the process has not touched shared memory yet, or
51 : : * it has successfully cleaned up after itself. An ACTIVE slot means the
52 : : * process is actively using shared memory. The slots are assigned to child
53 : : * processes by postmaster, and pmchild.c is responsible for tracking which
54 : : * one goes with which PID.
55 : : *
56 : : * Actually there is a fourth state, WALSENDER. This is just like ACTIVE,
57 : : * but carries the extra information that the child is a WAL sender.
58 : : * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
59 : : * start streaming the WAL (and they never go back to ACTIVE after that).
60 : : *
61 : : * We also have a shared-memory field that is used for communication in
62 : : * the opposite direction, from postmaster to children: it tells why the
63 : : * postmaster has broadcasted SIGQUIT signals, if indeed it has done so.
64 : : */
65 : :
/*
 * States of a per-child-process slot.  Every value must fit in sig_atomic_t.
 */
#define PM_CHILD_UNUSED		0	/* slot is available for assignment */
#define PM_CHILD_ASSIGNED	1	/* owned by a child that is not (or no longer)
								 * using shared memory */
#define PM_CHILD_ACTIVE		2	/* child is actively using shared memory */
#define PM_CHILD_WALSENDER	3	/* like ACTIVE, and the child is a WAL sender */
70 : :
71 : : /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */
72 : : struct PMSignalData
73 : : {
74 : : /* per-reason flags for signaling the postmaster */
75 : : sig_atomic_t PMSignalFlags[NUM_PMSIGNALS];
76 : : /* global flags for signals from postmaster to children */
77 : : QuitSignalReason sigquit_reason; /* why SIGQUIT was sent */
78 : : /* per-child-process flags */
79 : : int num_child_flags; /* # of entries in PMChildFlags[] */
80 : : sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
81 : : };
82 : :
83 : : /* PMSignalState pointer is valid in both postmaster and child processes */
84 : : NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;
85 : :
86 : : /*
87 : : * Local copy of PMSignalState->num_child_flags, only valid in the
88 : : * postmaster. Postmaster keeps a local copy so that it doesn't need to
89 : : * trust the value in shared memory.
90 : : */
91 : : static int num_child_flags;
92 : :
93 : : /*
94 : : * Signal handler to be notified if postmaster dies.
95 : : */
96 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
/*
 * Set by the parent-death signal handler; a true value only means that the
 * postmaster *may* be gone and that the slow check is needed.
 */
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * postmaster_death_handler - handler for the parent-death signal
 *
 * Does nothing except raise the "possibly dead" flag.
 */
static void
postmaster_death_handler(SIGNAL_ARGS)
{
	postmaster_possibly_dead = true;
}
104 : :
105 : : /*
106 : : * The available signals depend on the OS. SIGUSR1 and SIGUSR2 are already
107 : : * used for other things, so choose another one.
108 : : *
109 : : * Currently, we assume that we can always find a signal to use. That
110 : : * seems like a reasonable assumption for all platforms that are modern
111 : : * enough to have a parent-death signaling mechanism.
112 : : */
/* Prefer SIGINFO when the platform has it, otherwise fall back to SIGPWR. */
#ifdef SIGINFO
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#elif defined(SIGPWR)
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#else
#error "cannot find a signal to use for postmaster death"
#endif
120 : :
121 : : #endif /* USE_POSTMASTER_DEATH_SIGNAL */
122 : :
123 : : static void MarkPostmasterChildInactive(int code, Datum arg);
124 : :
125 : : /*
126 : : * PMSignalShmemSize
127 : : * Compute space needed for pmsignal.c's shared memory
128 : : */
129 : : Size
5968 tgl@sss.pgh.pa.us 130 : 3967 : PMSignalShmemSize(void)
131 : : {
132 : : Size size;
133 : :
134 : 3967 : size = offsetof(PMSignalData, PMChildFlags);
135 : 3967 : size = add_size(size, mul_size(MaxLivePostmasterChildren(),
136 : : sizeof(sig_atomic_t)));
137 : :
138 : 3967 : return size;
139 : : }
140 : :
141 : : /*
142 : : * PMSignalShmemInit - initialize during shared-memory creation
143 : : */
144 : : void
145 : 1029 : PMSignalShmemInit(void)
146 : : {
147 : : bool found;
148 : :
149 : 1029 : PMSignalState = (PMSignalData *)
150 : 1029 : ShmemInitStruct("PMSignalState", PMSignalShmemSize(), &found);
151 : :
7931 bruce@momjian.us 152 [ + - ]: 1029 : if (!found)
153 : : {
154 : : /* initialize all flags to zeroes */
2357 peter@eisentraut.org 155 [ + - + + : 1074 : MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
+ - + + +
+ ]
296 heikki.linnakangas@i 156 : 1029 : num_child_flags = MaxLivePostmasterChildren();
157 : 1029 : PMSignalState->num_child_flags = num_child_flags;
158 : : }
8707 tgl@sss.pgh.pa.us 159 : 1029 : }
160 : :
161 : : /*
162 : : * SendPostmasterSignal - signal the postmaster from a child process
163 : : */
164 : : void
165 : 3992 : SendPostmasterSignal(PMSignalReason reason)
166 : : {
167 : : /* If called in a standalone backend, do nothing */
168 [ - + ]: 3992 : if (!IsUnderPostmaster)
8707 tgl@sss.pgh.pa.us 169 :UBC 0 : return;
170 : : /* Atomically set the proper flag */
5968 tgl@sss.pgh.pa.us 171 :CBC 3992 : PMSignalState->PMSignalFlags[reason] = true;
172 : : /* Send signal to postmaster */
7881 neilc@samurai.com 173 : 3992 : kill(PostmasterPid, SIGUSR1);
174 : : }
175 : :
176 : : /*
177 : : * CheckPostmasterSignal - check to see if a particular reason has been
178 : : * signaled, and clear the signal flag. Should be called by postmaster
179 : : * after receiving SIGUSR1.
180 : : */
181 : : bool
8707 tgl@sss.pgh.pa.us 182 : 31465 : CheckPostmasterSignal(PMSignalReason reason)
183 : : {
184 : : /* Careful here --- don't clear flag if we haven't seen it set */
5968 185 [ + + ]: 31465 : if (PMSignalState->PMSignalFlags[reason])
186 : : {
187 : 3695 : PMSignalState->PMSignalFlags[reason] = false;
8707 188 : 3695 : return true;
189 : : }
190 : 27770 : return false;
191 : : }
192 : :
193 : : /*
194 : : * SetQuitSignalReason - broadcast the reason for a system shutdown.
195 : : * Should be called by postmaster before sending SIGQUIT to children.
196 : : *
197 : : * Note: in a crash-and-restart scenario, the "reason" field gets cleared
198 : : * as a part of rebuilding shared memory; the postmaster need not do it
199 : : * explicitly.
200 : : */
201 : : void
1717 202 : 321 : SetQuitSignalReason(QuitSignalReason reason)
203 : : {
204 : 321 : PMSignalState->sigquit_reason = reason;
205 : 321 : }
206 : :
207 : : /*
208 : : * GetQuitSignalReason - obtain the reason for a system shutdown.
209 : : * Called by child processes when they receive SIGQUIT.
210 : : * If the postmaster hasn't actually sent SIGQUIT, will return PMQUIT_NOT_SENT.
211 : : */
212 : : QuitSignalReason
1717 tgl@sss.pgh.pa.us 213 :UBC 0 : GetQuitSignalReason(void)
214 : : {
215 : : /* This is called in signal handlers, so be extra paranoid. */
216 [ # # # # ]: 0 : if (!IsUnderPostmaster || PMSignalState == NULL)
217 : 0 : return PMQUIT_NOT_SENT;
218 : 0 : return PMSignalState->sigquit_reason;
219 : : }
220 : :
221 : :
222 : : /*
223 : : * MarkPostmasterChildSlotAssigned - mark the given slot as ASSIGNED for a
224 : : * new postmaster child process.
225 : : *
226 : : * Only the postmaster is allowed to execute this routine, so we need no
227 : : * special locking.
228 : : */
229 : : void
296 heikki.linnakangas@i 230 :CBC 21436 : MarkPostmasterChildSlotAssigned(int slot)
231 : : {
232 [ + - - + ]: 21436 : Assert(slot > 0 && slot <= num_child_flags);
233 : 21436 : slot--;
234 : :
235 [ - + ]: 21436 : if (PMSignalState->PMChildFlags[slot] != PM_CHILD_UNUSED)
296 heikki.linnakangas@i 236 [ # # ]:UBC 0 : elog(FATAL, "postmaster child slot is already in use");
237 : :
296 heikki.linnakangas@i 238 :CBC 21436 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
5968 tgl@sss.pgh.pa.us 239 : 21436 : }
240 : :
241 : : /*
242 : : * MarkPostmasterChildSlotUnassigned - release a slot after death of a
243 : : * postmaster child process. This must be called in the postmaster process.
244 : : *
245 : : * Returns true if the slot had been in ASSIGNED state (the expected case),
246 : : * false otherwise (implying that the child failed to clean itself up).
247 : : */
248 : : bool
296 heikki.linnakangas@i 249 : 21435 : MarkPostmasterChildSlotUnassigned(int slot)
250 : : {
251 : : bool result;
252 : :
253 [ + - - + ]: 21435 : Assert(slot > 0 && slot <= num_child_flags);
5968 tgl@sss.pgh.pa.us 254 : 21435 : slot--;
255 : :
256 : : /*
257 : : * Note: the slot state might already be unused, because the logic in
258 : : * postmaster.c is such that this might get called twice when a child
259 : : * crashes. So we don't try to Assert anything about the state.
260 : : */
261 : 21435 : result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
262 : 21435 : PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
263 : 21435 : return result;
264 : : }
265 : :
266 : : /*
267 : : * IsPostmasterChildWalSender - check if given slot is in use by a
268 : : * walsender process. This is called only by the postmaster.
269 : : */
270 : : bool
5713 heikki.linnakangas@i 271 : 1592 : IsPostmasterChildWalSender(int slot)
272 : : {
296 273 [ + - - + ]: 1592 : Assert(slot > 0 && slot <= num_child_flags);
5713 274 : 1592 : slot--;
275 : :
276 [ + + ]: 1592 : if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)
277 : 32 : return true;
278 : : else
279 : 1560 : return false;
280 : : }
281 : :
282 : : /*
283 : : * RegisterPostmasterChildActive - mark a postmaster child as about to begin
284 : : * actively using shared memory. This is called in the child process.
285 : : *
286 : : * This registers an shmem exit hook to mark us as inactive again when the
287 : : * process exits normally.
288 : : */
289 : : void
333 290 : 18649 : RegisterPostmasterChildActive(void)
291 : : {
5968 tgl@sss.pgh.pa.us 292 : 18649 : int slot = MyPMChildSlot;
293 : :
294 [ + - - + ]: 18649 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
295 : 18649 : slot--;
296 [ - + ]: 18649 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
5348 heikki.linnakangas@i 297 : 18649 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE;
298 : :
299 : : /* Arrange to clean up at exit. */
333 300 : 18649 : on_shmem_exit(MarkPostmasterChildInactive, 0);
5348 301 : 18649 : }
302 : :
303 : : /*
304 : : * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender
305 : : * process. This is called in the child process, sometime after marking the
306 : : * child as active.
307 : : */
308 : : void
309 : 1099 : MarkPostmasterChildWalSender(void)
310 : : {
311 : 1099 : int slot = MyPMChildSlot;
312 : :
313 [ - + ]: 1099 : Assert(am_walsender);
314 : :
315 [ + - - + ]: 1099 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
316 : 1099 : slot--;
317 [ - + ]: 1099 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE);
318 : 1099 : PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER;
5713 319 : 1099 : }
320 : :
321 : : /*
322 : : * MarkPostmasterChildInactive - mark a postmaster child as done using
323 : : * shared memory. This is called in the child process.
324 : : */
325 : : static void
333 326 : 18649 : MarkPostmasterChildInactive(int code, Datum arg)
327 : : {
5968 tgl@sss.pgh.pa.us 328 : 18649 : int slot = MyPMChildSlot;
329 : :
330 [ + - - + ]: 18649 : Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
331 : 18649 : slot--;
5713 heikki.linnakangas@i 332 [ + + - + ]: 18649 : Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE ||
333 : : PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER);
5968 tgl@sss.pgh.pa.us 334 : 18649 : PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
335 : 18649 : }
336 : :
337 : :
338 : : /*
339 : : * PostmasterIsAliveInternal - check whether postmaster process is still alive
340 : : *
341 : : * This is the slow path of PostmasterIsAlive(), where the caller has already
342 : : * checked 'postmaster_possibly_dead'. (On platforms that don't support
343 : : * a signal for parent death, PostmasterIsAlive() is just an alias for this.)
344 : : */
345 : : bool
2614 tmunro@postgresql.or 346 : 251 : PostmasterIsAliveInternal(void)
347 : : {
348 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
349 : : /*
350 : : * Reset the flag before checking, so that we don't miss a signal if
351 : : * postmaster dies right after the check. If postmaster was indeed dead,
352 : : * we'll re-arm it before returning to caller.
353 : : */
354 : 251 : postmaster_possibly_dead = false;
355 : : #endif
356 : :
357 : : #ifndef WIN32
358 : : {
359 : : char c;
360 : : ssize_t rc;
361 : :
362 : 251 : rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
363 : :
364 : : /*
365 : : * In the usual case, the postmaster is still alive, and there is no
366 : : * data in the pipe.
367 : : */
368 [ + + - + : 251 : if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
- - ]
5373 rhaas@postgresql.org 369 : 227 : return true;
370 : : else
371 : : {
372 : : /*
373 : : * Postmaster is dead, or something went wrong with the read()
374 : : * call.
375 : : */
376 : :
377 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
2614 tmunro@postgresql.or 378 : 24 : postmaster_possibly_dead = true;
379 : : #endif
380 : :
381 [ - + ]: 24 : if (rc < 0)
2614 tmunro@postgresql.or 382 [ # # ]:UBC 0 : elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
2614 tmunro@postgresql.or 383 [ - + ]:CBC 24 : else if (rc > 0)
2614 tmunro@postgresql.or 384 [ # # ]:UBC 0 : elog(FATAL, "unexpected data in postmaster death monitoring pipe");
385 : :
2614 tmunro@postgresql.or 386 :CBC 24 : return false;
387 : : }
388 : : }
389 : :
390 : : #else /* WIN32 */
391 : : if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT)
392 : : return true;
393 : : else
394 : : {
395 : : #ifdef USE_POSTMASTER_DEATH_SIGNAL
396 : : postmaster_possibly_dead = true;
397 : : #endif
398 : : return false;
399 : : }
400 : : #endif /* WIN32 */
401 : : }
402 : :
403 : : /*
404 : : * PostmasterDeathSignalInit - request signal on postmaster death if possible
405 : : */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking to be sent the signal. */
	pqsignal(death_signal, postmaster_death_handler);

	/* Ask to be sent that signal when the parent process exits. */
#ifdef PR_SET_PDEATHSIG
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent could already have gone away before the request above took
	 * effect, so start out with the flag set; that forces the slow check the
	 * first time anybody asks.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
|