Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pgarch.c
4 : : *
5 : : * PostgreSQL WAL archiver
6 : : *
7 : : * All functions relating to archiver are included here
8 : : *
9 : : * - All functions executed by archiver process
10 : : *
11 : : * - archiver is forked from postmaster, and the two
12 : : * processes then communicate using signals. All functions
13 : : * executed by postmaster are included in this file.
14 : : *
15 : : * Initial author: Simon Riggs simon@2ndquadrant.com
16 : : *
17 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
18 : : * Portions Copyright (c) 1994, Regents of the University of California
19 : : *
20 : : *
21 : : * IDENTIFICATION
22 : : * src/backend/postmaster/pgarch.c
23 : : *
24 : : *-------------------------------------------------------------------------
25 : : */
26 : : #include "postgres.h"
27 : :
28 : : #include <time.h>
29 : : #include <sys/stat.h>
30 : : #include <unistd.h>
31 : :
32 : : #include "access/xlog.h"
33 : : #include "access/xlog_internal.h"
34 : : #include "archive/archive_module.h"
35 : : #include "archive/shell_archive.h"
36 : : #include "lib/binaryheap.h"
37 : : #include "libpq/pqsignal.h"
38 : : #include "pgstat.h"
39 : : #include "postmaster/auxprocess.h"
40 : : #include "postmaster/interrupt.h"
41 : : #include "postmaster/pgarch.h"
42 : : #include "storage/condition_variable.h"
43 : : #include "storage/aio_subsys.h"
44 : : #include "storage/fd.h"
45 : : #include "storage/ipc.h"
46 : : #include "storage/latch.h"
47 : : #include "storage/pmsignal.h"
48 : : #include "storage/proc.h"
49 : : #include "storage/procsignal.h"
50 : : #include "storage/shmem.h"
51 : : #include "storage/subsystems.h"
52 : : #include "utils/guc.h"
53 : : #include "utils/memutils.h"
54 : : #include "utils/ps_status.h"
55 : : #include "utils/resowner.h"
56 : : #include "utils/timeout.h"
57 : : #include "utils/wait_event.h"
58 : :
59 : :
/* ----------
 * Archiver timing parameters, both expressed in seconds.
 * ----------
 */

/* Longest the main loop sleeps before polling archive_status again. */
#define PGARCH_AUTOWAKE_INTERVAL 60

/* Minimum spacing between archiver launches, see PgArchCanRestart(). */
#define PGARCH_RESTART_INTERVAL 10

/*
 * Number of failed attempts on one WAL file after which
 * pgarch_ArchiverCopyLoop() gives up for the current cycle.
 */
#define NUM_ARCHIVE_RETRIES 3

/*
 * Number of failed attempts to unlink an orphan archive status file after
 * which pgarch_ArchiverCopyLoop() gives up for the current cycle.
 */
#define NUM_ORPHAN_CLEANUP_RETRIES 3

/*
 * Upper bound on the number of .ready files remembered from a single scan
 * of the archive status directory.
 */
#define NUM_FILES_PER_DIRECTORY_SCAN 64
85 : :
86 : : /* Shared memory area for archiver process */
87 : : typedef struct PgArchData
88 : : {
89 : : int pgprocno; /* proc number of archiver process */
90 : :
91 : : /*
92 : : * Forces a directory scan in pgarch_readyXlog().
93 : : */
94 : : pg_atomic_uint32 force_dir_scan;
95 : : } PgArchData;
96 : :
/* Name of the archive library; the built-in default is the empty string. */
char	   *XLogArchiveLibrary = "";

/*
 * Optional detail text for the "archiving is not configured" warning.
 * pgarch_ArchiverCopyLoop() clears it before every check_configured_cb call
 * and appends it to the warning when it is non-NULL afterwards.
 */
char	   *arch_module_check_errdetail_string = NULL;
99 : :
100 : :
101 : : /* ----------
102 : : * Local data
103 : : * ----------
104 : : */
105 : : static time_t last_sigterm_time = 0;
106 : : static PgArchData *PgArch = NULL;
107 : : static const ArchiveModuleCallbacks *ArchiveCallbacks;
108 : : static ArchiveModuleState *archive_module_state;
109 : : static MemoryContext archive_context;
110 : :
111 : :
112 : : /*
113 : : * Stuff for tracking multiple files to archive from each scan of
114 : : * archive_status. Minimizing the number of directory scans when there are
115 : : * many files to archive can significantly improve archival rate.
116 : : *
117 : : * arch_heap is a max-heap that is used during the directory scan to track
118 : : * the highest-priority files to archive. After the directory scan
119 : : * completes, the file names are stored in ascending order of priority in
120 : : * arch_files. pgarch_readyXlog() returns files from arch_files until it
121 : : * is empty, at which point another directory scan must be performed.
122 : : *
123 : : * We only need this data in the archiver process, so make it a palloc'd
124 : : * struct rather than a bunch of static arrays.
125 : : */
126 : : struct arch_files_state
127 : : {
128 : : binaryheap *arch_heap;
129 : : int arch_files_size; /* number of live entries in arch_files[] */
130 : : char *arch_files[NUM_FILES_PER_DIRECTORY_SCAN];
131 : : /* buffers underlying heap, and later arch_files[], entries: */
132 : : char arch_filenames[NUM_FILES_PER_DIRECTORY_SCAN][MAX_XFN_CHARS + 1];
133 : : };
134 : :
135 : : static struct arch_files_state *arch_files = NULL;
136 : :
/*
 * Set by the SIGUSR2 handler, pgarch_waken_stop(); pgarch_MainLoop() reads
 * it to decide whether the current archive cycle is the last one.
 */
static volatile sig_atomic_t ready_to_stop = false;
141 : :
142 : : /* ----------
143 : : * Local function forward declarations
144 : : * ----------
145 : : */
146 : : static void pgarch_waken_stop(SIGNAL_ARGS);
147 : : static void pgarch_MainLoop(void);
148 : : static void pgarch_ArchiverCopyLoop(void);
149 : : static bool pgarch_archiveXlog(char *xlog);
150 : : static bool pgarch_readyXlog(char *xlog);
151 : : static void pgarch_archiveDone(char *xlog);
152 : : static void pgarch_die(int code, Datum arg);
153 : : static void ProcessPgArchInterrupts(void);
154 : : static int ready_file_comparator(Datum a, Datum b, void *arg);
155 : : static void LoadArchiveLibrary(void);
156 : : static void pgarch_call_module_shutdown_cb(int code, Datum arg);
157 : :
158 : : static void PgArchShmemRequest(void *arg);
159 : : static void PgArchShmemInit(void *arg);
160 : :
161 : : const ShmemCallbacks PgArchShmemCallbacks = {
162 : : .request_fn = PgArchShmemRequest,
163 : : .init_fn = PgArchShmemInit,
164 : : };
165 : :
166 : : /* Register shared memory space needed by the archiver */
167 : : static void
29 heikki.linnakangas@i 168 :GNC 1244 : PgArchShmemRequest(void *arg)
169 : : {
170 : 1244 : ShmemRequestStruct(.name = "Archiver Data",
171 : : .size = sizeof(PgArchData),
172 : : .ptr = (void **) &PgArch,
173 : : );
1877 fujii@postgresql.org 174 :GIC 1244 : }
175 : :
176 : : /* Initialize archiver-related shared memory */
177 : : static void
29 heikki.linnakangas@i 178 :GNC 1241 : PgArchShmemInit(void *arg)
179 : : {
180 [ + - + - : 2482 : MemSet(PgArch, 0, sizeof(PgArchData));
+ - + - +
+ ]
181 : 1241 : PgArch->pgprocno = INVALID_PROC_NUMBER;
182 : 1241 : pg_atomic_init_u32(&PgArch->force_dir_scan, 0);
7960 tgl@sss.pgh.pa.us 183 :CBC 1241 : }
184 : :
185 : : /*
186 : : * PgArchCanRestart
187 : : *
188 : : * Return true, indicating archiver is allowed to restart, if enough time has
189 : : * passed since it was last launched to reach PGARCH_RESTART_INTERVAL.
190 : : * Otherwise return false.
191 : : *
192 : : * This is a safety valve to protect against continuous respawn attempts if the
193 : : * archiver is dying immediately at launch. Note that since we will retry to
194 : : * launch the archiver from the postmaster main loop, we will get another
195 : : * chance later.
196 : : */
197 : : bool
1877 fujii@postgresql.org 198 : 55 : PgArchCanRestart(void)
199 : : {
200 : : static time_t last_pgarch_start_time = 0;
201 : 55 : time_t curtime = time(NULL);
202 : :
203 : : /*
204 : : * If first time through, or time somehow went backwards, always update
205 : : * last_pgarch_start_time to match the current clock and allow archiver
206 : : * start. Otherwise allow it only once enough time has elapsed.
207 : : */
195 tgl@sss.pgh.pa.us 208 [ - + ]:GNC 55 : if (last_pgarch_start_time == 0 ||
195 tgl@sss.pgh.pa.us 209 [ # # ]:UNC 0 : curtime < last_pgarch_start_time ||
210 [ # # ]: 0 : curtime - last_pgarch_start_time >= PGARCH_RESTART_INTERVAL)
211 : : {
195 tgl@sss.pgh.pa.us 212 :GNC 55 : last_pgarch_start_time = curtime;
213 : 55 : return true;
214 : : }
195 tgl@sss.pgh.pa.us 215 :UNC 0 : return false;
216 : : }
217 : :
218 : :
219 : : /* Main entry point for archiver process */
220 : : void
438 peter@eisentraut.org 221 :CBC 18 : PgArchiverMain(const void *startup_data, size_t startup_data_len)
222 : : {
778 heikki.linnakangas@i 223 [ - + ]: 18 : Assert(startup_data_len == 0);
224 : :
225 : 18 : AuxiliaryProcessMainCommon();
226 : :
227 : : /*
228 : : * Ignore all signals usually bound to some action in the postmaster,
229 : : * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
230 : : */
2331 rhaas@postgresql.org 231 : 18 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
21 andrew@dunslane.net 232 :GNC 18 : pqsignal(SIGINT, PG_SIG_IGN);
2331 rhaas@postgresql.org 233 :CBC 18 : pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
234 : : /* SIGQUIT handler was already set up by InitPostmasterChild */
21 andrew@dunslane.net 235 :GNC 18 : pqsignal(SIGALRM, PG_SIG_IGN);
236 : 18 : pqsignal(SIGPIPE, PG_SIG_IGN);
1877 fujii@postgresql.org 237 :CBC 18 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
6689 tgl@sss.pgh.pa.us 238 : 18 : pqsignal(SIGUSR2, pgarch_waken_stop);
239 : :
240 : : /* Reset some signals that are accepted by postmaster but not here */
21 andrew@dunslane.net 241 :GNC 18 : pqsignal(SIGCHLD, PG_SIG_DFL);
242 : :
243 : : /* Unblock signals (they were blocked when the postmaster forked us) */
1187 tmunro@postgresql.or 244 :CBC 18 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
245 : :
246 : : /* We shouldn't be launched unnecessarily. */
1877 fujii@postgresql.org 247 [ + - - + : 18 : Assert(XLogArchivingActive());
- + ]
248 : :
249 : : /* Arrange to clean up at archiver exit */
250 : 18 : on_shmem_exit(pgarch_die, 0);
251 : :
252 : : /*
253 : : * Advertise our proc number so that backends can use our latch to wake us
254 : : * up while we're sleeping.
255 : : */
803 heikki.linnakangas@i 256 : 18 : PgArch->pgprocno = MyProcNumber;
257 : :
258 : : /* Create workspace for pgarch_readyXlog() */
146 michael@paquier.xyz 259 :GNC 18 : arch_files = palloc_object(struct arch_files_state);
1588 tgl@sss.pgh.pa.us 260 :CBC 18 : arch_files->arch_files_size = 0;
261 : :
262 : : /* Initialize our max-heap for prioritizing files to archive. */
263 : 18 : arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
264 : : ready_file_comparator, NULL);
265 : :
266 : : /* Initialize our memory context. */
763 nathan@postgresql.or 267 : 18 : archive_context = AllocSetContextCreate(TopMemoryContext,
268 : : "archiver",
269 : : ALLOCSET_DEFAULT_SIZES);
270 : :
271 : : /* Load the archive_library. */
1552 rhaas@postgresql.org 272 : 18 : LoadArchiveLibrary();
273 : :
1294 michael@paquier.xyz 274 : 18 : pgarch_MainLoop();
275 : :
1877 fujii@postgresql.org 276 : 18 : proc_exit(0);
277 : : }
278 : :
279 : : /*
280 : : * Wake up the archiver
281 : : */
282 : : void
283 : 475 : PgArchWakeup(void)
284 : : {
285 : 475 : int arch_pgprocno = PgArch->pgprocno;
286 : :
287 : : /*
288 : : * We don't acquire ProcArrayLock here. It's actually fine because
289 : : * procLatch isn't ever freed, so we just can potentially set the wrong
290 : : * process' (or no process') latch. Even in that case the archiver will
291 : : * be relaunched shortly and will start archiving.
292 : : */
793 heikki.linnakangas@i 293 [ + + ]: 475 : if (arch_pgprocno != INVALID_PROC_NUMBER)
120 drowley@postgresql.o 294 :GNC 456 : SetLatch(&GetPGProcByNumber(arch_pgprocno)->procLatch);
7960 tgl@sss.pgh.pa.us 295 :CBC 475 : }
296 : :
297 : :
298 : : /* SIGUSR2 signal handler for archiver process */
299 : : static void
6689 300 : 18 : pgarch_waken_stop(SIGNAL_ARGS)
301 : : {
302 : : /* set flag to do a final cycle and shut down afterwards */
303 : 18 : ready_to_stop = true;
4129 andres@anarazel.de 304 : 18 : SetLatch(MyLatch);
6689 tgl@sss.pgh.pa.us 305 : 18 : }
306 : :
307 : : /*
308 : : * pgarch_MainLoop
309 : : *
310 : : * Main loop for archiver
311 : : */
312 : : static void
7960 313 : 18 : pgarch_MainLoop(void)
314 : : {
315 : : bool time_to_stop;
316 : :
317 : : /*
318 : : * There shouldn't be anything for the archiver to do except to wait for a
319 : : * signal ... however, the archiver exists to protect our data, so it
320 : : * wakes up occasionally to allow itself to be proactive.
321 : : */
322 : : do
323 : : {
4129 andres@anarazel.de 324 : 91 : ResetLatch(MyLatch);
325 : :
326 : : /* When we get SIGUSR2, we do one more archive cycle, then exit */
6689 tgl@sss.pgh.pa.us 327 : 91 : time_to_stop = ready_to_stop;
328 : :
329 : : /* Check for barrier events and config update */
426 heikki.linnakangas@i 330 : 91 : ProcessPgArchInterrupts();
331 : :
332 : : /*
333 : : * If we've gotten SIGTERM, we normally just sit and do nothing until
334 : : * SIGUSR2 arrives. However, that means a random SIGTERM would
335 : : * disable archiving indefinitely, which doesn't seem like a good
336 : : * idea. If more than 60 seconds pass since SIGTERM, exit anyway, so
337 : : * that the postmaster can start a new archiver if needed. Also exit
338 : : * if time unexpectedly goes backward.
339 : : */
2331 rhaas@postgresql.org 340 [ - + ]: 91 : if (ShutdownRequestPending)
341 : : {
6689 tgl@sss.pgh.pa.us 342 :UBC 0 : time_t curtime = time(NULL);
343 : :
344 [ # # ]: 0 : if (last_sigterm_time == 0)
345 : 0 : last_sigterm_time = curtime;
195 tgl@sss.pgh.pa.us 346 [ # # ]:UNC 0 : else if (curtime < last_sigterm_time ||
347 [ # # ]: 0 : curtime - last_sigterm_time >= 60)
6689 tgl@sss.pgh.pa.us 348 :EUB : break;
349 : : }
350 : :
351 : : /* Do what we're here for */
1877 fujii@postgresql.org 352 :CBC 91 : pgarch_ArchiverCopyLoop();
353 : :
354 : : /*
355 : : * Sleep until a signal is received, or until a poll is forced by
356 : : * PGARCH_AUTOWAKE_INTERVAL, or until postmaster dies.
357 : : */
5077 bruce@momjian.us 358 [ + + ]: 91 : if (!time_to_stop) /* Don't wait during last iteration */
359 : : {
360 : : int rc;
361 : :
1189 michael@paquier.xyz 362 : 73 : rc = WaitLatch(MyLatch,
363 : : WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
364 : : PGARCH_AUTOWAKE_INTERVAL * 1000L,
365 : : WAIT_EVENT_ARCHIVER_MAIN);
366 [ - + ]: 73 : if (rc & WL_POSTMASTER_DEATH)
1189 michael@paquier.xyz 367 :UBC 0 : time_to_stop = true;
368 : : }
369 : :
370 : : /*
371 : : * The archiver quits either when the postmaster dies (not expected)
372 : : * or after completing one more archiving cycle after receiving
373 : : * SIGUSR2.
374 : : */
2720 tmunro@postgresql.or 375 [ + + ]:CBC 91 : } while (!time_to_stop);
7960 tgl@sss.pgh.pa.us 376 : 18 : }
377 : :
378 : : /*
379 : : * pgarch_ArchiverCopyLoop
380 : : *
381 : : * Archives all outstanding xlogs then returns
382 : : */
383 : : static void
384 : 91 : pgarch_ArchiverCopyLoop(void)
385 : : {
386 : : char xlog[MAX_XFN_CHARS + 1];
387 : :
388 : : /* force directory scan in the first call to pgarch_readyXlog() */
1588 389 : 91 : arch_files->arch_files_size = 0;
390 : :
391 : : /*
392 : : * loop through all xlogs with archive_status of .ready and archive
393 : : * them...mostly we expect this to be a single file, though it is possible
394 : : * some backend will add files onto the list of those that need archiving
395 : : * while we are still copying earlier archives
396 : : */
7280 397 [ + + ]: 462 : while (pgarch_readyXlog(xlog))
398 : : {
7919 bruce@momjian.us 399 : 376 : int failures = 0;
2703 michael@paquier.xyz 400 : 376 : int failures_orphan = 0;
401 : :
402 : : for (;;)
7960 tgl@sss.pgh.pa.us 403 : 5 : {
404 : : struct stat stat_buf;
405 : : char pathname[MAXPGPATH];
406 : :
407 : : /*
408 : : * Do not initiate any more archive commands after receiving
409 : : * SIGTERM, nor after the postmaster has died unexpectedly. The
410 : : * first condition is to try to keep from having init SIGKILL the
411 : : * command, and the second is to avoid conflicts with another
412 : : * archiver spawned by a newer postmaster.
413 : : */
2331 rhaas@postgresql.org 414 [ + - - + ]: 381 : if (ShutdownRequestPending || !PostmasterIsAlive())
7280 tgl@sss.pgh.pa.us 415 : 5 : return;
416 : :
417 : : /*
418 : : * Check for barrier events and config update. This is so that
419 : : * we'll adopt a new setting for archive_command as soon as
420 : : * possible, even if there is a backlog of files to be archived.
421 : : */
426 heikki.linnakangas@i 422 : 381 : ProcessPgArchInterrupts();
423 : :
424 : : /* Reset variables that might be set by the callback */
792 nathan@postgresql.or 425 : 381 : arch_module_check_errdetail_string = NULL;
426 : :
427 : : /* can't do anything if not configured ... */
1173 michael@paquier.xyz 428 [ + - ]: 381 : if (ArchiveCallbacks->check_configured_cb != NULL &&
429 [ + + ]: 381 : !ArchiveCallbacks->check_configured_cb(archive_module_state))
430 : : {
5838 tgl@sss.pgh.pa.us 431 [ + - + - ]: 3 : ereport(WARNING,
432 : : (errmsg("\"archive_mode\" enabled, yet archiving is not configured"),
433 : : arch_module_check_errdetail_string ?
434 : : errdetail_internal("%s", arch_module_check_errdetail_string) : 0));
435 : 3 : return;
436 : : }
437 : :
438 : : /*
439 : : * Since archive status files are not removed in a durable manner,
440 : : * a system crash could leave behind .ready files for WAL segments
441 : : * that have already been recycled or removed. In this case,
442 : : * simply remove the orphan status file and move on. unlink() is
443 : : * used here as even on subsequent crashes the same orphan files
444 : : * would get removed, so there is no need to worry about
445 : : * durability.
446 : : */
2703 michael@paquier.xyz 447 : 378 : snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
448 [ - + - - ]: 378 : if (stat(pathname, &stat_buf) != 0 && errno == ENOENT)
2703 michael@paquier.xyz 449 :UBC 0 : {
450 : : char xlogready[MAXPGPATH];
451 : :
452 : 0 : StatusFilePath(xlogready, xlog, ".ready");
453 [ # # ]: 0 : if (unlink(xlogready) == 0)
454 : : {
455 [ # # ]: 0 : ereport(WARNING,
456 : : (errmsg("removed orphan archive status file \"%s\"",
457 : : xlogready)));
458 : :
459 : : /* leave loop and move to the next status file */
460 : 0 : break;
461 : : }
462 : :
463 [ # # ]: 0 : if (++failures_orphan >= NUM_ORPHAN_CLEANUP_RETRIES)
464 : : {
465 [ # # ]: 0 : ereport(WARNING,
466 : : (errmsg("removal of orphan archive status file \"%s\" failed too many times, will try again later",
467 : : xlogready)));
468 : :
469 : : /* give up cleanup of orphan status files */
470 : 0 : return;
471 : : }
472 : :
473 : : /* wait a bit before retrying */
474 : 0 : pg_usleep(1000000L);
475 : 0 : continue;
476 : : }
477 : :
7960 tgl@sss.pgh.pa.us 478 [ + + ]:CBC 378 : if (pgarch_archiveXlog(xlog))
479 : : {
480 : : /* successful */
481 : 371 : pgarch_archiveDone(xlog);
482 : :
483 : : /*
484 : : * Tell the cumulative stats system about the WAL file that we
485 : : * successfully archived
486 : : */
1490 andres@anarazel.de 487 : 371 : pgstat_report_archiver(xlog, false);
488 : :
7960 tgl@sss.pgh.pa.us 489 : 371 : break; /* out of inner retry loop */
490 : : }
491 : : else
492 : : {
493 : : /*
494 : : * Tell the cumulative stats system about the WAL file that we
495 : : * failed to archive
496 : : */
1490 andres@anarazel.de 497 : 7 : pgstat_report_archiver(xlog, true);
498 : :
7960 tgl@sss.pgh.pa.us 499 [ + + ]: 7 : if (++failures >= NUM_ARCHIVE_RETRIES)
500 : : {
501 [ + - ]: 2 : ereport(WARNING,
502 : : (errmsg("archiving write-ahead log file \"%s\" failed too many times, will try again later",
503 : : xlog)));
504 : 2 : return; /* give up archiving for now */
505 : : }
7838 506 : 5 : pg_usleep(1000000L); /* wait a bit before retrying */
507 : : }
508 : : }
509 : : }
510 : : }
511 : :
512 : : /*
513 : : * pgarch_archiveXlog
514 : : *
515 : : * Invokes archive_file_cb to copy one archive file to wherever it should go
516 : : *
517 : : * Returns true if successful
518 : : */
519 : : static bool
7960 520 : 378 : pgarch_archiveXlog(char *xlog)
521 : : {
522 : : sigjmp_buf local_sigjmp_buf;
523 : : MemoryContext oldcontext;
524 : : char pathname[MAXPGPATH];
525 : : char activitymsg[MAXFNAMELEN + 16];
526 : : bool ret;
527 : :
7610 528 : 378 : snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
529 : :
530 : : /* Report archive activity in PS display */
6713 531 : 378 : snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
2246 peter@eisentraut.org 532 : 378 : set_ps_display(activitymsg);
533 : :
763 nathan@postgresql.or 534 : 378 : oldcontext = MemoryContextSwitchTo(archive_context);
535 : :
536 : : /*
537 : : * Since the archiver operates at the bottom of the exception stack,
538 : : * ERRORs turn into FATALs and cause the archiver process to restart.
539 : : * However, using ereport(ERROR, ...) when there are problems is easy to
540 : : * code and maintain. Therefore, we create our own exception handler to
541 : : * catch ERRORs and return false instead of restarting the archiver
542 : : * whenever there is a failure.
543 : : *
544 : : * We assume ERRORs from the archiving callback are the most common
545 : : * exceptions experienced by the archiver, so we opt to handle exceptions
546 : : * here instead of PgArchiverMain() to avoid reinitializing the archiver
547 : : * too frequently. We could instead add a sigsetjmp() block to
548 : : * PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to
549 : : * avoid the odd archiver restart doesn't seem worth it.
550 : : */
551 [ - + ]: 378 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
552 : : {
553 : : /* Since not using PG_TRY, must reset error stack by hand */
763 nathan@postgresql.or 554 :UBC 0 : error_context_stack = NULL;
555 : :
556 : : /* Prevent interrupts while cleaning up */
557 : 0 : HOLD_INTERRUPTS();
558 : :
559 : : /* Report the error to the server log. */
560 : 0 : EmitErrorReport();
561 : :
562 : : /*
563 : : * Try to clean up anything the archive module left behind. We try to
564 : : * cover anything that an archive module could conceivably have left
565 : : * behind, but it is of course possible that modules could be doing
566 : : * unexpected things that require additional cleanup. Module authors
567 : : * should be sure to do any extra required cleanup in a PG_CATCH block
568 : : * within the archiving callback, and they are encouraged to notify
569 : : * the pgsql-hackers mailing list so that we can add it here.
570 : : */
571 : 0 : disable_all_timeouts(false);
572 : 0 : LWLockReleaseAll();
573 : 0 : ConditionVariableCancelSleep();
574 : 0 : pgstat_report_wait_end();
414 andres@anarazel.de 575 : 0 : pgaio_error_cleanup();
763 nathan@postgresql.or 576 : 0 : ReleaseAuxProcessResources(false);
577 : 0 : AtEOXact_Files(false);
578 : 0 : AtEOXact_HashTables(false);
579 : :
580 : : /*
581 : : * Return to the original memory context and clear ErrorContext for
582 : : * next time.
583 : : */
584 : 0 : MemoryContextSwitchTo(oldcontext);
585 : 0 : FlushErrorState();
586 : :
587 : : /* Flush any leaked data */
588 : 0 : MemoryContextReset(archive_context);
589 : :
590 : : /* Remove our exception handler */
591 : 0 : PG_exception_stack = NULL;
592 : :
593 : : /* Now we can allow interrupts again */
594 [ # # ]: 0 : RESUME_INTERRUPTS();
595 : :
596 : : /* Report failure so that the archiver retries this file */
597 : 0 : ret = false;
598 : : }
599 : : else
600 : : {
601 : : /* Enable our exception handler */
763 nathan@postgresql.or 602 :CBC 378 : PG_exception_stack = &local_sigjmp_buf;
603 : :
604 : : /* Archive the file! */
605 : 378 : ret = ArchiveCallbacks->archive_file_cb(archive_module_state,
606 : : xlog, pathname);
607 : :
608 : : /* Remove our exception handler */
609 : 378 : PG_exception_stack = NULL;
610 : :
611 : : /* Reset our memory context and switch back to the original one */
612 : 378 : MemoryContextSwitchTo(oldcontext);
613 : 378 : MemoryContextReset(archive_context);
614 : : }
615 : :
1558 rhaas@postgresql.org 616 [ + + ]: 378 : if (ret)
617 : 371 : snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
618 : : else
6713 tgl@sss.pgh.pa.us 619 : 7 : snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
2246 peter@eisentraut.org 620 : 378 : set_ps_display(activitymsg);
621 : :
1558 rhaas@postgresql.org 622 : 378 : return ret;
623 : : }
624 : :
625 : : /*
626 : : * pgarch_readyXlog
627 : : *
628 : : * Return name of the oldest xlog file that has not yet been archived.
629 : : * No notification is set that file archiving is now in progress, so
630 : : * this would need to be extended if multiple concurrent archival
631 : : * tasks were created. If a failure occurs, we will completely
632 : : * re-copy the file at the next available opportunity.
633 : : *
634 : : * It is important that we return the oldest, so that we archive xlogs
635 : : * in order that they were written, for two reasons:
636 : : * 1) to maintain the sequential chain of xlogs required for recovery
637 : : * 2) because the oldest ones will sooner become candidates for
638 : : * recycling at time of checkpoint
639 : : *
640 : : * NOTE: the "oldest" comparison will consider any .history file to be older
641 : : * than any other file except another .history file. Segments on a timeline
642 : : * with a smaller ID will be older than all segments on a timeline with a
643 : : * larger ID; the net result being that past timelines are given higher
644 : : * priority for archiving. This seems okay, or at least not obviously worth
645 : : * changing.
646 : : */
647 : : static bool
7960 tgl@sss.pgh.pa.us 648 : 462 : pgarch_readyXlog(char *xlog)
649 : : {
650 : : char XLogArchiveStatusDir[MAXPGPATH];
651 : : DIR *rldir;
652 : : struct dirent *rlde;
653 : :
654 : : /*
655 : : * If a directory scan was requested, clear the stored file names and
656 : : * proceed.
657 : : */
796 nathan@postgresql.or 658 [ + + ]: 462 : if (pg_atomic_exchange_u32(&PgArch->force_dir_scan, 0) == 1)
1588 tgl@sss.pgh.pa.us 659 : 3 : arch_files->arch_files_size = 0;
660 : :
661 : : /*
662 : : * If we still have stored file names from the previous directory scan,
663 : : * try to return one of those. We check to make sure the status file is
664 : : * still present, as the archive_command for a previous file may have
665 : : * already marked it done.
666 : : */
667 [ + + ]: 462 : while (arch_files->arch_files_size > 0)
668 : : {
669 : : struct stat st;
670 : : char status_file[MAXPGPATH];
671 : : char *arch_file;
672 : :
673 : 308 : arch_files->arch_files_size--;
674 : 308 : arch_file = arch_files->arch_files[arch_files->arch_files_size];
1636 rhaas@postgresql.org 675 : 308 : StatusFilePath(status_file, arch_file, ".ready");
676 : :
677 [ + - ]: 308 : if (stat(status_file, &st) == 0)
678 : : {
679 : 308 : strcpy(xlog, arch_file);
680 : 308 : return true;
681 : : }
1636 rhaas@postgresql.org 682 [ # # ]:UBC 0 : else if (errno != ENOENT)
683 [ # # ]: 0 : ereport(ERROR,
684 : : (errcode_for_file_access(),
685 : : errmsg("could not stat file \"%s\": %m", status_file)));
686 : : }
687 : :
688 : : /* arch_heap is probably empty, but let's make sure */
1588 tgl@sss.pgh.pa.us 689 :CBC 154 : binaryheap_reset(arch_files->arch_heap);
690 : :
691 : : /*
692 : : * Open the archive status directory and read through the list of files
693 : : * with the .ready suffix, looking for the earliest files.
694 : : */
7610 695 : 154 : snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
7960 696 : 154 : rldir = AllocateDir(XLogArchiveStatusDir);
697 : :
7625 698 [ + + ]: 1625 : while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
699 : : {
7919 bruce@momjian.us 700 : 1471 : int basenamelen = (int) strlen(rlde->d_name) - 6;
701 : : char basename[MAX_XFN_CHARS + 1];
702 : : char *arch_file;
703 : :
704 : : /* Ignore entries with unexpected number of characters */
2689 michael@paquier.xyz 705 [ + + - + ]: 1471 : if (basenamelen < MIN_XFN_CHARS ||
706 : : basenamelen > MAX_XFN_CHARS)
707 : 956 : continue;
708 : :
709 : : /* Ignore entries with unexpected characters */
710 [ - + ]: 1140 : if (strspn(rlde->d_name, VALID_XFN_CHARS) < basenamelen)
2689 michael@paquier.xyz 711 :UBC 0 : continue;
712 : :
713 : : /* Ignore anything not suffixed with .ready */
2689 michael@paquier.xyz 714 [ + + ]:CBC 1140 : if (strcmp(rlde->d_name + basenamelen, ".ready") != 0)
715 : 625 : continue;
716 : :
717 : : /* Truncate off the .ready */
718 : 515 : memcpy(basename, rlde->d_name, basenamelen);
719 : 515 : basename[basenamelen] = '\0';
720 : :
721 : : /*
722 : : * Store the file in our max-heap if it has a high enough priority.
723 : : */
308 nathan@postgresql.or 724 [ + + ]:GNC 515 : if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
725 : : {
726 : : /* If the heap isn't full yet, quickly add it. */
727 : 378 : arch_file = arch_files->arch_filenames[binaryheap_size(arch_files->arch_heap)];
1636 rhaas@postgresql.org 728 :CBC 378 : strcpy(arch_file, basename);
1588 tgl@sss.pgh.pa.us 729 : 378 : binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file));
730 : :
731 : : /* If we just filled the heap, make it a valid one. */
308 nathan@postgresql.or 732 [ + + ]:GNC 378 : if (binaryheap_size(arch_files->arch_heap) == NUM_FILES_PER_DIRECTORY_SCAN)
1588 tgl@sss.pgh.pa.us 733 :CBC 3 : binaryheap_build(arch_files->arch_heap);
734 : : }
735 [ + + ]: 137 : else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap),
736 : : CStringGetDatum(basename), NULL) > 0)
737 : : {
738 : : /*
739 : : * Remove the lowest priority file and add the current one to the
740 : : * heap.
741 : : */
742 : 106 : arch_file = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
1636 rhaas@postgresql.org 743 : 106 : strcpy(arch_file, basename);
1588 tgl@sss.pgh.pa.us 744 : 106 : binaryheap_add(arch_files->arch_heap, CStringGetDatum(arch_file));
745 : : }
746 : : }
7960 747 : 154 : FreeDir(rldir);
748 : :
749 : : /* If no files were found, simply return. */
308 nathan@postgresql.or 750 [ + + ]:GNC 154 : if (binaryheap_empty(arch_files->arch_heap))
1636 rhaas@postgresql.org 751 :CBC 86 : return false;
752 : :
753 : : /*
754 : : * If we didn't fill the heap, we didn't make it a valid one. Do that
755 : : * now.
756 : : */
308 nathan@postgresql.or 757 [ + + ]:GNC 68 : if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
1588 tgl@sss.pgh.pa.us 758 :CBC 65 : binaryheap_build(arch_files->arch_heap);
759 : :
760 : : /*
761 : : * Fill arch_files array with the files to archive in ascending order of
762 : : * priority.
763 : : */
308 nathan@postgresql.or 764 :GNC 68 : arch_files->arch_files_size = binaryheap_size(arch_files->arch_heap);
1588 tgl@sss.pgh.pa.us 765 [ + + ]:CBC 446 : for (int i = 0; i < arch_files->arch_files_size; i++)
766 : 378 : arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
767 : :
768 : : /* Return the highest priority file. */
769 : 68 : arch_files->arch_files_size--;
770 : 68 : strcpy(xlog, arch_files->arch_files[arch_files->arch_files_size]);
771 : :
1636 rhaas@postgresql.org 772 : 68 : return true;
773 : : }
774 : :
775 : : /*
776 : : * ready_file_comparator
777 : : *
778 : : * Compares the archival priority of the given files to archive. If "a"
779 : : * has a higher priority than "b", a negative value will be returned. If
780 : : * "b" has a higher priority than "a", a positive value will be returned.
781 : : * If "a" and "b" have equivalent values, 0 will be returned.
782 : : */
783 : : static int
784 : 3804 : ready_file_comparator(Datum a, Datum b, void *arg)
785 : : {
1454 tgl@sss.pgh.pa.us 786 : 3804 : char *a_str = DatumGetCString(a);
787 : 3804 : char *b_str = DatumGetCString(b);
788 : 3804 : bool a_history = IsTLHistoryFileName(a_str);
789 : 3804 : bool b_history = IsTLHistoryFileName(b_str);
790 : :
791 : : /* Timeline history files always have the highest priority. */
1636 rhaas@postgresql.org 792 [ + + ]: 3804 : if (a_history != b_history)
793 [ - + ]: 1 : return a_history ? -1 : 1;
794 : :
795 : : /* Priority is given to older files. */
796 : 3803 : return strcmp(a_str, b_str);
797 : : }
798 : :
799 : : /*
800 : : * PgArchForceDirScan
801 : : *
802 : : * When called, the next call to pgarch_readyXlog() will perform a
803 : : * directory scan. This is useful for ensuring that important files such
804 : : * as timeline history files are archived as quickly as possible.
805 : : */
806 : : void
807 : 14 : PgArchForceDirScan(void)
808 : : {
796 nathan@postgresql.or 809 : 14 : pg_atomic_write_membarrier_u32(&PgArch->force_dir_scan, 1);
7960 tgl@sss.pgh.pa.us 810 : 14 : }
811 : :
812 : : /*
813 : : * pgarch_archiveDone
814 : : *
815 : : * Emit notification that an xlog file has been successfully archived.
816 : : * We do this by renaming the status file from NNN.ready to NNN.done.
817 : : * Eventually, a checkpoint process will notice this and delete both the
818 : : * NNN.done file and the xlog file itself.
819 : : */
820 : : static void
821 : 371 : pgarch_archiveDone(char *xlog)
822 : : {
823 : : char rlogready[MAXPGPATH];
824 : : char rlogdone[MAXPGPATH];
825 : :
7958 826 : 371 : StatusFilePath(rlogready, xlog, ".ready");
827 : 371 : StatusFilePath(rlogdone, xlog, ".done");
828 : :
829 : : /*
830 : : * To avoid extra overhead, we don't durably rename the .ready file to
831 : : * .done. Archive commands and libraries must gracefully handle attempts
832 : : * to re-archive files (e.g., if the server crashes just before this
833 : : * function is called), so it should be okay if the .ready file reappears
834 : : * after a crash.
835 : : */
1379 fujii@postgresql.org 836 [ - + ]: 371 : if (rename(rlogready, rlogdone) < 0)
1379 fujii@postgresql.org 837 [ # # ]:UBC 0 : ereport(WARNING,
838 : : (errcode_for_file_access(),
839 : : errmsg("could not rename file \"%s\" to \"%s\": %m",
840 : : rlogready, rlogdone)));
7960 tgl@sss.pgh.pa.us 841 :CBC 371 : }
842 : :
843 : :
844 : : /*
845 : : * pgarch_die
846 : : *
847 : : * Exit-time cleanup handler
848 : : */
849 : : static void
1877 fujii@postgresql.org 850 : 18 : pgarch_die(int code, Datum arg)
851 : : {
793 heikki.linnakangas@i 852 : 18 : PgArch->pgprocno = INVALID_PROC_NUMBER;
1877 fujii@postgresql.org 853 : 18 : }
854 : :
855 : : /*
856 : : * Interrupt handler for WAL archiver process.
857 : : *
858 : : * This is called in the loops pgarch_MainLoop and pgarch_ArchiverCopyLoop.
859 : : * It checks for barrier events, config update and request for logging of
860 : : * memory contexts, but not shutdown request because how to handle
861 : : * shutdown request is different between those loops.
862 : : */
863 : : static void
426 heikki.linnakangas@i 864 : 472 : ProcessPgArchInterrupts(void)
865 : : {
1782 fujii@postgresql.org 866 [ + + ]: 472 : if (ProcSignalBarrierPending)
867 : 2 : ProcessProcSignalBarrier();
868 : :
869 : : /* Perform logging of memory contexts of this process */
1552 rhaas@postgresql.org 870 [ - + ]: 472 : if (LogMemoryContextPending)
1552 rhaas@postgresql.org 871 :UBC 0 : ProcessLogMemoryContextInterrupt();
872 : :
1782 fujii@postgresql.org 873 [ + + ]:CBC 472 : if (ConfigReloadPending)
874 : : {
1552 rhaas@postgresql.org 875 : 3 : char *archiveLib = pstrdup(XLogArchiveLibrary);
876 : : bool archiveLibChanged;
877 : :
1782 fujii@postgresql.org 878 : 3 : ConfigReloadPending = false;
879 : 3 : ProcessConfigFile(PGC_SIGHUP);
880 : :
1267 peter@eisentraut.org 881 [ - + - - ]: 3 : if (XLogArchiveLibrary[0] != '\0' && XLogArchiveCommand[0] != '\0')
1267 peter@eisentraut.org 882 [ # # ]:UBC 0 : ereport(ERROR,
883 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
884 : : errmsg("both \"archive_command\" and \"archive_library\" set"),
885 : : errdetail("Only one of \"archive_command\", \"archive_library\" may be set.")));
886 : :
1552 rhaas@postgresql.org 887 :CBC 3 : archiveLibChanged = strcmp(XLogArchiveLibrary, archiveLib) != 0;
888 : 3 : pfree(archiveLib);
889 : :
890 [ - + ]: 3 : if (archiveLibChanged)
891 : : {
892 : : /*
893 : : * Ideally, we would simply unload the previous archive module and
894 : : * load the new one, but there is presently no mechanism for
895 : : * unloading a library (see the comment above
896 : : * internal_load_library()). To deal with this, we simply restart
897 : : * the archiver. The new archive module will be loaded when the
898 : : * new archiver process starts up. Note that this triggers the
899 : : * module's shutdown callback, if defined.
900 : : */
1552 rhaas@postgresql.org 901 [ # # ]:UBC 0 : ereport(LOG,
902 : : (errmsg("restarting archiver process because value of "
903 : : "\"archive_library\" was changed")));
904 : :
905 : 0 : proc_exit(0);
906 : : }
907 : : }
1552 rhaas@postgresql.org 908 :CBC 472 : }
909 : :
910 : : /*
911 : : * LoadArchiveLibrary
912 : : *
913 : : * Loads the archiving callbacks into our local ArchiveCallbacks.
914 : : */
915 : : static void
916 : 18 : LoadArchiveLibrary(void)
917 : : {
918 : : ArchiveModuleInit archive_init;
919 : :
1267 peter@eisentraut.org 920 [ + + - + ]: 18 : if (XLogArchiveLibrary[0] != '\0' && XLogArchiveCommand[0] != '\0')
1267 peter@eisentraut.org 921 [ # # ]:UBC 0 : ereport(ERROR,
922 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
923 : : errmsg("both \"archive_command\" and \"archive_library\" set"),
924 : : errdetail("Only one of \"archive_command\", \"archive_library\" may be set.")));
925 : :
926 : : /*
927 : : * If shell archiving is enabled, use our special initialization function.
928 : : * Otherwise, load the library and call its _PG_archive_module_init().
929 : : */
1552 rhaas@postgresql.org 930 [ + + ]:CBC 18 : if (XLogArchiveLibrary[0] == '\0')
931 : 17 : archive_init = shell_archive_init;
932 : : else
933 : 1 : archive_init = (ArchiveModuleInit)
934 : 1 : load_external_function(XLogArchiveLibrary,
935 : : "_PG_archive_module_init", false, NULL);
936 : :
937 [ - + ]: 18 : if (archive_init == NULL)
1552 rhaas@postgresql.org 938 [ # # ]:UBC 0 : ereport(ERROR,
939 : : (errmsg("archive modules have to define the symbol %s", "_PG_archive_module_init")));
940 : :
1173 michael@paquier.xyz 941 :CBC 18 : ArchiveCallbacks = (*archive_init) ();
942 : :
943 [ - + ]: 18 : if (ArchiveCallbacks->archive_file_cb == NULL)
1552 rhaas@postgresql.org 944 [ # # ]:UBC 0 : ereport(ERROR,
945 : : (errmsg("archive modules must register an archive callback")));
946 : :
146 michael@paquier.xyz 947 :GNC 18 : archive_module_state = palloc0_object(ArchiveModuleState);
1173 michael@paquier.xyz 948 [ - + ]:CBC 18 : if (ArchiveCallbacks->startup_cb != NULL)
1173 michael@paquier.xyz 949 :UBC 0 : ArchiveCallbacks->startup_cb(archive_module_state);
950 : :
1294 michael@paquier.xyz 951 :CBC 18 : before_shmem_exit(pgarch_call_module_shutdown_cb, 0);
1552 rhaas@postgresql.org 952 : 18 : }
953 : :
954 : : /*
955 : : * Call the shutdown callback of the loaded archive module, if defined.
956 : : */
957 : : static void
1294 michael@paquier.xyz 958 : 18 : pgarch_call_module_shutdown_cb(int code, Datum arg)
959 : : {
1173 960 [ + + ]: 18 : if (ArchiveCallbacks->shutdown_cb != NULL)
961 : 17 : ArchiveCallbacks->shutdown_cb(archive_module_state);
1782 fujii@postgresql.org 962 : 18 : }
|