Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * astreamer_verify.c
4 : : *
5 : : * Archive streamer for verification of a tar format backup (including
6 : : * compressed tar format backups).
7 : : *
8 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 : : *
10 : : * src/bin/pg_verifybackup/astreamer_verify.c
11 : : *
12 : : *-------------------------------------------------------------------------
13 : : */
14 : :
15 : : #include "postgres_fe.h"
16 : :
17 : : #include "access/xlog_internal.h"
18 : : #include "catalog/pg_control.h"
19 : : #include "pg_verifybackup.h"
20 : :
21 : : typedef struct astreamer_verify
22 : : {
23 : : /* These fields don't change once initialized. */
24 : : astreamer base;
25 : : verifier_context *context;
26 : : char *archive_name;
27 : : Oid tblspc_oid;
28 : :
29 : : /* These fields change for each archive member. */
30 : : manifest_file *mfile;
31 : : bool verify_checksum;
32 : : bool verify_control_data;
33 : : pg_checksum_context *checksum_ctx;
34 : : uint64 checksum_bytes;
35 : : ControlFileData control_file;
36 : : uint64 control_file_bytes;
37 : : } astreamer_verify;
38 : :
39 : : static void astreamer_verify_content(astreamer *streamer,
40 : : astreamer_member *member,
41 : : const char *data, int len,
42 : : astreamer_archive_context context);
43 : : static void astreamer_verify_finalize(astreamer *streamer);
44 : : static void astreamer_verify_free(astreamer *streamer);
45 : :
46 : : static void member_verify_header(astreamer *streamer, astreamer_member *member);
47 : : static void member_compute_checksum(astreamer *streamer,
48 : : astreamer_member *member,
49 : : const char *data, int len);
50 : : static void member_verify_checksum(astreamer *streamer);
51 : : static void member_copy_control_data(astreamer *streamer,
52 : : astreamer_member *member,
53 : : const char *data, int len);
54 : : static void member_verify_control_data(astreamer *streamer);
55 : : static void member_reset_info(astreamer *streamer);
56 : :
57 : : static const astreamer_ops astreamer_verify_ops = {
58 : : .content = astreamer_verify_content,
59 : : .finalize = astreamer_verify_finalize,
60 : : .free = astreamer_verify_free
61 : : };
62 : :
63 : : /*
64 : : * Create an astreamer that can verify a tar file.
65 : : */
66 : : astreamer *
344 rhaas@postgresql.org 67 :CBC 40 : astreamer_verify_content_new(astreamer *next, verifier_context *context,
68 : : char *archive_name, Oid tblspc_oid)
69 : : {
70 : : astreamer_verify *streamer;
71 : :
72 : 40 : streamer = palloc0(sizeof(astreamer_verify));
73 : 40 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
74 : : &astreamer_verify_ops;
75 : :
76 : 40 : streamer->base.bbs_next = next;
77 : 40 : streamer->context = context;
78 : 40 : streamer->archive_name = archive_name;
79 : 40 : streamer->tblspc_oid = tblspc_oid;
80 : :
81 [ + - ]: 40 : if (!context->skip_checksums)
82 : 40 : streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
83 : :
84 : 40 : return &streamer->base;
85 : : }
86 : :
87 : : /*
88 : : * Main entry point of the archive streamer for verifying tar members.
89 : : */
90 : : static void
91 : 318246 : astreamer_verify_content(astreamer *streamer, astreamer_member *member,
92 : : const char *data, int len,
93 : : astreamer_archive_context context)
94 : : {
95 : 318246 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
96 : :
97 [ - + ]: 318246 : Assert(context != ASTREAMER_UNKNOWN);
98 : :
99 [ + + + + : 318246 : switch (context)
- ]
100 : : {
101 : 25955 : case ASTREAMER_MEMBER_HEADER:
102 : : /* Initial setup plus decide which checks to perform. */
103 : 25955 : member_verify_header(streamer, member);
104 : 25955 : break;
105 : :
106 : 266297 : case ASTREAMER_MEMBER_CONTENTS:
107 : : /* Incremental work required to verify file contents. */
108 [ + + ]: 266297 : if (mystreamer->verify_checksum)
109 : 166398 : member_compute_checksum(streamer, member, data, len);
110 [ + + ]: 266297 : if (mystreamer->verify_control_data)
111 : 71 : member_copy_control_data(streamer, member, data, len);
112 : 266297 : break;
113 : :
114 : 25955 : case ASTREAMER_MEMBER_TRAILER:
115 : : /* Now we've got all the file data. */
116 [ + + ]: 25955 : if (mystreamer->verify_checksum)
117 : 24233 : member_verify_checksum(streamer);
118 [ + + ]: 25955 : if (mystreamer->verify_control_data)
119 : 26 : member_verify_control_data(streamer);
120 : :
121 : : /* Reset for next archive member. */
122 : 25954 : member_reset_info(streamer);
123 : 25954 : break;
124 : :
125 : 39 : case ASTREAMER_ARCHIVE_TRAILER:
126 : 39 : break;
127 : :
344 rhaas@postgresql.org 128 :UBC 0 : default:
129 : : /* Shouldn't happen. */
130 peter@eisentraut.org 130 : 0 : pg_fatal("unexpected state while parsing tar archive");
131 : : }
344 rhaas@postgresql.org 132 :CBC 318245 : }
133 : :
134 : : /*
135 : : * End-of-stream processing for a astreamer_verify stream.
136 : : */
137 : : static void
138 : 39 : astreamer_verify_finalize(astreamer *streamer)
139 : : {
140 [ - + ]: 39 : Assert(streamer->bbs_next == NULL);
141 : 39 : }
142 : :
143 : : /*
144 : : * Free memory associated with a astreamer_verify stream.
145 : : */
146 : : static void
147 : 39 : astreamer_verify_free(astreamer *streamer)
148 : : {
149 : 39 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
150 : :
151 [ + - ]: 39 : if (mystreamer->checksum_ctx)
152 : 39 : pfree(mystreamer->checksum_ctx);
153 : :
154 : 39 : pfree(streamer);
155 : 39 : }
156 : :
157 : : /*
158 : : * Prepare to validate the next archive member.
159 : : */
160 : : static void
161 : 25955 : member_verify_header(astreamer *streamer, astreamer_member *member)
162 : : {
163 : 25955 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
164 : : manifest_file *m;
165 : : char pathname[MAXPGPATH];
166 : :
167 : : /* We are only interested in normal files. */
168 [ + + - + ]: 25955 : if (member->is_directory || member->is_link)
169 : 754 : return;
170 : :
171 : : /*
172 : : * The backup manifest stores a relative path to the base directory for
173 : : * files belonging to a tablespace, while the tablespace backup tar
174 : : * archive does not include this path.
175 : : *
176 : : * The pathname taken from the tar file could contain '.' or '..'
177 : : * references, which we want to remove, so apply canonicalize_path(). It
178 : : * could also be an absolute pathname, which we want to treat as a
179 : : * relative path, so prepend "./" if we're not adding a tablespace prefix
180 : : * to make sure that canonicalize_path() does what we want.
181 : : */
182 [ + + ]: 25258 : if (OidIsValid(mystreamer->tblspc_oid))
183 : 14 : snprintf(pathname, MAXPGPATH, "%s/%u/%s",
184 : 14 : "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
185 : : else
186 : 25244 : snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
187 : 25258 : canonicalize_path(pathname);
188 : :
189 : : /* Ignore any files that are listed in the ignore list. */
190 [ + + ]: 25258 : if (should_ignore_relpath(mystreamer->context, pathname))
191 : 52 : return;
192 : :
193 : : /* Check whether there's an entry in the manifest hash. */
194 : 25206 : m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
195 [ + + ]: 25206 : if (m == NULL)
196 : : {
197 : 3 : report_backup_error(mystreamer->context,
198 : : "file \"%s\" is present in archive \"%s\" but not in the manifest",
199 : 3 : member->pathname, mystreamer->archive_name);
200 : 3 : return;
201 : : }
202 : 25203 : mystreamer->mfile = m;
203 : :
204 : : /* Flag this entry as having been encountered in a tar archive. */
205 : 25203 : m->matched = true;
206 : :
207 : : /* Check that the size matches. */
208 [ + + ]: 25203 : if (m->size != member->size)
209 : : {
210 : 2 : report_backup_error(mystreamer->context,
211 : : "file \"%s\" has size %llu in archive \"%s\" but size %" PRIu64 " in the manifest",
339 212 : 2 : member->pathname,
213 : 2 : (unsigned long long) member->size,
214 : : mystreamer->archive_name,
215 : : m->size);
344 216 : 2 : m->bad = true;
217 : 2 : return;
218 : : }
219 : :
220 : : /*
221 : : * Decide whether we're going to verify the checksum for this file, and
222 : : * whether we're going to perform the additional validation that we do
223 : : * only for the control file.
224 : : */
225 : 25201 : mystreamer->verify_checksum =
226 [ + - + - : 25201 : (!mystreamer->context->skip_checksums && should_verify_checksum(m));
+ - + + ]
227 : 25201 : mystreamer->verify_control_data =
228 : 50402 : mystreamer->context->manifest->version != 1 &&
152 fujii@postgresql.org 229 [ + - + - : 25201 : !m->bad && strcmp(m->pathname, XLOG_CONTROL_FILE) == 0;
+ + ]
230 : :
231 : : /* If we're going to verify the checksum, initial a checksum context. */
344 rhaas@postgresql.org 232 [ + + - + ]: 49434 : if (mystreamer->verify_checksum &&
233 : 24233 : pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
234 : : {
344 rhaas@postgresql.org 235 :UBC 0 : report_backup_error(mystreamer->context,
236 : : "%s: could not initialize checksum of file \"%s\"",
237 : : mystreamer->archive_name, m->pathname);
238 : :
239 : : /*
240 : : * Checksum verification cannot be performed without proper context
241 : : * initialization.
242 : : */
243 : 0 : mystreamer->verify_checksum = false;
244 : : }
245 : : }
246 : :
247 : : /*
248 : : * Computes the checksum incrementally for the received file content.
249 : : *
250 : : * Should have a correctly initialized checksum_ctx, which will be used for
251 : : * incremental checksum computation.
252 : : */
253 : : static void
344 rhaas@postgresql.org 254 :CBC 166398 : member_compute_checksum(astreamer *streamer, astreamer_member *member,
255 : : const char *data, int len)
256 : : {
257 : 166398 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
258 : 166398 : pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
259 : 166398 : manifest_file *m = mystreamer->mfile;
260 : :
261 [ - + ]: 166398 : Assert(mystreamer->verify_checksum);
262 [ - + ]: 166398 : Assert(m->checksum_type == checksum_ctx->type);
263 : :
264 : : /*
265 : : * Update the total count of computed checksum bytes so that we can
266 : : * cross-check against the file size.
267 : : */
268 : 166398 : mystreamer->checksum_bytes += len;
269 : :
270 : : /* Feed these bytes to the checksum calculation. */
271 [ - + ]: 166398 : if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
272 : : {
344 rhaas@postgresql.org 273 :UBC 0 : report_backup_error(mystreamer->context,
274 : : "could not update checksum of file \"%s\"",
275 : : m->pathname);
276 : 0 : mystreamer->verify_checksum = false;
277 : : }
344 rhaas@postgresql.org 278 :CBC 166398 : }
279 : :
280 : : /*
281 : : * Perform the final computation and checksum verification after the entire
282 : : * file content has been processed.
283 : : */
284 : : static void
285 : 24233 : member_verify_checksum(astreamer *streamer)
286 : : {
287 : 24233 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
288 : 24233 : manifest_file *m = mystreamer->mfile;
289 : : uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
290 : : int checksumlen;
291 : :
292 [ - + ]: 24233 : Assert(mystreamer->verify_checksum);
293 : :
294 : : /*
295 : : * It's unclear how this could fail, but let's check anyway to be safe.
296 : : */
297 [ - + ]: 24233 : if (mystreamer->checksum_bytes != m->size)
298 : : {
344 rhaas@postgresql.org 299 :UBC 0 : report_backup_error(mystreamer->context,
300 : : "file \"%s\" in archive \"%s\" should contain %" PRIu64 " bytes, but %" PRIu64 " bytes were read",
301 : : m->pathname, mystreamer->archive_name,
302 : : m->size,
303 : : mystreamer->checksum_bytes);
304 : 0 : return;
305 : : }
306 : :
307 : : /* Get the final checksum. */
344 rhaas@postgresql.org 308 :CBC 24233 : checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
309 [ - + ]: 24233 : if (checksumlen < 0)
310 : : {
344 rhaas@postgresql.org 311 :UBC 0 : report_backup_error(mystreamer->context,
312 : : "could not finalize checksum of file \"%s\"",
313 : : m->pathname);
314 : 0 : return;
315 : : }
316 : :
317 : : /* And check it against the manifest. */
344 rhaas@postgresql.org 318 [ - + ]:CBC 24233 : if (checksumlen != m->checksum_length)
344 rhaas@postgresql.org 319 :UBC 0 : report_backup_error(mystreamer->context,
320 : : "file \"%s\" in archive \"%s\" has checksum of length %d, but expected %d",
321 : : m->pathname, mystreamer->archive_name,
322 : : m->checksum_length, checksumlen);
344 rhaas@postgresql.org 323 [ + + ]:CBC 24233 : else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
324 : 2 : report_backup_error(mystreamer->context,
325 : : "checksum mismatch for file \"%s\" in archive \"%s\"",
326 : : m->pathname, mystreamer->archive_name);
327 : : }
328 : :
329 : : /*
330 : : * Stores the pg_control file contents into a local buffer; we need the entire
331 : : * control file data for verification.
332 : : */
333 : : static void
334 : 71 : member_copy_control_data(astreamer *streamer, astreamer_member *member,
335 : : const char *data, int len)
336 : : {
337 : 71 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
338 : :
339 : : /* Should be here only for control file */
340 [ - + ]: 71 : Assert(mystreamer->verify_control_data);
341 : :
342 : : /*
343 : : * Copy the new data into the control file buffer, but do not overrun the
344 : : * buffer. Note that the on-disk length of the control file is expected to
345 : : * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
346 : : * shorter, just sizeof(ControlFileData).
347 : : */
340 348 [ + + ]: 71 : if (mystreamer->control_file_bytes < sizeof(ControlFileData))
349 : : {
350 : : size_t remaining;
351 : :
344 352 : 26 : remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
353 : 26 : memcpy(((char *) &mystreamer->control_file)
354 : 26 : + mystreamer->control_file_bytes,
340 355 : 26 : data, Min((size_t) len, remaining));
356 : : }
357 : :
358 : : /* Remember how many bytes we saw, even if we didn't buffer them. */
344 359 : 71 : mystreamer->control_file_bytes += len;
360 : 71 : }
361 : :
362 : : /*
363 : : * Performs the CRC calculation of pg_control data and then calls the routines
364 : : * that execute the final verification of the control file information.
365 : : */
366 : : static void
367 : 26 : member_verify_control_data(astreamer *streamer)
368 : : {
369 : 26 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
370 : 26 : manifest_data *manifest = mystreamer->context->manifest;
371 : : pg_crc32c crc;
372 : :
373 : : /* Should be here only for control file */
152 fujii@postgresql.org 374 [ - + ]: 26 : Assert(strcmp(mystreamer->mfile->pathname, XLOG_CONTROL_FILE) == 0);
344 rhaas@postgresql.org 375 [ - + ]: 26 : Assert(mystreamer->verify_control_data);
376 : :
377 : : /*
378 : : * If the control file is not the right length, that's a big problem.
379 : : *
380 : : * NB: There is a theoretical overflow risk here from casting to int, but
381 : : * it isn't likely to be a real problem and this enables us to match the
382 : : * same format string that pg_rewind uses for this case. Perhaps both this
383 : : * and pg_rewind should use an unsigned 64-bit value, but for now we don't
384 : : * worry about it.
385 : : */
386 [ - + ]: 26 : if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
344 rhaas@postgresql.org 387 :UBC 0 : report_fatal_error("unexpected control file size %d, expected %d",
388 : 0 : (int) mystreamer->control_file_bytes,
389 : : PG_CONTROL_FILE_SIZE);
390 : :
391 : : /* Compute the CRC. */
344 rhaas@postgresql.org 392 :CBC 26 : INIT_CRC32C(crc);
393 : 26 : COMP_CRC32C(crc, &mystreamer->control_file,
394 : : offsetof(ControlFileData, crc));
395 : 26 : FIN_CRC32C(crc);
396 : :
397 : : /* Control file contents not meaningful if CRC is bad. */
398 [ - + ]: 26 : if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
344 rhaas@postgresql.org 399 :UBC 0 : report_fatal_error("%s: %s: CRC is incorrect",
400 : : mystreamer->archive_name,
401 : 0 : mystreamer->mfile->pathname);
402 : :
403 : : /* Can't interpret control file if not current version. */
344 rhaas@postgresql.org 404 [ - + ]:CBC 26 : if (mystreamer->control_file.pg_control_version != PG_CONTROL_VERSION)
344 rhaas@postgresql.org 405 :UBC 0 : report_fatal_error("%s: %s: unexpected control file version",
406 : : mystreamer->archive_name,
407 : 0 : mystreamer->mfile->pathname);
408 : :
409 : : /* System identifiers should match. */
344 rhaas@postgresql.org 410 :CBC 26 : if (manifest->system_identifier !=
411 [ + + ]: 26 : mystreamer->control_file.system_identifier)
161 peter@eisentraut.org 412 : 1 : report_fatal_error("%s: %s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64,
413 : : mystreamer->archive_name,
344 rhaas@postgresql.org 414 : 1 : mystreamer->mfile->pathname,
415 : : manifest->system_identifier,
416 : : mystreamer->control_file.system_identifier);
417 : 25 : }
418 : :
419 : : /*
420 : : * Reset flags and free memory allocations for member file verification.
421 : : */
422 : : static void
423 : 25954 : member_reset_info(astreamer *streamer)
424 : : {
425 : 25954 : astreamer_verify *mystreamer = (astreamer_verify *) streamer;
426 : :
427 : 25954 : mystreamer->mfile = NULL;
428 : 25954 : mystreamer->verify_checksum = false;
429 : 25954 : mystreamer->verify_control_data = false;
430 : 25954 : mystreamer->checksum_bytes = 0;
431 : 25954 : mystreamer->control_file_bytes = 0;
432 : 25954 : }
|