Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * astreamer_tar.c
4 : : *
5 : : * This module implements three types of tar processing. A tar parser
6 : : * expects unlabelled chunks of data (i.e. ASTREAMER_UNKNOWN) and splits
7 : : * it into labelled chunks (any other value of astreamer_archive_context).
8 : : * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 : : * and produces a tarfile, optionally replacing member headers and trailers
10 : : * so that upstream astreamer objects can perform surgery on the tarfile
11 : : * contents without knowing the details of the tar format. A tar terminator
12 : : * just adds two blocks of NUL bytes to the end of the file, since older
13 : : * server versions produce files with this terminator omitted.
14 : : *
15 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : : *
17 : : * IDENTIFICATION
18 : : * src/fe_utils/astreamer_tar.c
19 : : *-------------------------------------------------------------------------
20 : : */
21 : :
22 : : #include "postgres_fe.h"
23 : :
24 : : #include <time.h>
25 : :
26 : : #include "common/logging.h"
27 : : #include "fe_utils/astreamer.h"
28 : : #include "pgtar.h"
29 : :
/*
 * State for a tar parser.  Parsing state is kept here so that it persists
 * from one content callback to the next.
 */
typedef struct astreamer_tar_parser
{
	/* Generic astreamer; first member, since astreamer * is cast to this type */
	astreamer	base;
	/* Kind of chunk the parser expects to see next in the input */
	astreamer_archive_context next_context;
	/* Metadata parsed from the most recent member header */
	astreamer_member member;
	/* Bytes of the current member's contents forwarded so far */
	size_t		file_bytes_sent;
	/* Padding bytes that follow the current member's contents */
	size_t		pad_bytes_expected;
} astreamer_tar_parser;
38 : :
/*
 * State for a tar archiver.
 */
typedef struct astreamer_tar_archiver
{
	/* Generic astreamer; first member, since astreamer * is cast to this type */
	astreamer	base;
	/*
	 * True after a member header has been regenerated, until the matching
	 * member trailer (padding) has been regenerated as well.
	 */
	bool		rearchive_member;
} astreamer_tar_archiver;
44 : :
/* Callbacks and helper implementing the tar parser. */
static void astreamer_tar_parser_content(astreamer *streamer,
										 astreamer_member *member,
										 const char *data, int len,
										 astreamer_archive_context context);
static void astreamer_tar_parser_finalize(astreamer *streamer);
static void astreamer_tar_parser_free(astreamer *streamer);
static bool astreamer_tar_header(astreamer_tar_parser *mystreamer);

/* Callback table installed by astreamer_tar_parser_new. */
static const astreamer_ops astreamer_tar_parser_ops = {
	.content = astreamer_tar_parser_content,
	.finalize = astreamer_tar_parser_finalize,
	.free = astreamer_tar_parser_free
};
58 : :
/* Callbacks implementing the tar archiver. */
static void astreamer_tar_archiver_content(astreamer *streamer,
										   astreamer_member *member,
										   const char *data, int len,
										   astreamer_archive_context context);
static void astreamer_tar_archiver_finalize(astreamer *streamer);
static void astreamer_tar_archiver_free(astreamer *streamer);

/* Callback table installed by astreamer_tar_archiver_new. */
static const astreamer_ops astreamer_tar_archiver_ops = {
	.content = astreamer_tar_archiver_content,
	.finalize = astreamer_tar_archiver_finalize,
	.free = astreamer_tar_archiver_free
};
71 : :
/* Callbacks implementing the tar terminator. */
static void astreamer_tar_terminator_content(astreamer *streamer,
											 astreamer_member *member,
											 const char *data, int len,
											 astreamer_archive_context context);
static void astreamer_tar_terminator_finalize(astreamer *streamer);
static void astreamer_tar_terminator_free(astreamer *streamer);

/* Callback table installed by astreamer_tar_terminator_new. */
static const astreamer_ops astreamer_tar_terminator_ops = {
	.content = astreamer_tar_terminator_content,
	.finalize = astreamer_tar_terminator_finalize,
	.free = astreamer_tar_terminator_free
};
84 : :
85 : : /*
86 : : * Create a astreamer that can parse a stream of content as tar data.
87 : : *
88 : : * The input should be a series of ASTREAMER_UNKNOWN chunks; the astreamer
89 : : * specified by 'next' will receive a series of typed chunks, as per the
90 : : * conventions described in astreamer.h.
91 : : */
92 : : astreamer *
397 rhaas@postgresql.org 93 :CBC 202 : astreamer_tar_parser_new(astreamer *next)
94 : : {
95 : : astreamer_tar_parser *streamer;
96 : :
97 : 202 : streamer = palloc0(sizeof(astreamer_tar_parser));
98 : 202 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
99 : : &astreamer_tar_parser_ops;
1401 100 : 202 : streamer->base.bbs_next = next;
101 : 202 : initStringInfo(&streamer->base.bbs_buffer);
397 102 : 202 : streamer->next_context = ASTREAMER_MEMBER_HEADER;
103 : :
1401 104 : 202 : return &streamer->base;
105 : : }
106 : :
107 : : /*
108 : : * Parse unknown content as tar data.
109 : : */
110 : : static void
397 111 : 624285 : astreamer_tar_parser_content(astreamer *streamer, astreamer_member *member,
112 : : const char *data, int len,
113 : : astreamer_archive_context context)
114 : : {
115 : 624285 : astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
116 : : size_t nbytes;
117 : :
118 : : /* Expect unparsed input. */
1401 119 [ - + ]: 624285 : Assert(member == NULL);
397 120 [ - + ]: 624285 : Assert(context == ASTREAMER_UNKNOWN);
121 : :
1401 122 [ + + ]: 1295630 : while (len > 0)
123 : : {
124 [ + + + + : 671550 : switch (mystreamer->next_context)
- ]
125 : : {
397 126 : 159748 : case ASTREAMER_MEMBER_HEADER:
127 : :
128 : : /*
129 : : * If we're expecting an archive member header, accumulate a
130 : : * full block of data before doing anything further.
131 : : */
132 [ - + ]: 159748 : if (!astreamer_buffer_until(streamer, &data, &len,
133 : : TAR_BLOCK_SIZE))
1401 rhaas@postgresql.org 134 :UBC 0 : return;
135 : :
136 : : /*
137 : : * Now we can process the header and get ready to process the
138 : : * file contents; however, we might find out that what we
139 : : * thought was the next file header is actually the start of
140 : : * the archive trailer. Switch modes accordingly.
141 : : */
397 rhaas@postgresql.org 142 [ + + ]:CBC 159748 : if (astreamer_tar_header(mystreamer))
143 : : {
1401 144 [ + + ]: 159549 : if (mystreamer->member.size == 0)
145 : : {
146 : : /* No content; trailer is zero-length. */
397 147 : 31353 : astreamer_content(mystreamer->base.bbs_next,
148 : : &mystreamer->member,
149 : : NULL, 0,
150 : : ASTREAMER_MEMBER_TRAILER);
151 : :
152 : : /* Expect next header. */
153 : 31353 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
154 : : }
155 : : else
156 : : {
157 : : /* Expect contents. */
158 : 128196 : mystreamer->next_context = ASTREAMER_MEMBER_CONTENTS;
159 : : }
1401 160 : 159549 : mystreamer->base.bbs_buffer.len = 0;
161 : 159549 : mystreamer->file_bytes_sent = 0;
162 : : }
163 : : else
397 164 : 199 : mystreamer->next_context = ASTREAMER_ARCHIVE_TRAILER;
1401 165 : 159748 : break;
166 : :
397 167 : 502697 : case ASTREAMER_MEMBER_CONTENTS:
168 : :
169 : : /*
170 : : * Send as much content as we have, but not more than the
171 : : * remaining file length.
172 : : */
1401 173 [ - + ]: 502697 : Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 : 502697 : nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 : 502697 : nbytes = Min(nbytes, len);
176 [ - + ]: 502697 : Assert(nbytes > 0);
397 177 : 502697 : astreamer_content(mystreamer->base.bbs_next,
178 : : &mystreamer->member,
179 : : data, nbytes,
180 : : ASTREAMER_MEMBER_CONTENTS);
1401 181 : 502697 : mystreamer->file_bytes_sent += nbytes;
182 : 502697 : data += nbytes;
183 : 502697 : len -= nbytes;
184 : :
185 : : /*
186 : : * If we've not yet sent the whole file, then there's more
187 : : * content to come; otherwise, it's time to expect the file
188 : : * trailer.
189 : : */
190 [ - + ]: 502697 : Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 [ + + ]: 502697 : if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 : : {
193 [ + + ]: 128196 : if (mystreamer->pad_bytes_expected == 0)
194 : : {
195 : : /* Trailer is zero-length. */
397 196 : 119295 : astreamer_content(mystreamer->base.bbs_next,
197 : : &mystreamer->member,
198 : : NULL, 0,
199 : : ASTREAMER_MEMBER_TRAILER);
200 : :
201 : : /* Expect next header. */
202 : 119294 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
203 : : }
204 : : else
205 : : {
206 : : /* Trailer is not zero-length. */
207 : 8901 : mystreamer->next_context = ASTREAMER_MEMBER_TRAILER;
208 : : }
1401 209 : 128195 : mystreamer->base.bbs_buffer.len = 0;
210 : : }
211 : 502696 : break;
212 : :
397 213 : 8901 : case ASTREAMER_MEMBER_TRAILER:
214 : :
215 : : /*
216 : : * If we're expecting an archive member trailer, accumulate
217 : : * the expected number of padding bytes before sending
218 : : * anything onward.
219 : : */
220 [ - + ]: 8901 : if (!astreamer_buffer_until(streamer, &data, &len,
221 : 8901 : mystreamer->pad_bytes_expected))
1401 rhaas@postgresql.org 222 :UBC 0 : return;
223 : :
224 : : /* OK, now we can send it. */
397 rhaas@postgresql.org 225 :CBC 8901 : astreamer_content(mystreamer->base.bbs_next,
226 : : &mystreamer->member,
227 : 8901 : data, mystreamer->pad_bytes_expected,
228 : : ASTREAMER_MEMBER_TRAILER);
229 : :
230 : : /* Expect next file header. */
231 : 8901 : mystreamer->next_context = ASTREAMER_MEMBER_HEADER;
1401 232 : 8901 : mystreamer->base.bbs_buffer.len = 0;
233 : 8901 : break;
234 : :
397 235 : 204 : case ASTREAMER_ARCHIVE_TRAILER:
236 : :
237 : : /*
238 : : * We've seen an end-of-archive indicator, so anything more is
239 : : * buffered and sent as part of the archive trailer. But we
240 : : * don't expect more than 2 blocks.
241 : : */
242 : 204 : astreamer_buffer_bytes(streamer, &data, &len, len);
1401 243 [ - + ]: 204 : if (len > 2 * TAR_BLOCK_SIZE)
1247 tgl@sss.pgh.pa.us 244 :UBC 0 : pg_fatal("tar file trailer exceeds 2 blocks");
1401 rhaas@postgresql.org 245 :CBC 204 : return;
246 : :
1401 rhaas@postgresql.org 247 :UBC 0 : default:
248 : : /* Shouldn't happen. */
1247 tgl@sss.pgh.pa.us 249 : 0 : pg_fatal("unexpected state while parsing tar archive");
250 : : }
251 : : }
252 : : }
253 : :
254 : : /*
255 : : * Parse a file header within a tar stream.
256 : : *
257 : : * The return value is true if we found a file header and passed it on to the
258 : : * next astreamer; it is false if we have reached the archive trailer.
259 : : */
260 : : static bool
397 rhaas@postgresql.org 261 :CBC 159748 : astreamer_tar_header(astreamer_tar_parser *mystreamer)
262 : : {
1401 263 : 159748 : bool has_nonzero_byte = false;
264 : : int i;
397 265 : 159748 : astreamer_member *member = &mystreamer->member;
1401 266 : 159748 : char *buffer = mystreamer->base.bbs_buffer.data;
267 : :
268 [ - + ]: 159748 : Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
269 : :
270 : : /* Check whether we've got a block of all zero bytes. */
271 [ + + ]: 261636 : for (i = 0; i < TAR_BLOCK_SIZE; ++i)
272 : : {
273 [ + + ]: 261437 : if (buffer[i] != '\0')
274 : : {
275 : 159549 : has_nonzero_byte = true;
276 : 159549 : break;
277 : : }
278 : : }
279 : :
280 : : /*
281 : : * If the entire block was zeros, this is the end of the archive, not the
282 : : * start of the next file.
283 : : */
284 [ + + ]: 159748 : if (!has_nonzero_byte)
285 : 199 : return false;
286 : :
287 : : /*
288 : : * Parse key fields out of the header.
289 : : */
767 290 : 159549 : strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
1401 291 [ - + ]: 159549 : if (member->pathname[0] == '\0')
1247 tgl@sss.pgh.pa.us 292 :UBC 0 : pg_fatal("tar member has empty name");
767 rhaas@postgresql.org 293 :CBC 159549 : member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
294 : 159549 : member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
295 : 159549 : member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
296 : 159549 : member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
297 : 159549 : member->is_directory =
298 : 159549 : (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
299 : 159549 : member->is_link =
300 : 159549 : (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
1401 301 [ + + ]: 159549 : if (member->is_link)
767 302 : 16 : strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
303 : :
304 : : /* Compute number of padding bytes. */
1401 305 : 159549 : mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
306 : :
307 : : /* Forward the entire header to the next astreamer. */
397 308 : 159549 : astreamer_content(mystreamer->base.bbs_next, member,
309 : : buffer, TAR_BLOCK_SIZE,
310 : : ASTREAMER_MEMBER_HEADER);
311 : :
1401 312 : 159549 : return true;
313 : : }
314 : :
315 : : /*
316 : : * End-of-stream processing for a tar parser.
317 : : */
318 : : static void
397 319 : 199 : astreamer_tar_parser_finalize(astreamer *streamer)
320 : : {
321 : 199 : astreamer_tar_parser *mystreamer = (astreamer_tar_parser *) streamer;
322 : :
323 [ - + ]: 199 : if (mystreamer->next_context != ASTREAMER_ARCHIVE_TRAILER &&
397 rhaas@postgresql.org 324 [ # # ]:UBC 0 : (mystreamer->next_context != ASTREAMER_MEMBER_HEADER ||
1401 325 [ # # ]: 0 : mystreamer->base.bbs_buffer.len > 0))
1247 tgl@sss.pgh.pa.us 326 : 0 : pg_fatal("COPY stream ended before last file was finished");
327 : :
328 : : /* Send the archive trailer, even if empty. */
397 rhaas@postgresql.org 329 :CBC 199 : astreamer_content(streamer->bbs_next, NULL,
330 : 199 : streamer->bbs_buffer.data, streamer->bbs_buffer.len,
331 : : ASTREAMER_ARCHIVE_TRAILER);
332 : :
333 : : /* Now finalize successor. */
334 : 199 : astreamer_finalize(streamer->bbs_next);
1401 335 : 199 : }
336 : :
337 : : /*
338 : : * Free memory associated with a tar parser.
339 : : */
340 : : static void
397 341 : 199 : astreamer_tar_parser_free(astreamer *streamer)
342 : : {
1401 343 : 199 : pfree(streamer->bbs_buffer.data);
397 344 : 199 : astreamer_free(streamer->bbs_next);
1401 345 : 199 : }
346 : :
347 : : /*
348 : : * Create a astreamer that can generate a tar archive.
349 : : *
350 : : * This is intended to be usable either for generating a brand-new tar archive
351 : : * or for modifying one on the fly. The input should be a series of typed
352 : : * chunks (i.e. not ASTREAMER_UNKNOWN). See also the comments for
353 : : * astreamer_tar_parser_content.
354 : : */
355 : : astreamer *
397 rhaas@postgresql.org 356 :UBC 0 : astreamer_tar_archiver_new(astreamer *next)
357 : : {
358 : : astreamer_tar_archiver *streamer;
359 : :
360 : 0 : streamer = palloc0(sizeof(astreamer_tar_archiver));
361 : 0 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
362 : : &astreamer_tar_archiver_ops;
1401 363 : 0 : streamer->base.bbs_next = next;
364 : :
365 : 0 : return &streamer->base;
366 : : }
367 : :
368 : : /*
369 : : * Fix up the stream of input chunks to create a valid tar file.
370 : : *
371 : : * If a ASTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
372 : : * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
373 : : * passed through without change. Any other size is a fatal error (and
374 : : * indicates a bug).
375 : : *
376 : : * Whenever a new ASTREAMER_MEMBER_HEADER chunk is constructed, the
377 : : * corresponding ASTREAMER_MEMBER_TRAILER chunk is also constructed from
378 : : * scratch. Specifically, we construct a block of zero bytes sufficient to
379 : : * pad out to a block boundary, as required by the tar format. Other
380 : : * ASTREAMER_MEMBER_TRAILER chunks are passed through without change.
381 : : *
382 : : * Any ASTREAMER_MEMBER_CONTENTS chunks are passed through without change.
383 : : *
384 : : * The ASTREAMER_ARCHIVE_TRAILER chunk is replaced with two
385 : : * blocks of zero bytes. Not all tar programs require this, but apparently
386 : : * some do. The server does not supply this trailer. If no archive trailer is
387 : : * present, one will be added by astreamer_tar_parser_finalize.
388 : : */
389 : : static void
397 390 : 0 : astreamer_tar_archiver_content(astreamer *streamer,
391 : : astreamer_member *member,
392 : : const char *data, int len,
393 : : astreamer_archive_context context)
394 : : {
395 : 0 : astreamer_tar_archiver *mystreamer = (astreamer_tar_archiver *) streamer;
396 : : char buffer[2 * TAR_BLOCK_SIZE];
397 : :
398 [ # # ]: 0 : Assert(context != ASTREAMER_UNKNOWN);
399 : :
400 [ # # # # ]: 0 : if (context == ASTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
401 : : {
1401 402 [ # # ]: 0 : Assert(len == 0);
403 : :
404 : : /* Replace zero-length tar header with a newly constructed one. */
405 : 0 : tarCreateHeader(buffer, member->pathname, NULL,
406 : : member->size, member->mode, member->uid, member->gid,
407 : : time(NULL));
408 : 0 : data = buffer;
409 : 0 : len = TAR_BLOCK_SIZE;
410 : :
411 : : /* Also make a note to replace padding, in case size changed. */
412 : 0 : mystreamer->rearchive_member = true;
413 : : }
397 414 [ # # ]: 0 : else if (context == ASTREAMER_MEMBER_TRAILER &&
1401 415 [ # # ]: 0 : mystreamer->rearchive_member)
416 : 0 : {
417 : 0 : int pad_bytes = tarPaddingBytesRequired(member->size);
418 : :
419 : : /* Also replace padding, if we regenerated the header. */
420 : 0 : memset(buffer, 0, pad_bytes);
421 : 0 : data = buffer;
422 : 0 : len = pad_bytes;
423 : :
424 : : /* Don't do this again unless we replace another header. */
425 : 0 : mystreamer->rearchive_member = false;
426 : : }
397 427 [ # # ]: 0 : else if (context == ASTREAMER_ARCHIVE_TRAILER)
428 : : {
429 : : /* Trailer should always be two blocks of zero bytes. */
1401 430 : 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
431 : 0 : data = buffer;
432 : 0 : len = 2 * TAR_BLOCK_SIZE;
433 : : }
434 : :
397 435 : 0 : astreamer_content(streamer->bbs_next, member, data, len, context);
1401 436 : 0 : }
437 : :
/*
 * End-of-stream processing for a tar archiver.
 *
 * The archiver has nothing of its own to flush at this point; it just
 * finalizes the next astreamer in the chain.
 */
static void
astreamer_tar_archiver_finalize(astreamer *streamer)
{
	astreamer_finalize(streamer->bbs_next);
}
446 : :
447 : : /*
448 : : * Free memory associated with a tar archiver.
449 : : */
450 : : static void
397 451 : 0 : astreamer_tar_archiver_free(astreamer *streamer)
452 : : {
453 : 0 : astreamer_free(streamer->bbs_next);
1401 454 : 0 : pfree(streamer);
455 : 0 : }
456 : :
457 : : /*
458 : : * Create a astreamer that blindly adds two blocks of NUL bytes to the
459 : : * end of an incomplete tarfile that the server might send us.
460 : : */
461 : : astreamer *
397 462 : 0 : astreamer_tar_terminator_new(astreamer *next)
463 : : {
464 : : astreamer *streamer;
465 : :
466 : 0 : streamer = palloc0(sizeof(astreamer));
467 : 0 : *((const astreamer_ops **) &streamer->bbs_ops) =
468 : : &astreamer_tar_terminator_ops;
1398 469 : 0 : streamer->bbs_next = next;
470 : :
471 : 0 : return streamer;
472 : : }
473 : :
/*
 * Pass all the content through without change.
 *
 * The terminator sits on an unparsed stream, so 'member' must be NULL and
 * 'context' must be ASTREAMER_UNKNOWN; both are only checked by assertions.
 * The chunk is handed to the next astreamer exactly as it was received.
 */
static void
astreamer_tar_terminator_content(astreamer *streamer,
								 astreamer_member *member,
								 const char *data, int len,
								 astreamer_archive_context context)
{
	/* Expect unparsed input. */
	Assert(member == NULL);
	Assert(context == ASTREAMER_UNKNOWN);

	/* Just forward it. */
	astreamer_content(streamer->bbs_next, member, data, len, context);
}
490 : :
491 : : /*
492 : : * At the end, blindly add the two blocks of NUL bytes which the server fails
493 : : * to supply.
494 : : */
495 : : static void
397 496 : 0 : astreamer_tar_terminator_finalize(astreamer *streamer)
497 : : {
498 : : char buffer[2 * TAR_BLOCK_SIZE];
499 : :
1398 500 : 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
397 501 : 0 : astreamer_content(streamer->bbs_next, NULL, buffer,
502 : : 2 * TAR_BLOCK_SIZE, ASTREAMER_UNKNOWN);
503 : 0 : astreamer_finalize(streamer->bbs_next);
1398 504 : 0 : }
505 : :
506 : : /*
507 : : * Free memory associated with a tar terminator.
508 : : */
509 : : static void
397 510 : 0 : astreamer_tar_terminator_free(astreamer *streamer)
511 : : {
512 : 0 : astreamer_free(streamer->bbs_next);
1398 513 : 0 : pfree(streamer);
514 : 0 : }
|