Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * astreamer_file.c
4 : : *
5 : : * Archive streamers that write to files. astreamer_plain_writer writes
6 : : * the whole archive to a single file, and astreamer_extractor writes
7 : : * each archive member to a separate file in a given directory.
8 : : *
9 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
10 : : *
11 : : * IDENTIFICATION
12 : : * src/fe_utils/astreamer_file.c
13 : : *-------------------------------------------------------------------------
14 : : */
15 : :
16 : : #include "postgres_fe.h"
17 : :
18 : : #include <unistd.h>
19 : :
20 : : #include "common/file_perm.h"
21 : : #include "common/logging.h"
22 : : #include "fe_utils/astreamer.h"
23 : :
24 : : typedef struct astreamer_plain_writer
25 : : {
26 : : astreamer base;
27 : : char *pathname;
28 : : FILE *file;
29 : : bool should_close_file;
30 : : } astreamer_plain_writer;
31 : :
32 : : typedef struct astreamer_extractor
33 : : {
34 : : astreamer base;
35 : : char *basepath;
36 : : const char *(*link_map) (const char *);
37 : : void (*report_output_file) (const char *);
38 : : char filename[MAXPGPATH];
39 : : FILE *file;
40 : : } astreamer_extractor;
41 : :
42 : : static void astreamer_plain_writer_content(astreamer *streamer,
43 : : astreamer_member *member,
44 : : const char *data, int len,
45 : : astreamer_archive_context context);
46 : : static void astreamer_plain_writer_finalize(astreamer *streamer);
47 : : static void astreamer_plain_writer_free(astreamer *streamer);
48 : :
49 : : static const astreamer_ops astreamer_plain_writer_ops = {
50 : : .content = astreamer_plain_writer_content,
51 : : .finalize = astreamer_plain_writer_finalize,
52 : : .free = astreamer_plain_writer_free
53 : : };
54 : :
55 : : static void astreamer_extractor_content(astreamer *streamer,
56 : : astreamer_member *member,
57 : : const char *data, int len,
58 : : astreamer_archive_context context);
59 : : static void astreamer_extractor_finalize(astreamer *streamer);
60 : : static void astreamer_extractor_free(astreamer *streamer);
61 : : static void extract_directory(const char *filename, mode_t mode);
62 : : static void extract_link(const char *filename, const char *linktarget);
63 : : static FILE *create_file_for_extract(const char *filename, mode_t mode);
64 : :
65 : : static const astreamer_ops astreamer_extractor_ops = {
66 : : .content = astreamer_extractor_content,
67 : : .finalize = astreamer_extractor_finalize,
68 : : .free = astreamer_extractor_free
69 : : };
70 : :
71 : : /*
72 : : * Create a astreamer that just writes data to a file.
73 : : *
74 : : * The caller must specify a pathname and may specify a file. The pathname is
75 : : * used for error-reporting purposes either way. If file is NULL, the pathname
76 : : * also identifies the file to which the data should be written: it is opened
77 : : * for writing and closed when done. If file is not NULL, the data is written
78 : : * there.
79 : : */
80 : : astreamer *
663 rhaas@postgresql.org 81 :CBC 22 : astreamer_plain_writer_new(char *pathname, FILE *file)
82 : : {
83 : : astreamer_plain_writer *streamer;
84 : :
172 michael@paquier.xyz 85 :GNC 22 : streamer = palloc0_object(astreamer_plain_writer);
663 rhaas@postgresql.org 86 :CBC 22 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
87 : : &astreamer_plain_writer_ops;
88 : :
1667 89 : 22 : streamer->pathname = pstrdup(pathname);
90 : 22 : streamer->file = file;
91 : :
92 [ + - ]: 22 : if (file == NULL)
93 : : {
94 : 22 : streamer->file = fopen(pathname, "wb");
95 [ - + ]: 22 : if (streamer->file == NULL)
1513 tgl@sss.pgh.pa.us 96 :UBC 0 : pg_fatal("could not create file \"%s\": %m", pathname);
1667 rhaas@postgresql.org 97 :CBC 22 : streamer->should_close_file = true;
98 : : }
99 : :
100 : 22 : return &streamer->base;
101 : : }
102 : :
103 : : /*
104 : : * Write archive content to file.
105 : : */
106 : : static void
663 107 : 30778 : astreamer_plain_writer_content(astreamer *streamer,
108 : : astreamer_member *member, const char *data,
109 : : int len, astreamer_archive_context context)
110 : : {
111 : : astreamer_plain_writer *mystreamer;
112 : :
113 : 30778 : mystreamer = (astreamer_plain_writer *) streamer;
114 : :
1667 115 [ - + ]: 30778 : if (len == 0)
1667 rhaas@postgresql.org 116 :UBC 0 : return;
117 : :
1667 rhaas@postgresql.org 118 :CBC 30778 : errno = 0;
119 [ - + ]: 30778 : if (fwrite(data, len, 1, mystreamer->file) != 1)
120 : : {
121 : : /* if write didn't set errno, assume problem is no disk space */
1667 rhaas@postgresql.org 122 [ # # ]:UBC 0 : if (errno == 0)
123 : 0 : errno = ENOSPC;
1513 tgl@sss.pgh.pa.us 124 : 0 : pg_fatal("could not write to file \"%s\": %m",
125 : : mystreamer->pathname);
126 : : }
127 : : }
128 : :
129 : : /*
130 : : * End-of-archive processing when writing to a plain file consists of closing
131 : : * the file if we opened it, but not if the caller provided it.
132 : : */
133 : : static void
663 rhaas@postgresql.org 134 :CBC 22 : astreamer_plain_writer_finalize(astreamer *streamer)
135 : : {
136 : : astreamer_plain_writer *mystreamer;
137 : :
138 : 22 : mystreamer = (astreamer_plain_writer *) streamer;
139 : :
1667 140 [ + - - + ]: 22 : if (mystreamer->should_close_file && fclose(mystreamer->file) != 0)
1513 tgl@sss.pgh.pa.us 141 :UBC 0 : pg_fatal("could not close file \"%s\": %m",
142 : : mystreamer->pathname);
143 : :
1667 rhaas@postgresql.org 144 :CBC 22 : mystreamer->file = NULL;
145 : 22 : mystreamer->should_close_file = false;
146 : 22 : }
147 : :
148 : : /*
149 : : * Free memory associated with this astreamer.
150 : : */
151 : : static void
663 152 : 22 : astreamer_plain_writer_free(astreamer *streamer)
153 : : {
154 : : astreamer_plain_writer *mystreamer;
155 : :
156 : 22 : mystreamer = (astreamer_plain_writer *) streamer;
157 : :
1667 158 [ - + ]: 22 : Assert(!mystreamer->should_close_file);
159 [ - + ]: 22 : Assert(mystreamer->base.bbs_next == NULL);
160 : :
161 : 22 : pfree(mystreamer->pathname);
162 : 22 : pfree(mystreamer);
163 : 22 : }
164 : :
165 : : /*
166 : : * Create a astreamer that extracts an archive.
167 : : *
168 : : * All pathnames in the archive are interpreted relative to basepath.
169 : : *
170 : : * Unlike e.g. astreamer_plain_writer_new() we can't do anything useful here
171 : : * with untyped chunks; we need typed chunks which follow the rules described
172 : : * in astreamer.h. Assuming we have that, we don't need to worry about the
173 : : * original archive format; it's enough to just look at the member information
174 : : * provided and write to the corresponding file.
175 : : *
176 : : * 'link_map' is a function that will be applied to the target of any
177 : : * symbolic link, and which should return a replacement pathname to be used
178 : : * in its place. If NULL, the symbolic link target is used without
179 : : * modification.
180 : : *
181 : : * 'report_output_file' is a function that will be called each time we open a
182 : : * new output file. The pathname to that file is passed as an argument. If
183 : : * NULL, the call is skipped.
184 : : */
185 : : astreamer *
663 186 : 181 : astreamer_extractor_new(const char *basepath,
187 : : const char *(*link_map) (const char *),
188 : : void (*report_output_file) (const char *))
189 : : {
190 : : astreamer_extractor *streamer;
191 : :
172 michael@paquier.xyz 192 :GNC 181 : streamer = palloc0_object(astreamer_extractor);
663 rhaas@postgresql.org 193 :CBC 181 : *((const astreamer_ops **) &streamer->base.bbs_ops) =
194 : : &astreamer_extractor_ops;
1667 195 : 181 : streamer->basepath = pstrdup(basepath);
196 : 181 : streamer->link_map = link_map;
197 : 181 : streamer->report_output_file = report_output_file;
198 : :
199 : 181 : return &streamer->base;
200 : : }
201 : :
202 : : /*
203 : : * Extract archive contents to the filesystem.
204 : : */
205 : : static void
663 206 : 516291 : astreamer_extractor_content(astreamer *streamer, astreamer_member *member,
207 : : const char *data, int len,
208 : : astreamer_archive_context context)
209 : : {
210 : 516291 : astreamer_extractor *mystreamer = (astreamer_extractor *) streamer;
211 : : int fnamelen;
212 : :
213 [ + + - + ]: 516291 : Assert(member != NULL || context == ASTREAMER_ARCHIVE_TRAILER);
214 [ - + ]: 516291 : Assert(context != ASTREAMER_UNKNOWN);
215 : :
1667 216 [ + + + + : 516291 : switch (context)
- ]
217 : : {
663 218 : 163450 : case ASTREAMER_MEMBER_HEADER:
1667 219 [ - + ]: 163450 : Assert(mystreamer->file == NULL);
220 : :
19 michael@paquier.xyz 221 [ - + ]: 163450 : if (!path_is_safe_for_extraction(member->pathname))
19 michael@paquier.xyz 222 :UBC 0 : pg_fatal("tar member has unsafe path name: \"%s\"",
223 : : member->pathname);
224 : :
225 : : /* Prepend basepath. */
1667 rhaas@postgresql.org 226 :CBC 163450 : snprintf(mystreamer->filename, sizeof(mystreamer->filename),
227 : 163450 : "%s/%s", mystreamer->basepath, member->pathname);
228 : :
229 : : /* Remove any trailing slash. */
230 : 163450 : fnamelen = strlen(mystreamer->filename);
231 [ + + ]: 163450 : if (mystreamer->filename[fnamelen - 1] == '/')
232 : 3996 : mystreamer->filename[fnamelen - 1] = '\0';
233 : :
234 : : /* Dispatch based on file type. */
58 tgl@sss.pgh.pa.us 235 [ + + ]: 163450 : if (member->is_regular)
236 : 159454 : mystreamer->file =
237 : 159454 : create_file_for_extract(mystreamer->filename,
238 : : member->mode);
239 [ + + ]: 3996 : else if (member->is_directory)
1667 rhaas@postgresql.org 240 : 3980 : extract_directory(mystreamer->filename, member->mode);
58 tgl@sss.pgh.pa.us 241 [ + - ]: 16 : else if (member->is_symlink)
242 : : {
1667 rhaas@postgresql.org 243 : 16 : const char *linktarget = member->linktarget;
244 : :
245 [ + - ]: 16 : if (mystreamer->link_map)
246 : 16 : linktarget = mystreamer->link_map(linktarget);
247 : :
19 michael@paquier.xyz 248 [ - + ]: 16 : if (!is_absolute_path(linktarget) &&
19 michael@paquier.xyz 249 [ # # ]:UBC 0 : !path_is_safe_for_extraction(member->linktarget))
250 : : {
251 : 0 : pg_fatal("link target has unsafe path name: \"%s\"",
252 : : member->linktarget);
253 : : }
254 : :
1667 rhaas@postgresql.org 255 :CBC 16 : extract_link(mystreamer->filename, linktarget);
256 : : }
257 : :
258 : : /* Report output file change. */
259 [ + - ]: 163450 : if (mystreamer->report_output_file)
260 : 163450 : mystreamer->report_output_file(mystreamer->filename);
261 : 163450 : break;
262 : :
663 263 : 189213 : case ASTREAMER_MEMBER_CONTENTS:
1667 264 [ - + ]: 189213 : if (mystreamer->file == NULL)
1667 rhaas@postgresql.org 265 :UBC 0 : break;
266 : :
1667 rhaas@postgresql.org 267 :CBC 189213 : errno = 0;
268 [ + + - + ]: 189213 : if (len > 0 && fwrite(data, len, 1, mystreamer->file) != 1)
269 : : {
270 : : /* if write didn't set errno, assume problem is no disk space */
1667 rhaas@postgresql.org 271 [ # # ]:UBC 0 : if (errno == 0)
272 : 0 : errno = ENOSPC;
1513 tgl@sss.pgh.pa.us 273 : 0 : pg_fatal("could not write to file \"%s\": %m",
274 : : mystreamer->filename);
275 : : }
1667 rhaas@postgresql.org 276 :CBC 189213 : break;
277 : :
663 278 : 163449 : case ASTREAMER_MEMBER_TRAILER:
1667 279 [ + + ]: 163449 : if (mystreamer->file == NULL)
280 : 3996 : break;
68 andrew@dunslane.net 281 [ - + ]: 159453 : if (fclose(mystreamer->file) != 0)
68 andrew@dunslane.net 282 :UBC 0 : pg_fatal("could not close file \"%s\": %m",
283 : : mystreamer->filename);
1667 rhaas@postgresql.org 284 :CBC 159453 : mystreamer->file = NULL;
285 : 159453 : break;
286 : :
663 287 : 179 : case ASTREAMER_ARCHIVE_TRAILER:
1667 288 : 179 : break;
289 : :
1667 rhaas@postgresql.org 290 :UBC 0 : default:
291 : : /* Shouldn't happen. */
1513 tgl@sss.pgh.pa.us 292 : 0 : pg_fatal("unexpected state while extracting archive");
293 : : }
1667 rhaas@postgresql.org 294 :CBC 516291 : }
295 : :
/*
 * Should we tolerate an already-existing directory?
 *
 * When streaming WAL, pg_wal (or pg_xlog for pre-9.6 clusters) will have been
 * created by the wal receiver process.  Also, when the WAL directory location
 * was specified, pg_wal (or pg_xlog) has already been created as a symbolic
 * link before starting the actual backup.  So just ignore creation failures
 * on related directories.
 *
 * If in-place tablespaces are used, pg_tblspc and subdirectories may already
 * exist when we get here.  So tolerate that case, too.
 *
 * Returns true if the last component of 'pathname' is one of the well-known
 * directory names listed below, or if it consists only of digits and directly
 * follows the first "/pg_tblspc/" found in 'pathname'.  Returns false
 * otherwise.
 */
static bool
should_allow_existing_directory(const char *pathname)
{
	static const char tblspc_prefix[] = "/pg_tblspc/";
	static const char *const known_names[] = {
		"pg_wal",
		"pg_xlog",
		"archive_status",
		"summaries",
		"pg_tblspc"
	};
	const char *sep = last_dir_separator(pathname);
	const char *filename;
	size_t		i;

	/*
	 * Do not do pointer arithmetic on a NULL result.  We assume
	 * last_dir_separator() returns NULL when the path has no separator, in
	 * which case the whole string is the last component.
	 */
	filename = (sep != NULL) ? sep + 1 : pathname;

	for (i = 0; i < sizeof(known_names) / sizeof(known_names[0]); i++)
	{
		if (strcmp(filename, known_names[i]) == 0)
			return true;
	}

	/* An all-digits name is acceptable only directly below pg_tblspc. */
	if (strspn(filename, "0123456789") == strlen(filename))
	{
		const char *pg_tblspc = strstr(pathname, tblspc_prefix);

		return pg_tblspc != NULL &&
			pg_tblspc + (sizeof(tblspc_prefix) - 1) == filename;
	}

	return false;
}
329 : :
330 : : /*
331 : : * Create a directory.
332 : : */
333 : : static void
1667 rhaas@postgresql.org 334 :CBC 3980 : extract_directory(const char *filename, mode_t mode)
335 : : {
1138 336 [ + + ]: 3980 : if (mkdir(filename, pg_dir_create_mode) != 0 &&
337 [ + - - + ]: 436 : (errno != EEXIST || !should_allow_existing_directory(filename)))
1138 rhaas@postgresql.org 338 :UBC 0 : pg_fatal("could not create directory \"%s\": %m",
339 : : filename);
340 : :
341 : : #ifndef WIN32
1667 rhaas@postgresql.org 342 [ - + ]:CBC 3980 : if (chmod(filename, mode))
1513 tgl@sss.pgh.pa.us 343 :UBC 0 : pg_fatal("could not set permissions on directory \"%s\": %m",
344 : : filename);
345 : : #endif
1667 rhaas@postgresql.org 346 :CBC 3980 : }
347 : :
/*
 * Create a symbolic link named 'filename' that points to 'linktarget'.
 *
 * It's most likely a link in the pg_tblspc directory, pointing to the
 * location of a tablespace.  By the time we get here the caller has already
 * applied any tablespace mapping given on the command line
 * (--tablespace-mapping).  (The mapping is applied blindly, without checking
 * that the link really is inside pg_tblspc.  We don't expect there to be
 * other symlinks in a data directory, but if there are, you can call it an
 * undocumented feature that you can map them too.)
 */
static void
extract_link(const char *filename, const char *linktarget)
{
	if (symlink(linktarget, filename) == 0)
		return;

	pg_fatal("could not create symbolic link from \"%s\" to \"%s\": %m",
			 filename, linktarget);
}
365 : :
/*
 * Create a regular file.
 *
 * The file is opened for binary writing and, except on Windows, its
 * permissions are set to 'mode'.  Both steps are fatal on failure.
 *
 * Returns the open stream so that the caller can write the member's content
 * to it.
 */
static FILE *
create_file_for_extract(const char *filename, mode_t mode)
{
	FILE	   *fp = fopen(filename, "wb");

	if (fp == NULL)
		pg_fatal("could not create file \"%s\": %m", filename);

#ifndef WIN32
	if (chmod(filename, mode) != 0)
		pg_fatal("could not set permissions on file \"%s\": %m",
				 filename);
#endif

	return fp;
}
388 : :
389 : : /*
390 : : * End-of-stream processing for extracting an archive.
391 : : *
392 : : * There's nothing to do here but sanity checking.
393 : : */
394 : : static void
663 395 : 179 : astreamer_extractor_finalize(astreamer *streamer)
396 : : {
397 : 179 : astreamer_extractor *mystreamer PG_USED_FOR_ASSERTS_ONLY
398 : : = (astreamer_extractor *) streamer;
399 : :
1667 400 [ - + ]: 179 : Assert(mystreamer->file == NULL);
401 : 179 : }
402 : :
403 : : /*
404 : : * Free memory.
405 : : */
406 : : static void
663 407 : 179 : astreamer_extractor_free(astreamer *streamer)
408 : : {
409 : 179 : astreamer_extractor *mystreamer = (astreamer_extractor *) streamer;
410 : :
1667 411 : 179 : pfree(mystreamer->basepath);
412 : 179 : pfree(mystreamer);
413 : 179 : }
|