Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * compress_io.c
4 : : * Routines for archivers to write an uncompressed or compressed data
5 : : * stream.
6 : : *
7 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * This file includes two APIs for dealing with compressed data. The first
11 : : * provides more flexibility, using callbacks to read/write data from the
12 : : * underlying stream. The second API is a wrapper around fopen and
13 : : * friends, providing an interface similar to those, but abstracts away
14 : : * the possible compression. The second API is aimed for the resulting
15 : : * files to be easily manipulated with an external compression utility
16 : : * program.
17 : : *
18 : : * Compressor API
19 : : * --------------
20 : : *
21 : : * The interface for writing to an archive consists of three functions:
22 : : * AllocateCompressor, writeData, and EndCompressor. First you call
23 : : * AllocateCompressor, then write all the data by calling writeData as many
24 : : * times as needed, and finally EndCompressor. writeData will call the
25 : : * WriteFunc that was provided to AllocateCompressor for each chunk of
26 : : * compressed data.
27 : : *
28 : : * The interface for reading an archive consists of three analogous functions:
29 : : * AllocateCompressor, readData, and EndCompressor. First you call
30 : : * AllocateCompressor, then read all the data by calling readData to read the
31 : : * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc
32 : : * returns the compressed data one chunk at a time. Then readData decompresses
33 : : * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0
34 : : * to signal EOF. The interface is the same for compressed and uncompressed
35 : : * streams.
36 : : *
37 : : * Compressed stream API
38 : : * ----------------------
39 : : *
40 : : * The compressed stream API provides a set of function pointers for
41 : : * opening, reading, writing, and finally closing files. The implemented
42 : : * function pointers are documented in the corresponding header file and are
43 : : * common for all streams. It allows the caller to use the same functions for
44 : : * both compressed and uncompressed streams.
45 : : *
46 : : * The interface consists of three functions, InitCompressFileHandle,
47 : : * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the
48 : : * compression is known, then start by calling InitCompressFileHandle,
49 : : * otherwise discover it by using InitDiscoverCompressFileHandle. Then call
50 : : * the function pointers as required for the read/write operations. Finally
51 : : * call EndCompressFileHandle to end the stream.
52 : : *
53 : : * InitDiscoverCompressFileHandle tries to infer the compression by the
54 : : * filename suffix. If the suffix is not yet known then it tries to simply
55 : : * open the file and if it fails, it tries to open the same file with
56 : : * compressed suffixes (.gz, .lz4 and .zst, in this order).
57 : : *
58 : : * IDENTIFICATION
59 : : * src/bin/pg_dump/compress_io.c
60 : : *
61 : : *-------------------------------------------------------------------------
62 : : */
63 : : #include "postgres_fe.h"
64 : :
65 : : #include <sys/stat.h>
66 : : #include <unistd.h>
67 : :
68 : : #include "compress_gzip.h"
69 : : #include "compress_io.h"
70 : : #include "compress_lz4.h"
71 : : #include "compress_none.h"
72 : : #include "compress_zstd.h"
73 : :
74 : : /*----------------------
75 : : * Generic functions
76 : : *----------------------
77 : : */
78 : :
79 : : /*
80 : : * Checks whether support for a compression algorithm is implemented in
81 : : * pg_dump/restore.
82 : : *
83 : : * On success returns NULL, otherwise returns a malloc'ed string which can be
84 : : * used by the caller in an error message.
85 : : */
86 : : char *
926 tomas.vondra@postgre 87 :CBC 395 : supports_compression(const pg_compress_specification compression_spec)
88 : : {
841 tgl@sss.pgh.pa.us 89 : 395 : const pg_compress_algorithm algorithm = compression_spec.algorithm;
90 : 395 : bool supported = false;
91 : :
926 tomas.vondra@postgre 92 [ + + ]: 395 : if (algorithm == PG_COMPRESSION_NONE)
93 : 229 : supported = true;
94 : : #ifdef HAVE_LIBZ
95 [ + + ]: 395 : if (algorithm == PG_COMPRESSION_GZIP)
96 : 148 : supported = true;
97 : : #endif
98 : : #ifdef USE_LZ4
99 [ + + ]: 395 : if (algorithm == PG_COMPRESSION_LZ4)
100 : 9 : supported = true;
101 : : #endif
102 : : #ifdef USE_ZSTD
885 103 [ + + ]: 395 : if (algorithm == PG_COMPRESSION_ZSTD)
104 : 9 : supported = true;
105 : : #endif
106 : :
926 107 [ - + ]: 395 : if (!supported)
852 peter@eisentraut.org 108 :UBC 0 : return psprintf(_("this build does not support compression with %s"),
109 : : get_compress_algorithm_name(algorithm));
110 : :
926 tomas.vondra@postgre 111 :CBC 395 : return NULL;
112 : : }
113 : :
114 : : /*----------------------
115 : : * Compressor API
116 : : *----------------------
117 : : */
118 : :
119 : : /*
120 : : * Allocate a new compressor.
121 : : */
122 : : CompressorState *
1009 michael@paquier.xyz 123 : 471 : AllocateCompressor(const pg_compress_specification compression_spec,
124 : : ReadFunc readF, WriteFunc writeF)
125 : : {
126 : : CompressorState *cs;
127 : :
4722 tgl@sss.pgh.pa.us 128 : 471 : cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
926 tomas.vondra@postgre 129 : 471 : cs->readF = readF;
5392 heikki.linnakangas@i 130 : 471 : cs->writeF = writeF;
131 : :
1009 michael@paquier.xyz 132 [ - + ]: 471 : if (compression_spec.algorithm == PG_COMPRESSION_NONE)
926 tomas.vondra@postgre 133 :UBC 0 : InitCompressorNone(cs, compression_spec);
926 tomas.vondra@postgre 134 [ + + ]:CBC 471 : else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
135 : 311 : InitCompressorGzip(cs, compression_spec);
136 [ + + ]: 160 : else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
137 : 80 : InitCompressorLZ4(cs, compression_spec);
885 138 [ + - ]: 80 : else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
139 : 80 : InitCompressorZstd(cs, compression_spec);
140 : :
926 141 : 471 : return cs;
142 : : }
143 : :
144 : : /*
145 : : * Terminate compression library context and flush its buffers.
146 : : */
147 : : void
5392 heikki.linnakangas@i 148 : 471 : EndCompressor(ArchiveHandle *AH, CompressorState *cs)
149 : : {
926 tomas.vondra@postgre 150 : 471 : cs->end(AH, cs);
151 : 471 : pg_free(cs);
5392 heikki.linnakangas@i 152 : 471 : }
153 : :
154 : : /*----------------------
155 : : * Compressed stream API
156 : : *----------------------
157 : : */
158 : :
159 : : /*
160 : : * Private routines
161 : : */
/*
 * Does 'filename' end with 'suffix'?
 *
 * Returns 1 when the last strlen(suffix) bytes of 'filename' are equal to
 * 'suffix', and 0 otherwise.  The comparison is bytewise (case-sensitive).
 * An empty suffix matches every filename.
 *
 * Lengths are kept in size_t, the type strlen() returns, so that very long
 * strings are not truncated on their way into the comparison.
 */
static int
hasSuffix(const char *filename, const char *suffix)
{
	size_t		filenamelen = strlen(filename);
	size_t		suffixlen = strlen(suffix);

	/* a name shorter than the suffix cannot possibly end with it */
	if (filenamelen < suffixlen)
		return 0;

	return memcmp(filename + (filenamelen - suffixlen),
				  suffix,
				  suffixlen) == 0;
}
175 : :
/*
 * free() wrapper that leaves errno exactly as it found it.
 *
 * Used on cleanup paths below where the caller still wants to report the
 * errno of an earlier failure after releasing memory.
 */
static void
free_keep_errno(void *p)
{
	const int	saved = errno;

	free(p);
	errno = saved;
}
185 : :
186 : : /*
187 : : * Public interface
188 : : */
189 : :
190 : : /*
191 : : * Initialize a compress file handle for the specified compression algorithm.
192 : : */
193 : : CompressFileHandle *
926 tomas.vondra@postgre 194 : 901 : InitCompressFileHandle(const pg_compress_specification compression_spec)
195 : : {
196 : : CompressFileHandle *CFH;
197 : :
198 : 901 : CFH = pg_malloc0(sizeof(CompressFileHandle));
199 : :
200 [ + + ]: 901 : if (compression_spec.algorithm == PG_COMPRESSION_NONE)
201 : 443 : InitCompressFileHandleNone(CFH, compression_spec);
202 [ + + ]: 458 : else if (compression_spec.algorithm == PG_COMPRESSION_GZIP)
203 : 294 : InitCompressFileHandleGzip(CFH, compression_spec);
204 [ + + ]: 164 : else if (compression_spec.algorithm == PG_COMPRESSION_LZ4)
205 : 81 : InitCompressFileHandleLZ4(CFH, compression_spec);
885 206 [ + - ]: 83 : else if (compression_spec.algorithm == PG_COMPRESSION_ZSTD)
207 : 83 : InitCompressFileHandleZstd(CFH, compression_spec);
208 : :
926 209 : 901 : return CFH;
210 : : }
211 : :
/*
 * Checks if a compressed file (with the specified extension) exists.
 *
 * 'path' is the base file name and 'ext' the extension to try, given without
 * the leading dot (e.g. "gz"); the name tested is "<path>.<ext>".  'ext' is
 * only read, so it is declared const and string literals can be passed
 * safely.
 *
 * The filename of the tested file is stored to the fname buffer: the buffer
 * *fname currently points to is freed (errno is preserved across that free),
 * and a newly allocated string is returned through the pointer whether or
 * not the file exists.
 *
 * Returns true if access(F_OK) succeeds on the tested name, false otherwise.
 */
static bool
check_compressed_file(const char *path, char **fname, const char *ext)
{
	free_keep_errno(*fname);
	*fname = psprintf("%s.%s", path, ext);
	return (access(*fname, F_OK) == 0);
}
225 : :
226 : : /*
227 : : * Open a file for reading. 'path' is the file to open, and 'mode' should
228 : : * be either "r" or "rb".
229 : : *
230 : : * If the file at 'path' contains the suffix of a supported compression method,
231 : : * currently this includes ".gz", ".lz4" and ".zst", then this compression will be used
232 : : * throughout. Otherwise the compression will be inferred by iteratively trying
233 : : * to open the file at 'path', first as is, then by appending known compression
234 : : * suffixes. So if you pass "foo" as 'path', this will open either "foo" or
235 : : * "foo.{gz,lz4,zst}", trying in that order.
236 : : *
237 : : * On failure, return NULL with an error code in errno.
238 : : */
239 : : CompressFileHandle *
926 240 : 249 : InitDiscoverCompressFileHandle(const char *path, const char *mode)
241 : : {
242 : 249 : CompressFileHandle *CFH = NULL;
243 : : struct stat st;
244 : : char *fname;
245 : 249 : pg_compress_specification compression_spec = {0};
246 : :
247 : 249 : compression_spec.algorithm = PG_COMPRESSION_NONE;
248 : :
249 [ - + ]: 249 : Assert(strcmp(mode, PG_BINARY_R) == 0);
250 : :
626 dgustafsson@postgres 251 : 249 : fname = pg_strdup(path);
252 : :
926 tomas.vondra@postgre 253 [ - + ]: 249 : if (hasSuffix(fname, ".gz"))
926 tomas.vondra@postgre 254 :UBC 0 : compression_spec.algorithm = PG_COMPRESSION_GZIP;
885 tomas.vondra@postgre 255 [ - + ]:CBC 249 : else if (hasSuffix(fname, ".lz4"))
885 tomas.vondra@postgre 256 :UBC 0 : compression_spec.algorithm = PG_COMPRESSION_LZ4;
885 tomas.vondra@postgre 257 [ - + ]:CBC 249 : else if (hasSuffix(fname, ".zst"))
885 tomas.vondra@postgre 258 :UBC 0 : compression_spec.algorithm = PG_COMPRESSION_ZSTD;
259 : : else
260 : : {
885 tomas.vondra@postgre 261 [ + + ]:CBC 249 : if (stat(path, &st) == 0)
926 262 : 19 : compression_spec.algorithm = PG_COMPRESSION_NONE;
885 263 [ + + ]: 230 : else if (check_compressed_file(path, &fname, "gz"))
264 : 148 : compression_spec.algorithm = PG_COMPRESSION_GZIP;
265 [ + + ]: 82 : else if (check_compressed_file(path, &fname, "lz4"))
266 : 40 : compression_spec.algorithm = PG_COMPRESSION_LZ4;
267 [ + - ]: 42 : else if (check_compressed_file(path, &fname, "zst"))
268 : 42 : compression_spec.algorithm = PG_COMPRESSION_ZSTD;
269 : : }
270 : :
926 271 : 249 : CFH = InitCompressFileHandle(compression_spec);
8 dgustafsson@postgres 272 : 249 : errno = 0;
898 tomas.vondra@postgre 273 [ - + ]: 249 : if (!CFH->open_func(fname, -1, mode, CFH))
274 : : {
926 tomas.vondra@postgre 275 :UBC 0 : free_keep_errno(CFH);
276 : 0 : CFH = NULL;
277 : : }
926 tomas.vondra@postgre 278 :CBC 249 : free_keep_errno(fname);
279 : :
280 : 249 : return CFH;
281 : : }
282 : :
283 : : /*
284 : : * Close an open file handle and release its memory.
285 : : *
286 : : * On failure, returns false and sets errno appropriately.
287 : : */
288 : : bool
289 : 880 : EndCompressFileHandle(CompressFileHandle *CFH)
290 : : {
898 291 : 880 : bool ret = false;
292 : :
8 dgustafsson@postgres 293 : 880 : errno = 0;
926 tomas.vondra@postgre 294 [ + - ]: 880 : if (CFH->private_data)
295 : 880 : ret = CFH->close_func(CFH);
296 : :
297 : 880 : free_keep_errno(CFH);
298 : :
299 : 880 : return ret;
300 : : }
|