Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * astreamer.h
4 : : *
5 : : * The "archive streamer" interface is intended to allow frontend code
6 : : * to stream from possibly-compressed archive files from any source and
7 : : * perform arbitrary actions based on the contents of those archives.
8 : : * Archive streamers are intended to be composable, and most tasks will
9 : : * require two or more archive streamers to complete. For instance,
10 : : * if the input is an uncompressed tar stream, a tar parser astreamer
11 : : * could be used to interpret it, and then an extractor astreamer could
12 : : * be used to write each archive member out to a file.
13 : : *
14 : : * In general, each archive streamer is relatively free to take whatever
15 : : * action it desires in the stream of chunks provided by the caller. It
16 : : * may do something simple, like write the archive to a file, perhaps after
17 : : * compressing it, but it can also do more complicated things, like
18 : : * annotating the byte stream to indicate which parts of the data
19 : : * correspond to tar headers or trailing padding, vs. which parts are
20 : : * payload data. A subsequent astreamer may use this information to
21 : : * make further decisions about how to process the data; for example,
22 : : * it might choose to modify the archive contents.
23 : : *
24 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
25 : : *
26 : : * IDENTIFICATION
27 : : * src/include/fe_utils/astreamer.h
28 : : *-------------------------------------------------------------------------
29 : : */
30 : :
31 : : #ifndef ASTREAMER_H
32 : : #define ASTREAMER_H
33 : :
34 : : #include "common/compression.h"
35 : : #include "lib/stringinfo.h"
36 : : #include "pqexpbuffer.h"
37 : :
38 : : struct astreamer;
39 : : struct astreamer_ops;
40 : : typedef struct astreamer astreamer;
41 : : typedef struct astreamer_ops astreamer_ops;
42 : :
43 : : /*
44 : : * Each chunk of archive data passed to an astreamer is classified into one
45 : : * of these categories. When data is initially passed to an archive streamer,
46 : : * each chunk will be categorized as ASTREAMER_UNKNOWN, and the chunks can
47 : : * be of whatever size the caller finds convenient.
48 : : *
49 : : * If the archive is parsed (e.g. see astreamer_tar_parser_new()), then all
50 : : * chunks should be labelled as one of the other types listed here. In
51 : : * addition, there should be exactly one ASTREAMER_MEMBER_HEADER chunk and
52 : : * exactly one ASTREAMER_MEMBER_TRAILER chunk per archive member, even if
53 : : * that means a zero-length call. There can be any number of
54 : : * ASTREAMER_MEMBER_CONTENTS chunks in between those calls. There
55 : : * should be exactly one ASTREAMER_ARCHIVE_TRAILER chunk, and it should follow the
56 : : * last ASTREAMER_MEMBER_TRAILER chunk.
57 : : *
58 : : * In theory, we could need other classifications here, such as a way of
59 : : * indicating an archive header, but the "tar" format doesn't need anything
60 : : * else, so for the time being there's no point.
61 : : */
62 : : typedef enum /* classification tag passed along with every chunk of archive data */
63 : : {
64 : : ASTREAMER_UNKNOWN, /* unparsed data; chunk boundaries are whatever the caller chose */
65 : : ASTREAMER_MEMBER_HEADER, /* exactly one per archive member, possibly zero-length */
66 : : ASTREAMER_MEMBER_CONTENTS, /* any number of these between a member's header and trailer */
67 : : ASTREAMER_MEMBER_TRAILER, /* exactly one per archive member, possibly zero-length */
68 : : ASTREAMER_ARCHIVE_TRAILER, /* comes after the last ASTREAMER_MEMBER_TRAILER chunk */
69 : : } astreamer_archive_context;
70 : :
71 : : /*
72 : : * Each chunk of data that is classified as ASTREAMER_MEMBER_HEADER,
73 : : * ASTREAMER_MEMBER_CONTENTS, or ASTREAMER_MEMBER_TRAILER should also
74 : : * pass a pointer to an instance of this struct. The details are expected
75 : : * to be present in the archive header and used to fill the struct, after
76 : : * which all subsequent calls for the same archive member are expected to
77 : : * pass the same details.
78 : : */
79 : : typedef struct /* per-member details that accompany member header/contents/trailer chunks */
80 : : {
81 : : char pathname[MAXPGPATH]; /* fixed-size buffer; presumably the member's path within the archive -- confirm against the parser */
82 : : pgoff_t size; /* presumably the member's payload size in bytes -- TODO confirm */
83 : : mode_t mode; /* presumably file permission bits taken from the archive header -- TODO confirm */
84 : : uid_t uid; /* owner id as recorded for the member (assumed from the name; verify) */
85 : : gid_t gid; /* group id as recorded for the member (assumed from the name; verify) */
86 : : /* note: special filetypes will set none of these flags */
87 : : bool is_regular; /* first of the three file-type flags covered by the note above */
88 : : bool is_directory; /* second file-type flag */
89 : : bool is_symlink; /* third file-type flag */
90 : : char linktarget[MAXPGPATH]; /* fixed-size buffer; presumably only meaningful when is_symlink is set -- confirm */
91 : : } astreamer_member;
92 : :
93 : : /*
94 : : * Generally, each type of astreamer will define its own struct, but the
95 : : * first element should be 'astreamer base'. An astreamer that does not
96 : : * require any additional private data could use this structure directly.
97 : : *
98 : : * bbs_ops is a pointer to the astreamer_ops object which contains the
99 : : * function pointers appropriate to this type of astreamer.
100 : : *
101 : : * bbs_next is a pointer to the successor astreamer, for those types of
102 : : * astreamer which forward data to a successor. It need not be used and
103 : : * should be set to NULL when not relevant.
104 : : *
105 : : * bbs_buffer is a buffer for accumulating data for temporary storage. Each
106 : : * type of astreamer makes its own decisions about whether and how to use
107 : : * this buffer.
108 : : */
109 : : struct astreamer /* common base object; type-specific astreamer structs embed this as their first member */
110 : : {
111 : : const astreamer_ops *bbs_ops; /* callback table; the inline wrappers in this header dispatch through it */
112 : : astreamer *bbs_next; /* successor astreamer, or NULL when this type does not forward data */
113 : : StringInfoData bbs_buffer; /* scratch buffer; the astreamer_buffer_* helpers append to it */
114 : : };
115 : :
116 : : /*
117 : : * There are three callbacks for an astreamer. The 'content' callback is
118 : : * called repeatedly, as described in the astreamer_archive_context comments.
119 : : * Then, the 'finalize' callback is called once at the end, to give the
120 : : * astreamer a chance to perform cleanup such as closing files. Finally,
121 : : * because this code is running in a frontend environment where, as of this
122 : : * writing, there are no memory contexts, the 'free' callback is called to
123 : : * release memory. These callbacks should always be invoked using the static
124 : : * inline functions defined below.
125 : : */
126 : : struct astreamer_ops /* per-type callback table pointed to by astreamer.bbs_ops */
127 : : {
128 : : void (*content) (astreamer *streamer, astreamer_member *member,
129 : : const char *data, int len,
130 : : astreamer_archive_context context); /* called repeatedly, once per chunk; 'context' classifies the chunk */
131 : : void (*finalize) (astreamer *streamer); /* called once at the end, for cleanup such as closing files */
132 : : void (*free) (astreamer *streamer); /* called last, to release the astreamer's memory */
133 : : };
134 : :
135 : : /* Send some content to a astreamer. */
136 : : static inline void
638 rhaas@postgresql.org 137 :CBC 1076649 : astreamer_content(astreamer *streamer, astreamer_member *member,
138 : : const char *data, int len,
139 : : astreamer_archive_context context)
140 : : {
141 [ - + ]: 1076649 : Assert(streamer != NULL);
142 : 1076649 : streamer->bbs_ops->content(streamer, member, data, len, context);
143 : 1076647 : }
144 : :
145 : : /* Finalize a astreamer. */
146 : : static inline void
147 : 487 : astreamer_finalize(astreamer *streamer)
148 : : {
149 [ - + ]: 487 : Assert(streamer != NULL);
150 : 487 : streamer->bbs_ops->finalize(streamer);
151 : 487 : }
152 : :
153 : : /* Free a astreamer. */
154 : : static inline void
155 : 617 : astreamer_free(astreamer *streamer)
156 : : {
157 [ - + ]: 617 : Assert(streamer != NULL);
158 : 617 : streamer->bbs_ops->free(streamer);
159 : 617 : }
160 : :
161 : : /*
162 : : * This is a convenience method for use when implementing a astreamer; it is
163 : : * not for use by outside callers. It adds the amount of data specified by
164 : : * 'nbytes' to the astreamer's buffer and adjusts '*len' and '*data'
165 : : * accordingly.
166 : : */
167 : : static inline void
168 : 214960 : astreamer_buffer_bytes(astreamer *streamer, const char **data, int *len,
169 : : int nbytes)
170 : : {
171 [ - + ]: 214960 : Assert(nbytes <= *len);
172 : :
173 : 214960 : appendBinaryStringInfo(&streamer->bbs_buffer, *data, nbytes);
174 : 214960 : *len -= nbytes;
175 : 214960 : *data += nbytes;
176 : 214960 : }
177 : :
178 : : /*
179 : : * This is a convenience method for use when implementing a astreamer; it is
180 : : * not for use by outsider callers. It attempts to add enough data to the
181 : : * astreamer's buffer to reach a length of target_bytes and adjusts '*len'
182 : : * and '*data' accordingly. It returns true if the target length has been
183 : : * reached and false otherwise.
184 : : */
185 : : static inline bool
186 : 214744 : astreamer_buffer_until(astreamer *streamer, const char **data, int *len,
187 : : int target_bytes)
188 : : {
189 : 214744 : int buflen = streamer->bbs_buffer.len;
190 : :
191 [ - + ]: 214744 : if (buflen >= target_bytes)
192 : : {
193 : : /* Target length already reached; nothing to do. */
638 rhaas@postgresql.org 194 :UBC 0 : return true;
195 : : }
196 : :
638 rhaas@postgresql.org 197 [ - + ]:CBC 214744 : if (buflen + *len < target_bytes)
198 : : {
199 : : /* Not enough data to reach target length; buffer all of it. */
638 rhaas@postgresql.org 200 :UBC 0 : astreamer_buffer_bytes(streamer, data, len, *len);
201 : 0 : return false;
202 : : }
203 : :
204 : : /* Buffer just enough to reach the target length. */
638 rhaas@postgresql.org 205 :CBC 214744 : astreamer_buffer_bytes(streamer, data, len, target_bytes - buflen);
206 : 214744 : return true;
207 : : }
208 : :
209 : : /*
210 : : * Functions for creating astreamer objects of various types. See the header
211 : : * comments for each of these functions for details.
212 : : */
213 : : extern astreamer *astreamer_plain_writer_new(char *pathname, FILE *file); /* takes no successor astreamer; NOTE(review): FILE needs <stdio.h>, which this header does not include itself -- presumably supplied by the includer, confirm */
214 : : extern astreamer *astreamer_gzip_writer_new(char *pathname, FILE *file,
215 : : pg_compress_specification *compress); /* same leading arguments as the plain writer, plus a compression specification */
216 : : extern astreamer *astreamer_extractor_new(const char *basepath,
217 : : const char *(*link_map) (const char *),
218 : : void (*report_output_file) (const char *)); /* takes two caller-supplied callbacks; whether either may be NULL is not visible here -- check the implementation */
219 : :
220 : : extern astreamer *astreamer_gzip_decompressor_new(astreamer *next); /* this and every constructor below take the successor astreamer as 'next' */
221 : : extern astreamer *astreamer_lz4_compressor_new(astreamer *next,
222 : : pg_compress_specification *compress); /* compressors additionally take a compression specification */
223 : : extern astreamer *astreamer_lz4_decompressor_new(astreamer *next);
224 : : extern astreamer *astreamer_zstd_compressor_new(astreamer *next,
225 : : pg_compress_specification *compress); /* same parameter list as the lz4 compressor */
226 : : extern astreamer *astreamer_zstd_decompressor_new(astreamer *next);
227 : : extern astreamer *astreamer_tar_parser_new(astreamer *next); /* named in the astreamer_archive_context comment as an example of a parser */
228 : : extern astreamer *astreamer_tar_terminator_new(astreamer *next);
229 : : extern astreamer *astreamer_tar_archiver_new(astreamer *next);
230 : :
231 : : #endif
|