Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * xlogreader.h
4 : : * Definitions for the generic XLog reading facility
5 : : *
6 : : * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 : : *
8 : : * IDENTIFICATION
9 : : * src/include/access/xlogreader.h
10 : : *
11 : : * NOTES
12 : : * See the definition of the XLogReaderState struct for instructions on
13 : : * how to use the XLogReader infrastructure.
14 : : *
15 : : * The basic idea is to allocate an XLogReaderState via
16 : : * XLogReaderAllocate(), position the reader to the first record with
17 : : * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18 : : * until it returns NULL.
19 : : *
20 : : * Callers supply a page_read callback if they want to call
21 : : * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22 : : * otherwise. The WALRead function can be used as a helper to write
23 : : * page_read callbacks, but it is not mandatory; callers that use it
24 : : * must supply a segment_open callback. The segment_close callback
25 : : * must always be supplied.
26 : : *
27 : : * After reading a record with XLogReadRecord(), it's decomposed into
28 : : * the per-block and main data parts, and the parts can be accessed
29 : : * with the XLogRec* macros and functions. You can also decode a
30 : : * record that's already constructed in memory, without reading from
31 : : * disk, by calling the DecodeXLogRecord() function.
32 : : *-------------------------------------------------------------------------
33 : : */
34 : : #ifndef XLOGREADER_H
35 : : #define XLOGREADER_H
36 : :
37 : : #ifndef FRONTEND
38 : : #include "access/transam.h"
39 : : #endif
40 : :
41 : : #include "access/xlogrecord.h"
42 : : #include "storage/buf.h"
43 : :
44 : : /* WALOpenSegment represents a WAL segment being read: its file descriptor, segment number and timeline. */
45 : : typedef struct WALOpenSegment
46 : : {
47 : : int ws_file; /* segment file descriptor; set by the segment_open callback, set negative by segment_close */
48 : : XLogSegNo ws_segno; /* segment number */
49 : : TimeLineID ws_tli; /* timeline ID of the currently open file; the page_read callback sets it to the TLI it read from */
50 : : } WALOpenSegment;
51 : :
52 : : /* WALSegmentContext carries context information about WAL segments to read */
53 : : typedef struct WALSegmentContext
54 : : {
55 : : char ws_dir[MAXPGPATH]; /* presumably the WAL directory (cf. waldir of XLogReaderAllocate) -- TODO confirm */
56 : : int ws_segsize; /* presumably the WAL segment size (cf. wal_segment_size of XLogReaderAllocate) -- TODO confirm */
57 : : } WALSegmentContext;
58 : :
59 : : typedef struct XLogReaderState XLogReaderState; /* forward declaration; the struct itself is defined later in this header */
60 : :
61 : : /* Function type definitions for various xlogreader interactions; the contract of each callback is documented on the matching member of XLogReaderRoutine. */
62 : : typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
63 : : XLogRecPtr targetPagePtr,
64 : : int reqLen,
65 : : XLogRecPtr targetRecPtr,
66 : : char *readBuf);
67 : : typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
68 : : XLogSegNo nextSegNo,
69 : : TimeLineID *tli_p);
70 : : typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
71 : :
72 : : typedef struct XLogReaderRoutine /* the set of callbacks an XLogReaderState operates with */
73 : : {
74 : : /*
75 : : * Data input callback
76 : : *
77 : : * This callback shall read at least reqLen valid bytes of the xlog page
78 : : * starting at targetPagePtr, and store them in readBuf. The callback
79 : : * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
80 : : * -1 on failure. The callback shall sleep, if necessary, to wait for the
81 : : * requested bytes to become available. The callback will not be invoked
82 : : * again for the same page unless more than the returned number of bytes
83 : : * are needed. XLogReaderState.nonblocking tells the callback that it should not block waiting for data.
84 : : *
85 : : * targetRecPtr is the position of the WAL record we're reading. Usually
86 : : * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
87 : : * to read and verify the page or segment header, before it reads the
88 : : * actual WAL record it's interested in. In that case, targetRecPtr can
89 : : * be used to determine which timeline to read the page from.
90 : : *
91 : : * The callback shall set ->seg.ws_tli to the TLI of the file the page was
92 : : * read from.
93 : : */
94 : : XLogPageReadCB page_read; /* may be NULL when neither XLogReadRecord nor XLogFindNextRecord is called */
95 : :
96 : : /*
97 : : * Callback to open the specified WAL segment for reading. ->seg.ws_file
98 : : * shall be set to the file descriptor of the opened segment. In case of
99 : : * failure, an error shall be raised by the callback and it shall not
100 : : * return.
101 : : *
102 : : * "nextSegNo" is the number of the segment to be opened.
103 : : *
104 : : * "tli_p" is an input/output argument. WALRead() uses it to pass the
105 : : * timeline in which the new segment should be found, but the callback can
106 : : * use it to return the TLI that it actually opened.
107 : : */
108 : : WALSegmentOpenCB segment_open; /* must be supplied by callers that use WALRead() */
109 : :
110 : : /*
111 : : * WAL segment close callback. ->seg.ws_file shall be set to a negative
112 : : * number.
113 : : */
114 : : WALSegmentCloseCB segment_close; /* must always be supplied */
115 : : } XLogReaderRoutine;
116 : :
117 : : #define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__} /* address of an XLogReaderRoutine compound literal built from the given initializers. NOTE(review): the expansion is not parenthesized, so a postfix operator applied to the macro result would bind before the '&' */
118 : :
119 : : typedef struct /* decoded form of one block reference; element type of DecodedXLogRecord.blocks[] */
120 : : {
121 : : /* Is this block ref in use? */
122 : : bool in_use;
123 : :
124 : : /* Identify the block this refers to */
125 : : RelFileLocator rlocator;
126 : : ForkNumber forknum;
127 : : BlockNumber blkno;
128 : :
129 : : /* Prefetching workspace (presumably what XLogRecGetBlockTagExtended() hands back in *prefetch_buffer -- TODO confirm). */
130 : : Buffer prefetch_buffer;
131 : :
132 : : /* copy of the fork_flags field from the XLogRecordBlockHeader */
133 : : uint8 flags;
134 : :
135 : : /* Information on full-page image, if any */
136 : : bool has_image; /* has image, even for consistency checking */
137 : : bool apply_image; /* has image that should be restored */
138 : : char *bkp_image; /* the image bytes; presumably only meaningful when has_image is set -- TODO confirm */
139 : : uint16 hole_offset;
140 : : uint16 hole_length;
141 : : uint16 bimg_len;
142 : : uint8 bimg_info;
143 : :
144 : : /* Buffer holding the rmgr-specific data associated with this block */
145 : : bool has_data;
146 : : char *data; /* points to memory after the declared members of the containing DecodedXLogRecord */
147 : : uint16 data_len;
148 : : } DecodedBkpBlock;
149 : :
150 : : /*
151 : : * The decoded contents of a record. This occupies a contiguous region of
152 : : * memory, with main_data and blocks[n].data pointing to memory after the
153 : : * members declared here.
154 : : */
155 : : typedef struct DecodedXLogRecord
156 : : {
157 : : /* Private members used for resource management. */
158 : : size_t size; /* total size of decoded record */
159 : : bool oversized; /* outside the regular decode buffer? */
160 : : struct DecodedXLogRecord *next; /* decoded record queue link */
161 : :
162 : : /* Public members. */
163 : : XLogRecPtr lsn; /* location */
164 : : XLogRecPtr next_lsn; /* location of next record */
165 : : XLogRecord header; /* header */
166 : : RepOriginId record_origin; /* read through XLogRecGetOrigin() */
167 : : TransactionId toplevel_xid; /* XID of top-level transaction */
168 : : char *main_data; /* record's main data portion */
169 : : uint32 main_data_len; /* main data portion's length */
170 : : int max_block_id; /* highest block_id in use (-1 if none) */
171 : : DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]; /* indexed by block_id; entries above max_block_id must not be accessed */
172 : : } DecodedXLogRecord;
173 : :
174 : : struct XLogReaderState
175 : : {
176 : : /*
177 : : * Operational callbacks
178 : : */
179 : : XLogReaderRoutine routine;
180 : :
181 : : /* ----------------------------------------
182 : : * Public parameters
183 : : * ----------------------------------------
184 : : */
185 : :
186 : : /*
187 : : * System identifier of the xlog files we're about to read. Set to zero
188 : : * (the default value) if unknown or unimportant.
189 : : */
190 : : uint64 system_identifier;
191 : :
192 : : /*
193 : : * Opaque data for callbacks to use. Not used by XLogReader.
194 : : */
195 : : void *private_data;
196 : :
197 : : /*
198 : : * Start and end point of the last record returned by XLogReadRecord().
199 : : * EndRecPtr is also used as the position to read next. Calling XLogBeginRead() sets EndRecPtr to the
200 : : * starting position and ReadRecPtr to invalid.
201 : : *
202 : : * For the record most recently returned, the same two values
203 : : * are also available as record->lsn and record->next_lsn.
204 : : */
205 : : XLogRecPtr ReadRecPtr; /* start of last record read */
206 : : XLogRecPtr EndRecPtr; /* end+1 of last record read */
207 : :
208 : : /*
209 : : * Set at the end of recovery: the start point of a partial record at the
210 : : * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
211 : : * location of its first contrecord that went missing.
212 : : */
213 : : XLogRecPtr abortedRecPtr;
214 : : XLogRecPtr missingContrecPtr;
215 : : /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
216 : : XLogRecPtr overwrittenRecPtr;
217 : :
218 : :
219 : : /* ----------------------------------------
220 : : * Decoded representation of current record
221 : : *
222 : : * Use XLogRecGet* functions to investigate the record; these fields
223 : : * should not be accessed directly.
224 : : * ----------------------------------------
225 : : * The three pointers below track decoding rather than reading: start and end point of the last record read and decoded by
226 : : * XLogReadRecord(). NextRecPtr is also used as the position to decode
227 : : * next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to the
228 : : * requested starting position.
229 : : */
230 : : XLogRecPtr DecodeRecPtr; /* start of last record decoded */
231 : : XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
232 : : XLogRecPtr PrevRecPtr; /* start of previous record decoded */
233 : :
234 : : /* Last record returned by XLogReadRecord(); this is what the XLogRec* macros dereference. */
235 : : DecodedXLogRecord *record;
236 : :
237 : : /* ----------------------------------------
238 : : * private/internal state
239 : : * ----------------------------------------
240 : : */
241 : :
242 : : /*
243 : : * Buffer for decoded records. This is a circular buffer, though
244 : : * individual records can't be split in the middle, so some space is often
245 : : * wasted at the end. Oversized records that don't fit in this space are
246 : : * allocated separately.
247 : : */
248 : : char *decode_buffer;
249 : : size_t decode_buffer_size;
250 : : bool free_decode_buffer; /* need to free? */
251 : : char *decode_buffer_head; /* data is read from the head */
252 : : char *decode_buffer_tail; /* new data is written at the tail */
253 : :
254 : : /*
255 : : * Queue of records that have been decoded. This is a linked list that
256 : : * usually consists of consecutive records in decode_buffer, but may also
257 : : * contain oversized records allocated with palloc().
258 : : */
259 : : DecodedXLogRecord *decode_queue_head; /* oldest decoded record */
260 : : DecodedXLogRecord *decode_queue_tail; /* newest decoded record */
261 : :
262 : : /*
263 : : * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
264 : : * readLen bytes)
265 : : */
266 : : char *readBuf;
267 : : uint32 readLen;
268 : :
269 : : /* last read XLOG position for data currently in readBuf */
270 : : WALSegmentContext segcxt;
271 : : WALOpenSegment seg;
272 : : uint32 segoff;
273 : :
274 : : /*
275 : : * beginning of prior page read, and its TLI. Doesn't necessarily
276 : : * correspond to what's in readBuf; used for timeline sanity checks.
277 : : */
278 : : XLogRecPtr latestPagePtr;
279 : : TimeLineID latestPageTLI;
280 : :
281 : : /* beginning of the WAL record being read. */
282 : : XLogRecPtr currRecPtr;
283 : : /* timeline to read it from, 0 if a lookup is required */
284 : : TimeLineID currTLI;
285 : :
286 : : /*
287 : : * Safe point to read to in currTLI if current TLI is historical
288 : : * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
289 : : *
290 : : * Actually set to the start of the segment containing the timeline switch
291 : : * that ends currTLI's validity, not the LSN of the switch itself, since
292 : : * we can't assume the old segment will be present.
293 : : */
294 : : XLogRecPtr currTLIValidUntil;
295 : :
296 : : /*
297 : : * If currTLI is not the most recent known timeline, the next timeline to
298 : : * read from when currTLIValidUntil is reached.
299 : : */
300 : : TimeLineID nextTLI;
301 : :
302 : : /*
303 : : * Buffer for current ReadRecord result (expandable), used when a record
304 : : * crosses a page boundary.
305 : : */
306 : : char *readRecordBuf;
307 : : uint32 readRecordBufSize;
308 : :
309 : : /* Buffer to hold error message */
310 : : char *errormsg_buf;
311 : : bool errormsg_deferred; /* tested by XLogReaderHasQueuedRecordOrError() as the "error to return" condition */
312 : :
313 : : /*
314 : : * Flag to indicate to XLogPageReadCB that it should not block waiting for
315 : : * data.
316 : : */
317 : : bool nonblocking;
318 : : };
319 : :
320 : : /*
321 : : * Check if XLogNextRecord() has any more queued records or an error to return.
322 : : * Returns true when state->decode_queue_head is non-NULL or state->errormsg_deferred is set; state itself is not modified. */
323 : : static inline bool
1369 tmunro@postgresql.or 324 :CBC 9129601 : XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
325 : : {
326 [ + + + + ]: 9129601 : return (state->decode_queue_head != NULL) || state->errormsg_deferred;
327 : : }
328 : :
329 : : /* Get a new XLogReader. "routine" supplies the callbacks (segment_close must always be supplied); "private_data" is opaque data for those callbacks. */
330 : : extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
331 : : const char *waldir,
332 : : XLogReaderRoutine *routine,
333 : : void *private_data);
334 : :
335 : : /* Free an XLogReader */
336 : : extern void XLogReaderFree(XLogReaderState *state);
337 : :
338 : : /* Optionally provide a circular decoding buffer of "size" bytes to allow readahead. */
339 : : extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
340 : : void *buffer,
341 : : size_t size);
342 : :
343 : : /* Position the XLogReader to given record. XLogBeginRead() sets EndRecPtr and NextRecPtr to the starting position and ReadRecPtr to invalid; XLogFindNextRecord() needs a page_read callback. */
344 : : extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
345 : : extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
346 : :
347 : : /* Return values from XLogPageReadCB. NOTE(review): the page_read contract in XLogReaderRoutine describes a byte count or -1, so confirm where XLREAD_SUCCESS and XLREAD_WOULDBLOCK are actually returned. */
348 : : typedef enum XLogPageReadResult
349 : : {
350 : : XLREAD_SUCCESS = 0, /* record is successfully read */
351 : : XLREAD_FAIL = -1, /* failed during reading a record */
352 : : XLREAD_WOULDBLOCK = -2, /* nonblocking mode only, no data */
353 : : } XLogPageReadResult;
354 : :
355 : : /* Read the next XLog record. Returns NULL on end-of-WAL or failure. Needs a page_read callback; afterwards the decoded parts are reachable through the XLogRec* macros and functions. */
356 : : extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
357 : : char **errormsg);
358 : :
359 : : /* Consume the next record or error; XLogReaderHasQueuedRecordOrError() tells whether there is one. */
360 : : extern DecodedXLogRecord *XLogNextRecord(XLogReaderState *state,
361 : : char **errormsg);
362 : :
363 : : /* Release the previously returned record, if necessary. */
364 : : extern XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state);
365 : :
366 : : /* Try to read ahead, if there is data and space. */
367 : : extern DecodedXLogRecord *XLogReadAhead(XLogReaderState *state,
368 : : bool nonblocking);
369 : :
370 : : /* Validate a page header; "phdr" points at the page and "recptr" is its WAL position (presumably the page start -- TODO confirm). */
371 : : extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
372 : : XLogRecPtr recptr, char *phdr);
373 : :
374 : : /* Forget error produced by XLogReaderValidatePageHeader(). */
375 : : extern void XLogReaderResetError(XLogReaderState *state);
376 : :
377 : : /*
378 : : * Error information from WALRead that both backend and frontend callers can
379 : : * process. Currently only errors from pg_pread can be reported.
380 : : */
381 : : typedef struct WALReadError
382 : : {
383 : : int wre_errno; /* errno set by the last pg_pread() */
384 : : int wre_off; /* Offset we tried to read from. */
385 : : int wre_req; /* Bytes requested to be read. */
386 : : int wre_read; /* Bytes read by the last pg_pread(). */
387 : : WALOpenSegment wre_seg; /* Segment we tried to read from. */
388 : : } WALReadError;
389 : :
390 : : extern bool WALRead(XLogReaderState *state,
391 : : char *buf, XLogRecPtr startptr, Size count,
392 : : TimeLineID tli, WALReadError *errinfo); /* optional helper for writing page_read callbacks; callers that use it must supply segment_open. Presumably returns false and fills *errinfo on failure -- TODO confirm */
393 : :
394 : : /* Functions for decoding an XLogRecord that is already constructed in memory, without reading from disk */
395 : :
396 : : extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
397 : : extern bool DecodeXLogRecord(XLogReaderState *state,
398 : : DecodedXLogRecord *decoded,
399 : : XLogRecord *record,
400 : : XLogRecPtr lsn,
401 : : char **errormsg);
402 : :
403 : : /*
404 : : * Macros that provide access to parts of the record most recently returned by
405 : : * XLogReadRecord() or XLogNextRecord(). Every macro dereferences (decoder)->record, so that pointer must be non-NULL. Only XLogRecHasBlockRef checks block_id against max_block_id and in_use; the other per-block macros index blocks[] unchecked. XLogRecHasBlockRef evaluates both of its arguments twice.
406 : : */
407 : : #define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
408 : : #define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
409 : : #define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
410 : : #define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
411 : : #define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
412 : : #define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
413 : : #define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
414 : : #define XLogRecGetData(decoder) ((decoder)->record->main_data)
415 : : #define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
416 : : #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
417 : : #define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
418 : : #define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
419 : : #define XLogRecHasBlockRef(decoder, block_id) \
420 : : (((decoder)->record->max_block_id >= (block_id)) && \
421 : : ((decoder)->record->blocks[block_id].in_use))
422 : : #define XLogRecHasBlockImage(decoder, block_id) \
423 : : ((decoder)->record->blocks[block_id].has_image)
424 : : #define XLogRecBlockImageApply(decoder, block_id) \
425 : : ((decoder)->record->blocks[block_id].apply_image)
426 : : #define XLogRecHasBlockData(decoder, block_id) \
427 : : ((decoder)->record->blocks[block_id].has_data)
428 : :
429 : : #ifndef FRONTEND /* declared for non-FRONTEND builds only, matching the conditional include of access/transam.h at the top of this header */
430 : : extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
431 : : #endif
432 : :
433 : : extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); /* note: in these prototypes the parameter named "record" is the reader state, not a record */
434 : : extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); /* presumably returns the block's rmgr-specific data and stores its length in *len -- TODO confirm */
435 : : extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
436 : : RelFileLocator *rlocator, ForkNumber *forknum,
437 : : BlockNumber *blknum);
438 : : extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
439 : : RelFileLocator *rlocator, ForkNumber *forknum,
440 : : BlockNumber *blknum,
441 : : Buffer *prefetch_buffer); /* same outputs as XLogRecGetBlockTag plus *prefetch_buffer; the bool result presumably reports whether the block reference exists -- TODO confirm */
442 : :
443 : : #endif /* XLOGREADER_H */
|