LCOV - c3df85756ceb0246958ef2b72c04aba51e52de13 vs 167cb26718e3eae4fef470900b4cd1d434f15649

LCOV - differential code coverage report

Current view:	top level - src/backend/access/transam - xlogutils.c (source / functions)		Coverage	Total	Hit	UNC	UBC	GNC	CBC	DCB
Current:	c3df85756ceb0246958ef2b72c04aba51e52de13 vs 167cb26718e3eae4fef470900b4cd1d434f15649	Lines:	67.1 %	234	157		77	7	150	7
Current Date:	2025-12-18 07:33:40 +0900	Functions:	86.4 %	22	19		3	3	16
Baseline:	lcov-20251218-005734-baseline	Branches:	43.0 %	200	86	5	109	9	77
Baseline Date:	2025-12-17 11:55:04 -0800	Line coverage date bins:
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	(7,30] days:	100.0 %	1	1			1
		(30,360] days:	66.7 %	9	6		3	6
		(360..) days:	67.0 %	224	150		74		150
		Function coverage date bins:
		(360..) days:	86.4 %	22	19		3	3	16
		Branch coverage date bins:
		(30,360] days:	50.0 %	18	9	5	4	9
		(360..) days:	42.3 %	182	77		105		77

 Age         Owner                    Branch data    TLA  Line data    Source code

                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * xlogutils.c
                                  4                 :                :  *
                                  5                 :                :  * PostgreSQL write-ahead log manager utility routines
                                  6                 :                :  *
                                  7                 :                :  * This file contains support routines that are used by XLOG replay functions.
                                  8                 :                :  * None of this code is used during normal system operation.
                                  9                 :                :  *
                                 10                 :                :  *
                                 11                 :                :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
                                 12                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                 13                 :                :  *
                                 14                 :                :  * src/backend/access/transam/xlogutils.c
                                 15                 :                :  *
                                 16                 :                :  *-------------------------------------------------------------------------
                                 17                 :                :  */
                                 18                 :                : #include "postgres.h"
                                 19                 :                : 
                                 20                 :                : #include <unistd.h>
                                 21                 :                : 
                                 22                 :                : #include "access/timeline.h"
                                 23                 :                : #include "access/xlogrecovery.h"
                                 24                 :                : #include "access/xlog_internal.h"
                                 25                 :                : #include "access/xlogutils.h"
                                 26                 :                : #include "miscadmin.h"
                                 27                 :                : #include "storage/fd.h"
                                 28                 :                : #include "storage/smgr.h"
                                 29                 :                : #include "utils/hsearch.h"
                                 30                 :                : #include "utils/rel.h"
                                 31                 :                : 
                                 32                 :                : 
                                 33                 :                : /* GUC variable */
                                 34                 :                : bool        ignore_invalid_pages = false;
                                 35                 :                : 
                                 36                 :                : /*
                                 37                 :                :  * Are we doing recovery from XLOG?
                                 38                 :                :  *
                                 39                 :                :  * This is only ever true in the startup process; it should be read as meaning
                                 40                 :                :  * "this process is replaying WAL records", rather than "the system is in
                                 41                 :                :  * recovery mode".  It should be examined primarily by functions that need
                                 42                 :                :  * to act differently when called from a WAL redo function (e.g., to skip WAL
                                 43                 :                :  * logging).  To check whether the system is in recovery regardless of which
                                 44                 :                :  * process you're running in, use RecoveryInProgress() but only after shared
                                 45                 :                :  * memory startup and lock initialization.
                                 46                 :                :  *
                                 47                 :                :  * This is updated from xlog.c and xlogrecovery.c, but lives here because
                                 48                 :                :  * it's mostly read by WAL redo functions.
                                 49                 :                :  */
                                 50                 :                : bool        InRecovery = false;
                                 51                 :                : 
                                 52                 :                : /* Are we in Hot Standby mode? Only valid in startup process, see xlogutils.h */
                                 53                 :                : HotStandbyState standbyState = STANDBY_DISABLED;
                                 54                 :                : 
                                 55                 :                : /*
                                 56                 :                :  * During XLOG replay, we may see XLOG records for incremental updates of
                                 57                 :                :  * pages that no longer exist, because their relation was later dropped or
                                 58                 :                :  * truncated.  (Note: this is only possible when full_page_writes = OFF,
                                 59                 :                :  * since when it's ON, the first reference we see to a page should always
                                 60                 :                :  * be a full-page rewrite not an incremental update.)  Rather than simply
                                 61                 :                :  * ignoring such records, we make a note of the referenced page, and then
                                 62                 :                :  * complain if we don't actually see a drop or truncate covering the page
                                 63                 :                :  * later in replay.
                                 64                 :                :  */
                                 65                 :                : typedef struct xl_invalid_page_key
                                 66                 :                : {
                                 67                 :                :     RelFileLocator locator;     /* the relation */
                                 68                 :                :     ForkNumber  forkno;         /* the fork number */
                                 69                 :                :     BlockNumber blkno;          /* the page */
                                 70                 :                : } xl_invalid_page_key;
                                 71                 :                : 
                                 72                 :                : typedef struct xl_invalid_page
                                 73                 :                : {
                                 74                 :                :     xl_invalid_page_key key;    /* hash key ... must be first */
                                 75                 :                :     bool        present;        /* page existed but contained zeroes */
                                 76                 :                : } xl_invalid_page;
                                 77                 :                : 
                                 78                 :                : static HTAB *invalid_page_tab = NULL;
                                 79                 :                : 
                                 80                 :                : static int  read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                 81                 :                :                                       int reqLen, XLogRecPtr targetRecPtr,
                                 82                 :                :                                       char *cur_page, bool wait_for_wal);
                                 83                 :                : 
                                 84                 :                : /* Report a reference to an invalid page */
                                 85                 :                : static void
 1261 rhaas@postgresql.org       86                 :UBC           0 : report_invalid_page(int elevel, RelFileLocator locator, ForkNumber forkno,
                                 87                 :                :                     BlockNumber blkno, bool present)
                                 88                 :                : {
  296 andres@anarazel.de         89                 :              0 :     RelPathStr  path = relpathperm(locator, forkno);
                                 90                 :                : 
 5130 heikki.linnakangas@i       91         [ #  # ]:              0 :     if (present)
                                 92         [ #  # ]:              0 :         elog(elevel, "page %u of relation %s is uninitialized",
                                 93                 :                :              blkno, path.str);
                                 94                 :                :     else
                                 95         [ #  # ]:              0 :         elog(elevel, "page %u of relation %s does not exist",
                                 96                 :                :              blkno, path.str);
                                 97                 :              0 : }
                                 98                 :                : 
                                 99                 :                : /* Log a reference to an invalid page */
                                100                 :                : static void
 1261 rhaas@postgresql.org      101                 :              0 : log_invalid_page(RelFileLocator locator, ForkNumber forkno, BlockNumber blkno,
                                102                 :                :                  bool present)
                                103                 :                : {
                                104                 :                :     xl_invalid_page_key key;
                                105                 :                :     xl_invalid_page *hentry;
                                106                 :                :     bool        found;
                                107                 :                : 
                                108                 :                :     /*
                                109                 :                :      * Once recovery has reached a consistent state, the invalid-page table
                                110                 :                :      * should be empty and remain so. If a reference to an invalid page is
                                111                 :                :      * found after consistency is reached, PANIC immediately. This might seem
                                112                 :                :      * aggressive, but it's better than letting the invalid reference linger
                                113                 :                :      * in the hash table until the end of recovery and PANIC there, which
                                114                 :                :      * might come only much later if this is a standby server.
                                115                 :                :      */
 5123 heikki.linnakangas@i      116         [ #  # ]:              0 :     if (reachedConsistency)
                                117                 :                :     {
 1261 rhaas@postgresql.org      118                 :              0 :         report_invalid_page(WARNING, locator, forkno, blkno, present);
 2157 fujii@postgresql.org      119   [ #  #  #  # ]:              0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
                                120                 :                :              "WAL contains references to invalid pages");
                                121                 :                :     }
                                122                 :                : 
                                123                 :                :     /*
                                124                 :                :      * Log references to invalid pages at DEBUG1 level.  This allows some
                                125                 :                :      * tracing of the cause (note the elog context mechanism will tell us
                                126                 :                :      * something about the XLOG record that generated the reference).
                                127                 :                :      */
 1851 tgl@sss.pgh.pa.us         128         [ #  # ]:              0 :     if (message_level_is_interesting(DEBUG1))
 1261 rhaas@postgresql.org      129                 :              0 :         report_invalid_page(DEBUG1, locator, forkno, blkno, present);
                                130                 :                : 
 7188 tgl@sss.pgh.pa.us         131         [ #  # ]:              0 :     if (invalid_page_tab == NULL)
                                132                 :                :     {
                                133                 :                :         /* create hash table when first needed */
                                134                 :                :         HASHCTL     ctl;
                                135                 :                : 
                                136                 :              0 :         ctl.keysize = sizeof(xl_invalid_page_key);
                                137                 :              0 :         ctl.entrysize = sizeof(xl_invalid_page);
                                138                 :                : 
                                139                 :              0 :         invalid_page_tab = hash_create("XLOG invalid-page table",
                                140                 :                :                                        100,
                                141                 :                :                                        &ctl,
                                142                 :                :                                        HASH_ELEM | HASH_BLOBS);
                                143                 :                :     }
                                144                 :                : 
                                145                 :                :     /* we currently assume xl_invalid_page_key contains no padding */
 1261 rhaas@postgresql.org      146                 :              0 :     key.locator = locator;
 6338 heikki.linnakangas@i      147                 :              0 :     key.forkno = forkno;
 7188 tgl@sss.pgh.pa.us         148                 :              0 :     key.blkno = blkno;
                                149                 :                :     hentry = (xl_invalid_page *)
 1046 peter@eisentraut.org      150                 :              0 :         hash_search(invalid_page_tab, &key, HASH_ENTER, &found);
                                151                 :                : 
 7188 tgl@sss.pgh.pa.us         152         [ #  # ]:              0 :     if (!found)
                                153                 :                :     {
                                154                 :                :         /* hash_search already filled in the key */
                                155                 :              0 :         hentry->present = present;
                                156                 :                :     }
                                157                 :                :     else
                                158                 :                :     {
                                159                 :                :         /* repeat reference ... leave "present" as it was */
                                160                 :                :     }
                                161                 :              0 : }
                                162                 :                : 
                                163                 :                : /* Forget any invalid pages >= minblkno, because they've been dropped */
                                164                 :                : static void
 1261 rhaas@postgresql.org      165                 :CBC       31906 : forget_invalid_pages(RelFileLocator locator, ForkNumber forkno,
                                166                 :                :                      BlockNumber minblkno)
                                167                 :                : {
                                168                 :                :     HASH_SEQ_STATUS status;
                                169                 :                :     xl_invalid_page *hentry;
                                170                 :                : 
 7188 tgl@sss.pgh.pa.us         171         [ +  - ]:          31906 :     if (invalid_page_tab == NULL)
                                172                 :          31906 :         return;                 /* nothing to do */
                                173                 :                : 
 7188 tgl@sss.pgh.pa.us         174                 :UBC           0 :     hash_seq_init(&status, invalid_page_tab);
                                175                 :                : 
                                176         [ #  # ]:              0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
                                177                 :                :     {
 1261 rhaas@postgresql.org      178   [ #  #  #  #  :              0 :         if (RelFileLocatorEquals(hentry->key.locator, locator) &&
                                              #  # ]
 6338 heikki.linnakangas@i      179         [ #  # ]:              0 :             hentry->key.forkno == forkno &&
 7188 tgl@sss.pgh.pa.us         180         [ #  # ]:              0 :             hentry->key.blkno >= minblkno)
                                181                 :                :         {
  296 andres@anarazel.de        182         [ #  # ]:              0 :             elog(DEBUG2, "page %u of relation %s has been dropped",
                                183                 :                :                  hentry->key.blkno,
                                184                 :                :                  relpathperm(hentry->key.locator, forkno).str);
                                185                 :                : 
 7188 tgl@sss.pgh.pa.us         186         [ #  # ]:              0 :             if (hash_search(invalid_page_tab,
 1046 peter@eisentraut.org      187                 :              0 :                             &hentry->key,
                                188                 :                :                             HASH_REMOVE, NULL) == NULL)
 7188 tgl@sss.pgh.pa.us         189         [ #  # ]:              0 :                 elog(ERROR, "hash table corrupted");
                                190                 :                :         }
                                191                 :                :     }
                                192                 :                : }
                                193                 :                : 
                                194                 :                : /* Forget any invalid pages in a whole database */
                                195                 :                : static void
 7188 tgl@sss.pgh.pa.us         196                 :CBC          13 : forget_invalid_pages_db(Oid dbid)
                                197                 :                : {
                                198                 :                :     HASH_SEQ_STATUS status;
                                199                 :                :     xl_invalid_page *hentry;
                                200                 :                : 
                                201         [ +  - ]:             13 :     if (invalid_page_tab == NULL)
                                202                 :             13 :         return;                 /* nothing to do */
                                203                 :                : 
 7188 tgl@sss.pgh.pa.us         204                 :UBC           0 :     hash_seq_init(&status, invalid_page_tab);
                                205                 :                : 
                                206         [ #  # ]:              0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
                                207                 :                :     {
 1261 rhaas@postgresql.org      208         [ #  # ]:              0 :         if (hentry->key.locator.dbOid == dbid)
                                209                 :                :         {
  296 andres@anarazel.de        210         [ #  # ]:              0 :             elog(DEBUG2, "page %u of relation %s has been dropped",
                                211                 :                :                  hentry->key.blkno,
                                212                 :                :                  relpathperm(hentry->key.locator, hentry->key.forkno).str);
                                213                 :                : 
 7188 tgl@sss.pgh.pa.us         214         [ #  # ]:              0 :             if (hash_search(invalid_page_tab,
 1046 peter@eisentraut.org      215                 :              0 :                             &hentry->key,
                                216                 :                :                             HASH_REMOVE, NULL) == NULL)
 7188 tgl@sss.pgh.pa.us         217         [ #  # ]:              0 :                 elog(ERROR, "hash table corrupted");
                                218                 :                :         }
                                219                 :                :     }
                                220                 :                : }
                                221                 :                : 
                                222                 :                : /* Are there any unresolved references to invalid pages? */
                                223                 :                : bool
 5130 heikki.linnakangas@i      224                 :CBC         702 : XLogHaveInvalidPages(void)
                                225                 :                : {
                                226   [ -  +  -  - ]:            702 :     if (invalid_page_tab != NULL &&
 5130 heikki.linnakangas@i      227                 :UBC           0 :         hash_get_num_entries(invalid_page_tab) > 0)
                                228                 :              0 :         return true;
 5130 heikki.linnakangas@i      229                 :CBC         702 :     return false;
                                230                 :                : }
                                231                 :                : 
                                232                 :                : /* Complain about any remaining invalid-page entries */
                                233                 :                : void
 7188 tgl@sss.pgh.pa.us         234                 :            110 : XLogCheckInvalidPages(void)
                                235                 :                : {
                                236                 :                :     HASH_SEQ_STATUS status;
                                237                 :                :     xl_invalid_page *hentry;
                                238                 :            110 :     bool        foundone = false;
                                239                 :                : 
                                240         [ +  - ]:            110 :     if (invalid_page_tab == NULL)
                                241                 :            110 :         return;                 /* nothing to do */
                                242                 :                : 
 7188 tgl@sss.pgh.pa.us         243                 :UBC           0 :     hash_seq_init(&status, invalid_page_tab);
                                244                 :                : 
                                245                 :                :     /*
                                246                 :                :      * Our strategy is to emit WARNING messages for all remaining entries and
                                247                 :                :      * only PANIC after we've dumped all the available info.
                                248                 :                :      */
                                249         [ #  # ]:              0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
                                250                 :                :     {
 1261 rhaas@postgresql.org      251                 :              0 :         report_invalid_page(WARNING, hentry->key.locator, hentry->key.forkno,
 5130 heikki.linnakangas@i      252                 :              0 :                             hentry->key.blkno, hentry->present);
 7188 tgl@sss.pgh.pa.us         253                 :              0 :         foundone = true;
                                254                 :                :     }
                                255                 :                : 
                                256         [ #  # ]:              0 :     if (foundone)
 2157 fujii@postgresql.org      257   [ #  #  #  # ]:              0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
                                258                 :                :              "WAL contains references to invalid pages");
                                259                 :                : 
 6398 heikki.linnakangas@i      260                 :              0 :     hash_destroy(invalid_page_tab);
                                261                 :              0 :     invalid_page_tab = NULL;
                                262                 :                : }
                                263                 :                : 
                                264                 :                : 
                                265                 :                : /*
                                266                 :                :  * XLogReadBufferForRedo
                                267                 :                :  *      Read a page during XLOG replay
                                268                 :                :  *
                                269                 :                :  * Reads a block referenced by a WAL record into shared buffer cache, and
                                270                 :                :  * determines what needs to be done to redo the changes to it.  If the WAL
                                271                 :                :  * record includes a full-page image of the page, it is restored.
                                272                 :                :  *
                                273                 :                :  * 'record.EndRecPtr' is compared to the page's LSN to determine if the record
                                274                 :                :  * has already been replayed.  'block_id' is the ID number the block was
                                275                 :                :  * registered with, when the WAL record was created.
                                276                 :                :  *
                                277                 :                :  * Returns one of the following:
                                278                 :                :  *
                                279                 :                :  *  BLK_NEEDS_REDO  - changes from the WAL record need to be applied
                                280                 :                :  *  BLK_DONE        - block doesn't need replaying
                                281                 :                :  *  BLK_RESTORED    - block was restored from a full-page image included in
                                282                 :                :  *                    the record
                                283                 :                :  *  BLK_NOTFOUND    - block was not found (because it was truncated away by
                                284                 :                :  *                    an operation later in the WAL stream)
                                285                 :                :  *
                                286                 :                :  * On return, the buffer is locked in exclusive-mode, and returned in *buf.
                                287                 :                :  * Note that the buffer is locked and returned even if it doesn't need
                                288                 :                :  * replaying.  (Getting the buffer lock is not really necessary during
                                289                 :                :  * single-process crash recovery, but some subroutines such as MarkBufferDirty
                                290                 :                :  * will complain if we don't have the lock.  In hot standby mode it's
                                291                 :                :  * definitely necessary.)
                                292                 :                :  *
                                293                 :                :  * Note: when a backup block is available in XLOG with the BKPIMAGE_APPLY flag
                                294                 :                :  * set, we restore it, even if the page in the database appears newer.  This
                                295                 :                :  * is to protect ourselves against database pages that were partially or
                                296                 :                :  * incorrectly written during a crash.  We assume that the XLOG data must be
                                297                 :                :  * good because it has passed a CRC check, while the database page might not
                                298                 :                :  * be.  This will force us to replay all subsequent modifications of the page
                                299                 :                :  * that appear in XLOG, rather than possibly ignoring them as already
                                300                 :                :  * applied, but that's not a huge drawback.
                                301                 :                :  */
                                302                 :                : XLogRedoAction
 4046 heikki.linnakangas@i      303                 :CBC     2856259 : XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
                                304                 :                :                       Buffer *buf)
                                305                 :                : {
                                306                 :        2856259 :     return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
                                307                 :                :                                          false, buf);
                                308                 :                : }
                                309                 :                : 
                                310                 :                : /*
                                311                 :                :  * Pin and lock a buffer referenced by a WAL record, for the purpose of
                                312                 :                :  * re-initializing it.
                                313                 :                :  */
                                314                 :                : Buffer
                                315                 :          52118 : XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
                                316                 :                : {
                                317                 :                :     Buffer      buf;
                                318                 :                : 
                                319                 :          52118 :     XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
                                320                 :                :                                   &buf);
                                321                 :          52118 :     return buf;
                                322                 :                : }
                                323                 :                : 
                                324                 :                : /*
                                325                 :                :  * XLogReadBufferForRedoExtended
                                326                 :                :  *      Like XLogReadBufferForRedo, but with extra options.
                                327                 :                :  *
                                328                 :                :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
                                329                 :                :  * with all-zeroes pages up to the referenced block number.  In
                                330                 :                :  * RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK modes, the return value
                                331                 :                :  * is always BLK_NEEDS_REDO.
                                332                 :                :  *
                                333                 :                :  * (The RBM_ZERO_AND_CLEANUP_LOCK mode is redundant with the get_cleanup_lock
                                334                 :                :  * parameter. Do not use an inconsistent combination!)
                                335                 :                :  *
                                336                 :                :  * If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
                                337                 :                :  * using LockBufferForCleanup(), instead of a regular exclusive lock.
                                338                 :                :  */
                                339                 :                : XLogRedoAction
                                340                 :        2932953 : XLogReadBufferForRedoExtended(XLogReaderState *record,
                                341                 :                :                               uint8 block_id,
                                342                 :                :                               ReadBufferMode mode, bool get_cleanup_lock,
                                343                 :                :                               Buffer *buf)
                                344                 :                : {
                                345                 :        2932953 :     XLogRecPtr  lsn = record->EndRecPtr;
                                346                 :                :     RelFileLocator rlocator;
                                347                 :                :     ForkNumber  forknum;
                                348                 :                :     BlockNumber blkno;
                                349                 :                :     Buffer      prefetch_buffer;
                                350                 :                :     Page        page;
                                351                 :                :     bool        zeromode;
                                352                 :                :     bool        willinit;
                                353                 :                : 
 1261 rhaas@postgresql.org      354         [ -  + ]:        2932953 :     if (!XLogRecGetBlockTagExtended(record, block_id, &rlocator, &forknum, &blkno,
                                355                 :                :                                     &prefetch_buffer))
                                356                 :                :     {
                                357                 :                :         /* Caller specified a bogus block_id */
 1347 tgl@sss.pgh.pa.us         358         [ #  # ]:UBC           0 :         elog(PANIC, "failed to locate backup block with ID %d in WAL record",
                                359                 :                :              block_id);
                                360                 :                :     }
                                361                 :                : 
                                362                 :                :     /*
                                363                 :                :      * Make sure that if the block is marked with WILL_INIT, the caller is
                                364                 :                :      * going to initialize it. And vice versa.
                                365                 :                :      */
 3804 heikki.linnakangas@i      366   [ +  +  +  + ]:CBC     2932953 :     zeromode = (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
 1371 tmunro@postgresql.or      367                 :        2932953 :     willinit = (XLogRecGetBlock(record, block_id)->flags & BKPBLOCK_WILL_INIT) != 0;
 3804 heikki.linnakangas@i      368   [ +  +  -  + ]:        2932953 :     if (willinit && !zeromode)
 3804 heikki.linnakangas@i      369         [ #  # ]:UBC           0 :         elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
 3804 heikki.linnakangas@i      370   [ +  +  -  + ]:CBC     2932953 :     if (!willinit && zeromode)
 3804 heikki.linnakangas@i      371         [ #  # ]:UBC           0 :         elog(PANIC, "block to be initialized in redo routine must be marked with WILL_INIT flag in the WAL record");
                                372                 :                : 
                                373                 :                :     /* If it has a full-page image and it should be restored, do it. */
 3235 rhaas@postgresql.org      374         [ +  + ]:CBC     2932953 :     if (XLogRecBlockImageApply(record, block_id))
                                375                 :                :     {
                                376         [ -  + ]:          72880 :         Assert(XLogRecHasBlockImage(record, block_id));
 1261                           377         [ +  + ]:          72880 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno,
                                378                 :                :                                       get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK,
                                379                 :                :                                       prefetch_buffer);
 3529 kgrittn@postgresql.o      380                 :          72880 :         page = BufferGetPage(*buf);
 4046 heikki.linnakangas@i      381         [ -  + ]:          72880 :         if (!RestoreBlockImage(record, block_id, page))
 1196 michael@paquier.xyz       382         [ #  # ]:UBC           0 :             ereport(ERROR,
                                383                 :                :                     (errcode(ERRCODE_INTERNAL_ERROR),
                                384                 :                :                      errmsg_internal("%s", record->errormsg_buf)));
                                385                 :                : 
                                386                 :                :         /*
                                387                 :                :          * The page may be uninitialized. If so, we can't set the LSN because
                                388                 :                :          * that would corrupt the page.
                                389                 :                :          */
 4046 heikki.linnakangas@i      390         [ +  + ]:CBC       72880 :         if (!PageIsNew(page))
                                391                 :                :         {
                                392                 :          72864 :             PageSetLSN(page, lsn);
                                393                 :                :         }
                                394                 :                : 
                                395                 :          72880 :         MarkBufferDirty(*buf);
                                396                 :                : 
                                397                 :                :         /*
                                398                 :                :          * At the end of crash recovery the init forks of unlogged relations
                                399                 :                :          * are copied, without going through shared buffers. So we need to
                                400                 :                :          * force the on-disk state of init forks to always be in sync with the
                                401                 :                :          * state in shared buffers.
                                402                 :                :          */
 3661 andres@anarazel.de        403         [ +  + ]:          72880 :         if (forknum == INIT_FORKNUM)
                                404                 :             26 :             FlushOneBuffer(*buf);
                                405                 :                : 
 4145 heikki.linnakangas@i      406                 :          72880 :         return BLK_RESTORED;
                                407                 :                :     }
                                408                 :                :     else
                                409                 :                :     {
 1261 rhaas@postgresql.org      410                 :        2860073 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno, mode, prefetch_buffer);
 4145 heikki.linnakangas@i      411         [ +  - ]:        2860073 :         if (BufferIsValid(*buf))
                                412                 :                :         {
 4053                           413   [ +  +  +  + ]:        2860073 :             if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
                                414                 :                :             {
                                415         [ +  + ]:        2807734 :                 if (get_cleanup_lock)
                                416                 :           9730 :                     LockBufferForCleanup(*buf);
                                417                 :                :                 else
                                418                 :        2798004 :                     LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
                                419                 :                :             }
 3529 kgrittn@postgresql.o      420         [ -  + ]:        2860073 :             if (lsn <= PageGetLSN(BufferGetPage(*buf)))
 4145 heikki.linnakangas@i      421                 :UBC           0 :                 return BLK_DONE;
                                422                 :                :             else
 4145 heikki.linnakangas@i      423                 :CBC     2860073 :                 return BLK_NEEDS_REDO;
                                424                 :                :         }
                                425                 :                :         else
 4145 heikki.linnakangas@i      426                 :UBC           0 :             return BLK_NOTFOUND;
                                427                 :                :     }
                                428                 :                : }
                                429                 :                : 
                                430                 :                : /*
                                431                 :                :  * XLogReadBufferExtended
                                432                 :                :  *      Read a page during XLOG replay
                                433                 :                :  *
                                434                 :                :  * This is functionally comparable to ReadBufferExtended. There's some
                                435                 :                :  * differences in the behavior wrt. the "mode" argument:
                                436                 :                :  *
                                437                 :                :  * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we
                                438                 :                :  * return InvalidBuffer. In this case the caller should silently skip the
                                439                 :                :  * update on this page. (In this situation, we expect that the page was later
                                440                 :                :  * dropped or truncated. If we don't see evidence of that later in the WAL
                                441                 :                :  * sequence, we'll complain at the end of WAL replay.)
                                442                 :                :  *
                                443                 :                :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
                                444                 :                :  * with all-zeroes pages up to the given block number.
                                445                 :                :  *
                                446                 :                :  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
                                447                 :                :  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
                                448                 :                :  * to imply that the page should be dropped or truncated later.
                                449                 :                :  *
                                450                 :                :  * Optionally, recent_buffer can be used to provide a hint about the location
                                451                 :                :  * of the page in the buffer pool; it does not have to be correct, but avoids
                                452                 :                :  * a buffer mapping table probe if it is.
                                453                 :                :  *
                                454                 :                :  * NB: A redo function should normally not call this directly. To get a page
                                455                 :                :  * to modify, use XLogReadBufferForRedoExtended instead. It is important that
                                456                 :                :  * all pages modified by a WAL record are registered in the WAL records, or
                                457                 :                :  * they will be invisible to tools that need to know which pages are modified.
                                458                 :                :  */
                                459                 :                : Buffer
 1261 rhaas@postgresql.org      460                 :CBC     5579544 : XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
                                461                 :                :                        BlockNumber blkno, ReadBufferMode mode,
                                462                 :                :                        Buffer recent_buffer)
                                463                 :                : {
                                464                 :                :     BlockNumber lastblock;
                                465                 :                :     Buffer      buffer;
                                466                 :                :     SMgrRelation smgr;
                                467                 :                : 
 7204 tgl@sss.pgh.pa.us         468         [ -  + ]:        5579544 :     Assert(blkno != P_NEW);
                                469                 :                : 
                                470                 :                :     /* Do we have a clue where the buffer might be already? */
 1351 tmunro@postgresql.or      471   [ +  +  +  - ]:        5579544 :     if (BufferIsValid(recent_buffer) &&
                                472         [ +  + ]:           4007 :         mode == RBM_NORMAL &&
 1261 rhaas@postgresql.org      473                 :           4007 :         ReadRecentBuffer(rlocator, forknum, blkno, recent_buffer))
                                474                 :                :     {
 1351 tmunro@postgresql.or      475                 :           3967 :         buffer = recent_buffer;
                                476                 :           3967 :         goto recent_buffer_fast_path;
                                477                 :                :     }
                                478                 :                : 
                                479                 :                :     /* Open the relation at smgr level */
  655 heikki.linnakangas@i      480                 :        5575577 :     smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
                                481                 :                : 
                                482                 :                :     /*
                                483                 :                :      * Create the target file if it doesn't already exist.  This lets us cope
                                484                 :                :      * if the replay sequence contains writes to a relation that is later
                                485                 :                :      * deleted.  (The original coding of this routine would instead suppress
                                486                 :                :      * the writes, but that seems like it risks losing valuable data if the
                                487                 :                :      * filesystem loses an inode during a crash.  Better to write the data
                                488                 :                :      * until we are actually told to delete the file.)
                                489                 :                :      */
 6238                           490                 :        5575577 :     smgrcreate(smgr, forknum, true);
                                491                 :                : 
 6338                           492                 :        5575577 :     lastblock = smgrnblocks(smgr, forknum);
                                493                 :                : 
 7204 tgl@sss.pgh.pa.us         494         [ +  + ]:        5575577 :     if (blkno < lastblock)
                                495                 :                :     {
                                496                 :                :         /* page exists in file */
 1261 rhaas@postgresql.org      497                 :        5532429 :         buffer = ReadBufferWithoutRelcache(rlocator, forknum, blkno,
                                498                 :                :                                            mode, NULL, true);
                                499                 :                :     }
                                500                 :                :     else
                                501                 :                :     {
                                502                 :                :         /* hm, page doesn't exist in file */
 6257 heikki.linnakangas@i      503         [ -  + ]:          43148 :         if (mode == RBM_NORMAL)
                                504                 :                :         {
 1261 rhaas@postgresql.org      505                 :UBC           0 :             log_invalid_page(rlocator, forknum, blkno, false);
 7188 tgl@sss.pgh.pa.us         506                 :              0 :             return InvalidBuffer;
                                507                 :                :         }
 4356 tgl@sss.pgh.pa.us         508         [ -  + ]:CBC       43148 :         if (mode == RBM_NORMAL_NO_LOG)
 4356 tgl@sss.pgh.pa.us         509                 :UBC           0 :             return InvalidBuffer;
                                510                 :                :         /* OK to extend the file */
                                511                 :                :         /* we do this in recovery only - no rel-extension lock needed */
 7204 tgl@sss.pgh.pa.us         512         [ -  + ]:CBC       43148 :         Assert(InRecovery);
  848 tmunro@postgresql.or      513                 :          43148 :         buffer = ExtendBufferedRelTo(BMR_SMGR(smgr, RELPERSISTENCE_PERMANENT),
                                514                 :                :                                      forknum,
                                515                 :                :                                      NULL,
                                516                 :                :                                      EB_PERFORMING_RECOVERY |
                                517                 :                :                                      EB_SKIP_EXTENSION_LOCK,
                                518                 :                :                                      blkno + 1,
                                519                 :                :                                      mode);
                                520                 :                :     }
                                521                 :                : 
 1351                           522                 :        5579544 : recent_buffer_fast_path:
 6257 heikki.linnakangas@i      523         [ +  + ]:        5579544 :     if (mode == RBM_NORMAL)
                                524                 :                :     {
                                525                 :                :         /* check that page has been initialized */
  111 peter@eisentraut.org      526                 :GNC     2800059 :         Page        page = BufferGetPage(buffer);
                                527                 :                : 
                                528                 :                :         /*
                                529                 :                :          * We assume that PageIsNew is safe without a lock. During recovery,
                                530                 :                :          * there should be no other backends that could modify the buffer at
                                531                 :                :          * the same time.
                                532                 :                :          */
 6367 tgl@sss.pgh.pa.us         533         [ -  + ]:CBC     2800059 :         if (PageIsNew(page))
                                534                 :                :         {
 6176 heikki.linnakangas@i      535                 :UBC           0 :             ReleaseBuffer(buffer);
 1261 rhaas@postgresql.org      536                 :              0 :             log_invalid_page(rlocator, forknum, blkno, true);
 7188 tgl@sss.pgh.pa.us         537                 :              0 :             return InvalidBuffer;
                                538                 :                :         }
                                539                 :                :     }
                                540                 :                : 
 7281 neilc@samurai.com         541                 :CBC     5579544 :     return buffer;
                                542                 :                : }
                                543                 :                : 
                                544                 :                : /*
                                545                 :                :  * Struct actually returned by CreateFakeRelcacheEntry, though the declared
                                546                 :                :  * return type is Relation.
                                547                 :                :  */
                                548                 :                : typedef struct
                                549                 :                : {
                                550                 :                :     RelationData reldata;       /* Note: this must be first */
                                551                 :                :     FormData_pg_class pgc;
                                552                 :                : } FakeRelCacheEntryData;
                                553                 :                : 
                                554                 :                : typedef FakeRelCacheEntryData *FakeRelCacheEntry;
                                555                 :                : 
                                556                 :                : /*
                                557                 :                :  * Create a fake relation cache entry for a physical relation
                                558                 :                :  *
                                559                 :                :  * It's often convenient to use the same functions in XLOG replay as in the
                                560                 :                :  * main codepath, but those functions typically work with a relcache entry.
                                561                 :                :  * We don't have a working relation cache during XLOG replay, but this
                                562                 :                :  * function can be used to create a fake relcache entry instead. Only the
                                563                 :                :  * fields related to physical storage, like rd_rel, are initialized, so the
                                564                 :                :  * fake entry is only usable in low-level operations like ReadBuffer().
                                565                 :                :  *
                                566                 :                :  * This is also used for syncing WAL-skipped files.
                                567                 :                :  *
                                568                 :                :  * Caller must free the returned entry with FreeFakeRelcacheEntry().
                                569                 :                :  */
                                570                 :                : Relation
 1261 rhaas@postgresql.org      571                 :          54530 : CreateFakeRelcacheEntry(RelFileLocator rlocator)
                                572                 :                : {
                                573                 :                :     FakeRelCacheEntry fakeentry;
                                574                 :                :     Relation    rel;
                                575                 :                : 
                                576                 :                :     /* Allocate the Relation struct and all related space in one block. */
    8 michael@paquier.xyz       577                 :GNC       54530 :     fakeentry = palloc0_object(FakeRelCacheEntryData);
 6398 heikki.linnakangas@i      578                 :CBC       54530 :     rel = (Relation) fakeentry;
                                579                 :                : 
                                580                 :          54530 :     rel->rd_rel = &fakeentry->pgc;
 1261 rhaas@postgresql.org      581                 :          54530 :     rel->rd_locator = rlocator;
                                582                 :                : 
                                583                 :                :     /*
                                584                 :                :      * We will never be working with temp rels during recovery or while
                                585                 :                :      * syncing WAL-skipped files.
                                586                 :                :      */
  655 heikki.linnakangas@i      587                 :          54530 :     rel->rd_backend = INVALID_PROC_NUMBER;
                                588                 :                : 
                                589                 :                :     /* It must be a permanent table here */
 4843 rhaas@postgresql.org      590                 :          54530 :     rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
                                591                 :                : 
                                592                 :                :     /* We don't know the name of the relation; use relfilenumber instead */
 1177                           593                 :          54530 :     sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber);
                                594                 :                : 
                                595                 :                :     /*
                                596                 :                :      * We set up the lockRelId in case anything tries to lock the dummy
                                597                 :                :      * relation.  Note that this is fairly bogus since relNumber may be
                                598                 :                :      * different from the relation's OID.  It shouldn't really matter though.
                                599                 :                :      * In recovery, we are running by ourselves and can't have any lock
                                600                 :                :      * conflicts.  While syncing, we already hold AccessExclusiveLock.
                                601                 :                :      */
 1261                           602                 :          54530 :     rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
 1177                           603                 :          54530 :     rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
                                604                 :                : 
                                605                 :                :     /*
                                606                 :                :      * Set up a non-pinned SMgrRelation reference, so that we don't need to
                                607                 :                :      * worry about unpinning it on error.
                                608                 :                :      */
  655 heikki.linnakangas@i      609                 :          54530 :     rel->rd_smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
                                610                 :                : 
 6398                           611                 :          54530 :     return rel;
                                612                 :                : }
                                613                 :                : 
                                614                 :                : /*
                                615                 :                :  * Free a fake relation cache entry.
                                616                 :                :  */
                                617                 :                : void
                                618                 :          54530 : FreeFakeRelcacheEntry(Relation fakerel)
                                619                 :                : {
                                620                 :          54530 :     pfree(fakerel);
 9190 vadim4o@yahoo.com         621                 :          54530 : }
                                622                 :                : 
                                623                 :                : /*
                                624                 :                :  * Drop a relation during XLOG replay
                                625                 :                :  *
                                626                 :                :  * This is called when the relation is about to be deleted; we need to remove
                                627                 :                :  * any open "invalid-page" records for the relation.
                                628                 :                :  */
                                629                 :                : void
 1261 rhaas@postgresql.org      630                 :          31852 : XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
                                631                 :                : {
                                632                 :          31852 :     forget_invalid_pages(rlocator, forknum, 0);
 7981 tgl@sss.pgh.pa.us         633                 :          31852 : }
                                634                 :                : 
                                635                 :                : /*
                                636                 :                :  * Drop a whole database during XLOG replay
                                637                 :                :  *
                                638                 :                :  * As above, but for DROP DATABASE instead of dropping a single rel
                                639                 :                :  */
                                640                 :                : void
 7204                           641                 :             13 : XLogDropDatabase(Oid dbid)
                                642                 :                : {
                                643                 :                :     /*
                                644                 :                :      * This is unnecessarily heavy-handed, as it will close SMgrRelation
                                645                 :                :      * objects for other databases as well. DROP DATABASE occurs seldom enough
                                646                 :                :      * that it's not worth introducing a variant of smgrdestroy for just this
                                647                 :                :      * purpose.
                                648                 :                :      */
  687 heikki.linnakangas@i      649                 :             13 :     smgrdestroyall();
                                650                 :                : 
 7188 tgl@sss.pgh.pa.us         651                 :             13 :     forget_invalid_pages_db(dbid);
                                652                 :             13 : }
                                653                 :                : 
                                654                 :                : /*
                                655                 :                :  * Truncate a relation during XLOG replay
                                656                 :                :  *
                                657                 :                :  * We need to clean up any open "invalid-page" records for the dropped pages.
                                658                 :                :  */
                                659                 :                : void
 1261 rhaas@postgresql.org      660                 :             54 : XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum,
                                661                 :                :                      BlockNumber nblocks)
                                662                 :                : {
                                663                 :             54 :     forget_invalid_pages(rlocator, forkNum, nblocks);
 7204 tgl@sss.pgh.pa.us         664                 :             54 : }
                                665                 :                : 
                                666                 :                : /*
                                667                 :                :  * Determine which timeline to read an xlog page from and set the
                                668                 :                :  * XLogReaderState's currTLI to that timeline ID.
                                669                 :                :  *
                                670                 :                :  * We care about timelines in xlogreader when we might be reading xlog
                                671                 :                :  * generated prior to a promotion, either if we're currently a standby in
                                672                 :                :  * recovery or if we're a promoted primary reading xlogs generated by the old
                                673                 :                :  * primary before our promotion.
                                674                 :                :  *
                                675                 :                :  * wantPage must be set to the start address of the page to read and
                                676                 :                :  * wantLength to the amount of the page that will be read, up to
                                677                 :                :  * XLOG_BLCKSZ. If the amount to be read isn't known, pass XLOG_BLCKSZ.
                                678                 :                :  *
                                679                 :                :  * The currTLI argument should be the system-wide current timeline.
                                680                 :                :  * Note that this may be different from state->currTLI, which is the timeline
                                681                 :                :  * from which the caller is currently reading previous xlog records.
                                682                 :                :  *
                                683                 :                :  * We switch to an xlog segment from the new timeline eagerly when on a
                                684                 :                :  * historical timeline, as soon as we reach the start of the xlog segment
                                685                 :                :  * containing the timeline switch.  The server copied the segment to the new
                                686                 :                :  * timeline so all the data up to the switch point is the same, but there's no
                                687                 :                :  * guarantee the old segment will still exist. It may have been deleted or
                                688                 :                :  * renamed with a .partial suffix so we can't necessarily keep reading from
                                689                 :                :  * the old TLI even though tliSwitchPoint says it's OK.
                                690                 :                :  *
                                691                 :                :  * We can't just check the timeline when we read a page on a different segment
                                692                 :                :  * to the last page. We could've received a timeline switch from a cascading
                                693                 :                :  * upstream, so the current segment ends abruptly (possibly getting renamed to
                                694                 :                :  * .partial) and we have to switch to a new one.  Even in the middle of reading
                                695                 :                :  * a page we could have to dump the cached page and switch to a new TLI.
                                696                 :                :  *
                                697                 :                :  * Because of this, callers MAY NOT assume that currTLI is the timeline that
                                698                 :                :  * will be in a page's xlp_tli; the page may begin on an older timeline or we
                                699                 :                :  * might be reading from historical timeline data on a segment that's been
                                700                 :                :  * copied to a new timeline.
                                701                 :                :  *
                                702                 :                :  * The caller must also make sure it doesn't read past the current replay
                                703                 :                :  * position (using GetXLogReplayRecPtr) if executing in recovery, so it
                                704                 :                :  * doesn't fail to notice that the current timeline became historical.
                                705                 :                :  */
                                706                 :                : void
 1504 rhaas@postgresql.org      707                 :          32164 : XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
                                708                 :                :                           uint32 wantLength, TimeLineID currTLI)
                                709                 :                : {
 1683 tmunro@postgresql.or      710                 :          32164 :     const XLogRecPtr lastReadPage = (state->seg.ws_segno *
                                711                 :          32164 :                                      state->segcxt.ws_segsize + state->segoff);
                                712                 :                : 
   42 alvherre@kurilemu.de      713   [ +  -  -  + ]:GNC       32164 :     Assert(XLogRecPtrIsValid(wantPage) && wantPage % XLOG_BLCKSZ == 0);
 3193 simon@2ndQuadrant.co      714         [ -  + ]:CBC       32164 :     Assert(wantLength <= XLOG_BLCKSZ);
                                715   [ -  +  -  - ]:          32164 :     Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);
 1504 rhaas@postgresql.org      716         [ -  + ]:          32164 :     Assert(currTLI != 0);
                                717                 :                : 
                                718                 :                :     /*
                                719                 :                :      * If the desired page is currently read in and valid, we have nothing to
                                720                 :                :      * do.
                                721                 :                :      *
                                722                 :                :      * The caller should've ensured that it didn't previously advance readOff
                                723                 :                :      * past the valid limit of this timeline, so it doesn't matter if the
                                724                 :                :      * current TLI has since become historical.
                                725                 :                :      */
 3193 simon@2ndQuadrant.co      726         [ +  + ]:          32164 :     if (lastReadPage == wantPage &&
 1683 tmunro@postgresql.or      727         [ -  + ]:           1907 :         state->readLen != 0 &&
 3137 bruce@momjian.us          728         [ #  # ]:UBC           0 :         lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
 3193 simon@2ndQuadrant.co      729                 :              0 :         return;
                                730                 :                : 
                                731                 :                :     /*
                                732                 :                :      * If we're reading from the current timeline, it hasn't become historical
                                733                 :                :      * and the page we're reading is after the last page read, we can again
                                734                 :                :      * just carry on. (Seeking backwards requires a check to make sure the
                                735                 :                :      * older page isn't on a prior timeline).
                                736                 :                :      *
                                737                 :                :      * currTLI might've become historical since the caller obtained the value,
                                738                 :                :      * but the caller is required not to read past the flush limit it saw at
                                739                 :                :      * the time it looked up the timeline. There's nothing we can do about it
                                740                 :                :      * if StartupXLOG() renames it to .partial concurrently.
                                741                 :                :      */
 1504 rhaas@postgresql.org      742   [ +  +  +  + ]:CBC       32164 :     if (state->currTLI == currTLI && wantPage >= lastReadPage)
                                743                 :                :     {
   42 alvherre@kurilemu.de      744         [ -  + ]:GNC       29019 :         Assert(!XLogRecPtrIsValid(state->currTLIValidUntil));
 3193 simon@2ndQuadrant.co      745                 :CBC       29019 :         return;
                                746                 :                :     }
                                747                 :                : 
                                748                 :                :     /*
                                749                 :                :      * If we're just reading pages from a previously validated historical
                                750                 :                :      * timeline and the timeline we're reading from is valid until the end of
                                751                 :                :      * the current segment we can just keep reading.
                                752                 :                :      */
   42 alvherre@kurilemu.de      753         [ +  + ]:GNC        3145 :     if (XLogRecPtrIsValid(state->currTLIValidUntil) &&
 1504 rhaas@postgresql.org      754         [ +  - ]:CBC        1739 :         state->currTLI != currTLI &&
 3193 simon@2ndQuadrant.co      755         [ +  - ]:           1739 :         state->currTLI != 0 &&
 2277 alvherre@alvh.no-ip.      756                 :           1739 :         ((wantPage + wantLength) / state->segcxt.ws_segsize) <
                                757         [ +  + ]:           1739 :         (state->currTLIValidUntil / state->segcxt.ws_segsize))
 3193 simon@2ndQuadrant.co      758                 :           1736 :         return;
                                759                 :                : 
                                760                 :                :     /*
                                761                 :                :      * If we reach this point we're either looking up a page for random
                                762                 :                :      * access, the current timeline just became historical, or we're reading
                                763                 :                :      * from a new segment containing a timeline switch. In all cases we need
                                764                 :                :      * to determine the newest timeline on the segment.
                                765                 :                :      *
                                766                 :                :      * If it's the current timeline we can just keep reading from here unless
                                767                 :                :      * we detect a timeline switch that makes the current timeline historical.
                                768                 :                :      * If it's a historical timeline we can read all the segment on the newest
                                769                 :                :      * timeline because it contains all the old timelines' data too. So only
                                770                 :                :      * one switch check is required.
                                771                 :                :      */
                                772                 :                :     {
                                773                 :                :         /*
                                774                 :                :          * We need to re-read the timeline history in case it's been changed
                                775                 :                :          * by a promotion or replay from a cascaded replica.
                                776                 :                :          */
 1504 rhaas@postgresql.org      777                 :           1409 :         List       *timelineHistory = readTimeLineHistory(currTLI);
                                778                 :                :         XLogRecPtr  endOfSegment;
                                779                 :                : 
 2277 alvherre@alvh.no-ip.      780                 :           1409 :         endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
                                781                 :           1409 :             state->segcxt.ws_segsize - 1;
                                782         [ -  + ]:           1409 :         Assert(wantPage / state->segcxt.ws_segsize ==
                                783                 :                :                endOfSegment / state->segcxt.ws_segsize);
                                784                 :                : 
                                785                 :                :         /*
                                786                 :                :          * Find the timeline of the last LSN on the segment containing
                                787                 :                :          * wantPage.
                                788                 :                :          */
 3193 simon@2ndQuadrant.co      789                 :           1409 :         state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
                                790                 :           1409 :         state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
                                791                 :                :                                                   &state->nextTLI);
                                792                 :                : 
   42 alvherre@kurilemu.de      793   [ +  +  -  + ]:GNC        1409 :         Assert(!XLogRecPtrIsValid(state->currTLIValidUntil) ||
                                794                 :                :                wantPage + wantLength < state->currTLIValidUntil);
                                795                 :                : 
 3193 simon@2ndQuadrant.co      796                 :CBC        1409 :         list_free_deep(timelineHistory);
                                797                 :                : 
  164 alvherre@kurilemu.de      798         [ -  + ]:GNC        1409 :         elog(DEBUG3, "switched to timeline %u valid until %X/%08X",
                                799                 :                :              state->currTLI,
                                800                 :                :              LSN_FORMAT_ARGS(state->currTLIValidUntil));
                                801                 :                :     }
                                802                 :                : }
                                803                 :                : 
                                804                 :                : /* XLogReaderRoutine->segment_open callback for local pg_wal files */
                                805                 :                : void
 2050 alvherre@alvh.no-ip.      806                 :CBC         756 : wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
                                807                 :                :                  TimeLineID *tli_p)
                                808                 :                : {
 2215                           809                 :            756 :     TimeLineID  tli = *tli_p;
                                810                 :                :     char        path[MAXPGPATH];
                                811                 :                : 
 2045                           812                 :            756 :     XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
                                813                 :            756 :     state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
                                814         [ +  - ]:            756 :     if (state->seg.ws_file >= 0)
                                815                 :            756 :         return;
                                816                 :                : 
 2215 alvherre@alvh.no-ip.      817         [ #  # ]:UBC           0 :     if (errno == ENOENT)
                                818         [ #  # ]:              0 :         ereport(ERROR,
                                819                 :                :                 (errcode_for_file_access(),
                                820                 :                :                  errmsg("requested WAL segment %s has already been removed",
                                821                 :                :                         path)));
                                822                 :                :     else
                                823         [ #  # ]:              0 :         ereport(ERROR,
                                824                 :                :                 (errcode_for_file_access(),
                                825                 :                :                  errmsg("could not open file \"%s\": %m",
                                826                 :                :                         path)));
                                827                 :                : }
                                828                 :                : 
                                829                 :                : /* stock XLogReaderRoutine->segment_close callback */
                                830                 :                : void
 2050 alvherre@alvh.no-ip.      831                 :CBC        2531 : wal_segment_close(XLogReaderState *state)
                                832                 :                : {
                                833                 :           2531 :     close(state->seg.ws_file);
                                834                 :                :     /* need to check errno? */
                                835                 :           2531 :     state->seg.ws_file = -1;
                                836                 :           2531 : }
                                837                 :                : 
                                838                 :                : /*
                                839                 :                :  * XLogReaderRoutine->page_read callback for reading local xlog files
                                840                 :                :  *
                                841                 :                :  * Public because it would likely be very helpful for someone writing another
                                842                 :                :  * output method outside walsender, e.g. in a bgworker.
                                843                 :                :  */
                                844                 :                : int
 1683 tmunro@postgresql.or      845                 :          18385 : read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                846                 :                :                      int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
                                847                 :                : {
 1350 jdavis@postgresql.or      848                 :          18385 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                849                 :                :                                      targetRecPtr, cur_page, true);
                                850                 :                : }
                                851                 :                : 
                                852                 :                : /*
                                853                 :                :  * Same as read_local_xlog_page except that it doesn't wait for future WAL
                                854                 :                :  * to be available.
                                855                 :                :  */
                                856                 :                : int
                                857                 :           3828 : read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                858                 :                :                              int reqLen, XLogRecPtr targetRecPtr,
                                859                 :                :                              char *cur_page)
                                860                 :                : {
                                861                 :           3828 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                862                 :                :                                      targetRecPtr, cur_page, false);
                                863                 :                : }
                                864                 :                : 
                                865                 :                : /*
                                866                 :                :  * Implementation of read_local_xlog_page and its no wait version.
                                867                 :                :  */
                                868                 :                : static int
                                869                 :          22213 : read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                870                 :                :                           int reqLen, XLogRecPtr targetRecPtr,
                                871                 :                :                           char *cur_page, bool wait_for_wal)
                                872                 :                : {
                                873                 :                :     XLogRecPtr  read_upto,
                                874                 :                :                 loc;
                                875                 :                :     TimeLineID  tli;
                                876                 :                :     int         count;
                                877                 :                :     WALReadError errinfo;
                                878                 :                :     TimeLineID  currTLI;
                                879                 :                : 
 3620 simon@2ndQuadrant.co      880                 :          22213 :     loc = targetPagePtr + reqLen;
                                881                 :                : 
                                882                 :                :     /*
                                883                 :                :      * Loop waiting for xlog to be available if necessary
                                884                 :                :      *
                                885                 :                :      * TODO: The walsender has its own version of this function, which uses a
                                886                 :                :      * condition variable to wake up whenever WAL is flushed. We could use the
                                887                 :                :      * same infrastructure here, instead of the check/sleep/repeat style of
                                888                 :                :      * loop.
                                889                 :                :      */
                                890                 :                :     while (1)
                                891                 :                :     {
                                892                 :                :         /*
                                893                 :                :          * Determine the limit of xlog we can currently read to, and what the
                                894                 :                :          * most recent timeline is.
                                895                 :                :          */
 3515 alvherre@alvh.no-ip.      896         [ +  + ]:          22979 :         if (!RecoveryInProgress())
 1504 rhaas@postgresql.org      897                 :          22206 :             read_upto = GetFlushRecPtr(&currTLI);
                                898                 :                :         else
                                899                 :            773 :             read_upto = GetXLogReplayRecPtr(&currTLI);
                                900                 :          22979 :         tli = currTLI;
                                901                 :                : 
                                902                 :                :         /*
                                903                 :                :          * Check which timeline to get the record from.
                                904                 :                :          *
                                905                 :                :          * We have to do it each time through the loop because if we're in
                                906                 :                :          * recovery as a cascading standby, the current timeline might've
                                907                 :                :          * become historical. We can't rely on RecoveryInProgress() because in
                                908                 :                :          * a standby configuration like
                                909                 :                :          *
                                910                 :                :          * A => B => C
                                911                 :                :          *
                                912                 :                :          * if we're a logical decoding session on C, and B gets promoted, our
                                913                 :                :          * timeline will change while we remain in recovery.
                                914                 :                :          *
                                915                 :                :          * We can't just keep reading from the old timeline as the last WAL
                                916                 :                :          * archive in the timeline will get renamed to .partial by
                                917                 :                :          * StartupXLOG().
                                918                 :                :          *
                                919                 :                :          * If that happens after our caller determined the TLI but before we
                                920                 :                :          * actually read the xlog page, we might still try to read from the
                                921                 :                :          * old (now renamed) segment and fail. There's not much we can do
                                922                 :                :          * about this, but it can only happen when we're a leaf of a cascading
                                923                 :                :          * standby whose primary gets promoted while we're decoding, so a
                                924                 :                :          * one-off ERROR isn't too bad.
                                925                 :                :          */
                                926                 :          22979 :         XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
                                927                 :                : 
                                928         [ +  + ]:          22979 :         if (state->currTLI == currTLI)
                                929                 :                :         {
                                930                 :                : 
 3193 simon@2ndQuadrant.co      931         [ +  + ]:          21240 :             if (loc <= read_upto)
                                932                 :          20464 :                 break;
                                933                 :                : 
                                934                 :                :             /* If asked, let's not wait for future WAL. */
 1350 jdavis@postgresql.or      935         [ +  + ]:            776 :             if (!wait_for_wal)
                                936                 :                :             {
                                937                 :                :                 ReadLocalXLogPageNoWaitPrivate *private_data;
                                938                 :                : 
                                939                 :                :                 /*
                                940                 :                :                  * Inform the caller of read_local_xlog_page_no_wait that the
                                941                 :                :                  * end of WAL has been reached.
                                942                 :                :                  */
 1328                           943                 :             10 :                 private_data = (ReadLocalXLogPageNoWaitPrivate *)
                                944                 :                :                     state->private_data;
                                945                 :             10 :                 private_data->end_of_wal = true;
 1350                           946                 :             10 :                 break;
                                947                 :                :             }
                                948                 :                : 
 3193 simon@2ndQuadrant.co      949         [ -  + ]:            766 :             CHECK_FOR_INTERRUPTS();
                                950                 :            766 :             pg_usleep(1000L);
                                951                 :                :         }
                                952                 :                :         else
                                953                 :                :         {
                                954                 :                :             /*
                                955                 :                :              * We're on a historical timeline, so limit reading to the switch
                                956                 :                :              * point where we moved to the next timeline.
                                957                 :                :              *
                                958                 :                :              * We don't need to GetFlushRecPtr or GetXLogReplayRecPtr. We know
                                959                 :                :              * about the new timeline, so we must've received past the end of
                                960                 :                :              * it.
                                961                 :                :              */
                                962                 :           1739 :             read_upto = state->currTLIValidUntil;
                                963                 :                : 
                                964                 :                :             /*
                                965                 :                :              * Setting tli to our wanted record's TLI is slightly wrong; the
                                966                 :                :              * page might begin on an older timeline if it contains a timeline
                                967                 :                :              * switch, since its xlog segment will have been copied from the
                                968                 :                :              * prior timeline. This is pretty harmless though, as nothing
                                969                 :                :              * cares so long as the timeline doesn't go backwards.  We should
                                970                 :                :              * read the page header instead; FIXME someday.
                                971                 :                :              */
 2215 alvherre@alvh.no-ip.      972                 :           1739 :             tli = state->currTLI;
                                973                 :                : 
                                974                 :                :             /* No need to wait on a historical timeline */
 3193 simon@2ndQuadrant.co      975                 :           1739 :             break;
                                976                 :                :         }
                                977                 :                :     }
                                978                 :                : 
 3550 alvherre@alvh.no-ip.      979         [ +  + ]:          22213 :     if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
                                980                 :                :     {
                                981                 :                :         /*
                                982                 :                :          * more than one block available; read only that block, have caller
                                983                 :                :          * come back if they need more.
                                984                 :                :          */
 3620 simon@2ndQuadrant.co      985                 :          21513 :         count = XLOG_BLCKSZ;
                                986                 :                :     }
 3550 alvherre@alvh.no-ip.      987         [ +  + ]:            700 :     else if (targetPagePtr + reqLen > read_upto)
                                988                 :                :     {
                                989                 :                :         /* not enough data there */
 1683 tmunro@postgresql.or      990                 :             10 :         return -1;
                                991                 :                :     }
                                992                 :                :     else
                                993                 :                :     {
                                994                 :                :         /* enough bytes available to satisfy the request */
 3550 alvherre@alvh.no-ip.      995                 :            690 :         count = read_upto - targetPagePtr;
                                996                 :                :     }
                                997                 :                : 
  671 jdavis@postgresql.or      998         [ -  + ]:          22203 :     if (!WALRead(state, cur_page, targetPagePtr, count, tli,
                                999                 :                :                  &errinfo))
 2215 alvherre@alvh.no-ip.     1000                 :UBC           0 :         WALReadRaiseError(&errinfo);
                               1001                 :                : 
                               1002                 :                :     /* number of valid bytes in the buffer */
 1683 tmunro@postgresql.or     1003                 :CBC       22203 :     return count;
                               1004                 :                : }
                               1005                 :                : 
                               1006                 :                : /*
                               1007                 :                :  * Backend-specific convenience code to handle read errors encountered by
                               1008                 :                :  * WALRead().
                               1009                 :                :  */
                               1010                 :                : void
 2215 alvherre@alvh.no-ip.     1011                 :UBC           0 : WALReadRaiseError(WALReadError *errinfo)
                               1012                 :                : {
                               1013                 :              0 :     WALOpenSegment *seg = &errinfo->wre_seg;
                               1014                 :                :     char        fname[MAXFNAMELEN];
                               1015                 :                : 
 2207 michael@paquier.xyz      1016                 :              0 :     XLogFileName(fname, seg->ws_tli, seg->ws_segno, wal_segment_size);
                               1017                 :                : 
 2215 alvherre@alvh.no-ip.     1018         [ #  # ]:              0 :     if (errinfo->wre_read < 0)
                               1019                 :                :     {
                               1020                 :              0 :         errno = errinfo->wre_errno;
                               1021         [ #  # ]:              0 :         ereport(ERROR,
                               1022                 :                :                 (errcode_for_file_access(),
                               1023                 :                :                  errmsg("could not read from WAL segment %s, offset %d: %m",
                               1024                 :                :                         fname, errinfo->wre_off)));
                               1025                 :                :     }
                               1026         [ #  # ]:              0 :     else if (errinfo->wre_read == 0)
                               1027                 :                :     {
                               1028         [ #  # ]:              0 :         ereport(ERROR,
                               1029                 :                :                 (errcode(ERRCODE_DATA_CORRUPTED),
                               1030                 :                :                  errmsg("could not read from WAL segment %s, offset %d: read %d of %d",
                               1031                 :                :                         fname, errinfo->wre_off, errinfo->wre_read,
                               1032                 :                :                         errinfo->wre_req)));
                               1033                 :                :     }
                               1034                 :              0 : }

Generated by: LCOV version 2.4-beta