LCOV - differential code coverage report
Current view: top level - src/backend/storage/buffer - freelist.c (source / functions) Coverage Total Hit UNC LBC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: bed3ffbf9d952be6c7d739d068cdce44c046dfb7 vs 574581b50ac9c63dd9e4abebb731a3b67e5b50f6 Lines: 93.0 % 171 159 3 1 8 3 2 42 112 1 4 68
Current Date: 2026-05-05 10:23:31 +0900 Functions: 100.0 % 15 15 4 11 6
Baseline: lcov-20260505-025707-baseline Branches: 73.8 % 84 62 5 1 16 1 17 44 2 4 10 30
Baseline Date: 2026-05-05 10:27:06 +0900 Line coverage date bins:
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
(7,30] days: 100.0 % 8 8 8
(30,360] days: 91.4 % 35 32 3 32
(360..) days: 93.0 % 128 119 1 8 3 2 2 112 1
Function coverage date bins:
(7,30] days: 100.0 % 2 2 2
(30,360] days: 100.0 % 2 2 2
(360..) days: 100.0 % 11 11 11
Branch coverage date bins:
(30,360] days: 77.3 % 22 17 5 17
(360..) days: 66.2 % 68 45 1 16 1 44 2 4

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * freelist.c
                                  4                 :                :  *    routines for managing the buffer pool's replacement strategy.
                                  5                 :                :  *
                                  6                 :                :  *
                                  7                 :                :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
                                  8                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :                :  *
                                 10                 :                :  *
                                 11                 :                :  * IDENTIFICATION
                                 12                 :                :  *    src/backend/storage/buffer/freelist.c
                                 13                 :                :  *
                                 14                 :                :  *-------------------------------------------------------------------------
                                 15                 :                :  */
                                 16                 :                : #include "postgres.h"
                                 17                 :                : 
                                 18                 :                : #include "pgstat.h"
                                 19                 :                : #include "port/atomics.h"
                                 20                 :                : #include "storage/buf_internals.h"
                                 21                 :                : #include "storage/bufmgr.h"
                                 22                 :                : #include "storage/proc.h"
                                 23                 :                : #include "storage/shmem.h"
                                 24                 :                : #include "storage/subsystems.h"
                                 25                 :                : 
                                 26                 :                : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
                                 27                 :                : 
                                 28                 :                : 
                                 29                 :                : /*
                                 30                 :                :  * The shared freelist control information.
                                 31                 :                :  */
                                 32                 :                : typedef struct
                                 33                 :                : {
                                 34                 :                :     /* Spinlock: protects the values below */
                                 35                 :                :     slock_t     buffer_strategy_lock;
                                 36                 :                : 
                                 37                 :                :     /*
                                 38                 :                :      * clock-sweep hand: index of next buffer to consider grabbing. Note that
                                 39                 :                :      * this isn't a concrete buffer - we only ever increase the value. So, to
                                 40                 :                :      * get an actual buffer, it needs to be used modulo NBuffers.
                                 41                 :                :      */
                                 42                 :                :     pg_atomic_uint32 nextVictimBuffer;
                                 43                 :                : 
                                 44                 :                :     /*
                                 45                 :                :      * Statistics.  These counters should be wide enough that they can't
                                 46                 :                :      * overflow during a single bgwriter cycle.
                                 47                 :                :      */
                                 48                 :                :     uint32      completePasses; /* Complete cycles of the clock-sweep */
                                 49                 :                :     pg_atomic_uint32 numBufferAllocs;   /* Buffers allocated since last reset */
                                 50                 :                : 
                                 51                 :                :     /*
                                 52                 :                :      * Bgwriter process to be notified upon activity or -1 if none. See
                                 53                 :                :      * StrategyNotifyBgWriter.
                                 54                 :                :      */
                                 55                 :                :     int         bgwprocno;
                                 56                 :                : } BufferStrategyControl;
                                 57                 :                : 
                                 58                 :                : /* Pointers to shared state */
                                 59                 :                : static BufferStrategyControl *StrategyControl = NULL;
                                 60                 :                : 
                                 61                 :                : static void StrategyCtlShmemRequest(void *arg);
                                 62                 :                : static void StrategyCtlShmemInit(void *arg);
                                 63                 :                : 
                                 64                 :                : const ShmemCallbacks StrategyCtlShmemCallbacks = {
                                 65                 :                :     .request_fn = StrategyCtlShmemRequest,
                                 66                 :                :     .init_fn = StrategyCtlShmemInit,
                                 67                 :                : };
                                 68                 :                : 
                                 69                 :                : /*
                                 70                 :                :  * Private (non-shared) state for managing a ring of shared buffers to re-use.
                                 71                 :                :  * This is currently the only kind of BufferAccessStrategy object, but someday
                                 72                 :                :  * we might have more kinds.
                                 73                 :                :  */
                                 74                 :                : typedef struct BufferAccessStrategyData
                                 75                 :                : {
                                 76                 :                :     /* Overall strategy type */
                                 77                 :                :     BufferAccessStrategyType btype;
                                 78                 :                :     /* Number of elements in buffers[] array */
                                 79                 :                :     int         nbuffers;
                                 80                 :                : 
                                 81                 :                :     /*
                                 82                 :                :      * Index of the "current" slot in the ring, ie, the one most recently
                                 83                 :                :      * returned by GetBufferFromRing.
                                 84                 :                :      */
                                 85                 :                :     int         current;
                                 86                 :                : 
                                 87                 :                :     /*
                                 88                 :                :      * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
                                 89                 :                :      * have not yet selected a buffer for this ring slot.  For allocation
                                 90                 :                :      * simplicity this is palloc'd together with the fixed fields of the
                                 91                 :                :      * struct.
                                 92                 :                :      */
                                 93                 :                :     Buffer      buffers[FLEXIBLE_ARRAY_MEMBER];
                                 94                 :                : }           BufferAccessStrategyData;
                                 95                 :                : 
                                 96                 :                : 
                                 97                 :                : /* Prototypes for internal functions */
                                 98                 :                : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
                                 99                 :                :                                      uint64 *buf_state);
                                100                 :                : static void AddBufferToRing(BufferAccessStrategy strategy,
                                101                 :                :                             BufferDesc *buf);
                                102                 :                : 
                                103                 :                : /*
                                104                 :                :  * ClockSweepTick - Helper routine for StrategyGetBuffer()
                                105                 :                :  *
                                106                 :                :  * Move the clock hand one buffer ahead of its current position and return the
                                107                 :                :  * id of the buffer now under the hand.
                                108                 :                :  */
                                109                 :                : static inline uint32
 4149 andres@anarazel.de        110                 :CBC     5445309 : ClockSweepTick(void)
                                111                 :                : {
                                112                 :                :     uint32      victim;
                                113                 :                : 
                                114                 :                :     /*
                                115                 :                :      * Atomically move hand ahead one buffer - if there are several processes
                                116                 :                :      * doing this, this can lead to buffers being returned slightly out of
                                117                 :                :      * apparent order.
                                118                 :                :      */
                                119                 :                :     victim =
                                120                 :        5445309 :         pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
                                121                 :                : 
                                122         [ +  + ]:        5445309 :     if (victim >= NBuffers)
                                123                 :                :     {
 4000 bruce@momjian.us          124                 :          36827 :         uint32      originalVictim = victim;
                                125                 :                : 
                                126                 :                :         /* always wrap what we look up in BufferDescriptors */
 4149 andres@anarazel.de        127                 :          36827 :         victim = victim % NBuffers;
                                128                 :                : 
                                129                 :                :         /*
                                130                 :                :          * If we're the one that just caused a wraparound, force
                                131                 :                :          * completePasses to be incremented while holding the spinlock. We
                                132                 :                :          * need the spinlock so StrategySyncStart() can return a consistent
                                133                 :                :          * value consisting of nextVictimBuffer and completePasses.
                                134                 :                :          */
                                135         [ +  + ]:          36827 :         if (victim == 0)
                                136                 :                :         {
                                137                 :                :             uint32      expected;
                                138                 :                :             uint32      wrapped;
 4000 bruce@momjian.us          139                 :          36752 :             bool        success = false;
                                140                 :                : 
 4149 andres@anarazel.de        141                 :          36752 :             expected = originalVictim + 1;
                                142                 :                : 
                                143         [ +  + ]:          73569 :             while (!success)
                                144                 :                :             {
                                145                 :                :                 /*
                                146                 :                :                  * Acquire the spinlock while increasing completePasses. That
                                147                 :                :                  * allows other readers to read nextVictimBuffer and
                                148                 :                :                  * completePasses in a consistent manner which is required for
                                149                 :                :                  * StrategySyncStart().  In theory delaying the increment
                                150                 :                :                  * could lead to an overflow of nextVictimBuffer, but that's
                                151                 :                :                  * highly unlikely and wouldn't be particularly harmful.
                                152                 :                :                  */
                                153         [ -  + ]:          36817 :                 SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
                                154                 :                : 
                                155                 :          36817 :                 wrapped = expected % NBuffers;
                                156                 :                : 
                                157                 :          36817 :                 success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
                                158                 :                :                                                          &expected, wrapped);
                                159         [ +  + ]:          36817 :                 if (success)
                                160                 :          36752 :                     StrategyControl->completePasses++;
                                161                 :          36817 :                 SpinLockRelease(&StrategyControl->buffer_strategy_lock);
                                162                 :                :             }
                                163                 :                :         }
                                164                 :                :     }
                                165                 :        5445309 :     return victim;
                                166                 :                : }
                                167                 :                : 
                                168                 :                : /*
                                169                 :                :  * StrategyGetBuffer
                                170                 :                :  *
                                171                 :                :  *  Called by the bufmgr to get the next candidate buffer to use in
                                172                 :                :  *  GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
                                173                 :                :  *  the selected buffer must not currently be pinned by anyone.
                                174                 :                :  *
                                175                 :                :  *  strategy is a BufferAccessStrategy object, or NULL for default strategy.
                                176                 :                :  *
                                177                 :                :  *  It is the caller's responsibility to ensure the buffer ownership can be
                                178                 :                :  *  tracked via TrackNewBufferPin().
                                179                 :                :  *
                                180                 :                :  *  The buffer is pinned and marked as owned, using TrackNewBufferPin(),
                                181                 :                :  *  before returning.
                                182                 :                :  */
                                183                 :                : BufferDesc *
  110 andres@anarazel.de        184                 :GNC     2264641 : StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
                                185                 :                : {
                                186                 :                :     BufferDesc *buf;
                                187                 :                :     int         bgwprocno;
                                188                 :                :     int         trycounter;
                                189                 :                : 
 1181 andres@anarazel.de        190                 :CBC     2264641 :     *from_ring = false;
                                191                 :                : 
                                192                 :                :     /*
                                193                 :                :      * If given a strategy object, see whether it can select a buffer. We
                                194                 :                :      * assume strategy objects don't need buffer_strategy_lock.
                                195                 :                :      */
 6915 tgl@sss.pgh.pa.us         196         [ +  + ]:        2264641 :     if (strategy != NULL)
                                197                 :                :     {
 3677 andres@anarazel.de        198                 :        1004264 :         buf = GetBufferFromRing(strategy, buf_state);
 6915 tgl@sss.pgh.pa.us         199         [ +  + ]:        1004264 :         if (buf != NULL)
                                200                 :                :         {
 1181 andres@anarazel.de        201                 :         411529 :             *from_ring = true;
 6915 tgl@sss.pgh.pa.us         202                 :         411529 :             return buf;
                                203                 :                :         }
                                204                 :                :     }
                                205                 :                : 
                                206                 :                :     /*
                                207                 :                :      * If asked, we need to waken the bgwriter. Since we don't want to rely on
                                208                 :                :      * a spinlock for this we force a read from shared memory once, and then
                                209                 :                :      * set the latch based on that value. We need to go through that length
                                210                 :                :      * because otherwise bgwprocno might be reset while/after we check because
                                211                 :                :      * the compiler might just reread from memory.
                                212                 :                :      *
                                213                 :                :      * This can possibly set the latch of the wrong process if the bgwriter
                                214                 :                :      * dies at the wrong moment. But since PGPROC->procLatch is never
                                215                 :                :      * deallocated the worst consequence of that is that we set the latch of
                                216                 :                :      * some arbitrary process.
                                217                 :                :      */
 4149 andres@anarazel.de        218                 :        1853112 :     bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
                                219         [ +  + ]:        1853112 :     if (bgwprocno != -1)
                                220                 :                :     {
                                221                 :                :         /* reset bgwprocno first, before setting the latch */
                                222                 :            452 :         StrategyControl->bgwprocno = -1;
                                223                 :                : 
                                224                 :                :         /*
                                225                 :                :          * Not acquiring ProcArrayLock here which is slightly icky. It's
                                226                 :                :          * actually fine because procLatch isn't ever freed, so we just can
                                227                 :                :          * potentially set the wrong process' (or no process') latch.
                                228                 :                :          */
  120 drowley@postgresql.o      229                 :GNC         452 :         SetLatch(&GetPGProcByNumber(bgwprocno)->procLatch);
                                230                 :                :     }
                                231                 :                : 
                                232                 :                :     /*
                                233                 :                :      * We count buffer allocation requests so that the bgwriter can estimate
                                234                 :                :      * the rate of buffer consumption.  Note that buffers recycled by a
                                235                 :                :      * strategy object are intentionally not counted here.
                                236                 :                :      */
 4149 andres@anarazel.de        237                 :CBC     1853112 :     pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
                                238                 :                : 
                                239                 :                :     /* Use the "clock sweep" algorithm to find a free buffer */
 7732 tgl@sss.pgh.pa.us         240                 :GNC     1853112 :     trycounter = NBuffers;
                                241                 :                :     for (;;)
 8209 JanWieck@Yahoo.com        242                 :GIC     3592197 :     {
                                243                 :                :         uint64      old_buf_state;
                                244                 :                :         uint64      local_buf_state;
                                245                 :                : 
 4114 andres@anarazel.de        246                 :GNC     5445309 :         buf = GetBufferDescriptor(ClockSweepTick());
                                247                 :                : 
                                248                 :                :         /*
                                249                 :                :          * Check whether the buffer can be used and pin it if so. Do this
                                250                 :                :          * using a CAS loop, to avoid having to lock the buffer header.
                                251                 :                :          */
  110                           252                 :        5445309 :         old_buf_state = pg_atomic_read_u64(&buf->state);
                                253                 :                :         for (;;)
                                254                 :                :         {
  209                           255                 :        5445334 :             local_buf_state = old_buf_state;
                                256                 :                : 
                                257                 :                :             /*
                                258                 :                :              * If the buffer is pinned or has a nonzero usage_count, we cannot
                                259                 :                :              * use it; decrement the usage_count (unless pinned) and keep
                                260                 :                :              * scanning.
                                261                 :                :              */
                                262                 :                : 
                                263         [ +  + ]:        5445334 :             if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
                                264                 :                :             {
                                265         [ -  + ]:         111173 :                 if (--trycounter == 0)
                                266                 :                :                 {
                                267                 :                :                     /*
                                268                 :                :                      * We've scanned all the buffers without making any state
                                269                 :                :                      * changes, so all the buffers are pinned (or were when we
                                270                 :                :                      * looked at them). We could hope that someone will free
                                271                 :                :                      * one eventually, but it's probably better to fail than
                                272                 :                :                      * to risk getting stuck in an infinite loop.
                                273                 :                :                      */
  209 andres@anarazel.de        274         [ #  # ]:UNC           0 :                     elog(ERROR, "no unpinned buffers available");
                                275                 :                :                 }
  209 andres@anarazel.de        276                 :GNC      111173 :                 break;
                                277                 :                :             }
                                278                 :                : 
                                279                 :                :             /* See equivalent code in PinBuffer() */
                                280         [ +  + ]:        5334161 :             if (unlikely(local_buf_state & BM_LOCKED))
                                281                 :                :             {
                                282                 :              1 :                 old_buf_state = WaitBufHdrUnlocked(buf);
                                283                 :              1 :                 continue;
                                284                 :                :             }
                                285                 :                : 
 3677 andres@anarazel.de        286         [ +  + ]:CBC     5334160 :             if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
                                287                 :                :             {
                                288                 :        3481046 :                 local_buf_state -= BUF_USAGECOUNT_ONE;
                                289                 :                : 
  110 andres@anarazel.de        290         [ +  + ]:GNC     3481046 :                 if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
                                291                 :                :                                                    local_buf_state))
                                292                 :                :                 {
  209                           293                 :        3481024 :                     trycounter = NBuffers;
                                294                 :        3481024 :                     break;
                                295                 :                :                 }
                                296                 :                :             }
                                297                 :                :             else
                                298                 :                :             {
                                299                 :                :                 /* pin the buffer if the CAS succeeds */
                                300                 :        1853114 :                 local_buf_state += BUF_REFCOUNT_ONE;
                                301                 :                : 
  110                           302         [ +  + ]:        1853114 :                 if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
                                303                 :                :                                                    local_buf_state))
                                304                 :                :                 {
                                305                 :                :                     /* Found a usable buffer */
  209                           306         [ +  + ]:        1853112 :                     if (strategy != NULL)
                                307                 :         592735 :                         AddBufferToRing(strategy, buf);
                                308                 :        1853112 :                     *buf_state = local_buf_state;
                                309                 :                : 
                                310                 :        1853112 :                     TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
                                311                 :                : 
                                312                 :        1853112 :                     return buf;
                                313                 :                :                 }
                                314                 :                :             }
                                315                 :                :         }
                                316                 :                :     }
10892 scrappy@hub.org           317                 :ECB    (108412) : }
                                318                 :                : 
                                319                 :                : /*
                                320                 :                :  * StrategySyncStart -- tell BgBufferSync where to start syncing
                                321                 :                :  *
                                322                 :                :  * The result is the buffer index of the best buffer to sync first.
                                323                 :                :  * BgBufferSync() will proceed circularly around the buffer array from there.
                                324                 :                :  *
                                325                 :                :  * In addition, we return the completed-pass count (which is effectively
                                326                 :                :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
                                327                 :                :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
                                328                 :                :  * being read.
                                329                 :                :  */
                                330                 :                : int
 6797 tgl@sss.pgh.pa.us         331                 :CBC       13948 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
                                332                 :                : {
                                333                 :                :     uint32      nextVictimBuffer;
                                334                 :                :     int         result;
                                335                 :                : 
 4240 rhaas@postgresql.org      336         [ +  + ]:          13948 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 4149 andres@anarazel.de        337                 :          13948 :     nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
                                338                 :          13948 :     result = nextVictimBuffer % NBuffers;
                                339                 :                : 
 6797 tgl@sss.pgh.pa.us         340         [ +  - ]:          13948 :     if (complete_passes)
                                341                 :                :     {
                                342                 :          13948 :         *complete_passes = StrategyControl->completePasses;
                                343                 :                : 
                                344                 :                :         /*
                                345                 :                :          * Additionally add the number of wraparounds that happened before
                                346                 :                :          * completePasses could be incremented. C.f. ClockSweepTick().
                                347                 :                :          */
 4149 andres@anarazel.de        348                 :          13948 :         *complete_passes += nextVictimBuffer / NBuffers;
                                349                 :                :     }
                                350                 :                : 
 6797 tgl@sss.pgh.pa.us         351         [ +  - ]:          13948 :     if (num_buf_alloc)
                                352                 :                :     {
 4149 andres@anarazel.de        353                 :          13948 :         *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
                                354                 :                :     }
 4240 rhaas@postgresql.org      355                 :          13948 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
 7732 tgl@sss.pgh.pa.us         356                 :          13948 :     return result;
                                357                 :                : }
                                358                 :                : 
                                359                 :                : /*
                                360                 :                :  * StrategyNotifyBgWriter -- set or clear allocation notification latch
                                361                 :                :  *
                                362                 :                :  * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
                                363                 :                :  * set that latch.  Pass -1 to clear the pending notification before it
                                364                 :                :  * happens.  This feature is used by the bgwriter process to wake itself up
                                365                 :                :  * from hibernation, and is not meant for anybody else to use.
                                366                 :                :  */
                                367                 :                : void
 4149 andres@anarazel.de        368                 :            724 : StrategyNotifyBgWriter(int bgwprocno)
                                369                 :                : {
                                370                 :                :     /*
                                371                 :                :      * We acquire buffer_strategy_lock just to ensure that the store appears
                                372                 :                :      * atomic to StrategyGetBuffer.  The bgwriter should call this rather
                                373                 :                :      * infrequently, so there's no performance penalty from being safe.
                                374                 :                :      */
 4240 rhaas@postgresql.org      375         [ -  + ]:            724 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 4149 andres@anarazel.de        376                 :            724 :     StrategyControl->bgwprocno = bgwprocno;
 4240 rhaas@postgresql.org      377                 :            724 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
 5109 tgl@sss.pgh.pa.us         378                 :            724 : }
                                379                 :                : 
                                380                 :                : 
                                381                 :                : /*
                                382                 :                :  * StrategyCtlShmemRequest -- request shared memory for the buffer
                                383                 :                :  *      cache replacement strategy.
                                384                 :                :  */
                                385                 :                : static void
   29 heikki.linnakangas@i      386                 :GNC        1244 : StrategyCtlShmemRequest(void *arg)
                                387                 :                : {
                                388                 :           1244 :     ShmemRequestStruct(.name = "Buffer Strategy Status",
                                389                 :                :                        .size = sizeof(BufferStrategyControl),
                                390                 :                :                        .ptr = (void **) &StrategyControl
                                391                 :                :         );
 7761 tgl@sss.pgh.pa.us         392                 :GIC        1244 : }
                                393                 :                : 
                                394                 :                : /*
                                395                 :                :  * StrategyCtlShmemInit -- initialize the buffer cache replacement strategy.
                                396                 :                :  */
                                397                 :                : static void
   29 heikki.linnakangas@i      398                 :GNC        1241 : StrategyCtlShmemInit(void *arg)
                                399                 :                : {
                                400                 :           1241 :     SpinLockInit(&StrategyControl->buffer_strategy_lock);
                                401                 :                : 
                                402                 :                :     /* Initialize the clock-sweep pointer */
                                403                 :           1241 :     pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
                                404                 :                : 
                                405                 :                :     /* Clear statistics */
                                406                 :           1241 :     StrategyControl->completePasses = 0;
                                407                 :           1241 :     pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
                                408                 :                : 
                                409                 :                :     /* No pending notification */
                                410                 :           1241 :     StrategyControl->bgwprocno = -1;
 8209 JanWieck@Yahoo.com        411                 :CBC        1241 : }
                                412                 :                : 
                                413                 :                : 
                                414                 :                : /* ----------------------------------------------------------------
                                415                 :                :  *              Backend-private buffer ring management
                                416                 :                :  * ----------------------------------------------------------------
                                417                 :                :  */
                                418                 :                : 
                                419                 :                : 
                                420                 :                : /*
                                421                 :                :  * GetAccessStrategy -- create a BufferAccessStrategy object
                                422                 :                :  *
                                423                 :                :  * The object is allocated in the current memory context.
                                424                 :                :  */
                                425                 :                : BufferAccessStrategy
 6915 tgl@sss.pgh.pa.us         426                 :         164933 : GetAccessStrategy(BufferAccessStrategyType btype)
                                427                 :                : {
                                428                 :                :     int         ring_size_kb;
                                429                 :                : 
                                430                 :                :     /*
                                431                 :                :      * Select ring size to use.  See buffer/README for rationales.
                                432                 :                :      *
                                433                 :                :      * Note: if you change the ring size for BAS_BULKREAD, see also
                                434                 :                :      * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
                                435                 :                :      */
                                436   [ -  +  +  +  :         164933 :     switch (btype)
                                                 - ]
                                437                 :                :     {
 6915 tgl@sss.pgh.pa.us         438                 :UBC           0 :         case BAS_NORMAL:
                                439                 :                :             /* if someone asks for NORMAL, just give 'em a "default" object */
                                440                 :              0 :             return NULL;
                                441                 :                : 
 6915 tgl@sss.pgh.pa.us         442                 :CBC       91415 :         case BAS_BULKREAD:
                                443                 :                :             {
                                444                 :                :                 int         ring_max_kb;
                                445                 :                : 
                                446                 :                :                 /*
                                447                 :                :                  * The ring always needs to be large enough to allow some
                                448                 :                :                  * separation in time between providing a buffer to the user
                                449                 :                :                  * of the strategy and that buffer being reused. Otherwise the
                                450                 :                :                  * user's pin will prevent reuse of the buffer, even without
                                451                 :                :                  * concurrent activity.
                                452                 :                :                  *
                                453                 :                :                  * We also need to ensure the ring always is large enough for
                                454                 :                :                  * SYNC_SCAN_REPORT_INTERVAL, as noted above.
                                455                 :                :                  *
                                456                 :                :                  * Thus we start out a minimal size and increase the size
                                457                 :                :                  * further if appropriate.
                                458                 :                :                  */
  392 andres@anarazel.de        459                 :          91415 :                 ring_size_kb = 256;
                                460                 :                : 
                                461                 :                :                 /*
                                462                 :                :                  * There's no point in a larger ring if we won't be allowed to
                                463                 :                :                  * pin sufficiently many buffers.  But we never limit to less
                                464                 :                :                  * than the minimal size above.
                                465                 :                :                  */
                                466                 :          91415 :                 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
                                467                 :          91415 :                 ring_max_kb = Max(ring_size_kb, ring_max_kb);
                                468                 :                : 
                                469                 :                :                 /*
                                470                 :                :                  * We would like the ring to additionally have space for the
                                471                 :                :                  * configured degree of IO concurrency. While being read in,
                                472                 :                :                  * buffers can obviously not yet be reused.
                                473                 :                :                  *
                                474                 :                :                  * Each IO can be up to io_combine_limit blocks large, and we
                                475                 :                :                  * want to start up to effective_io_concurrency IOs.
                                476                 :                :                  *
                                477                 :                :                  * Note that effective_io_concurrency may be 0, which disables
                                478                 :                :                  * AIO.
                                479                 :                :                  */
                                480                 :          91415 :                 ring_size_kb += (BLCKSZ / 1024) *
                                481                 :          91415 :                     io_combine_limit * effective_io_concurrency;
                                482                 :                : 
                                483         [ +  - ]:          91415 :                 if (ring_size_kb > ring_max_kb)
                                484                 :          91415 :                     ring_size_kb = ring_max_kb;
                                485                 :          91415 :                 break;
                                486                 :                :             }
 6389 tgl@sss.pgh.pa.us         487                 :          73498 :         case BAS_BULKWRITE:
 1124 drowley@postgresql.o      488                 :          73498 :             ring_size_kb = 16 * 1024;
 6389 tgl@sss.pgh.pa.us         489                 :          73498 :             break;
 6915 tgl@sss.pgh.pa.us         490                 :GBC          20 :         case BAS_VACUUM:
  759 tmunro@postgresql.or      491                 :             20 :             ring_size_kb = 2048;
 6915 tgl@sss.pgh.pa.us         492                 :             20 :             break;
                                493                 :                : 
 6915 tgl@sss.pgh.pa.us         494                 :UBC           0 :         default:
                                495         [ #  # ]:              0 :             elog(ERROR, "unrecognized buffer access strategy: %d",
                                496                 :                :                  (int) btype);
                                497                 :                :             return NULL;        /* keep compiler quiet */
                                498                 :                :     }
                                499                 :                : 
 1124 drowley@postgresql.o      500                 :CBC      164933 :     return GetAccessStrategyWithSize(btype, ring_size_kb);
                                501                 :                : }
                                502                 :                : 
                                503                 :                : /*
                                504                 :                :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
                                505                 :                :  *      number of buffers equivalent to the passed in size.
                                506                 :                :  *
                                507                 :                :  * If the given ring size is 0, no BufferAccessStrategy will be created and
                                508                 :                :  * the function will return NULL.  ring_size_kb must not be negative.
                                509                 :                :  */
                                510                 :                : BufferAccessStrategy
                                511                 :         173214 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
                                512                 :                : {
                                513                 :                :     int         ring_buffers;
                                514                 :                :     BufferAccessStrategy strategy;
                                515                 :                : 
                                516         [ -  + ]:         173214 :     Assert(ring_size_kb >= 0);
                                517                 :                : 
                                518                 :                :     /* Figure out how many buffers ring_size_kb is */
                                519                 :         173214 :     ring_buffers = ring_size_kb / (BLCKSZ / 1024);
                                520                 :                : 
                                521                 :                :     /* 0 means unlimited, so no BufferAccessStrategy required */
                                522         [ +  + ]:         173214 :     if (ring_buffers == 0)
                                523                 :              8 :         return NULL;
                                524                 :                : 
                                525                 :                :     /* Cap to 1/8th of shared_buffers */
                                526                 :         173206 :     ring_buffers = Min(NBuffers / 8, ring_buffers);
                                527                 :                : 
                                528                 :                :     /* NBuffers should never be less than 16, so this shouldn't happen */
                                529         [ -  + ]:         173206 :     Assert(ring_buffers > 0);
                                530                 :                : 
                                531                 :                :     /* Allocate the object and initialize all elements to zeroes */
                                532                 :                :     strategy = (BufferAccessStrategy)
 6915 tgl@sss.pgh.pa.us         533                 :         173206 :         palloc0(offsetof(BufferAccessStrategyData, buffers) +
                                534                 :                :                 ring_buffers * sizeof(Buffer));
                                535                 :                : 
                                536                 :                :     /* Set fields that don't start out zero */
                                537                 :         173206 :     strategy->btype = btype;
 1124 drowley@postgresql.o      538                 :         173206 :     strategy->nbuffers = ring_buffers;
                                539                 :                : 
 6915 tgl@sss.pgh.pa.us         540                 :         173206 :     return strategy;
                                541                 :                : }
                                542                 :                : 
                                543                 :                : /*
                                544                 :                :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
                                545                 :                :  *      the ring
                                546                 :                :  *
                                547                 :                :  * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
                                548                 :                :  * returning NULL with 0 size.
                                549                 :                :  */
                                550                 :                : int
 1124 drowley@postgresql.o      551                 :             25 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
                                552                 :                : {
                                553         [ -  + ]:             25 :     if (strategy == NULL)
 1124 drowley@postgresql.o      554                 :UBC           0 :         return 0;
                                555                 :                : 
 1124 drowley@postgresql.o      556                 :CBC          25 :     return strategy->nbuffers;
                                557                 :                : }
                                558                 :                : 
                                559                 :                : /*
                                560                 :                :  * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
                                561                 :                :  *
                                562                 :                :  * When pinning extra buffers to look ahead, users of a ring-based strategy are
                                563                 :                :  * in danger of pinning too much of the ring at once while performing look-ahead.
                                564                 :                :  * For some strategies, that means "escaping" from the ring, and in others it
                                565                 :                :  * means forcing dirty data to disk very frequently with associated WAL
                                566                 :                :  * flushing.  Since external code has no insight into any of that, allow
                                567                 :                :  * individual strategy types to expose a clamp that should be applied when
                                568                 :                :  * deciding on a maximum number of buffers to pin at once.
                                569                 :                :  *
                                570                 :                :  * Callers should combine this number with other relevant limits and take the
                                571                 :                :  * minimum.
                                572                 :                :  */
                                573                 :                : int
  759 tmunro@postgresql.or      574                 :         574103 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
                                575                 :                : {
                                576         [ +  + ]:         574103 :     if (strategy == NULL)
                                577                 :         456774 :         return NBuffers;
                                578                 :                : 
                                579         [ +  + ]:         117329 :     switch (strategy->btype)
                                580                 :                :     {
                                581                 :          86843 :         case BAS_BULKREAD:
                                582                 :                : 
                                583                 :                :             /*
                                584                 :                :              * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
                                585                 :                :              * shouldn't be a problem and the caller is free to pin up to the
                                586                 :                :              * entire ring at once.
                                587                 :                :              */
                                588                 :          86843 :             return strategy->nbuffers;
                                589                 :                : 
                                590                 :          30486 :         default:
                                591                 :                : 
                                592                 :                :             /*
                                593                 :                :              * Tell caller not to pin more than half the buffers in the ring.
                                594                 :                :              * This is a trade-off between look ahead distance and deferring
                                595                 :                :              * writeback and associated WAL traffic.
                                596                 :                :              */
                                597                 :          30486 :             return strategy->nbuffers / 2;
                                598                 :                :     }
                                599                 :                : }
                                600                 :                : 
                                601                 :                : /*
                                602                 :                :  * FreeAccessStrategy -- release a BufferAccessStrategy object
                                603                 :                :  *
                                604                 :                :  * A simple pfree would do at the moment, but we would prefer that callers
                                605                 :                :  * don't assume that much about the representation of BufferAccessStrategy.
                                606                 :                :  */
                                607                 :                : void
 6915 tgl@sss.pgh.pa.us         608                 :         158648 : FreeAccessStrategy(BufferAccessStrategy strategy)
                                609                 :                : {
                                610                 :                :     /* don't crash if called on a "default" strategy */
                                611         [ +  - ]:         158648 :     if (strategy != NULL)
                                612                 :         158648 :         pfree(strategy);
                                613                 :         158648 : }
                                614                 :                : 
                                615                 :                : /*
                                616                 :                :  * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
                                617                 :                :  *      ring is empty / not usable.
                                618                 :                :  *
                                619                 :                :  * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
                                620                 :                :  * returning.
                                621                 :                :  */
                                622                 :                : static BufferDesc *
  110 andres@anarazel.de        623                 :GNC     1004264 : GetBufferFromRing(BufferAccessStrategy strategy, uint64 *buf_state)
                                624                 :                : {
                                625                 :                :     BufferDesc *buf;
                                626                 :                :     Buffer      bufnum;
                                627                 :                :     uint64      old_buf_state;
                                628                 :                :     uint64      local_buf_state;    /* to avoid repeated (de-)referencing */
                                629                 :                : 
                                630                 :                : 
                                631                 :                :     /* Advance to next ring slot */
 1128 drowley@postgresql.o      632         [ +  + ]:CBC     1004264 :     if (++strategy->current >= strategy->nbuffers)
 6915 tgl@sss.pgh.pa.us         633                 :          31303 :         strategy->current = 0;
                                634                 :                : 
                                635                 :                :     /*
                                636                 :                :      * If the slot hasn't been filled yet, tell the caller to allocate a new
                                637                 :                :      * buffer with the normal allocation strategy.  He will then fill this
                                638                 :                :      * slot by calling AddBufferToRing with the new buffer.
                                639                 :                :      */
                                640                 :        1004264 :     bufnum = strategy->buffers[strategy->current];
                                641         [ +  + ]:        1004264 :     if (bufnum == InvalidBuffer)
                                642                 :         579301 :         return NULL;
                                643                 :                : 
  209 andres@anarazel.de        644                 :GNC      424963 :     buf = GetBufferDescriptor(bufnum - 1);
                                645                 :                : 
                                646                 :                :     /*
                                647                 :                :      * Check whether the buffer can be used and pin it if so. Do this using a
                                648                 :                :      * CAS loop, to avoid having to lock the buffer header.
                                649                 :                :      */
  110                           650                 :         424963 :     old_buf_state = pg_atomic_read_u64(&buf->state);
                                651                 :                :     for (;;)
                                652                 :                :     {
  209                           653                 :         424963 :         local_buf_state = old_buf_state;
                                654                 :                : 
                                655                 :                :         /*
                                656                 :                :          * If the buffer is pinned we cannot use it under any circumstances.
                                657                 :                :          *
                                658                 :                :          * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
                                659                 :                :          * since our own previous usage of the ring element would have left it
                                660                 :                :          * there, but it might've been decremented by clock-sweep since then).
                                661                 :                :          * A higher usage_count indicates someone else has touched the buffer,
                                662                 :                :          * so we shouldn't re-use it.
                                663                 :                :          */
                                664         [ +  + ]:         424963 :         if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
                                665         [ +  + ]:         417715 :             || BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
                                666                 :                :             break;
                                667                 :                : 
                                668                 :                :         /* See equivalent code in PinBuffer() */
                                669         [ -  + ]:         411529 :         if (unlikely(local_buf_state & BM_LOCKED))
                                670                 :                :         {
  209 andres@anarazel.de        671                 :UNC           0 :             old_buf_state = WaitBufHdrUnlocked(buf);
                                672                 :              0 :             continue;
                                673                 :                :         }
                                674                 :                : 
                                675                 :                :         /* pin the buffer if the CAS succeeds */
  209 andres@anarazel.de        676                 :GNC      411529 :         local_buf_state += BUF_REFCOUNT_ONE;
                                677                 :                : 
  110                           678         [ +  - ]:         411529 :         if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
                                679                 :                :                                            local_buf_state))
                                680                 :                :         {
  209                           681                 :         411529 :             *buf_state = local_buf_state;
                                682                 :                : 
                                683                 :         411529 :             TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
                                684                 :         411529 :             return buf;
                                685                 :                :         }
                                686                 :                :     }
                                687                 :                : 
                                688                 :                :     /*
                                689                 :                :      * Tell caller to allocate a new buffer with the normal allocation
                                690                 :                :      * strategy.  He'll then replace this ring element via AddBufferToRing.
                                691                 :                :      */
 6915 tgl@sss.pgh.pa.us         692                 :CBC       13434 :     return NULL;
                                693                 :                : }
                                694                 :                : 
                                695                 :                : /*
                                696                 :                :  * AddBufferToRing -- add a buffer to the buffer ring
                                697                 :                :  *
                                698                 :                :  * Caller must hold the buffer header spinlock on the buffer.  Since this
                                699                 :                :  * is called with the spinlock held, it had better be quite cheap.
                                700                 :                :  */
                                701                 :                : static void
 3823 rhaas@postgresql.org      702                 :         592735 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
                                703                 :                : {
 6915 tgl@sss.pgh.pa.us         704                 :         592735 :     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
                                705                 :         592735 : }
                                706                 :                : 
                                707                 :                : /*
                                708                 :                :  * Utility function returning the IOContext of a given BufferAccessStrategy's
                                709                 :                :  * strategy ring.
                                710                 :                :  */
                                711                 :                : IOContext
 1181 andres@anarazel.de        712                 :       85731892 : IOContextForStrategy(BufferAccessStrategy strategy)
                                713                 :                : {
                                714         [ +  + ]:       85731892 :     if (!strategy)
                                715                 :       83134177 :         return IOCONTEXT_NORMAL;
                                716                 :                : 
                                717   [ -  +  +  +  :        2597715 :     switch (strategy->btype)
                                                 - ]
                                718                 :                :     {
 1181 andres@anarazel.de        719                 :UBC           0 :         case BAS_NORMAL:
                                720                 :                : 
                                721                 :                :             /*
                                722                 :                :              * Currently, GetAccessStrategy() returns NULL for
                                723                 :                :              * BufferAccessStrategyType BAS_NORMAL, so this case is
                                724                 :                :              * unreachable.
                                725                 :                :              */
                                726                 :              0 :             pg_unreachable();
                                727                 :                :             return IOCONTEXT_NORMAL;
 1181 andres@anarazel.de        728                 :CBC     1840887 :         case BAS_BULKREAD:
                                729                 :        1840887 :             return IOCONTEXT_BULKREAD;
                                730                 :         330827 :         case BAS_BULKWRITE:
                                731                 :         330827 :             return IOCONTEXT_BULKWRITE;
                                732                 :         426001 :         case BAS_VACUUM:
                                733                 :         426001 :             return IOCONTEXT_VACUUM;
                                734                 :                :     }
                                735                 :                : 
 1181 andres@anarazel.de        736         [ #  # ]:UBC           0 :     elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
                                737                 :                :     pg_unreachable();
                                738                 :                : }
                                739                 :                : 
                                740                 :                : /*
                                741                 :                :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
                                742                 :                :  *
                                743                 :                :  * When a nondefault strategy is used, the buffer manager calls this function
                                744                 :                :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
                                745                 :                :  * be written out and doing so would require flushing WAL too.  This gives us
                                746                 :                :  * a chance to choose a different victim.
                                747                 :                :  *
                                748                 :                :  * Returns true if buffer manager should ask for a new victim, and false
                                749                 :                :  * if this buffer should be written and re-used.
                                750                 :                :  */
                                751                 :                : bool
 1181 andres@anarazel.de        752                 :CBC       26827 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
                                753                 :                : {
                                754                 :                :     /* We only do this in bulkread mode */
 6915 tgl@sss.pgh.pa.us         755         [ +  + ]:          26827 :     if (strategy->btype != BAS_BULKREAD)
                                756                 :           4327 :         return false;
                                757                 :                : 
                                758                 :                :     /* Don't muck with behavior of normal buffer-replacement strategy */
 1181 andres@anarazel.de        759   [ +  -  -  + ]:          45000 :     if (!from_ring ||
 3240 tgl@sss.pgh.pa.us         760                 :          22500 :         strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
 6915 tgl@sss.pgh.pa.us         761                 :LBC       (621) :         return false;
                                762                 :                : 
                                763                 :                :     /*
                                764                 :                :      * Remove the dirty buffer from the ring; necessary to prevent infinite
                                765                 :                :      * loop if all ring members are dirty.
                                766                 :                :      */
 6915 tgl@sss.pgh.pa.us         767                 :CBC       22500 :     strategy->buffers[strategy->current] = InvalidBuffer;
                                768                 :                : 
                                769                 :          22500 :     return true;
                                770                 :                : }
        

Generated by: LCOV version 2.5.0-beta