Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * freelist.c
4 : : * routines for managing the buffer pool's replacement strategy.
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : *
11 : : * IDENTIFICATION
12 : : * src/backend/storage/buffer/freelist.c
13 : : *
14 : : *-------------------------------------------------------------------------
15 : : */
16 : : #include "postgres.h"
17 : :
18 : : #include "pgstat.h"
19 : : #include "port/atomics.h"
20 : : #include "storage/buf_internals.h"
21 : : #include "storage/bufmgr.h"
22 : : #include "storage/proc.h"
23 : : #include "storage/shmem.h"
24 : : #include "storage/subsystems.h"
25 : :
26 : : #define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var))))
27 : :
28 : :
29 : : /*
30 : : * The shared freelist control information.
31 : : */
32 : : typedef struct
33 : : {
34 : : /* Spinlock: protects the values below */
35 : : slock_t buffer_strategy_lock;
36 : :
37 : : /*
38 : : * clock-sweep hand: index of next buffer to consider grabbing. Note that
39 : : * this isn't a concrete buffer - we only ever increase the value. So, to
40 : : * get an actual buffer, it needs to be used modulo NBuffers.
41 : : */
42 : : pg_atomic_uint32 nextVictimBuffer;
43 : :
44 : : /*
45 : : * Statistics. These counters should be wide enough that they can't
46 : : * overflow during a single bgwriter cycle.
47 : : */
48 : : uint32 completePasses; /* Complete cycles of the clock-sweep */
49 : : pg_atomic_uint32 numBufferAllocs; /* Buffers allocated since last reset */
50 : :
51 : : /*
52 : : * Bgworker process to be notified upon activity or -1 if none. See
53 : : * StrategyNotifyBgWriter.
54 : : */
55 : : int bgwprocno;
56 : : } BufferStrategyControl;
57 : :
/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;

/* Shared-memory lifecycle hooks, registered via StrategyCtlShmemCallbacks */
static void StrategyCtlShmemRequest(void *arg);
static void StrategyCtlShmemInit(void *arg);

/* Callback table consumed by the shmem subsystem machinery */
const ShmemCallbacks StrategyCtlShmemCallbacks = {
	.request_fn = StrategyCtlShmemRequest,
	.init_fn = StrategyCtlShmemInit,
};
68 : :
69 : : /*
70 : : * Private (non-shared) state for managing a ring of shared buffers to re-use.
71 : : * This is currently the only kind of BufferAccessStrategy object, but someday
72 : : * we might have more kinds.
73 : : */
74 : : typedef struct BufferAccessStrategyData
75 : : {
76 : : /* Overall strategy type */
77 : : BufferAccessStrategyType btype;
78 : : /* Number of elements in buffers[] array */
79 : : int nbuffers;
80 : :
81 : : /*
82 : : * Index of the "current" slot in the ring, ie, the one most recently
83 : : * returned by GetBufferFromRing.
84 : : */
85 : : int current;
86 : :
87 : : /*
88 : : * Array of buffer numbers. InvalidBuffer (that is, zero) indicates we
89 : : * have not yet selected a buffer for this ring slot. For allocation
90 : : * simplicity this is palloc'd together with the fixed fields of the
91 : : * struct.
92 : : */
93 : : Buffer buffers[FLEXIBLE_ARRAY_MEMBER];
94 : : } BufferAccessStrategyData;
95 : :
96 : :
97 : : /* Prototypes for internal functions */
98 : : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
99 : : uint64 *buf_state);
100 : : static void AddBufferToRing(BufferAccessStrategy strategy,
101 : : BufferDesc *buf);
102 : :
103 : : /*
104 : : * ClockSweepTick - Helper routine for StrategyGetBuffer()
105 : : *
106 : : * Move the clock hand one buffer ahead of its current position and return the
107 : : * id of the buffer now under the hand.
108 : : */
109 : : static inline uint32
4149 andres@anarazel.de 110 :CBC 5445309 : ClockSweepTick(void)
111 : : {
112 : : uint32 victim;
113 : :
114 : : /*
115 : : * Atomically move hand ahead one buffer - if there's several processes
116 : : * doing this, this can lead to buffers being returned slightly out of
117 : : * apparent order.
118 : : */
119 : : victim =
120 : 5445309 : pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
121 : :
122 [ + + ]: 5445309 : if (victim >= NBuffers)
123 : : {
4000 bruce@momjian.us 124 : 36827 : uint32 originalVictim = victim;
125 : :
126 : : /* always wrap what we look up in BufferDescriptors */
4149 andres@anarazel.de 127 : 36827 : victim = victim % NBuffers;
128 : :
129 : : /*
130 : : * If we're the one that just caused a wraparound, force
131 : : * completePasses to be incremented while holding the spinlock. We
132 : : * need the spinlock so StrategySyncStart() can return a consistent
133 : : * value consisting of nextVictimBuffer and completePasses.
134 : : */
135 [ + + ]: 36827 : if (victim == 0)
136 : : {
137 : : uint32 expected;
138 : : uint32 wrapped;
4000 bruce@momjian.us 139 : 36752 : bool success = false;
140 : :
4149 andres@anarazel.de 141 : 36752 : expected = originalVictim + 1;
142 : :
143 [ + + ]: 73569 : while (!success)
144 : : {
145 : : /*
146 : : * Acquire the spinlock while increasing completePasses. That
147 : : * allows other readers to read nextVictimBuffer and
148 : : * completePasses in a consistent manner which is required for
149 : : * StrategySyncStart(). In theory delaying the increment
150 : : * could lead to an overflow of nextVictimBuffers, but that's
151 : : * highly unlikely and wouldn't be particularly harmful.
152 : : */
153 [ - + ]: 36817 : SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
154 : :
155 : 36817 : wrapped = expected % NBuffers;
156 : :
157 : 36817 : success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
158 : : &expected, wrapped);
159 [ + + ]: 36817 : if (success)
160 : 36752 : StrategyControl->completePasses++;
161 : 36817 : SpinLockRelease(&StrategyControl->buffer_strategy_lock);
162 : : }
163 : : }
164 : : }
165 : 5445309 : return victim;
166 : : }
167 : :
168 : : /*
169 : : * StrategyGetBuffer
170 : : *
171 : : * Called by the bufmgr to get the next candidate buffer to use in
172 : : * GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
173 : : * the selected buffer must not currently be pinned by anyone.
174 : : *
175 : : * strategy is a BufferAccessStrategy object, or NULL for default strategy.
176 : : *
177 : : * It is the callers responsibility to ensure the buffer ownership can be
178 : : * tracked via TrackNewBufferPin().
179 : : *
180 : : * The buffer is pinned and marked as owned, using TrackNewBufferPin(),
181 : : * before returning.
182 : : */
183 : : BufferDesc *
110 andres@anarazel.de 184 :GNC 2264641 : StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
185 : : {
186 : : BufferDesc *buf;
187 : : int bgwprocno;
188 : : int trycounter;
189 : :
1181 andres@anarazel.de 190 :CBC 2264641 : *from_ring = false;
191 : :
192 : : /*
193 : : * If given a strategy object, see whether it can select a buffer. We
194 : : * assume strategy objects don't need buffer_strategy_lock.
195 : : */
6915 tgl@sss.pgh.pa.us 196 [ + + ]: 2264641 : if (strategy != NULL)
197 : : {
3677 andres@anarazel.de 198 : 1004264 : buf = GetBufferFromRing(strategy, buf_state);
6915 tgl@sss.pgh.pa.us 199 [ + + ]: 1004264 : if (buf != NULL)
200 : : {
1181 andres@anarazel.de 201 : 411529 : *from_ring = true;
6915 tgl@sss.pgh.pa.us 202 : 411529 : return buf;
203 : : }
204 : : }
205 : :
206 : : /*
207 : : * If asked, we need to waken the bgwriter. Since we don't want to rely on
208 : : * a spinlock for this we force a read from shared memory once, and then
209 : : * set the latch based on that value. We need to go through that length
210 : : * because otherwise bgwprocno might be reset while/after we check because
211 : : * the compiler might just reread from memory.
212 : : *
213 : : * This can possibly set the latch of the wrong process if the bgwriter
214 : : * dies in the wrong moment. But since PGPROC->procLatch is never
215 : : * deallocated the worst consequence of that is that we set the latch of
216 : : * some arbitrary process.
217 : : */
4149 andres@anarazel.de 218 : 1853112 : bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
219 [ + + ]: 1853112 : if (bgwprocno != -1)
220 : : {
221 : : /* reset bgwprocno first, before setting the latch */
222 : 452 : StrategyControl->bgwprocno = -1;
223 : :
224 : : /*
225 : : * Not acquiring ProcArrayLock here which is slightly icky. It's
226 : : * actually fine because procLatch isn't ever freed, so we just can
227 : : * potentially set the wrong process' (or no process') latch.
228 : : */
120 drowley@postgresql.o 229 :GNC 452 : SetLatch(&GetPGProcByNumber(bgwprocno)->procLatch);
230 : : }
231 : :
232 : : /*
233 : : * We count buffer allocation requests so that the bgwriter can estimate
234 : : * the rate of buffer consumption. Note that buffers recycled by a
235 : : * strategy object are intentionally not counted here.
236 : : */
4149 andres@anarazel.de 237 :CBC 1853112 : pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
238 : :
239 : : /* Use the "clock sweep" algorithm to find a free buffer */
7732 tgl@sss.pgh.pa.us 240 :GNC 1853112 : trycounter = NBuffers;
241 : : for (;;)
8209 JanWieck@Yahoo.com 242 :GIC 3592197 : {
243 : : uint64 old_buf_state;
244 : : uint64 local_buf_state;
245 : :
4114 andres@anarazel.de 246 :GNC 5445309 : buf = GetBufferDescriptor(ClockSweepTick());
247 : :
248 : : /*
249 : : * Check whether the buffer can be used and pin it if so. Do this
250 : : * using a CAS loop, to avoid having to lock the buffer header.
251 : : */
110 252 : 5445309 : old_buf_state = pg_atomic_read_u64(&buf->state);
253 : : for (;;)
254 : : {
209 255 : 5445334 : local_buf_state = old_buf_state;
256 : :
257 : : /*
258 : : * If the buffer is pinned or has a nonzero usage_count, we cannot
259 : : * use it; decrement the usage_count (unless pinned) and keep
260 : : * scanning.
261 : : */
262 : :
263 [ + + ]: 5445334 : if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
264 : : {
265 [ - + ]: 111173 : if (--trycounter == 0)
266 : : {
267 : : /*
268 : : * We've scanned all the buffers without making any state
269 : : * changes, so all the buffers are pinned (or were when we
270 : : * looked at them). We could hope that someone will free
271 : : * one eventually, but it's probably better to fail than
272 : : * to risk getting stuck in an infinite loop.
273 : : */
209 andres@anarazel.de 274 [ # # ]:UNC 0 : elog(ERROR, "no unpinned buffers available");
275 : : }
209 andres@anarazel.de 276 :GNC 111173 : break;
277 : : }
278 : :
279 : : /* See equivalent code in PinBuffer() */
280 [ + + ]: 5334161 : if (unlikely(local_buf_state & BM_LOCKED))
281 : : {
282 : 1 : old_buf_state = WaitBufHdrUnlocked(buf);
283 : 1 : continue;
284 : : }
285 : :
3677 andres@anarazel.de 286 [ + + ]:CBC 5334160 : if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
287 : : {
288 : 3481046 : local_buf_state -= BUF_USAGECOUNT_ONE;
289 : :
110 andres@anarazel.de 290 [ + + ]:GNC 3481046 : if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
291 : : local_buf_state))
292 : : {
209 293 : 3481024 : trycounter = NBuffers;
294 : 3481024 : break;
295 : : }
296 : : }
297 : : else
298 : : {
299 : : /* pin the buffer if the CAS succeeds */
300 : 1853114 : local_buf_state += BUF_REFCOUNT_ONE;
301 : :
110 302 [ + + ]: 1853114 : if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
303 : : local_buf_state))
304 : : {
305 : : /* Found a usable buffer */
209 306 [ + + ]: 1853112 : if (strategy != NULL)
307 : 592735 : AddBufferToRing(strategy, buf);
308 : 1853112 : *buf_state = local_buf_state;
309 : :
310 : 1853112 : TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
311 : :
312 : 1853112 : return buf;
313 : : }
314 : : }
315 : : }
316 : : }
10892 scrappy@hub.org 317 :ECB (108412) : }
318 : :
319 : : /*
320 : : * StrategySyncStart -- tell BgBufferSync where to start syncing
321 : : *
322 : : * The result is the buffer index of the best buffer to sync first.
323 : : * BgBufferSync() will proceed circularly around the buffer array from there.
324 : : *
325 : : * In addition, we return the completed-pass count (which is effectively
326 : : * the higher-order bits of nextVictimBuffer) and the count of recent buffer
327 : : * allocs if non-NULL pointers are passed. The alloc count is reset after
328 : : * being read.
329 : : */
330 : : int
6797 tgl@sss.pgh.pa.us 331 :CBC 13948 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
332 : : {
333 : : uint32 nextVictimBuffer;
334 : : int result;
335 : :
4240 rhaas@postgresql.org 336 [ + + ]: 13948 : SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
4149 andres@anarazel.de 337 : 13948 : nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
338 : 13948 : result = nextVictimBuffer % NBuffers;
339 : :
6797 tgl@sss.pgh.pa.us 340 [ + - ]: 13948 : if (complete_passes)
341 : : {
342 : 13948 : *complete_passes = StrategyControl->completePasses;
343 : :
344 : : /*
345 : : * Additionally add the number of wraparounds that happened before
346 : : * completePasses could be incremented. C.f. ClockSweepTick().
347 : : */
4149 andres@anarazel.de 348 : 13948 : *complete_passes += nextVictimBuffer / NBuffers;
349 : : }
350 : :
6797 tgl@sss.pgh.pa.us 351 [ + - ]: 13948 : if (num_buf_alloc)
352 : : {
4149 andres@anarazel.de 353 : 13948 : *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
354 : : }
4240 rhaas@postgresql.org 355 : 13948 : SpinLockRelease(&StrategyControl->buffer_strategy_lock);
7732 tgl@sss.pgh.pa.us 356 : 13948 : return result;
357 : : }
358 : :
359 : : /*
360 : : * StrategyNotifyBgWriter -- set or clear allocation notification latch
361 : : *
362 : : * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
363 : : * set that latch. Pass -1 to clear the pending notification before it
364 : : * happens. This feature is used by the bgwriter process to wake itself up
365 : : * from hibernation, and is not meant for anybody else to use.
366 : : */
367 : : void
4149 andres@anarazel.de 368 : 724 : StrategyNotifyBgWriter(int bgwprocno)
369 : : {
370 : : /*
371 : : * We acquire buffer_strategy_lock just to ensure that the store appears
372 : : * atomic to StrategyGetBuffer. The bgwriter should call this rather
373 : : * infrequently, so there's no performance penalty from being safe.
374 : : */
4240 rhaas@postgresql.org 375 [ - + ]: 724 : SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
4149 andres@anarazel.de 376 : 724 : StrategyControl->bgwprocno = bgwprocno;
4240 rhaas@postgresql.org 377 : 724 : SpinLockRelease(&StrategyControl->buffer_strategy_lock);
5109 tgl@sss.pgh.pa.us 378 : 724 : }
379 : :
380 : :
381 : : /*
382 : : * StrategyCtlShmemRequest -- request shared memory for the buffer
383 : : * cache replacement strategy.
384 : : */
385 : : static void
29 heikki.linnakangas@i 386 :GNC 1244 : StrategyCtlShmemRequest(void *arg)
387 : : {
388 : 1244 : ShmemRequestStruct(.name = "Buffer Strategy Status",
389 : : .size = sizeof(BufferStrategyControl),
390 : : .ptr = (void **) &StrategyControl
391 : : );
7761 tgl@sss.pgh.pa.us 392 :GIC 1244 : }
393 : :
394 : : /*
395 : : * StrategyCtlShmemInit -- initialize the buffer cache replacement strategy.
396 : : */
397 : : static void
29 heikki.linnakangas@i 398 :GNC 1241 : StrategyCtlShmemInit(void *arg)
399 : : {
400 : 1241 : SpinLockInit(&StrategyControl->buffer_strategy_lock);
401 : :
402 : : /* Initialize the clock-sweep pointer */
403 : 1241 : pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
404 : :
405 : : /* Clear statistics */
406 : 1241 : StrategyControl->completePasses = 0;
407 : 1241 : pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
408 : :
409 : : /* No pending notification */
410 : 1241 : StrategyControl->bgwprocno = -1;
8209 JanWieck@Yahoo.com 411 :CBC 1241 : }
412 : :
413 : :
414 : : /* ----------------------------------------------------------------
415 : : * Backend-private buffer ring management
416 : : * ----------------------------------------------------------------
417 : : */
418 : :
419 : :
420 : : /*
421 : : * GetAccessStrategy -- create a BufferAccessStrategy object
422 : : *
423 : : * The object is allocated in the current memory context.
424 : : */
425 : : BufferAccessStrategy
6915 tgl@sss.pgh.pa.us 426 : 164933 : GetAccessStrategy(BufferAccessStrategyType btype)
427 : : {
428 : : int ring_size_kb;
429 : :
430 : : /*
431 : : * Select ring size to use. See buffer/README for rationales.
432 : : *
433 : : * Note: if you change the ring size for BAS_BULKREAD, see also
434 : : * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
435 : : */
436 [ - + + + : 164933 : switch (btype)
- ]
437 : : {
6915 tgl@sss.pgh.pa.us 438 :UBC 0 : case BAS_NORMAL:
439 : : /* if someone asks for NORMAL, just give 'em a "default" object */
440 : 0 : return NULL;
441 : :
6915 tgl@sss.pgh.pa.us 442 :CBC 91415 : case BAS_BULKREAD:
443 : : {
444 : : int ring_max_kb;
445 : :
446 : : /*
447 : : * The ring always needs to be large enough to allow some
448 : : * separation in time between providing a buffer to the user
449 : : * of the strategy and that buffer being reused. Otherwise the
450 : : * user's pin will prevent reuse of the buffer, even without
451 : : * concurrent activity.
452 : : *
453 : : * We also need to ensure the ring always is large enough for
454 : : * SYNC_SCAN_REPORT_INTERVAL, as noted above.
455 : : *
456 : : * Thus we start out a minimal size and increase the size
457 : : * further if appropriate.
458 : : */
392 andres@anarazel.de 459 : 91415 : ring_size_kb = 256;
460 : :
461 : : /*
462 : : * There's no point in a larger ring if we won't be allowed to
463 : : * pin sufficiently many buffers. But we never limit to less
464 : : * than the minimal size above.
465 : : */
466 : 91415 : ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
467 : 91415 : ring_max_kb = Max(ring_size_kb, ring_max_kb);
468 : :
469 : : /*
470 : : * We would like the ring to additionally have space for the
471 : : * configured degree of IO concurrency. While being read in,
472 : : * buffers can obviously not yet be reused.
473 : : *
474 : : * Each IO can be up to io_combine_limit blocks large, and we
475 : : * want to start up to effective_io_concurrency IOs.
476 : : *
477 : : * Note that effective_io_concurrency may be 0, which disables
478 : : * AIO.
479 : : */
480 : 91415 : ring_size_kb += (BLCKSZ / 1024) *
481 : 91415 : io_combine_limit * effective_io_concurrency;
482 : :
483 [ + - ]: 91415 : if (ring_size_kb > ring_max_kb)
484 : 91415 : ring_size_kb = ring_max_kb;
485 : 91415 : break;
486 : : }
6389 tgl@sss.pgh.pa.us 487 : 73498 : case BAS_BULKWRITE:
1124 drowley@postgresql.o 488 : 73498 : ring_size_kb = 16 * 1024;
6389 tgl@sss.pgh.pa.us 489 : 73498 : break;
6915 tgl@sss.pgh.pa.us 490 :GBC 20 : case BAS_VACUUM:
759 tmunro@postgresql.or 491 : 20 : ring_size_kb = 2048;
6915 tgl@sss.pgh.pa.us 492 : 20 : break;
493 : :
6915 tgl@sss.pgh.pa.us 494 :UBC 0 : default:
495 [ # # ]: 0 : elog(ERROR, "unrecognized buffer access strategy: %d",
496 : : (int) btype);
497 : : return NULL; /* keep compiler quiet */
498 : : }
499 : :
1124 drowley@postgresql.o 500 :CBC 164933 : return GetAccessStrategyWithSize(btype, ring_size_kb);
501 : : }
502 : :
503 : : /*
504 : : * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
505 : : * number of buffers equivalent to the passed in size.
506 : : *
507 : : * If the given ring size is 0, no BufferAccessStrategy will be created and
508 : : * the function will return NULL. ring_size_kb must not be negative.
509 : : */
510 : : BufferAccessStrategy
511 : 173214 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
512 : : {
513 : : int ring_buffers;
514 : : BufferAccessStrategy strategy;
515 : :
516 [ - + ]: 173214 : Assert(ring_size_kb >= 0);
517 : :
518 : : /* Figure out how many buffers ring_size_kb is */
519 : 173214 : ring_buffers = ring_size_kb / (BLCKSZ / 1024);
520 : :
521 : : /* 0 means unlimited, so no BufferAccessStrategy required */
522 [ + + ]: 173214 : if (ring_buffers == 0)
523 : 8 : return NULL;
524 : :
525 : : /* Cap to 1/8th of shared_buffers */
526 : 173206 : ring_buffers = Min(NBuffers / 8, ring_buffers);
527 : :
528 : : /* NBuffers should never be less than 16, so this shouldn't happen */
529 [ - + ]: 173206 : Assert(ring_buffers > 0);
530 : :
531 : : /* Allocate the object and initialize all elements to zeroes */
532 : : strategy = (BufferAccessStrategy)
6915 tgl@sss.pgh.pa.us 533 : 173206 : palloc0(offsetof(BufferAccessStrategyData, buffers) +
534 : : ring_buffers * sizeof(Buffer));
535 : :
536 : : /* Set fields that don't start out zero */
537 : 173206 : strategy->btype = btype;
1124 drowley@postgresql.o 538 : 173206 : strategy->nbuffers = ring_buffers;
539 : :
6915 tgl@sss.pgh.pa.us 540 : 173206 : return strategy;
541 : : }
542 : :
543 : : /*
544 : : * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
545 : : * the ring
546 : : *
547 : : * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
548 : : * returning NULL with 0 size.
549 : : */
550 : : int
1124 drowley@postgresql.o 551 : 25 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
552 : : {
553 [ - + ]: 25 : if (strategy == NULL)
1124 drowley@postgresql.o 554 :UBC 0 : return 0;
555 : :
1124 drowley@postgresql.o 556 :CBC 25 : return strategy->nbuffers;
557 : : }
558 : :
559 : : /*
560 : : * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
561 : : *
562 : : * When pinning extra buffers to look ahead, users of a ring-based strategy are
563 : : * in danger of pinning too much of the ring at once while performing look-ahead.
564 : : * For some strategies, that means "escaping" from the ring, and in others it
565 : : * means forcing dirty data to disk very frequently with associated WAL
566 : : * flushing. Since external code has no insight into any of that, allow
567 : : * individual strategy types to expose a clamp that should be applied when
568 : : * deciding on a maximum number of buffers to pin at once.
569 : : *
570 : : * Callers should combine this number with other relevant limits and take the
571 : : * minimum.
572 : : */
573 : : int
759 tmunro@postgresql.or 574 : 574103 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
575 : : {
576 [ + + ]: 574103 : if (strategy == NULL)
577 : 456774 : return NBuffers;
578 : :
579 [ + + ]: 117329 : switch (strategy->btype)
580 : : {
581 : 86843 : case BAS_BULKREAD:
582 : :
583 : : /*
584 : : * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
585 : : * shouldn't be a problem and the caller is free to pin up to the
586 : : * entire ring at once.
587 : : */
588 : 86843 : return strategy->nbuffers;
589 : :
590 : 30486 : default:
591 : :
592 : : /*
593 : : * Tell caller not to pin more than half the buffers in the ring.
594 : : * This is a trade-off between look ahead distance and deferring
595 : : * writeback and associated WAL traffic.
596 : : */
597 : 30486 : return strategy->nbuffers / 2;
598 : : }
599 : : }
600 : :
601 : : /*
602 : : * FreeAccessStrategy -- release a BufferAccessStrategy object
603 : : *
604 : : * A simple pfree would do at the moment, but we would prefer that callers
605 : : * don't assume that much about the representation of BufferAccessStrategy.
606 : : */
607 : : void
6915 tgl@sss.pgh.pa.us 608 : 158648 : FreeAccessStrategy(BufferAccessStrategy strategy)
609 : : {
610 : : /* don't crash if called on a "default" strategy */
611 [ + - ]: 158648 : if (strategy != NULL)
612 : 158648 : pfree(strategy);
613 : 158648 : }
614 : :
615 : : /*
616 : : * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
617 : : * ring is empty / not usable.
618 : : *
619 : : * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
620 : : * returning.
621 : : */
622 : : static BufferDesc *
110 andres@anarazel.de 623 :GNC 1004264 : GetBufferFromRing(BufferAccessStrategy strategy, uint64 *buf_state)
624 : : {
625 : : BufferDesc *buf;
626 : : Buffer bufnum;
627 : : uint64 old_buf_state;
628 : : uint64 local_buf_state; /* to avoid repeated (de-)referencing */
629 : :
630 : :
631 : : /* Advance to next ring slot */
1128 drowley@postgresql.o 632 [ + + ]:CBC 1004264 : if (++strategy->current >= strategy->nbuffers)
6915 tgl@sss.pgh.pa.us 633 : 31303 : strategy->current = 0;
634 : :
635 : : /*
636 : : * If the slot hasn't been filled yet, tell the caller to allocate a new
637 : : * buffer with the normal allocation strategy. He will then fill this
638 : : * slot by calling AddBufferToRing with the new buffer.
639 : : */
640 : 1004264 : bufnum = strategy->buffers[strategy->current];
641 [ + + ]: 1004264 : if (bufnum == InvalidBuffer)
642 : 579301 : return NULL;
643 : :
209 andres@anarazel.de 644 :GNC 424963 : buf = GetBufferDescriptor(bufnum - 1);
645 : :
646 : : /*
647 : : * Check whether the buffer can be used and pin it if so. Do this using a
648 : : * CAS loop, to avoid having to lock the buffer header.
649 : : */
110 650 : 424963 : old_buf_state = pg_atomic_read_u64(&buf->state);
651 : : for (;;)
652 : : {
209 653 : 424963 : local_buf_state = old_buf_state;
654 : :
655 : : /*
656 : : * If the buffer is pinned we cannot use it under any circumstances.
657 : : *
658 : : * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
659 : : * since our own previous usage of the ring element would have left it
660 : : * there, but it might've been decremented by clock-sweep since then).
661 : : * A higher usage_count indicates someone else has touched the buffer,
662 : : * so we shouldn't re-use it.
663 : : */
664 [ + + ]: 424963 : if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
665 [ + + ]: 417715 : || BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
666 : : break;
667 : :
668 : : /* See equivalent code in PinBuffer() */
669 [ - + ]: 411529 : if (unlikely(local_buf_state & BM_LOCKED))
670 : : {
209 andres@anarazel.de 671 :UNC 0 : old_buf_state = WaitBufHdrUnlocked(buf);
672 : 0 : continue;
673 : : }
674 : :
675 : : /* pin the buffer if the CAS succeeds */
209 andres@anarazel.de 676 :GNC 411529 : local_buf_state += BUF_REFCOUNT_ONE;
677 : :
110 678 [ + - ]: 411529 : if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
679 : : local_buf_state))
680 : : {
209 681 : 411529 : *buf_state = local_buf_state;
682 : :
683 : 411529 : TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
684 : 411529 : return buf;
685 : : }
686 : : }
687 : :
688 : : /*
689 : : * Tell caller to allocate a new buffer with the normal allocation
690 : : * strategy. He'll then replace this ring element via AddBufferToRing.
691 : : */
6915 tgl@sss.pgh.pa.us 692 :CBC 13434 : return NULL;
693 : : }
694 : :
695 : : /*
696 : : * AddBufferToRing -- add a buffer to the buffer ring
697 : : *
698 : : * Caller must hold the buffer header spinlock on the buffer. Since this
699 : : * is called with the spinlock held, it had better be quite cheap.
700 : : */
701 : : static void
3823 rhaas@postgresql.org 702 : 592735 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
703 : : {
6915 tgl@sss.pgh.pa.us 704 : 592735 : strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
705 : 592735 : }
706 : :
707 : : /*
708 : : * Utility function returning the IOContext of a given BufferAccessStrategy's
709 : : * strategy ring.
710 : : */
711 : : IOContext
1181 andres@anarazel.de 712 : 85731892 : IOContextForStrategy(BufferAccessStrategy strategy)
713 : : {
714 [ + + ]: 85731892 : if (!strategy)
715 : 83134177 : return IOCONTEXT_NORMAL;
716 : :
717 [ - + + + : 2597715 : switch (strategy->btype)
- ]
718 : : {
1181 andres@anarazel.de 719 :UBC 0 : case BAS_NORMAL:
720 : :
721 : : /*
722 : : * Currently, GetAccessStrategy() returns NULL for
723 : : * BufferAccessStrategyType BAS_NORMAL, so this case is
724 : : * unreachable.
725 : : */
726 : 0 : pg_unreachable();
727 : : return IOCONTEXT_NORMAL;
1181 andres@anarazel.de 728 :CBC 1840887 : case BAS_BULKREAD:
729 : 1840887 : return IOCONTEXT_BULKREAD;
730 : 330827 : case BAS_BULKWRITE:
731 : 330827 : return IOCONTEXT_BULKWRITE;
732 : 426001 : case BAS_VACUUM:
733 : 426001 : return IOCONTEXT_VACUUM;
734 : : }
735 : :
1181 andres@anarazel.de 736 [ # # ]:UBC 0 : elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
737 : : pg_unreachable();
738 : : }
739 : :
740 : : /*
741 : : * StrategyRejectBuffer -- consider rejecting a dirty buffer
742 : : *
743 : : * When a nondefault strategy is used, the buffer manager calls this function
744 : : * when it turns out that the buffer selected by StrategyGetBuffer needs to
745 : : * be written out and doing so would require flushing WAL too. This gives us
746 : : * a chance to choose a different victim.
747 : : *
748 : : * Returns true if buffer manager should ask for a new victim, and false
749 : : * if this buffer should be written and re-used.
750 : : */
751 : : bool
1181 andres@anarazel.de 752 :CBC 26827 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
753 : : {
754 : : /* We only do this in bulkread mode */
6915 tgl@sss.pgh.pa.us 755 [ + + ]: 26827 : if (strategy->btype != BAS_BULKREAD)
756 : 4327 : return false;
757 : :
758 : : /* Don't muck with behavior of normal buffer-replacement strategy */
1181 andres@anarazel.de 759 [ + - - + ]: 45000 : if (!from_ring ||
3240 tgl@sss.pgh.pa.us 760 : 22500 : strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
6915 tgl@sss.pgh.pa.us 761 :LBC (621) : return false;
762 : :
763 : : /*
764 : : * Remove the dirty buffer from the ring; necessary to prevent infinite
765 : : * loop if all ring members are dirty.
766 : : */
6915 tgl@sss.pgh.pa.us 767 :CBC 22500 : strategy->buffers[strategy->current] = InvalidBuffer;
768 : :
769 : 22500 : return true;
770 : : }
|