/*-------------------------------------------------------------------------
 *
 * freelist.c
 *	  routines for managing the buffer pool's replacement strategy.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/freelist.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "pgstat.h"
#include "port/atomics.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"

#define INT_ACCESS_ONCE(var)	((int) (*((volatile int *) &(var))))


/*
 * The shared freelist control information.
 */
typedef struct
{
	/* Spinlock: protects the values below */
	slock_t		buffer_strategy_lock;

	/*
	 * clock-sweep hand: index of next buffer to consider grabbing. Note that
	 * this isn't a concrete buffer - we only ever increase the value. So, to
	 * get an actual buffer, it needs to be used modulo NBuffers.
	 */
	pg_atomic_uint32 nextVictimBuffer;

	/*
	 * Statistics.  These counters should be wide enough that they can't
	 * overflow during a single bgwriter cycle.
	 */
	uint32		completePasses; /* Complete cycles of the clock-sweep */
	pg_atomic_uint32 numBufferAllocs;	/* Buffers allocated since last reset */

	/*
	 * Bgworker process to be notified upon activity or -1 if none. See
	 * StrategyNotifyBgWriter.
	 */
	int			bgwprocno;
} BufferStrategyControl;

/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;

/*
 * Private (non-shared) state for managing a ring of shared buffers to re-use.
 * This is currently the only kind of BufferAccessStrategy object, but someday
 * we might have more kinds.
 */
typedef struct BufferAccessStrategyData
{
	/* Overall strategy type */
	BufferAccessStrategyType btype;
	/* Number of elements in buffers[] array */
	int			nbuffers;

	/*
	 * Index of the "current" slot in the ring, ie, the one most recently
	 * returned by GetBufferFromRing.
	 */
	int			current;

	/*
	 * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
	 * have not yet selected a buffer for this ring slot.  For allocation
	 * simplicity this is palloc'd together with the fixed fields of the
	 * struct.
	 */
	Buffer		buffers[FLEXIBLE_ARRAY_MEMBER];
} BufferAccessStrategyData;


/* Prototypes for internal functions */
static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
									 uint32 *buf_state);
static void AddBufferToRing(BufferAccessStrategy strategy,
							BufferDesc *buf);

/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 */
static inline uint32
ClockSweepTick(void)
{
	uint32		victim;

	/*
	 * Atomically move the hand ahead one buffer - if several processes are
	 * doing this concurrently, buffers can be returned slightly out of
	 * apparent order.
	 */
	victim =
		pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

	if (victim >= NBuffers)
	{
		uint32		originalVictim = victim;

		/* always wrap what we look up in BufferDescriptors */
		victim = victim % NBuffers;

		/*
		 * If we're the one that just caused a wraparound, force
		 * completePasses to be incremented while holding the spinlock.  We
		 * need the spinlock so StrategySyncStart() can return a consistent
		 * value consisting of nextVictimBuffer and completePasses.
		 */
		if (victim == 0)
		{
			uint32		expected;
			uint32		wrapped;
			bool		success = false;

			expected = originalVictim + 1;

			while (!success)
			{
				/*
				 * Acquire the spinlock while increasing completePasses.  That
				 * allows other readers to read nextVictimBuffer and
				 * completePasses in a consistent manner, which is required
				 * for StrategySyncStart().  In theory delaying the increment
				 * could lead to an overflow of nextVictimBuffer, but that's
				 * highly unlikely and wouldn't be particularly harmful.
				 */
				SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

				wrapped = expected % NBuffers;

				success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
														 &expected, wrapped);
				if (success)
					StrategyControl->completePasses++;
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
			}
		}
	}
	return victim;
}

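/*
 * Worked example of the wraparound handling above, with illustrative
 * values: suppose NBuffers = 128 and our fetch-and-add returned 128.  We
 * compute victim = 0, so we own the wraparound.  We then try to CAS
 * nextVictimBuffer from expected = 129 down to 129 % 128 = 1.  If other
 * backends have advanced the hand meanwhile, the CAS fails and reloads
 * "expected" with the current counter, and the loop retries with the new
 * value until the swap succeeds, at which point completePasses has been
 * incremented exactly once for this pass.
 */
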
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc().  The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.
 */
BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
{
	BufferDesc *buf;
	int			bgwprocno;
	int			trycounter;
	uint32		local_buf_state;	/* to avoid repeated (de-)referencing */

	*from_ring = false;

	/*
	 * If given a strategy object, see whether it can select a buffer.  We
	 * assume strategy objects don't need buffer_strategy_lock.
	 */
	if (strategy != NULL)
	{
		buf = GetBufferFromRing(strategy, buf_state);
		if (buf != NULL)
		{
			*from_ring = true;
			return buf;
		}
	}

	/*
	 * If asked, we need to wake the bgwriter.  Since we don't want to rely
	 * on a spinlock for this, we force a read from shared memory once, and
	 * then set the latch based on that value.  We need to go to this length
	 * because otherwise bgwprocno might be reset while/after we check, as
	 * the compiler might simply reread the value from memory.
	 *
	 * This can possibly set the latch of the wrong process if the bgwriter
	 * dies at the wrong moment.  But since PGPROC->procLatch is never
	 * deallocated, the worst consequence of that is that we set the latch of
	 * some arbitrary process.
	 */
	bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
	if (bgwprocno != -1)
	{
		/* reset bgwprocno first, before setting the latch */
		StrategyControl->bgwprocno = -1;

		/*
		 * Not acquiring ProcArrayLock here which is slightly icky.  It's
		 * actually fine because procLatch isn't ever freed, so we just can
		 * potentially set the wrong process' (or no process') latch.
		 */
		SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
	}

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.  Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

	/* Use the "clock sweep" algorithm to find a free buffer */
	trycounter = NBuffers;
	for (;;)
	{
		buf = GetBufferDescriptor(ClockSweepTick());

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot
		 * use it; decrement the usage_count (unless pinned) and keep
		 * scanning.
		 */
		local_buf_state = LockBufHdr(buf);

		if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
		{
			if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
			{
				local_buf_state -= BUF_USAGECOUNT_ONE;

				trycounter = NBuffers;
			}
			else
			{
				/* Found a usable buffer */
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				*buf_state = local_buf_state;
				return buf;
			}
		}
		else if (--trycounter == 0)
		{
			/*
			 * We've scanned all the buffers without making any state
			 * changes, so all the buffers are pinned (or were when we looked
			 * at them).  We could hope that someone will free one
			 * eventually, but it's probably better to fail than to risk
			 * getting stuck in an infinite loop.
			 */
			UnlockBufHdr(buf, local_buf_state);
			elog(ERROR, "no unpinned buffers available");
		}
		UnlockBufHdr(buf, local_buf_state);
	}
}

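/*
 * Example of the usage-count decay in the loop above, with illustrative
 * values: an unpinned buffer whose usage_count is 3 is skipped on three
 * successive visits of the hand, its count dropping 3 -> 2 -> 1 -> 0, and
 * only a later visit that still finds it unpinned can return it.  Because
 * every decrement resets trycounter, the "no unpinned buffers available"
 * error is reached only after NBuffers consecutive ticks saw nothing but
 * pinned buffers.
 */
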
/*
 * StrategySyncStart -- tell BgBufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BgBufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed.  The alloc count is reset after
 * being read.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
	uint32		nextVictimBuffer;
	int			result;

	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
	result = nextVictimBuffer % NBuffers;

	if (complete_passes)
	{
		*complete_passes = StrategyControl->completePasses;

		/*
		 * Additionally add the number of wraparounds that happened before
		 * completePasses could be incremented.  Cf. ClockSweepTick().
		 */
		*complete_passes += nextVictimBuffer / NBuffers;
	}

	if (num_buf_alloc)
	{
		*num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
	}
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
	return result;
}

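/*
 * Example with illustrative values: with NBuffers = 128, completePasses =
 * 5, and nextVictimBuffer = 138 (a wraparound has occurred but the CAS in
 * ClockSweepTick() has not yet folded the counter back below NBuffers),
 * the caller gets result = 138 % 128 = 10 and *complete_passes = 5 +
 * 138 / 128 = 6 - the same answer it would get once the pending increment
 * lands.
 */
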
/*
 * StrategyNotifyBgWriter -- set or clear allocation notification latch
 *
 * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
 * set that latch.  Pass -1 to clear the pending notification before it
 * happens.  This feature is used by the bgwriter process to wake itself up
 * from hibernation, and is not meant for anybody else to use.
 */
void
StrategyNotifyBgWriter(int bgwprocno)
{
	/*
	 * We acquire buffer_strategy_lock just to ensure that the store appears
	 * atomic to StrategyGetBuffer.  The bgwriter should call this rather
	 * infrequently, so there's no performance penalty from being safe.
	 */
	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	StrategyControl->bgwprocno = bgwprocno;
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}


/*
 * StrategyShmemSize
 *
 * estimate the size of shared memory used by the freelist-related structures.
 *
 * Note: for somewhat historical reasons, the buffer lookup hashtable size
 * is also determined here.
 */
Size
StrategyShmemSize(void)
{
	Size		size = 0;

	/* size of lookup hash table ... see comment in StrategyInitialize */
	size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));

	/* size of the shared replacement strategy control block */
	size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));

	return size;
}

/*
 * StrategyInitialize -- initialize the buffer cache replacement
 *		strategy.
 *
 * Assumes: All of the buffers are already built into a linked list.
 *		Only called by postmaster and only during initialization.
 */
void
StrategyInitialize(bool init)
{
	bool		found;

	/*
	 * Initialize the shared buffer lookup hashtable.
	 *
	 * Since we can't tolerate running out of lookup table entries, we must
	 * be sure to specify an adequate table size here.  The maximum
	 * steady-state usage is of course NBuffers entries, but BufferAlloc()
	 * tries to insert a new entry before deleting the old.  In principle
	 * this could be happening in each partition concurrently, so we could
	 * need as many as NBuffers + NUM_BUFFER_PARTITIONS entries.
	 */
	InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

	/*
	 * Get or create the shared strategy control block
	 */
	StrategyControl = (BufferStrategyControl *)
		ShmemInitStruct("Buffer Strategy Status",
						sizeof(BufferStrategyControl),
						&found);

	if (!found)
	{
		/*
		 * Only done once, usually in postmaster
		 */
		Assert(init);

		SpinLockInit(&StrategyControl->buffer_strategy_lock);

		/* Initialize the clock-sweep pointer */
		pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);

		/* Clear statistics */
		StrategyControl->completePasses = 0;
		pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

		/* No pending notification */
		StrategyControl->bgwprocno = -1;
	}
	else
		Assert(!init);
}


/* ----------------------------------------------------------------
 *				Backend-private buffer ring management
 * ----------------------------------------------------------------
 */


/*
 * GetAccessStrategy -- create a BufferAccessStrategy object
 *
 * The object is allocated in the current memory context.
 */
BufferAccessStrategy
GetAccessStrategy(BufferAccessStrategyType btype)
{
	int			ring_size_kb;

	/*
	 * Select ring size to use.  See buffer/README for rationales.
	 *
	 * Note: if you change the ring size for BAS_BULKREAD, see also
	 * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
	 */
	switch (btype)
	{
		case BAS_NORMAL:
			/* if someone asks for NORMAL, just give 'em a "default" object */
			return NULL;

		case BAS_BULKREAD:
			{
				int			ring_max_kb;

				/*
				 * The ring always needs to be large enough to allow some
				 * separation in time between providing a buffer to the user
				 * of the strategy and that buffer being reused.  Otherwise
				 * the user's pin will prevent reuse of the buffer, even
				 * without concurrent activity.
				 *
				 * We also need to ensure the ring is always large enough for
				 * SYNC_SCAN_REPORT_INTERVAL, as noted above.
				 *
				 * Thus we start out at a minimal size and increase the size
				 * further if appropriate.
				 */
				ring_size_kb = 256;

				/*
				 * There's no point in a larger ring if we won't be allowed
				 * to pin sufficiently many buffers.  But we never limit to
				 * less than the minimal size above.
				 */
				ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
				ring_max_kb = Max(ring_size_kb, ring_max_kb);

				/*
				 * We would like the ring to additionally have space for the
				 * configured degree of IO concurrency.  While being read in,
				 * buffers can obviously not yet be reused.
				 *
				 * Each IO can be up to io_combine_limit blocks large, and we
				 * want to start up to effective_io_concurrency IOs.
				 *
				 * Note that effective_io_concurrency may be 0, which
				 * disables AIO.
				 */
				ring_size_kb += (BLCKSZ / 1024) *
					io_combine_limit * effective_io_concurrency;

				if (ring_size_kb > ring_max_kb)
					ring_size_kb = ring_max_kb;
				break;
			}
		case BAS_BULKWRITE:
			ring_size_kb = 16 * 1024;
			break;
		case BAS_VACUUM:
			ring_size_kb = 2048;
			break;

		default:
			elog(ERROR, "unrecognized buffer access strategy: %d",
				 (int) btype);
			return NULL;		/* keep compiler quiet */
	}

	return GetAccessStrategyWithSize(btype, ring_size_kb);
}

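/*
 * Worked example for the BAS_BULKREAD sizing above, using illustrative
 * settings and the default BLCKSZ of 8192 (8kB per buffer): starting from
 * the 256kB minimum, io_combine_limit = 16 and effective_io_concurrency =
 * 16 add 8 * 16 * 16 = 2048kB, giving 2304kB; that total is then clamped
 * to ring_max_kb, which is derived from GetPinLimit() but never less than
 * the 256kB minimum.
 */
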
/*
 * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
 *		number of buffers equivalent to the passed-in size.
 *
 * If the given ring size is 0, no BufferAccessStrategy will be created and
 * the function will return NULL.  ring_size_kb must not be negative.
 */
BufferAccessStrategy
GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
{
	int			ring_buffers;
	BufferAccessStrategy strategy;

	Assert(ring_size_kb >= 0);

	/* Figure out how many buffers ring_size_kb is */
	ring_buffers = ring_size_kb / (BLCKSZ / 1024);

	/* 0 means unlimited, so no BufferAccessStrategy required */
	if (ring_buffers == 0)
		return NULL;

	/* Cap to 1/8th of shared_buffers */
	ring_buffers = Min(NBuffers / 8, ring_buffers);

	/* NBuffers should never be less than 16, so this shouldn't happen */
	Assert(ring_buffers > 0);

	/* Allocate the object and initialize all elements to zeroes */
	strategy = (BufferAccessStrategy)
		palloc0(offsetof(BufferAccessStrategyData, buffers) +
				ring_buffers * sizeof(Buffer));

	/* Set fields that don't start out zero */
	strategy->btype = btype;
	strategy->nbuffers = ring_buffers;

	return strategy;
}

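/*
 * Example with illustrative values (default BLCKSZ of 8192): requesting
 * GetAccessStrategyWithSize(BAS_VACUUM, 2048) yields a ring of 2048 / 8 =
 * 256 buffers, unless the NBuffers / 8 cap bites first - e.g. with
 * NBuffers = 1024 the ring is limited to 128 buffers.
 */
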
/*
 * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
 *		the ring
 *
 * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
 * returning NULL with 0 size.
 */
int
GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
{
	if (strategy == NULL)
		return 0;

	return strategy->nbuffers;
}

/*
 * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
 *
 * When pinning extra buffers to look ahead, users of a ring-based strategy are
 * in danger of pinning too much of the ring at once while performing look-ahead.
 * For some strategies, that means "escaping" from the ring, and in others it
 * means forcing dirty data to disk very frequently with associated WAL
 * flushing.  Since external code has no insight into any of that, allow
 * individual strategy types to expose a clamp that should be applied when
 * deciding on a maximum number of buffers to pin at once.
 *
 * Callers should combine this number with other relevant limits and take the
 * minimum.
 */
int
GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
{
	if (strategy == NULL)
		return NBuffers;

	switch (strategy->btype)
	{
		case BAS_BULKREAD:

			/*
			 * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
			 * shouldn't be a problem and the caller is free to pin up to the
			 * entire ring at once.
			 */
			return strategy->nbuffers;

		default:

			/*
			 * Tell caller not to pin more than half the buffers in the ring.
			 * This is a trade-off between look-ahead distance and deferring
			 * writeback and associated WAL traffic.
			 */
			return strategy->nbuffers / 2;
	}
}

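/*
 * Example with illustrative values: a BAS_BULKWRITE ring of 2048 buffers
 * reports a pin limit of 1024, telling look-ahead code to keep at most
 * half the ring pinned, whereas an equally sized BAS_BULKREAD ring reports
 * the full 2048.
 */
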
/*
 * FreeAccessStrategy -- release a BufferAccessStrategy object
 *
 * A simple pfree would do at the moment, but we would prefer that callers
 * don't assume that much about the representation of BufferAccessStrategy.
 */
void
FreeAccessStrategy(BufferAccessStrategy strategy)
{
	/* don't crash if called on a "default" strategy */
	if (strategy != NULL)
		pfree(strategy);
}

/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *		ring is empty / not usable.
 *
 * The bufhdr spinlock is held on the returned buffer.
 */
static BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
{
	BufferDesc *buf;
	Buffer		bufnum;
	uint32		local_buf_state;	/* to avoid repeated (de-)referencing */


	/* Advance to next ring slot */
	if (++strategy->current >= strategy->nbuffers)
		strategy->current = 0;

	/*
	 * If the slot hasn't been filled yet, tell the caller to allocate a new
	 * buffer with the normal allocation strategy.  The caller will then fill
	 * this slot by calling AddBufferToRing with the new buffer.
	 */
	bufnum = strategy->buffers[strategy->current];
	if (bufnum == InvalidBuffer)
		return NULL;

	/*
	 * If the buffer is pinned we cannot use it under any circumstances.
	 *
	 * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
	 * since our own previous usage of the ring element would have left it
	 * there, but it might've been decremented by the clock-sweep since
	 * then).  A higher usage_count indicates someone else has touched the
	 * buffer, so we shouldn't re-use it.
	 */
	buf = GetBufferDescriptor(bufnum - 1);
	local_buf_state = LockBufHdr(buf);
	if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
		&& BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
	{
		*buf_state = local_buf_state;
		return buf;
	}
	UnlockBufHdr(buf, local_buf_state);

	/*
	 * Tell the caller to allocate a new buffer with the normal allocation
	 * strategy.  It will then replace this ring element via AddBufferToRing.
	 */
	return NULL;
}

/*
 * AddBufferToRing -- add a buffer to the buffer ring
 *
 * Caller must hold the buffer header spinlock on the buffer.  Since this
 * is called with the spinlock held, it had better be quite cheap.
 */
static void
AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
{
	strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}

/*
 * Utility function returning the IOContext of a given BufferAccessStrategy's
 * strategy ring.
 */
IOContext
IOContextForStrategy(BufferAccessStrategy strategy)
{
	if (!strategy)
		return IOCONTEXT_NORMAL;

	switch (strategy->btype)
	{
		case BAS_NORMAL:

			/*
			 * Currently, GetAccessStrategy() returns NULL for
			 * BufferAccessStrategyType BAS_NORMAL, so this case is
			 * unreachable.
			 */
			pg_unreachable();
			return IOCONTEXT_NORMAL;
		case BAS_BULKREAD:
			return IOCONTEXT_BULKREAD;
		case BAS_BULKWRITE:
			return IOCONTEXT_BULKWRITE;
		case BAS_VACUUM:
			return IOCONTEXT_VACUUM;
	}

	elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
	pg_unreachable();
}

/*
 * StrategyRejectBuffer -- consider rejecting a dirty buffer
 *
 * When a nondefault strategy is used, the buffer manager calls this function
 * when it turns out that the buffer selected by StrategyGetBuffer needs to
 * be written out and doing so would require flushing WAL too.  This gives us
 * a chance to choose a different victim.
 *
 * Returns true if the buffer manager should ask for a new victim, and false
 * if this buffer should be written and re-used.
 */
bool
StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
{
	/* We only do this in bulkread mode */
	if (strategy->btype != BAS_BULKREAD)
		return false;

	/* Don't muck with behavior of normal buffer-replacement strategy */
	if (!from_ring ||
		strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
		return false;

	/*
	 * Remove the dirty buffer from the ring; necessary to prevent an
	 * infinite loop if all ring members are dirty.
	 */
	strategy->buffers[strategy->current] = InvalidBuffer;

	return true;
}