Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * visibilitymap.c
4 : : * bitmap for tracking visibility of heap tuples
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/visibilitymap.c
12 : : *
13 : : * INTERFACE ROUTINES
14 : : * visibilitymap_clear - clear bits for one page in the visibility map
15 : : * visibilitymap_pin - pin a map page for setting a bit
16 : : * visibilitymap_pin_ok - check whether correct map page is already pinned
17 : : * visibilitymap_set - set bit(s) in a previously pinned page and log
18 : : * visibilitymap_set_vmbits - set bit(s) in a pinned page
19 : : * visibilitymap_get_status - get status of bits
20 : : * visibilitymap_count - count number of bits set in visibility map
21 : : * visibilitymap_prepare_truncate -
22 : : * prepare for truncation of the visibility map
23 : : *
24 : : * NOTES
25 : : *
26 : : * The visibility map is a bitmap with two bits (all-visible and all-frozen)
27 : : * per heap page. A set all-visible bit means that all tuples on the page are
28 : : * known visible to all transactions, and therefore the page doesn't need to
29 : : * be vacuumed. A set all-frozen bit means that all tuples on the page are
30 : : * completely frozen, and therefore the page doesn't need to be vacuumed even
31 : : * if a whole-table-scanning vacuum is required (e.g. an anti-wraparound vacuum).
32 : : * The all-frozen bit must be set only when the page is already all-visible.
33 : : *
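 : : * As a concrete illustration (using the flag values defined in
 : : * access/visibilitymap.h: VISIBILITYMAP_ALL_VISIBLE = 0x01,
 : : * VISIBILITYMAP_ALL_FROZEN = 0x02), each heap page's bit pair, written
 : : * as (all-frozen bit, all-visible bit), can be in one of three legal
 : : * states:
 : : *
 : : *     00 - neither bit set; the page may contain not-all-visible tuples
 : : *     01 - all-visible only; every tuple is visible, but not all are frozen
 : : *     11 - all-visible and all-frozen; even an aggressive (anti-wraparound)
 : : *          vacuum can skip the page
 : : *
 : : * The fourth combination, 10 (all-frozen without all-visible), is never
 : : * valid.
 : : *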
34 : : * The map is conservative in the sense that we make sure that whenever a bit
35 : : * is set, we know the condition is true, but if a bit is not set, it might or
36 : : * might not be true.
37 : : *
38 : : * Clearing visibility map bits is not separately WAL-logged. The callers
39 : : * must make sure that whenever a bit is cleared, the bit is cleared on WAL
40 : : * replay of the updating operation as well.
41 : : *
42 : : * When we *set* a visibility map bit during VACUUM, we must write WAL. This may
43 : : * seem counterintuitive, since the bit is basically a hint: if it is clear,
44 : : * it may still be the case that every tuple on the page is visible to all
45 : : * transactions; we just don't know that for certain. The difficulty is that
46 : : * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
47 : : * on the page itself, and the visibility map bit. If a crash occurs after the
48 : : * visibility map page makes it to disk and before the updated heap page makes
49 : : * it to disk, redo must set the bit on the heap page. Otherwise, the next
50 : : * insert, update, or delete on the heap page will fail to realize that the
51 : : * visibility map bit must be cleared, possibly causing index-only scans to
52 : : * return wrong answers.
53 : : *
54 : : * VACUUM will normally skip pages for which the visibility map bit is set;
55 : : * such pages can't contain any dead tuples and therefore don't need vacuuming.
56 : : *
57 : : * LOCKING
58 : : *
59 : : * In heapam.c, whenever a page is modified so that not all tuples on the
60 : : * page are visible to everyone anymore, the corresponding bit in the
61 : : * visibility map is cleared. In order to be crash-safe, we need to do this
62 : : * while still holding a lock on the heap page and in the same critical
63 : : * section that logs the page modification. However, we don't want to hold
64 : : * the buffer lock over any I/O that may be required to read in the visibility
65 : : * map page. To avoid this, we examine the heap page before locking it;
66 : : * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
67 : : * page. Then, we lock the buffer. But this creates a race condition: there
68 : : * is a possibility that in the time it takes to lock the buffer, the
69 : : * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
70 : : * buffer, pin the visibility map page, and relock the buffer. This shouldn't
71 : : * happen often, because only VACUUM currently sets visibility map bits,
72 : : * and the race will only occur if VACUUM processes a given page at almost
73 : : * exactly the same time that someone tries to further modify it.
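 : : *
 : : * A simplified sketch of that caller-side dance (the real code in
 : : * heapam.c handles more cases) looks roughly like this:
 : : *
 : : *     if (PageIsAllVisible(page))
 : : *         visibilitymap_pin(rel, block, &vmbuffer);   -- may do I/O
 : : *     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 : : *     if (PageIsAllVisible(page) && !BufferIsValid(vmbuffer))
 : : *     {
 : : *         -- lost the race: the bit was set while the page was unlocked
 : : *         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 : : *         visibilitymap_pin(rel, block, &vmbuffer);
 : : *         LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 : : *         ... and recheck the page state ...
 : : *     }
 : : *     ... modify the page, clear PD_ALL_VISIBLE, and call
 : : *     visibilitymap_clear() within the WAL-logging critical section ...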
74 : : *
75 : : * To set a bit, you need to hold a lock on the heap page. That prevents
76 : : * the race condition where VACUUM sees that all tuples on the page are
77 : : * visible to everyone, but another backend modifies the page before VACUUM
78 : : * sets the bit in the visibility map.
79 : : *
80 : : * When a bit is set, the LSN of the visibility map page is updated to make
81 : : * sure that the visibility map update doesn't get written to disk before the
82 : : * WAL record of the changes that made it possible to set the bit is flushed.
83 : : * But when a bit is cleared, we don't have to do that because it's always
84 : : * safe to clear a bit in the map from a correctness point of view.
85 : : *
86 : : *-------------------------------------------------------------------------
87 : : */
88 : : #include "postgres.h"
89 : :
90 : : #include "access/heapam_xlog.h"
91 : : #include "access/visibilitymap.h"
92 : : #include "access/xloginsert.h"
93 : : #include "access/xlogutils.h"
94 : : #include "miscadmin.h"
95 : : #include "port/pg_bitutils.h"
96 : : #include "storage/bufmgr.h"
97 : : #include "storage/smgr.h"
98 : : #include "utils/inval.h"
99 : : #include "utils/rel.h"
100 : :
101 : :
102 : : /*#define TRACE_VISIBILITYMAP */
103 : :
104 : : /*
105 : : * Size of the bitmap on each visibility map page, in bytes. There are no
106 : : * extra headers, so the whole page minus the standard page header is
107 : : * used for the bitmap.
108 : : */
109 : : #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
110 : :
111 : : /* Number of heap blocks we can represent in one byte */
112 : : #define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
113 : :
114 : : /* Number of heap blocks we can represent in one visibility map page. */
115 : : #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
116 : :
117 : : /* Mapping from heap block number to the right bit in the visibility map */
118 : : #define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
119 : : #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
120 : : #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
121 : :
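 : : /*
 : : * Worked example, assuming the default 8 kB BLCKSZ (so MAPSIZE is
 : : * 8192 - 24 = 8168 bytes): HEAPBLOCKS_PER_BYTE is 4 and
 : : * HEAPBLOCKS_PER_PAGE is 8168 * 4 = 32672, so a single visibility map
 : : * page covers 32672 heap pages, i.e. roughly 255 MB of heap. For heap
 : : * block 40001, HEAPBLK_TO_MAPBLOCK yields 40001 / 32672 = 1,
 : : * HEAPBLK_TO_MAPBYTE yields (40001 % 32672) / 4 = 1832, and
 : : * HEAPBLK_TO_OFFSET yields (40001 % 4) * 2 = 2: the block's all-visible
 : : * and all-frozen bits are bits 2 and 3 of byte 1832 on VM page 1.
 : : */
 : :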
122 : : /* Masks for counting subsets of bits in the visibility map. */
123 : : #define VISIBLE_MASK8 (0x55) /* The lower bit of each bit pair */
124 : : #define FROZEN_MASK8 (0xaa) /* The upper bit of each bit pair */
125 : :
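 : : /*
 : : * With these masks, counting the pages of one kind covered by a map page
 : : * is a single masked popcount over its contents, as visibilitymap_count()
 : : * does below:
 : : *
 : : *     nvisible += pg_popcount_masked((const char *) map, MAPSIZE, VISIBLE_MASK8);
 : : *     nfrozen += pg_popcount_masked((const char *) map, MAPSIZE, FROZEN_MASK8);
 : : */
 : :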
126 : : /* prototypes for internal routines */
127 : : static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
128 : : static Buffer vm_extend(Relation rel, BlockNumber vm_nblocks);
129 : :
130 : :
131 : : /*
132 : : * visibilitymap_clear - clear specified bits for one page in visibility map
133 : : *
134 : : * You must pass a buffer containing the correct map page to this function.
135 : : * Call visibilitymap_pin first to pin the right one. This function doesn't do
136 : : * any I/O. Returns true if any bits have been cleared and false otherwise.
137 : : */
138 : : bool
1135 pg@bowt.ie 139 :CBC 19048 : visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
140 : : {
6173 heikki.linnakangas@i 141 : 19048 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
142 : 19048 : int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3434 rhaas@postgresql.org 143 : 19048 : int mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
3389 andres@anarazel.de 144 : 19048 : uint8 mask = flags << mapOffset;
145 : : char *map;
146 : 19048 : bool cleared = false;
147 : :
148 : : /* Must never clear all_visible bit while leaving all_frozen bit set */
149 [ - + ]: 19048 : Assert(flags & VISIBILITYMAP_VALID_BITS);
1016 pg@bowt.ie 150 [ - + ]: 19048 : Assert(flags != VISIBILITYMAP_ALL_VISIBLE);
151 : :
152 : : #ifdef TRACE_VISIBILITYMAP
153 : : elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
154 : : #endif
155 : :
1135 156 [ + - - + ]: 19048 : if (!BufferIsValid(vmbuf) || BufferGetBlockNumber(vmbuf) != mapBlock)
5243 rhaas@postgresql.org 157 [ # # ]:UBC 0 : elog(ERROR, "wrong buffer passed to visibilitymap_clear");
158 : :
1135 pg@bowt.ie 159 :CBC 19048 : LockBuffer(vmbuf, BUFFER_LOCK_EXCLUSIVE);
160 : 19048 : map = PageGetContents(BufferGetPage(vmbuf));
161 : :
6173 heikki.linnakangas@i 162 [ + + ]: 19048 : if (map[mapByte] & mask)
163 : : {
164 : 17000 : map[mapByte] &= ~mask;
165 : :
1135 pg@bowt.ie 166 : 17000 : MarkBufferDirty(vmbuf);
3389 andres@anarazel.de 167 : 17000 : cleared = true;
168 : : }
169 : :
1135 pg@bowt.ie 170 : 19048 : LockBuffer(vmbuf, BUFFER_LOCK_UNLOCK);
171 : :
3389 andres@anarazel.de 172 : 19048 : return cleared;
173 : : }
174 : :
175 : : /*
176 : : * visibilitymap_pin - pin a map page for setting a bit
177 : : *
178 : : * Setting a bit in the visibility map is a two-phase operation. First, call
179 : : * visibilitymap_pin, to pin the visibility map page containing the bit for
180 : : * the heap page. Because that can require I/O to read the map page, you
181 : : * shouldn't hold a lock on the heap page while doing that. Then, call
182 : : * visibilitymap_set to actually set the bit.
183 : : *
184 : : * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by
185 : : * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
186 : : * relation. On return, *vmbuf is a valid buffer with the map page containing
187 : : * the bit for heapBlk.
188 : : *
189 : : * If the page doesn't exist in the map file yet, it is extended.
190 : : */
191 : : void
1135 pg@bowt.ie 192 : 107937 : visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
193 : : {
6173 heikki.linnakangas@i 194 : 107937 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
195 : :
196 : : /* Reuse the old pinned buffer if possible */
1135 pg@bowt.ie 197 [ + + ]: 107937 : if (BufferIsValid(*vmbuf))
198 : : {
199 [ + - ]: 83160 : if (BufferGetBlockNumber(*vmbuf) == mapBlock)
6173 heikki.linnakangas@i 200 : 83160 : return;
201 : :
1135 pg@bowt.ie 202 :UBC 0 : ReleaseBuffer(*vmbuf);
203 : : }
1135 pg@bowt.ie 204 :CBC 24777 : *vmbuf = vm_readbuf(rel, mapBlock, true);
205 : : }
206 : :
207 : : /*
208 : : * visibilitymap_pin_ok - do we already have the correct page pinned?
209 : : *
210 : : * On entry, vmbuf should be InvalidBuffer or a valid buffer returned by
211 : : * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
212 : : * relation. The return value indicates whether the buffer covers the
213 : : * given heapBlk.
214 : : */
215 : : bool
216 : 15178 : visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
217 : : {
5243 rhaas@postgresql.org 218 : 15178 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
219 : :
1135 pg@bowt.ie 220 [ + + + - ]: 15178 : return BufferIsValid(vmbuf) && BufferGetBlockNumber(vmbuf) == mapBlock;
221 : : }
222 : :
223 : : /*
224 : : * visibilitymap_set - set bit(s) on a previously pinned page
225 : : *
226 : : * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
227 : : * or InvalidXLogRecPtr in normal running. The VM page LSN is advanced to the
228 : : * one provided; in normal running, we generate a new XLOG record and set the
229 : : * page LSN to that value (though the heap page's LSN may *not* be updated;
230 : : * see below). cutoff_xid is the largest xmin on the page being marked
231 : : * all-visible; it is needed for Hot Standby, and can be InvalidTransactionId
232 : : * if the page contains no tuples. It can also be set to InvalidTransactionId
233 : : * when a page that is already all-visible is being marked all-frozen.
234 : : *
235 : : * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
236 : : * this function. Except in recovery, caller should also pass the heap
237 : : * buffer. When checksums are enabled and we're not in recovery, we must add
238 : : * the heap buffer to the WAL chain to protect it from being torn.
239 : : *
240 : : * You must pass a buffer containing the correct map page to this function.
241 : : * Call visibilitymap_pin first to pin the right one. This function doesn't do
242 : : * any I/O.
243 : : *
244 : : * Returns the state of the page's VM bits before setting flags.
245 : : */
246 : : uint8
4603 simon@2ndQuadrant.co 247 : 42292 : visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
248 : : XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
249 : : uint8 flags)
250 : : {
6173 heikki.linnakangas@i 251 : 42292 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
252 : 42292 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3434 rhaas@postgresql.org 253 : 42292 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
254 : : Page page;
255 : : uint8 *map;
256 : : uint8 status;
257 : :
258 : : #ifdef TRACE_VISIBILITYMAP
259 : : elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
260 : : flags, RelationGetRelationName(rel), heapBlk);
261 : : #endif
262 : :
5243 263 [ + + - + ]: 42292 : Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
60 peter@eisentraut.org 264 [ + + - + ]:GNC 42292 : Assert(InRecovery || PageIsAllVisible(BufferGetPage(heapBuf)));
941 andres@anarazel.de 265 [ - + ]:CBC 42292 : Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
266 : :
267 : : /* Must never set all_frozen bit without also setting all_visible bit */
1016 pg@bowt.ie 268 [ - + ]: 42292 : Assert(flags != VISIBILITYMAP_ALL_FROZEN);
269 : :
270 : : /* Check that we have the right heap page pinned, if present */
4603 simon@2ndQuadrant.co 271 [ + + - + ]: 42292 : if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
4603 simon@2ndQuadrant.co 272 [ # # ]:UBC 0 : elog(ERROR, "wrong heap buffer passed to visibilitymap_set");
273 : :
20 andres@anarazel.de 274 [ + + - + ]:GNC 42292 : Assert(!BufferIsValid(heapBuf) ||
275 : : BufferIsLockedByMeInMode(heapBuf, BUFFER_LOCK_EXCLUSIVE));
276 : :
277 : : /* Check that we have the right VM page pinned */
4603 simon@2ndQuadrant.co 278 [ + - - + ]:CBC 42292 : if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
4603 simon@2ndQuadrant.co 279 [ # # ]:UBC 0 : elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
280 : :
3478 kgrittn@postgresql.o 281 :CBC 42292 : page = BufferGetPage(vmBuf);
3428 rhaas@postgresql.org 282 : 42292 : map = (uint8 *) PageGetContents(page);
4603 simon@2ndQuadrant.co 283 : 42292 : LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
284 : :
315 melanieplageman@gmai 285 : 42292 : status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
286 [ + - ]: 42292 : if (flags != status)
287 : : {
5243 rhaas@postgresql.org 288 : 42292 : START_CRIT_SECTION();
289 : :
3434 290 : 42292 : map[mapByte] |= (flags << mapOffset);
4603 simon@2ndQuadrant.co 291 : 42292 : MarkBufferDirty(vmBuf);
292 : :
5243 rhaas@postgresql.org 293 [ + + + + : 42292 : if (RelationNeedsWAL(rel))
+ - + - ]
294 : : {
295 [ + + ]: 40734 : if (XLogRecPtrIsInvalid(recptr))
296 : : {
4603 simon@2ndQuadrant.co 297 [ - + ]: 36143 : Assert(!InRecovery);
941 andres@anarazel.de 298 : 36143 : recptr = log_heap_visible(rel, heapBuf, vmBuf, cutoff_xid, flags);
299 : :
300 : : /*
301 : : * If data checksums are enabled (or wal_log_hints=on), we
302 : : * need to protect the heap page from being torn.
303 : : *
304 : : * If not, then we must *not* update the heap page's LSN. In
305 : : * this case, the FPI for the heap page was omitted from the
306 : : * WAL record inserted above, so it would be incorrect to
307 : : * update the heap page's LSN.
308 : : */
4337 heikki.linnakangas@i 309 [ + + - + ]: 36143 : if (XLogHintBitIsNeeded())
310 : : {
3478 kgrittn@postgresql.o 311 : 32993 : Page heapPage = BufferGetPage(heapBuf);
312 : :
4603 simon@2ndQuadrant.co 313 : 32993 : PageSetLSN(heapPage, recptr);
314 : : }
315 : : }
6173 heikki.linnakangas@i 316 : 40734 : PageSetLSN(page, recptr);
317 : : }
318 : :
5243 rhaas@postgresql.org 319 [ - + ]: 42292 : END_CRIT_SECTION();
320 : : }
321 : :
4603 simon@2ndQuadrant.co 322 : 42292 : LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
315 melanieplageman@gmai 323 : 42292 : return status;
324 : : }
325 : :
326 : : /*
327 : : * Set VM (visibility map) flags in the VM block in vmBuf.
328 : : *
329 : : * This function is intended for callers that log VM changes together
330 : : * with the heap page modifications that rendered the page all-visible.
331 : : * Callers that log VM changes separately should use visibilitymap_set().
332 : : *
333 : : * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
334 : : * corresponding to heapBlk.
335 : : *
336 : : * In normal operation (not recovery), this must be called inside a critical
337 : : * section that also applies the necessary heap page changes and, if
338 : : * applicable, emits WAL.
339 : : *
340 : : * The caller is responsible for ensuring consistency between the heap page
341 : : * and the VM page by holding a pin and exclusive lock on the buffer
342 : : * containing heapBlk.
343 : : *
344 : : * rlocator is used only for debugging messages.
345 : : */
346 : : uint8
19 melanieplageman@gmai 347 :GNC 19945 : visibilitymap_set_vmbits(BlockNumber heapBlk,
348 : : Buffer vmBuf, uint8 flags,
349 : : const RelFileLocator rlocator)
350 : : {
351 : 19945 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
352 : 19945 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
353 : 19945 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
354 : : Page page;
355 : : uint8 *map;
356 : : uint8 status;
357 : :
358 : : #ifdef TRACE_VISIBILITYMAP
359 : : elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
360 : : flags,
361 : : relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
362 : : heapBlk);
363 : : #endif
364 : :
365 : : /* Call in same critical section where WAL is emitted. */
366 [ + + - + ]: 19945 : Assert(InRecovery || CritSectionCount > 0);
367 : :
368 : : /* Flags should be valid. Also never clear bits with this function */
369 [ - + ]: 19945 : Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
370 : :
371 : : /* Must never set all_frozen bit without also setting all_visible bit */
372 [ - + ]: 19945 : Assert(flags != VISIBILITYMAP_ALL_FROZEN);
373 : :
374 : : /* Check that we have the right VM page pinned */
375 [ + - - + ]: 19945 : if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
19 melanieplageman@gmai 376 [ # # ]:UNC 0 : elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
377 : :
19 melanieplageman@gmai 378 [ - + ]:GNC 19945 : Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
379 : :
380 : 19945 : page = BufferGetPage(vmBuf);
381 : 19945 : map = (uint8 *) PageGetContents(page);
382 : :
383 : 19945 : status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
384 [ + - ]: 19945 : if (flags != status)
385 : : {
386 : 19945 : map[mapByte] |= (flags << mapOffset);
387 : 19945 : MarkBufferDirty(vmBuf);
388 : : }
389 : :
390 : 19945 : return status;
391 : : }
392 : :
393 : : /*
394 : : * visibilitymap_get_status - get status of bits
395 : : *
396 : : * Are all tuples on heapBlk visible to all transactions, or all frozen,
397 : : * according to the visibility map?
398 : : *
399 : : * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by an
400 : : * earlier call to visibilitymap_pin or visibilitymap_get_status on the same
401 : : * relation. On return, *vmbuf is a valid buffer with the map page containing
402 : : * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
403 : : * releasing *vmbuf after it's done testing and setting bits.
404 : : *
405 : : * NOTE: This function is typically called without a lock on the heap page,
406 : : * so somebody else could change the bit just after we look at it. In fact,
407 : : * since we don't lock the visibility map page either, it's even possible that
408 : : * someone else could have changed the bit just before we look at it, but yet
409 : : * we might see the old value. It is the caller's responsibility to deal with
410 : : * all concurrency issues!
411 : : */
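 : : /*
 : : * Callers usually test the result through the VM_ALL_VISIBLE() and
 : : * VM_ALL_FROZEN() convenience macros in access/visibilitymap.h. For
 : : * instance, an index-only scan does roughly (sketch):
 : : *
 : : *     if (VM_ALL_VISIBLE(rel, ItemPointerGetBlockNumber(tid), &vmbuffer))
 : : *         ... the tuple's visibility need not be checked against the heap ...
 : : */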
412 : : uint8
1135 pg@bowt.ie 413 :CBC 3033587 : visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
414 : : {
6173 heikki.linnakangas@i 415 : 3033587 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
416 : 3033587 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3434 rhaas@postgresql.org 417 : 3033587 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
418 : : char *map;
419 : : uint8 result;
420 : :
421 : : #ifdef TRACE_VISIBILITYMAP
422 : : elog(DEBUG1, "vm_get_status %s %d", RelationGetRelationName(rel), heapBlk);
423 : : #endif
424 : :
425 : : /* Reuse the old pinned buffer if possible */
1135 pg@bowt.ie 426 [ + + ]: 3033587 : if (BufferIsValid(*vmbuf))
427 : : {
428 [ - + ]: 2200977 : if (BufferGetBlockNumber(*vmbuf) != mapBlock)
429 : : {
1135 pg@bowt.ie 430 :UBC 0 : ReleaseBuffer(*vmbuf);
431 : 0 : *vmbuf = InvalidBuffer;
432 : : }
433 : : }
434 : :
1135 pg@bowt.ie 435 [ + + ]:CBC 3033587 : if (!BufferIsValid(*vmbuf))
436 : : {
437 : 832610 : *vmbuf = vm_readbuf(rel, mapBlock, false);
438 [ + + ]: 832610 : if (!BufferIsValid(*vmbuf))
96 nathan@postgresql.or 439 :GNC 767842 : return (uint8) 0;
440 : : }
441 : :
1135 pg@bowt.ie 442 :CBC 2265745 : map = PageGetContents(BufferGetPage(*vmbuf));
443 : :
444 : : /*
445 : : * A single byte read is atomic. There could be memory-ordering effects
446 : : * here, but for performance reasons we make it the caller's job to worry
447 : : * about that.
448 : : */
3434 rhaas@postgresql.org 449 : 2265745 : result = ((map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS);
450 : 2265745 : return result;
451 : : }
452 : :
453 : : /*
454 : : * visibilitymap_count - count number of bits set in visibility map
455 : : *
456 : : * Note: we ignore the possibility of race conditions when the table is being
457 : : * extended concurrently with the call. New pages added to the table aren't
458 : : * going to be marked all-visible or all-frozen, so they won't affect the result.
459 : : */
460 : : void
3528 461 : 27095 : visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
462 : : {
463 : : BlockNumber mapBlock;
2447 tgl@sss.pgh.pa.us 464 : 27095 : BlockNumber nvisible = 0;
465 : 27095 : BlockNumber nfrozen = 0;
466 : :
467 : : /* all_visible must be specified */
3528 rhaas@postgresql.org 468 [ - + ]: 27095 : Assert(all_visible);
469 : :
4888 bruce@momjian.us 470 : 27095 : for (mapBlock = 0;; mapBlock++)
5128 tgl@sss.pgh.pa.us 471 : 8132 : {
472 : : Buffer mapBuffer;
473 : : uint64 *map;
474 : :
475 : : /*
476 : : * Read till we fall off the end of the map. We assume that any extra
477 : : * bytes in the last page are zeroed, so we don't bother excluding
478 : : * them from the count.
479 : : */
480 : 35227 : mapBuffer = vm_readbuf(rel, mapBlock, false);
481 [ + + ]: 35227 : if (!BufferIsValid(mapBuffer))
482 : 27095 : break;
483 : :
484 : : /*
485 : : * We choose not to lock the page, since the result is going to be
486 : : * immediately stale anyway if anyone is concurrently setting or
487 : : * clearing bits, and we only really need an approximate value.
488 : : */
2447 489 : 8132 : map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
490 : :
570 nathan@postgresql.or 491 : 8132 : nvisible += pg_popcount_masked((const char *) map, MAPSIZE, VISIBLE_MASK8);
492 [ + - ]: 8132 : if (all_frozen)
493 : 8132 : nfrozen += pg_popcount_masked((const char *) map, MAPSIZE, FROZEN_MASK8);
494 : :
5128 tgl@sss.pgh.pa.us 495 : 8132 : ReleaseBuffer(mapBuffer);
496 : : }
497 : :
2447 498 : 27095 : *all_visible = nvisible;
499 [ + - ]: 27095 : if (all_frozen)
500 : 27095 : *all_frozen = nfrozen;
5128 501 : 27095 : }
502 : :
503 : : /*
504 : : * visibilitymap_prepare_truncate -
505 : : * prepare for truncation of the visibility map
506 : : *
507 : : * nheapblocks is the new size of the heap.
508 : : *
509 : : * Return the number of blocks of new visibility map.
510 : : * If it's InvalidBlockNumber, there is nothing to truncate;
511 : : * otherwise the caller is responsible for calling smgrtruncate()
512 : : * to truncate the visibility map pages.
513 : : */
514 : : BlockNumber
2226 fujii@postgresql.org 515 : 202 : visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
516 : : {
517 : : BlockNumber newnblocks;
518 : :
519 : : /* last remaining block, byte, and bit */
6173 heikki.linnakangas@i 520 : 202 : BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
5983 bruce@momjian.us 521 : 202 : uint32 truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
3434 rhaas@postgresql.org 522 : 202 : uint8 truncOffset = HEAPBLK_TO_OFFSET(nheapblocks);
523 : :
524 : : #ifdef TRACE_VISIBILITYMAP
525 : : elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
526 : : #endif
527 : :
528 : : /*
529 : : * If no visibility map has been created yet for this relation, there's
530 : : * nothing to truncate.
531 : : */
1569 tgl@sss.pgh.pa.us 532 [ - + ]: 202 : if (!smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM))
2226 fujii@postgresql.org 533 :UBC 0 : return InvalidBlockNumber;
534 : :
535 : : /*
536 : : * Unless the new size is exactly at a visibility map page boundary, the
537 : : * tail bits in the last remaining map page, representing truncated heap
538 : : * blocks, need to be cleared. This is not only tidy, but also necessary
539 : : * because we don't get a chance to clear the bits if the heap is extended
540 : : * again.
541 : : */
3434 rhaas@postgresql.org 542 [ + + + + ]:CBC 202 : if (truncByte != 0 || truncOffset != 0)
6173 heikki.linnakangas@i 543 : 132 : {
544 : : Buffer mapBuffer;
545 : : Page page;
546 : : char *map;
547 : :
548 : 132 : newnblocks = truncBlock + 1;
549 : :
550 : 132 : mapBuffer = vm_readbuf(rel, truncBlock, false);
551 [ - + ]: 132 : if (!BufferIsValid(mapBuffer))
552 : : {
553 : : /* nothing to do, the file was already smaller */
2226 fujii@postgresql.org 554 :UBC 0 : return InvalidBlockNumber;
555 : : }
556 : :
3478 kgrittn@postgresql.o 557 :CBC 132 : page = BufferGetPage(mapBuffer);
6173 heikki.linnakangas@i 558 : 132 : map = PageGetContents(page);
559 : :
560 : 132 : LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
561 : :
562 : : /* NO EREPORT(ERROR) from here till changes are logged */
3296 563 : 132 : START_CRIT_SECTION();
564 : :
565 : : /* Clear out the unwanted bytes. */
6173 566 [ + + + - : 132 : MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));
+ - - + -
- ]
567 : :
568 : : /*----
569 : : * Mask out the unwanted bits of the last remaining byte.
570 : : *
571 : : * ((1 << 0) - 1) = 00000000
572 : : * ((1 << 1) - 1) = 00000001
573 : : * ...
574 : : * ((1 << 6) - 1) = 00111111
575 : : * ((1 << 7) - 1) = 01111111
576 : : *----
577 : : */
3434 rhaas@postgresql.org 578 : 132 : map[truncByte] &= (1 << truncOffset) - 1;
579 : :
580 : : /*
581 : : * Truncation of a relation is WAL-logged at a higher level, and we
582 : : * will be called at WAL replay. But if checksums are enabled, we still
583 : : * need to write a WAL record to protect against a torn page, if the
584 : : * page is flushed to disk before the truncation WAL record. We cannot
585 : : * use MarkBufferDirtyHint here, because that will not dirty the page
586 : : * during recovery.
587 : : */
6173 heikki.linnakangas@i 588 : 132 : MarkBufferDirty(mapBuffer);
3296 589 [ + + + + : 132 : if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
+ + + - +
- - + -
- ]
590 : 110 : log_newpage_buffer(mapBuffer, false);
591 : :
592 [ - + ]: 132 : END_CRIT_SECTION();
593 : :
6173 594 : 132 : UnlockReleaseBuffer(mapBuffer);
595 : : }
596 : : else
597 : 70 : newnblocks = truncBlock;
598 : :
1569 tgl@sss.pgh.pa.us 599 [ + + ]: 202 : if (smgrnblocks(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM) <= newnblocks)
600 : : {
601 : : /* nothing to do, the file was already smaller than requested size */
2226 fujii@postgresql.org 602 : 132 : return InvalidBlockNumber;
603 : : }
604 : :
605 : 70 : return newnblocks;
606 : : }
607 : :
608 : : /*
609 : : * Read a visibility map page.
610 : : *
611 : : * If the page doesn't exist, InvalidBuffer is returned, unless 'extend' is
612 : : * true, in which case the visibility map file is extended to cover it.
613 : : */
614 : : static Buffer
6173 heikki.linnakangas@i 615 : 892746 : vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
616 : : {
617 : : Buffer buf;
618 : : SMgrRelation reln;
619 : :
620 : : /*
621 : : * Caution: re-using this smgr pointer could fail if the relcache entry
622 : : * gets closed. It's safe as long as we only do smgr-level operations
623 : : * between here and the last use of the pointer.
624 : : */
1569 tgl@sss.pgh.pa.us 625 : 892746 : reln = RelationGetSmgr(rel);
626 : :
627 : : /*
628 : : * If we haven't cached the size of the visibility map fork yet, check it
629 : : * first.
630 : : */
631 [ + + ]: 892746 : if (reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber)
632 : : {
633 [ + + ]: 38895 : if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
634 : 18080 : smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
635 : : else
636 : 20815 : reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = 0;
637 : : }
638 : :
639 : : /*
640 : : * For reading we use ZERO_ON_ERROR mode, and initialize the page if
641 : : * necessary. It's always safe to clear bits, so it's better to clear
642 : : * corrupt pages than error out.
643 : : *
644 : : * We use the same path below to initialize pages when extending the
645 : : * relation, as a concurrent extension can end up with vm_extend()
646 : : * returning an already-initialized page.
647 : : */
648 [ + + ]: 892746 : if (blkno >= reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM])
649 : : {
6173 heikki.linnakangas@i 650 [ + + ]: 797935 : if (extend)
937 andres@anarazel.de 651 : 2998 : buf = vm_extend(rel, blkno + 1);
652 : : else
6173 heikki.linnakangas@i 653 : 794937 : return InvalidBuffer;
654 : : }
655 : : else
937 andres@anarazel.de 656 : 94811 : buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno,
657 : : RBM_ZERO_ON_ERROR, NULL);
658 : :
659 : : /*
660 : : * Initializing the page when needed is trickier than it looks, because of
661 : : * the possibility of multiple backends doing this concurrently, and our
662 : : * desire to not uselessly take the buffer lock in the normal path where
663 : : * the page is OK. We must take the lock to initialize the page, so
664 : : * recheck page newness after we have the lock, in case someone else
665 : : * already did it. Also, because we initially check PageIsNew with no
666 : : * lock, it's possible to fall through and return the buffer while someone
667 : : * else is still initializing the page (i.e., we might see pd_upper as set
668 : : * but other page header fields are still zeroes). This is harmless for
669 : : * callers that will take a buffer lock themselves, but some callers
670 : : * inspect the page without any lock at all. The latter is OK only so
671 : : * long as it doesn't depend on the page header having correct contents.
672 : : * Current usage is safe because PageGetContents() does not require that.
673 : : */
3478 kgrittn@postgresql.o 674 [ + + ]: 97809 : if (PageIsNew(BufferGetPage(buf)))
675 : : {
2664 tgl@sss.pgh.pa.us 676 : 3047 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
677 [ + - ]: 3047 : if (PageIsNew(BufferGetPage(buf)))
678 : 3047 : PageInit(BufferGetPage(buf), BLCKSZ, 0);
679 : 3047 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
680 : : }
6173 heikki.linnakangas@i 681 : 97809 : return buf;
682 : : }
683 : :
684 : : /*
685 : : * Ensure that the visibility map fork is at least vm_nblocks long, extending
686 : : * it if necessary with zeroed pages.
687 : : */
688 : : static Buffer
689 : 2998 : vm_extend(Relation rel, BlockNumber vm_nblocks)
690 : : {
691 : : Buffer buf;
692 : :
797 tmunro@postgresql.or 693 : 2998 : buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL,
694 : : EB_CREATE_FORK_IF_NEEDED |
695 : : EB_CLEAR_SIZE_CACHE,
696 : : vm_nblocks,
697 : : RBM_ZERO_ON_ERROR);
698 : :
699 : : /*
700 : : * Send a shared-inval message to force other backends to close any smgr
701 : : * references they may have for this rel, which we are about to change.
702 : : * This is a useful optimization because it means that backends don't have
703 : : * to keep checking for creation or extension of the file, which happens
704 : : * infrequently.
705 : : */
937 andres@anarazel.de 706 : 2998 : CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
707 : :
708 : 2998 : return buf;
709 : : }
|