Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * visibilitymap.c
4 : : * bitmap for tracking visibility of heap tuples
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/visibilitymap.c
12 : : *
13 : : * INTERFACE ROUTINES
14 : : * visibilitymap_clear - clear bits for one page in the visibility map
15 : : * visibilitymap_pin - pin a map page for setting a bit
16 : : * visibilitymap_pin_ok - check whether correct map page is already pinned
17 : : * visibilitymap_set - set bit(s) in a previously pinned page and log
18 : : * visibilitymap_set_vmbits - set bit(s) in a pinned page
19 : : * visibilitymap_get_status - get status of bits
20 : : * visibilitymap_count - count number of bits set in visibility map
21 : : * visibilitymap_prepare_truncate -
22 : : * prepare for truncation of the visibility map
23 : : *
24 : : * NOTES
25 : : *
26 : : * The visibility map is a bitmap with two bits (all-visible and all-frozen)
27 : : * per heap page. A set all-visible bit means that all tuples on the page are
28 : : * known visible to all transactions, and therefore the page doesn't need to
29 : : * be vacuumed. A set all-frozen bit means that all tuples on the page are
30 : : * completely frozen, and therefore the page doesn't need to be vacuumed even
31 : : * if whole table scanning vacuum is required (e.g. anti-wraparound vacuum).
32 : : * The all-frozen bit must be set only when the page is already all-visible.
33 : : *
34 : : * The map is conservative in the sense that we make sure that whenever a bit
35 : : * is set, we know the condition is true, but if a bit is not set, it might or
36 : : * might not be true.
37 : : *
38 : : * Clearing visibility map bits is not separately WAL-logged. The callers
39 : : * must make sure that whenever a bit is cleared, the bit is cleared on WAL
40 : : * replay of the updating operation as well.
41 : : *
42 : : * When we *set* a visibility map bit during VACUUM, we must write WAL. This may
43 : : * seem counterintuitive, since the bit is basically a hint: if it is clear,
44 : : * it may still be the case that every tuple on the page is visible to all
45 : : * transactions; we just don't know that for certain. The difficulty is that
46 : : * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
47 : : * on the page itself, and the visibility map bit. If a crash occurs after the
48 : : * visibility map page makes it to disk and before the updated heap page makes
49 : : * it to disk, redo must set the bit on the heap page. Otherwise, the next
50 : : * insert, update, or delete on the heap page will fail to realize that the
51 : : * visibility map bit must be cleared, possibly causing index-only scans to
52 : : * return wrong answers.
53 : : *
54 : : * VACUUM will normally skip pages for which the visibility map bit is set;
55 : : * such pages can't contain any dead tuples and therefore don't need vacuuming.
56 : : *
57 : : * LOCKING
58 : : *
59 : : * In heapam.c, whenever a page is modified so that not all tuples on the
60 : : * page are visible to everyone anymore, the corresponding bit in the
61 : : * visibility map is cleared. In order to be crash-safe, we need to do this
62 : : * while still holding a lock on the heap page and in the same critical
63 : : * section that logs the page modification. However, we don't want to hold
64 : : * the buffer lock over any I/O that may be required to read in the visibility
65 : : * map page. To avoid this, we examine the heap page before locking it;
66 : : * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
67 : : * page. Then, we lock the buffer. But this creates a race condition: there
68 : : * is a possibility that in the time it takes to lock the buffer, the
69 : : * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
70 : : * buffer, pin the visibility map page, and relock the buffer. This shouldn't
71 : : * happen often, because only VACUUM currently sets visibility map bits,
72 : : * and the race will only occur if VACUUM processes a given page at almost
73 : : * exactly the same time that someone tries to further modify it.
74 : : *
75 : : * To set a bit, you need to hold a lock on the heap page. That prevents
76 : : * the race condition where VACUUM sees that all tuples on the page are
77 : : * visible to everyone, but another backend modifies the page before VACUUM
78 : : * sets the bit in the visibility map.
79 : : *
80 : : * When a bit is set, the LSN of the visibility map page is updated to make
81 : : * sure that the visibility map update doesn't get written to disk before the
82 : : * WAL record of the changes that made it possible to set the bit is flushed.
83 : : * But when a bit is cleared, we don't have to do that because it's always
84 : : * safe to clear a bit in the map from correctness point of view.
85 : : *
86 : : *-------------------------------------------------------------------------
87 : : */
88 : : #include "postgres.h"
89 : :
90 : : #include "access/heapam_xlog.h"
91 : : #include "access/visibilitymap.h"
92 : : #include "access/xloginsert.h"
93 : : #include "access/xlogutils.h"
94 : : #include "miscadmin.h"
95 : : #include "port/pg_bitutils.h"
96 : : #include "storage/bufmgr.h"
97 : : #include "storage/smgr.h"
98 : : #include "utils/inval.h"
99 : : #include "utils/rel.h"
100 : :
101 : :
102 : : /*#define TRACE_VISIBILITYMAP */
103 : :
104 : : /*
105 : : * Size of the bitmap on each visibility map page, in bytes. There's no
106 : : * extra headers, so the whole page minus the standard page header is
107 : : * used for the bitmap.
108 : : */
109 : : #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
110 : :
111 : : /* Number of heap blocks we can represent in one byte */
112 : : #define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
113 : :
114 : : /* Number of heap blocks we can represent in one visibility map page. */
115 : : #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
116 : :
117 : : /* Mapping from heap block number to the right bit in the visibility map */
118 : : #define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
119 : : #define HEAPBLK_TO_MAPBLOCK_LIMIT(x) \
120 : : (((x) + HEAPBLOCKS_PER_PAGE - 1) / HEAPBLOCKS_PER_PAGE)
121 : : #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
122 : : #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
123 : :
124 : : /* Masks for counting subsets of bits in the visibility map. */
125 : : #define VISIBLE_MASK8 (0x55) /* The lower bit of each bit pair */
126 : : #define FROZEN_MASK8 (0xaa) /* The upper bit of each bit pair */
127 : :
128 : : /* prototypes for internal routines */
129 : : static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
130 : : static Buffer vm_extend(Relation rel, BlockNumber vm_nblocks);
131 : :
132 : :
133 : : /*
134 : : * visibilitymap_clear - clear specified bits for one page in visibility map
135 : : *
136 : : * You must pass a buffer containing the correct map page to this function.
137 : : * Call visibilitymap_pin first to pin the right one. This function doesn't do
138 : : * any I/O. Returns true if any bits have been cleared and false otherwise.
139 : : */
140 : : bool
1273 pg@bowt.ie 141 :CBC 19322 : visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
142 : : {
6311 heikki.linnakangas@i 143 : 19322 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
144 : 19322 : int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3572 rhaas@postgresql.org 145 : 19322 : int mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
3527 andres@anarazel.de 146 : 19322 : uint8 mask = flags << mapOffset;
147 : : char *map;
148 : 19322 : bool cleared = false;
149 : :
150 : : /* Must never clear all_visible bit while leaving all_frozen bit set */
151 [ - + ]: 19322 : Assert(flags & VISIBILITYMAP_VALID_BITS);
1154 pg@bowt.ie 152 [ - + ]: 19322 : Assert(flags != VISIBILITYMAP_ALL_VISIBLE);
153 : :
154 : : #ifdef TRACE_VISIBILITYMAP
155 : : elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
156 : : #endif
157 : :
1273 158 [ + - - + ]: 19322 : if (!BufferIsValid(vmbuf) || BufferGetBlockNumber(vmbuf) != mapBlock)
5381 rhaas@postgresql.org 159 [ # # ]:UBC 0 : elog(ERROR, "wrong buffer passed to visibilitymap_clear");
160 : :
1273 pg@bowt.ie 161 :CBC 19322 : LockBuffer(vmbuf, BUFFER_LOCK_EXCLUSIVE);
162 : 19322 : map = PageGetContents(BufferGetPage(vmbuf));
163 : :
6311 heikki.linnakangas@i 164 [ + + ]: 19322 : if (map[mapByte] & mask)
165 : : {
166 : 17014 : map[mapByte] &= ~mask;
167 : :
1273 pg@bowt.ie 168 : 17014 : MarkBufferDirty(vmbuf);
3527 andres@anarazel.de 169 : 17014 : cleared = true;
170 : : }
171 : :
1273 pg@bowt.ie 172 : 19322 : LockBuffer(vmbuf, BUFFER_LOCK_UNLOCK);
173 : :
3527 andres@anarazel.de 174 : 19322 : return cleared;
175 : : }
176 : :
177 : : /*
178 : : * visibilitymap_pin - pin a map page for setting a bit
179 : : *
180 : : * Setting a bit in the visibility map is a two-phase operation. First, call
181 : : * visibilitymap_pin, to pin the visibility map page containing the bit for
182 : : * the heap page. Because that can require I/O to read the map page, you
183 : : * shouldn't hold a lock on the heap page while doing that. Then, call
184 : : * visibilitymap_set to actually set the bit.
185 : : *
186 : : * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by
187 : : * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
188 : : * relation. On return, *vmbuf is a valid buffer with the map page containing
189 : : * the bit for heapBlk.
190 : : *
191 : : * If the page doesn't exist in the map file yet, it is extended.
192 : : */
193 : : void
1273 pg@bowt.ie 194 : 514326 : visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
195 : : {
6311 heikki.linnakangas@i 196 : 514326 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
197 : :
198 : : /* Reuse the old pinned buffer if possible */
1273 pg@bowt.ie 199 [ + + ]: 514326 : if (BufferIsValid(*vmbuf))
200 : : {
201 [ + - ]: 89259 : if (BufferGetBlockNumber(*vmbuf) == mapBlock)
6311 heikki.linnakangas@i 202 : 89259 : return;
203 : :
1273 pg@bowt.ie 204 :UBC 0 : ReleaseBuffer(*vmbuf);
205 : : }
1273 pg@bowt.ie 206 :CBC 425067 : *vmbuf = vm_readbuf(rel, mapBlock, true);
207 : : }
208 : :
209 : : /*
210 : : * visibilitymap_pin_ok - do we already have the correct page pinned?
211 : : *
212 : : * On entry, vmbuf should be InvalidBuffer or a valid buffer returned by
213 : : * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
214 : : * relation. The return value indicates whether the buffer covers the
215 : : * given heapBlk.
216 : : */
217 : : bool
218 : 15412 : visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
219 : : {
5381 rhaas@postgresql.org 220 : 15412 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
221 : :
1273 pg@bowt.ie 222 [ + + + - ]: 15412 : return BufferIsValid(vmbuf) && BufferGetBlockNumber(vmbuf) == mapBlock;
223 : : }
224 : :
225 : : /*
226 : : * visibilitymap_set - set bit(s) on a previously pinned page
227 : : *
228 : : * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
229 : : * or InvalidXLogRecPtr in normal running. The VM page LSN is advanced to the
230 : : * one provided; in normal running, we generate a new XLOG record and set the
231 : : * page LSN to that value (though the heap page's LSN may *not* be updated;
232 : : * see below). cutoff_xid is the largest xmin on the page being marked
233 : : * all-visible; it is needed for Hot Standby, and can be InvalidTransactionId
234 : : * if the page contains no tuples. It can also be set to InvalidTransactionId
235 : : * when a page that is already all-visible is being marked all-frozen.
236 : : *
237 : : * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
238 : : * this function. Except in recovery, caller should also pass the heap
239 : : * buffer. When checksums are enabled and we're not in recovery, we must add
240 : : * the heap buffer to the WAL chain to protect it from being torn.
241 : : *
242 : : * You must pass a buffer containing the correct map page to this function.
243 : : * Call visibilitymap_pin first to pin the right one. This function doesn't do
244 : : * any I/O.
245 : : */
246 : : void
4741 simon@2ndQuadrant.co 247 : 46473 : visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
248 : : XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
249 : : uint8 flags)
250 : : {
6311 heikki.linnakangas@i 251 : 46473 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
252 : 46473 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3572 rhaas@postgresql.org 253 : 46473 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
254 : : Page page;
255 : : uint8 *map;
256 : : uint8 status;
257 : :
258 : : #ifdef TRACE_VISIBILITYMAP
259 : : elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
260 : : flags, RelationGetRelationName(rel), heapBlk);
261 : : #endif
262 : :
129 alvherre@kurilemu.de 263 [ + + - + ]:GNC 46473 : Assert(InRecovery || !XLogRecPtrIsValid(recptr));
198 peter@eisentraut.org 264 [ + + - + ]: 46473 : Assert(InRecovery || PageIsAllVisible(BufferGetPage(heapBuf)));
1079 andres@anarazel.de 265 [ - + ]:CBC 46473 : Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
266 : :
267 : : /* Must never set all_frozen bit without also setting all_visible bit */
1154 pg@bowt.ie 268 [ - + ]: 46473 : Assert(flags != VISIBILITYMAP_ALL_FROZEN);
269 : :
270 : : /* Check that we have the right heap page pinned, if present */
4741 simon@2ndQuadrant.co 271 [ + + - + ]: 46473 : if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
4741 simon@2ndQuadrant.co 272 [ # # ]:UBC 0 : elog(ERROR, "wrong heap buffer passed to visibilitymap_set");
273 : :
158 andres@anarazel.de 274 [ + + - + ]:GNC 46473 : Assert(!BufferIsValid(heapBuf) ||
275 : : BufferIsLockedByMeInMode(heapBuf, BUFFER_LOCK_EXCLUSIVE));
276 : :
277 : : /* Check that we have the right VM page pinned */
4741 simon@2ndQuadrant.co 278 [ + - - + ]:CBC 46473 : if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
4741 simon@2ndQuadrant.co 279 [ # # ]:UBC 0 : elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
280 : :
3616 kgrittn@postgresql.o 281 :CBC 46473 : page = BufferGetPage(vmBuf);
3566 rhaas@postgresql.org 282 : 46473 : map = (uint8 *) PageGetContents(page);
4741 simon@2ndQuadrant.co 283 : 46473 : LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
284 : :
453 melanieplageman@gmai 285 : 46473 : status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
286 [ + - ]: 46473 : if (flags != status)
287 : : {
5381 rhaas@postgresql.org 288 : 46473 : START_CRIT_SECTION();
289 : :
3572 290 : 46473 : map[mapByte] |= (flags << mapOffset);
4741 simon@2ndQuadrant.co 291 : 46473 : MarkBufferDirty(vmBuf);
292 : :
5381 rhaas@postgresql.org 293 [ + + + + : 46473 : if (RelationNeedsWAL(rel))
+ - + - ]
294 : : {
129 alvherre@kurilemu.de 295 [ + + ]:GNC 44914 : if (!XLogRecPtrIsValid(recptr))
296 : : {
4741 simon@2ndQuadrant.co 297 [ - + ]:CBC 40364 : Assert(!InRecovery);
1079 andres@anarazel.de 298 : 40364 : recptr = log_heap_visible(rel, heapBuf, vmBuf, cutoff_xid, flags);
299 : :
300 : : /*
301 : : * If data checksums are enabled (or wal_log_hints=on), we
302 : : * need to protect the heap page from being torn.
303 : : *
304 : : * If not, then we must *not* update the heap page's LSN. In
305 : : * this case, the FPI for the heap page was omitted from the
306 : : * WAL record inserted above, so it would be incorrect to
307 : : * update the heap page's LSN.
308 : : */
4475 heikki.linnakangas@i 309 [ + + - + ]: 40364 : if (XLogHintBitIsNeeded())
310 : : {
3616 kgrittn@postgresql.o 311 : 37149 : Page heapPage = BufferGetPage(heapBuf);
312 : :
4741 simon@2ndQuadrant.co 313 : 37149 : PageSetLSN(heapPage, recptr);
314 : : }
315 : : }
6311 heikki.linnakangas@i 316 : 44914 : PageSetLSN(page, recptr);
317 : : }
318 : :
5381 rhaas@postgresql.org 319 [ - + ]: 46473 : END_CRIT_SECTION();
320 : : }
321 : :
4741 simon@2ndQuadrant.co 322 : 46473 : LockBuffer(vmBuf, BUFFER_LOCK_UNLOCK);
6311 heikki.linnakangas@i 323 :GNC 46473 : }
324 : :
325 : : /*
326 : : * Set VM (visibility map) flags in the VM block in vmBuf.
327 : : *
328 : : * This function is intended for callers that log VM changes together
329 : : * with the heap page modifications that rendered the page all-visible.
330 : : * Callers that log VM changes separately should use visibilitymap_set().
331 : : *
332 : : * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
333 : : * corresponding to heapBlk.
334 : : *
335 : : * In normal operation (not recovery), this must be called inside a critical
336 : : * section that also applies the necessary heap page changes and, if
337 : : * applicable, emits WAL.
338 : : *
339 : : * The caller is responsible for ensuring consistency between the heap page
340 : : * and the VM page by holding a pin and exclusive lock on the buffer
341 : : * containing heapBlk.
342 : : *
343 : : * rlocator is used only for debugging messages.
344 : : */
345 : : void
157 melanieplageman@gmai 346 : 19918 : visibilitymap_set_vmbits(BlockNumber heapBlk,
347 : : Buffer vmBuf, uint8 flags,
348 : : const RelFileLocator rlocator)
349 : : {
350 : 19918 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
351 : 19918 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
352 : 19918 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
353 : : Page page;
354 : : uint8 *map;
355 : : uint8 status;
356 : :
357 : : #ifdef TRACE_VISIBILITYMAP
358 : : elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
359 : : flags,
360 : : relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
361 : : heapBlk);
362 : : #endif
363 : :
364 : : /* Call in same critical section where WAL is emitted. */
365 [ + + - + ]: 19918 : Assert(InRecovery || CritSectionCount > 0);
366 : :
367 : : /* Flags should be valid. Also never clear bits with this function */
368 [ - + ]: 19918 : Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
369 : :
370 : : /* Must never set all_frozen bit without also setting all_visible bit */
371 [ - + ]: 19918 : Assert(flags != VISIBILITYMAP_ALL_FROZEN);
372 : :
373 : : /* Check that we have the right VM page pinned */
374 [ + - - + ]: 19918 : if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
157 melanieplageman@gmai 375 [ # # ]:UNC 0 : elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
376 : :
157 melanieplageman@gmai 377 [ - + ]:GNC 19918 : Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
378 : :
379 : 19918 : page = BufferGetPage(vmBuf);
380 : 19918 : map = (uint8 *) PageGetContents(page);
381 : :
382 : 19918 : status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
383 [ + - ]: 19918 : if (flags != status)
384 : : {
385 : 19918 : map[mapByte] |= (flags << mapOffset);
386 : 19918 : MarkBufferDirty(vmBuf);
387 : : }
157 melanieplageman@gmai 388 :GIC 19918 : }
389 : :
390 : : /*
391 : : * visibilitymap_get_status - get status of bits
392 : : *
393 : : * Are all tuples on heapBlk visible to all or are marked frozen, according
394 : : * to the visibility map?
395 : : *
396 : : * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by an
397 : : * earlier call to visibilitymap_pin or visibilitymap_get_status on the same
398 : : * relation. On return, *vmbuf is a valid buffer with the map page containing
399 : : * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
400 : : * releasing *vmbuf after it's done testing and setting bits.
401 : : *
402 : : * NOTE: This function is typically called without a lock on the heap page,
403 : : * so somebody else could change the bit just after we look at it. In fact,
404 : : * since we don't lock the visibility map page either, it's even possible that
405 : : * someone else could have changed the bit just before we look at it, but yet
406 : : * we might see the old value. It is the caller's responsibility to deal with
407 : : * all concurrency issues!
408 : : */
409 : : uint8
1273 pg@bowt.ie 410 :CBC 3171185 : visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
411 : : {
6311 heikki.linnakangas@i 412 : 3171185 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
413 : 3171185 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3572 rhaas@postgresql.org 414 : 3171185 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
415 : : char *map;
416 : : uint8 result;
417 : :
418 : : #ifdef TRACE_VISIBILITYMAP
419 : : elog(DEBUG1, "vm_get_status %s %d", RelationGetRelationName(rel), heapBlk);
420 : : #endif
421 : :
422 : : /* Reuse the old pinned buffer if possible */
1273 pg@bowt.ie 423 [ + + ]: 3171185 : if (BufferIsValid(*vmbuf))
424 : : {
425 [ - + ]: 2332494 : if (BufferGetBlockNumber(*vmbuf) != mapBlock)
426 : : {
1273 pg@bowt.ie 427 :UBC 0 : ReleaseBuffer(*vmbuf);
428 : 0 : *vmbuf = InvalidBuffer;
429 : : }
430 : : }
431 : :
1273 pg@bowt.ie 432 [ + + ]:CBC 3171185 : if (!BufferIsValid(*vmbuf))
433 : : {
434 : 838691 : *vmbuf = vm_readbuf(rel, mapBlock, false);
435 [ + + ]: 838691 : if (!BufferIsValid(*vmbuf))
234 nathan@postgresql.or 436 :GNC 766348 : return (uint8) 0;
437 : : }
438 : :
1273 pg@bowt.ie 439 :CBC 2404837 : map = PageGetContents(BufferGetPage(*vmbuf));
440 : :
441 : : /*
442 : : * A single byte read is atomic. There could be memory-ordering effects
443 : : * here, but for performance reasons we make it the caller's job to worry
444 : : * about that.
445 : : */
3572 rhaas@postgresql.org 446 : 2404837 : result = ((map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS);
447 : 2404837 : return result;
448 : : }
449 : :
450 : : /*
451 : : * visibilitymap_count - count number of bits set in visibility map
452 : : *
453 : : * Note: we ignore the possibility of race conditions when the table is being
454 : : * extended concurrently with the call. New pages added to the table aren't
455 : : * going to be marked all-visible or all-frozen, so they won't affect the result.
456 : : */
457 : : void
3666 458 : 28492 : visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
459 : : {
460 : : BlockNumber mapBlock;
2585 tgl@sss.pgh.pa.us 461 : 28492 : BlockNumber nvisible = 0;
462 : 28492 : BlockNumber nfrozen = 0;
463 : :
464 : : /* all_visible must be specified */
3666 rhaas@postgresql.org 465 [ - + ]: 28492 : Assert(all_visible);
466 : :
5026 bruce@momjian.us 467 : 28492 : for (mapBlock = 0;; mapBlock++)
5266 tgl@sss.pgh.pa.us 468 : 8501 : {
469 : : Buffer mapBuffer;
470 : : uint64 *map;
471 : :
472 : : /*
473 : : * Read till we fall off the end of the map. We assume that any extra
474 : : * bytes in the last page are zeroed, so we don't bother excluding
475 : : * them from the count.
476 : : */
477 : 36993 : mapBuffer = vm_readbuf(rel, mapBlock, false);
478 [ + + ]: 36993 : if (!BufferIsValid(mapBuffer))
479 : 28492 : break;
480 : :
481 : : /*
482 : : * We choose not to lock the page, since the result is going to be
483 : : * immediately stale anyway if anyone is concurrently setting or
484 : : * clearing bits, and we only really need an approximate value.
485 : : */
2585 486 : 8501 : map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
487 : :
708 nathan@postgresql.or 488 : 8501 : nvisible += pg_popcount_masked((const char *) map, MAPSIZE, VISIBLE_MASK8);
489 [ + - ]: 8501 : if (all_frozen)
490 : 8501 : nfrozen += pg_popcount_masked((const char *) map, MAPSIZE, FROZEN_MASK8);
491 : :
5266 tgl@sss.pgh.pa.us 492 : 8501 : ReleaseBuffer(mapBuffer);
493 : : }
494 : :
2585 495 : 28492 : *all_visible = nvisible;
496 [ + - ]: 28492 : if (all_frozen)
497 : 28492 : *all_frozen = nfrozen;
5266 498 : 28492 : }
499 : :
500 : : /*
501 : : * visibilitymap_prepare_truncate -
502 : : * prepare for truncation of the visibility map
503 : : *
504 : : * nheapblocks is the new size of the heap.
505 : : *
506 : : * Return the number of blocks of new visibility map.
507 : : * If it's InvalidBlockNumber, there is nothing to truncate;
508 : : * otherwise the caller is responsible for calling smgrtruncate()
509 : : * to truncate the visibility map pages.
510 : : */
511 : : BlockNumber
2364 fujii@postgresql.org 512 : 202 : visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
513 : : {
514 : : BlockNumber newnblocks;
515 : :
516 : : /* last remaining block, byte, and bit */
6311 heikki.linnakangas@i 517 : 202 : BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
6121 bruce@momjian.us 518 : 202 : uint32 truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
3572 rhaas@postgresql.org 519 : 202 : uint8 truncOffset = HEAPBLK_TO_OFFSET(nheapblocks);
520 : :
521 : : #ifdef TRACE_VISIBILITYMAP
522 : : elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
523 : : #endif
524 : :
525 : : /*
526 : : * If no visibility map has been created yet for this relation, there's
527 : : * nothing to truncate.
528 : : */
1707 tgl@sss.pgh.pa.us 529 [ - + ]: 202 : if (!smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM))
2364 fujii@postgresql.org 530 :UBC 0 : return InvalidBlockNumber;
531 : :
532 : : /*
533 : : * Unless the new size is exactly at a visibility map page boundary, the
534 : : * tail bits in the last remaining map page, representing truncated heap
535 : : * blocks, need to be cleared. This is not only tidy, but also necessary
536 : : * because we don't get a chance to clear the bits if the heap is extended
537 : : * again.
538 : : */
3572 rhaas@postgresql.org 539 [ + + + + ]:CBC 202 : if (truncByte != 0 || truncOffset != 0)
6311 heikki.linnakangas@i 540 : 126 : {
541 : : Buffer mapBuffer;
542 : : Page page;
543 : : char *map;
544 : :
545 : 126 : newnblocks = truncBlock + 1;
546 : :
547 : 126 : mapBuffer = vm_readbuf(rel, truncBlock, false);
548 [ - + ]: 126 : if (!BufferIsValid(mapBuffer))
549 : : {
550 : : /* nothing to do, the file was already smaller */
2364 fujii@postgresql.org 551 :UBC 0 : return InvalidBlockNumber;
552 : : }
553 : :
3616 kgrittn@postgresql.o 554 :CBC 126 : page = BufferGetPage(mapBuffer);
6311 heikki.linnakangas@i 555 : 126 : map = PageGetContents(page);
556 : :
557 : 126 : LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
558 : :
559 : : /* NO EREPORT(ERROR) from here till changes are logged */
3434 560 : 126 : START_CRIT_SECTION();
561 : :
562 : : /* Clear out the unwanted bytes. */
6311 563 [ + + + - : 126 : MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));
+ - - + -
- ]
564 : :
565 : : /*----
566 : : * Mask out the unwanted bits of the last remaining byte.
567 : : *
568 : : * ((1 << 0) - 1) = 00000000
569 : : * ((1 << 1) - 1) = 00000001
570 : : * ...
571 : : * ((1 << 6) - 1) = 00111111
572 : : * ((1 << 7) - 1) = 01111111
573 : : *----
574 : : */
3572 rhaas@postgresql.org 575 : 126 : map[truncByte] &= (1 << truncOffset) - 1;
576 : :
577 : : /*
578 : : * Truncation of a relation is WAL-logged at a higher-level, and we
579 : : * will be called at WAL replay. But if checksums are enabled, we need
580 : : * to still write a WAL record to protect against a torn page, if the
581 : : * page is flushed to disk before the truncation WAL record. We cannot
582 : : * use MarkBufferDirtyHint here, because that will not dirty the page
583 : : * during recovery.
584 : : */
6311 heikki.linnakangas@i 585 : 126 : MarkBufferDirty(mapBuffer);
3434 586 [ + + + + : 126 : if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
+ + + - +
- - + -
- ]
587 : 103 : log_newpage_buffer(mapBuffer, false);
588 : :
589 [ - + ]: 126 : END_CRIT_SECTION();
590 : :
6311 591 : 126 : UnlockReleaseBuffer(mapBuffer);
592 : : }
593 : : else
594 : 76 : newnblocks = truncBlock;
595 : :
1707 tgl@sss.pgh.pa.us 596 [ + + ]: 202 : if (smgrnblocks(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM) <= newnblocks)
597 : : {
598 : : /* nothing to do, the file was already smaller than requested size */
2364 fujii@postgresql.org 599 : 126 : return InvalidBlockNumber;
600 : : }
601 : :
602 : 76 : return newnblocks;
603 : : }
604 : :
605 : : /*
606 : : * visibilitymap_truncation_length -
607 : : * compute truncation length for visibility map
608 : : *
609 : : * Given a proposed truncation length for the main fork, compute the
610 : : * correct truncation length for the visibility map. Should return the
611 : : * same answer as visibilitymap_prepare_truncate(), but without modifying
612 : : * anything.
613 : : */
614 : : BlockNumber
6 rhaas@postgresql.org 615 : 1 : visibilitymap_truncation_length(BlockNumber nheapblocks)
616 : : {
617 : 1 : return HEAPBLK_TO_MAPBLOCK_LIMIT(nheapblocks);
618 : : }
619 : :
620 : : /*
621 : : * Read a visibility map page.
622 : : *
623 : : * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
624 : : * true, the visibility map file is extended.
625 : : */
626 : : static Buffer
6311 heikki.linnakangas@i 627 : 1300877 : vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
628 : : {
629 : : Buffer buf;
630 : : SMgrRelation reln;
631 : :
632 : : /*
633 : : * Caution: re-using this smgr pointer could fail if the relcache entry
634 : : * gets closed. It's safe as long as we only do smgr-level operations
635 : : * between here and the last use of the pointer.
636 : : */
1707 tgl@sss.pgh.pa.us 637 : 1300877 : reln = RelationGetSmgr(rel);
638 : :
639 : : /*
640 : : * If we haven't cached the size of the visibility map fork yet, check it
641 : : * first.
642 : : */
643 [ + + ]: 1300877 : if (reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber)
644 : : {
645 [ + + ]: 41922 : if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
646 : 20095 : smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
647 : : else
648 : 21827 : reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = 0;
649 : : }
650 : :
651 : : /*
652 : : * For reading we use ZERO_ON_ERROR mode, and initialize the page if
653 : : * necessary. It's always safe to clear bits, so it's better to clear
654 : : * corrupt pages than error out.
655 : : *
656 : : * We use the same path below to initialize pages when extending the
657 : : * relation, as a concurrent extension can end up with vm_extend()
658 : : * returning an already-initialized page.
659 : : */
660 [ + + ]: 1300877 : if (blkno >= reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM])
661 : : {
6311 heikki.linnakangas@i 662 [ + + ]: 797934 : if (extend)
1075 andres@anarazel.de 663 : 3094 : buf = vm_extend(rel, blkno + 1);
664 : : else
6311 heikki.linnakangas@i 665 : 794840 : return InvalidBuffer;
666 : : }
667 : : else
1075 andres@anarazel.de 668 : 502943 : buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno,
669 : : RBM_ZERO_ON_ERROR, NULL);
670 : :
671 : : /*
672 : : * Initializing the page when needed is trickier than it looks, because of
673 : : * the possibility of multiple backends doing this concurrently, and our
674 : : * desire to not uselessly take the buffer lock in the normal path where
675 : : * the page is OK. We must take the lock to initialize the page, so
676 : : * recheck page newness after we have the lock, in case someone else
677 : : * already did it. Also, because we initially check PageIsNew with no
678 : : * lock, it's possible to fall through and return the buffer while someone
679 : : * else is still initializing the page (i.e., we might see pd_upper as set
680 : : * but other page header fields are still zeroes). This is harmless for
681 : : * callers that will take a buffer lock themselves, but some callers
682 : : * inspect the page without any lock at all. The latter is OK only so
683 : : * long as it doesn't depend on the page header having correct contents.
684 : : * Current usage is safe because PageGetContents() does not require that.
685 : : */
3616 kgrittn@postgresql.o 686 [ + + ]: 506037 : if (PageIsNew(BufferGetPage(buf)))
687 : : {
2802 tgl@sss.pgh.pa.us 688 : 3157 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
689 [ + - ]: 3157 : if (PageIsNew(BufferGetPage(buf)))
690 : 3157 : PageInit(BufferGetPage(buf), BLCKSZ, 0);
691 : 3157 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
692 : : }
6311 heikki.linnakangas@i 693 : 506037 : return buf;
694 : : }
695 : :
696 : : /*
697 : : * Ensure that the visibility map fork is at least vm_nblocks long, extending
698 : : * it if necessary with zeroed pages.
699 : : */
700 : : static Buffer
701 : 3094 : vm_extend(Relation rel, BlockNumber vm_nblocks)
702 : : {
703 : : Buffer buf;
704 : :
935 tmunro@postgresql.or 705 : 3094 : buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL,
706 : : EB_CREATE_FORK_IF_NEEDED |
707 : : EB_CLEAR_SIZE_CACHE,
708 : : vm_nblocks,
709 : : RBM_ZERO_ON_ERROR);
710 : :
711 : : /*
712 : : * Send a shared-inval message to force other backends to close any smgr
713 : : * references they may have for this rel, which we are about to change.
714 : : * This is a useful optimization because it means that backends don't have
715 : : * to keep checking for creation or extension of the file, which happens
716 : : * infrequently.
717 : : */
1075 andres@anarazel.de 718 : 3094 : CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
719 : :
720 : 3094 : return buf;
721 : : }
|