Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * visibilitymap.c
4 : : * bitmap for tracking visibility of heap tuples
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/visibilitymap.c
12 : : *
13 : : * INTERFACE ROUTINES
14 : : * visibilitymap_clear - clear bits for one page in the visibility map
15 : : * visibilitymap_pin - pin a map page for setting a bit
16 : : * visibilitymap_pin_ok - check whether correct map page is already pinned
17 : : * visibilitymap_set - set bit(s) in a previously pinned page
18 : : * visibilitymap_get_status - get status of bits
19 : : * visibilitymap_count - count number of bits set in visibility map
20 : : * visibilitymap_prepare_truncate -
21 : : * prepare for truncation of the visibility map
22 : : *
23 : : * NOTES
24 : : *
25 : : * The visibility map is a bitmap with two bits (all-visible and all-frozen)
26 : : * per heap page. A set all-visible bit means that all tuples on the page are
27 : : * known visible to all transactions, and therefore the page doesn't need to
28 : : * be vacuumed. A set all-frozen bit means that all tuples on the page are
29 : : * completely frozen, and therefore the page doesn't need to be vacuumed even
30 : : * if whole table scanning vacuum is required (e.g. anti-wraparound vacuum).
31 : : * The all-frozen bit must be set only when the page is already all-visible.
32 : : *
33 : : * The map is conservative in the sense that we make sure that whenever a bit
34 : : * is set, we know the condition is true, but if a bit is not set, it might or
35 : : * might not be true.
36 : : *
37 : : * Changes to the visibility map bits are not separately WAL-logged. Callers
38 : : * must make sure that whenever a visibility map bit is cleared, the bit is
39 : : * cleared on WAL replay of the updating operation. And whenever a visibility
40 : : * map bit is set, the bit is set on WAL replay of the operation that rendered
41 : : * the page all-visible/all-frozen.
42 : : *
43 : : * The visibility map bits operate as a hint in one direction: if they are
44 : : * clear, it may still be the case that every tuple on the page is visible to
45 : : * all transactions (we just don't know that for certain). However, if they
46 : : * are set, we may skip vacuuming pages and advance relfrozenxid or skip
47 : : * reading heap pages for an index-only scan. If they are incorrectly set,
48 : : * this can lead to data corruption and wrong results.
49 : : *
50 : : * Additionally, it is critical that the heap-page level PD_ALL_VISIBLE bit be
51 : : * correctly set and cleared along with the VM bits.
52 : : *
53 : : * When clearing the VM, if a crash occurs after the heap page makes it to
54 : : * disk but before the VM page makes it to disk, replay must clear the VM or
55 : : * the next index-only scan can return wrong results or vacuum may incorrectly
56 : : * advance relfrozenxid.
57 : : *
58 : : * When setting the VM, if a crash occurs after the visibility map page makes
59 : : * it to disk and before the updated heap page makes it to disk, redo must set
60 : : * the bit on the heap page. Otherwise, the next insert, update, or delete on
61 : : * the heap page will fail to realize that the visibility map bit must be
62 : : * cleared, possibly causing index-only scans to return wrong answers.
63 : : *
64 : : * VACUUM will normally skip pages for which the visibility map bit is set;
65 : : * such pages can't contain any dead tuples and therefore don't need vacuuming.
66 : : *
67 : : * LOCKING
68 : : *
69 : : * In heapam.c, whenever a page is modified so that not all tuples on the
70 : : * page are visible to everyone anymore, the corresponding bit in the
71 : : * visibility map is cleared. In order to be crash-safe, we need to do this
72 : : * while still holding a lock on the heap page and in the same critical
73 : : * section that logs the page modification. However, we don't want to hold
74 : : * the buffer lock over any I/O that may be required to read in the visibility
75 : : * map page. To avoid this, we examine the heap page before locking it;
76 : : * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
77 : : * page. Then, we lock the buffer. But this creates a race condition: there
78 : : * is a possibility that in the time it takes to lock the buffer, the
79 : : * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
80 : : * buffer, pin the visibility map page, and relock the buffer. This shouldn't
81 : : * happen often, because only VACUUM currently sets visibility map bits,
82 : : * and the race will only occur if VACUUM processes a given page at almost
83 : : * exactly the same time that someone tries to further modify it.
84 : : *
85 : : * To set a bit, you need to hold a lock on the heap page. That prevents
86 : : * the race condition where VACUUM sees that all tuples on the page are
87 : : * visible to everyone, but another backend modifies the page before VACUUM
88 : : * sets the bit in the visibility map.
89 : : *
90 : : * When a bit is set, the LSN of the visibility map page is updated to make
91 : : * sure that the visibility map update doesn't get written to disk before the
92 : : * WAL record of the changes that made it possible to set the bit is flushed.
93 : : * But when a bit is cleared, we don't have to do that because it's always
94 : : * safe to clear a bit in the map from a correctness point of view.
95 : : *
96 : : *-------------------------------------------------------------------------
97 : : */
98 : : #include "postgres.h"
99 : :
100 : : #include "access/heapam_xlog.h"
101 : : #include "access/visibilitymap.h"
102 : : #include "access/xloginsert.h"
103 : : #include "access/xlogutils.h"
104 : : #include "miscadmin.h"
105 : : #include "port/pg_bitutils.h"
106 : : #include "storage/bufmgr.h"
107 : : #include "storage/smgr.h"
108 : : #include "utils/inval.h"
109 : : #include "utils/rel.h"
110 : :
111 : :
112 : : /*#define TRACE_VISIBILITYMAP */
113 : :
114 : : /*
115 : : * Size of the bitmap on each visibility map page, in bytes. There's no
116 : : * extra headers, so the whole page minus the standard page header is
117 : : * used for the bitmap.
118 : : */
119 : : #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
120 : :
121 : : /* Number of heap blocks we can represent in one byte */
122 : : #define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
123 : :
124 : : /* Number of heap blocks we can represent in one visibility map page. */
125 : : #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
126 : :
127 : : /* Mapping from heap block number to the right bit in the visibility map */
128 : : #define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
129 : : #define HEAPBLK_TO_MAPBLOCK_LIMIT(x) \
130 : : (((x) + HEAPBLOCKS_PER_PAGE - 1) / HEAPBLOCKS_PER_PAGE)
131 : : #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
132 : : #define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
133 : :
134 : : /* Masks for counting subsets of bits in the visibility map. */
135 : : #define VISIBLE_MASK8 (0x55) /* The lower bit of each bit pair */
136 : : #define FROZEN_MASK8 (0xaa) /* The upper bit of each bit pair */
137 : :
138 : : /* prototypes for internal routines */
139 : : static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
140 : : static Buffer vm_extend(Relation rel, BlockNumber vm_nblocks);
141 : :
142 : :
143 : : /*
144 : : * visibilitymap_clear - clear specified bits for one page in visibility map
145 : : *
146 : : * You must pass a buffer containing the correct map page to this function.
147 : : * Call visibilitymap_pin first to pin the right one. This function doesn't do
148 : : * any I/O. Returns true if any bits have been cleared and false otherwise.
149 : : */
150 : : bool
1324 pg@bowt.ie 151 :CBC 28128 : visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
152 : : {
6362 heikki.linnakangas@i 153 : 28128 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
154 : 28128 : int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3623 rhaas@postgresql.org 155 : 28128 : int mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
3578 andres@anarazel.de 156 : 28128 : uint8 mask = flags << mapOffset;
157 : : char *map;
158 : 28128 : bool cleared = false;
159 : :
160 : : /* Must never clear all_visible bit while leaving all_frozen bit set */
161 [ - + ]: 28128 : Assert(flags & VISIBILITYMAP_VALID_BITS);
1205 pg@bowt.ie 162 [ - + ]: 28128 : Assert(flags != VISIBILITYMAP_ALL_VISIBLE);
163 : :
164 : : #ifdef TRACE_VISIBILITYMAP
165 : : elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
166 : : #endif
167 : :
1324 168 [ + - - + ]: 28128 : if (!BufferIsValid(vmbuf) || BufferGetBlockNumber(vmbuf) != mapBlock)
5432 rhaas@postgresql.org 169 [ # # ]:UBC 0 : elog(ERROR, "wrong buffer passed to visibilitymap_clear");
170 : :
1324 pg@bowt.ie 171 :CBC 28128 : LockBuffer(vmbuf, BUFFER_LOCK_EXCLUSIVE);
172 : 28128 : map = PageGetContents(BufferGetPage(vmbuf));
173 : :
6362 heikki.linnakangas@i 174 [ + + ]: 28128 : if (map[mapByte] & mask)
175 : : {
176 : 23797 : map[mapByte] &= ~mask;
177 : :
1324 pg@bowt.ie 178 : 23797 : MarkBufferDirty(vmbuf);
3578 andres@anarazel.de 179 : 23797 : cleared = true;
180 : : }
181 : :
1324 pg@bowt.ie 182 : 28128 : LockBuffer(vmbuf, BUFFER_LOCK_UNLOCK);
183 : :
3578 andres@anarazel.de 184 : 28128 : return cleared;
185 : : }
186 : :
187 : : /*
188 : : * visibilitymap_pin - pin a map page for setting a bit
189 : : *
190 : : * Setting a bit in the visibility map is a two-phase operation. First, call
191 : : * visibilitymap_pin, to pin the visibility map page containing the bit for
192 : : * the heap page. Because that can require I/O to read the map page, you
193 : : * shouldn't hold a lock on the heap page while doing that. Then, call
194 : : * visibilitymap_set to actually set the bit.
195 : : *
196 : : * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by
197 : : * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
198 : : * relation. On return, *vmbuf is a valid buffer with the map page containing
199 : : * the bit for heapBlk.
200 : : *
201 : : * If the page doesn't exist in the map file yet, it is extended.
202 : : */
203 : : void
1324 pg@bowt.ie 204 : 682187 : visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
205 : : {
6362 heikki.linnakangas@i 206 : 682187 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
207 : :
208 : : /* Reuse the old pinned buffer if possible */
1324 pg@bowt.ie 209 [ + + ]: 682187 : if (BufferIsValid(*vmbuf))
210 : : {
211 [ + - ]: 180409 : if (BufferGetBlockNumber(*vmbuf) == mapBlock)
6362 heikki.linnakangas@i 212 : 180409 : return;
213 : :
1324 pg@bowt.ie 214 :UBC 0 : ReleaseBuffer(*vmbuf);
215 : : }
1324 pg@bowt.ie 216 :CBC 501778 : *vmbuf = vm_readbuf(rel, mapBlock, true);
217 : : }
218 : :
219 : : /*
220 : : * visibilitymap_pin_ok - do we already have the correct page pinned?
221 : : *
222 : : * On entry, vmbuf should be InvalidBuffer or a valid buffer returned by
223 : : * an earlier call to visibilitymap_pin or visibilitymap_get_status on the same
224 : : * relation. The return value indicates whether the buffer covers the
225 : : * given heapBlk.
226 : : */
227 : : bool
228 : 19830 : visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
229 : : {
5432 rhaas@postgresql.org 230 : 19830 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
231 : :
1324 pg@bowt.ie 232 [ + + + - ]: 19830 : return BufferIsValid(vmbuf) && BufferGetBlockNumber(vmbuf) == mapBlock;
233 : : }
234 : :
235 : : /*
236 : : * Set VM (visibility map) flags in the VM block in vmBuf.
237 : : *
238 : : * This function is intended for callers that log VM changes together
239 : : * with the heap page modifications that rendered the page all-visible.
240 : : *
241 : : * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
242 : : * corresponding to heapBlk.
243 : : *
244 : : * In normal operation (not recovery), this must be called inside a critical
245 : : * section that also applies the necessary heap page changes and, if
246 : : * applicable, emits WAL.
247 : : *
248 : : * The caller is responsible for ensuring consistency between the heap page
249 : : * and the VM page by holding a pin and exclusive lock on the buffer
250 : : * containing heapBlk.
251 : : *
252 : : * rlocator is used only for debugging messages.
253 : : */
254 : : void
42 melanieplageman@gmai 255 :GNC 94723 : visibilitymap_set(BlockNumber heapBlk,
256 : : Buffer vmBuf, uint8 flags,
257 : : const RelFileLocator rlocator)
258 : : {
208 melanieplageman@gmai 259 :CBC 94723 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
260 : 94723 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
261 : 94723 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
262 : : Page page;
263 : : uint8 *map;
264 : : uint8 status;
265 : :
266 : : #ifdef TRACE_VISIBILITYMAP
267 : : elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
268 : : flags,
269 : : relpathbackend(rlocator, MyProcNumber, MAIN_FORKNUM).str,
270 : : heapBlk);
271 : : #endif
272 : :
273 : : /* Call in same critical section where WAL is emitted. */
208 melanieplageman@gmai 274 [ + + - + ]:GNC 94723 : Assert(InRecovery || CritSectionCount > 0);
275 : :
276 : : /* Flags should be valid. Also never clear bits with this function */
208 melanieplageman@gmai 277 [ - + ]:CBC 94723 : Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
278 : :
279 : : /* Must never set all_frozen bit without also setting all_visible bit */
280 [ - + ]: 94723 : Assert(flags != VISIBILITYMAP_ALL_FROZEN);
281 : :
282 : : /* Check that we have the right VM page pinned */
283 [ + - - + ]: 94723 : if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
208 melanieplageman@gmai 284 [ # # ]:UBC 0 : elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
285 : :
208 melanieplageman@gmai 286 [ - + ]:GNC 94723 : Assert(BufferIsLockedByMeInMode(vmBuf, BUFFER_LOCK_EXCLUSIVE));
287 : :
208 melanieplageman@gmai 288 :CBC 94723 : page = BufferGetPage(vmBuf);
289 : 94723 : map = (uint8 *) PageGetContents(page);
290 : :
291 : 94723 : status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
292 [ + - ]: 94723 : if (flags != status)
293 : : {
294 : 94723 : map[mapByte] |= (flags << mapOffset);
295 : 94723 : MarkBufferDirty(vmBuf);
296 : : }
208 melanieplageman@gmai 297 :GIC 94723 : }
298 : :
299 : : /*
300 : : * visibilitymap_get_status - get status of bits
301 : : *
302 : : * Are all tuples on heapBlk visible to all or are marked frozen, according
303 : : * to the visibility map?
304 : : *
305 : : * On entry, *vmbuf should be InvalidBuffer or a valid buffer returned by an
306 : : * earlier call to visibilitymap_pin or visibilitymap_get_status on the same
307 : : * relation. On return, *vmbuf is a valid buffer with the map page containing
308 : : * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
309 : : * releasing *vmbuf after it's done testing and setting bits.
310 : : *
311 : : * NOTE: This function is typically called without a lock on the heap page,
312 : : * so somebody else could change the bit just after we look at it. In fact,
313 : : * since we don't lock the visibility map page either, it's even possible that
314 : : * someone else could have changed the bit just before we look at it, but yet
315 : : * we might see the old value. It is the caller's responsibility to deal with
316 : : * all concurrency issues!
317 : : */
318 : : uint8
1324 pg@bowt.ie 319 :CBC 4245385 : visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
320 : : {
6362 heikki.linnakangas@i 321 : 4245385 : BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
322 : 4245385 : uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
3623 rhaas@postgresql.org 323 : 4245385 : uint8 mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
324 : : char *map;
325 : : uint8 result;
326 : :
327 : : #ifdef TRACE_VISIBILITYMAP
328 : : elog(DEBUG1, "vm_get_status %s %d", RelationGetRelationName(rel), heapBlk);
329 : : #endif
330 : :
331 : : /* Reuse the old pinned buffer if possible */
1324 pg@bowt.ie 332 [ + + ]: 4245385 : if (BufferIsValid(*vmbuf))
333 : : {
334 [ - + ]: 4110929 : if (BufferGetBlockNumber(*vmbuf) != mapBlock)
335 : : {
1324 pg@bowt.ie 336 :UBC 0 : ReleaseBuffer(*vmbuf);
337 : 0 : *vmbuf = InvalidBuffer;
338 : : }
339 : : }
340 : :
1324 pg@bowt.ie 341 [ + + ]:CBC 4245385 : if (!BufferIsValid(*vmbuf))
342 : : {
343 : 134456 : *vmbuf = vm_readbuf(rel, mapBlock, false);
344 [ + + ]: 134456 : if (!BufferIsValid(*vmbuf))
285 nathan@postgresql.or 345 :GNC 48460 : return (uint8) 0;
346 : : }
347 : :
1324 pg@bowt.ie 348 :CBC 4196925 : map = PageGetContents(BufferGetPage(*vmbuf));
349 : :
350 : : /*
351 : : * A single byte read is atomic. There could be memory-ordering effects
352 : : * here, but for performance reasons we make it the caller's job to worry
353 : : * about that.
354 : : */
3623 rhaas@postgresql.org 355 : 4196925 : result = ((map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS);
356 : 4196925 : return result;
357 : : }
358 : :
359 : : /*
360 : : * visibilitymap_count - count number of bits set in visibility map
361 : : *
362 : : * Note: we ignore the possibility of race conditions when the table is being
363 : : * extended concurrently with the call. New pages added to the table aren't
364 : : * going to be marked all-visible or all-frozen, so they won't affect the result.
365 : : */
366 : : void
3717 367 : 33998 : visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
368 : : {
369 : : BlockNumber mapBlock;
2636 tgl@sss.pgh.pa.us 370 : 33998 : BlockNumber nvisible = 0;
371 : 33998 : BlockNumber nfrozen = 0;
372 : :
373 : : /* all_visible must be specified */
3717 rhaas@postgresql.org 374 [ - + ]: 33998 : Assert(all_visible);
375 : :
5077 bruce@momjian.us 376 : 33998 : for (mapBlock = 0;; mapBlock++)
5317 tgl@sss.pgh.pa.us 377 : 10587 : {
378 : : Buffer mapBuffer;
379 : : uint64 *map;
380 : :
381 : : /*
382 : : * Read till we fall off the end of the map. We assume that any extra
383 : : * bytes in the last page are zeroed, so we don't bother excluding
384 : : * them from the count.
385 : : */
386 : 44585 : mapBuffer = vm_readbuf(rel, mapBlock, false);
387 [ + + ]: 44585 : if (!BufferIsValid(mapBuffer))
388 : 33998 : break;
389 : :
390 : : /*
391 : : * We choose not to lock the page, since the result is going to be
392 : : * immediately stale anyway if anyone is concurrently setting or
393 : : * clearing bits, and we only really need an approximate value.
394 : : */
2636 395 : 10587 : map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer));
396 : :
759 nathan@postgresql.or 397 : 10587 : nvisible += pg_popcount_masked((const char *) map, MAPSIZE, VISIBLE_MASK8);
398 [ + - ]: 10587 : if (all_frozen)
399 : 10587 : nfrozen += pg_popcount_masked((const char *) map, MAPSIZE, FROZEN_MASK8);
400 : :
5317 tgl@sss.pgh.pa.us 401 : 10587 : ReleaseBuffer(mapBuffer);
402 : : }
403 : :
2636 404 : 33998 : *all_visible = nvisible;
405 [ + - ]: 33998 : if (all_frozen)
406 : 33998 : *all_frozen = nfrozen;
5317 407 : 33998 : }
408 : :
409 : : /*
410 : : * visibilitymap_prepare_truncate -
411 : : * prepare for truncation of the visibility map
412 : : *
413 : : * nheapblocks is the new size of the heap.
414 : : *
415 : : * Return the number of blocks of new visibility map.
416 : : * If it's InvalidBlockNumber, there is nothing to truncate;
417 : : * otherwise the caller is responsible for calling smgrtruncate()
418 : : * to truncate the visibility map pages.
419 : : */
420 : : BlockNumber
2415 fujii@postgresql.org 421 : 258 : visibilitymap_prepare_truncate(Relation rel, BlockNumber nheapblocks)
422 : : {
423 : : BlockNumber newnblocks;
424 : :
425 : : /* last remaining block, byte, and bit */
6362 heikki.linnakangas@i 426 : 258 : BlockNumber truncBlock = HEAPBLK_TO_MAPBLOCK(nheapblocks);
6172 bruce@momjian.us 427 : 258 : uint32 truncByte = HEAPBLK_TO_MAPBYTE(nheapblocks);
3623 rhaas@postgresql.org 428 : 258 : uint8 truncOffset = HEAPBLK_TO_OFFSET(nheapblocks);
429 : :
430 : : #ifdef TRACE_VISIBILITYMAP
431 : : elog(DEBUG1, "vm_truncate %s %d", RelationGetRelationName(rel), nheapblocks);
432 : : #endif
433 : :
434 : : /*
435 : : * If no visibility map has been created yet for this relation, there's
436 : : * nothing to truncate.
437 : : */
1758 tgl@sss.pgh.pa.us 438 [ - + ]: 258 : if (!smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM))
2415 fujii@postgresql.org 439 :UBC 0 : return InvalidBlockNumber;
440 : :
441 : : /*
442 : : * Unless the new size is exactly at a visibility map page boundary, the
443 : : * tail bits in the last remaining map page, representing truncated heap
444 : : * blocks, need to be cleared. This is not only tidy, but also necessary
445 : : * because we don't get a chance to clear the bits if the heap is extended
446 : : * again.
447 : : */
3623 rhaas@postgresql.org 448 [ + + + + ]:CBC 258 : if (truncByte != 0 || truncOffset != 0)
6362 heikki.linnakangas@i 449 : 154 : {
450 : : Buffer mapBuffer;
451 : : Page page;
452 : : char *map;
453 : :
454 : 154 : newnblocks = truncBlock + 1;
455 : :
456 : 154 : mapBuffer = vm_readbuf(rel, truncBlock, false);
457 [ - + ]: 154 : if (!BufferIsValid(mapBuffer))
458 : : {
459 : : /* nothing to do, the file was already smaller */
2415 fujii@postgresql.org 460 :UBC 0 : return InvalidBlockNumber;
461 : : }
462 : :
3667 kgrittn@postgresql.o 463 :CBC 154 : page = BufferGetPage(mapBuffer);
6362 heikki.linnakangas@i 464 : 154 : map = PageGetContents(page);
465 : :
466 : 154 : LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
467 : :
468 : : /* NO EREPORT(ERROR) from here till changes are logged */
3485 469 : 154 : START_CRIT_SECTION();
470 : :
471 : : /* Clear out the unwanted bytes. */
6362 472 [ + + + - : 154 : MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));
+ - - + -
- ]
473 : :
474 : : /*----
475 : : * Mask out the unwanted bits of the last remaining byte.
476 : : *
477 : : * ((1 << 0) - 1) = 00000000
478 : : * ((1 << 1) - 1) = 00000001
479 : : * ...
480 : : * ((1 << 6) - 1) = 00111111
481 : : * ((1 << 7) - 1) = 01111111
482 : : *----
483 : : */
3623 rhaas@postgresql.org 484 : 154 : map[truncByte] &= (1 << truncOffset) - 1;
485 : :
486 : : /*
487 : : * Truncation of a relation is WAL-logged at a higher-level, and we
488 : : * will be called at WAL replay. But if checksums are enabled, we need
489 : : * to still write a WAL record to protect against a torn page, if the
490 : : * page is flushed to disk before the truncation WAL record. We cannot
491 : : * use MarkBufferDirtyHint here, because that will not dirty the page
492 : : * during recovery.
493 : : */
6362 heikki.linnakangas@i 494 : 154 : MarkBufferDirty(mapBuffer);
3485 495 [ + + + + : 154 : if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
+ + + - +
- + + +
- ]
496 : 133 : log_newpage_buffer(mapBuffer, false);
497 : :
498 [ - + ]: 154 : END_CRIT_SECTION();
499 : :
6362 500 : 154 : UnlockReleaseBuffer(mapBuffer);
501 : : }
502 : : else
503 : 104 : newnblocks = truncBlock;
504 : :
1758 tgl@sss.pgh.pa.us 505 [ + + ]: 258 : if (smgrnblocks(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM) <= newnblocks)
506 : : {
507 : : /* nothing to do, the file was already smaller than requested size */
2415 fujii@postgresql.org 508 : 154 : return InvalidBlockNumber;
509 : : }
510 : :
511 : 104 : return newnblocks;
512 : : }
513 : :
514 : : /*
515 : : * visibilitymap_truncation_length -
516 : : * compute truncation length for visibility map
517 : : *
518 : : * Given a proposed truncation length for the main fork, compute the
519 : : * correct truncation length for the visibility map. Should return the
520 : : * same answer as visibilitymap_prepare_truncate(), but without modifying
521 : : * anything.
522 : : */
523 : : BlockNumber
57 rhaas@postgresql.org 524 : 1 : visibilitymap_truncation_length(BlockNumber nheapblocks)
525 : : {
526 : 1 : return HEAPBLK_TO_MAPBLOCK_LIMIT(nheapblocks);
527 : : }
528 : :
529 : : /*
530 : : * Read a visibility map page.
531 : : *
532 : : * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
533 : : * true, the visibility map file is extended.
534 : : */
535 : : static Buffer
6362 heikki.linnakangas@i 536 : 680973 : vm_readbuf(Relation rel, BlockNumber blkno, bool extend)
537 : : {
538 : : Buffer buf;
539 : : SMgrRelation reln;
540 : :
541 : : /*
542 : : * Caution: re-using this smgr pointer could fail if the relcache entry
543 : : * gets closed. It's safe as long as we only do smgr-level operations
544 : : * between here and the last use of the pointer.
545 : : */
1758 tgl@sss.pgh.pa.us 546 : 680973 : reln = RelationGetSmgr(rel);
547 : :
548 : : /*
549 : : * If we haven't cached the size of the visibility map fork yet, check it
550 : : * first.
551 : : */
552 [ + + ]: 680973 : if (reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] == InvalidBlockNumber)
553 : : {
554 [ + + ]: 56478 : if (smgrexists(reln, VISIBILITYMAP_FORKNUM))
555 : 30250 : smgrnblocks(reln, VISIBILITYMAP_FORKNUM);
556 : : else
557 : 26228 : reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = 0;
558 : : }
559 : :
560 : : /*
561 : : * For reading we use ZERO_ON_ERROR mode, and initialize the page if
562 : : * necessary. It's always safe to clear bits, so it's better to clear
563 : : * corrupt pages than error out.
564 : : *
565 : : * We use the same path below to initialize pages when extending the
566 : : * relation, as a concurrent extension can end up with vm_extend()
567 : : * returning an already-initialized page.
568 : : */
569 [ + + ]: 680973 : if (blkno >= reln->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM])
570 : : {
6362 heikki.linnakangas@i 571 [ + + ]: 87012 : if (extend)
1126 andres@anarazel.de 572 : 4554 : buf = vm_extend(rel, blkno + 1);
573 : : else
6362 heikki.linnakangas@i 574 : 82458 : return InvalidBuffer;
575 : : }
576 : : else
1126 andres@anarazel.de 577 : 593961 : buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, blkno,
578 : : RBM_ZERO_ON_ERROR, NULL);
579 : :
580 : : /*
581 : : * Initializing the page when needed is trickier than it looks, because of
582 : : * the possibility of multiple backends doing this concurrently, and our
583 : : * desire to not uselessly take the buffer lock in the normal path where
584 : : * the page is OK. We must take the lock to initialize the page, so
585 : : * recheck page newness after we have the lock, in case someone else
586 : : * already did it. Also, because we initially check PageIsNew with no
587 : : * lock, it's possible to fall through and return the buffer while someone
588 : : * else is still initializing the page (i.e., we might see pd_upper as set
589 : : * but other page header fields are still zeroes). This is harmless for
590 : : * callers that will take a buffer lock themselves, but some callers
591 : : * inspect the page without any lock at all. The latter is OK only so
592 : : * long as it doesn't depend on the page header having correct contents.
593 : : * Current usage is safe because PageGetContents() does not require that.
594 : : */
3667 kgrittn@postgresql.o 595 [ + + ]: 598515 : if (PageIsNew(BufferGetPage(buf)))
596 : : {
2853 tgl@sss.pgh.pa.us 597 : 4929 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
598 [ + - ]: 4929 : if (PageIsNew(BufferGetPage(buf)))
599 : 4929 : PageInit(BufferGetPage(buf), BLCKSZ, 0);
600 : 4929 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
601 : : }
6362 heikki.linnakangas@i 602 : 598515 : return buf;
603 : : }
604 : :
605 : : /*
606 : : * Ensure that the visibility map fork is at least vm_nblocks long, extending
607 : : * it if necessary with zeroed pages.
608 : : */
609 : : static Buffer
610 : 4554 : vm_extend(Relation rel, BlockNumber vm_nblocks)
611 : : {
612 : : Buffer buf;
613 : :
986 tmunro@postgresql.or 614 : 4554 : buf = ExtendBufferedRelTo(BMR_REL(rel), VISIBILITYMAP_FORKNUM, NULL,
615 : : EB_CREATE_FORK_IF_NEEDED |
616 : : EB_CLEAR_SIZE_CACHE,
617 : : vm_nblocks,
618 : : RBM_ZERO_ON_ERROR);
619 : :
620 : : /*
621 : : * Send a shared-inval message to force other backends to close any smgr
622 : : * references they may have for this rel, which we are about to change.
623 : : * This is a useful optimization because it means that backends don't have
624 : : * to keep checking for creation or extension of the file, which happens
625 : : * infrequently.
626 : : */
1126 andres@anarazel.de 627 : 4554 : CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
628 : :
629 : 4554 : return buf;
630 : : }
|