Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * brin_pageops.c
3 : : * Page-handling routines for BRIN indexes
4 : : *
5 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 : : * Portions Copyright (c) 1994, Regents of the University of California
7 : : *
8 : : * IDENTIFICATION
9 : : * src/backend/access/brin/brin_pageops.c
10 : : */
11 : : #include "postgres.h"
12 : :
13 : : #include "access/brin_page.h"
14 : : #include "access/brin_pageops.h"
15 : : #include "access/brin_revmap.h"
16 : : #include "access/brin_xlog.h"
17 : : #include "access/xloginsert.h"
18 : : #include "miscadmin.h"
19 : : #include "storage/bufmgr.h"
20 : : #include "storage/freespace.h"
21 : : #include "storage/lmgr.h"
22 : : #include "utils/rel.h"
23 : :
24 : : /*
25 : : * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page. We can tolerate
26 : : * a single item per page, unlike other index AMs.
 : : *
 : : * The value is BLCKSZ minus the MAXALIGN'd sum of the page header and one
 : : * ItemIdData, minus the MAXALIGN'd BrinSpecialSpace, with the result rounded
 : : * down by MAXALIGN_DOWN.
27 : : */
28 : : #define BrinMaxItemSize \
29 : : MAXALIGN_DOWN(BLCKSZ - \
30 : : (MAXALIGN(SizeOfPageHeaderData + \
31 : : sizeof(ItemIdData)) + \
32 : : MAXALIGN(sizeof(BrinSpecialSpace))))
33 : :
/* Forward declarations of functions that are private (static) to this file. */
34 : : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
35 : : bool *extended);
36 : : static Size br_page_get_freespace(Page page);
37 : : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
38 : :
39 : :
40 : : /*
41 : : * Update tuple origtup (size origsz), located in offset oldoff of buffer
42 : : * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
43 : : * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
44 : : *
45 : : * If samepage is true, attempt to put the new tuple in the same page, but if
46 : : * there's no room, use some other one.
47 : : *
48 : : * If the update is successful, return true; the revmap is updated to point to
49 : : * the new tuple. If the update is not done for whatever reason, return false.
50 : : * Caller may retry the update if this happens.
51 : : */
52 : : bool
4008 alvherre@alvh.no-ip. 53 :CBC 13727 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
54 : : BrinRevmap *revmap, BlockNumber heapBlk,
55 : : Buffer oldbuf, OffsetNumber oldoff,
56 : : const BrinTuple *origtup, Size origsz,
57 : : const BrinTuple *newtup, Size newsz,
58 : : bool samepage)
59 : : {
60 : : Page oldpage;
61 : : ItemId oldlp;
62 : : BrinTuple *oldtup;
63 : : Size oldsz;
64 : : Buffer newbuf;
2764 tgl@sss.pgh.pa.us 65 : 13727 : BlockNumber newblk = InvalidBlockNumber;
66 : : bool extended;
67 : :
3809 68 [ - + ]: 13727 : Assert(newsz == MAXALIGN(newsz));
69 : :
70 : : /* If the item is oversized, don't bother. */
3654 alvherre@alvh.no-ip. 71 [ - + ]: 13727 : if (newsz > BrinMaxItemSize)
72 : : {
3654 alvherre@alvh.no-ip. 73 [ # # ]:UBC 0 : ereport(ERROR,
74 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
75 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
76 : : newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
77 : : return false; /* keep compiler quiet */
78 : : }
79 : :
80 : : /* make sure the revmap is long enough to contain the entry we need */
4008 alvherre@alvh.no-ip. 81 :CBC 13727 : brinRevmapExtend(revmap, heapBlk);
82 : :
83 [ + + ]: 13727 : if (!samepage)
84 : : {
85 : : /* need a page on which to put the item */
86 : 307 : newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
87 [ - + ]: 307 : if (!BufferIsValid(newbuf))
88 : : {
3730 alvherre@alvh.no-ip. 89 [ # # ]:UBC 0 : Assert(!extended);
4008 90 : 0 : return false;
91 : : }
92 : :
93 : : /*
94 : : * Note: it's possible (though unlikely) that the returned newbuf is
95 : : * the same as oldbuf, if brin_getinsertbuffer determined that the old
96 : : * buffer does in fact have enough space.
97 : : */
4008 alvherre@alvh.no-ip. 98 [ - + ]:CBC 307 : if (newbuf == oldbuf)
99 : : {
3730 alvherre@alvh.no-ip. 100 [ # # ]:UBC 0 : Assert(!extended);
4008 101 : 0 : newbuf = InvalidBuffer;
102 : : }
103 : : else
2764 tgl@sss.pgh.pa.us 104 :CBC 307 : newblk = BufferGetBlockNumber(newbuf);
105 : : }
106 : : else
107 : : {
4008 alvherre@alvh.no-ip. 108 : 13420 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
109 : 13420 : newbuf = InvalidBuffer;
3729 110 : 13420 : extended = false;
111 : : }
3478 kgrittn@postgresql.o 112 : 13727 : oldpage = BufferGetPage(oldbuf);
4008 alvherre@alvh.no-ip. 113 : 13727 : oldlp = PageGetItemId(oldpage, oldoff);
114 : :
115 : : /*
116 : : * Check that the old tuple wasn't updated concurrently: it might have
117 : : * moved someplace else entirely, and for that matter the whole page
118 : : * might've become a revmap page. Note that in the first two cases
119 : : * checked here, the "oldlp" we just calculated is garbage; but
120 : : * PageGetItemId() is simple enough that it was safe to do that
121 : : * calculation anyway.
122 : : */
2917 tgl@sss.pgh.pa.us 123 [ + - + - ]: 27454 : if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
124 : 13727 : oldoff > PageGetMaxOffsetNumber(oldpage) ||
125 [ - + ]: 13727 : !ItemIdIsNormal(oldlp))
126 : : {
4008 alvherre@alvh.no-ip. 127 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
128 : :
129 : : /*
130 : : * If this happens, and the new buffer was obtained by extending the
131 : : * relation, then we need to ensure we don't leave it uninitialized or
132 : : * forget about it.
133 : : */
134 [ # # ]: 0 : if (BufferIsValid(newbuf))
135 : : {
3730 136 [ # # ]: 0 : if (extended)
137 : 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
4008 138 : 0 : UnlockReleaseBuffer(newbuf);
3730 139 [ # # ]: 0 : if (extended)
2764 tgl@sss.pgh.pa.us 140 : 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
141 : : }
4008 alvherre@alvh.no-ip. 142 : 0 : return false;
143 : : }
144 : :
4008 alvherre@alvh.no-ip. 145 :CBC 13727 : oldsz = ItemIdGetLength(oldlp);
146 : 13727 : oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
147 : :
148 : : /*
149 : : * ... or it might have been updated in place to different contents.
150 : : */
151 [ - + ]: 13727 : if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
152 : : {
4008 alvherre@alvh.no-ip. 153 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
154 [ # # ]: 0 : if (BufferIsValid(newbuf))
155 : : {
156 : : /* As above, initialize and record new page if we got one */
3730 157 [ # # ]: 0 : if (extended)
158 : 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
4008 159 : 0 : UnlockReleaseBuffer(newbuf);
3730 160 [ # # ]: 0 : if (extended)
2764 tgl@sss.pgh.pa.us 161 : 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
162 : : }
4008 alvherre@alvh.no-ip. 163 : 0 : return false;
164 : : }
165 : :
166 : : /*
167 : : * Great, the old tuple is intact. We can proceed with the update.
168 : : *
169 : : * If there's enough room in the old page for the new tuple, replace it.
170 : : *
171 : : * Note that there might now be enough space on the page even though the
172 : : * caller told us there isn't, if a concurrent update moved another tuple
173 : : * elsewhere or replaced a tuple with a smaller one.
174 : : */
3885 alvherre@alvh.no-ip. 175 [ + + + + ]:CBC 27163 : if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
4008 176 : 13436 : brin_can_do_samepage_update(oldbuf, origsz, newsz))
177 : : {
178 : 13420 : START_CRIT_SECTION();
1 peter@eisentraut.org 179 [ - + ]:GNC 13420 : if (!PageIndexTupleOverwrite(oldpage, oldoff, newtup, newsz))
3336 tgl@sss.pgh.pa.us 180 [ # # ]:UBC 0 : elog(ERROR, "failed to replace BRIN tuple");
4008 alvherre@alvh.no-ip. 181 :CBC 13420 : MarkBufferDirty(oldbuf);
182 : :
183 : : /* XLOG stuff */
184 [ + + + + : 13420 : if (RelationNeedsWAL(idxrel))
+ - + - ]
185 : : {
186 : : xl_brin_samepage_update xlrec;
187 : : XLogRecPtr recptr;
188 : 13417 : uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
189 : :
3995 heikki.linnakangas@i 190 : 13417 : xlrec.offnum = oldoff;
191 : :
192 : 13417 : XLogBeginInsert();
259 peter@eisentraut.org 193 : 13417 : XLogRegisterData(&xlrec, SizeOfBrinSamepageUpdate);
194 : :
3995 heikki.linnakangas@i 195 : 13417 : XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
259 peter@eisentraut.org 196 : 13417 : XLogRegisterBufData(0, newtup, newsz);
197 : :
3995 heikki.linnakangas@i 198 : 13417 : recptr = XLogInsert(RM_BRIN_ID, info);
199 : :
4008 alvherre@alvh.no-ip. 200 : 13417 : PageSetLSN(oldpage, recptr);
201 : : }
202 : :
203 [ - + ]: 13420 : END_CRIT_SECTION();
204 : :
205 : 13420 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
206 : :
2764 tgl@sss.pgh.pa.us 207 [ - + ]: 13420 : if (BufferIsValid(newbuf))
208 : : {
209 : : /* As above, initialize and record new page if we got one */
2764 tgl@sss.pgh.pa.us 210 [ # # ]:UBC 0 : if (extended)
211 : 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
212 : 0 : UnlockReleaseBuffer(newbuf);
213 [ # # ]: 0 : if (extended)
214 : 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
215 : : }
216 : :
4008 alvherre@alvh.no-ip. 217 :CBC 13420 : return true;
218 : : }
219 [ - + ]: 307 : else if (newbuf == InvalidBuffer)
220 : : {
221 : : /*
222 : : * Not enough space, but caller said that there was. Tell them to
223 : : * start over.
224 : : */
4008 alvherre@alvh.no-ip. 225 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
226 : 0 : return false;
227 : : }
228 : : else
229 : : {
230 : : /*
231 : : * Not enough free space on the oldpage. Put the new tuple on the new
232 : : * page, and update the revmap.
233 : : */
3478 kgrittn@postgresql.o 234 :CBC 307 : Page newpage = BufferGetPage(newbuf);
235 : : Buffer revmapbuf;
236 : : ItemPointerData newtid;
237 : : OffsetNumber newoff;
3730 alvherre@alvh.no-ip. 238 : 307 : Size freespace = 0;
239 : :
4008 240 : 307 : revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
241 : :
242 : 307 : START_CRIT_SECTION();
243 : :
244 : : /*
245 : : * We need to initialize the page if it's newly obtained. Note we
246 : : * will WAL-log the initialization as part of the update, so we don't
247 : : * need to do that here.
248 : : */
3730 249 [ + + ]: 307 : if (extended)
2764 tgl@sss.pgh.pa.us 250 : 11 : brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
251 : :
3336 252 : 307 : PageIndexTupleDeleteNoCompact(oldpage, oldoff);
1 peter@eisentraut.org 253 :GNC 307 : newoff = PageAddItem(newpage, newtup, newsz, InvalidOffsetNumber, false, false);
4008 alvherre@alvh.no-ip. 254 [ - + ]:CBC 307 : if (newoff == InvalidOffsetNumber)
4008 alvherre@alvh.no-ip. 255 [ # # ]:UBC 0 : elog(ERROR, "failed to add BRIN tuple to new page");
4008 alvherre@alvh.no-ip. 256 :CBC 307 : MarkBufferDirty(oldbuf);
257 : 307 : MarkBufferDirty(newbuf);
258 : :
259 : : /* needed to update FSM below */
3730 260 [ + + ]: 307 : if (extended)
261 : 11 : freespace = br_page_get_freespace(newpage);
262 : :
2764 tgl@sss.pgh.pa.us 263 : 307 : ItemPointerSet(&newtid, newblk, newoff);
4008 alvherre@alvh.no-ip. 264 : 307 : brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
265 : 307 : MarkBufferDirty(revmapbuf);
266 : :
267 : : /* XLOG stuff */
268 [ + - + + : 307 : if (RelationNeedsWAL(idxrel))
+ - + - ]
269 : : {
270 : : xl_brin_update xlrec;
271 : : XLogRecPtr recptr;
272 : : uint8 info;
273 : :
274 [ + + ]: 307 : info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
275 : :
3995 heikki.linnakangas@i 276 : 307 : xlrec.insert.offnum = newoff;
4007 alvherre@alvh.no-ip. 277 : 307 : xlrec.insert.heapBlk = heapBlk;
278 : 307 : xlrec.insert.pagesPerRange = pagesPerRange;
3995 heikki.linnakangas@i 279 : 307 : xlrec.oldOffnum = oldoff;
280 : :
281 : 307 : XLogBeginInsert();
282 : :
283 : : /* new page */
259 peter@eisentraut.org 284 : 307 : XLogRegisterData(&xlrec, SizeOfBrinUpdate);
285 : :
3995 heikki.linnakangas@i 286 [ + + ]: 307 : XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
259 peter@eisentraut.org 287 : 307 : XLogRegisterBufData(0, newtup, newsz);
288 : :
289 : : /* revmap page */
3214 alvherre@alvh.no-ip. 290 : 307 : XLogRegisterBuffer(1, revmapbuf, 0);
291 : :
292 : : /* old page */
3995 heikki.linnakangas@i 293 : 307 : XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
294 : :
295 : 307 : recptr = XLogInsert(RM_BRIN_ID, info);
296 : :
4008 alvherre@alvh.no-ip. 297 : 307 : PageSetLSN(oldpage, recptr);
298 : 307 : PageSetLSN(newpage, recptr);
3478 kgrittn@postgresql.o 299 : 307 : PageSetLSN(BufferGetPage(revmapbuf), recptr);
300 : : }
301 : :
4008 alvherre@alvh.no-ip. 302 [ - + ]: 307 : END_CRIT_SECTION();
303 : :
304 : 307 : LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
305 : 307 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
306 : 307 : UnlockReleaseBuffer(newbuf);
307 : :
3730 308 [ + + ]: 307 : if (extended)
309 : : {
2366 akapila@postgresql.o 310 : 11 : RecordPageWithFreeSpace(idxrel, newblk, freespace);
2764 tgl@sss.pgh.pa.us 311 : 11 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
312 : : }
313 : :
4008 alvherre@alvh.no-ip. 314 : 307 : return true;
315 : : }
316 : : }
317 : :
318 : : /*
319 : : * Return whether brin_doupdate can do a samepage update.
320 : : */
321 : : bool
322 : 26872 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
323 : : {
324 : : return
325 [ + + ]: 30836 : ((newsz <= origsz) ||
3478 kgrittn@postgresql.o 326 [ + + ]: 3964 : PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
327 : : }
328 : :
329 : : /*
330 : : * Insert an index tuple into the index relation. The revmap is updated to
331 : : * mark the range containing the given page as pointing to the inserted entry.
332 : : * A WAL record is written.
333 : : *
334 : : * The buffer, if valid, is first checked for free space to insert the new
335 : : * entry; if there isn't enough, a new buffer is obtained and pinned. No
336 : : * buffer lock must be held on entry, no buffer lock is held on exit.
337 : : *
338 : : * Return value is the offset number where the tuple was inserted.
339 : : */
340 : : OffsetNumber
4008 alvherre@alvh.no-ip. 341 : 2830 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
342 : : BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
343 : : const BrinTuple *tup, Size itemsz)
344 : : {
345 : : Page page;
346 : : BlockNumber blk;
347 : : OffsetNumber off;
2764 tgl@sss.pgh.pa.us 348 : 2830 : Size freespace = 0;
349 : : Buffer revmapbuf;
350 : : ItemPointerData tid;
351 : : bool extended;
352 : :
3809 353 [ - + ]: 2830 : Assert(itemsz == MAXALIGN(itemsz));
354 : :
355 : : /* If the item is oversized, don't even bother. */
3654 alvherre@alvh.no-ip. 356 [ - + ]: 2830 : if (itemsz > BrinMaxItemSize)
357 : : {
3654 alvherre@alvh.no-ip. 358 [ # # ]:UBC 0 : ereport(ERROR,
359 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
360 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
361 : : itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
362 : : return InvalidOffsetNumber; /* keep compiler quiet */
363 : : }
364 : :
365 : : /* Make sure the revmap is long enough to contain the entry we need */
4008 alvherre@alvh.no-ip. 366 :CBC 2830 : brinRevmapExtend(revmap, heapBlk);
367 : :
368 : : /*
369 : : * Acquire lock on buffer supplied by caller, if any. If it doesn't have
370 : : * enough space, unpin it to obtain a new one below.
371 : : */
372 [ + + ]: 2830 : if (BufferIsValid(*buffer))
373 : : {
374 : : /*
375 : : * It's possible that another backend (or ourselves!) extended the
376 : : * revmap over the page we held a pin on, so we cannot assume that
377 : : * it's still a regular page.
378 : : */
379 : 1174 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
3478 kgrittn@postgresql.o 380 [ + + ]: 1174 : if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
381 : : {
4008 alvherre@alvh.no-ip. 382 : 60 : UnlockReleaseBuffer(*buffer);
383 : 60 : *buffer = InvalidBuffer;
384 : : }
385 : : }
386 : :
387 : : /*
388 : : * If we still don't have a usable buffer, have brin_getinsertbuffer
389 : : * obtain one for us.
390 : : */
391 [ + + ]: 2830 : if (!BufferIsValid(*buffer))
392 : : {
393 : : do
3654 394 : 1716 : *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
395 [ - + ]: 1716 : while (!BufferIsValid(*buffer));
396 : : }
397 : : else
3729 398 : 1114 : extended = false;
399 : :
400 : : /* Now obtain lock on revmap buffer */
4008 401 : 2830 : revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
402 : :
3478 kgrittn@postgresql.o 403 : 2830 : page = BufferGetPage(*buffer);
4008 alvherre@alvh.no-ip. 404 : 2830 : blk = BufferGetBlockNumber(*buffer);
405 : :
406 : : /* Execute the actual insertion */
407 : 2830 : START_CRIT_SECTION();
3730 408 [ + + ]: 2830 : if (extended)
2764 tgl@sss.pgh.pa.us 409 : 243 : brin_page_init(page, BRIN_PAGETYPE_REGULAR);
1 peter@eisentraut.org 410 :GNC 2830 : off = PageAddItem(page, tup, itemsz, InvalidOffsetNumber, false, false);
4008 alvherre@alvh.no-ip. 411 [ - + ]:CBC 2830 : if (off == InvalidOffsetNumber)
2764 tgl@sss.pgh.pa.us 412 [ # # ]:UBC 0 : elog(ERROR, "failed to add BRIN tuple to new page");
4008 alvherre@alvh.no-ip. 413 :CBC 2830 : MarkBufferDirty(*buffer);
414 : :
415 : : /* needed to update FSM below */
2764 tgl@sss.pgh.pa.us 416 [ + + ]: 2830 : if (extended)
417 : 243 : freespace = br_page_get_freespace(page);
418 : :
4008 alvherre@alvh.no-ip. 419 : 2830 : ItemPointerSet(&tid, blk, off);
420 : 2830 : brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
421 : 2830 : MarkBufferDirty(revmapbuf);
422 : :
423 : : /* XLOG stuff */
424 [ + + + + : 2830 : if (RelationNeedsWAL(idxrel))
+ + + + ]
425 : : {
426 : : xl_brin_insert xlrec;
427 : : XLogRecPtr recptr;
428 : : uint8 info;
429 : :
430 [ + + ]: 2370 : info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
431 : 2370 : xlrec.heapBlk = heapBlk;
432 : 2370 : xlrec.pagesPerRange = pagesPerRange;
3995 heikki.linnakangas@i 433 : 2370 : xlrec.offnum = off;
434 : :
435 : 2370 : XLogBeginInsert();
259 peter@eisentraut.org 436 : 2370 : XLogRegisterData(&xlrec, SizeOfBrinInsert);
437 : :
3995 heikki.linnakangas@i 438 [ + + ]: 2370 : XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
259 peter@eisentraut.org 439 : 2370 : XLogRegisterBufData(0, tup, itemsz);
440 : :
3995 heikki.linnakangas@i 441 : 2370 : XLogRegisterBuffer(1, revmapbuf, 0);
442 : :
443 : 2370 : recptr = XLogInsert(RM_BRIN_ID, info);
444 : :
4008 alvherre@alvh.no-ip. 445 : 2370 : PageSetLSN(page, recptr);
3478 kgrittn@postgresql.o 446 : 2370 : PageSetLSN(BufferGetPage(revmapbuf), recptr);
447 : : }
448 : :
4008 alvherre@alvh.no-ip. 449 [ - + ]: 2830 : END_CRIT_SECTION();
450 : :
451 : : /* Tuple is firmly on buffer; we can release our locks */
452 : 2830 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
453 : 2830 : LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
454 : :
455 : : BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
456 : : blk, off, heapBlk));
457 : :
458 [ + + ]: 2830 : if (extended)
459 : : {
2366 akapila@postgresql.o 460 : 243 : RecordPageWithFreeSpace(idxrel, blk, freespace);
2764 tgl@sss.pgh.pa.us 461 : 243 : FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
462 : : }
463 : :
4008 alvherre@alvh.no-ip. 464 : 2830 : return off;
465 : : }
466 : :
467 : : /*
468 : : * Initialize a page with the given type.
469 : : *
470 : : * Caller is responsible for marking it dirty, as appropriate.
471 : : */
472 : : void
473 : 759 : brin_page_init(Page page, uint16 type)
474 : : {
475 : 759 : PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
476 : :
3885 477 : 759 : BrinPageType(page) = type;
4008 478 : 759 : }
479 : :
480 : : /*
481 : : * Initialize a new BRIN index's metapage.
482 : : */
483 : : void
484 : 224 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
485 : : {
486 : : BrinMetaPageData *metadata;
487 : :
488 : 224 : brin_page_init(page, BRIN_PAGETYPE_META);
489 : :
490 : 224 : metadata = (BrinMetaPageData *) PageGetContents(page);
491 : :
492 : 224 : metadata->brinMagic = BRIN_META_MAGIC;
493 : 224 : metadata->brinVersion = version;
494 : 224 : metadata->pagesPerRange = pagesPerRange;
495 : :
496 : : /*
497 : : * Note we cheat here a little. 0 is not a valid revmap block number
498 : : * (because it's the metapage buffer), but doing this enables the first
499 : : * revmap page to be created when the index is.
500 : : */
501 : 224 : metadata->lastRevmapPage = 0;
502 : :
503 : : /*
504 : : * Set pd_lower just past the end of the metadata. This is essential,
505 : : * because without doing so, metadata will be lost if xlog.c compresses
506 : : * the page.
507 : : */
2917 tgl@sss.pgh.pa.us 508 : 224 : ((PageHeader) page)->pd_lower =
509 : 224 : ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
4008 alvherre@alvh.no-ip. 510 : 224 : }
511 : :
512 : : /*
513 : : * Initiate page evacuation protocol.
514 : : *
515 : : * The page must be locked in exclusive mode by the caller.
516 : : *
517 : : * If the page is not yet initialized or empty, return false without doing
518 : : * anything; it can be used for revmap without any further changes. If it
519 : : * contains tuples, mark it for evacuation and return true.
520 : : */
521 : : bool
522 : 185 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
523 : : {
524 : : OffsetNumber off;
525 : : OffsetNumber maxoff;
526 : : Page page;
527 : :
3478 kgrittn@postgresql.o 528 : 185 : page = BufferGetPage(buf);
529 : :
4008 alvherre@alvh.no-ip. 530 [ + + ]: 185 : if (PageIsNew(page))
531 : 183 : return false;
532 : :
533 : 2 : maxoff = PageGetMaxOffsetNumber(page);
534 [ + + ]: 292 : for (off = FirstOffsetNumber; off <= maxoff; off++)
535 : : {
536 : : ItemId lp;
537 : :
538 : 291 : lp = PageGetItemId(page, off);
539 [ + + ]: 291 : if (ItemIdIsUsed(lp))
540 : : {
541 : : /*
542 : : * Prevent other backends from adding more stuff to this page:
543 : : * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
544 : : * can no longer be used to add new tuples. Note that this flag
545 : : * is not WAL-logged, except accidentally.
546 : : */
3885 547 : 1 : BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
4008 548 : 1 : MarkBufferDirtyHint(buf, true);
549 : :
550 : 1 : return true;
551 : : }
552 : : }
553 : 1 : return false;
554 : : }
555 : :
556 : : /*
557 : : * Move all tuples out of a page.
558 : : *
559 : : * The caller must hold lock on the page. The lock and pin are released.
560 : : */
561 : : void
562 : 1 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
563 : : BrinRevmap *revmap, Buffer buf)
564 : : {
565 : : OffsetNumber off;
566 : : OffsetNumber maxoff;
567 : : Page page;
3126 568 : 1 : BrinTuple *btup = NULL;
569 : 1 : Size btupsz = 0;
570 : :
3478 kgrittn@postgresql.o 571 : 1 : page = BufferGetPage(buf);
572 : :
3885 alvherre@alvh.no-ip. 573 [ - + ]: 1 : Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
574 : :
4008 575 : 1 : maxoff = PageGetMaxOffsetNumber(page);
576 [ + + ]: 292 : for (off = FirstOffsetNumber; off <= maxoff; off++)
577 : : {
578 : : BrinTuple *tup;
579 : : Size sz;
580 : : ItemId lp;
581 : :
582 [ - + ]: 291 : CHECK_FOR_INTERRUPTS();
583 : :
584 : 291 : lp = PageGetItemId(page, off);
585 [ + - ]: 291 : if (ItemIdIsUsed(lp))
586 : : {
587 : 291 : sz = ItemIdGetLength(lp);
588 : 291 : tup = (BrinTuple *) PageGetItem(page, lp);
3126 589 : 291 : tup = brin_copy_tuple(tup, sz, btup, &btupsz);
590 : :
4008 591 : 291 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
592 : :
593 [ - + ]: 291 : if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
594 : : buf, off, tup, sz, tup, sz, false))
4008 alvherre@alvh.no-ip. 595 :UBC 0 : off--; /* retry */
596 : :
4008 alvherre@alvh.no-ip. 597 :CBC 291 : LockBuffer(buf, BUFFER_LOCK_SHARE);
598 : :
599 : : /* It's possible that someone extended the revmap over this page */
600 [ - + ]: 291 : if (!BRIN_IS_REGULAR_PAGE(page))
4008 alvherre@alvh.no-ip. 601 :UBC 0 : break;
602 : : }
603 : : }
604 : :
4008 alvherre@alvh.no-ip. 605 :CBC 1 : UnlockReleaseBuffer(buf);
606 : 1 : }
607 : :
608 : : /*
609 : : * Given a BRIN index page, initialize it if necessary, and record its
610 : : * current free space in the FSM.
611 : : *
612 : : * The main use for this is when, during vacuuming, an uninitialized page is
613 : : * found, which could be the result of relation extension followed by a crash
614 : : * before the page can be used.
615 : : *
616 : : * Here, we don't bother to update upper FSM pages, instead expecting that our
617 : : * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere
618 : : * in this file, it's generally a good idea to propagate additions of free
619 : : * space into the upper FSM pages immediately.
620 : : */
621 : : void
3730 622 : 285 : brin_page_cleanup(Relation idxrel, Buffer buf)
623 : : {
3478 kgrittn@postgresql.o 624 : 285 : Page page = BufferGetPage(buf);
625 : :
626 : : /*
627 : : * If a page was left uninitialized, initialize it now; also record it in
628 : : * FSM.
629 : : *
630 : : * Somebody else might be extending the relation concurrently. To avoid
631 : : * re-initializing the page before they can grab the buffer lock, we
632 : : * acquire the extension lock momentarily. Since they hold the extension
633 : : * lock from before getting the page and after its been initialized, we're
634 : : * sure to see their initialization.
635 : : */
3730 alvherre@alvh.no-ip. 636 [ - + ]: 285 : if (PageIsNew(page))
637 : : {
3730 alvherre@alvh.no-ip. 638 :UBC 0 : LockRelationForExtension(idxrel, ShareLock);
639 : 0 : UnlockRelationForExtension(idxrel, ShareLock);
640 : :
641 : 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
642 [ # # ]: 0 : if (PageIsNew(page))
643 : : {
644 : 0 : brin_initialize_empty_new_buffer(idxrel, buf);
645 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
2764 tgl@sss.pgh.pa.us 646 : 0 : return;
647 : : }
3730 alvherre@alvh.no-ip. 648 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
649 : : }
650 : :
651 : : /* Nothing to be done for non-regular index pages */
3478 kgrittn@postgresql.o 652 [ + + ]:CBC 285 : if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
653 [ + + ]: 220 : BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
2764 tgl@sss.pgh.pa.us 654 : 130 : return;
655 : :
656 : : /* Measure free space and record it */
657 : 155 : RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
658 : : br_page_get_freespace(page));
659 : : }
660 : :
661 : : /*
662 : : * Return a pinned and exclusively locked buffer which can be used to insert an
663 : : * index item of size itemsz (caller must ensure not to request sizes
664 : : * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
665 : : * an order determined to avoid deadlocks).
666 : : *
667 : : * If we find that the old page is no longer a regular index page (because
668 : : * of a revmap extension), the old buffer is unlocked and we return
669 : : * InvalidBuffer.
670 : : *
671 : : * If there's no existing page with enough free space to accommodate the new
672 : : * item, the relation is extended. If this happens, *extended is set to true,
673 : : * and it is the caller's responsibility to initialize the page (and WAL-log
674 : : * that fact) prior to use. The caller should also update the FSM with the
675 : : * page's remaining free space after the insertion.
676 : : *
677 : : * Note that the caller is not expected to update FSM unless *extended is set
678 : : * true. This policy means that we'll update FSM when a page is created, and
679 : : * when it's found to have too little space for a desired tuple insertion,
680 : : * but not every single time we add a tuple to the page.
681 : : *
682 : : * Note that in some corner cases it is possible for this routine to extend
683 : : * the relation and then not return the new page. It is this routine's
684 : : * responsibility to WAL-log the page initialization and to record the page in
685 : : * FSM if that happens, since the caller certainly can't do it.
686 : : */
687 : : static Buffer
4008 alvherre@alvh.no-ip. 688 : 2023 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
689 : : bool *extended)
690 : : {
691 : : BlockNumber oldblk;
692 : : BlockNumber newblk;
693 : : Page page;
694 : : Size freespace;
695 : :
696 : : /* callers must have checked */
3654 697 [ - + ]: 2023 : Assert(itemsz <= BrinMaxItemSize);
698 : :
4008 699 [ + + ]: 2023 : if (BufferIsValid(oldbuf))
700 : 307 : oldblk = BufferGetBlockNumber(oldbuf);
701 : : else
702 : 1716 : oldblk = InvalidBlockNumber;
703 : :
704 : : /* Choose initial target page, re-using existing target if known */
2764 tgl@sss.pgh.pa.us 705 [ + - ]: 2023 : newblk = RelationGetTargetBlock(irel);
706 [ + + ]: 2023 : if (newblk == InvalidBlockNumber)
2366 akapila@postgresql.o 707 : 220 : newblk = GetPageWithFreeSpace(irel, itemsz);
708 : :
709 : : /*
710 : : * Loop until we find a page with sufficient free space. By the time we
711 : : * return to caller out of this loop, both buffers are valid and locked;
712 : : * if we have to restart here, neither page is locked and newblk isn't
713 : : * pinned (if it's even valid).
714 : : */
715 : : for (;;)
4008 alvherre@alvh.no-ip. 716 : 77 : {
717 : : Buffer buf;
718 : 2100 : bool extensionLockHeld = false;
719 : :
720 [ - + ]: 2100 : CHECK_FOR_INTERRUPTS();
721 : :
2764 tgl@sss.pgh.pa.us 722 : 2100 : *extended = false;
723 : :
4008 alvherre@alvh.no-ip. 724 [ + + ]: 2100 : if (newblk == InvalidBlockNumber)
725 : : {
726 : : /*
727 : : * There's not enough free space in any existing index page,
728 : : * according to the FSM: extend the relation to obtain a shiny new
729 : : * page.
730 : : *
731 : : * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
732 : : * which'd avoid the need to hold the extension lock during buffer
733 : : * reclaim.
734 : : */
735 [ + - + + ]: 254 : if (!RELATION_IS_LOCAL(irel))
736 : : {
737 : 29 : LockRelationForExtension(irel, ExclusiveLock);
738 : 29 : extensionLockHeld = true;
739 : : }
740 : 254 : buf = ReadBuffer(irel, P_NEW);
741 : 254 : newblk = BufferGetBlockNumber(buf);
3730 742 : 254 : *extended = true;
743 : :
744 : : BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
745 : : BufferGetBlockNumber(buf)));
746 : : }
4008 747 [ + + ]: 1846 : else if (newblk == oldblk)
748 : : {
749 : : /*
750 : : * There's an odd corner-case here where the FSM is out-of-date,
751 : : * and gave us the old page.
752 : : */
753 : 13 : buf = oldbuf;
754 : : }
755 : : else
756 : : {
757 : 1833 : buf = ReadBuffer(irel, newblk);
758 : : }
759 : :
760 : : /*
761 : : * We lock the old buffer first, if it's earlier than the new one; but
762 : : * then we need to check that it hasn't been turned into a revmap page
763 : : * concurrently. If we detect that that happened, give up and tell
764 : : * caller to start over.
765 : : */
766 [ + + + + ]: 2100 : if (BufferIsValid(oldbuf) && oldblk < newblk)
767 : : {
768 : 311 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
3478 kgrittn@postgresql.o 769 [ - + ]: 311 : if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
770 : : {
4008 alvherre@alvh.no-ip. 771 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
772 : :
773 : : /*
774 : : * It is possible that the new page was obtained from
775 : : * extending the relation. In that case, we must be sure to
776 : : * record it in the FSM before leaving, because otherwise the
777 : : * space would be lost forever. However, we cannot let an
778 : : * uninitialized page get in the FSM, so we need to initialize
779 : : * it first.
780 : : */
3730 781 [ # # ]: 0 : if (*extended)
782 : 0 : brin_initialize_empty_new_buffer(irel, buf);
783 : :
784 [ # # ]: 0 : if (extensionLockHeld)
785 : 0 : UnlockRelationForExtension(irel, ExclusiveLock);
786 : :
4008 787 : 0 : ReleaseBuffer(buf);
788 : :
2764 tgl@sss.pgh.pa.us 789 [ # # ]: 0 : if (*extended)
790 : : {
791 : 0 : FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
792 : : /* shouldn't matter, but don't confuse caller */
793 : 0 : *extended = false;
794 : : }
795 : :
4008 alvherre@alvh.no-ip. 796 : 0 : return InvalidBuffer;
797 : : }
798 : : }
799 : :
4008 alvherre@alvh.no-ip. 800 :CBC 2100 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
801 : :
802 [ + + ]: 2100 : if (extensionLockHeld)
803 : 29 : UnlockRelationForExtension(irel, ExclusiveLock);
804 : :
3478 kgrittn@postgresql.o 805 : 2100 : page = BufferGetPage(buf);
806 : :
807 : : /*
808 : : * We have a new buffer to insert into. Check that the new page has
809 : : * enough free space, and return it if it does; otherwise start over.
810 : : * (br_page_get_freespace also checks that the FSM didn't hand us a
811 : : * page that has since been repurposed for the revmap.)
812 : : */
3730 alvherre@alvh.no-ip. 813 : 4200 : freespace = *extended ?
3654 814 [ + + ]: 2100 : BrinMaxItemSize : br_page_get_freespace(page);
4008 815 [ + + ]: 2100 : if (freespace >= itemsz)
816 : : {
2764 tgl@sss.pgh.pa.us 817 : 2023 : RelationSetTargetBlock(irel, newblk);
818 : :
819 : : /*
820 : : * Lock the old buffer if not locked already. Note that in this
821 : : * case we know for sure it's a regular page: it's later than the
822 : : * new page we just got, which is not a revmap page, and revmap
823 : : * pages are always consecutive.
824 : : */
4008 alvherre@alvh.no-ip. 825 [ + + - + ]: 2023 : if (BufferIsValid(oldbuf) && oldblk > newblk)
826 : : {
4008 alvherre@alvh.no-ip. 827 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
3478 kgrittn@postgresql.o 828 [ # # ]: 0 : Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
829 : : }
830 : :
4008 alvherre@alvh.no-ip. 831 :CBC 2023 : return buf;
832 : : }
833 : :
834 : : /* This page is no good. */
835 : :
836 : : /*
837 : : * If an entirely new page does not contain enough free space for the
838 : : * new item, then surely that item is oversized. Complain loudly; but
839 : : * first make sure we initialize the page and record it as free, for
840 : : * next time.
841 : : */
3730 842 [ - + ]: 77 : if (*extended)
843 : : {
3730 alvherre@alvh.no-ip. 844 :UBC 0 : brin_initialize_empty_new_buffer(irel, buf);
845 : : /* since this should not happen, skip FreeSpaceMapVacuum */
846 : :
4008 847 [ # # ]: 0 : ereport(ERROR,
848 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
849 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
850 : : itemsz, freespace, RelationGetRelationName(irel))));
851 : : return InvalidBuffer; /* keep compiler quiet */
852 : : }
853 : :
4008 alvherre@alvh.no-ip. 854 [ + + ]:CBC 77 : if (newblk != oldblk)
855 : 64 : UnlockReleaseBuffer(buf);
856 [ + + + - ]: 77 : if (BufferIsValid(oldbuf) && oldblk <= newblk)
857 : 17 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
858 : :
859 : : /*
860 : : * Update the FSM with the new, presumably smaller, freespace value
861 : : * for this page, then search for a new target page.
862 : : */
863 : 77 : newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
864 : : }
865 : : }
866 : :
867 : : /*
868 : : * Initialize a page as an empty regular BRIN page, WAL-log this, and record
869 : : * the page in FSM.
870 : : *
871 : : * There are several corner situations in which we extend the relation to
872 : : * obtain a new page and later find that we cannot use it immediately. When
873 : : * that happens, we don't want to leave the page go unrecorded in FSM, because
874 : : * there is no mechanism to get the space back and the index would bloat.
875 : : * Also, because we would not WAL-log the action that would initialize the
876 : : * page, the page would go uninitialized in a standby (or after recovery).
877 : : *
878 : : * While we record the page in FSM here, caller is responsible for doing FSM
879 : : * upper-page update if that seems appropriate.
880 : : */
881 : : static void
3730 alvherre@alvh.no-ip. 882 :UBC 0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
883 : : {
884 : : Page page;
885 : :
886 : : BRIN_elog((DEBUG2,
887 : : "brin_initialize_empty_new_buffer: initializing blank page %u",
888 : : BufferGetBlockNumber(buffer)));
889 : :
890 : 0 : START_CRIT_SECTION();
3478 kgrittn@postgresql.o 891 : 0 : page = BufferGetPage(buffer);
3730 alvherre@alvh.no-ip. 892 : 0 : brin_page_init(page, BRIN_PAGETYPE_REGULAR);
893 : 0 : MarkBufferDirty(buffer);
894 : 0 : log_newpage_buffer(buffer, true);
895 [ # # ]: 0 : END_CRIT_SECTION();
896 : :
897 : : /*
898 : : * We update the FSM for this page, but this is not WAL-logged. This is
899 : : * acceptable because VACUUM will scan the index and update the FSM with
900 : : * pages whose FSM records were forgotten in a crash.
901 : : */
902 : 0 : RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
903 : : br_page_get_freespace(page));
904 : 0 : }
905 : :
906 : :
907 : : /*
908 : : * Return the amount of free space on a regular BRIN index page.
909 : : *
910 : : * If the page is not a regular page, or has been marked with the
911 : : * BRIN_EVACUATE_PAGE flag, returns 0.
912 : : */
913 : : static Size
4008 alvherre@alvh.no-ip. 914 :CBC 3429 : br_page_get_freespace(Page page)
915 : : {
916 [ + - ]: 3429 : if (!BRIN_IS_REGULAR_PAGE(page) ||
3885 917 [ - + ]: 3429 : (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
4008 alvherre@alvh.no-ip. 918 :UBC 0 : return 0;
919 : : else
4008 alvherre@alvh.no-ip. 920 :CBC 3429 : return PageGetFreeSpace(page);
921 : : }
|