/*-------------------------------------------------------------------------
 *
 * gistvacuum.c
 *    vacuuming routines for the postgres GiST index access method.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/access/gist/gistvacuum.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/gist_private.h"
#include "access/transam.h"
#include "commands/vacuum.h"
#include "lib/integerset.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/memutils.h"

/* Working state needed by gistbulkdelete */
typedef struct
{
    IndexVacuumInfo *info;
    IndexBulkDeleteResult *stats;
    IndexBulkDeleteCallback callback;
    void       *callback_state;
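
    /*
     * NSN at the start of the scan. It is used to detect concurrent page
     * splits: a page whose NSN is newer than this was split after the scan
     * started, and may need to be revisited (see gistvacuumpage).
     */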
    GistNSN     startNSN;

    /*
     * These sets memorize all internal pages, and all empty leaf pages, seen
     * during the scan. They are used in the second stage, to delete all the
     * empty pages.
     */
    IntegerSet *internal_page_set;
    IntegerSet *empty_leaf_set;
    MemoryContext page_set_context;
} GistVacState;

static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                           IndexBulkDeleteCallback callback, void *callback_state);
static void gistvacuumpage(GistVacState *vstate, Buffer buffer);
static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
                                          GistVacState *vstate);
static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                           Buffer parentBuffer, OffsetNumber downlink,
                           Buffer leafBuffer);

/*
 * VACUUM bulkdelete stage: remove index entries.
 */
IndexBulkDeleteResult *
gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    gistvacuumscan(info, stats, callback, callback_state);

    return stats;
}

/*
 * VACUUM cleanup stage: delete empty pages, and update index statistics.
 */
IndexBulkDeleteResult *
gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    /*
     * If gistbulkdelete was called, we need not do anything, just return the
     * stats from the latest gistbulkdelete call. If it wasn't called, we
     * still need to do a pass over the index, to obtain index statistics.
     */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        gistvacuumscan(info, stats, NULL, NULL);
    }

    /*
     * It's quite possible for us to be fooled by concurrent page splits into
     * double-counting some index tuples, so disbelieve any total that exceeds
     * the underlying heap's count ... if we know that accurately. Otherwise
     * this might just make matters worse.
     */
    if (!info->estimated_count)
    {
        if (stats->num_index_tuples > info->num_heap_tuples)
            stats->num_index_tuples = info->num_heap_tuples;
    }

    return stats;
}

/*
 * gistvacuumscan --- scan the index for VACUUMing purposes
 *
 * This scans the index for leaf tuples that are deletable according to the
 * vacuum callback, and updates the stats. Both gistbulkdelete and
 * gistvacuumcleanup invoke this (the latter only if no gistbulkdelete call
 * occurred).
 *
 * This also makes note of any empty leaf pages, as well as all internal
 * pages while looping over all index pages. After scanning all the pages, we
 * remove the empty pages so that they can be reused. Any deleted pages are
 * added directly to the free space map. (They should've already been added
 * there when they were originally deleted, but it's possible that the FSM
 * was lost at a crash, for example.)
 *
 * The caller is responsible for initially allocating/zeroing a stats struct.
 */
static void
gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    GistVacState vstate;
    BlockNumber num_pages;
    bool        needLock;
    MemoryContext oldctx;
    BlockRangeReadStreamPrivate p;
    ReadStream *stream = NULL;

    /*
     * Reset fields that track information about the entire index now. This
     * avoids double-counting in the case where a single VACUUM command
     * requires multiple scans of the index.
     *
     * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     * since they track information about the VACUUM command, and so must last
     * across each call to gistvacuumscan().
     *
     * (Note that pages_free is treated as state about the whole index, not
     * the current VACUUM. This is appropriate because RecordFreeIndexPage()
     * calls are idempotent, and get repeated for the same deleted pages in
     * some scenarios. The point for us is to track the number of recyclable
     * pages in the index at the end of the VACUUM command.)
     */
    stats->num_pages = 0;
    stats->estimated_count = false;
    stats->num_index_tuples = 0;
    stats->pages_deleted = 0;
    stats->pages_free = 0;

    /*
     * Create the integer sets to remember all the internal and the empty leaf
     * pages in page_set_context. Internally, the integer set will remember
     * this context so that the subsequent allocations for these integer sets
     * will be done from the same context.
     *
     * XXX the allocation sizes used below pre-date generation context's block
     * growing code. These values should likely be benchmarked and set to
     * more suitable values.
     */
    vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
                                                      "GiST VACUUM page set context",
                                                      16 * 1024,
                                                      16 * 1024,
                                                      16 * 1024);
    oldctx = MemoryContextSwitchTo(vstate.page_set_context);
    vstate.internal_page_set = intset_create();
    vstate.empty_leaf_set = intset_create();
    MemoryContextSwitchTo(oldctx);

    /* Set up info to pass down to gistvacuumpage */
    vstate.info = info;
    vstate.stats = stats;
    vstate.callback = callback;
    vstate.callback_state = callback_state;
    if (RelationNeedsWAL(rel))
        vstate.startNSN = GetInsertRecPtr();
    else
        vstate.startNSN = gistGetFakeLSN(rel);

    /*
     * The outer loop iterates over all index pages, in physical order (we
     * hope the kernel will cooperate in providing read-ahead for speed). It
     * is critical that we visit all leaf pages, including ones added after we
     * start the scan, else we might fail to delete some deletable tuples.
     * Hence, we must repeatedly check the relation length. We must acquire
     * the relation-extension lock while doing so to avoid a race condition:
     * if someone else is extending the relation, there is a window where
     * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     * write-locked by gistNewBuffer(). If we manage to scan such a page
     * here, we'll improperly assume it can be recycled. Taking the lock
     * synchronizes things enough to prevent a problem: either num_pages won't
     * include the new page, or gistNewBuffer already has write lock on the
     * buffer and it will be fully initialized before we can examine it. (See
     * also vacuumlazy.c, which has the same issue.) Also, we need not worry
     * if a page is added immediately after we look; the page splitting code
     * already has write-lock on the left page before it adds a right page, so
     * we must already have processed any tuples due to be moved into such a
     * page.
     *
     * We can skip locking for new or temp relations, however, since no one
     * else could be accessing them.
     */
    needLock = !RELATION_IS_LOCAL(rel);

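    /* Start the physical scan at the root page, which is always block 0. */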
    p.current_blocknum = GIST_ROOT_BLKNO;

    /*
     * It is safe to use batchmode as block_range_read_stream_cb takes no
     * locks.
     */
    stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
                                        READ_STREAM_FULL |
                                        READ_STREAM_USE_BATCHING,
                                        info->strategy,
                                        rel,
                                        MAIN_FORKNUM,
                                        block_range_read_stream_cb,
                                        &p,
                                        0);
    for (;;)
    {
        /* Get the current relation length */
        if (needLock)
            LockRelationForExtension(rel, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(rel);
        if (needLock)
            UnlockRelationForExtension(rel, ExclusiveLock);

        /* Quit if we've scanned the whole relation */
        if (p.current_blocknum >= num_pages)
            break;

        p.last_exclusive = num_pages;

        /* Iterate over pages, then loop back to recheck relation length */
        while (true)
        {
            Buffer      buf;

            /* call vacuum_delay_point while not holding any buffer lock */
            vacuum_delay_point(false);

            buf = read_stream_next_buffer(stream, NULL);

            if (!BufferIsValid(buf))
                break;

            gistvacuumpage(&vstate, buf);
        }

        /*
         * We have to reset the read stream to use it again. After returning
         * InvalidBuffer, the read stream API won't invoke our callback again
         * until the stream has been reset.
         */
        read_stream_reset(stream);
    }

    read_stream_end(stream);

    /*
     * If we found any recyclable pages (and recorded them in the FSM), then
     * forcibly update the upper-level FSM pages to ensure that searchers can
     * find them. It's possible that the pages were also found during
     * previous scans and so this is a waste of time, but it's cheap enough
     * relative to scanning the index that it shouldn't matter much, and
     * making sure that free pages are available sooner not later seems
     * worthwhile.
     *
     * Note that if no recyclable pages exist, we don't bother vacuuming the
     * FSM at all.
     */
    if (stats->pages_free > 0)
        IndexFreeSpaceMapVacuum(rel);

    /* update statistics */
    stats->num_pages = num_pages;

    /*
     * If we saw any empty pages, try to unlink them from the tree so that
     * they can be reused.
     */
    gistvacuum_delete_empty_pages(info, &vstate);

    /* we don't need the internal and empty page sets anymore */
    MemoryContextDelete(vstate.page_set_context);
    vstate.page_set_context = NULL;
    vstate.internal_page_set = NULL;
    vstate.empty_leaf_set = NULL;
}

/*
 * gistvacuumpage --- VACUUM one page
 *
 * This processes a single page for gistbulkdelete(). `buffer` contains the
 * page to process. In some cases we must go back and reexamine
 * previously-scanned pages; this routine recurses when necessary to handle
 * that case.
 */
static void
gistvacuumpage(GistVacState *vstate, Buffer buffer)
{
    IndexVacuumInfo *info = vstate->info;
    IndexBulkDeleteCallback callback = vstate->callback;
    void       *callback_state = vstate->callback_state;
    Relation    rel = info->index;
    BlockNumber orig_blkno = BufferGetBlockNumber(buffer);
    Page        page;
    BlockNumber recurse_to;

    /*
     * orig_blkno is the highest block number reached by the outer
     * gistvacuumscan() loop. This will be the same as blkno unless we are
     * recursing to reexamine a previous page.
     */
    BlockNumber blkno = orig_blkno;

restart:
    recurse_to = InvalidBlockNumber;

    /*
     * We are not going to stay here for a long time, aggressively grab an
     * exclusive lock.
     */
    LockBuffer(buffer, GIST_EXCLUSIVE);
    page = BufferGetPage(buffer);

    if (gistPageRecyclable(page))
    {
        /* Okay to recycle this page */
        RecordFreeIndexPage(rel, blkno);
        vstate->stats->pages_deleted++;
        vstate->stats->pages_free++;
    }
    else if (GistPageIsDeleted(page))
    {
        /* Already deleted, but can't recycle yet */
        vstate->stats->pages_deleted++;
    }
    else if (GistPageIsLeaf(page))
    {
        OffsetNumber todelete[MaxOffsetNumber];
        int         ntodelete = 0;
        int         nremain;
        GISTPageOpaque opaque = GistPageGetOpaque(page);
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

        /*
         * Check whether we need to recurse back to earlier pages. What we
         * are concerned about is a page split that happened since we started
         * the vacuum scan. If the split moved some tuples to a lower page
         * then we might have missed 'em. If so, set up for tail recursion.
         *
         * This is similar to the checks we do during searches, when following
         * a downlink, but we don't need to jump to higher-numbered pages,
         * because we will process them later, anyway.
         */
        if ((GistFollowRight(page) ||
             vstate->startNSN < GistPageGetNSN(page)) &&
            (opaque->rightlink != InvalidBlockNumber) &&
            (opaque->rightlink < orig_blkno))
        {
            recurse_to = opaque->rightlink;
        }

        /*
         * Scan over all items to see which ones need to be deleted according
         * to the callback function.
         */
        if (callback)
        {
            OffsetNumber off;

            for (off = FirstOffsetNumber;
                 off <= maxoff;
                 off = OffsetNumberNext(off))
            {
                ItemId      iid = PageGetItemId(page, off);
                IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

                if (callback(&(idxtuple->t_tid), callback_state))
                    todelete[ntodelete++] = off;
            }
        }

        /*
         * Apply any needed deletes. We issue just one WAL record per page,
         * so as to minimize WAL traffic.
         */
        if (ntodelete > 0)
        {
            START_CRIT_SECTION();

            MarkBufferDirty(buffer);

            PageIndexMultiDelete(page, todelete, ntodelete);
            GistMarkTuplesDeleted(page);

            if (RelationNeedsWAL(rel))
            {
                XLogRecPtr  recptr;

                recptr = gistXLogUpdate(buffer,
                                        todelete, ntodelete,
                                        NULL, 0, InvalidBuffer);
                PageSetLSN(page, recptr);
            }
            else
                PageSetLSN(page, gistGetFakeLSN(rel));

            END_CRIT_SECTION();

            vstate->stats->tuples_removed += ntodelete;
            /* must recompute maxoff */
            maxoff = PageGetMaxOffsetNumber(page);
        }

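        /* Count the tuples that remain on the page after the deletions. */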
        nremain = maxoff - FirstOffsetNumber + 1;
        if (nremain == 0)
        {
            /*
             * The page is now completely empty. Remember its block number,
             * so that we will try to delete the page in the second stage.
             *
             * Skip this when recursing, because IntegerSet requires that the
             * values are added in ascending order. The next VACUUM will pick
             * it up.
             */
            if (blkno == orig_blkno)
                intset_add_member(vstate->empty_leaf_set, blkno);
        }
        else
            vstate->stats->num_index_tuples += nremain;
    }
    else
    {
        /*
         * On an internal page, check for "invalid tuples", left behind by an
         * incomplete page split on PostgreSQL 9.0 or below. These are not
         * created by newer PostgreSQL versions, but unfortunately, there is
         * no version number anywhere in a GiST index, so we don't know
         * whether this index might still contain invalid tuples or not.
         */
        OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
        OffsetNumber off;

        for (off = FirstOffsetNumber;
             off <= maxoff;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);

            if (GistTupleIsInvalid(idxtuple))
                ereport(LOG,
                        (errmsg("index \"%s\" contains an inner tuple marked as invalid",
                                RelationGetRelationName(rel)),
                         errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
                         errhint("Please REINDEX it.")));
        }

        /*
         * Remember the block number of this page, so that we can revisit it
         * later in gistvacuum_delete_empty_pages(), when we search for
         * parents of empty leaf pages.
         */
        if (blkno == orig_blkno)
            intset_add_member(vstate->internal_page_set, blkno);
    }

    UnlockReleaseBuffer(buffer);

    /*
     * This is really tail recursion, but if the compiler is too stupid to
     * optimize it as such, we'd eat an uncomfortably large amount of stack
     * space per recursion level (due to the todelete[] array). A failure is
     * improbable since the number of levels isn't likely to be large ... but
     * just in case, let's hand-optimize into a loop.
     */
    if (recurse_to != InvalidBlockNumber)
    {
        blkno = recurse_to;

        /* check for vacuum delay while not holding any buffer lock */
        vacuum_delay_point(false);

        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                    info->strategy);
        goto restart;
    }
}

/*
 * Scan all internal pages, and try to delete their empty child pages.
 */
static void
gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
{
    Relation    rel = info->index;
    BlockNumber empty_pages_remaining;
    uint64      blkno;

    /*
     * Rescan all inner pages to find those that have empty child pages.
     */
    empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
    intset_begin_iterate(vstate->internal_page_set);
    while (empty_pages_remaining > 0 &&
           intset_iterate_next(vstate->internal_page_set, &blkno))
    {
        Buffer      buffer;
        Page        page;
        OffsetNumber off,
                    maxoff;
        OffsetNumber todelete[MaxOffsetNumber];
        BlockNumber leafs_to_delete[MaxOffsetNumber];
        int         ntodelete;
        int         deleted;

        buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
                                    RBM_NORMAL, info->strategy);

        LockBuffer(buffer, GIST_SHARE);
        page = BufferGetPage(buffer);

        if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
        {
            /*
             * This page was an internal page earlier, but now it's something
             * else. Shouldn't happen...
             */
            Assert(false);
            UnlockReleaseBuffer(buffer);
            continue;
        }

        /*
         * Scan all the downlinks, and see if any of them point to empty leaf
         * pages.
         */
        maxoff = PageGetMaxOffsetNumber(page);
        ntodelete = 0;
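        /*
         * Note that the "ntodelete < maxoff - 1" bound below stops us from
         * collecting every downlink, so at least one always remains on the
         * parent; a similar guard is re-checked before each deletion.
         */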
        for (off = FirstOffsetNumber;
             off <= maxoff && ntodelete < maxoff - 1;
             off = OffsetNumberNext(off))
        {
            ItemId      iid = PageGetItemId(page, off);
            IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
            BlockNumber leafblk;

            leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
            if (intset_is_member(vstate->empty_leaf_set, leafblk))
            {
                leafs_to_delete[ntodelete] = leafblk;
                todelete[ntodelete++] = off;
            }
        }

563 : : }
564 : : }
565 : :
566 : : /*
567 : : * In order to avoid deadlock, child page must be locked before
568 : : * parent, so we must release the lock on the parent, lock the child,
569 : : * and then re-acquire the lock the parent. (And we wouldn't want to
570 : : * do I/O, while holding a lock, anyway.)
571 : : *
572 : : * At the instant that we're not holding a lock on the parent, the
573 : : * downlink might get moved by a concurrent insert, so we must
574 : : * re-check that it still points to the same child page after we have
575 : : * acquired both locks. Also, another backend might have inserted a
576 : : * tuple to the page, so that it is no longer empty. gistdeletepage()
577 : : * re-checks all these conditions.
578 : : */
        LockBuffer(buffer, GIST_UNLOCK);

        deleted = 0;
        for (int i = 0; i < ntodelete; i++)
        {
            Buffer      leafbuf;

            /*
             * Don't remove the last downlink from the parent. That would
             * confuse the insertion code.
             */
            if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
                break;

            leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
                                         RBM_NORMAL, info->strategy);
            LockBuffer(leafbuf, GIST_EXCLUSIVE);
            gistcheckpage(rel, leafbuf);

            LockBuffer(buffer, GIST_EXCLUSIVE);
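            /*
             * Downlinks removed so far have shifted the remaining ones to
             * the left, hence the "todelete[i] - deleted" offset adjustment.
             */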
            if (gistdeletepage(info, vstate->stats,
                               buffer, todelete[i] - deleted,
                               leafbuf))
                deleted++;
            LockBuffer(buffer, GIST_UNLOCK);

            UnlockReleaseBuffer(leafbuf);
        }

        ReleaseBuffer(buffer);

        /*
         * We can stop the scan as soon as we have seen the downlinks, even if
         * we were not able to remove them all.
         */
        empty_pages_remaining -= ntodelete;
    }
}

/*
 * gistdeletepage takes a leaf page, and its parent, and tries to delete the
 * leaf. Both pages must be locked.
 *
 * Even if the page was empty when we first saw it, a concurrent inserter might
 * have added a tuple to it since. Similarly, the downlink might have moved.
 * We re-check all the conditions, to make sure the page is still deletable,
 * before modifying anything.
 *
 * Returns true if the page was deleted, and false if a concurrent update
 * prevented it.
 */
static bool
gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
               Buffer parentBuffer, OffsetNumber downlink,
               Buffer leafBuffer)
{
    Page        parentPage = BufferGetPage(parentBuffer);
    Page        leafPage = BufferGetPage(leafBuffer);
    ItemId      iid;
    IndexTuple  idxtuple;
    XLogRecPtr  recptr;
    FullTransactionId txid;

    /*
     * Check that the leaf is still empty and deletable.
     */
    if (!GistPageIsLeaf(leafPage))
    {
        /* a leaf page should never become a non-leaf page */
        Assert(false);
        return false;
    }

    if (GistFollowRight(leafPage))
        return false;           /* don't mess with a concurrent page split */

    if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
        return false;           /* not empty anymore */

659 : : * Ok, the leaf is deletable. Is the downlink in the parent page still
660 : : * valid? It might have been moved by a concurrent insert. We could try
661 : : * to re-find it by scanning the page again, possibly moving right if the
662 : : * was split. But for now, let's keep it simple and just give up. The
663 : : * next VACUUM will pick it up.
664 : : */
2360 heikki.linnakangas@i 665 [ + - + - ]:CBC 243 : if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
666 [ - + ]: 243 : GistPageIsLeaf(parentPage))
667 : : {
668 : : /* shouldn't happen, internal pages are never deleted */
2360 heikki.linnakangas@i 669 :UBC 0 : Assert(false);
670 : : return false;
671 : : }
672 : :
2360 heikki.linnakangas@i 673 [ + - ]:CBC 243 : if (PageGetMaxOffsetNumber(parentPage) < downlink
674 [ - + ]: 243 : || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
2360 heikki.linnakangas@i 675 :UBC 0 : return false;
676 : :
2360 heikki.linnakangas@i 677 :CBC 243 : iid = PageGetItemId(parentPage, downlink);
678 : 243 : idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
679 [ - + ]: 486 : if (BufferGetBlockNumber(leafBuffer) !=
680 : 243 : ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
2360 heikki.linnakangas@i 681 :UBC 0 : return false;
682 : :
683 : : /*
684 : : * All good, proceed with the deletion.
685 : : *
686 : : * The page cannot be immediately recycled, because in-progress scans that
687 : : * saw the downlink might still visit it. Mark the page with the current
688 : : * next-XID counter, so that we know when it can be recycled. Once that
689 : : * XID becomes older than GlobalXmin, we know that all scans that are
690 : : * currently in progress must have ended. (That's much more conservative
691 : : * than needed, but let's keep it safe and simple.)
692 : : */
2236 heikki.linnakangas@i 693 :CBC 243 : txid = ReadNextFullTransactionId();
694 : :
2360 695 : 243 : START_CRIT_SECTION();
696 : :
697 : : /* mark the page as deleted */
698 : 243 : MarkBufferDirty(leafBuffer);
2236 699 : 243 : GistPageSetDeleted(leafPage, txid);
1654 pg@bowt.ie 700 : 243 : stats->pages_newly_deleted++;
2063 akapila@postgresql.o 701 : 243 : stats->pages_deleted++;
702 : :
703 : : /* remove the downlink from the parent */
2360 heikki.linnakangas@i 704 : 243 : MarkBufferDirty(parentBuffer);
705 : 243 : PageIndexTupleDelete(parentPage, downlink);
706 : :
707 [ + - + + : 243 : if (RelationNeedsWAL(info->index))
+ - + - ]
708 : 243 : recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
709 : : else
2360 heikki.linnakangas@i 710 :UBC 0 : recptr = gistGetFakeLSN(info->index);
2360 heikki.linnakangas@i 711 :CBC 243 : PageSetLSN(parentPage, recptr);
712 : 243 : PageSetLSN(leafPage, recptr);
713 : :
714 [ - + ]: 243 : END_CRIT_SECTION();
715 : :
716 : 243 : return true;
717 : : }