Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * gistbuild.c
 4 : : * build algorithm for the GiST index implementation.
5 : : *
6 : : * There are two different strategies:
7 : : *
8 : : * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
9 : : * order, and create downlinks and internal pages as we go. This builds
10 : : * the index from the bottom up, similar to how B-tree index build
11 : : * works.
12 : : *
13 : : * 2. Start with an empty index, and insert all tuples one by one.
14 : : *
15 : : * The sorted method is used if the operator classes for all columns have
16 : : * a 'sortsupport' defined. Otherwise, we resort to the second strategy.
17 : : *
18 : : * The second strategy can optionally use buffers at different levels of
19 : : * the tree to reduce I/O, see "Buffering build algorithm" in the README
20 : : * for a more detailed explanation. It initially calls insert over and
21 : : * over, but switches to the buffered algorithm after a certain number of
22 : : * tuples (unless buffering mode is disabled).
23 : : *
24 : : *
25 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
26 : : * Portions Copyright (c) 1994, Regents of the University of California
27 : : *
28 : : * IDENTIFICATION
29 : : * src/backend/access/gist/gistbuild.c
30 : : *
31 : : *-------------------------------------------------------------------------
32 : : */
33 : : #include "postgres.h"
34 : :
35 : : #include <math.h>
36 : :
37 : : #include "access/genam.h"
38 : : #include "access/gist_private.h"
39 : : #include "access/tableam.h"
40 : : #include "access/xloginsert.h"
41 : : #include "miscadmin.h"
42 : : #include "nodes/execnodes.h"
43 : : #include "optimizer/optimizer.h"
44 : : #include "storage/bufmgr.h"
45 : : #include "storage/bulk_write.h"
46 : :
47 : : #include "utils/memutils.h"
48 : : #include "utils/rel.h"
49 : : #include "utils/tuplesort.h"
50 : :
 51 : : /* Interval, in index tuples, between checks of whether to switch to buffering build mode */
52 : : #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
53 : :
54 : : /*
55 : : * Number of tuples to process in the slow way before switching to buffering
56 : : * mode, when buffering is explicitly turned on. Also, the number of tuples
57 : : * to process between readjusting the buffer size parameter, while in
58 : : * buffering mode.
59 : : */
60 : : #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
61 : :
62 : : /*
63 : : * Strategy used to build the index. It can change between the
64 : : * GIST_BUFFERING_* modes on the fly, but if the Sorted method is used,
65 : : * that needs to be decided up-front and cannot be changed afterwards.
66 : : */
67 : : typedef enum
68 : : {
69 : : GIST_SORTED_BUILD, /* bottom-up build by sorting */
70 : : GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to
71 : : * switch */
72 : : GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to
73 : : * buffering build mode if the index grows too
74 : : * big */
75 : : GIST_BUFFERING_STATS, /* gathering statistics of index tuple size
76 : : * before switching to the buffering build
77 : : * mode */
78 : : GIST_BUFFERING_ACTIVE, /* in buffering build mode */
79 : : } GistBuildMode;
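 : : /*
 : :  * Editor's note (illustrative summary, not part of the original file): the
 : :  * mode transitions implemented below are
 : :  *
 : :  *   GIST_BUFFERING_AUTO  -> GIST_BUFFERING_ACTIVE once the index grows past
 : :  *                           effective_cache_size (checked every
 : :  *                           BUFFERING_MODE_SWITCH_CHECK_STEP tuples);
 : :  *   GIST_BUFFERING_STATS -> GIST_BUFFERING_ACTIVE after
 : :  *                           BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET tuples;
 : :  *   either of the above  -> GIST_BUFFERING_DISABLED if gistInitBuffering()
 : :  *                           finds too little cache or maintenance_work_mem.
 : :  *
 : :  * GIST_SORTED_BUILD and an explicitly requested GIST_BUFFERING_DISABLED are
 : :  * decided in gistbuild() and never change afterwards.
 : :  */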
80 : :
81 : : /* Working state for gistbuild and its callback */
82 : : typedef struct
83 : : {
84 : : Relation indexrel;
85 : : Relation heaprel;
86 : : GISTSTATE *giststate;
87 : :
88 : : Size freespace; /* amount of free space to leave on pages */
89 : :
90 : : GistBuildMode buildMode;
91 : :
92 : : int64 indtuples; /* number of tuples indexed */
93 : :
94 : : /*
95 : : * Extra data structures used during a buffering build. 'gfbb' contains
96 : : * information related to managing the build buffers. 'parentMap' is a
97 : : * lookup table of the parent of each internal page.
98 : : */
99 : : int64 indtuplesSize; /* total size of all indexed tuples */
100 : : GISTBuildBuffers *gfbb;
101 : : HTAB *parentMap;
102 : :
103 : : /*
104 : : * Extra data structures used during a sorting build.
105 : : */
106 : : Tuplesortstate *sortstate; /* state data for tuplesort.c */
107 : :
108 : : BlockNumber pages_allocated;
109 : :
110 : : BulkWriteState *bulkstate;
111 : : } GISTBuildState;
112 : :
113 : : #define GIST_SORTED_BUILD_PAGE_NUM 4
114 : :
115 : : /*
 116 : : * In a sorted build, we use a stack of these structs, one for each level,
 117 : : * to hold an in-memory buffer of the last pages at that level.
 118 : : *
 119 : : * A sorted GiST build relies on the sort opclass to linearize the data well,
 120 : : * which is not always possible for multidimensional data. To handle such
 121 : : * anomalies, we buffer index tuples and apply a multidimension-aware picksplit.
122 : : */
123 : : typedef struct GistSortedBuildLevelState
124 : : {
125 : : int current_page;
126 : : BlockNumber last_blkno;
127 : : struct GistSortedBuildLevelState *parent; /* Upper level, if any */
128 : : Page pages[GIST_SORTED_BUILD_PAGE_NUM];
129 : : } GistSortedBuildLevelState;
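 : : /*
 : :  * Editor's note (illustrative, not part of the original file): each level
 : :  * buffers up to GIST_SORTED_BUILD_PAGE_NUM (4) pages' worth of tuples.  When
 : :  * the last page fills up, gist_indexsortbuild_levelstate_flush() runs
 : :  * picksplit over all the buffered tuples, writes the resulting pages out,
 : :  * and pushes one downlink per written page into the parent level's buffer.
 : :  */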
130 : :
131 : : /* prototypes for private functions */
132 : :
133 : : static void gistSortedBuildCallback(Relation index, ItemPointer tid,
134 : : Datum *values, bool *isnull,
135 : : bool tupleIsAlive, void *state);
136 : : static void gist_indexsortbuild(GISTBuildState *state);
137 : : static void gist_indexsortbuild_levelstate_add(GISTBuildState *state,
138 : : GistSortedBuildLevelState *levelstate,
139 : : IndexTuple itup);
140 : : static void gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
141 : : GistSortedBuildLevelState *levelstate);
142 : :
143 : : static void gistInitBuffering(GISTBuildState *buildstate);
144 : : static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
145 : : static void gistBuildCallback(Relation index,
146 : : ItemPointer tid,
147 : : Datum *values,
148 : : bool *isnull,
149 : : bool tupleIsAlive,
150 : : void *state);
151 : : static void gistBufferingBuildInsert(GISTBuildState *buildstate,
152 : : IndexTuple itup);
153 : : static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
154 : : BlockNumber startblkno, int startlevel);
155 : : static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate,
156 : : Buffer buffer, int level,
157 : : IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
158 : : BlockNumber parentblk, OffsetNumber downlinkoffnum);
159 : : static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate,
160 : : BlockNumber childblkno, int level,
161 : : BlockNumber *parentblkno,
162 : : OffsetNumber *downlinkoffnum);
163 : : static void gistProcessEmptyingQueue(GISTBuildState *buildstate);
164 : : static void gistEmptyAllBuffers(GISTBuildState *buildstate);
165 : : static int gistGetMaxLevel(Relation index);
166 : :
167 : : static void gistInitParentMap(GISTBuildState *buildstate);
168 : : static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
169 : : BlockNumber parent);
170 : : static void gistMemorizeAllDownlinks(GISTBuildState *buildstate,
171 : : Buffer parentbuf);
172 : : static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
173 : :
174 : :
175 : : /*
176 : : * Main entry point to GiST index build.
177 : : */
178 : : IndexBuildResult *
3520 tgl@sss.pgh.pa.us 179 :CBC 741 : gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
180 : : {
181 : : IndexBuildResult *result;
182 : : double reltuples;
183 : : GISTBuildState buildstate;
5112 heikki.linnakangas@i 184 : 741 : MemoryContext oldcxt = CurrentMemoryContext;
185 : : int fillfactor;
186 : : Oid SortSupportFnOids[INDEX_MAX_KEYS];
1790 tgl@sss.pgh.pa.us 187 : 741 : GiSTOptions *options = (GiSTOptions *) index->rd_options;
188 : :
189 : : /*
190 : : * We expect to be called exactly once for any index relation. If that's
191 : : * not the case, big trouble's what we have.
192 : : */
1815 heikki.linnakangas@i 193 [ - + ]: 741 : if (RelationGetNumberOfBlocks(index) != 0)
1815 heikki.linnakangas@i 194 [ # # ]:UBC 0 : elog(ERROR, "index \"%s\" already contains data",
195 : : RelationGetRelationName(index));
196 : :
5112 heikki.linnakangas@i 197 :CBC 741 : buildstate.indexrel = index;
2451 akorotkov@postgresql 198 : 741 : buildstate.heaprel = heap;
1815 heikki.linnakangas@i 199 : 741 : buildstate.sortstate = NULL;
200 : 741 : buildstate.giststate = initGISTstate(index);
201 : :
202 : : /*
203 : : * Create a temporary memory context that is reset once for each tuple
204 : : * processed. (Note: we don't bother to make this a child of the
205 : : * giststate's scanCxt, so we have to delete it separately at the end.)
206 : : */
207 : 741 : buildstate.giststate->tempCxt = createTempGistContext();
208 : :
209 : : /*
210 : : * Choose build strategy. First check whether the user specified to use
211 : : * buffering mode. (The use-case for that in the field is somewhat
212 : : * questionable perhaps, but it's important for testing purposes.)
213 : : */
1790 tgl@sss.pgh.pa.us 214 [ + + ]: 741 : if (options)
215 : : {
2173 alvherre@alvh.no-ip. 216 [ + + ]: 16 : if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
1815 heikki.linnakangas@i 217 : 6 : buildstate.buildMode = GIST_BUFFERING_STATS;
2173 alvherre@alvh.no-ip. 218 [ + + ]: 10 : else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
1815 heikki.linnakangas@i 219 : 3 : buildstate.buildMode = GIST_BUFFERING_DISABLED;
220 : : else /* must be "auto" */
221 : 7 : buildstate.buildMode = GIST_BUFFERING_AUTO;
222 : : }
223 : : else
224 : : {
225 : 725 : buildstate.buildMode = GIST_BUFFERING_AUTO;
226 : : }
227 : :
228 : : /*
229 : : * Unless buffering mode was forced, see if we can use sorting instead.
230 : : */
1790 tgl@sss.pgh.pa.us 231 [ + + ]: 741 : if (buildstate.buildMode != GIST_BUFFERING_STATS)
232 : : {
233 : 735 : bool hasallsortsupports = true;
234 : 735 : int keyscount = IndexRelationGetNumberOfKeyAttributes(index);
235 : :
236 [ + + ]: 1598 : for (int i = 0; i < keyscount; i++)
237 : : {
238 : 1204 : SortSupportFnOids[i] = index_getprocid(index, i + 1,
239 : : GIST_SORTSUPPORT_PROC);
240 [ + + ]: 1204 : if (!OidIsValid(SortSupportFnOids[i]))
241 : : {
242 : 341 : hasallsortsupports = false;
243 : 341 : break;
244 : : }
245 : : }
246 [ + + ]: 735 : if (hasallsortsupports)
247 : 394 : buildstate.buildMode = GIST_SORTED_BUILD;
248 : : }
249 : :
250 : : /*
251 : : * Calculate target amount of free space to leave on pages.
252 : : */
1815 heikki.linnakangas@i 253 [ + + ]: 741 : fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
5112 254 : 741 : buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
255 : :
256 : : /*
257 : : * Build the index using the chosen strategy.
258 : : */
1815 259 : 741 : buildstate.indtuples = 0;
260 : 741 : buildstate.indtuplesSize = 0;
261 : :
262 [ + + ]: 741 : if (buildstate.buildMode == GIST_SORTED_BUILD)
263 : : {
264 : : /*
265 : : * Sort all data, build the index from bottom up.
266 : : */
267 : 394 : buildstate.sortstate = tuplesort_begin_index_gist(heap,
268 : : index,
269 : : maintenance_work_mem,
270 : : NULL,
271 : : TUPLESORT_NONE);
272 : :
273 : : /* Scan the table, adding all tuples to the tuplesort */
274 : 394 : reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
275 : : gistSortedBuildCallback,
276 : : &buildstate, NULL);
277 : :
278 : : /*
279 : : * Perform the sort and build index pages.
280 : : */
281 : 394 : tuplesort_performsort(buildstate.sortstate);
282 : :
283 : 394 : gist_indexsortbuild(&buildstate);
284 : :
285 : 394 : tuplesort_end(buildstate.sortstate);
286 : : }
287 : : else
288 : : {
289 : : /*
290 : : * Initialize an empty index and insert all tuples, possibly using
291 : : * buffers on intermediate levels.
292 : : */
293 : : Buffer buffer;
294 : : Page page;
295 : :
296 : : /* initialize the root page */
889 andres@anarazel.de 297 : 347 : buffer = gistNewBuffer(index, heap);
1815 heikki.linnakangas@i 298 [ - + ]: 347 : Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
299 : 347 : page = BufferGetPage(buffer);
300 : :
301 : 347 : START_CRIT_SECTION();
302 : :
303 : 347 : GISTInitBuffer(buffer, F_LEAF);
304 : :
305 : 347 : MarkBufferDirty(buffer);
306 : 347 : PageSetLSN(page, GistBuildLSN);
307 : :
308 : 347 : UnlockReleaseBuffer(buffer);
309 : :
310 [ - + ]: 347 : END_CRIT_SECTION();
311 : :
312 : : /* Scan the table, inserting all the tuples to the index. */
313 : 347 : reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
314 : : gistBuildCallback,
315 : : &buildstate, NULL);
316 : :
317 : : /*
318 : : * If buffering was used, flush out all the tuples that are still in
319 : : * the buffers.
320 : : */
321 [ + + ]: 347 : if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
322 : : {
323 [ - + ]: 3 : elog(DEBUG1, "all tuples processed, emptying buffers");
324 : 3 : gistEmptyAllBuffers(&buildstate);
325 : 3 : gistFreeBuildBuffers(buildstate.gfbb);
326 : : }
327 : :
328 : : /*
329 : : * We didn't write WAL records as we built the index, so if
330 : : * WAL-logging is required, write all pages to the WAL now.
331 : : */
332 [ + + + + : 347 : if (RelationNeedsWAL(index))
+ + + + ]
333 : : {
334 : 205 : log_newpage_range(index, MAIN_FORKNUM,
335 : : 0, RelationGetNumberOfBlocks(index),
336 : : true);
337 : : }
338 : : }
339 : :
340 : : /* okay, all heap tuples are indexed */
341 : 741 : MemoryContextSwitchTo(oldcxt);
342 : 741 : MemoryContextDelete(buildstate.giststate->tempCxt);
343 : :
344 : 741 : freeGISTstate(buildstate.giststate);
345 : :
346 : : /*
347 : : * Return statistics
348 : : */
349 : 741 : result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
350 : :
351 : 741 : result->heap_tuples = reltuples;
352 : 741 : result->index_tuples = (double) buildstate.indtuples;
353 : :
354 : 741 : return result;
355 : : }
356 : :
357 : : /*-------------------------------------------------------------------------
358 : : * Routines for sorted build
359 : : *-------------------------------------------------------------------------
360 : : */
361 : :
362 : : /*
363 : : * Per-tuple callback for table_index_build_scan.
364 : : */
365 : : static void
366 : 135951 : gistSortedBuildCallback(Relation index,
367 : : ItemPointer tid,
368 : : Datum *values,
369 : : bool *isnull,
370 : : bool tupleIsAlive,
371 : : void *state)
372 : : {
373 : 135951 : GISTBuildState *buildstate = (GISTBuildState *) state;
374 : : MemoryContext oldCtx;
375 : : Datum compressed_values[INDEX_MAX_KEYS];
376 : :
377 : 135951 : oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
378 : :
379 : : /* Form an index tuple and point it at the heap tuple */
380 : 135951 : gistCompressValues(buildstate->giststate, index,
381 : : values, isnull,
382 : : true, compressed_values);
383 : :
384 : 135951 : tuplesort_putindextuplevalues(buildstate->sortstate,
385 : : buildstate->indexrel,
386 : : tid,
387 : : compressed_values, isnull);
388 : :
389 : 135951 : MemoryContextSwitchTo(oldCtx);
390 : 135951 : MemoryContextReset(buildstate->giststate->tempCxt);
391 : :
392 : : /* Update tuple count. */
393 : 135951 : buildstate->indtuples += 1;
394 : 135951 : }
395 : :
396 : : /*
397 : : * Build GiST index from bottom up from pre-sorted tuples.
398 : : */
399 : : static void
400 : 394 : gist_indexsortbuild(GISTBuildState *state)
401 : : {
402 : : IndexTuple itup;
403 : : GistSortedBuildLevelState *levelstate;
404 : : BulkWriteBuffer rootbuf;
405 : :
406 : : /* Reserve block 0 for the root page */
561 407 : 394 : state->pages_allocated = 1;
408 : :
409 : 394 : state->bulkstate = smgr_bulk_start_rel(state->indexrel, MAIN_FORKNUM);
410 : :
411 : : /* Allocate a temporary buffer for the first leaf page batch. */
1307 akorotkov@postgresql 412 : 394 : levelstate = palloc0(sizeof(GistSortedBuildLevelState));
561 heikki.linnakangas@i 413 : 394 : levelstate->pages[0] = palloc(BLCKSZ);
1307 akorotkov@postgresql 414 : 394 : levelstate->parent = NULL;
561 heikki.linnakangas@i 415 : 394 : gistinitpage(levelstate->pages[0], F_LEAF);
416 : :
417 : : /*
418 : : * Fill index pages with tuples in the sorted order.
419 : : */
1815 420 [ + + ]: 136345 : while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL)
421 : : {
1307 akorotkov@postgresql 422 : 135951 : gist_indexsortbuild_levelstate_add(state, levelstate, itup);
1815 heikki.linnakangas@i 423 : 135951 : MemoryContextReset(state->giststate->tempCxt);
424 : : }
425 : :
426 : : /*
427 : : * Write out the partially full non-root pages.
428 : : *
 429 : : * Keep in mind that a flush can build a new root. If the number of pages
 430 : : * is > 1, then a new root is required.
431 : : */
1307 akorotkov@postgresql 432 [ + + + + ]: 438 : while (levelstate->parent != NULL || levelstate->current_page != 0)
433 : : {
434 : : GistSortedBuildLevelState *parent;
435 : :
436 : 44 : gist_indexsortbuild_levelstate_flush(state, levelstate);
437 : 44 : parent = levelstate->parent;
438 [ + + ]: 220 : for (int i = 0; i < GIST_SORTED_BUILD_PAGE_NUM; i++)
439 [ + + ]: 176 : if (levelstate->pages[i])
440 : 149 : pfree(levelstate->pages[i]);
441 : 44 : pfree(levelstate);
442 : 44 : levelstate = parent;
443 : : }
444 : :
445 : : /* Write out the root */
446 : 394 : PageSetLSN(levelstate->pages[0], GistBuildLSN);
561 heikki.linnakangas@i 447 : 394 : rootbuf = smgr_bulk_get_buf(state->bulkstate);
448 : 394 : memcpy(rootbuf, levelstate->pages[0], BLCKSZ);
449 : 394 : smgr_bulk_write(state->bulkstate, GIST_ROOT_BLKNO, rootbuf, true);
450 : :
1307 akorotkov@postgresql 451 : 394 : pfree(levelstate);
452 : :
561 heikki.linnakangas@i 453 : 394 : smgr_bulk_finish(state->bulkstate);
1815 454 : 394 : }
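 : : /*
 : :  * Editor's note (illustrative, not part of the original file): block numbers
 : :  * are handed out in the order pages are flushed, so leaf and internal pages
 : :  * end up interleaved on disk; only block 0 is special, being reserved for
 : :  * the root and written last.  Within each level, the right-links chain
 : :  * backwards, from each newly written page to the previously written one.
 : :  */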
455 : :
456 : : /*
 457 : : * Add a tuple to a page. If the pages are full, write them out and
 458 : : * initialize a new page first.
459 : : */
460 : : static void
1307 akorotkov@postgresql 461 : 136861 : gist_indexsortbuild_levelstate_add(GISTBuildState *state,
462 : : GistSortedBuildLevelState *levelstate,
463 : : IndexTuple itup)
464 : : {
465 : : Size sizeNeeded;
466 : :
467 : : /* Check if tuple can be added to the current page */
468 : 136861 : sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData); /* fillfactor ignored */
469 [ + + ]: 136861 : if (PageGetFreeSpace(levelstate->pages[levelstate->current_page]) < sizeNeeded)
470 : : {
471 : : Page newPage;
472 : 646 : Page old_page = levelstate->pages[levelstate->current_page];
473 : 646 : uint16 old_page_flags = GistPageGetOpaque(old_page)->flags;
474 : :
475 [ + + ]: 646 : if (levelstate->current_page + 1 == GIST_SORTED_BUILD_PAGE_NUM)
476 : : {
477 : 145 : gist_indexsortbuild_levelstate_flush(state, levelstate);
478 : : }
479 : : else
480 : 501 : levelstate->current_page++;
481 : :
482 [ + + ]: 646 : if (levelstate->pages[levelstate->current_page] == NULL)
561 heikki.linnakangas@i 483 : 105 : levelstate->pages[levelstate->current_page] = palloc0(BLCKSZ);
484 : :
1307 akorotkov@postgresql 485 : 646 : newPage = levelstate->pages[levelstate->current_page];
486 : 646 : gistinitpage(newPage, old_page_flags);
487 : : }
488 : :
489 : 136861 : gistfillbuffer(levelstate->pages[levelstate->current_page], &itup, 1, InvalidOffsetNumber);
1815 heikki.linnakangas@i 490 : 136861 : }
491 : :
492 : : static void
1307 akorotkov@postgresql 493 : 189 : gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
494 : : GistSortedBuildLevelState *levelstate)
495 : : {
496 : : GistSortedBuildLevelState *parent;
497 : : BlockNumber blkno;
498 : : MemoryContext oldCtx;
499 : : IndexTuple union_tuple;
500 : : SplitPageLayout *dist;
501 : : IndexTuple *itvec;
502 : : int vect_len;
503 : 189 : bool isleaf = GistPageIsLeaf(levelstate->pages[0]);
504 : :
1815 heikki.linnakangas@i 505 [ - + ]: 189 : CHECK_FOR_INTERRUPTS();
506 : :
1307 akorotkov@postgresql 507 : 189 : oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
508 : :
509 : : /* Get index tuples from first page */
510 : 189 : itvec = gistextractpage(levelstate->pages[0], &vect_len);
511 [ + + ]: 189 : if (levelstate->current_page > 0)
512 : : {
513 : : /* Append tuples from each page */
514 [ + + ]: 683 : for (int i = 1; i < levelstate->current_page + 1; i++)
515 : : {
516 : : int len_local;
517 : 501 : IndexTuple *itvec_local = gistextractpage(levelstate->pages[i], &len_local);
518 : :
519 : 501 : itvec = gistjoinvector(itvec, &vect_len, itvec_local, len_local);
520 : 501 : pfree(itvec_local);
521 : : }
522 : :
523 : : /* Apply picksplit to list of all collected tuples */
524 : 182 : dist = gistSplit(state->indexrel, levelstate->pages[0], itvec, vect_len, state->giststate);
525 : : }
526 : : else
527 : : {
528 : : /* Create split layout from single page */
613 rhaas@postgresql.org 529 : 7 : dist = (SplitPageLayout *) palloc0(sizeof(SplitPageLayout));
1307 akorotkov@postgresql 530 : 7 : union_tuple = gistunion(state->indexrel, itvec, vect_len,
531 : : state->giststate);
532 : 7 : dist->itup = union_tuple;
533 : 7 : dist->list = gistfillitupvec(itvec, vect_len, &(dist->lenlist));
534 : 7 : dist->block.num = vect_len;
535 : : }
536 : :
1815 heikki.linnakangas@i 537 : 189 : MemoryContextSwitchTo(oldCtx);
538 : :
539 : : /* Reset page counter */
1307 akorotkov@postgresql 540 : 189 : levelstate->current_page = 0;
541 : :
542 : : /* Create pages for all partitions in split result */
543 [ + + ]: 1099 : for (; dist != NULL; dist = dist->next)
544 : : {
545 : : char *data;
546 : : BulkWriteBuffer buf;
547 : : Page target;
548 : :
549 : : /* check once per page */
550 [ - + ]: 910 : CHECK_FOR_INTERRUPTS();
551 : :
552 : : /* Create page and copy data */
553 : 910 : data = (char *) (dist->list);
561 heikki.linnakangas@i 554 : 910 : buf = smgr_bulk_get_buf(state->bulkstate);
555 : 910 : target = (Page) buf;
1307 akorotkov@postgresql 556 : 910 : gistinitpage(target, isleaf ? F_LEAF : 0);
557 [ + + ]: 136650 : for (int i = 0; i < dist->block.num; i++)
558 : : {
559 : 135740 : IndexTuple thistup = (IndexTuple) data;
560 : :
561 [ - + ]: 135740 : if (PageAddItem(target, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
1307 akorotkov@postgresql 562 [ # # ]:UBC 0 : elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->indexrel));
563 : :
1307 akorotkov@postgresql 564 :CBC 135740 : data += IndexTupleSize(thistup);
565 : : }
566 : 910 : union_tuple = dist->itup;
567 : :
568 : : /*
569 : : * Set the right link to point to the previous page. This is just for
570 : : * debugging purposes: GiST only follows the right link if a page is
571 : : * split concurrently to a scan, and that cannot happen during index
572 : : * build.
573 : : *
574 : : * It's a bit counterintuitive that we set the right link on the new
575 : : * page to point to the previous page, not the other way around. But
576 : : * GiST pages are not ordered like B-tree pages are, so as long as the
577 : : * right-links form a chain through all the pages at the same level,
578 : : * the order doesn't matter.
579 : : */
580 [ + + ]: 910 : if (levelstate->last_blkno)
581 : 866 : GistPageGetOpaque(target)->rightlink = levelstate->last_blkno;
582 : :
583 : : /*
584 : : * The page is now complete. Assign a block number to it, and pass it
585 : : * to the bulk writer.
586 : : */
561 heikki.linnakangas@i 587 : 910 : blkno = state->pages_allocated++;
588 : 910 : PageSetLSN(target, GistBuildLSN);
589 : 910 : smgr_bulk_write(state->bulkstate, blkno, buf, true);
590 : 910 : ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno);
1307 akorotkov@postgresql 591 : 910 : levelstate->last_blkno = blkno;
592 : :
593 : : /*
594 : : * Insert the downlink to the parent page. If this was the root,
595 : : * create a new page as the parent, which becomes the new root.
596 : : */
597 : 910 : parent = levelstate->parent;
598 [ + + ]: 910 : if (parent == NULL)
599 : : {
600 : 44 : parent = palloc0(sizeof(GistSortedBuildLevelState));
561 heikki.linnakangas@i 601 : 44 : parent->pages[0] = palloc(BLCKSZ);
1307 akorotkov@postgresql 602 : 44 : parent->parent = NULL;
603 : 44 : gistinitpage(parent->pages[0], 0);
604 : :
605 : 44 : levelstate->parent = parent;
606 : : }
607 : 910 : gist_indexsortbuild_levelstate_add(state, parent, union_tuple);
608 : : }
1815 heikki.linnakangas@i 609 : 189 : }
610 : :
611 : :
612 : : /*-------------------------------------------------------------------------
613 : : * Routines for non-sorted build
614 : : *-------------------------------------------------------------------------
615 : : */
616 : :
617 : : /*
618 : : * Attempt to switch to buffering mode.
619 : : *
 620 : : * If there is not enough memory for a buffering build, sets buildMode
 621 : : * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch
 622 : : * anymore. Otherwise initializes the build buffers, and sets buildMode to
 623 : : * GIST_BUFFERING_ACTIVE.
624 : : */
625 : : static void
5112 626 : 3 : gistInitBuffering(GISTBuildState *buildstate)
627 : : {
628 : 3 : Relation index = buildstate->indexrel;
629 : : int pagesPerBuffer;
630 : : Size pageFreeSpace;
631 : : Size itupAvgSize,
632 : : itupMinSize;
633 : : double avgIndexTuplesPerPage,
634 : : maxIndexTuplesPerPage;
635 : : int i;
636 : : int levelStep;
637 : :
 638 : : /* Calculate the space on an index page that is available for index tuples */
639 : 3 : pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
640 : : - sizeof(ItemIdData)
641 : 3 : - buildstate->freespace;
642 : :
643 : : /*
644 : : * Calculate average size of already inserted index tuples using gathered
645 : : * statistics.
646 : : */
647 : 3 : itupAvgSize = (double) buildstate->indtuplesSize /
648 : 3 : (double) buildstate->indtuples;
649 : :
650 : : /*
 651 : : * Calculate the minimal possible size of an index tuple from the index
 652 : : * metadata. The minimal possible size of a varlena is VARHDRSZ.
653 : : *
654 : : * XXX: that's not actually true, as a short varlen can be just 2 bytes.
655 : : * And we should take padding into account here.
656 : : */
657 : 3 : itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
658 [ + + ]: 6 : for (i = 0; i < index->rd_att->natts; i++)
659 : : {
260 drowley@postgresql.o 660 : 3 : CompactAttribute *attr = TupleDescCompactAttr(index->rd_att, i);
661 : :
662 [ - + ]: 3 : if (attr->attlen < 0)
5112 heikki.linnakangas@i 663 :UBC 0 : itupMinSize += VARHDRSZ;
664 : : else
260 drowley@postgresql.o 665 :CBC 3 : itupMinSize += attr->attlen;
666 : : }
667 : :
 668 : : /* Calculate the average and maximal number of index tuples that fit on a page */
5112 heikki.linnakangas@i 669 : 3 : avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
670 : 3 : maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;
671 : :
672 : : /*
673 : : * We need to calculate two parameters for the buffering algorithm:
674 : : * levelStep and pagesPerBuffer.
675 : : *
676 : : * levelStep determines the size of subtree that we operate on, while
677 : : * emptying a buffer. A higher value is better, as you need fewer buffer
678 : : * emptying steps to build the index. However, if you set it too high, the
679 : : * subtree doesn't fit in cache anymore, and you quickly lose the benefit
680 : : * of the buffers.
681 : : *
682 : : * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
683 : : * the number of tuples on page (ie. fanout), and M is the amount of
684 : : * internal memory available. Curiously, they doesn't explain *why* that
685 : : * setting is optimal. We calculate it by taking the highest levelStep so
686 : : * that a subtree still fits in cache. For a small B, our way of
687 : : * calculating levelStep is very close to Arge et al's formula. For a
688 : : * large B, our formula gives a value that is 2x higher.
689 : : *
690 : : * The average size (in pages) of a subtree of depth n can be calculated
691 : : * as a geometric series:
692 : : *
693 : : * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
694 : : *
 695 : : * where B is the average number of index tuples on a page. The subtree is
696 : : * cached in the shared buffer cache and the OS cache, so we choose
697 : : * levelStep so that the subtree size is comfortably smaller than
698 : : * effective_cache_size, with a safety factor of 4.
699 : : *
700 : : * The estimate on the average number of index tuples on page is based on
701 : : * average tuple sizes observed before switching to buffered build, so the
 702 : : * real subtree size can be somewhat larger. Also, it would be selfish to
703 : : * gobble the whole cache for our index build. The safety factor of 4
704 : : * should account for those effects.
705 : : *
706 : : * The other limiting factor for setting levelStep is that while
707 : : * processing a subtree, we need to hold one page for each buffer at the
708 : : * next lower buffered level. The max. number of buffers needed for that
709 : : * is maxIndexTuplesPerPage^levelStep. This is very conservative, but
710 : : * hopefully maintenance_work_mem is set high enough that you're
711 : : * constrained by effective_cache_size rather than maintenance_work_mem.
712 : : *
713 : : * XXX: the buffer hash table consumes a fair amount of memory too per
 714 : : * buffer, but that is not currently taken into account. That scales with
 715 : : * the total number of buffers used, ie. with the index size and levelStep.
716 : : * Note that a higher levelStep *reduces* the amount of memory needed for
717 : : * the hash table.
718 : : */
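 : : 	/*
 : : 	 * Editor's illustration (assumed numbers, not from the original file):
 : : 	 * with avgIndexTuplesPerPage = 100 and BLCKSZ = 8192, levelStep = 1
 : : 	 * gives a subtree of (1 - 100^2) / (1 - 100) = 101 pages (~0.8 MB),
 : : 	 * while levelStep = 2 gives (1 - 100^3) / (1 - 100) = 10101 pages
 : : 	 * (~79 MB).  The loop below raises levelStep for as long as the subtree
 : : 	 * still fits in effective_cache_size / 4 and maxIndexTuplesPerPage ^
 : : 	 * levelStep pages still fit in maintenance_work_mem.
 : : 	 */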
719 : 3 : levelStep = 1;
720 : : for (;;)
721 : 3 : {
722 : : double subtreesize;
723 : : double maxlowestlevelpages;
724 : :
725 : : /* size of an average subtree at this levelStep (in pages). */
4848 726 : 6 : subtreesize =
727 : 6 : (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
728 : 6 : (1 - avgIndexTuplesPerPage);
729 : :
730 : : /* max number of pages at the lowest level of a subtree */
731 : 6 : maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);
732 : :
733 : : /* subtree must fit in cache (with safety factor of 4) */
734 [ - + ]: 6 : if (subtreesize > effective_cache_size / 4)
4848 heikki.linnakangas@i 735 :UBC 0 : break;
736 : :
737 : : /* each node in the lowest level of a subtree has one page in memory */
4848 heikki.linnakangas@i 738 [ + + ]:CBC 6 : if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
739 : 3 : break;
740 : :
741 : : /* Good, we can handle this levelStep. See if we can go one higher. */
5112 742 : 3 : levelStep++;
743 : : }
744 : :
745 : : /*
 746 : : * We just reached an unacceptable value of levelStep in the previous loop.
 747 : : * So, decrease levelStep to get the last acceptable value.
748 : : */
749 : 3 : levelStep--;
750 : :
751 : : /*
752 : : * If there's not enough cache or maintenance_work_mem, fall back to plain
753 : : * inserts.
754 : : */
755 [ - + ]: 3 : if (levelStep <= 0)
756 : : {
5112 heikki.linnakangas@i 757 [ # # ]:UBC 0 : elog(DEBUG1, "failed to switch to buffered GiST build");
1815 758 : 0 : buildstate->buildMode = GIST_BUFFERING_DISABLED;
5112 759 : 0 : return;
760 : : }
761 : :
762 : : /*
763 : : * The second parameter to set is pagesPerBuffer, which determines the
764 : : * size of each buffer. We adjust pagesPerBuffer also during the build,
765 : : * which is why this calculation is in a separate function.
766 : : */
5112 heikki.linnakangas@i 767 :CBC 3 : pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);
768 : :
769 : : /* Initialize GISTBuildBuffers with these parameters */
770 : 3 : buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
771 : : gistGetMaxLevel(index));
772 : :
4847 773 : 3 : gistInitParentMap(buildstate);
774 : :
1815 775 : 3 : buildstate->buildMode = GIST_BUFFERING_ACTIVE;
776 : :
5112 777 [ - + ]: 3 : elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
778 : : levelStep, pagesPerBuffer);
779 : : }
780 : :
781 : : /*
782 : : * Calculate pagesPerBuffer parameter for the buffering algorithm.
783 : : *
 784 : : * The buffer size is chosen so that, assuming tuples are distributed
785 : : * randomly, emptying half a buffer fills on average one page in every buffer
786 : : * at the next lower level.
787 : : */
788 : : static int
789 : 6 : calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
790 : : {
791 : : double pagesPerBuffer;
792 : : double avgIndexTuplesPerPage;
793 : : double itupAvgSize;
794 : : Size pageFreeSpace;
795 : :
 796 : : /* Calculate the space on an index page that is available for index tuples */
797 : 6 : pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
798 : : - sizeof(ItemIdData)
799 : 6 : - buildstate->freespace;
800 : :
801 : : /*
802 : : * Calculate average size of already inserted index tuples using gathered
803 : : * statistics.
804 : : */
805 : 6 : itupAvgSize = (double) buildstate->indtuplesSize /
806 : 6 : (double) buildstate->indtuples;
807 : :
808 : 6 : avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
809 : :
810 : : /*
811 : : * Recalculate required size of buffers.
812 : : */
813 : 6 : pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);
814 : :
tgl@sss.pgh.pa.us 815 : 6 : return (int) rint(pagesPerBuffer);
816 : : }
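 : : /*
 : :  * Editor's illustration of the rule above (assumed numbers, not from the
 : :  * original file): with avgIndexTuplesPerPage = 100 and levelStep = 1, a
 : :  * buffer is sized at 2 * 100^1 = 200 pages.  Emptying half of it releases
 : :  * about 100 pages * 100 tuples = 10,000 tuples, which, spread randomly over
 : :  * the roughly 100 child buffers one buffered level down, fills about one
 : :  * page's worth (100 tuples) in each of them.
 : :  */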
817 : :
818 : : /*
819 : : * Per-tuple callback for table_index_build_scan.
820 : : */
821 : : static void
heikki.linnakangas@i 822 : 292874 : gistBuildCallback(Relation index,
823 : : ItemPointer tid,
824 : : Datum *values,
825 : : bool *isnull,
826 : : bool tupleIsAlive,
827 : : void *state)
828 : : {
829 : 292874 : GISTBuildState *buildstate = (GISTBuildState *) state;
830 : : IndexTuple itup;
831 : : MemoryContext oldCtx;
832 : :
5090 tgl@sss.pgh.pa.us 833 : 292874 : oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
834 : :
835 : : /* form an index tuple and point it at the heap tuple */
1815 heikki.linnakangas@i 836 : 292874 : itup = gistFormTuple(buildstate->giststate, index,
837 : : values, isnull,
838 : : true);
2129 andres@anarazel.de 839 : 292874 : itup->t_tid = *tid;
840 : :
841 : : /* Update tuple count and total size. */
892 tgl@sss.pgh.pa.us 842 : 292874 : buildstate->indtuples += 1;
843 : 292874 : buildstate->indtuplesSize += IndexTupleSize(itup);
844 : :
845 : : /*
846 : : * XXX In buffering builds, the tempCxt is also reset down inside
847 : : * gistProcessEmptyingQueue(). This is not great because it risks
848 : : * confusion and possible use of dangling pointers (for example, itup
849 : : * might be already freed when control returns here). It's generally
850 : : * better that a memory context be "owned" by only one function. However,
851 : : * currently this isn't causing issues so it doesn't seem worth the amount
852 : : * of refactoring that would be needed to avoid it.
853 : : */
1815 heikki.linnakangas@i 854 [ + + ]: 292874 : if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
855 : : {
856 : : /* We have buffers, so use them. */
5112 857 : 17715 : gistBufferingBuildInsert(buildstate, itup);
858 : : }
859 : : else
860 : : {
861 : : /*
862 : : * There's no buffers (yet). Since we already have the index relation
863 : : * locked, we call gistdoinsert directly.
864 : : */
865 : 275159 : gistdoinsert(index, itup, buildstate->freespace,
866 : : buildstate->giststate, buildstate->heaprel, true);
867 : : }
868 : :
869 : 292874 : MemoryContextSwitchTo(oldCtx);
5090 tgl@sss.pgh.pa.us 870 : 292874 : MemoryContextReset(buildstate->giststate->tempCxt);
871 : :
1815 heikki.linnakangas@i 872 [ + + ]: 292874 : if (buildstate->buildMode == GIST_BUFFERING_ACTIVE &&
5112 873 [ + + ]: 17715 : buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
874 : : {
875 : : /* Adjust the target buffer size now */
876 : 3 : buildstate->gfbb->pagesPerBuffer =
877 : 3 : calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep);
878 : : }
879 : :
880 : : /*
881 : : * In 'auto' mode, check if the index has grown too large to fit in cache,
882 : : * and switch to buffering mode if it has.
883 : : *
884 : : * To avoid excessive calls to smgrnblocks(), only check this every
885 : : * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples.
886 : : *
887 : : * In 'stats' state, switch as soon as we have seen enough tuples to have
888 : : * some idea of the average tuple size.
889 : : */
1815 890 [ + + ]: 292874 : if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
5112 891 [ + + ]: 262871 : buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
1517 tgl@sss.pgh.pa.us 892 [ + - ]: 997 : effective_cache_size < smgrnblocks(RelationGetSmgr(index),
893 : 292874 : MAIN_FORKNUM)) ||
1815 heikki.linnakangas@i 894 [ + + ]: 292874 : (buildstate->buildMode == GIST_BUFFERING_STATS &&
5112 895 [ + + ]: 12288 : buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
896 : : {
897 : : /*
898 : : * Index doesn't fit in effective cache anymore. Try to switch to
899 : : * buffering build mode.
900 : : */
901 : 3 : gistInitBuffering(buildstate);
902 : : }
903 : 292874 : }
904 : :
905 : : /*
906 : : * Insert function for buffering index build.
907 : : */
908 : : static void
909 : 17715 : gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup)
910 : : {
911 : : /* Insert the tuple to buffers. */
4847 912 : 17715 : gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);
913 : :
914 : : /* If we filled up (half of a) buffer, process buffer emptying. */
5112 915 : 17715 : gistProcessEmptyingQueue(buildstate);
916 : 17715 : }
917 : :
918 : : /*
919 : : * Process an index tuple. Runs the tuple down the tree until we reach a leaf
920 : : * page or node buffer, and inserts the tuple there. Returns true if we have
 921 : : * to stop the buffer emptying process (because one of the child buffers
 922 : : * can't take index tuples anymore).
923 : : */
924 : : static bool
925 : 34881 : gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
926 : : BlockNumber startblkno, int startlevel)
927 : : {
5090 tgl@sss.pgh.pa.us 928 : 34881 : GISTSTATE *giststate = buildstate->giststate;
5112 heikki.linnakangas@i 929 : 34881 : GISTBuildBuffers *gfbb = buildstate->gfbb;
930 : 34881 : Relation indexrel = buildstate->indexrel;
931 : : BlockNumber childblkno;
932 : : Buffer buffer;
933 : 34881 : bool result = false;
934 : : BlockNumber blkno;
935 : : int level;
4847 936 : 34881 : OffsetNumber downlinkoffnum = InvalidOffsetNumber;
937 : 34881 : BlockNumber parentblkno = InvalidBlockNumber;
938 : :
939 [ - + ]: 34881 : CHECK_FOR_INTERRUPTS();
940 : :
941 : : /*
942 : : * Loop until we reach a leaf page (level == 0) or a level with buffers
943 : : * (not including the level we start at, because we would otherwise make
944 : : * no progress).
945 : : */
946 : 34881 : blkno = startblkno;
947 : 34881 : level = startlevel;
948 : : for (;;)
5112 949 : 34881 : {
950 : : ItemId iid;
951 : : IndexTuple idxtuple,
952 : : newtup;
953 : : Page page;
954 : : OffsetNumber childoffnum;
955 : :
956 : : /* Have we reached a level with buffers? */
4847 957 [ + + + - : 69762 : if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel)
+ + + + ]
5112 958 : 17166 : break;
959 : :
960 : : /* Have we reached a leaf page? */
4847 961 [ + + ]: 52596 : if (level == 0)
5112 962 : 17715 : break;
963 : :
964 : : /*
965 : : * Nope. Descend down to the next level then. Choose a child to
966 : : * descend down to.
967 : : */
968 : :
4847 969 : 34881 : buffer = ReadBuffer(indexrel, blkno);
5112 970 : 34881 : LockBuffer(buffer, GIST_EXCLUSIVE);
971 : :
8 peter@eisentraut.org 972 :GNC 34881 : page = BufferGetPage(buffer);
5112 heikki.linnakangas@i 973 :CBC 34881 : childoffnum = gistchoose(indexrel, page, itup, giststate);
974 : 34881 : iid = PageGetItemId(page, childoffnum);
975 : 34881 : idxtuple = (IndexTuple) PageGetItem(page, iid);
976 : 34881 : childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
977 : :
4847 978 [ + + ]: 34881 : if (level > 1)
979 : 17166 : gistMemorizeParent(buildstate, childblkno, blkno);
980 : :
981 : : /*
982 : : * Check that the key representing the target child node is consistent
983 : : * with the key we're inserting. Update it if it's not.
984 : : */
5112 985 : 34881 : newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate);
986 [ + + ]: 34881 : if (newtup)
987 : : {
4480 sfrost@snowman.net 988 : 34473 : blkno = gistbufferinginserttuples(buildstate,
989 : : buffer,
990 : : level,
991 : : &newtup,
992 : : 1,
993 : : childoffnum,
994 : : InvalidBlockNumber,
995 : : InvalidOffsetNumber);
996 : : /* gistbufferinginserttuples() released the buffer */
997 : : }
998 : : else
4847 heikki.linnakangas@i 999 : 408 : UnlockReleaseBuffer(buffer);
1000 : :
1001 : : /* Descend to the child */
1002 : 34881 : parentblkno = blkno;
1003 : 34881 : blkno = childblkno;
1004 : 34881 : downlinkoffnum = childoffnum;
1005 [ - + ]: 34881 : Assert(level > 0);
1006 : 34881 : level--;
1007 : : }
1008 : :
1009 [ + + + - : 34881 : if (LEVEL_HAS_BUFFERS(level, gfbb))
+ - ]
5112 1010 : 17166 : {
1011 : : /*
1012 : : * We've reached level with buffers. Place the index tuple to the
1013 : : * buffer, and add the buffer to the emptying queue if it overflows.
1014 : : */
1015 : : GISTNodeBuffer *childNodeBuffer;
1016 : :
1017 : : /* Find the buffer or create a new one */
4847 1018 : 17166 : childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level);
1019 : :
1020 : : /* Add index tuple to it */
5112 1021 : 17166 : gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
1022 : :
1023 [ - + ]: 17166 : if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb))
5112 heikki.linnakangas@i 1024 :UBC 0 : result = true;
1025 : : }
1026 : : else
1027 : : {
1028 : : /*
1029 : : * We've reached a leaf page. Place the tuple here.
1030 : : */
4847 heikki.linnakangas@i 1031 [ - + ]:CBC 17715 : Assert(level == 0);
1032 : 17715 : buffer = ReadBuffer(indexrel, blkno);
5112 1033 : 17715 : LockBuffer(buffer, GIST_EXCLUSIVE);
4847 1034 : 17715 : gistbufferinginserttuples(buildstate, buffer, level,
1035 : : &itup, 1, InvalidOffsetNumber,
1036 : : parentblkno, downlinkoffnum);
1037 : : /* gistbufferinginserttuples() released the buffer */
1038 : : }
1039 : :
5112 1040 : 34881 : return result;
1041 : : }
1042 : :
1043 : : /*
1044 : : * Insert tuples to a given page.
1045 : : *
1046 : : * This is analogous with gistinserttuples() in the regular insertion code.
1047 : : *
1048 : : * Returns the block number of the page where the (first) new or updated tuple
1049 : : * was inserted. Usually that's the original page, but might be a sibling page
1050 : : * if the original page was split.
1051 : : *
1052 : : * Caller should hold a lock on 'buffer' on entry. This function will unlock
1053 : : * and unpin it.
1054 : : */
1055 : : static BlockNumber
4847 1056 : 52572 : gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
1057 : : IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
1058 : : BlockNumber parentblk, OffsetNumber downlinkoffnum)
1059 : : {
5112 1060 : 52572 : GISTBuildBuffers *gfbb = buildstate->gfbb;
1061 : : List *splitinfo;
1062 : : bool is_split;
4483 bruce@momjian.us 1063 : 52572 : BlockNumber placed_to_blk = InvalidBlockNumber;
1064 : :
5112 heikki.linnakangas@i 1065 : 52572 : is_split = gistplacetopage(buildstate->indexrel,
1066 : : buildstate->freespace,
1067 : : buildstate->giststate,
1068 : : buffer,
1069 : : itup, ntup, oldoffnum, &placed_to_blk,
1070 : : InvalidBuffer,
1071 : : &splitinfo,
1072 : : false,
1073 : : buildstate->heaprel, true);
1074 : :
1075 : : /*
1076 : : * If this is a root split, update the root path item kept in memory. This
1077 : : * ensures that all path stacks are always complete, including all parent
1078 : : * nodes up to the root. That simplifies the algorithm to re-find correct
1079 : : * parent.
1080 : : */
1081 [ + + + + ]: 52572 : if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
1082 : : {
3426 kgrittn@postgresql.o 1083 : 3 : Page page = BufferGetPage(buffer);
1084 : : OffsetNumber off;
1085 : : OffsetNumber maxoff;
1086 : :
4847 heikki.linnakangas@i 1087 [ - + ]: 3 : Assert(level == gfbb->rootlevel);
1088 : 3 : gfbb->rootlevel++;
1089 : :
1090 [ - + ]: 3 : elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel);
1091 : :
1092 : : /*
1093 : : * All the downlinks on the old root page are now on one of the child
1094 : : * pages. Visit all the new child pages to memorize the parents of the
1095 : : * grandchildren.
1096 : : */
1097 [ + - ]: 3 : if (gfbb->rootlevel > 1)
1098 : : {
1099 : 3 : maxoff = PageGetMaxOffsetNumber(page);
1100 [ + + ]: 9 : for (off = FirstOffsetNumber; off <= maxoff; off++)
1101 : : {
4836 bruce@momjian.us 1102 : 6 : ItemId iid = PageGetItemId(page, off);
1103 : 6 : IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
4847 heikki.linnakangas@i 1104 : 6 : BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
4836 bruce@momjian.us 1105 : 6 : Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno);
1106 : :
4847 heikki.linnakangas@i 1107 : 6 : LockBuffer(childbuf, GIST_SHARE);
1108 : 6 : gistMemorizeAllDownlinks(buildstate, childbuf);
1109 : 6 : UnlockReleaseBuffer(childbuf);
1110 : :
1111 : : /*
1112 : : * Also remember that the parent of the new child page is the
1113 : : * root block.
1114 : : */
1115 : 6 : gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
1116 : : }
1117 : : }
1118 : : }
1119 : :
5112 1120 [ + + ]: 52572 : if (splitinfo)
1121 : : {
1122 : : /*
1123 : : * Insert the downlinks to the parent. This is analogous with
1124 : : * gistfinishsplit() in the regular insertion code, but the locking is
1125 : : * simpler, and we have to maintain the buffers on internal nodes and
1126 : : * the parent map.
1127 : : */
1128 : : IndexTuple *downlinks;
1129 : : int ndownlinks,
1130 : : i;
1131 : : Buffer parentBuffer;
1132 : : ListCell *lc;
1133 : :
1134 : : /* Parent may have changed since we memorized this path. */
1135 : : parentBuffer =
4847 1136 : 384 : gistBufferingFindCorrectParent(buildstate,
1137 : : BufferGetBlockNumber(buffer),
1138 : : level,
1139 : : &parentblk,
1140 : : &downlinkoffnum);
1141 : :
1142 : : /*
1143 : : * If there's a buffer associated with this page, that needs to be
1144 : : * split too. gistRelocateBuildBuffersOnSplit() will also adjust the
1145 : : * downlinks in 'splitinfo', to make sure they're consistent not only
1146 : : * with the tuples already on the pages, but also the tuples in the
1147 : : * buffers that will eventually be inserted to them.
1148 : : */
5112 1149 : 384 : gistRelocateBuildBuffersOnSplit(gfbb,
1150 : : buildstate->giststate,
1151 : : buildstate->indexrel,
1152 : : level,
1153 : : buffer, splitinfo);
1154 : :
1155 : : /* Create an array of all the downlink tuples */
1156 : 384 : ndownlinks = list_length(splitinfo);
1157 : 384 : downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks);
1158 : 384 : i = 0;
1159 [ + - + + : 1152 : foreach(lc, splitinfo)
+ + ]
1160 : : {
1161 : 768 : GISTPageSplitInfo *splitinfo = lfirst(lc);
1162 : :
1163 : : /*
1164 : : * Remember the parent of each new child page in our parent map.
1165 : : * This assumes that the downlinks fit on the parent page. If the
1166 : : * parent page is split, too, when we recurse up to insert the
1167 : : * downlinks, the recursive gistbufferinginserttuples() call will
1168 : : * update the map again.
1169 : : */
4847 1170 [ + + ]: 768 : if (level > 0)
1171 : 12 : gistMemorizeParent(buildstate,
1172 : : BufferGetBlockNumber(splitinfo->buf),
1173 : : BufferGetBlockNumber(parentBuffer));
1174 : :
1175 : : /*
1176 : : * Also update the parent map for all the downlinks that got moved
1177 : : * to a different page. (actually this also loops through the
1178 : : * downlinks that stayed on the original page, but it does no
1179 : : * harm).
1180 : : */
1181 [ - + ]: 768 : if (level > 1)
4847 heikki.linnakangas@i 1182 :UBC 0 : gistMemorizeAllDownlinks(buildstate, splitinfo->buf);
1183 : :
1184 : : /*
1185 : : * Since there's no concurrent access, we can release the lower
1186 : : * level buffers immediately. This includes the original page.
1187 : : */
4847 heikki.linnakangas@i 1188 :CBC 768 : UnlockReleaseBuffer(splitinfo->buf);
5112 1189 : 768 : downlinks[i++] = splitinfo->downlink;
1190 : : }
1191 : :
1192 : : /* Insert them into parent. */
4847 1193 : 384 : gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
1194 : : downlinks, ndownlinks, downlinkoffnum,
1195 : : InvalidBlockNumber, InvalidOffsetNumber);
1196 : :
2999 tgl@sss.pgh.pa.us 1197 : 384 : list_free_deep(splitinfo); /* we don't need this anymore */
1198 : : }
1199 : : else
4847 heikki.linnakangas@i 1200 : 52188 : UnlockReleaseBuffer(buffer);
1201 : :
4769 1202 : 52572 : return placed_to_blk;
1203 : : }
1204 : :
1205 : : /*
1206 : : * Find the downlink pointing to a child page.
1207 : : *
1208 : : * 'childblkno' indicates the child page to find the parent for. 'level' is
1209 : : * the level of the child. On entry, *parentblkno and *downlinkoffnum can
1210 : : * point to a location where the downlink used to be - we will check that
1211 : : * location first, and save some cycles if it hasn't moved. The function
1212 : : * returns a buffer containing the downlink, exclusively-locked, and
1213 : : * *parentblkno and *downlinkoffnum are set to the real location of the
1214 : : * downlink.
1215 : : *
1216 : : * If the child page is a leaf (level == 0), the caller must supply a correct
1217 : : * parentblkno. Otherwise we use the parent map hash table to find the parent
1218 : : * block.
1219 : : *
1220 : : * This function serves the same purpose as gistFindCorrectParent() during
1221 : : * normal index inserts, but this is simpler because we don't need to deal
1222 : : * with concurrent inserts.
1223 : : */
1224 : : static Buffer
5112 1225 : 384 : gistBufferingFindCorrectParent(GISTBuildState *buildstate,
1226 : : BlockNumber childblkno, int level,
1227 : : BlockNumber *parentblkno,
1228 : : OffsetNumber *downlinkoffnum)
1229 : : {
1230 : : BlockNumber parent;
1231 : : Buffer buffer;
1232 : : Page page;
1233 : : OffsetNumber maxoff;
1234 : : OffsetNumber off;
1235 : :
4847 1236 [ + + ]: 384 : if (level > 0)
1237 : 6 : parent = gistGetParent(buildstate, childblkno);
1238 : : else
1239 : : {
1240 : : /*
1241 : : * For a leaf page, the caller must supply a correct parent block
1242 : : * number.
1243 : : */
1244 [ - + ]: 378 : if (*parentblkno == InvalidBlockNumber)
1603 peter@eisentraut.org 1245 [ # # ]:UBC 0 : elog(ERROR, "no parent buffer provided of child %u", childblkno);
4847 heikki.linnakangas@i 1246 :CBC 378 : parent = *parentblkno;
1247 : : }
1248 : :
1249 : 384 : buffer = ReadBuffer(buildstate->indexrel, parent);
3426 kgrittn@postgresql.o 1250 : 384 : page = BufferGetPage(buffer);
5112 heikki.linnakangas@i 1251 : 384 : LockBuffer(buffer, GIST_EXCLUSIVE);
4847 1252 : 384 : gistcheckpage(buildstate->indexrel, buffer);
1253 : 384 : maxoff = PageGetMaxOffsetNumber(page);
1254 : :
1255 : : /* Check if it was not moved */
1256 [ + + + - ]: 384 : if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
1257 [ + - + - ]: 378 : *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
1258 : : {
4836 bruce@momjian.us 1259 : 378 : ItemId iid = PageGetItemId(page, *downlinkoffnum);
1260 : 378 : IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
1261 : :
4847 heikki.linnakangas@i 1262 [ + - ]: 378 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
1263 : : {
1264 : : /* Still there */
1265 : 378 : return buffer;
1266 : : }
1267 : : }
1268 : :
1269 : : /*
1270 : : * Downlink was not at the offset where it used to be. Scan the page to
1271 : : * find it. During normal gist insertions, it might've moved to another
1272 : : * page, to the right, but during a buffering build, we keep track of the
1273 : : * parent of each page in the lookup table so we should always know what
1274 : : * page it's on.
1275 : : */
1276 [ + - ]: 15 : for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
1277 : : {
4836 bruce@momjian.us 1278 : 15 : ItemId iid = PageGetItemId(page, off);
1279 : 15 : IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
1280 : :
4847 heikki.linnakangas@i 1281 [ + + ]: 15 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
1282 : : {
1283 : : /* yes!!, found it */
1284 : 6 : *downlinkoffnum = off;
1285 : 6 : return buffer;
1286 : : }
1287 : : }
1288 : :
4847 heikki.linnakangas@i 1289 [ # # ]:UBC 0 : elog(ERROR, "failed to re-find parent for block %u", childblkno);
1290 : : return InvalidBuffer; /* keep compiler quiet */
1291 : : }
1292 : :
1293 : : /*
 1294 : : * Process the buffer emptying stack. Emptying one buffer can cause emptying
 1295 : : * of other buffers. This function iterates until this cascading emptying
 1296 : : * process is finished, i.e. until the buffer emptying stack is empty.
1297 : : */
1298 : : static void
5112 heikki.linnakangas@i 1299 :CBC 17724 : gistProcessEmptyingQueue(GISTBuildState *buildstate)
1300 : : {
1301 : 17724 : GISTBuildBuffers *gfbb = buildstate->gfbb;
1302 : :
1303 : : /* Iterate while we have elements in buffers emptying stack. */
1304 [ + + ]: 17733 : while (gfbb->bufferEmptyingQueue != NIL)
1305 : : {
1306 : : GISTNodeBuffer *emptyingNodeBuffer;
1307 : :
1308 : : /* Get node buffer from emptying stack. */
1309 : 9 : emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue);
1310 : 9 : gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue);
1311 : 9 : emptyingNodeBuffer->queuedForEmptying = false;
1312 : :
1313 : : /*
 1314 : : * We are going to load the last pages of the buffers that we will empty
 1315 : : * into. So let's unload any previously loaded buffers.
1316 : : */
1317 : 9 : gistUnloadNodeBuffers(gfbb);
1318 : :
1319 : : /*
1320 : : * Pop tuples from the buffer and run them down to the buffers at
1321 : : * lower level, or leaf pages. We continue until one of the lower
1322 : : * level buffers fills up, or this buffer runs empty.
1323 : : *
1324 : : * In Arge et al's paper, the buffer emptying is stopped after
1325 : : * processing 1/2 node buffer worth of tuples, to avoid overfilling
1326 : : * any of the lower level buffers. However, it's more efficient to
1327 : : * keep going until one of the lower level buffers actually fills up,
1328 : : * so that's what we do. This doesn't need to be exact, if a buffer
1329 : : * overfills by a few tuples, there's no harm done.
1330 : : */
1331 : : while (true)
1332 : 17166 : {
1333 : : IndexTuple itup;
1334 : :
1335 : : /* Get next index tuple from the buffer */
1336 [ + + ]: 17175 : if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
1337 : 9 : break;
1338 : :
1339 : : /*
1340 : : * Run it down to the underlying node buffer or leaf page.
1341 : : *
1342 : : * Note: it's possible that the buffer we're emptying splits as a
1343 : : * result of this call. If that happens, our emptyingNodeBuffer
1344 : : * points to the left half of the split. After split, it's very
1345 : : * likely that the new left buffer is no longer over the half-full
1346 : : * threshold, but we might as well keep flushing tuples from it
1347 : : * until we fill a lower-level buffer.
1348 : : */
4847 1349 [ - + ]: 17166 : if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level))
1350 : : {
1351 : : /*
1352 : : * A lower level buffer filled up. Stop emptying this buffer,
1353 : : * to avoid overflowing the lower level buffer.
1354 : : */
5112 heikki.linnakangas@i 1355 :UBC 0 : break;
1356 : : }
1357 : :
1358 : : /* Free all the memory allocated during index tuple processing */
5090 tgl@sss.pgh.pa.us 1359 :CBC 17166 : MemoryContextReset(buildstate->giststate->tempCxt);
1360 : : }
1361 : : }
5112 heikki.linnakangas@i 1362 : 17724 : }
1363 : :
1364 : : /*
1365 : : * Empty all node buffers, from top to bottom. This is done at the end of
1366 : : * index build to flush all remaining tuples to the index.
1367 : : *
1368 : : * Note: This destroys the buffersOnLevels lists, so the buffers should not
 1369 : : * be inserted into after this call.
1370 : : */
1371 : : static void
1372 : 3 : gistEmptyAllBuffers(GISTBuildState *buildstate)
1373 : : {
1374 : 3 : GISTBuildBuffers *gfbb = buildstate->gfbb;
1375 : : MemoryContext oldCtx;
1376 : : int i;
1377 : :
5090 tgl@sss.pgh.pa.us 1378 : 3 : oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
1379 : :
1380 : : /*
1381 : : * Iterate through the levels from top to bottom.
1382 : : */
5112 heikki.linnakangas@i 1383 [ + + ]: 9 : for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--)
1384 : : {
1385 : : /*
1386 : : * Empty all buffers on this level. Note that new buffers can pop up
1387 : : * in the list during the processing, as a result of page splits, so a
1388 : : * simple walk through the list won't work. We remove buffers from the
1389 : : * list when we see them empty; a buffer can't become non-empty once
1390 : : * it's been fully emptied.
1391 : : */
1392 [ + + ]: 24 : while (gfbb->buffersOnLevels[i] != NIL)
1393 : : {
1394 : : GISTNodeBuffer *nodeBuffer;
1395 : :
1396 : 18 : nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]);
1397 : :
1398 [ + + ]: 18 : if (nodeBuffer->blocksCount != 0)
1399 : : {
1400 : : /*
1401 : : * Add this buffer to the emptying queue, and proceed to empty
1402 : : * the queue.
1403 : : */
5108 1404 [ + - ]: 9 : if (!nodeBuffer->queuedForEmptying)
1405 : : {
1406 : 9 : MemoryContextSwitchTo(gfbb->context);
1407 : 9 : nodeBuffer->queuedForEmptying = true;
1408 : 9 : gfbb->bufferEmptyingQueue =
1409 : 9 : lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
5090 tgl@sss.pgh.pa.us 1410 : 9 : MemoryContextSwitchTo(buildstate->giststate->tempCxt);
1411 : : }
5112 heikki.linnakangas@i 1412 : 9 : gistProcessEmptyingQueue(buildstate);
1413 : : }
1414 : : else
1415 : 9 : gfbb->buffersOnLevels[i] =
1416 : 9 : list_delete_first(gfbb->buffersOnLevels[i]);
1417 : : }
4847 1418 [ - + ]: 6 : elog(DEBUG2, "emptied all buffers at level %d", i);
1419 : : }
5112 1420 : 3 : MemoryContextSwitchTo(oldCtx);
1421 : 3 : }
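/*
 * A self-contained sketch (hypothetical names, not gistbuild.c code) of the
 * list-walking discipline used in gistEmptyAllBuffers() above: on each level,
 * keep looking at the head of the list, emptying it if it still holds tuples
 * and unlinking it once it is empty.  Because emptying can add new buffers to
 * the same list when pages split, iterating over a snapshot of the list could
 * miss entries; re-examining the head until the list is empty cannot.
 */
#include <stddef.h>

typedef struct ToyNode
{
	int			pending;		/* tuples still queued in this buffer */
	struct ToyNode *next;		/* next buffer on the same level */
} ToyNode;

/*
 * Stand-in for emptying one buffer.  In the real code this may prepend new
 * buffers to *list as a side effect of page splits; the sketch just drains
 * the buffer.
 */
static void
toy_empty_one(ToyNode *node, ToyNode **list)
{
	(void) list;
	node->pending = 0;
}

/* Empty every buffer on one level, tolerating additions to the list. */
static void
toy_empty_level(ToyNode **list)
{
	while (*list != NULL)
	{
		ToyNode    *head = *list;

		if (head->pending > 0)
			toy_empty_one(head, list);
		else
			*list = head->next; /* unlink the emptied head; its memory is
								 * reclaimed later, as in the real build */
	}
}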
1422 : :
1423 : : /*
1424 : : * Get the depth of the GiST index.
1425 : : */
1426 : : static int
1427 : 3 : gistGetMaxLevel(Relation index)
1428 : : {
1429 : : int maxLevel;
1430 : : BlockNumber blkno;
1431 : :
1432 : : /*
1433 : : * Traverse down the tree, starting from the root, until we hit the leaf
1434 : : * level.
1435 : : */
1436 : 3 : maxLevel = 0;
1437 : 3 : blkno = GIST_ROOT_BLKNO;
1438 : : while (true)
1439 : 3 : {
1440 : : Buffer buffer;
1441 : : Page page;
1442 : : IndexTuple itup;
1443 : :
1444 : 6 : buffer = ReadBuffer(index, blkno);
1445 : :
1446 : : /*
1447 : : * There's no concurrent access during index build, so locking is just
1448 : : * pro forma.
1449 : : */
1450 : 6 : LockBuffer(buffer, GIST_SHARE);
8 peter@eisentraut.org 1451 :GNC 6 : page = BufferGetPage(buffer);
1452 : :
5112 heikki.linnakangas@i 1453 [ + + ]:CBC 6 : if (GistPageIsLeaf(page))
1454 : : {
1455 : : /* We hit the bottom, so we're done. */
1456 : 3 : UnlockReleaseBuffer(buffer);
1457 : 3 : break;
1458 : : }
1459 : :
1460 : : /*
1461 : : * Pick the first downlink on the page, and follow it. It doesn't
1462 : : * matter which downlink we choose; the tree has the same depth
1463 : : * everywhere, so the first one is as good as any.
1464 : : */
1465 : 3 : itup = (IndexTuple) PageGetItem(page,
2999 tgl@sss.pgh.pa.us 1466 : 3 : PageGetItemId(page, FirstOffsetNumber));
5112 heikki.linnakangas@i 1467 : 3 : blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
1468 : 3 : UnlockReleaseBuffer(buffer);
1469 : :
1470 : : /*
1471 : : * We're descending to the next level, which means the tree has at
1472 : : * least one more level below this one.
1473 : : */
1474 : 3 : maxLevel++;
1475 : : }
1476 : 3 : return maxLevel;
1477 : : }
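/*
 * A minimal sketch of the same depth-probing idea, on a toy in-memory tree
 * instead of buffer reads (ToyPage and its fields are hypothetical).  Because
 * the tree is balanced -- every leaf sits at the same depth -- following any
 * single downlink from the root, here always the first one, measures the
 * depth of the whole tree.
 */
#include <stdbool.h>

typedef struct ToyPage
{
	bool		is_leaf;			/* true for leaf pages */
	struct ToyPage *first_child;	/* first downlink; NULL on leaf pages */
} ToyPage;

static int
toy_get_max_level(const ToyPage *root)
{
	int			level = 0;

	while (!root->is_leaf)
	{
		root = root->first_child;	/* descend one level */
		level++;
	}
	return level;					/* 0 if the root page is itself a leaf */
}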
1478 : :
1479 : :
1480 : : /*
1481 : : * Routines for managing the parent map.
1482 : : *
1483 : : * Whenever a page is split, we need to insert the downlinks into the parent.
1484 : : * We need to somehow find the parent page to do that. In normal insertions,
1485 : : * we keep a stack of nodes visited when we descend the tree. However, in
1486 : : * buffering build, we can start descending the tree from any internal node,
1487 : : * when we empty a buffer by cascading tuples to its children. So we don't
1488 : : * have a full stack up to the root available at that time.
1489 : : *
1490 : : * So instead, we maintain a hash table to track the parent of every internal
1491 : : * page. We don't need to track the parents of leaf pages, however: whenever
1492 : : * we insert into a leaf, we have just descended from its parent, so we
1493 : : * already know its immediate parent. This helps a lot to limit the memory
1494 : : * used by this hash table.
1495 : : *
1496 : : * Whenever an internal node is split, the parent map needs to be updated:
1497 : : * the parent of the new child page needs to be recorded, and the entries
1498 : : * for all pages whose downlinks were moved to the new page by the split
1499 : : * need to be updated as well.
1500 : : *
1501 : : * We also update the parent map whenever we descend the tree. That might seem
1502 : : * unnecessary, because we maintain the map whenever a downlink is moved or
1503 : : * created, but it is needed because we switch to buffering mode after
1504 : : * creating a tree with regular index inserts. Any pages created before
1505 : : * switching to buffering mode will not be present in the parent map initially,
1506 : : * but will be added there the first time we visit them.
1507 : : */
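/*
 * A self-contained sketch of the bookkeeping described above, with a plain
 * array indexed by child block number standing in for the dynahash table
 * (all names here are hypothetical).  It shows the two updates the comment
 * calls for after an internal-page split: the new page gets an entry pointing
 * at the same parent as the page it split off from, and every child whose
 * downlink moved to the new page is re-pointed at it.
 */
#include <assert.h>
#include <stddef.h>

#define TOY_MAX_BLOCKS 1024

/* toy parent map: toy_parent_of[child blkno] == parent blkno */
static unsigned toy_parent_of[TOY_MAX_BLOCKS];

static void
toy_memorize_parent(unsigned child, unsigned parent)
{
	assert(child < TOY_MAX_BLOCKS && parent < TOY_MAX_BLOCKS);
	toy_parent_of[child] = parent;	/* overwriting is fine: latest parent wins */
}

static void
toy_relocate_after_split(unsigned oldpage, unsigned newpage,
						 const unsigned *moved_children, size_t nmoved)
{
	assert(oldpage < TOY_MAX_BLOCKS);

	/* the new page hangs under the same parent as the page that split */
	toy_memorize_parent(newpage, toy_parent_of[oldpage]);

	/* downlinks that moved to the new page now have it as their parent */
	for (size_t i = 0; i < nmoved; i++)
		toy_memorize_parent(moved_children[i], newpage);
}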
1508 : :
1509 : : typedef struct
1510 : : {
1511 : : BlockNumber childblkno; /* hash key */
1512 : : BlockNumber parentblkno;
1513 : : } ParentMapEntry;
1514 : :
1515 : : static void
4847 1516 : 3 : gistInitParentMap(GISTBuildState *buildstate)
1517 : : {
1518 : : HASHCTL hashCtl;
1519 : :
1520 : 3 : hashCtl.keysize = sizeof(BlockNumber);
1521 : 3 : hashCtl.entrysize = sizeof(ParentMapEntry);
1522 : 3 : hashCtl.hcxt = CurrentMemoryContext;
1523 : 3 : buildstate->parentMap = hash_create("gistbuild parent map",
1524 : : 1024,
1525 : : &hashCtl,
1526 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
1527 : 3 : }
1528 : :
1529 : : static void
1530 : 17463 : gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent)
1531 : : {
1532 : : ParentMapEntry *entry;
1533 : : bool found;
1534 : :
1535 : 17463 : entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
1536 : : &child,
1537 : : HASH_ENTER,
1538 : : &found);
1539 : 17463 : entry->parentblkno = parent;
1540 : 17463 : }
1541 : :
1542 : : /*
1543 : : * Scan all downlinks on a page, and memorize their parent.
1544 : : */
1545 : : static void
1546 : 6 : gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf)
1547 : : {
1548 : : OffsetNumber maxoff;
1549 : : OffsetNumber off;
1550 : 6 : BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
3426 kgrittn@postgresql.o 1551 : 6 : Page page = BufferGetPage(parentbuf);
1552 : :
4847 heikki.linnakangas@i 1553 [ - + ]: 6 : Assert(!GistPageIsLeaf(page));
1554 : :
1555 : 6 : maxoff = PageGetMaxOffsetNumber(page);
1556 [ + + ]: 285 : for (off = FirstOffsetNumber; off <= maxoff; off++)
1557 : : {
4836 bruce@momjian.us 1558 : 279 : ItemId iid = PageGetItemId(page, off);
1559 : 279 : IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
4847 heikki.linnakangas@i 1560 : 279 : BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
1561 : :
1562 : 279 : gistMemorizeParent(buildstate, childblkno, parentblkno);
1563 : : }
1564 : 6 : }
1565 : :
1566 : : static BlockNumber
1567 : 6 : gistGetParent(GISTBuildState *buildstate, BlockNumber child)
1568 : : {
1569 : : ParentMapEntry *entry;
1570 : : bool found;
1571 : :
1572 : : /* Look up the parent of this block in the hash table */
1573 : 6 : entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
1574 : : &child,
1575 : : HASH_FIND,
1576 : : &found);
1577 [ - + ]: 6 : if (!found)
1603 peter@eisentraut.org 1578 [ # # ]:UBC 0 : elog(ERROR, "could not find parent of block %u in lookup table", child);
1579 : :
4847 heikki.linnakangas@i 1580 :CBC 6 : return entry->parentblkno;
1581 : : }