Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * hashsearch.c
4 : : * search code for postgres hash tables
5 : : *
6 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/hash/hashsearch.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/hash.h"
18 : : #include "access/relscan.h"
19 : : #include "miscadmin.h"
20 : : #include "pgstat.h"
21 : : #include "storage/predicate.h"
22 : : #include "utils/rel.h"
23 : :
24 : : static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
25 : : ScanDirection dir);
26 : : static int _hash_load_qualified_items(IndexScanDesc scan, Page page,
27 : : OffsetNumber offnum, ScanDirection dir);
28 : : static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
29 : : OffsetNumber offnum, IndexTuple itup);
30 : : static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
31 : : Page *pagep, HashPageOpaque *opaquep);
32 : :
33 : : /*
34 : : * _hash_next() -- Get the next item in a scan.
35 : : *
36 : : * On entry, so->currPos describes the current page, which may
37 : : * be pinned but not locked, and so->currPos.itemIndex identifies
38 : : * which item was previously returned.
39 : : *
40 : : * On successful exit, scan->xs_heaptid is set to the TID of the next
41 : : * heap tuple. so->currPos is updated as needed.
42 : : *
43 : : * On failure exit (no more tuples), we return false with pin
44 : : * held on bucket page but no pins or locks held on overflow
45 : : * page.
46 : : */
47 : : bool
10651 scrappy@hub.org 48 :CBC 50563 : _hash_next(IndexScanDesc scan, ScanDirection dir)
49 : : {
8038 tgl@sss.pgh.pa.us 50 : 50563 : Relation rel = scan->indexRelation;
51 : 50563 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
52 : : HashScanPosItem *currItem;
53 : : BlockNumber blkno;
54 : : Buffer buf;
2906 rhaas@postgresql.org 55 : 50563 : bool end_of_scan = false;
56 : :
57 : : /*
58 : : * Advance to the next tuple on the current page; or if done, try to read
59 : : * data from the next or previous page based on the scan direction. Before
60 : : * moving to the next or previous page make sure that we deal with all the
61 : : * killed items.
62 : : */
63 [ + + ]: 50563 : if (ScanDirectionIsForward(dir))
64 : : {
65 [ + + ]: 34063 : if (++so->currPos.itemIndex > so->currPos.lastItem)
66 : : {
67 [ + + ]: 283 : if (so->numKilled > 0)
2906 rhaas@postgresql.org 68 :GBC 1 : _hash_kill_items(scan);
69 : :
2906 rhaas@postgresql.org 70 :CBC 283 : blkno = so->currPos.nextPage;
71 [ + + ]: 283 : if (BlockNumberIsValid(blkno))
72 : : {
73 : 78 : buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
74 [ - + ]: 78 : if (!_hash_readpage(scan, &buf, dir))
2906 rhaas@postgresql.org 75 :UBC 0 : end_of_scan = true;
76 : : }
77 : : else
2906 rhaas@postgresql.org 78 :CBC 205 : end_of_scan = true;
79 : : }
80 : : }
81 : : else
82 : : {
83 [ + + ]: 16500 : if (--so->currPos.itemIndex < so->currPos.firstItem)
84 : : {
85 [ - + ]: 42 : if (so->numKilled > 0)
2906 rhaas@postgresql.org 86 :UBC 0 : _hash_kill_items(scan);
87 : :
2906 rhaas@postgresql.org 88 :CBC 42 : blkno = so->currPos.prevPage;
89 [ + + ]: 42 : if (BlockNumberIsValid(blkno))
90 : : {
91 : 39 : buf = _hash_getbuf(rel, blkno, HASH_READ,
92 : : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
93 : :
94 : : /*
95 : : * We always maintain the pin on bucket page for whole scan
96 : : * operation, so releasing the additional pin we have acquired
97 : : * here.
98 : : */
99 [ + + ]: 39 : if (buf == so->hashso_bucket_buf ||
100 [ - + ]: 36 : buf == so->hashso_split_bucket_buf)
101 : 3 : _hash_dropbuf(rel, buf);
102 : :
103 [ - + ]: 39 : if (!_hash_readpage(scan, &buf, dir))
2906 rhaas@postgresql.org 104 :UBC 0 : end_of_scan = true;
105 : : }
106 : : else
2906 rhaas@postgresql.org 107 :CBC 3 : end_of_scan = true;
108 : : }
109 : : }
110 : :
111 [ + + ]: 50563 : if (end_of_scan)
112 : : {
113 : 208 : _hash_dropscanbuf(rel, so);
114 : 208 : HashScanPosInvalidate(so->currPos);
8510 tgl@sss.pgh.pa.us 115 : 208 : return false;
116 : : }
117 : :
118 : : /* OK, itemIndex says what to return */
2906 rhaas@postgresql.org 119 : 50355 : currItem = &so->currPos.items[so->currPos.itemIndex];
2371 andres@anarazel.de 120 : 50355 : scan->xs_heaptid = currItem->heapTid;
121 : :
8510 tgl@sss.pgh.pa.us 122 : 50355 : return true;
123 : : }
124 : :
125 : : /*
126 : : * Advance to next page in a bucket, if any. If we are scanning the bucket
127 : : * being populated during split operation then this function advances to the
128 : : * bucket being split after the last bucket page of bucket being populated.
129 : : */
130 : : static void
3202 rhaas@postgresql.org 131 : 97 : _hash_readnext(IndexScanDesc scan,
132 : : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
133 : : {
134 : : BlockNumber blkno;
135 : 97 : Relation rel = scan->indexRelation;
136 : 97 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
137 : 97 : bool block_found = false;
138 : :
10226 bruce@momjian.us 139 : 97 : blkno = (*opaquep)->hasho_nextblkno;
140 : :
141 : : /*
142 : : * Retain the pin on primary bucket page till the end of scan. Refer the
143 : : * comments in _hash_first to know the reason of retaining pin.
144 : : */
3202 rhaas@postgresql.org 145 [ + + - + ]: 97 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
3179 146 : 61 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
147 : : else
3202 148 : 36 : _hash_relbuf(rel, *bufp);
149 : :
10226 bruce@momjian.us 150 : 97 : *bufp = InvalidBuffer;
151 : : /* check for interrupts while we're not holding any buffer lock */
5968 tgl@sss.pgh.pa.us 152 [ - + ]: 97 : CHECK_FOR_INTERRUPTS();
10226 bruce@momjian.us 153 [ + + ]: 97 : if (BlockNumberIsValid(blkno))
154 : : {
6701 tgl@sss.pgh.pa.us 155 : 39 : *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
3202 rhaas@postgresql.org 156 : 39 : block_found = true;
157 : : }
158 [ - + - - ]: 58 : else if (so->hashso_buc_populated && !so->hashso_buc_split)
159 : : {
160 : : /*
161 : : * end of bucket, scan bucket being split if there was a split in
162 : : * progress at the start of scan.
163 : : */
3202 rhaas@postgresql.org 164 :UBC 0 : *bufp = so->hashso_split_bucket_buf;
165 : :
166 : : /*
167 : : * buffer for bucket being split must be valid as we acquire the pin
168 : : * on it before the start of scan and retain it till end of scan.
169 : : */
170 [ # # ]: 0 : Assert(BufferIsValid(*bufp));
171 : :
3179 172 : 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
2709 teodor@sigaev.ru 173 : 0 : PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);
174 : :
175 : : /*
176 : : * setting hashso_buc_split to true indicates that we are scanning
177 : : * bucket being split.
178 : : */
3202 rhaas@postgresql.org 179 : 0 : so->hashso_buc_split = true;
180 : :
181 : 0 : block_found = true;
182 : : }
183 : :
3202 rhaas@postgresql.org 184 [ + + ]:CBC 97 : if (block_found)
185 : : {
3426 kgrittn@postgresql.o 186 : 39 : *pagep = BufferGetPage(*bufp);
1254 michael@paquier.xyz 187 : 39 : *opaquep = HashPageGetOpaque(*pagep);
188 : : }
10651 scrappy@hub.org 189 : 97 : }
190 : :
191 : : /*
192 : : * Advance to previous page in a bucket, if any. If the current scan has
193 : : * started during split operation then this function advances to bucket
194 : : * being populated after the first bucket page of bucket being split.
195 : : */
196 : : static void
3202 rhaas@postgresql.org 197 :UBC 0 : _hash_readprev(IndexScanDesc scan,
198 : : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
199 : : {
200 : : BlockNumber blkno;
201 : 0 : Relation rel = scan->indexRelation;
202 : 0 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
203 : : bool haveprevblk;
204 : :
10226 bruce@momjian.us 205 : 0 : blkno = (*opaquep)->hasho_prevblkno;
206 : :
207 : : /*
208 : : * Retain the pin on primary bucket page till the end of scan. Refer the
209 : : * comments in _hash_first to know the reason of retaining pin.
210 : : */
3202 rhaas@postgresql.org 211 [ # # # # ]: 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
212 : : {
3179 213 : 0 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
3133 214 : 0 : haveprevblk = false;
215 : : }
216 : : else
217 : : {
3202 218 : 0 : _hash_relbuf(rel, *bufp);
3133 219 : 0 : haveprevblk = true;
220 : : }
221 : :
10226 bruce@momjian.us 222 : 0 : *bufp = InvalidBuffer;
223 : : /* check for interrupts while we're not holding any buffer lock */
5968 tgl@sss.pgh.pa.us 224 [ # # ]: 0 : CHECK_FOR_INTERRUPTS();
225 : :
3133 rhaas@postgresql.org 226 [ # # ]: 0 : if (haveprevblk)
227 : : {
228 [ # # ]: 0 : Assert(BlockNumberIsValid(blkno));
6701 tgl@sss.pgh.pa.us 229 : 0 : *bufp = _hash_getbuf(rel, blkno, HASH_READ,
230 : : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
3426 kgrittn@postgresql.o 231 : 0 : *pagep = BufferGetPage(*bufp);
1254 michael@paquier.xyz 232 : 0 : *opaquep = HashPageGetOpaque(*pagep);
233 : :
234 : : /*
235 : : * We always maintain the pin on bucket page for whole scan operation,
236 : : * so releasing the additional pin we have acquired here.
237 : : */
3202 rhaas@postgresql.org 238 [ # # # # ]: 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
239 : 0 : _hash_dropbuf(rel, *bufp);
240 : : }
241 [ # # # # ]: 0 : else if (so->hashso_buc_populated && so->hashso_buc_split)
242 : : {
243 : : /*
244 : : * end of bucket, scan bucket being populated if there was a split in
245 : : * progress at the start of scan.
246 : : */
247 : 0 : *bufp = so->hashso_bucket_buf;
248 : :
249 : : /*
250 : : * buffer for bucket being populated must be valid as we acquire the
251 : : * pin on it before the start of scan and retain it till end of scan.
252 : : */
253 [ # # ]: 0 : Assert(BufferIsValid(*bufp));
254 : :
3179 255 : 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
3202 256 : 0 : *pagep = BufferGetPage(*bufp);
1254 michael@paquier.xyz 257 : 0 : *opaquep = HashPageGetOpaque(*pagep);
258 : :
259 : : /* move to the end of bucket chain */
3202 rhaas@postgresql.org 260 [ # # ]: 0 : while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
261 : 0 : _hash_readnext(scan, bufp, pagep, opaquep);
262 : :
263 : : /*
264 : : * setting hashso_buc_split to false indicates that we are scanning
265 : : * bucket being populated.
266 : : */
267 : 0 : so->hashso_buc_split = false;
268 : : }
10651 scrappy@hub.org 269 : 0 : }
270 : :
271 : : /*
272 : : * _hash_first() -- Find the first item in a scan.
273 : : *
274 : : * We find the first item (or, if backward scan, the last item) in the
275 : : * index that satisfies the qualification associated with the scan
276 : : * descriptor.
277 : : *
278 : : * On successful exit, if the page containing current index tuple is an
279 : : * overflow page, both pin and lock are released whereas if it is a bucket
280 : : * page then it is pinned but not locked and data about the matching
281 : : * tuple(s) on the page has been loaded into so->currPos,
282 : : * scan->xs_heaptid is set to the heap TID of the current tuple.
283 : : *
284 : : * On failure exit (no more tuples), we return false, with pin held on
285 : : * bucket page but no pins or locks held on overflow page.
286 : : */
287 : : bool
10651 scrappy@hub.org 288 :CBC 270 : _hash_first(IndexScanDesc scan, ScanDirection dir)
289 : : {
8038 tgl@sss.pgh.pa.us 290 : 270 : Relation rel = scan->indexRelation;
291 : 270 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
292 : : ScanKey cur;
293 : : uint32 hashkey;
294 : : Bucket bucket;
295 : : Buffer buf;
296 : : Page page;
297 : : HashPageOpaque opaque;
298 : : HashScanPosItem *currItem;
299 : :
6677 300 [ + + + - : 270 : pgstat_count_index_scan(rel);
+ - ]
179 pg@bowt.ie 301 [ + + ]: 270 : if (scan->instrument)
302 : 266 : scan->instrument->nsearches++;
303 : :
304 : : /*
305 : : * We do not support hash scans with no index qualification, because we
306 : : * would have to read the whole index rather than just one bucket. That
307 : : * creates a whole raft of problems, since we haven't got a practical way
308 : : * to lock all the buckets against splits or compactions.
309 : : */
8038 tgl@sss.pgh.pa.us 310 [ - + ]: 270 : if (scan->numberOfKeys < 1)
8038 tgl@sss.pgh.pa.us 311 [ # # ]:UBC 0 : ereport(ERROR,
312 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
313 : : errmsg("hash indexes do not support whole-index scans")));
314 : :
315 : : /* There may be more than one index qual, but we hash only the first */
6794 tgl@sss.pgh.pa.us 316 :CBC 270 : cur = &scan->keyData[0];
317 : :
318 : : /* We support only single-column hash indexes */
319 [ - + ]: 270 : Assert(cur->sk_attno == 1);
320 : : /* And there's only one operator strategy, too */
321 [ - + ]: 270 : Assert(cur->sk_strategy == HTEqualStrategyNumber);
322 : :
323 : : /*
324 : : * If the constant in the index qual is NULL, assume it cannot match any
325 : : * items in the index.
326 : : */
327 [ - + ]: 270 : if (cur->sk_flags & SK_ISNULL)
8038 tgl@sss.pgh.pa.us 328 :UBC 0 : return false;
329 : :
330 : : /*
331 : : * Okay to compute the hash key. We want to do this before acquiring any
332 : : * locks, in case a user-defined hash function happens to be slow.
333 : : *
334 : : * If scankey operator is not a cross-type comparison, we can use the
335 : : * cached hash function; otherwise gotta look it up in the catalogs.
336 : : *
337 : : * We support the convention that sk_subtype == InvalidOid means the
338 : : * opclass input type; this is a hack to simplify life for ScanKeyInit().
339 : : */
6794 tgl@sss.pgh.pa.us 340 [ + + ]:CBC 270 : if (cur->sk_subtype == rel->rd_opcintype[0] ||
341 [ + - ]: 4 : cur->sk_subtype == InvalidOid)
342 : 270 : hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
343 : : else
6794 tgl@sss.pgh.pa.us 344 :UBC 0 : hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
345 : : cur->sk_subtype);
346 : :
6200 tgl@sss.pgh.pa.us 347 :CBC 270 : so->hashso_sk_hash = hashkey;
348 : :
3133 rhaas@postgresql.org 349 : 270 : buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
2709 teodor@sigaev.ru 350 : 270 : PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
3426 kgrittn@postgresql.o 351 : 270 : page = BufferGetPage(buf);
1254 michael@paquier.xyz 352 : 270 : opaque = HashPageGetOpaque(page);
3133 rhaas@postgresql.org 353 : 270 : bucket = opaque->hasho_bucket;
354 : :
3202 355 : 270 : so->hashso_bucket_buf = buf;
356 : :
357 : : /*
358 : : * If a bucket split is in progress, then while scanning the bucket being
359 : : * populated, we need to skip tuples that were copied from bucket being
360 : : * split. We also need to maintain a pin on the bucket being split to
361 : : * ensure that split-cleanup work done by vacuum doesn't remove tuples
362 : : * from it till this scan is done. We need to maintain a pin on the
363 : : * bucket being populated to ensure that vacuum doesn't squeeze that
364 : : * bucket till this scan is complete; otherwise, the ordering of tuples
365 : : * can't be maintained during forward and backward scans. Here, we have
366 : : * to be cautious about locking order: first, acquire the lock on bucket
367 : : * being split; then, release the lock on it but not the pin; then,
368 : : * acquire a lock on bucket being populated and again re-verify whether
369 : : * the bucket split is still in progress. Acquiring the lock on bucket
370 : : * being split first ensures that the vacuum waits for this scan to
371 : : * finish.
372 : : */
373 [ - + ]: 270 : if (H_BUCKET_BEING_POPULATED(opaque))
374 : : {
375 : : BlockNumber old_blkno;
376 : : Buffer old_buf;
377 : :
3202 rhaas@postgresql.org 378 :UBC 0 : old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
379 : :
380 : : /*
381 : : * release the lock on new bucket and re-acquire it after acquiring
382 : : * the lock on old bucket.
383 : : */
3179 384 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
385 : :
3202 386 : 0 : old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
387 : :
388 : : /*
389 : : * remember the split bucket buffer so as to use it later for
390 : : * scanning.
391 : : */
392 : 0 : so->hashso_split_bucket_buf = old_buf;
3179 393 : 0 : LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
394 : :
395 : 0 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3202 396 : 0 : page = BufferGetPage(buf);
1254 michael@paquier.xyz 397 : 0 : opaque = HashPageGetOpaque(page);
3202 rhaas@postgresql.org 398 [ # # ]: 0 : Assert(opaque->hasho_bucket == bucket);
399 : :
400 [ # # ]: 0 : if (H_BUCKET_BEING_POPULATED(opaque))
401 : 0 : so->hashso_buc_populated = true;
402 : : else
403 : : {
404 : 0 : _hash_dropbuf(rel, so->hashso_split_bucket_buf);
405 : 0 : so->hashso_split_bucket_buf = InvalidBuffer;
406 : : }
407 : : }
408 : :
409 : : /* If a backwards scan is requested, move to the end of the chain */
10226 bruce@momjian.us 410 [ + + ]:CBC 270 : if (ScanDirectionIsBackward(dir))
411 : : {
412 : : /*
413 : : * Backward scans that start during split needs to start from end of
414 : : * bucket being split.
415 : : */
3202 rhaas@postgresql.org 416 [ + + ]: 42 : while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
417 [ - + - - ]: 3 : (so->hashso_buc_populated && !so->hashso_buc_split))
418 : 39 : _hash_readnext(scan, &buf, &page, &opaque);
419 : : }
420 : :
421 : : /* remember which buffer we have pinned, if any */
2906 422 [ - + ]: 270 : Assert(BufferIsInvalid(so->currPos.buf));
423 : 270 : so->currPos.buf = buf;
424 : :
425 : : /* Now find all the tuples satisfying the qualification from a page */
426 [ + + ]: 270 : if (!_hash_readpage(scan, &buf, dir))
8510 tgl@sss.pgh.pa.us 427 : 58 : return false;
428 : :
429 : : /* OK, itemIndex says what to return */
2906 rhaas@postgresql.org 430 : 212 : currItem = &so->currPos.items[so->currPos.itemIndex];
2371 andres@anarazel.de 431 : 212 : scan->xs_heaptid = currItem->heapTid;
432 : :
433 : : /* if we're here, _hash_readpage found a valid tuples */
8510 tgl@sss.pgh.pa.us 434 : 212 : return true;
435 : : }
436 : :
437 : : /*
438 : : * _hash_readpage() -- Load data from current index page into so->currPos
439 : : *
440 : : * We scan all the items in the current index page and save them into
441 : : * so->currPos if it satisfies the qualification. If no matching items
442 : : * are found in the current page, we move to the next or previous page
443 : : * in a bucket chain as indicated by the direction.
444 : : *
445 : : * Return true if any matching items are found else return false.
446 : : */
447 : : static bool
2906 rhaas@postgresql.org 448 : 387 : _hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
449 : : {
8038 tgl@sss.pgh.pa.us 450 : 387 : Relation rel = scan->indexRelation;
451 : 387 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
452 : : Buffer buf;
453 : : Page page;
454 : : HashPageOpaque opaque;
455 : : OffsetNumber offnum;
456 : : uint16 itemIndex;
457 : :
10226 bruce@momjian.us 458 : 387 : buf = *bufP;
2906 rhaas@postgresql.org 459 [ - + ]: 387 : Assert(BufferIsValid(buf));
7244 tgl@sss.pgh.pa.us 460 : 387 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
3426 kgrittn@postgresql.o 461 : 387 : page = BufferGetPage(buf);
1254 michael@paquier.xyz 462 : 387 : opaque = HashPageGetOpaque(page);
463 : :
2906 rhaas@postgresql.org 464 : 387 : so->currPos.buf = buf;
465 : 387 : so->currPos.currPage = BufferGetBlockNumber(buf);
466 : :
467 [ + + ]: 387 : if (ScanDirectionIsForward(dir))
468 : : {
469 : 345 : BlockNumber prev_blkno = InvalidBlockNumber;
470 : :
471 : : for (;;)
472 : : {
473 : : /* new page, locate starting position by binary search */
474 : 345 : offnum = _hash_binsearch(page, so->hashso_sk_hash);
475 : :
476 : 345 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
477 : :
478 [ + + ]: 345 : if (itemIndex != 0)
10225 bruce@momjian.us 479 : 287 : break;
480 : :
481 : : /*
482 : : * Could not find any matching tuples in the current page, move to
483 : : * the next page. Before leaving the current page, deal with any
484 : : * killed items.
485 : : */
2906 rhaas@postgresql.org 486 [ - + ]: 58 : if (so->numKilled > 0)
2906 rhaas@postgresql.org 487 :UBC 0 : _hash_kill_items(scan);
488 : :
489 : : /*
490 : : * If this is a primary bucket page, hasho_prevblkno is not a real
491 : : * block number.
492 : : */
2906 rhaas@postgresql.org 493 [ - + ]:CBC 58 : if (so->currPos.buf == so->hashso_bucket_buf ||
2906 rhaas@postgresql.org 494 [ # # ]:UBC 0 : so->currPos.buf == so->hashso_split_bucket_buf)
2906 rhaas@postgresql.org 495 :CBC 58 : prev_blkno = InvalidBlockNumber;
496 : : else
2906 rhaas@postgresql.org 497 :UBC 0 : prev_blkno = opaque->hasho_prevblkno;
498 : :
2906 rhaas@postgresql.org 499 :CBC 58 : _hash_readnext(scan, &buf, &page, &opaque);
500 [ - + ]: 58 : if (BufferIsValid(buf))
501 : : {
2906 rhaas@postgresql.org 502 :UBC 0 : so->currPos.buf = buf;
503 : 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
504 : : }
505 : : else
506 : : {
507 : : /*
508 : : * Remember next and previous block numbers for scrollable
509 : : * cursors to know the start position and return false
510 : : * indicating that no more matching tuples were found. Also,
511 : : * don't reset currPage or lsn, because we expect
512 : : * _hash_kill_items to be called for the old page after this
513 : : * function returns.
514 : : */
2906 rhaas@postgresql.org 515 :CBC 58 : so->currPos.prevPage = prev_blkno;
516 : 58 : so->currPos.nextPage = InvalidBlockNumber;
517 : 58 : so->currPos.buf = buf;
518 : 58 : return false;
519 : : }
520 : : }
521 : :
522 : 287 : so->currPos.firstItem = 0;
523 : 287 : so->currPos.lastItem = itemIndex - 1;
524 : 287 : so->currPos.itemIndex = 0;
525 : : }
526 : : else
527 : : {
528 : 42 : BlockNumber next_blkno = InvalidBlockNumber;
529 : :
530 : : for (;;)
531 : : {
532 : : /* new page, locate starting position by binary search */
533 : 42 : offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
534 : :
535 : 42 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
536 : :
537 [ + - ]: 42 : if (itemIndex != MaxIndexTuplesPerPage)
10225 bruce@momjian.us 538 : 42 : break;
539 : :
540 : : /*
541 : : * Could not find any matching tuples in the current page, move to
542 : : * the previous page. Before leaving the current page, deal with
543 : : * any killed items.
544 : : */
2906 rhaas@postgresql.org 545 [ # # ]:UBC 0 : if (so->numKilled > 0)
546 : 0 : _hash_kill_items(scan);
547 : :
548 [ # # ]: 0 : if (so->currPos.buf == so->hashso_bucket_buf ||
549 [ # # ]: 0 : so->currPos.buf == so->hashso_split_bucket_buf)
550 : 0 : next_blkno = opaque->hasho_nextblkno;
551 : :
552 : 0 : _hash_readprev(scan, &buf, &page, &opaque);
553 [ # # ]: 0 : if (BufferIsValid(buf))
554 : : {
555 : 0 : so->currPos.buf = buf;
556 : 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
557 : : }
558 : : else
559 : : {
560 : : /*
561 : : * Remember next and previous block numbers for scrollable
562 : : * cursors to know the start position and return false
563 : : * indicating that no more matching tuples were found. Also,
564 : : * don't reset currPage or lsn, because we expect
565 : : * _hash_kill_items to be called for the old page after this
566 : : * function returns.
567 : : */
568 : 0 : so->currPos.prevPage = InvalidBlockNumber;
569 : 0 : so->currPos.nextPage = next_blkno;
570 : 0 : so->currPos.buf = buf;
571 : 0 : return false;
572 : : }
573 : : }
574 : :
2906 rhaas@postgresql.org 575 :CBC 42 : so->currPos.firstItem = itemIndex;
576 : 42 : so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
577 : 42 : so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
578 : : }
579 : :
580 [ + + ]: 329 : if (so->currPos.buf == so->hashso_bucket_buf ||
581 [ - + ]: 117 : so->currPos.buf == so->hashso_split_bucket_buf)
582 : : {
583 : 212 : so->currPos.prevPage = InvalidBlockNumber;
584 : 212 : so->currPos.nextPage = opaque->hasho_nextblkno;
585 : 212 : LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
586 : : }
587 : : else
588 : : {
589 : 117 : so->currPos.prevPage = opaque->hasho_prevblkno;
590 : 117 : so->currPos.nextPage = opaque->hasho_nextblkno;
591 : 117 : _hash_relbuf(rel, so->currPos.buf);
592 : 117 : so->currPos.buf = InvalidBuffer;
593 : : }
594 : :
595 [ - + ]: 329 : Assert(so->currPos.firstItem <= so->currPos.lastItem);
596 : 329 : return true;
597 : : }
598 : :
599 : : /*
600 : : * Load all the qualified items from a current index page
601 : : * into so->currPos. Helper function for _hash_readpage.
602 : : */
603 : : static int
604 : 387 : _hash_load_qualified_items(IndexScanDesc scan, Page page,
605 : : OffsetNumber offnum, ScanDirection dir)
606 : : {
607 : 387 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
608 : : IndexTuple itup;
609 : : int itemIndex;
610 : : OffsetNumber maxoff;
611 : :
612 : 387 : maxoff = PageGetMaxOffsetNumber(page);
613 : :
614 [ + + ]: 387 : if (ScanDirectionIsForward(dir))
615 : : {
616 : : /* load items[] in ascending order */
617 : 345 : itemIndex = 0;
618 : :
619 [ + + ]: 34413 : while (offnum <= maxoff)
620 : : {
621 [ - + ]: 34220 : Assert(offnum >= FirstOffsetNumber);
622 : 34220 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
623 : :
624 : : /*
625 : : * skip the tuples that are moved by split operation for the scan
626 : : * that has started when split was in progress. Also, skip the
627 : : * tuples that are marked as dead.
628 : : */
629 [ - + - - ]: 34220 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
2906 rhaas@postgresql.org 630 [ # # ]:UBC 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
2906 rhaas@postgresql.org 631 [ + - ]:CBC 34220 : (scan->ignore_killed_tuples &&
632 [ + + ]: 34220 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
633 : : {
2906 rhaas@postgresql.org 634 :GBC 1 : offnum = OffsetNumberNext(offnum); /* move forward */
635 : 1 : continue;
636 : : }
637 : :
2906 rhaas@postgresql.org 638 [ + + + - ]:CBC 68286 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
639 : 34067 : _hash_checkqual(scan, itup))
640 : : {
641 : : /* tuple is qualified, so remember it */
642 : 34067 : _hash_saveitem(so, itemIndex, offnum, itup);
643 : 34067 : itemIndex++;
644 : : }
645 : : else
646 : : {
647 : : /*
648 : : * No more matching tuples exist in this page. so, exit while
649 : : * loop.
650 : : */
651 : : break;
652 : : }
653 : :
654 : 34067 : offnum = OffsetNumberNext(offnum);
655 : : }
656 : :
657 [ - + ]: 345 : Assert(itemIndex <= MaxIndexTuplesPerPage);
658 : 345 : return itemIndex;
659 : : }
660 : : else
661 : : {
662 : : /* load items[] in descending order */
663 : 42 : itemIndex = MaxIndexTuplesPerPage;
664 : :
665 [ + + ]: 16542 : while (offnum >= FirstOffsetNumber)
666 : : {
667 [ - + ]: 16500 : Assert(offnum <= maxoff);
668 : 16500 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
669 : :
670 : : /*
671 : : * skip the tuples that are moved by split operation for the scan
672 : : * that has started when split was in progress. Also, skip the
673 : : * tuples that are marked as dead.
674 : : */
675 [ - + - - ]: 16500 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
2906 rhaas@postgresql.org 676 [ # # ]:UBC 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
2906 rhaas@postgresql.org 677 [ + - ]:CBC 16500 : (scan->ignore_killed_tuples &&
678 [ - + ]: 16500 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
679 : : {
2906 rhaas@postgresql.org 680 :UBC 0 : offnum = OffsetNumberPrev(offnum); /* move back */
681 : 0 : continue;
682 : : }
683 : :
2906 rhaas@postgresql.org 684 [ + - + - ]:CBC 33000 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
685 : 16500 : _hash_checkqual(scan, itup))
686 : : {
687 : 16500 : itemIndex--;
688 : : /* tuple is qualified, so remember it */
689 : 16500 : _hash_saveitem(so, itemIndex, offnum, itup);
690 : : }
691 : : else
692 : : {
693 : : /*
694 : : * No more matching tuples exist in this page. so, exit while
695 : : * loop.
696 : : */
697 : : break;
698 : : }
699 : :
700 : 16500 : offnum = OffsetNumberPrev(offnum);
701 : : }
702 : :
703 [ - + ]: 42 : Assert(itemIndex >= 0);
704 : 42 : return itemIndex;
705 : : }
706 : : }
707 : :
708 : : /* Save an index item into so->currPos.items[itemIndex] */
709 : : static inline void
710 : 50567 : _hash_saveitem(HashScanOpaque so, int itemIndex,
711 : : OffsetNumber offnum, IndexTuple itup)
712 : : {
713 : 50567 : HashScanPosItem *currItem = &so->currPos.items[itemIndex];
714 : :
715 : 50567 : currItem->heapTid = itup->t_tid;
716 : 50567 : currItem->indexOffset = offnum;
10651 scrappy@hub.org 717 : 50567 : }
|