Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * heapam.h
4 : : * POSTGRES heap access method definitions.
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * src/include/access/heapam.h
11 : : *
12 : : *-------------------------------------------------------------------------
13 : : */
14 : : #ifndef HEAPAM_H
15 : : #define HEAPAM_H
16 : :
17 : : #include "access/heapam_xlog.h"
18 : : #include "access/relation.h" /* for backward compatibility */
19 : : #include "access/relscan.h"
20 : : #include "access/sdir.h"
21 : : #include "access/skey.h"
22 : : #include "access/table.h" /* for backward compatibility */
23 : : #include "access/tableam.h"
24 : : #include "commands/vacuum.h"
25 : : #include "nodes/lockoptions.h"
26 : : #include "nodes/primnodes.h"
27 : : #include "storage/bufpage.h"
28 : : #include "storage/dsm.h"
29 : : #include "storage/lockdefs.h"
30 : : #include "storage/read_stream.h"
31 : : #include "storage/shm_toc.h"
32 : : #include "utils/relcache.h"
33 : : #include "utils/snapshot.h"
34 : :
35 : :
36 : : /* "options" flag bits for heap_insert */
37 : : #define HEAP_INSERT_SKIP_FSM TABLE_INSERT_SKIP_FSM
38 : : #define HEAP_INSERT_FROZEN TABLE_INSERT_FROZEN
39 : : #define HEAP_INSERT_NO_LOGICAL TABLE_INSERT_NO_LOGICAL
40 : : #define HEAP_INSERT_SPECULATIVE 0x0010
41 : :
42 : : /* "options" flag bits for heap_page_prune_and_freeze */
43 : : #define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW (1 << 0)
44 : : #define HEAP_PAGE_PRUNE_FREEZE (1 << 1)
45 : :
46 : : typedef struct BulkInsertStateData *BulkInsertState;
47 : : struct TupleTableSlot;
48 : : struct VacuumCutoffs;
49 : :
50 : : #define MaxLockTupleMode LockTupleExclusive
51 : :
52 : : /*
53 : : * Descriptor for heap table scans. rs_base, the AM-independent part, is the first member; every other field is heap-specific scan state.
54 : : */
55 : : typedef struct HeapScanDescData
56 : : {
57 : : TableScanDescData rs_base; /* AM independent part of the descriptor */
58 : :
59 : : /* state set up at initscan time */
60 : : BlockNumber rs_nblocks; /* total number of blocks in rel */
61 : : BlockNumber rs_startblock; /* block # to start at */
62 : : BlockNumber rs_numblocks; /* max number of blocks to scan */
63 : : /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */
64 : :
65 : : /* scan current state */
66 : : bool rs_inited; /* false = scan not init'd yet */
67 : : OffsetNumber rs_coffset; /* current offset # in non-page-at-a-time mode */
68 : : BlockNumber rs_cblock; /* current block # in scan, if any */
69 : : Buffer rs_cbuf; /* current buffer in scan, if any */
70 : : /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
71 : :
72 : : BufferAccessStrategy rs_strategy; /* access strategy for reads */
73 : :
74 : : HeapTupleData rs_ctup; /* current tuple in scan, if any */
75 : :
76 : : /* Read stream, for scans that stream their reads */
77 : : ReadStream *rs_read_stream;
78 : :
79 : : /*
80 : : * For sequential scans and TID range scans to stream reads. The read
81 : : * stream is allocated at the beginning of the scan and reset on rescan or
82 : : * when the scan direction changes. The scan direction is saved each time
83 : : * a new page is requested. If the scan direction changes from one page to
84 : : * the next, the read stream releases all previously pinned buffers and
85 : : * resets the prefetch block.
86 : : */
87 : : ScanDirection rs_dir; /* scan direction saved when a new page was last requested */
88 : : BlockNumber rs_prefetch_block; /* prefetch block; reset when the scan direction changes */
89 : :
90 : : /*
91 : : * For parallel scans to store page allocation data. NULL when not
92 : : * performing a parallel scan.
93 : : */
94 : : ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
95 : :
96 : : /* these fields only used in page-at-a-time mode and for bitmap scans */
97 : : uint32 rs_cindex; /* index of the current tuple in rs_vistuples */
98 : : uint32 rs_ntuples; /* number of visible tuples on page */
99 : : OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* offsets of those visible tuples */
100 : : } HeapScanDescData;
101 : : typedef struct HeapScanDescData *HeapScanDesc; /* pointer to a heap scan descriptor */
102 : :
102 : : typedef struct BitmapHeapScanDescData /* wraps HeapScanDescData; adds no fields of its own */
103 : : {
104 : : HeapScanDescData rs_heap_base; /* embedded heap scan descriptor; the only member */
105 : :
106 : : /* Holds no data beyond rs_heap_base */
107 : : } BitmapHeapScanDescData;
108 : : typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc; /* pointer to the descriptor above */
110 : :
111 : : /*
112 : : * Descriptor for fetches from heap via an index.
113 : : */
114 : : typedef struct IndexFetchHeapData
115 : : {
116 : : IndexFetchTableData xs_base; /* AM independent part of the descriptor */
117 : :
118 : : Buffer xs_cbuf; /* current heap buffer in scan, if any */
119 : : /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
120 : : } IndexFetchHeapData;
121 : :
121 : : /* Result codes for HeapTupleSatisfiesVacuum and HeapTupleSatisfiesVacuumHorizon */
122 : : typedef enum
123 : : {
124 : : HEAPTUPLE_DEAD, /* tuple is dead and deletable */
125 : : HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
126 : : HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
127 : : HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
128 : : HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
129 : : } HTSV_Result;
131 : :
132 : : /*
133 : : * heap_prepare_freeze_tuple may request that heap_pre_freeze_checks
134 : : * check any tuple's to-be-frozen xmin and/or xmax status using pg_xact
135 : : */
136 : : #define HEAP_FREEZE_CHECK_XMIN_COMMITTED 0x01
137 : : #define HEAP_FREEZE_CHECK_XMAX_ABORTED 0x02
138 : :
139 : : /* heap_prepare_freeze_tuple state describing how to freeze a tuple; applied to a tuple header by heap_execute_freeze_tuple */
140 : : typedef struct HeapTupleFreeze
141 : : {
142 : : /* Fields describing how to process tuple */
143 : : TransactionId xmax; /* installed as the tuple's xmax by heap_execute_freeze_tuple */
144 : : uint16 t_infomask2; /* overwrites tuple->t_infomask2 */
145 : : uint16 t_infomask; /* overwrites tuple->t_infomask */
146 : : uint8 frzflags; /* tested for XLH_FREEZE_XVAC and XLH_INVALID_XVAC */
147 : :
148 : : /* xmin/xmax check flags (NOTE(review): presumably HEAP_FREEZE_CHECK_* bits -- confirm) */
149 : : uint8 checkflags;
150 : : /* Page offset number for tuple */
151 : : OffsetNumber offset;
152 : : } HeapTupleFreeze;
153 : :
154 : : /*
155 : : * State used by VACUUM to track the details of freezing all eligible tuples
156 : : * on a given heap page.
157 : : *
158 : : * VACUUM prepares freeze plans for each page via heap_prepare_freeze_tuple
159 : : * calls (every tuple with storage gets its own call). This page-level freeze
160 : : * state is updated across each call, which ultimately determines whether or
161 : : * not freezing the page is required.
162 : : *
163 : : * Aside from the basic question of whether or not freezing will go ahead, the
164 : : * state also tracks the oldest extant XID/MXID in the table as a whole, for
165 : : * the purposes of advancing relfrozenxid/relminmxid values in pg_class later
166 : : * on. Each heap_prepare_freeze_tuple call pushes NewRelfrozenXid and/or
167 : : * NewRelminMxid back as required to avoid unsafe final pg_class values. Any
168 : : * and all unfrozen XIDs or MXIDs that remain after VACUUM finishes _must_
169 : : * have values >= the final relfrozenxid/relminmxid values in pg_class. This
170 : : * includes XIDs that remain as MultiXact members from any tuple's xmax.
171 : : *
172 : : * When 'freeze_required' flag isn't set after all tuples are examined, the
173 : : * final choice on freezing is made by vacuumlazy.c. It can decide to trigger
174 : : * freezing based on whatever criteria it deems appropriate. However, it is
175 : : * recommended that vacuumlazy.c avoid early freezing when freezing does not
176 : : * enable setting the target page all-frozen in the visibility map afterwards.
177 : : */
178 : : typedef struct HeapPageFreeze
179 : : {
180 : : /* Is heap_prepare_freeze_tuple caller required to freeze page? */
181 : : bool freeze_required;
182 : :
183 : : /*
184 : : * "Freeze" NewRelfrozenXid/NewRelminMxid trackers.
185 : : *
186 : : * Trackers used when heap_freeze_prepared_tuples freezes, or when there
187 : : * are zero freeze plans for a page. It is always valid for vacuumlazy.c
188 : : * to freeze any page, by definition. This even includes pages that have
189 : : * no tuples with storage to consider in the first place. That way the
190 : : * 'totally_frozen' results from heap_prepare_freeze_tuple can always be
191 : : * used in the same way, even when no freeze plans need to be executed to
192 : : * "freeze the page". Only the "freeze" path needs to consider the need
193 : : * to set pages all-frozen in the visibility map under this scheme.
194 : : *
195 : : * When we freeze a page, we generally freeze all XIDs < OldestXmin, only
196 : : * leaving behind XIDs that are ineligible for freezing, if any. And so
197 : : * you might wonder why these trackers are necessary at all; why should
198 : : * _any_ page that VACUUM freezes _ever_ be left with XIDs/MXIDs that
199 : : * ratchet back the top-level NewRelfrozenXid/NewRelminMxid trackers?
200 : : *
201 : : * It is useful to use a definition of "freeze the page" that does not
202 : : * overspecify how MultiXacts are affected. heap_prepare_freeze_tuple
203 : : * generally prefers to remove Multis eagerly, but lazy processing is used
204 : : * in cases where laziness allows VACUUM to avoid allocating a new Multi.
205 : : * The "freeze the page" trackers enable this flexibility.
206 : : */
207 : : TransactionId FreezePageRelfrozenXid;
208 : : MultiXactId FreezePageRelminMxid;
209 : :
210 : : /*
211 : : * "No freeze" NewRelfrozenXid/NewRelminMxid trackers.
212 : : *
213 : : * These trackers are maintained in the same way as the trackers used when
214 : : * VACUUM scans a page that isn't cleanup locked. Both code paths are
215 : : * based on the same general idea (do less work for this page during the
216 : : * ongoing VACUUM, at the cost of having to accept older final values).
217 : : */
218 : : TransactionId NoFreezePageRelfrozenXid;
219 : : MultiXactId NoFreezePageRelminMxid;
220 : :
221 : : } HeapPageFreeze;
222 : :
223 : : /*
224 : : * Per-page state filled in by heap_page_prune_and_freeze() through its 'presult' argument
225 : : */
226 : : typedef struct PruneFreezeResult
227 : : {
228 : : int ndeleted; /* Number of tuples deleted from the page */
229 : : int nnewlpdead; /* Number of newly LP_DEAD items */
230 : : int nfrozen; /* Number of tuples we froze */
231 : :
232 : : /* Number of live and recently dead tuples on the page, after pruning */
233 : : int live_tuples;
234 : : int recently_dead_tuples;
235 : :
236 : : /*
237 : : * all_visible and all_frozen indicate if the all-visible and all-frozen
238 : : * bits in the visibility map can be set for this page, after pruning.
239 : : *
240 : : * vm_conflict_horizon is the newest xmin of live tuples on the page. The
241 : : * caller can use it as the conflict horizon when setting the VM bits. It
242 : : * is only valid if we froze some tuples (nfrozen > 0), and all_frozen is
243 : : * true.
244 : : *
245 : : * These are only set if the HEAP_PAGE_PRUNE_FREEZE option is set.
246 : : */
247 : : bool all_visible;
248 : : bool all_frozen;
249 : : TransactionId vm_conflict_horizon;
250 : :
251 : : /*
252 : : * Whether or not the page makes rel truncation unsafe. This is set to
253 : : * 'true', even if the page contains LP_DEAD items. VACUUM will remove
254 : : * them before attempting to truncate.
255 : : */
256 : : bool hastup;
257 : :
258 : : /*
259 : : * LP_DEAD items on the page after pruning. Includes existing LP_DEAD
260 : : * items.
261 : : */
262 : : int lpdead_items;
263 : : OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; /* NOTE(review): presumably the offsets of those LP_DEAD items -- confirm */
264 : : } PruneFreezeResult;
265 : :
265 : : /* 'reason' codes for heap_page_prune_and_freeze() and log_heap_prune_and_freeze() */
266 : : typedef enum
267 : : {
268 : : PRUNE_ON_ACCESS, /* on-access pruning */
269 : : PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
270 : : PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
271 : : } PruneReason;
273 : :
274 : : /* ----------------
275 : : * function prototypes for heap access method
276 : : *
277 : : * heap_create, heap_create_with_catalog, and heap_drop_with_catalog
278 : : * are declared in catalog/heap.h
279 : : * ----------------
280 : : */
281 : :
282 : :
283 : : /*
284 : : * HeapScanIsValid
285 : : * True iff the heap scan is valid.
286 : : */
287 : : #define HeapScanIsValid(scan) PointerIsValid(scan)
288 : :
289 : : extern TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
290 : : int nkeys, ScanKey key,
291 : : ParallelTableScanDesc parallel_scan,
292 : : uint32 flags);
293 : : extern void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk,
294 : : BlockNumber numBlks);
295 : : extern void heap_prepare_pagescan(TableScanDesc sscan);
296 : : extern void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
297 : : bool allow_strat, bool allow_sync, bool allow_pagemode);
298 : : extern void heap_endscan(TableScanDesc sscan);
299 : : extern HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction);
300 : : extern bool heap_getnextslot(TableScanDesc sscan,
301 : : ScanDirection direction, struct TupleTableSlot *slot);
302 : : extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
303 : : ItemPointer maxtid);
304 : : extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
305 : : ScanDirection direction,
306 : : TupleTableSlot *slot);
307 : : extern bool heap_fetch(Relation relation, Snapshot snapshot,
308 : : HeapTuple tuple, Buffer *userbuf, bool keep_buf);
309 : : extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
310 : : Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
311 : : bool *all_dead, bool first_call);
312 : :
313 : : extern void heap_get_latest_tid(TableScanDesc sscan, ItemPointer tid);
314 : :
315 : : extern BulkInsertState GetBulkInsertState(void);
316 : : extern void FreeBulkInsertState(BulkInsertState);
317 : : extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);
318 : :
319 : : extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
320 : : int options, BulkInsertState bistate);
321 : : extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
322 : : int ntuples, CommandId cid, int options,
323 : : BulkInsertState bistate);
324 : : extern TM_Result heap_delete(Relation relation, ItemPointer tid,
325 : : CommandId cid, Snapshot crosscheck, bool wait,
326 : : struct TM_FailureData *tmfd, bool changingPart);
327 : : extern void heap_finish_speculative(Relation relation, ItemPointer tid);
328 : : extern void heap_abort_speculative(Relation relation, ItemPointer tid);
329 : : extern TM_Result heap_update(Relation relation, ItemPointer otid,
330 : : HeapTuple newtup,
331 : : CommandId cid, Snapshot crosscheck, bool wait,
332 : : struct TM_FailureData *tmfd, LockTupleMode *lockmode,
333 : : TU_UpdateIndexes *update_indexes);
334 : : extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
335 : : CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy,
336 : : bool follow_updates,
337 : : Buffer *buffer, struct TM_FailureData *tmfd);
338 : :
339 : : extern bool heap_inplace_lock(Relation relation,
340 : : HeapTuple oldtup_ptr, Buffer buffer,
341 : : void (*release_callback) (void *), void *arg);
342 : : extern void heap_inplace_update_and_unlock(Relation relation,
343 : : HeapTuple oldtup, HeapTuple tuple,
344 : : Buffer buffer);
345 : : extern void heap_inplace_unlock(Relation relation,
346 : : HeapTuple oldtup, Buffer buffer);
347 : : extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
348 : : const struct VacuumCutoffs *cutoffs,
349 : : HeapPageFreeze *pagefrz,
350 : : HeapTupleFreeze *frz, bool *totally_frozen);
351 : :
352 : : extern void heap_pre_freeze_checks(Buffer buffer,
353 : : HeapTupleFreeze *tuples, int ntuples);
354 : : extern void heap_freeze_prepared_tuples(Buffer buffer,
355 : : HeapTupleFreeze *tuples, int ntuples);
356 : : extern bool heap_freeze_tuple(HeapTupleHeader tuple,
357 : : TransactionId relfrozenxid, TransactionId relminmxid,
358 : : TransactionId FreezeLimit, TransactionId MultiXactCutoff);
359 : : extern bool heap_tuple_should_freeze(HeapTupleHeader tuple,
360 : : const struct VacuumCutoffs *cutoffs,
361 : : TransactionId *NoFreezePageRelfrozenXid,
362 : : MultiXactId *NoFreezePageRelminMxid);
363 : : extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
364 : :
365 : : extern void simple_heap_insert(Relation relation, HeapTuple tup);
366 : : extern void simple_heap_delete(Relation relation, ItemPointer tid);
367 : : extern void simple_heap_update(Relation relation, ItemPointer otid,
368 : : HeapTuple tup, TU_UpdateIndexes *update_indexes);
369 : :
370 : : extern TransactionId heap_index_delete_tuples(Relation rel,
371 : : TM_IndexDeleteOp *delstate);
372 : :
373 : : /* in heap/pruneheap.c */
374 : : struct GlobalVisState;
375 : : extern void heap_page_prune_opt(Relation relation, Buffer buffer);
376 : : extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer,
377 : : struct GlobalVisState *vistest,
378 : : int options,
379 : : struct VacuumCutoffs *cutoffs,
380 : : PruneFreezeResult *presult,
381 : : PruneReason reason,
382 : : OffsetNumber *off_loc,
383 : : TransactionId *new_relfrozen_xid,
384 : : MultiXactId *new_relmin_mxid);
385 : : extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
386 : : OffsetNumber *redirected, int nredirected,
387 : : OffsetNumber *nowdead, int ndead,
388 : : OffsetNumber *nowunused, int nunused);
389 : : extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
390 : : extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer,
391 : : TransactionId conflict_xid,
392 : : bool cleanup_lock,
393 : : PruneReason reason,
394 : : HeapTupleFreeze *frozen, int nfrozen,
395 : : OffsetNumber *redirected, int nredirected,
396 : : OffsetNumber *dead, int ndead,
397 : : OffsetNumber *unused, int nunused);
398 : :
399 : : /* in heap/vacuumlazy.c */
400 : : extern void heap_vacuum_rel(Relation rel,
401 : : const VacuumParams params, BufferAccessStrategy bstrategy);
402 : :
403 : : /* in heap/heapam_visibility.c */
404 : : extern bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot,
405 : : Buffer buffer);
406 : : extern TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
407 : : Buffer buffer);
408 : : extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
409 : : Buffer buffer);
410 : : extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer,
411 : : TransactionId *dead_after);
412 : : extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
413 : : uint16 infomask, TransactionId xid);
414 : : extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
415 : : extern bool HeapTupleIsSurelyDead(HeapTuple htup,
416 : : struct GlobalVisState *vistest);
417 : :
418 : : /*
419 : : * To avoid leaking too much knowledge about reorderbuffer implementation
420 : : * details this is implemented in reorderbuffer.c not heapam_visibility.c
421 : : */
422 : : struct HTAB;
423 : : extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
424 : : Snapshot snapshot,
425 : : HeapTuple htup,
426 : : Buffer buffer,
427 : : CommandId *cmin, CommandId *cmax);
428 : : extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple,
429 : : Buffer buffer, Snapshot snapshot);
430 : :
431 : : /*
432 : : * heap_execute_freeze_tuple
433 : : * Execute the prepared freezing of a tuple with caller's freeze plan: set xmax from frz->xmax, set xvac as directed by frz->frzflags, then overwrite t_infomask and t_infomask2 with the plan's values.
434 : : *
435 : : * Caller is responsible for ensuring that no other backend can access the
436 : : * storage underlying this tuple, either by holding an exclusive lock on the
437 : : * buffer containing it (which is what lazy VACUUM does), or by having it be
438 : : * in private storage (which is what CLUSTER and friends do).
439 : : */
440 : : static inline void
359 michael@paquier.xyz 441 :CBC 1260743 : heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
442 : : {
443 : 1260743 : HeapTupleHeaderSetXmax(tuple, frz->xmax); /* install the plan's xmax */
444 : :
445 [ - + ]: 1260743 : if (frz->frzflags & XLH_FREEZE_XVAC) /* tested independently of the flag below */
359 michael@paquier.xyz 446 :UBC 0 : HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
447 : :
359 michael@paquier.xyz 448 [ - + ]:CBC 1260743 : if (frz->frzflags & XLH_INVALID_XVAC) /* not an else-branch: takes effect last if both bits are set */
359 michael@paquier.xyz 449 :UBC 0 : HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
450 : :
359 michael@paquier.xyz 451 :CBC 1260743 : tuple->t_infomask = frz->t_infomask; /* whole-field overwrite, not a bit merge */
452 : 1260743 : tuple->t_infomask2 = frz->t_infomask2; /* likewise replaced wholesale */
453 : 1260743 : }
454 : :
455 : : #endif /* HEAPAM_H */
|