Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * multixact.c
4 : : * PostgreSQL multi-transaction-log manager
5 : : *
6 : : * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 : : * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 : : * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 : : * TransactionId and a set of flag bits. The name is a bit historical:
10 : : * originally, a MultiXactId consisted of more than one TransactionId (except
11 : : * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 : : * legitimate to have MultiXactIds that only include a single Xid.
13 : : *
14 : : * The meaning of the flag bits is opaque to this module, but they are mostly
15 : : * used in heapam.c to identify lock modes that each of the member transactions
16 : : * is holding on any given tuple. This module just contains support to store
17 : : * and retrieve the arrays.
18 : : *
19 : : * We use two SLRU areas, one for storing the offsets at which the data
20 : : * starts for each MultiXactId in the other one. This trick allows us to
21 : : * store variable length arrays of TransactionIds. (We could alternatively
22 : : * use one area containing counts and TransactionIds, with valid MultiXactId
23 : : * values pointing at slots containing counts; but that way seems less robust
24 : : * since it would get completely confused if someone inquired about a bogus
25 : : * MultiXactId that pointed to an intermediate slot containing an XID.)
26 : : *
27 : : * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 : : * MEMBERs page is initialized to zeroes, as well as an
29 : : * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 : : * This module ignores the WAL rule "write xlog before data," because it
31 : : * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 : : * rule. The only way for the MXID to be referenced from any data page is for
33 : : * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 : : * an XLOG record that must follow ours. The normal LSN interlock between the
35 : : * data page and that XLOG record will ensure that our XLOG record reaches
36 : : * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 : : * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 : : * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 : : * module's XLOG records completely rebuild the data entered since the last
40 : : * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 : : * before each checkpoint is considered complete.
42 : : *
43 : : * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 : : * crashes and ensure that MXID and offset numbering increases monotonically
45 : : * across a crash. We do this in the same way as it's done for transaction
46 : : * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 : : * could need to worry about, and we just make sure that at the end of
48 : : * replay, the next-MXID and next-offset counters are at least as large as
49 : : * anything we saw during replay.
50 : : *
51 : : * We are able to remove segments no longer necessary by carefully tracking
52 : : * each table's used values: during vacuum, any multixact older than a certain
53 : : * value is removed; the cutoff value is stored in pg_class. The minimum value
54 : : * across all tables in each database is stored in pg_database, and the global
55 : : * minimum across all databases is part of pg_control and is kept in shared
56 : : * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 : : *
58 : : * When new multixactid values are to be created, care is taken that the
59 : : * counter does not fall within the wraparound horizon considering the global
60 : : * minimum value.
61 : : *
62 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
63 : : * Portions Copyright (c) 1994, Regents of the University of California
64 : : *
65 : : * src/backend/access/transam/multixact.c
66 : : *
67 : : *-------------------------------------------------------------------------
68 : : */
69 : : #include "postgres.h"
70 : :
71 : : #include "access/multixact.h"
72 : : #include "access/multixact_internal.h"
73 : : #include "access/slru.h"
74 : : #include "access/twophase.h"
75 : : #include "access/twophase_rmgr.h"
76 : : #include "access/xlog.h"
77 : : #include "access/xloginsert.h"
78 : : #include "access/xlogutils.h"
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "postmaster/autovacuum.h"
83 : : #include "storage/pmsignal.h"
84 : : #include "storage/proc.h"
85 : : #include "storage/procarray.h"
86 : : #include "storage/subsystems.h"
87 : : #include "utils/guc_hooks.h"
88 : : #include "utils/injection_point.h"
89 : : #include "utils/lsyscache.h"
90 : : #include "utils/memutils.h"
91 : :
92 : :
93 : : /*
94 : : * Thresholds used to keep members disk usage in check when multixids have a
95 : : * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
96 : : * starts freezing multixids more aggressively, even if the normal multixid
97 : : * age limits haven't been reached yet.
98 : : */
99 : : #define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
100 : : #define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
101 : :
102 : : static inline MultiXactId
144 heikki.linnakangas@i 103 :GNC 107669 : NextMultiXactId(MultiXactId multi)
104 : : {
105 [ + + ]: 107669 : return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
106 : : }
107 : :
108 : : static inline MultiXactId
688 heikki.linnakangas@i 109 :UBC 0 : PreviousMultiXactId(MultiXactId multi)
110 : : {
111 [ # # ]: 0 : return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
112 : : }
113 : :
114 : : /*
115 : : * Links to shared-memory data structures for MultiXact control
116 : : */
117 : : static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
118 : : static int MultiXactOffsetIoErrorDetail(const void *opaque_data);
119 : : static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
120 : : static int MultiXactMemberIoErrorDetail(const void *opaque_data);
121 : :
122 : : static SlruDesc MultiXactOffsetSlruDesc;
123 : : static SlruDesc MultiXactMemberSlruDesc;
124 : :
125 : : #define MultiXactOffsetCtl (&MultiXactOffsetSlruDesc)
126 : : #define MultiXactMemberCtl (&MultiXactMemberSlruDesc)
127 : :
128 : : /*
129 : : * MultiXact state shared across all backends. All this state is protected
130 : : * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
131 : : * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
132 : : * concurrency's sake, we avoid holding more than one of these locks at a
133 : : * time.)
134 : : */
135 : : typedef struct MultiXactStateData
136 : : {
137 : : /* next-to-be-assigned MultiXactId */
138 : : MultiXactId nextMXact;
139 : :
140 : : /* next-to-be-assigned offset */
141 : : MultiXactOffset nextOffset;
142 : :
143 : : /* Have we completed multixact startup? */
144 : : bool finishedStartup;
145 : :
146 : : /*
147 : : * Oldest multixact that is still potentially referenced by a relation.
148 : : * Anything older than this should not be consulted. These values are
149 : : * updated by vacuum.
150 : : */
151 : : MultiXactId oldestMultiXactId;
152 : : Oid oldestMultiXactDB;
153 : :
154 : : /*
155 : : * Oldest multixact offset that is potentially referenced by a multixact
156 : : * referenced by a relation.
157 : : */
158 : : MultiXactOffset oldestOffset;
159 : :
160 : : /* support for anti-wraparound measures */
161 : : MultiXactId multiVacLimit;
162 : : MultiXactId multiWarnLimit;
163 : : MultiXactId multiStopLimit;
164 : : MultiXactId multiWrapLimit;
165 : :
166 : : /*
167 : : * Per-backend data starts here. We have two arrays stored in the area
168 : : * immediately following the MultiXactStateData struct:
169 : : *
170 : : * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
171 : : * transaction(s) could possibly be a member of, or InvalidMultiXactId
172 : : * when the backend has no live transaction that could possibly be a
173 : : * member of a MultiXact. Each backend sets its entry to the current
174 : : * nextMXact counter just before first acquiring a shared lock in a given
175 : : * transaction, and clears it at transaction end. (This works because only
176 : : * during or after acquiring a shared lock could an XID possibly become a
177 : : * member of a MultiXact, and that MultiXact would have to be created
178 : : * during or after the lock acquisition.)
179 : : *
180 : : * In the OldestMemberMXactId array, there's a slot for all normal
181 : : * backends (0..MaxBackends-1) followed by a slot for max_prepared_xacts
182 : : * prepared transactions.
183 : : *
184 : : * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
185 : : * current transaction(s) think is potentially live, or InvalidMultiXactId
186 : : * when not in a transaction or not in a transaction that's paid any
187 : : * attention to MultiXacts yet. This is computed when first needed in a
188 : : * given transaction, and cleared at transaction end. We can compute it
189 : : * as the minimum of the valid OldestMemberMXactId[] entries at the time
190 : : * we compute it (using nextMXact if none are valid). Each backend is
191 : : * required not to attempt to access any SLRU data for MultiXactIds older
192 : : * than its own OldestVisibleMXactId[] setting; this is necessary because
193 : : * the relevant SLRU data can be concurrently truncated away.
194 : : *
195 : : * In the OldestVisibleMXactId array, there's a slot for all normal
196 : : * backends (0..MaxBackends-1) only. No slots for prepared transactions.
197 : : *
198 : : * The oldest valid value among all of the OldestMemberMXactId[] and
199 : : * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
200 : : * possible value still having any live member transaction -- OldestMxact.
201 : : * Any value older than that is typically removed from tuple headers, or
202 : : * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
203 : : * remove an individual MultiXact xmax whose value is >= its OldestMxact
204 : : * cutoff, though typically only when no individual member XID is still
205 : : * running. See FreezeMultiXactId for full details.
206 : : *
207 : : * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
208 : : * or the oldest extant Multi remaining in the table is used as the new
209 : : * pg_class.relminmxid value (whichever is earlier). The minimum of all
210 : : * relminmxid values in each database is stored in pg_database.datminmxid.
211 : : * In turn, the minimum of all of those values is stored in pg_control.
212 : : * This is used as the truncation point for pg_multixact when unneeded
213 : : * segments get removed by vac_truncate_clog() during vacuuming.
214 : : */
215 : : MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER];
216 : : } MultiXactStateData;
217 : :
218 : : /*
219 : : * Sizes of OldestMemberMXactId and OldestVisibleMXactId arrays.
220 : : */
221 : : #define NumMemberSlots (MaxBackends + max_prepared_xacts)
222 : : #define NumVisibleSlots MaxBackends
223 : :
224 : : /* Pointers to the state data in shared memory */
225 : : static MultiXactStateData *MultiXactState;
226 : : static MultiXactId *OldestMemberMXactId;
227 : : static MultiXactId *OldestVisibleMXactId;
228 : :
229 : : static void MultiXactShmemRequest(void *arg);
230 : : static void MultiXactShmemInit(void *arg);
231 : : static void MultiXactShmemAttach(void *arg);
232 : :
233 : : const ShmemCallbacks MultiXactShmemCallbacks = {
234 : : .request_fn = MultiXactShmemRequest,
235 : : .init_fn = MultiXactShmemInit,
236 : : .attach_fn = MultiXactShmemAttach,
237 : : };
238 : :
239 : : static inline MultiXactId *
64 heikki.linnakangas@i 240 :CBC 4975422 : MyOldestMemberMXactIdSlot(void)
241 : : {
242 : : /*
243 : : * The first MaxBackends entries in the OldestMemberMXactId array are
244 : : * reserved for regular backends. MyProcNumber should index into one of
245 : : * them.
246 : : */
247 [ + - - + ]: 4975422 : Assert(MyProcNumber >= 0 && MyProcNumber < MaxBackends);
248 : 4975422 : return &OldestMemberMXactId[MyProcNumber];
249 : : }
250 : :
251 : : static inline MultiXactId *
252 : 147 : PreparedXactOldestMemberMXactIdSlot(ProcNumber procno)
253 : : {
254 : : int prepared_xact_idx;
255 : :
256 [ - + ]: 147 : Assert(procno >= FIRST_PREPARED_XACT_PROC_NUMBER);
257 : 147 : prepared_xact_idx = procno - FIRST_PREPARED_XACT_PROC_NUMBER;
258 : :
259 : : /*
260 : : * The first MaxBackends entries in the OldestMemberMXactId array are
261 : : * reserved for regular backends. Prepared xacts come after them.
262 : : */
263 [ - + ]: 147 : Assert(MaxBackends + prepared_xact_idx < NumMemberSlots);
264 : 147 : return &OldestMemberMXactId[MaxBackends + prepared_xact_idx];
265 : : }
266 : :
267 : : static inline MultiXactId *
268 : 521310 : MyOldestVisibleMXactIdSlot(void)
269 : : {
270 [ + - - + ]: 521310 : Assert(MyProcNumber >= 0 && MyProcNumber < NumVisibleSlots);
271 : 521310 : return &OldestVisibleMXactId[MyProcNumber];
272 : : }
273 : :
274 : : /*
275 : : * Definitions for the backend-local MultiXactId cache.
276 : : *
277 : : * We use this cache to store known MultiXacts, so we don't need to go to
278 : : * SLRU areas every time.
279 : : *
280 : : * The cache lasts for the duration of a single transaction, the rationale
281 : : * for this being that most entries will contain our own TransactionId and
282 : : * so they will be uninteresting by the time our next transaction starts.
283 : : * (XXX not clear that this is correct --- other members of the MultiXact
284 : : * could hang around longer than we did. However, it's not clear what a
285 : : * better policy for flushing old cache entries would be.) FIXME actually
286 : : * this is plain wrong now that multixacts may contain update Xids.
287 : : *
288 : : * We allocate the cache entries in a memory context that is deleted at
289 : : * transaction end, so we don't need to do retail freeing of entries.
290 : : */
291 : : typedef struct mXactCacheEnt
292 : : {
293 : : MultiXactId multi;
294 : : int nmembers;
295 : : dlist_node node;
296 : : MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
297 : : } mXactCacheEnt;
298 : :
299 : : #define MAX_CACHE_ENTRIES 256
300 : : static dclist_head MXactCache = DCLIST_STATIC_INIT(MXactCache);
301 : : static MemoryContext MXactContext = NULL;
302 : :
303 : : #ifdef MULTIXACT_DEBUG
304 : : #define debug_elog2(a,b) elog(a,b)
305 : : #define debug_elog3(a,b,c) elog(a,b,c)
306 : : #define debug_elog4(a,b,c,d) elog(a,b,c,d)
307 : : #define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
308 : : #define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
309 : : #else
310 : : #define debug_elog2(a,b)
311 : : #define debug_elog3(a,b,c)
312 : : #define debug_elog4(a,b,c,d)
313 : : #define debug_elog5(a,b,c,d,e)
314 : : #define debug_elog6(a,b,c,d,e,f)
315 : : #endif
316 : :
317 : : /* internal MultiXactId management */
318 : : static void MultiXactIdSetOldestVisible(void);
319 : : static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
320 : : int nmembers, MultiXactMember *members);
321 : : static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
322 : :
323 : : /* MultiXact cache management */
324 : : static int mxactMemberComparator(const void *arg1, const void *arg2);
325 : : static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
326 : : static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
327 : : static void mXactCachePut(MultiXactId multi, int nmembers,
328 : : MultiXactMember *members);
329 : :
330 : : /* management of SLRU infrastructure */
331 : :
332 : : /* opaque_data type for MultiXactMemberIoErrorDetail */
333 : : typedef struct MultiXactMemberSlruReadContext
334 : : {
335 : : MultiXactId multi;
336 : : MultiXactOffset offset;
337 : : } MultiXactMemberSlruReadContext;
338 : :
339 : : static void ExtendMultiXactOffset(MultiXactId multi);
340 : : static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
341 : : static void SetOldestOffset(void);
342 : : static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
343 : : static void WriteMTruncateXlogRec(Oid oldestMultiDB,
344 : : MultiXactId oldestMulti,
345 : : MultiXactOffset oldestOffset);
346 : :
347 : :
348 : : /*
349 : : * MultiXactIdCreate
350 : : * Construct a MultiXactId representing two TransactionIds.
351 : : *
352 : : * The two XIDs must be different, or be requesting different statuses.
353 : : *
354 : : * NB - we don't worry about our local MultiXactId cache here, because that
355 : : * is handled by the lower-level routines.
356 : : */
357 : : MultiXactId
4850 alvherre@alvh.no-ip. 358 : 1185 : MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
359 : : TransactionId xid2, MultiXactStatus status2)
360 : : {
361 : : MultiXactId newMulti;
362 : : MultiXactMember members[2];
363 : :
1285 peter@eisentraut.org 364 [ - + ]: 1185 : Assert(TransactionIdIsValid(xid1));
365 [ - + ]: 1185 : Assert(TransactionIdIsValid(xid2));
366 : :
4850 alvherre@alvh.no-ip. 367 [ + + - + ]: 1185 : Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
368 : :
369 : : /* MultiXactIdSetOldestMember() must have been called already. */
64 heikki.linnakangas@i 370 [ - + ]: 1185 : Assert(MultiXactIdIsValid(*MyOldestMemberMXactIdSlot()));
371 : :
372 : : /*
373 : : * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
374 : : * are still running. In typical usage, xid2 will be our own XID and the
375 : : * caller just did a check on xid1, so it'd be wasted effort.
376 : : */
377 : :
4850 alvherre@alvh.no-ip. 378 : 1185 : members[0].xid = xid1;
379 : 1185 : members[0].status = status1;
380 : 1185 : members[1].xid = xid2;
381 : 1185 : members[1].status = status2;
382 : :
4523 383 : 1185 : newMulti = MultiXactIdCreateFromMembers(2, members);
384 : :
385 : : debug_elog3(DEBUG2, "Create: %s",
386 : : mxid_to_string(newMulti, 2, members));
387 : :
7672 tgl@sss.pgh.pa.us 388 : 1185 : return newMulti;
389 : : }
390 : :
391 : : /*
392 : : * MultiXactIdExpand
393 : : * Add a TransactionId to a pre-existing MultiXactId.
394 : : *
395 : : * If the TransactionId is already a member of the passed MultiXactId with the
396 : : * same status, just return it as-is.
397 : : *
398 : : * Note that we do NOT actually modify the membership of a pre-existing
399 : : * MultiXactId; instead we create a new one. This is necessary to avoid
400 : : * a race condition against code trying to wait for one MultiXactId to finish;
401 : : * see notes in heapam.c.
402 : : *
403 : : * NB - we don't worry about our local MultiXactId cache here, because that
404 : : * is handled by the lower-level routines.
405 : : *
406 : : * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
407 : : * one upgraded by pg_upgrade from a cluster older than this feature) are not
408 : : * passed in.
409 : : */
410 : : MultiXactId
4850 alvherre@alvh.no-ip. 411 : 75533 : MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
412 : : {
413 : : MultiXactId newMulti;
414 : : MultiXactMember *members;
415 : : MultiXactMember *newMembers;
416 : : int nmembers;
417 : : int i;
418 : : int j;
419 : :
1285 peter@eisentraut.org 420 [ - + ]: 75533 : Assert(MultiXactIdIsValid(multi));
421 [ - + ]: 75533 : Assert(TransactionIdIsValid(xid));
422 : :
423 : : /* MultiXactIdSetOldestMember() must have been called already. */
64 heikki.linnakangas@i 424 [ - + ]: 75533 : Assert(MultiXactIdIsValid(*MyOldestMemberMXactIdSlot()));
425 : :
426 : : debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
427 : : multi, xid, mxstatus_to_string(status));
428 : :
429 : : /*
430 : : * Note: we don't allow for old multis here. The reason is that the only
431 : : * caller of this function does a check that the multixact is no longer
432 : : * running.
433 : : */
4298 alvherre@alvh.no-ip. 434 : 75533 : nmembers = GetMultiXactIdMembers(multi, &members, false, false);
435 : :
7677 tgl@sss.pgh.pa.us 436 [ - + ]: 75533 : if (nmembers < 0)
437 : : {
438 : : MultiXactMember member;
439 : :
440 : : /*
441 : : * The MultiXactId is obsolete. This can only happen if all the
442 : : * MultiXactId members stop running between the caller checking and
443 : : * passing it to us. It would be better to return that fact to the
444 : : * caller, but it would complicate the API and it's unlikely to happen
445 : : * too often, so just deal with it by creating a singleton MultiXact.
446 : : */
4850 alvherre@alvh.no-ip. 447 :UBC 0 : member.xid = xid;
448 : 0 : member.status = status;
4523 449 : 0 : newMulti = MultiXactIdCreateFromMembers(1, &member);
450 : :
451 : : debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
452 : : multi, newMulti);
7677 tgl@sss.pgh.pa.us 453 : 0 : return newMulti;
454 : : }
455 : :
456 : : /*
457 : : * If the TransactionId is already a member of the MultiXactId with the
458 : : * same status, just return the existing MultiXactId.
459 : : */
7677 tgl@sss.pgh.pa.us 460 [ + + ]:CBC 1465921 : for (i = 0; i < nmembers; i++)
461 : : {
4850 alvherre@alvh.no-ip. 462 [ + + ]: 1390388 : if (TransactionIdEquals(members[i].xid, xid) &&
463 [ - + ]: 54 : (members[i].status == status))
464 : : {
465 : : debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
466 : : xid, multi);
7668 tgl@sss.pgh.pa.us 467 :UBC 0 : pfree(members);
7677 468 : 0 : return multi;
469 : : }
470 : : }
471 : :
472 : : /*
473 : : * Determine which of the members of the MultiXactId are still of
474 : : * interest. This is any running transaction, and also any transaction
475 : : * that grabbed something stronger than just a lock and was committed. (An
476 : : * update that aborted is of no interest here; and having more than one
477 : : * update Xid in a multixact would cause errors elsewhere.)
478 : : *
479 : : * Removing dead members is not just an optimization: freezing of tuples
480 : : * whose Xmax are multis depends on this behavior.
481 : : *
482 : : * Note we have the same race condition here as above: j could be 0 at the
483 : : * end of the loop.
484 : : */
146 michael@paquier.xyz 485 :GNC 75533 : newMembers = palloc_array(MultiXactMember, nmembers + 1);
486 : :
7677 tgl@sss.pgh.pa.us 487 [ + + ]:CBC 1465921 : for (i = 0, j = 0; i < nmembers; i++)
488 : : {
4850 alvherre@alvh.no-ip. 489 [ + + ]: 1390388 : if (TransactionIdIsInProgress(members[i].xid) ||
4394 490 [ + + - + ]: 74685 : (ISUPDATE_from_mxstatus(members[i].status) &&
4850 491 : 18 : TransactionIdDidCommit(members[i].xid)))
492 : : {
493 : 1315721 : newMembers[j].xid = members[i].xid;
494 : 1315721 : newMembers[j++].status = members[i].status;
495 : : }
496 : : }
497 : :
498 : 75533 : newMembers[j].xid = xid;
499 : 75533 : newMembers[j++].status = status;
4523 500 : 75533 : newMulti = MultiXactIdCreateFromMembers(j, newMembers);
501 : :
7677 tgl@sss.pgh.pa.us 502 : 75533 : pfree(members);
503 : 75533 : pfree(newMembers);
504 : :
505 : : debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
506 : :
507 : 75533 : return newMulti;
508 : : }
509 : :
510 : : /*
511 : : * MultiXactIdIsRunning
512 : : * Returns whether a MultiXactId is "running".
513 : : *
514 : : * We return true if at least one member of the given MultiXactId is still
515 : : * running. Note that a "false" result is certain not to change,
516 : : * because it is not legal to add members to an existing MultiXactId.
517 : : *
518 : : * Caller is expected to have verified that the multixact does not come from
519 : : * a pg_upgraded share-locked tuple.
520 : : */
521 : : bool
4298 alvherre@alvh.no-ip. 522 : 149854 : MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
523 : : {
524 : : MultiXactMember *members;
525 : : int nmembers;
526 : : int i;
527 : :
528 : : debug_elog3(DEBUG2, "IsRunning %u?", multi);
529 : :
530 : : /*
531 : : * "false" here means we assume our callers have checked that the given
532 : : * multi cannot possibly come from a pg_upgraded database.
533 : : */
534 : 149854 : nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
535 : :
4043 536 [ + + ]: 149854 : if (nmembers <= 0)
537 : : {
538 : : debug_elog2(DEBUG2, "IsRunning: no members");
7677 tgl@sss.pgh.pa.us 539 : 758 : return false;
540 : : }
541 : :
542 : : /*
543 : : * Checking for myself is cheap compared to looking in shared memory;
544 : : * return true if any live subtransaction of the current top-level
545 : : * transaction is a member.
546 : : *
547 : : * This is not needed for correctness, it's just a fast path.
548 : : */
549 [ + + ]: 2891450 : for (i = 0; i < nmembers; i++)
550 : : {
4850 alvherre@alvh.no-ip. 551 [ + + ]: 2742512 : if (TransactionIdIsCurrentTransactionId(members[i].xid))
552 : : {
553 : : debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
7668 tgl@sss.pgh.pa.us 554 : 158 : pfree(members);
7677 555 : 158 : return true;
556 : : }
557 : : }
558 : :
559 : : /*
560 : : * This could be made faster by having another entry point in procarray.c,
561 : : * walking the PGPROC array only once for all the members. But in most
562 : : * cases nmembers should be small enough that it doesn't much matter.
563 : : */
564 [ + + ]: 296131 : for (i = 0; i < nmembers; i++)
565 : : {
4850 alvherre@alvh.no-ip. 566 [ + + ]: 296091 : if (TransactionIdIsInProgress(members[i].xid))
567 : : {
568 : : debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
569 : : i, members[i].xid);
7668 tgl@sss.pgh.pa.us 570 : 148898 : pfree(members);
7677 571 : 148898 : return true;
572 : : }
573 : : }
574 : :
575 : 40 : pfree(members);
576 : :
577 : : debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
578 : :
579 : 40 : return false;
580 : : }
581 : :
582 : : /*
583 : : * MultiXactIdSetOldestMember
584 : : * Save the oldest MultiXactId this transaction could be a member of.
585 : : *
586 : : * We set the OldestMemberMXactId for a given transaction the first time it's
587 : : * going to do some operation that might require a MultiXactId (tuple lock,
588 : : * update or delete). We need to do this even if we end up using a
589 : : * TransactionId instead of a MultiXactId, because there is a chance that
590 : : * another transaction would add our XID to a MultiXactId.
591 : : *
592 : : * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
593 : : * be called just before doing any such possibly-MultiXactId-able operation.
594 : : */
595 : : void
596 : 4387546 : MultiXactIdSetOldestMember(void)
597 : : {
64 heikki.linnakangas@i 598 [ + + ]: 4387546 : if (!MultiXactIdIsValid(*MyOldestMemberMXactIdSlot()))
599 : : {
600 : : MultiXactId nextMXact;
601 : :
602 : : /*
603 : : * You might think we don't need to acquire a lock here, since
604 : : * fetching and storing of TransactionIds is probably atomic, but in
605 : : * fact we do: suppose we pick up nextMXact and then lose the CPU for
606 : : * a long time. Someone else could advance nextMXact, and then
607 : : * another someone else could compute an OldestVisibleMXactId that
608 : : * would be after the value we are going to store when we get control
609 : : * back. Which would be wrong.
610 : : *
611 : : * Note that a shared lock is sufficient, because it's enough to stop
612 : : * someone from advancing nextMXact; and nobody else could be trying
613 : : * to write to our OldestMember entry, only reading (and we assume
614 : : * storing it is atomic.)
615 : : */
4506 alvherre@alvh.no-ip. 616 : 88938 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
617 : :
7677 tgl@sss.pgh.pa.us 618 : 88938 : nextMXact = MultiXactState->nextMXact;
619 : :
64 heikki.linnakangas@i 620 : 88938 : *MyOldestMemberMXactIdSlot() = nextMXact;
621 : :
7677 tgl@sss.pgh.pa.us 622 : 88938 : LWLockRelease(MultiXactGenLock);
623 : :
624 : : debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
625 : : MyProcNumber, nextMXact);
626 : : }
627 : 4387546 : }
628 : :
629 : : /*
630 : : * MultiXactIdSetOldestVisible
631 : : * Save the oldest MultiXactId this transaction considers possibly live.
632 : : *
633 : : * We set the OldestVisibleMXactId for a given transaction the first time
634 : : * it's going to inspect any MultiXactId. Once we have set this, we are
635 : : * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
636 : : * won't be truncated away.
637 : : *
638 : : * The value to set is the oldest of nextMXact and all the valid per-backend
639 : : * OldestMemberMXactId[] entries. Because of the locking we do, we can be
640 : : * certain that no subsequent call to MultiXactIdSetOldestMember can set
641 : : * an OldestMemberMXactId[] entry older than what we compute here. Therefore
642 : : * there is no live transaction, now or later, that can be a member of any
643 : : * MultiXactId older than the OldestVisibleMXactId we compute here.
644 : : */
645 : : static void
646 : 92506 : MultiXactIdSetOldestVisible(void)
647 : : {
64 heikki.linnakangas@i 648 [ + + ]: 92506 : if (!MultiXactIdIsValid(*MyOldestVisibleMXactIdSlot()))
649 : : {
650 : : MultiXactId oldestMXact;
651 : : int i;
652 : :
7677 tgl@sss.pgh.pa.us 653 : 3222 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
654 : :
655 : 3222 : oldestMXact = MultiXactState->nextMXact;
64 heikki.linnakangas@i 656 [ + + ]: 411286 : for (i = 0; i < NumMemberSlots; i++)
657 : : {
7677 tgl@sss.pgh.pa.us 658 : 408064 : MultiXactId thisoldest = OldestMemberMXactId[i];
659 : :
660 [ + + + + ]: 463984 : if (MultiXactIdIsValid(thisoldest) &&
661 : 55920 : MultiXactIdPrecedes(thisoldest, oldestMXact))
662 : 5671 : oldestMXact = thisoldest;
663 : : }
664 : :
64 heikki.linnakangas@i 665 : 3222 : *MyOldestVisibleMXactIdSlot() = oldestMXact;
666 : :
7677 tgl@sss.pgh.pa.us 667 : 3222 : LWLockRelease(MultiXactGenLock);
668 : :
669 : : debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
670 : : MyProcNumber, oldestMXact);
671 : : }
672 : 92506 : }
673 : :
674 : : /*
675 : : * ReadNextMultiXactId
676 : : * Return the next MultiXactId to be assigned, but don't allocate it
677 : : */
678 : : MultiXactId
4850 alvherre@alvh.no-ip. 679 : 38019 : ReadNextMultiXactId(void)
680 : : {
681 : : MultiXactId mxid;
682 : :
683 : : /* XXX we could presumably do this without a lock. */
684 : 38019 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
685 : 38019 : mxid = MultiXactState->nextMXact;
686 : 38019 : LWLockRelease(MultiXactGenLock);
687 : :
688 : 38019 : return mxid;
689 : : }
690 : :
691 : : /*
692 : : * ReadMultiXactIdRange
693 : : * Get the range of IDs that may still be referenced by a relation.
694 : : */
695 : : void
2021 rhaas@postgresql.org 696 : 1430 : ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
697 : : {
698 : 1430 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
699 : 1430 : *oldest = MultiXactState->oldestMultiXactId;
700 : 1430 : *next = MultiXactState->nextMXact;
701 : 1430 : LWLockRelease(MultiXactGenLock);
702 : 1430 : }
703 : :
704 : :
705 : : /*
706 : : * MultiXactIdCreateFromMembers
707 : : * Make a new MultiXactId from the specified set of members
708 : : *
709 : : * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
710 : : * given TransactionIds as members. Returns the newly created MultiXactId.
711 : : *
712 : : * NB: the passed members[] array will be sorted in-place.
713 : : */
714 : : MultiXactId
4523 alvherre@alvh.no-ip. 715 : 76719 : MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
716 : : {
717 : : MultiXactId multi;
718 : : MultiXactOffset offset;
719 : : xl_multixact_create xlrec;
720 : :
721 : : debug_elog3(DEBUG2, "Create: %s",
722 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
723 : :
724 : : /*
725 : : * See if the same set of members already exists in our cache; if so, just
726 : : * re-use that MultiXactId. (Note: it might seem that looking in our
727 : : * cache is insufficient, and we ought to search disk to see if a
728 : : * duplicate definition already exists. But since we only ever create
729 : : * MultiXacts containing our own XID, in most cases any such MultiXacts
730 : : * were in fact created by us, and so will be in our cache. There are
731 : : * corner cases where someone else added us to a MultiXact without our
732 : : * knowledge, but it's not worth checking for.)
733 : : */
4850 734 : 76719 : multi = mXactCacheGetBySet(nmembers, members);
7677 tgl@sss.pgh.pa.us 735 [ + + ]: 76719 : if (MultiXactIdIsValid(multi))
736 : : {
737 : : debug_elog2(DEBUG2, "Create: in cache!");
738 : 71415 : return multi;
739 : : }
740 : :
741 : : /* Verify that there is a single update Xid among the given members. */
742 : : {
743 : : int i;
4394 alvherre@alvh.no-ip. 744 : 5304 : bool has_update = false;
745 : :
746 [ + + ]: 100026 : for (i = 0; i < nmembers; i++)
747 : : {
748 [ + + ]: 94722 : if (ISUPDATE_from_mxstatus(members[i].status))
749 : : {
750 [ - + ]: 2376 : if (has_update)
1258 alvherre@alvh.no-ip. 751 [ # # ]:UBC 0 : elog(ERROR, "new multixact has more than one updating member: %s",
752 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
4394 alvherre@alvh.no-ip. 753 :CBC 2376 : has_update = true;
754 : : }
755 : : }
756 : : }
757 : :
758 : : /* Load the injection point before entering the critical section */
620 michael@paquier.xyz 759 : 5304 : INJECTION_POINT_LOAD("multixact-create-from-members");
760 : :
761 : : /*
762 : : * Assign the MXID and offsets range to use, and make sure there is space
763 : : * in the OFFSETs and MEMBERs files. NB: this routine does
764 : : * START_CRIT_SECTION().
765 : : *
766 : : * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
767 : : * that we've called MultiXactIdSetOldestMember here. This is because
768 : : * this routine is used in some places to create new MultiXactIds of which
769 : : * the current backend is not a member, notably during freezing of multis
770 : : * in vacuum. During vacuum, in particular, it would be unacceptable to
771 : : * keep OldestMulti set, in case it runs for long.
772 : : */
4850 alvherre@alvh.no-ip. 773 : 5304 : multi = GetNewMultiXactId(nmembers, &offset);
774 : :
360 michael@paquier.xyz 775 : 5304 : INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
776 : :
777 : : /* Make an XLOG entry describing the new MXID. */
7636 tgl@sss.pgh.pa.us 778 : 5304 : xlrec.mid = multi;
779 : 5304 : xlrec.moff = offset;
4850 alvherre@alvh.no-ip. 780 : 5304 : xlrec.nmembers = nmembers;
781 : :
782 : : /*
783 : : * XXX Note: there's a lot of padding space in MultiXactMember. We could
784 : : * find a more compact representation of this Xlog record -- perhaps all
785 : : * the status flags in one XLogRecData, then all the xids in another one?
786 : : * Not clear that it's worth the trouble though.
787 : : */
4184 heikki.linnakangas@i 788 : 5304 : XLogBeginInsert();
448 peter@eisentraut.org 789 : 5304 : XLogRegisterData(&xlrec, SizeOfMultiXactCreate);
790 : 5304 : XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
791 : :
4184 heikki.linnakangas@i 792 : 5304 : (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
793 : :
794 : : /* Now enter the information into the OFFSETs and MEMBERs logs */
4850 alvherre@alvh.no-ip. 795 : 5304 : RecordNewMultiXact(multi, offset, nmembers, members);
796 : :
797 : : /* Done with critical section */
7494 tgl@sss.pgh.pa.us 798 [ - + ]: 5304 : END_CRIT_SECTION();
799 : :
800 : : /* Store the new MultiXactId in the local cache, too */
4850 alvherre@alvh.no-ip. 801 : 5304 : mXactCachePut(multi, nmembers, members);
802 : :
803 : : debug_elog2(DEBUG2, "Create: all done");
804 : :
7636 tgl@sss.pgh.pa.us 805 : 5304 : return multi;
806 : : }
807 : :
808 : : /*
809 : : * RecordNewMultiXact
810 : : * Write info about a new multixact into the offsets and members files
811 : : *
812 : : * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
813 : : * use it.
814 : : */
815 : : static void
816 : 5309 : RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
817 : : int nmembers, MultiXactMember *members)
818 : : {
819 : : int64 pageno;
820 : : int64 prev_pageno;
821 : : int entryno;
822 : : int slotno;
823 : : MultiXactOffset *offptr;
824 : : MultiXactId next;
825 : : int64 next_pageno;
826 : : int next_entryno;
827 : : MultiXactOffset *next_offptr;
828 : : MultiXactOffset next_offset;
829 : : LWLock *lock;
797 alvherre@alvh.no-ip. 830 : 5309 : LWLock *prevlock = NULL;
831 : :
832 : : /* position of this multixid in the offsets SLRU area */
7677 tgl@sss.pgh.pa.us 833 : 5309 : pageno = MultiXactIdToOffsetPage(multi);
834 : 5309 : entryno = MultiXactIdToOffsetEntry(multi);
835 : :
836 : : /* position of the next multixid */
144 heikki.linnakangas@i 837 :GNC 5309 : next = NextMultiXactId(multi);
153 heikki.linnakangas@i 838 :CBC 5309 : next_pageno = MultiXactIdToOffsetPage(next);
839 : 5309 : next_entryno = MultiXactIdToOffsetEntry(next);
840 : :
841 : : /*
842 : : * Set the starting offset of this multixid's members.
843 : : *
844 : : * In the common case, it was already set by the previous
845 : : * RecordNewMultiXact call, as this was the next multixid of the previous
846 : : * multixid. But if multiple backends are generating multixids
847 : : * concurrently, we might race ahead and get called before the previous
848 : : * multixid.
849 : : */
797 alvherre@alvh.no-ip. 850 : 5309 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
851 : 5309 : LWLockAcquire(lock, LW_EXCLUSIVE);
852 : :
53 heikki.linnakangas@i 853 :GNC 5309 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
7636 tgl@sss.pgh.pa.us 854 :CBC 5309 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7677 855 : 5309 : offptr += entryno;
856 : :
153 heikki.linnakangas@i 857 [ + + ]: 5309 : if (*offptr != offset)
858 : : {
859 : : /* should already be set to the correct value, or not at all */
860 [ - + ]: 1 : Assert(*offptr == 0);
861 : 1 : *offptr = offset;
862 : 1 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
863 : : }
864 : :
865 : : /*
866 : : * Set the next multixid's offset to the end of this multixid's members.
867 : : */
868 [ + + ]: 5309 : if (next_pageno == pageno)
869 : : {
870 : 5304 : next_offptr = offptr + 1;
871 : : }
872 : : else
873 : : {
874 : : /* must be the first entry on the page */
153 heikki.linnakangas@i 875 [ + + - + ]:GBC 5 : Assert(next_entryno == 0 || next == FirstMultiXactId);
876 : :
877 : : /* Swap the lock for a lock on the next page */
878 : 5 : LWLockRelease(lock);
879 : 5 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, next_pageno);
880 : 5 : LWLockAcquire(lock, LW_EXCLUSIVE);
881 : :
53 heikki.linnakangas@i 882 :GNC 5 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, next_pageno, true, &next);
153 heikki.linnakangas@i 883 :GBC 5 : next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
884 : 5 : next_offptr += next_entryno;
885 : : }
886 : :
887 : : /* Like in GetNewMultiXactId(), skip over offset 0 */
151 heikki.linnakangas@i 888 :CBC 5309 : next_offset = offset + nmembers;
889 [ - + ]: 5309 : if (next_offset == 0)
151 heikki.linnakangas@i 890 :UBC 0 : next_offset = 1;
151 heikki.linnakangas@i 891 [ + - ]:CBC 5309 : if (*next_offptr != next_offset)
892 : : {
893 : : /* should already be set to the correct value, or not at all */
153 894 [ - + ]: 5309 : Assert(*next_offptr == 0);
151 895 : 5309 : *next_offptr = next_offset;
153 896 : 5309 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
897 : : }
898 : :
899 : : /* Release MultiXactOffset SLRU lock. */
797 alvherre@alvh.no-ip. 900 : 5309 : LWLockRelease(lock);
901 : :
7677 tgl@sss.pgh.pa.us 902 : 5309 : prev_pageno = -1;
903 : :
153 heikki.linnakangas@i 904 [ + + ]: 100041 : for (int i = 0; i < nmembers; i++, offset++)
905 : : {
906 : : TransactionId *memberptr;
907 : : uint32 *flagsptr;
908 : : uint32 flagsval;
909 : : int bshift;
910 : : int flagsoff;
911 : : int memberoff;
912 : :
4850 alvherre@alvh.no-ip. 913 [ - + ]: 94732 : Assert(members[i].status <= MultiXactStatusUpdate);
914 : :
7677 tgl@sss.pgh.pa.us 915 : 94732 : pageno = MXOffsetToMemberPage(offset);
4850 alvherre@alvh.no-ip. 916 : 94732 : memberoff = MXOffsetToMemberOffset(offset);
917 : 94732 : flagsoff = MXOffsetToFlagsOffset(offset);
918 : 94732 : bshift = MXOffsetToFlagsBitShift(offset);
919 : :
7677 tgl@sss.pgh.pa.us 920 [ + + ]: 94732 : if (pageno != prev_pageno)
921 : : {
53 heikki.linnakangas@i 922 :GNC 5363 : MultiXactMemberSlruReadContext slru_read_context = {multi, offset};
923 : :
924 : : /*
925 : : * MultiXactMember SLRU page is changed so check if this new page
926 : : * fall into the different SLRU bank then release the old bank's
927 : : * lock and acquire lock on the new bank.
928 : : */
797 alvherre@alvh.no-ip. 929 :CBC 5363 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
930 [ + - ]: 5363 : if (lock != prevlock)
931 : : {
932 [ + + ]: 5363 : if (prevlock != NULL)
797 alvherre@alvh.no-ip. 933 :GBC 54 : LWLockRelease(prevlock);
934 : :
797 alvherre@alvh.no-ip. 935 :CBC 5363 : LWLockAcquire(lock, LW_EXCLUSIVE);
936 : 5363 : prevlock = lock;
937 : : }
53 heikki.linnakangas@i 938 :GNC 5363 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true,
939 : : &slru_read_context);
7677 tgl@sss.pgh.pa.us 940 :CBC 5363 : prev_pageno = pageno;
941 : : }
942 : :
943 : 94732 : memberptr = (TransactionId *)
4850 alvherre@alvh.no-ip. 944 : 94732 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
945 : :
946 : 94732 : *memberptr = members[i].xid;
947 : :
948 : 94732 : flagsptr = (uint32 *)
949 : 94732 : (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
950 : :
951 : 94732 : flagsval = *flagsptr;
952 : 94732 : flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
953 : 94732 : flagsval |= (members[i].status << bshift);
954 : 94732 : *flagsptr = flagsval;
955 : :
7486 tgl@sss.pgh.pa.us 956 : 94732 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
957 : : }
958 : :
797 alvherre@alvh.no-ip. 959 [ + - ]: 5309 : if (prevlock != NULL)
960 : 5309 : LWLockRelease(prevlock);
7677 tgl@sss.pgh.pa.us 961 : 5309 : }
962 : :
963 : : /*
964 : : * GetNewMultiXactId
965 : : * Get the next MultiXactId.
966 : : *
967 : : * Also, reserve the needed amount of space in the "members" area. The
968 : : * starting offset of the reserved space is returned in *offset.
969 : : *
970 : : * This may generate XLOG records for expansion of the offsets and/or members
971 : : * files. Unfortunately, we have to do that while holding MultiXactGenLock
972 : : * to avoid race conditions --- the XLOG record for zeroing a page must appear
973 : : * before any backend can possibly try to store data in that page!
974 : : *
975 : : * We start a critical section before advancing the shared counters. The
976 : : * caller must end the critical section after writing SLRU data.
977 : : */
978 : : static MultiXactId
4850 alvherre@alvh.no-ip. 979 : 5304 : GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
980 : : {
981 : : MultiXactId result;
982 : : MultiXactOffset nextOffset;
983 : :
984 : : debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
985 : :
986 : : /* safety check, we should never get this far in a HS standby */
987 [ - + ]: 5304 : if (RecoveryInProgress())
4850 alvherre@alvh.no-ip. 988 [ # # ]:UBC 0 : elog(ERROR, "cannot assign MultiXactIds during recovery");
989 : :
7677 tgl@sss.pgh.pa.us 990 :CBC 5304 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
991 : :
992 : : /* Assign the MXID */
993 : 5304 : result = MultiXactState->nextMXact;
994 : :
995 : : /*----------
996 : : * Check to see if it's safe to assign another MultiXactId. This protects
997 : : * against catastrophic data loss due to multixact wraparound. The basic
998 : : * rules are:
999 : : *
1000 : : * If we're past multiVacLimit or the safe threshold for member storage
1001 : : * space, or we don't know what the safe threshold for member storage is,
1002 : : * start trying to force autovacuum cycles.
1003 : : * If we're past multiWarnLimit, start issuing warnings.
1004 : : * If we're past multiStopLimit, refuse to create new MultiXactIds.
1005 : : *
1006 : : * Note these are pretty much the same protections in GetNewTransactionId.
1007 : : *----------
1008 : : */
3971 andres@anarazel.de 1009 [ - + ]: 5304 : if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
1010 : : {
1011 : : /*
1012 : : * For safety's sake, we release MultiXactGenLock while sending
1013 : : * signals, warnings, etc. This is not so much because we care about
1014 : : * preserving concurrency in this situation, as to avoid any
1015 : : * possibility of deadlock while doing get_database_name(). First,
1016 : : * copy all the shared values we'll need in this path.
1017 : : */
4850 alvherre@alvh.no-ip. 1018 :UBC 0 : MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
1019 : 0 : MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
1020 : 0 : MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
1021 : 0 : Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
1022 : :
1023 : 0 : LWLockRelease(MultiXactGenLock);
1024 : :
1025 [ # # ]: 0 : if (IsUnderPostmaster &&
1026 [ # # ]: 0 : !MultiXactIdPrecedes(result, multiStopLimit))
1027 : : {
1028 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1029 : :
1030 : : /*
1031 : : * Immediately kick autovacuum into action as we're already in
1032 : : * ERROR territory.
1033 : : */
3971 andres@anarazel.de 1034 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1035 : :
1036 : : /* complain even if that DB has disappeared */
4850 alvherre@alvh.no-ip. 1037 [ # # ]: 0 : if (oldest_datname)
1038 [ # # ]: 0 : ereport(ERROR,
1039 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1040 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
1041 : : oldest_datname),
1042 : : errhint("Execute a database-wide VACUUM in that database.\n"
1043 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1044 : : else
1045 [ # # ]: 0 : ereport(ERROR,
1046 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1047 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
1048 : : oldest_datoid),
1049 : : errhint("Execute a database-wide VACUUM in that database.\n"
1050 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1051 : : }
1052 : :
1053 : : /*
1054 : : * To avoid swamping the postmaster with signals, we issue the autovac
1055 : : * request only once per 64K multis generated. This still gives
1056 : : * plenty of chances before we get into real trouble.
1057 : : */
144 heikki.linnakangas@i 1058 [ # # # # :UNC 0 : if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
# # ]
3971 andres@anarazel.de 1059 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1060 : :
1061 [ # # ]: 0 : if (!MultiXactIdPrecedes(result, multiWarnLimit))
1062 : : {
4850 alvherre@alvh.no-ip. 1063 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1064 : :
1065 : : /* complain even if that DB has disappeared */
1066 [ # # ]: 0 : if (oldest_datname)
1067 [ # # ]: 0 : ereport(WARNING,
1068 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1069 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1070 : : multiWrapLimit - result,
1071 : : oldest_datname,
1072 : : multiWrapLimit - result),
1073 : : errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
1074 : : (double) (multiWrapLimit - result) / (MaxMultiXactId / 2) * 100),
1075 : : errhint("Execute a database-wide VACUUM in that database.\n"
1076 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1077 : : else
1078 [ # # ]: 0 : ereport(WARNING,
1079 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1080 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1081 : : multiWrapLimit - result,
1082 : : oldest_datoid,
1083 : : multiWrapLimit - result),
1084 : : errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
1085 : : (double) (multiWrapLimit - result) / (MaxMultiXactId / 2) * 100),
1086 : : errhint("Execute a database-wide VACUUM in that database.\n"
1087 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1088 : : }
1089 : :
1090 : : /* Re-acquire lock and start over */
1091 : 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1092 : 0 : result = MultiXactState->nextMXact;
1093 : : }
1094 : :
1095 : : /*
1096 : : * Make sure there is room for the next MXID in the file. Assigning this
1097 : : * MXID sets the next MXID's offset already.
1098 : : */
144 heikki.linnakangas@i 1099 :GNC 5304 : ExtendMultiXactOffset(NextMultiXactId(result));
1100 : :
1101 : : /*
1102 : : * Reserve the members space, similarly to above.
1103 : : */
7494 tgl@sss.pgh.pa.us 1104 :CBC 5304 : nextOffset = MultiXactState->nextOffset;
1105 : :
1106 : : /*
1107 : : * Offsets are 64-bit integers and will never wrap around. Firstly, it
1108 : : * would take an unrealistic amount of time and resources to consume 2^64
1109 : : * offsets. Secondly, multixid creation is WAL-logged, so you would run
1110 : : * out of LSNs before reaching offset wraparound. Nevertheless, check for
1111 : : * wraparound as a sanity check.
1112 : : */
147 heikki.linnakangas@i 1113 [ - + ]:GNC 5304 : if (nextOffset + nmembers < nextOffset)
147 heikki.linnakangas@i 1114 [ # # ]:UNC 0 : ereport(ERROR,
1115 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1116 : : errmsg("MultiXact members would wrap around")));
147 heikki.linnakangas@i 1117 :GNC 5304 : *offset = nextOffset;
1118 : :
4850 alvherre@alvh.no-ip. 1119 :CBC 5304 : ExtendMultiXactMember(nextOffset, nmembers);
1120 : :
1121 : : /*
1122 : : * Critical section from here until caller has written the data into the
1123 : : * just-reserved SLRU space; we don't want to error out with a partly
1124 : : * written MultiXact structure. (In particular, failing to write our
1125 : : * start offset after advancing nextMXact would effectively corrupt the
1126 : : * previous MultiXact.)
1127 : : */
7494 tgl@sss.pgh.pa.us 1128 : 5304 : START_CRIT_SECTION();
1129 : :
1130 : : /*
1131 : : * Advance counters. As in GetNewTransactionId(), this must not happen
1132 : : * until after file extension has succeeded!
1133 : : */
144 heikki.linnakangas@i 1134 :GNC 5304 : MultiXactState->nextMXact = NextMultiXactId(result);
4850 alvherre@alvh.no-ip. 1135 :CBC 5304 : MultiXactState->nextOffset += nmembers;
1136 : :
7677 tgl@sss.pgh.pa.us 1137 : 5304 : LWLockRelease(MultiXactGenLock);
1138 : :
1139 : : debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1140 : : result, *offset);
1141 : 5304 : return result;
1142 : : }
1143 : :
1144 : : /*
1145 : : * GetMultiXactIdMembers
1146 : : * Return the set of MultiXactMembers that make up a MultiXactId
1147 : : *
1148 : : * Return value is the number of members found, or -1 if there are none,
1149 : : * and *members is set to a newly palloc'ed array of members. It's the
1150 : : * caller's responsibility to free it when done with it.
1151 : : *
1152 : : * from_pgupgrade must be passed as true if and only if only the multixact
1153 : : * corresponds to a value from a tuple that was locked in a 9.2-or-older
1154 : : * installation and later pg_upgrade'd (that is, the infomask is
1155 : : * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1156 : : * can still be running, so we return -1 just like for an empty multixact
1157 : : * without any further checking. It would be wrong to try to resolve such a
1158 : : * multixact: either the multixact is within the current valid multixact
1159 : : * range, in which case the returned result would be bogus, or outside that
1160 : : * range, in which case an error would be raised.
1161 : : *
1162 : : * In all other cases, the passed multixact must be within the known valid
1163 : : * range, that is, greater than or equal to oldestMultiXactId, and less than
1164 : : * nextMXact. Otherwise, an error is raised.
1165 : : *
1166 : : * isLockOnly must be set to true if caller is certain that the given multi
1167 : : * is used only to lock tuples; can be false without loss of correctness,
1168 : : * but passing a true means we can return quickly without checking for
1169 : : * old updates.
1170 : : */
1171 : : int
4850 alvherre@alvh.no-ip. 1172 : 550054 : GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
1173 : : bool from_pgupgrade, bool isLockOnly)
1174 : : {
1175 : : int64 pageno;
1176 : : int64 prev_pageno;
1177 : : int entryno;
1178 : : int slotno;
1179 : : MultiXactOffset *offptr;
1180 : : MultiXactOffset offset;
1181 : : MultiXactOffset nextMXOffset;
1182 : : int length;
1183 : : MultiXactId oldestMXact;
1184 : : MultiXactId nextMXact;
1185 : : MultiXactMember *ptr;
1186 : : LWLock *lock;
1187 : :
1188 : : debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1189 : :
3602 1190 [ + - - + ]: 550054 : if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1191 : : {
1783 heikki.linnakangas@i 1192 :UBC 0 : *members = NULL;
4541 alvherre@alvh.no-ip. 1193 : 0 : return -1;
1194 : : }
1195 : :
1196 : : /* See if the MultiXactId is in the local cache */
4850 alvherre@alvh.no-ip. 1197 :CBC 550054 : length = mXactCacheGetById(multi, members);
7677 tgl@sss.pgh.pa.us 1198 [ + + ]: 550054 : if (length >= 0)
1199 : : {
1200 : : debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1201 : : mxid_to_string(multi, length, *members));
1202 : 457548 : return length;
1203 : : }
1204 : :
1205 : : /* Set our OldestVisibleMXactId[] entry if we didn't already */
1206 : 92506 : MultiXactIdSetOldestVisible();
1207 : :
1208 : : /*
1209 : : * If we know the multi is used only for locking and not for updates, then
1210 : : * we can skip checking if the value is older than our oldest visible
1211 : : * multi. It cannot possibly still be running.
1212 : : */
1324 pg@bowt.ie 1213 [ + + + + ]: 96230 : if (isLockOnly &&
64 heikki.linnakangas@i 1214 : 3724 : MultiXactIdPrecedes(multi, *MyOldestVisibleMXactIdSlot()))
1215 : : {
1216 : : debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
4298 alvherre@alvh.no-ip. 1217 : 759 : *members = NULL;
1218 : 759 : return -1;
1219 : : }
1220 : :
1221 : : /*
1222 : : * We check known limits on MultiXact before resorting to the SLRU area.
1223 : : *
1224 : : * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1225 : : * useful; it has already been removed, or will be removed shortly, by
1226 : : * truncation. If one is passed, an error is raised.
1227 : : *
1228 : : * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1229 : : * implies undetected ID wraparound has occurred. This raises a hard
1230 : : * error.
1231 : : *
1232 : : * Shared lock is enough here since we aren't modifying any global state.
1233 : : * Acquire it just long enough to grab the current counter values.
1234 : : */
7677 tgl@sss.pgh.pa.us 1235 : 91747 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1236 : :
4850 alvherre@alvh.no-ip. 1237 : 91747 : oldestMXact = MultiXactState->oldestMultiXactId;
7494 tgl@sss.pgh.pa.us 1238 : 91747 : nextMXact = MultiXactState->nextMXact;
1239 : :
1240 : 91747 : LWLockRelease(MultiXactGenLock);
1241 : :
4850 alvherre@alvh.no-ip. 1242 [ - + ]: 91747 : if (MultiXactIdPrecedes(multi, oldestMXact))
3602 alvherre@alvh.no-ip. 1243 [ # # ]:UBC 0 : ereport(ERROR,
1244 : : (errcode(ERRCODE_INTERNAL_ERROR),
1245 : : errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1246 : : multi)));
1247 : :
4850 alvherre@alvh.no-ip. 1248 [ - + ]:CBC 91747 : if (!MultiXactIdPrecedes(multi, nextMXact))
4850 alvherre@alvh.no-ip. 1249 [ # # ]:UBC 0 : ereport(ERROR,
1250 : : (errcode(ERRCODE_INTERNAL_ERROR),
1251 : : errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1252 : : multi)));
1253 : :
1254 : : /*
1255 : : * Find out the offset at which we need to start reading MultiXactMembers
1256 : : * and the number of members in the multixact. We determine the latter as
1257 : : * the difference between this multixact's starting offset and the next
1258 : : * one's.
1259 : : */
7677 tgl@sss.pgh.pa.us 1260 :CBC 91747 : pageno = MultiXactIdToOffsetPage(multi);
1261 : 91747 : entryno = MultiXactIdToOffsetEntry(multi);
1262 : :
1263 : : /* Acquire the bank lock for the page we need. */
797 alvherre@alvh.no-ip. 1264 : 91747 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
792 1265 : 91747 : LWLockAcquire(lock, LW_EXCLUSIVE);
1266 : :
1267 : : /* read this multi's offset */
53 heikki.linnakangas@i 1268 :GNC 91747 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
7636 tgl@sss.pgh.pa.us 1269 :CBC 91747 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7677 1270 : 91747 : offptr += entryno;
1271 : 91747 : offset = *offptr;
1272 : :
145 heikki.linnakangas@i 1273 [ - + ]:GNC 91747 : if (offset == 0)
145 heikki.linnakangas@i 1274 [ # # ]:UNC 0 : ereport(ERROR,
1275 : : (errcode(ERRCODE_DATA_CORRUPTED),
1276 : : errmsg("MultiXact %u has invalid offset", multi)));
1277 : :
1278 : : /* read next multi's offset */
1279 : : {
1280 : : MultiXactId tmpMXact;
1281 : :
1282 : : /* handle wraparound if needed */
144 heikki.linnakangas@i 1283 :GNC 91747 : tmpMXact = NextMultiXactId(multi);
1284 : :
7677 tgl@sss.pgh.pa.us 1285 :CBC 91747 : prev_pageno = pageno;
1286 : :
1287 : 91747 : pageno = MultiXactIdToOffsetPage(tmpMXact);
1288 : 91747 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
1289 : :
1290 [ + + ]: 91747 : if (pageno != prev_pageno)
1291 : : {
1292 : : LWLock *newlock;
1293 : :
1294 : : /*
1295 : : * Since we're going to access a different SLRU page, if this page
1296 : : * falls under a different bank, release the old bank's lock and
1297 : : * acquire the lock of the new bank.
1298 : : */
792 alvherre@alvh.no-ip. 1299 :GBC 13 : newlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1300 [ - + ]: 13 : if (newlock != lock)
1301 : : {
792 alvherre@alvh.no-ip. 1302 :UBC 0 : LWLockRelease(lock);
1303 : 0 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1304 : 0 : lock = newlock;
1305 : : }
53 heikki.linnakangas@i 1306 :GNC 13 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &tmpMXact);
1307 : : }
1308 : :
7636 tgl@sss.pgh.pa.us 1309 :CBC 91747 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7677 1310 : 91747 : offptr += entryno;
7494 1311 : 91747 : nextMXOffset = *offptr;
1312 : : }
1313 : :
792 alvherre@alvh.no-ip. 1314 : 91747 : LWLockRelease(lock);
1315 : 91747 : lock = NULL;
1316 : :
1317 : : /* Sanity check the next offset */
145 heikki.linnakangas@i 1318 [ - + ]:GNC 91747 : if (nextMXOffset == 0)
145 heikki.linnakangas@i 1319 [ # # ]:UNC 0 : ereport(ERROR,
1320 : : (errcode(ERRCODE_DATA_CORRUPTED),
1321 : : errmsg("MultiXact %u has invalid next offset", multi)));
141 heikki.linnakangas@i 1322 [ - + ]:GNC 91747 : if (nextMXOffset == offset)
141 heikki.linnakangas@i 1323 [ # # ]:UNC 0 : ereport(ERROR,
1324 : : (errcode(ERRCODE_DATA_CORRUPTED),
1325 : : errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1326 : : multi, offset)));
145 heikki.linnakangas@i 1327 [ - + ]:GNC 91747 : if (nextMXOffset < offset)
145 heikki.linnakangas@i 1328 [ # # ]:UNC 0 : ereport(ERROR,
1329 : : (errcode(ERRCODE_DATA_CORRUPTED),
1330 : : errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1331 : : multi, offset, nextMXOffset)));
145 heikki.linnakangas@i 1332 [ - + ]:GNC 91747 : if (nextMXOffset - offset > INT32_MAX)
145 heikki.linnakangas@i 1333 [ # # ]:UNC 0 : ereport(ERROR,
1334 : : (errcode(ERRCODE_DATA_CORRUPTED),
1335 : : errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1336 : : multi, nextMXOffset - offset)));
145 heikki.linnakangas@i 1337 :GNC 91747 : length = nextMXOffset - offset;
1338 : :
1339 : : /* read the members */
4850 alvherre@alvh.no-ip. 1340 : 91747 : ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
7677 tgl@sss.pgh.pa.us 1341 :CBC 91747 : prev_pageno = -1;
792 alvherre@alvh.no-ip. 1342 [ + + ]: 1827949 : for (int i = 0; i < length; i++, offset++)
1343 : : {
1344 : : TransactionId *xactptr;
1345 : : uint32 *flagsptr;
1346 : : int flagsoff;
1347 : : int bshift;
1348 : : int memberoff;
1349 : :
7677 tgl@sss.pgh.pa.us 1350 : 1736202 : pageno = MXOffsetToMemberPage(offset);
4850 alvherre@alvh.no-ip. 1351 : 1736202 : memberoff = MXOffsetToMemberOffset(offset);
1352 : :
7677 tgl@sss.pgh.pa.us 1353 [ + + ]: 1736202 : if (pageno != prev_pageno)
1354 : : {
53 heikki.linnakangas@i 1355 :GNC 91909 : MultiXactMemberSlruReadContext slru_read_context = {multi, offset};
1356 : : LWLock *newlock;
1357 : :
1358 : : /*
1359 : : * Since we're going to access a different SLRU page, if this page
1360 : : * falls under a different bank, release the old bank's lock and
1361 : : * acquire the lock of the new bank.
1362 : : */
792 alvherre@alvh.no-ip. 1363 :CBC 91909 : newlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1364 [ + - ]: 91909 : if (newlock != lock)
1365 : : {
1366 [ + + ]: 91909 : if (lock)
792 alvherre@alvh.no-ip. 1367 :GBC 162 : LWLockRelease(lock);
792 alvherre@alvh.no-ip. 1368 :CBC 91909 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1369 : 91909 : lock = newlock;
1370 : : }
53 heikki.linnakangas@i 1371 :GNC 91909 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true,
1372 : : &slru_read_context);
7677 tgl@sss.pgh.pa.us 1373 :CBC 91909 : prev_pageno = pageno;
1374 : : }
1375 : :
1376 : 1736202 : xactptr = (TransactionId *)
4850 alvherre@alvh.no-ip. 1377 : 1736202 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
147 heikki.linnakangas@i 1378 [ - + ]:GNC 1736202 : Assert(TransactionIdIsValid(*xactptr));
1379 : :
4850 alvherre@alvh.no-ip. 1380 :CBC 1736202 : flagsoff = MXOffsetToFlagsOffset(offset);
1381 : 1736202 : bshift = MXOffsetToFlagsBitShift(offset);
1382 : 1736202 : flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1383 : :
147 heikki.linnakangas@i 1384 :GNC 1736202 : ptr[i].xid = *xactptr;
1385 : 1736202 : ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
1386 : : }
1387 : :
792 alvherre@alvh.no-ip. 1388 :CBC 91747 : LWLockRelease(lock);
1389 : :
1390 : : /*
1391 : : * Copy the result into the local cache.
1392 : : */
147 heikki.linnakangas@i 1393 :GNC 91747 : mXactCachePut(multi, length, ptr);
1394 : :
1395 : : debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1396 : : mxid_to_string(multi, length, ptr));
1783 heikki.linnakangas@i 1397 :CBC 91747 : *members = ptr;
147 heikki.linnakangas@i 1398 :GNC 91747 : return length;
1399 : : }
1400 : :
1401 : : /*
1402 : : * mxactMemberComparator
1403 : : * qsort comparison function for MultiXactMember
1404 : : *
1405 : : * We can't use wraparound comparison for XIDs because that does not respect
1406 : : * the triangle inequality! Any old sort order will do.
1407 : : */
1408 : : static int
4850 alvherre@alvh.no-ip. 1409 :CBC 3050789 : mxactMemberComparator(const void *arg1, const void *arg2)
1410 : : {
1411 : 3050789 : MultiXactMember member1 = *(const MultiXactMember *) arg1;
1412 : 3050789 : MultiXactMember member2 = *(const MultiXactMember *) arg2;
1413 : :
1414 [ + + ]: 3050789 : if (member1.xid > member2.xid)
1415 : 35 : return 1;
1416 [ + + ]: 3050754 : if (member1.xid < member2.xid)
1417 : 3050540 : return -1;
1418 [ + + ]: 214 : if (member1.status > member2.status)
4850 alvherre@alvh.no-ip. 1419 :GBC 16 : return 1;
4850 alvherre@alvh.no-ip. 1420 [ + - ]:CBC 198 : if (member1.status < member2.status)
1421 : 198 : return -1;
4850 alvherre@alvh.no-ip. 1422 :UBC 0 : return 0;
1423 : : }
1424 : :
1425 : : /*
1426 : : * mXactCacheGetBySet
1427 : : * returns a MultiXactId from the cache based on the set of
1428 : : * TransactionIds that compose it, or InvalidMultiXactId if
1429 : : * none matches.
1430 : : *
1431 : : * This is helpful, for example, if two transactions want to lock a huge
1432 : : * table. By using the cache, the second will use the same MultiXactId
1433 : : * for the majority of tuples, thus keeping MultiXactId usage low (saving
1434 : : * both I/O and wraparound issues).
1435 : : *
1436 : : * NB: the passed members array will be sorted in-place.
1437 : : */
1438 : : static MultiXactId
4850 alvherre@alvh.no-ip. 1439 :CBC 76719 : mXactCacheGetBySet(int nmembers, MultiXactMember *members)
1440 : : {
1441 : : dlist_iter iter;
1442 : :
1443 : : debug_elog3(DEBUG2, "CacheGet: looking for %s",
1444 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
1445 : :
1446 : : /* sort the array so comparison is easy */
1447 : 76719 : qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1448 : :
1280 drowley@postgresql.o 1449 [ + - + + ]: 308400 : dclist_foreach(iter, &MXactCache)
1450 : : {
1451 : 303096 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1452 : : iter.cur);
1453 : :
4850 alvherre@alvh.no-ip. 1454 [ + + ]: 303096 : if (entry->nmembers != nmembers)
7677 tgl@sss.pgh.pa.us 1455 : 85328 : continue;
1456 : :
1457 : : /*
1458 : : * We assume the cache entries are sorted, and that the unused bits in
1459 : : * "status" are zeroed.
1460 : : */
4850 alvherre@alvh.no-ip. 1461 [ + + ]: 217768 : if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1462 : : {
1463 : : debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1280 drowley@postgresql.o 1464 : 71415 : dclist_move_head(&MXactCache, iter.cur);
7677 tgl@sss.pgh.pa.us 1465 : 71415 : return entry->multi;
1466 : : }
1467 : : }
1468 : :
1469 : : debug_elog2(DEBUG2, "CacheGet: not found :-(");
1470 : 5304 : return InvalidMultiXactId;
1471 : : }
1472 : :
1473 : : /*
1474 : : * mXactCacheGetById
1475 : : * returns the composing MultiXactMember set from the cache for a
1476 : : * given MultiXactId, if present.
1477 : : *
1478 : : * If successful, *xids is set to the address of a palloc'd copy of the
1479 : : * MultiXactMember set. Return value is number of members, or -1 on failure.
1480 : : */
1481 : : static int
4850 alvherre@alvh.no-ip. 1482 : 550054 : mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
1483 : : {
1484 : : dlist_iter iter;
1485 : :
1486 : : debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1487 : :
1280 drowley@postgresql.o 1488 [ + - + + ]: 4905846 : dclist_foreach(iter, &MXactCache)
1489 : : {
1490 : 4813340 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1491 : : iter.cur);
1492 : :
7677 tgl@sss.pgh.pa.us 1493 [ + + ]: 4813340 : if (entry->multi == multi)
1494 : : {
1495 : : MultiXactMember *ptr;
1496 : : Size size;
1497 : :
4850 alvherre@alvh.no-ip. 1498 : 457548 : size = sizeof(MultiXactMember) * entry->nmembers;
1499 : 457548 : ptr = (MultiXactMember *) palloc(size);
1500 : :
1501 : 457548 : memcpy(ptr, entry->members, size);
1502 : :
1503 : : debug_elog3(DEBUG2, "CacheGet: found %s",
1504 : : mxid_to_string(multi,
1505 : : entry->nmembers,
1506 : : entry->members));
1507 : :
1508 : : /*
1509 : : * Note we modify the list while not using a modifiable iterator.
1510 : : * This is acceptable only because we exit the iteration
1511 : : * immediately afterwards.
1512 : : */
1280 drowley@postgresql.o 1513 : 457548 : dclist_move_head(&MXactCache, iter.cur);
1514 : :
1783 heikki.linnakangas@i 1515 : 457548 : *members = ptr;
4850 alvherre@alvh.no-ip. 1516 : 457548 : return entry->nmembers;
1517 : : }
1518 : : }
1519 : :
1520 : : debug_elog2(DEBUG2, "CacheGet: not found");
7677 tgl@sss.pgh.pa.us 1521 : 92506 : return -1;
1522 : : }
1523 : :
1524 : : /*
1525 : : * mXactCachePut
1526 : : * Add a new MultiXactId and its composing set into the local cache.
1527 : : */
1528 : : static void
4850 alvherre@alvh.no-ip. 1529 : 97051 : mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1530 : : {
1531 : : mXactCacheEnt *entry;
1532 : :
1533 : : debug_elog3(DEBUG2, "CachePut: storing %s",
1534 : : mxid_to_string(multi, nmembers, members));
1535 : :
7677 tgl@sss.pgh.pa.us 1536 [ + + ]: 97051 : if (MXactContext == NULL)
1537 : : {
1538 : : /* The cache only lives as long as the current transaction */
1539 : : debug_elog2(DEBUG2, "CachePut: initializing memory context");
1540 : 3359 : MXactContext = AllocSetContextCreate(TopTransactionContext,
1541 : : "MultiXact cache context",
1542 : : ALLOCSET_SMALL_SIZES);
1543 : : }
1544 : :
1545 : : entry = (mXactCacheEnt *)
1546 : 97051 : MemoryContextAlloc(MXactContext,
4850 alvherre@alvh.no-ip. 1547 : 97051 : offsetof(mXactCacheEnt, members) +
1548 : : nmembers * sizeof(MultiXactMember));
1549 : :
7677 tgl@sss.pgh.pa.us 1550 : 97051 : entry->multi = multi;
4850 alvherre@alvh.no-ip. 1551 : 97051 : entry->nmembers = nmembers;
1552 : 97051 : memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1553 : :
1554 : : /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1555 : 97051 : qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1556 : :
1280 drowley@postgresql.o 1557 : 97051 : dclist_push_head(&MXactCache, &entry->node);
1558 [ + + ]: 97051 : if (dclist_count(&MXactCache) > MAX_CACHE_ENTRIES)
1559 : : {
1560 : : dlist_node *node;
1561 : :
1280 drowley@postgresql.o 1562 :GBC 9478 : node = dclist_tail_node(&MXactCache);
1563 : 9478 : dclist_delete_from(&MXactCache, node);
1564 : :
1565 : 9478 : entry = dclist_container(mXactCacheEnt, node, node);
1566 : : debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1567 : : entry->multi);
1568 : :
4526 alvherre@alvh.no-ip. 1569 : 9478 : pfree(entry);
1570 : : }
7677 tgl@sss.pgh.pa.us 1571 :CBC 97051 : }
1572 : :
1573 : : char *
4850 alvherre@alvh.no-ip. 1574 :GBC 188088 : mxstatus_to_string(MultiXactStatus status)
1575 : : {
1576 [ + - - - : 188088 : switch (status)
+ - - ]
1577 : : {
1578 : 183634 : case MultiXactStatusForKeyShare:
1579 : 183634 : return "keysh";
4850 alvherre@alvh.no-ip. 1580 :UBC 0 : case MultiXactStatusForShare:
1581 : 0 : return "sh";
1582 : 0 : case MultiXactStatusForNoKeyUpdate:
1583 : 0 : return "fornokeyupd";
1584 : 0 : case MultiXactStatusForUpdate:
1585 : 0 : return "forupd";
4850 alvherre@alvh.no-ip. 1586 :GBC 4454 : case MultiXactStatusNoKeyUpdate:
1587 : 4454 : return "nokeyupd";
4850 alvherre@alvh.no-ip. 1588 :UBC 0 : case MultiXactStatusUpdate:
1589 : 0 : return "upd";
1590 : 0 : default:
1591 [ # # ]: 0 : elog(ERROR, "unrecognized multixact status %d", status);
1592 : : return "";
1593 : : }
1594 : : }
1595 : :
1596 : : char *
1597 : 0 : mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1598 : : {
1599 : : static char *str = NULL;
1600 : : StringInfoData buf;
1601 : : int i;
1602 : :
1603 [ # # ]: 0 : if (str != NULL)
1604 : 0 : pfree(str);
1605 : :
1606 : 0 : initStringInfo(&buf);
1607 : :
1608 : 0 : appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1609 : : mxstatus_to_string(members[0].status));
1610 : :
1611 [ # # ]: 0 : for (i = 1; i < nmembers; i++)
1612 : 0 : appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1613 : 0 : mxstatus_to_string(members[i].status));
1614 : :
1615 : 0 : appendStringInfoChar(&buf, ']');
1616 : 0 : str = MemoryContextStrdup(TopMemoryContext, buf.data);
1617 : 0 : pfree(buf.data);
7677 tgl@sss.pgh.pa.us 1618 : 0 : return str;
1619 : : }
1620 : :
1621 : : /*
1622 : : * AtEOXact_MultiXact
1623 : : * Handle transaction end for MultiXact
1624 : : *
1625 : : * This is called at top transaction commit or abort (we don't care which).
1626 : : */
1627 : : void
7677 tgl@sss.pgh.pa.us 1628 :CBC 421563 : AtEOXact_MultiXact(void)
1629 : : {
1630 : : /*
1631 : : * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1632 : : * which should only be valid while within a transaction.
1633 : : *
1634 : : * We assume that storing a MultiXactId is atomic and so we need not take
1635 : : * MultiXactGenLock to do this.
1636 : : */
64 heikki.linnakangas@i 1637 : 421563 : *MyOldestMemberMXactIdSlot() = InvalidMultiXactId;
1638 : 421563 : *MyOldestVisibleMXactIdSlot() = InvalidMultiXactId;
1639 : :
1640 : : /*
1641 : : * Discard the local MultiXactId cache. Since MXactContext was created as
1642 : : * a child of TopTransactionContext, we needn't delete it explicitly.
1643 : : */
7677 tgl@sss.pgh.pa.us 1644 : 421563 : MXactContext = NULL;
1280 drowley@postgresql.o 1645 : 421563 : dclist_init(&MXactCache);
7677 tgl@sss.pgh.pa.us 1646 : 421563 : }
1647 : :
1648 : : /*
1649 : : * AtPrepare_MultiXact
1650 : : * Save multixact state at 2PC transaction prepare
1651 : : *
1652 : : * In this phase, we only store our OldestMemberMXactId value in the two-phase
1653 : : * state file.
1654 : : */
1655 : : void
6007 heikki.linnakangas@i 1656 : 295 : AtPrepare_MultiXact(void)
1657 : : {
64 1658 : 295 : MultiXactId myOldestMember = *MyOldestMemberMXactIdSlot();
1659 : :
6007 1660 [ + + ]: 295 : if (MultiXactIdIsValid(myOldestMember))
1661 : 67 : RegisterTwoPhaseRecord(TWOPHASE_RM_MULTIXACT_ID, 0,
1662 : : &myOldestMember, sizeof(MultiXactId));
1663 : 295 : }
1664 : :
1665 : : /*
1666 : : * PostPrepare_MultiXact
1667 : : * Clean up after successful PREPARE TRANSACTION
1668 : : */
1669 : : void
302 michael@paquier.xyz 1670 :GNC 295 : PostPrepare_MultiXact(FullTransactionId fxid)
1671 : : {
1672 : : MultiXactId myOldestMember;
1673 : :
1674 : : /*
1675 : : * Transfer our OldestMemberMXactId value to the slot reserved for the
1676 : : * prepared transaction.
1677 : : */
64 heikki.linnakangas@i 1678 :CBC 295 : myOldestMember = *MyOldestMemberMXactIdSlot();
6007 1679 [ + + ]: 295 : if (MultiXactIdIsValid(myOldestMember))
1680 : : {
302 michael@paquier.xyz 1681 :GNC 67 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1682 : :
1683 : : /*
1684 : : * Even though storing MultiXactId is atomic, acquire lock to make
1685 : : * sure others see both changes, not just the reset of the slot of the
1686 : : * current backend. Using a volatile pointer might suffice, but this
1687 : : * isn't a hot spot.
1688 : : */
6007 heikki.linnakangas@i 1689 :CBC 67 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1690 : :
64 1691 : 67 : *PreparedXactOldestMemberMXactIdSlot(dummyProcNumber) = myOldestMember;
1692 : 67 : *MyOldestMemberMXactIdSlot() = InvalidMultiXactId;
1693 : :
6007 1694 : 67 : LWLockRelease(MultiXactGenLock);
1695 : : }
1696 : :
1697 : : /*
1698 : : * We don't need to transfer OldestVisibleMXactId value, because the
1699 : : * transaction is not going to be looking at any more multixacts once it's
1700 : : * prepared.
1701 : : *
1702 : : * We assume that storing a MultiXactId is atomic and so we need not take
1703 : : * MultiXactGenLock to do this.
1704 : : */
64 1705 : 295 : *MyOldestVisibleMXactIdSlot() = InvalidMultiXactId;
1706 : :
1707 : : /*
1708 : : * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1709 : : */
6007 1710 : 295 : MXactContext = NULL;
1280 drowley@postgresql.o 1711 : 295 : dclist_init(&MXactCache);
6007 heikki.linnakangas@i 1712 : 295 : }
1713 : :
1714 : : /*
1715 : : * multixact_twophase_recover
1716 : : * Recover the state of a prepared transaction at startup
1717 : : */
1718 : : void
302 michael@paquier.xyz 1719 :GNC 8 : multixact_twophase_recover(FullTransactionId fxid, uint16 info,
1720 : : void *recdata, uint32 len)
1721 : : {
1722 : 8 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1723 : : MultiXactId oldestMember;
1724 : :
1725 : : /*
1726 : : * Get the oldest member XID from the state file record, and set it in the
1727 : : * OldestMemberMXactId slot reserved for this prepared transaction.
1728 : : */
6007 heikki.linnakangas@i 1729 [ - + ]:CBC 8 : Assert(len == sizeof(MultiXactId));
5912 bruce@momjian.us 1730 : 8 : oldestMember = *((MultiXactId *) recdata);
1731 : :
64 heikki.linnakangas@i 1732 : 8 : *PreparedXactOldestMemberMXactIdSlot(dummyProcNumber) = oldestMember;
6007 1733 : 8 : }
1734 : :
1735 : : /*
1736 : : * multixact_twophase_postcommit
1737 : : * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1738 : : */
1739 : : void
302 michael@paquier.xyz 1740 :GNC 72 : multixact_twophase_postcommit(FullTransactionId fxid, uint16 info,
1741 : : void *recdata, uint32 len)
1742 : : {
1743 : 72 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
1744 : :
6007 heikki.linnakangas@i 1745 [ - + ]:CBC 72 : Assert(len == sizeof(MultiXactId));
1746 : :
64 1747 : 72 : *PreparedXactOldestMemberMXactIdSlot(dummyProcNumber) = InvalidMultiXactId;
6007 1748 : 72 : }
1749 : :
1750 : : /*
1751 : : * multixact_twophase_postabort
1752 : : * This is actually just the same as the COMMIT case.
1753 : : */
1754 : : void
302 michael@paquier.xyz 1755 :GNC 30 : multixact_twophase_postabort(FullTransactionId fxid, uint16 info,
1756 : : void *recdata, uint32 len)
1757 : : {
1758 : 30 : multixact_twophase_postcommit(fxid, info, recdata, len);
6007 heikki.linnakangas@i 1759 :CBC 30 : }
1760 : :
1761 : :
1762 : : /*
1763 : : * Register shared memory needs for MultiXact.
1764 : : */
1765 : : static void
29 heikki.linnakangas@i 1766 :GNC 1244 : MultiXactShmemRequest(void *arg)
1767 : : {
1768 : : Size size;
1769 : :
1770 : : /*
1771 : : * Calculate the size of the MultiXactState struct, and the two
1772 : : * per-backend MultiXactId arrays. They are carved out of the same
1773 : : * allocation.
1774 : : */
64 heikki.linnakangas@i 1775 :CBC 1244 : size = offsetof(MultiXactStateData, perBackendXactIds);
1776 : 1244 : size = add_size(size,
1777 : 1244 : mul_size(sizeof(MultiXactId), NumMemberSlots));
1778 : 1244 : size = add_size(size,
1779 : : mul_size(sizeof(MultiXactId), NumVisibleSlots));
29 heikki.linnakangas@i 1780 :GNC 1244 : ShmemRequestStruct(.name = "Shared MultiXact State",
1781 : : .size = size,
1782 : : .ptr = (void **) &MultiXactState,
1783 : : );
1784 : :
1785 : 1244 : SimpleLruRequest(.desc = &MultiXactOffsetSlruDesc,
1786 : : .name = "multixact_offset",
1787 : : .Dir = "pg_multixact/offsets",
1788 : : .long_segment_names = false,
1789 : :
1790 : : .nslots = multixact_offset_buffers,
1791 : :
1792 : : .sync_handler = SYNC_HANDLER_MULTIXACT_OFFSET,
1793 : : .PagePrecedes = MultiXactOffsetPagePrecedes,
1794 : : .errdetail_for_io_error = MultiXactOffsetIoErrorDetail,
1795 : :
1796 : : .buffer_tranche_id = LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1797 : : .bank_tranche_id = LWTRANCHE_MULTIXACTOFFSET_SLRU,
1798 : : );
1799 : :
1800 : 1244 : SimpleLruRequest(.desc = &MultiXactMemberSlruDesc,
1801 : : .name = "multixact_member",
1802 : : .Dir = "pg_multixact/members",
1803 : : .long_segment_names = true,
1804 : :
1805 : : .nslots = multixact_member_buffers,
1806 : :
1807 : : .sync_handler = SYNC_HANDLER_MULTIXACT_MEMBER,
1808 : : .PagePrecedes = MultiXactMemberPagePrecedes,
1809 : : .errdetail_for_io_error = MultiXactMemberIoErrorDetail,
1810 : :
1811 : : .buffer_tranche_id = LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1812 : : .bank_tranche_id = LWTRANCHE_MULTIXACTMEMBER_SLRU,
1813 : : );
1814 : 1244 : }
1815 : :
1816 : : static void
1817 : 1241 : MultiXactShmemInit(void *arg)
1818 : : {
1819 : 1241 : SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
1820 : :
1821 : : /*
1822 : : * members SLRU doesn't call SimpleLruTruncate() or meet criteria for unit
1823 : : * tests
1824 : : */
1825 : :
1826 : : /* Set up array pointers */
1827 : 1241 : OldestMemberMXactId = MultiXactState->perBackendXactIds;
1828 : 1241 : OldestVisibleMXactId = OldestMemberMXactId + NumMemberSlots;
1829 : 1241 : }
1830 : :
1831 : : static void
29 heikki.linnakangas@i 1832 :UNC 0 : MultiXactShmemAttach(void *arg)
1833 : : {
1834 : : /* Set up array pointers */
7677 tgl@sss.pgh.pa.us 1835 :LBC (1070) : OldestMemberMXactId = MultiXactState->perBackendXactIds;
64 heikki.linnakangas@i 1836 : (1070) : OldestVisibleMXactId = OldestMemberMXactId + NumMemberSlots;
7677 tgl@sss.pgh.pa.us 1837 : (1070) : }
1838 : :
1839 : : /*
1840 : : * GUC check_hook for multixact_offset_buffers
1841 : : */
1842 : : bool
797 alvherre@alvh.no-ip. 1843 :CBC 1286 : check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
1844 : : {
1845 : 1286 : return check_slru_buffers("multixact_offset_buffers", newval);
1846 : : }
1847 : :
1848 : : /*
1849 : : * GUC check_hook for multixact_member_buffers
1850 : : */
1851 : : bool
1852 : 1286 : check_multixact_member_buffers(int *newval, void **extra, GucSource source)
1853 : : {
1854 : 1286 : return check_slru_buffers("multixact_member_buffers", newval);
1855 : : }
1856 : :
1857 : : /*
1858 : : * This func must be called ONCE on system install. It creates the initial
1859 : : * MultiXact segments. (The MultiXacts directories are assumed to have been
1860 : : * created by initdb, and MultiXactShmemInit must have been called already.)
1861 : : */
1862 : : void
7677 tgl@sss.pgh.pa.us 1863 : 57 : BootStrapMultiXact(void)
1864 : : {
1865 : : /* Zero the initial pages and flush them to disk */
302 alvherre@kurilemu.de 1866 :GNC 57 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
1867 : 57 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
7677 tgl@sss.pgh.pa.us 1868 :CBC 57 : }
1869 : :
1870 : : /*
1871 : : * This must be called ONCE during postmaster or standalone-backend startup.
1872 : : *
1873 : : * StartupXLOG has already established nextMXact/nextOffset by calling
1874 : : * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1875 : : * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1876 : : * replayed WAL.
1877 : : */
1878 : : void
1879 : 1077 : StartupMultiXact(void)
1880 : : {
4540 alvherre@alvh.no-ip. 1881 : 1077 : MultiXactId multi = MultiXactState->nextMXact;
1882 : 1077 : MultiXactOffset offset = MultiXactState->nextOffset;
1883 : : int64 pageno;
1884 : :
1885 : : /*
1886 : : * Initialize offset's idea of the latest page number.
1887 : : */
1888 : 1077 : pageno = MultiXactIdToOffsetPage(multi);
819 1889 : 1077 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1890 : : pageno);
1891 : :
1892 : : /*
1893 : : * Initialize member's idea of the latest page number.
1894 : : */
4540 1895 : 1077 : pageno = MXOffsetToMemberPage(offset);
819 1896 : 1077 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1897 : : pageno);
4540 1898 : 1077 : }
1899 : :
1900 : : /*
1901 : : * This must be called ONCE at the end of startup/recovery.
1902 : : */
1903 : : void
1904 : 1010 : TrimMultiXact(void)
1905 : : {
1906 : : MultiXactId nextMXact;
1907 : : MultiXactOffset offset;
1908 : : MultiXactId oldestMXact;
1909 : : Oid oldestMXactDB;
1910 : : int64 pageno;
1911 : : int entryno;
1912 : : int flagsoff;
1913 : :
3874 andres@anarazel.de 1914 : 1010 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1915 : 1010 : nextMXact = MultiXactState->nextMXact;
1916 : 1010 : offset = MultiXactState->nextOffset;
1917 : 1010 : oldestMXact = MultiXactState->oldestMultiXactId;
1918 : 1010 : oldestMXactDB = MultiXactState->oldestMultiXactDB;
1919 : 1010 : LWLockRelease(MultiXactGenLock);
1920 : :
1921 : : /* Clean up offsets state */
1922 : :
1923 : : /*
1924 : : * (Re-)Initialize our idea of the latest page number for offsets.
1925 : : */
1926 : 1010 : pageno = MultiXactIdToOffsetPage(nextMXact);
819 alvherre@alvh.no-ip. 1927 : 1010 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1928 : : pageno);
1929 : :
1930 : : /*
1931 : : * Set the offset of nextMXact on the offsets page. This is normally done
1932 : : * in RecordNewMultiXact() of the previous multixact, but let's be sure
1933 : : * the next page exists, if the nextMXact was reset with pg_resetwal for
1934 : : * example.
1935 : : *
1936 : : * Zero out the remainder of the page. See notes in TrimCLOG() for
1937 : : * background. Unlike CLOG, some WAL record covers every pg_multixact
1938 : : * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1939 : : * xlog before data," nextMXact successors may carry obsolete, nonzero
1940 : : * offset values.
1941 : : */
3874 andres@anarazel.de 1942 : 1010 : entryno = MultiXactIdToOffsetEntry(nextMXact);
1943 : : {
1944 : : int slotno;
1945 : : MultiXactOffset *offptr;
797 alvherre@alvh.no-ip. 1946 : 1010 : LWLock *lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1947 : :
1948 : 1010 : LWLockAcquire(lock, LW_EXCLUSIVE);
144 heikki.linnakangas@i 1949 [ + + + + ]:GNC 1010 : if (entryno == 0 || nextMXact == FirstMultiXactId)
153 heikki.linnakangas@i 1950 :CBC 992 : slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
1951 : : else
53 heikki.linnakangas@i 1952 :GNC 18 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &nextMXact);
7636 tgl@sss.pgh.pa.us 1953 :CBC 1010 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1954 : 1010 : offptr += entryno;
1955 : :
153 heikki.linnakangas@i 1956 : 1010 : *offptr = offset;
1957 [ + + + - ]: 1010 : if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1958 [ + - + - : 1631 : MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
+ - + + +
+ ]
1959 : :
7486 tgl@sss.pgh.pa.us 1960 : 1010 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
797 alvherre@alvh.no-ip. 1961 : 1010 : LWLockRelease(lock);
1962 : : }
1963 : :
1964 : : /*
1965 : : * And the same for members.
1966 : : *
1967 : : * (Re-)Initialize our idea of the latest page number for members.
1968 : : */
7636 tgl@sss.pgh.pa.us 1969 : 1010 : pageno = MXOffsetToMemberPage(offset);
819 alvherre@alvh.no-ip. 1970 : 1010 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1971 : : pageno);
1972 : :
1973 : : /*
1974 : : * Zero out the remainder of the current members page. See notes in
1975 : : * TrimCLOG() for motivation.
1976 : : */
4850 1977 : 1010 : flagsoff = MXOffsetToFlagsOffset(offset);
1978 [ + + ]: 1010 : if (flagsoff != 0)
1979 : : {
53 heikki.linnakangas@i 1980 :GNC 17 : MultiXactMemberSlruReadContext slru_read_context = {InvalidMultiXactId, offset};
1981 : : int slotno;
1982 : : TransactionId *xidptr;
1983 : : int memberoff;
797 alvherre@alvh.no-ip. 1984 :CBC 17 : LWLock *lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1985 : :
1986 : 17 : LWLockAcquire(lock, LW_EXCLUSIVE);
4850 1987 : 17 : memberoff = MXOffsetToMemberOffset(offset);
53 heikki.linnakangas@i 1988 :GNC 17 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, &slru_read_context);
4850 alvherre@alvh.no-ip. 1989 :CBC 17 : xidptr = (TransactionId *)
1990 : 17 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1991 : :
1992 [ + + + - : 17 : MemSet(xidptr, 0, BLCKSZ - memberoff);
+ - - + -
- ]
1993 : :
1994 : : /*
1995 : : * Note: we don't need to zero out the flag bits in the remaining
1996 : : * members of the current group, because they are always reset before
1997 : : * writing.
1998 : : */
1999 : :
7486 tgl@sss.pgh.pa.us 2000 : 17 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
797 alvherre@alvh.no-ip. 2001 : 17 : LWLockRelease(lock);
2002 : : }
2003 : :
2004 : : /* signal that we're officially up */
3874 andres@anarazel.de 2005 : 1010 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2006 : 1010 : MultiXactState->finishedStartup = true;
3987 rhaas@postgresql.org 2007 : 1010 : LWLockRelease(MultiXactGenLock);
2008 : :
2009 : : /* Now compute how far away the next multixid wraparound is. */
147 heikki.linnakangas@i 2010 :GNC 1010 : SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
7677 tgl@sss.pgh.pa.us 2011 :CBC 1010 : }
2012 : :
2013 : : /*
2014 : : * Get the MultiXact data to save in a checkpoint record
2015 : : */
2016 : : void
7636 2017 : 1731 : MultiXactGetCheckptMulti(bool is_shutdown,
2018 : : MultiXactId *nextMulti,
2019 : : MultiXactOffset *nextMultiOffset,
2020 : : MultiXactId *oldestMulti,
2021 : : Oid *oldestMultiDB)
2022 : : {
7677 2023 : 1731 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
7636 2024 : 1731 : *nextMulti = MultiXactState->nextMXact;
2025 : 1731 : *nextMultiOffset = MultiXactState->nextOffset;
4850 alvherre@alvh.no-ip. 2026 : 1731 : *oldestMulti = MultiXactState->oldestMultiXactId;
2027 : 1731 : *oldestMultiDB = MultiXactState->oldestMultiXactDB;
7677 tgl@sss.pgh.pa.us 2028 : 1731 : LWLockRelease(MultiXactGenLock);
2029 : :
2030 : : debug_elog6(DEBUG2,
2031 : : "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
2032 : : *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
2033 : 1731 : }
2034 : :
2035 : : /*
2036 : : * Perform a checkpoint --- either during shutdown, or on-the-fly
2037 : : */
2038 : : void
2039 : 1944 : CheckPointMultiXact(void)
2040 : : {
2041 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
2042 : :
2043 : : /*
2044 : : * Write dirty MultiXact pages to disk. This may result in sync requests
2045 : : * queued for later handling by ProcessSyncRequests(), as part of the
2046 : : * checkpoint.
2047 : : */
2048 tmunro@postgresql.or 2048 : 1944 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2049 : 1944 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2050 : :
2051 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
7677 tgl@sss.pgh.pa.us 2052 : 1944 : }
2053 : :
2054 : : /*
2055 : : * Set the next-to-be-assigned MultiXactId and offset
2056 : : *
2057 : : * This is used when we can determine the correct next ID/offset exactly
2058 : : * from a checkpoint record. Although this is only called during bootstrap
2059 : : * and XLog replay, we take the lock in case any hot-standby backends are
2060 : : * examining the values.
2061 : : */
2062 : : void
7636 2063 : 1180 : MultiXactSetNextMXact(MultiXactId nextMulti,
2064 : : MultiXactOffset nextMultiOffset)
2065 : : {
144 heikki.linnakangas@i 2066 [ - + ]:GNC 1180 : Assert(MultiXactIdIsValid(nextMulti));
2067 : : debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
2068 : : nextMulti, nextMultiOffset);
2069 : :
5202 tgl@sss.pgh.pa.us 2070 :CBC 1180 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7677 2071 : 1180 : MultiXactState->nextMXact = nextMulti;
7636 2072 : 1180 : MultiXactState->nextOffset = nextMultiOffset;
5202 2073 : 1180 : LWLockRelease(MultiXactGenLock);
7677 2074 : 1180 : }
2075 : :
2076 : : /*
2077 : : * Determine the last safe MultiXactId to allocate given the currently oldest
2078 : : * datminmxid (ie, the oldest MultiXactId that might exist in any database
2079 : : * of our cluster), and the OID of the (or a) database with that value.
2080 : : *
2081 : : * This also updates MultiXactState->oldestOffset, by looking up the offset of
2082 : : * MultiXactState->oldestMultiXactId.
2083 : : */
2084 : : void
147 heikki.linnakangas@i 2085 :GNC 2260 : SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
2086 : : {
2087 : : MultiXactId multiVacLimit;
2088 : : MultiXactId multiWarnLimit;
2089 : : MultiXactId multiStopLimit;
2090 : : MultiXactId multiWrapLimit;
2091 : : MultiXactId curMulti;
2092 : :
4850 alvherre@alvh.no-ip. 2093 [ - + ]:CBC 2260 : Assert(MultiXactIdIsValid(oldest_datminmxid));
2094 : :
2095 : : /*
2096 : : * We pretend that a wrap will happen halfway through the multixact ID
2097 : : * space, but that's not really true, because multixacts wrap differently
2098 : : * from transaction IDs.
2099 : : */
2100 : 2260 : multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2101 [ - + ]: 2260 : if (multiWrapLimit < FirstMultiXactId)
4850 alvherre@alvh.no-ip. 2102 :UBC 0 : multiWrapLimit += FirstMultiXactId;
2103 : :
2104 : : /*
2105 : : * We'll refuse to continue assigning MultiXactIds once we get within 3M
2106 : : * multi of data loss. See SetTransactionIdLimit.
2107 : : */
2103 noah@leadboat.com 2108 :CBC 2260 : multiStopLimit = multiWrapLimit - 3000000;
4850 alvherre@alvh.no-ip. 2109 [ - + ]: 2260 : if (multiStopLimit < FirstMultiXactId)
4850 alvherre@alvh.no-ip. 2110 :UBC 0 : multiStopLimit -= FirstMultiXactId;
2111 : :
2112 : : /*
2113 : : * We'll start complaining loudly when we get within 100M multis of data
2114 : : * loss. This is kind of arbitrary, but if you let your gas gauge get
2115 : : * down to 5% of full, would you be looking for the next gas station? We
2116 : : * need to be fairly liberal about this number because there are lots of
2117 : : * scenarios where most transactions are done by automatic clients that
2118 : : * won't pay attention to warnings. (No, we're not gonna make this
2119 : : * configurable. If you know enough to configure it, you know enough to
2120 : : * not get in this kind of trouble in the first place.)
2121 : : */
46 nathan@postgresql.or 2122 :GNC 2260 : multiWarnLimit = multiWrapLimit - 100000000;
4850 alvherre@alvh.no-ip. 2123 [ - + ]:CBC 2260 : if (multiWarnLimit < FirstMultiXactId)
4850 alvherre@alvh.no-ip. 2124 :UBC 0 : multiWarnLimit -= FirstMultiXactId;
2125 : :
2126 : : /*
2127 : : * We'll start trying to force autovacuums when oldest_datminmxid gets to
2128 : : * be more than autovacuum_multixact_freeze_max_age mxids old.
2129 : : *
2130 : : * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2131 : : * so that we don't have to worry about dealing with on-the-fly changes in
2132 : : * its value. See SetTransactionIdLimit.
2133 : : */
4464 alvherre@alvh.no-ip. 2134 :CBC 2260 : multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
4850 2135 [ - + ]: 2260 : if (multiVacLimit < FirstMultiXactId)
4850 alvherre@alvh.no-ip. 2136 :UBC 0 : multiVacLimit += FirstMultiXactId;
2137 : :
2138 : : /* Grab lock for just long enough to set the new limit values */
4850 alvherre@alvh.no-ip. 2139 :CBC 2260 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2140 : 2260 : MultiXactState->oldestMultiXactId = oldest_datminmxid;
2141 : 2260 : MultiXactState->oldestMultiXactDB = oldest_datoid;
2142 : 2260 : MultiXactState->multiVacLimit = multiVacLimit;
2143 : 2260 : MultiXactState->multiWarnLimit = multiWarnLimit;
2144 : 2260 : MultiXactState->multiStopLimit = multiStopLimit;
2145 : 2260 : MultiXactState->multiWrapLimit = multiWrapLimit;
2146 : 2260 : curMulti = MultiXactState->nextMXact;
2147 : 2260 : LWLockRelease(MultiXactGenLock);
2148 : :
2149 : : /* Log the info */
2150 [ + + ]: 2260 : ereport(DEBUG1,
2151 : : (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2152 : : multiWrapLimit, oldest_datoid)));
2153 : :
2154 : : /*
2155 : : * Computing the actual limits is only possible once the data directory is
2156 : : * in a consistent state. There's no need to compute the limits while
2157 : : * still replaying WAL - no decisions about new multis are made even
2158 : : * though multixact creations might be replayed. So we'll only do further
2159 : : * checks after TrimMultiXact() has been called.
2160 : : */
3874 andres@anarazel.de 2161 [ + + ]: 2260 : if (!MultiXactState->finishedStartup)
2162 : 1136 : return;
2163 : :
2164 [ - + ]: 1124 : Assert(!InRecovery);
2165 : :
2166 : : /*
2167 : : * Offsets are 64-bits wide and never wrap around, so we don't need to
2168 : : * consider them for emergency autovacuum purposes. But now that we're in
2169 : : * a consistent state, determine MultiXactState->oldestOffset. It will be
2170 : : * used to adjust the freezing cutoff, to keep the offsets disk usage in
2171 : : * check.
2172 : : */
147 heikki.linnakangas@i 2173 :GNC 1124 : SetOldestOffset();
2174 : :
2175 : : /*
2176 : : * If past the autovacuum force point, immediately signal an autovac
2177 : : * request. The reason for this is that autovac only processes one
2178 : : * database per invocation. Once it's finished cleaning up the oldest
2179 : : * database, it'll call here, and we'll signal the postmaster to start
2180 : : * another iteration immediately if there are still any old databases.
2181 : : */
2182 [ - + - - ]: 1124 : if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
4850 alvherre@alvh.no-ip. 2183 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
2184 : :
2185 : : /* Give an immediate warning if past the wrap warn point */
3874 andres@anarazel.de 2186 [ - + ]:CBC 1124 : if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2187 : : {
2188 : : char *oldest_datname;
2189 : :
2190 : : /*
2191 : : * We can be called when not inside a transaction, for example during
2192 : : * StartupXLOG(). In such a case we cannot do database access, so we
2193 : : * must just report the oldest DB's OID.
2194 : : *
2195 : : * Note: it's also possible that get_database_name fails and returns
2196 : : * NULL, for example because the database just got dropped. We'll
2197 : : * still warn, even though the warning might now be unnecessary.
2198 : : */
4850 alvherre@alvh.no-ip. 2199 [ # # ]:UBC 0 : if (IsTransactionState())
2200 : 0 : oldest_datname = get_database_name(oldest_datoid);
2201 : : else
2202 : 0 : oldest_datname = NULL;
2203 : :
2204 [ # # ]: 0 : if (oldest_datname)
2205 [ # # ]: 0 : ereport(WARNING,
2206 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2207 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2208 : : multiWrapLimit - curMulti,
2209 : : oldest_datname,
2210 : : multiWrapLimit - curMulti),
2211 : : errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
2212 : : (double) (multiWrapLimit - curMulti) / (MaxMultiXactId / 2) * 100),
2213 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2214 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2215 : : else
2216 [ # # ]: 0 : ereport(WARNING,
2217 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2218 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2219 : : multiWrapLimit - curMulti,
2220 : : oldest_datoid,
2221 : : multiWrapLimit - curMulti),
2222 : : errdetail("Approximately %.2f%% of MultiXactIds are available for use.",
2223 : : (double) (multiWrapLimit - curMulti) / (MaxMultiXactId / 2) * 100),
2224 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2225 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2226 : : }
2227 : : }
2228 : :
2229 : : /*
2230 : : * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2231 : : * and similarly nextOffset is at least minMultiOffset.
2232 : : *
2233 : : * This is used when we can determine minimum safe values from an XLog
2234 : : * record (either an on-line checkpoint or an mxact creation log entry).
2235 : : * Although this is only called during XLog replay, we take the lock in case
2236 : : * any hot-standby backends are examining the values.
2237 : : */
2238 : : void
7636 tgl@sss.pgh.pa.us 2239 :CBC 712 : MultiXactAdvanceNextMXact(MultiXactId minMulti,
2240 : : MultiXactOffset minMultiOffset)
2241 : : {
144 heikki.linnakangas@i 2242 [ - + ]:GNC 712 : Assert(MultiXactIdIsValid(minMulti));
2243 : :
5202 tgl@sss.pgh.pa.us 2244 :CBC 712 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7677 2245 [ + + ]: 712 : if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
2246 : : {
2247 : : debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
2248 : 5 : MultiXactState->nextMXact = minMulti;
2249 : : }
147 heikki.linnakangas@i 2250 [ + + ]:GNC 712 : if (MultiXactState->nextOffset < minMultiOffset)
2251 : : {
2252 : : debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIu64,
2253 : : minMultiOffset);
7636 tgl@sss.pgh.pa.us 2254 :CBC 5 : MultiXactState->nextOffset = minMultiOffset;
2255 : : }
5202 2256 : 712 : LWLockRelease(MultiXactGenLock);
7677 2257 : 712 : }
2258 : :
2259 : : /*
2260 : : * Update our oldestMultiXactId value, but only if it's more recent than what
2261 : : * we had.
2262 : : *
2263 : : * This may only be called during WAL replay.
2264 : : */
2265 : : void
4850 alvherre@alvh.no-ip. 2266 : 751 : MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2267 : : {
3874 andres@anarazel.de 2268 [ - + ]: 751 : Assert(InRecovery);
2269 : :
4850 alvherre@alvh.no-ip. 2270 [ - + ]: 751 : if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
147 heikki.linnakangas@i 2271 :UNC 0 : SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
4330 alvherre@alvh.no-ip. 2272 :CBC 751 : }
2273 : :
2274 : : /*
2275 : : * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2276 : : *
2277 : : * NB: this is called while holding MultiXactGenLock. We want it to be very
2278 : : * fast most of the time; even when it's not so fast, no actual I/O need
2279 : : * happen unless we're forced to write out a dirty log or xlog page to make
2280 : : * room in shared memory.
2281 : : */
2282 : : static void
7677 tgl@sss.pgh.pa.us 2283 : 5304 : ExtendMultiXactOffset(MultiXactId multi)
2284 : : {
2285 : : int64 pageno;
2286 : : LWLock *lock;
2287 : :
2288 : : /*
2289 : : * No work except at first MultiXactId of a page. But beware: just after
2290 : : * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2291 : : */
2292 [ + + + + ]: 5304 : if (MultiXactIdToOffsetEntry(multi) != 0 &&
2293 : : multi != FirstMultiXactId)
2294 : 5299 : return;
2295 : :
7677 tgl@sss.pgh.pa.us 2296 :GBC 5 : pageno = MultiXactIdToOffsetPage(multi);
797 alvherre@alvh.no-ip. 2297 : 5 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
2298 : :
2299 : 5 : LWLockAcquire(lock, LW_EXCLUSIVE);
2300 : :
2301 : : /* Zero the page and make a WAL entry about it */
302 alvherre@kurilemu.de 2302 :GNC 5 : SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
2303 : 5 : XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
2304 : : pageno);
2305 : :
797 alvherre@alvh.no-ip. 2306 :GBC 5 : LWLockRelease(lock);
2307 : : }
2308 : :
2309 : : /*
2310 : : * Make sure that MultiXactMember has room for the members of a newly-
2311 : : * allocated MultiXactId.
2312 : : *
2313 : : * Like the above routine, this is called while holding MultiXactGenLock;
2314 : : * same comments apply.
2315 : : */
2316 : : static void
7636 tgl@sss.pgh.pa.us 2317 :CBC 5304 : ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
2318 : : {
2319 : : /*
2320 : : * It's possible that the members span more than one page of the members
2321 : : * file, so we loop to ensure we consider each page. The coding is not
2322 : : * optimal if the members span several pages, but that seems unusual
2323 : : * enough to not worry much about.
2324 : : */
2325 [ + + ]: 10662 : while (nmembers > 0)
2326 : : {
2327 : : int flagsoff;
2328 : : int flagsbit;
2329 : : uint32 difference;
2330 : :
2331 : : /*
2332 : : * Only zero when at first entry of a page.
2333 : : */
4850 alvherre@alvh.no-ip. 2334 : 5358 : flagsoff = MXOffsetToFlagsOffset(offset);
2335 : 5358 : flagsbit = MXOffsetToFlagsBitShift(offset);
2336 [ + + + + ]: 5358 : if (flagsoff == 0 && flagsbit == 0)
2337 : : {
2338 : : int64 pageno;
2339 : : LWLock *lock;
2340 : :
7636 tgl@sss.pgh.pa.us 2341 : 57 : pageno = MXOffsetToMemberPage(offset);
797 alvherre@alvh.no-ip. 2342 : 57 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
2343 : :
2344 : 57 : LWLockAcquire(lock, LW_EXCLUSIVE);
2345 : :
2346 : : /* Zero the page and make a WAL entry about it */
302 alvherre@kurilemu.de 2347 :GNC 57 : SimpleLruZeroPage(MultiXactMemberCtl, pageno);
2348 : 57 : XLogSimpleInsertInt64(RM_MULTIXACT_ID,
2349 : : XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
2350 : :
797 alvherre@alvh.no-ip. 2351 :CBC 57 : LWLockRelease(lock);
2352 : : }
2353 : :
2354 : : /* Compute the number of items till end of current page. */
147 heikki.linnakangas@i 2355 :GNC 5358 : difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
2356 : :
2357 : : /*
2358 : : * Advance to next page. OK if nmembers goes negative.
2359 : : */
4348 alvherre@alvh.no-ip. 2360 :CBC 5358 : nmembers -= difference;
2361 : 5358 : offset += difference;
2362 : : }
7677 tgl@sss.pgh.pa.us 2363 : 5304 : }
2364 : :
2365 : : /*
2366 : : * GetOldestMultiXactId
2367 : : *
2368 : : * Return the oldest MultiXactId that's still possibly still seen as live by
2369 : : * any running transaction. Older ones might still exist on disk, but they no
2370 : : * longer have any running member transaction.
2371 : : *
2372 : : * It's not safe to truncate MultiXact SLRU segments on the value returned by
2373 : : * this function; however, it can be set as the new relminmxid for any table
2374 : : * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2375 : : * to truncate SLRUs when no table can possibly still have a referencing MXID.
2376 : : */
2377 : : MultiXactId
4850 alvherre@alvh.no-ip. 2378 : 61304 : GetOldestMultiXactId(void)
2379 : : {
2380 : : MultiXactId oldestMXact;
2381 : :
2382 : : /*
2383 : : * This is the oldest valid value among all the OldestMemberMXactId[] and
2384 : : * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2385 : : */
7677 tgl@sss.pgh.pa.us 2386 : 61304 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
144 heikki.linnakangas@i 2387 :GNC 61304 : oldestMXact = MultiXactState->nextMXact;
64 heikki.linnakangas@i 2388 [ + + ]:CBC 7416725 : for (int i = 0; i < NumMemberSlots; i++)
2389 : : {
2390 : : MultiXactId thisoldest;
2391 : :
7677 tgl@sss.pgh.pa.us 2392 : 7355421 : thisoldest = OldestMemberMXactId[i];
2393 [ + + + + ]: 7393570 : if (MultiXactIdIsValid(thisoldest) &&
2394 : 38149 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2395 : 33 : oldestMXact = thisoldest;
2396 : : }
64 heikki.linnakangas@i 2397 [ + + ]: 7307509 : for (int i = 0; i < NumVisibleSlots; i++)
2398 : : {
2399 : : MultiXactId thisoldest;
2400 : :
7677 tgl@sss.pgh.pa.us 2401 : 7246205 : thisoldest = OldestVisibleMXactId[i];
2402 [ + + + + ]: 7246215 : if (MultiXactIdIsValid(thisoldest) &&
2403 : 10 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2404 : 2 : oldestMXact = thisoldest;
2405 : : }
2406 : :
2407 : 61304 : LWLockRelease(MultiXactGenLock);
2408 : :
4850 alvherre@alvh.no-ip. 2409 : 61304 : return oldestMXact;
2410 : : }
2411 : :
2412 : : /*
2413 : : * Calculate the oldest member offset and install it in MultiXactState, where
2414 : : * it can be used to adjust multixid freezing cutoffs.
2415 : : */
2416 : : static void
147 heikki.linnakangas@i 2417 :GNC 1124 : SetOldestOffset(void)
2418 : : {
2419 : : MultiXactId oldestMultiXactId;
2420 : : MultiXactId nextMXact;
3874 andres@anarazel.de 2421 :CBC 1124 : MultiXactOffset oldestOffset = 0; /* placate compiler */
2422 : : MultiXactOffset nextOffset;
3987 rhaas@postgresql.org 2423 : 1124 : bool oldestOffsetKnown = false;
2424 : :
2425 : : /*
2426 : : * NB: Have to prevent concurrent truncation, we might otherwise try to
2427 : : * lookup an oldestMulti that's concurrently getting truncated away.
2428 : : */
3874 andres@anarazel.de 2429 : 1124 : LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
2430 : :
2431 : : /* Read relevant fields from shared memory. */
3987 rhaas@postgresql.org 2432 : 1124 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2433 : 1124 : oldestMultiXactId = MultiXactState->oldestMultiXactId;
2434 : 1124 : nextMXact = MultiXactState->nextMXact;
2435 : 1124 : nextOffset = MultiXactState->nextOffset;
3874 andres@anarazel.de 2436 [ - + ]: 1124 : Assert(MultiXactState->finishedStartup);
4025 alvherre@alvh.no-ip. 2437 : 1124 : LWLockRelease(MultiXactGenLock);
2438 : :
2439 : : /*
2440 : : * Determine the offset of the oldest multixact. Normally, we can read
2441 : : * the offset from the multixact itself, but there's an important special
2442 : : * case: if there are no multixacts in existence at all, oldestMXact
2443 : : * obviously can't point to one. It will instead point to the multixact
2444 : : * ID that will be assigned the next time one is needed.
2445 : : */
3987 rhaas@postgresql.org 2446 [ + + ]: 1124 : if (oldestMultiXactId == nextMXact)
2447 : : {
2448 : : /*
2449 : : * When the next multixact gets created, it will be stored at the next
2450 : : * offset.
2451 : : */
2452 : 1104 : oldestOffset = nextOffset;
2453 : 1104 : oldestOffsetKnown = true;
2454 : : }
2455 : : else
2456 : : {
2457 : : /*
2458 : : * Look up the offset at which the oldest existing multixact's members
2459 : : * are stored. If we cannot find it, be careful not to fail, and
2460 : : * leave oldestOffset unchanged. oldestOffset is initialized to zero
2461 : : * at system startup, which prevents truncating members until a proper
2462 : : * value is calculated.
2463 : : *
2464 : : * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2465 : : * the supposedly-earliest multixact might not really exist. Those
2466 : : * should be long gone by now, so this should not fail, but let's
2467 : : * still be defensive.)
2468 : : */
2469 : : oldestOffsetKnown =
2470 : 20 : find_multixact_start(oldestMultiXactId, &oldestOffset);
2471 : :
2472 [ + - ]: 20 : if (oldestOffsetKnown)
2473 [ - + ]: 20 : ereport(DEBUG1,
2474 : : (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2475 : : oldestOffset)));
2476 : : else
3874 andres@anarazel.de 2477 [ # # ]:UBC 0 : ereport(LOG,
2478 : : (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2479 : : oldestMultiXactId)));
2480 : : }
2481 : :
3874 andres@anarazel.de 2482 :CBC 1124 : LWLockRelease(MultiXactTruncationLock);
2483 : :
2484 : : /* Install the computed value */
2485 [ + - ]: 1124 : if (oldestOffsetKnown)
2486 : : {
147 heikki.linnakangas@i 2487 :GNC 1124 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2488 : 1124 : MultiXactState->oldestOffset = oldestOffset;
2489 : 1124 : LWLockRelease(MultiXactGenLock);
2490 : : }
4025 alvherre@alvh.no-ip. 2491 :GIC 1124 : }
2492 : :
2493 : : /*
2494 : : * Find the starting offset of the given MultiXactId.
2495 : : *
2496 : : * Returns false if the file containing the multi does not exist on disk.
2497 : : * Otherwise, returns true and sets *result to the starting member offset.
2498 : : *
2499 : : * This function does not prevent concurrent truncation, so if that's
2500 : : * required, the caller has to protect against that.
2501 : : */
2502 : : static bool
3987 rhaas@postgresql.org 2503 :CBC 20 : find_multixact_start(MultiXactId multi, MultiXactOffset *result)
2504 : : {
2505 : : MultiXactOffset offset;
2506 : : int64 pageno;
2507 : : int entryno;
2508 : : int slotno;
2509 : : MultiXactOffset *offptr;
2510 : :
3874 andres@anarazel.de 2511 [ - + ]: 20 : Assert(MultiXactState->finishedStartup);
2512 : :
4025 alvherre@alvh.no-ip. 2513 : 20 : pageno = MultiXactIdToOffsetPage(multi);
2514 : 20 : entryno = MultiXactIdToOffsetEntry(multi);
2515 : :
2516 : : /*
2517 : : * Write out dirty data, so PhysicalPageExists can work correctly.
2518 : : */
2048 tmunro@postgresql.or 2519 : 20 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2520 : 20 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2521 : :
3987 rhaas@postgresql.org 2522 [ - + ]: 20 : if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
3987 rhaas@postgresql.org 2523 :UBC 0 : return false;
2524 : :
2525 : : /* lock is acquired by SimpleLruReadPage_ReadOnly */
53 heikki.linnakangas@i 2526 :GNC 20 : slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, &multi);
4025 alvherre@alvh.no-ip. 2527 :CBC 20 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2528 : 20 : offptr += entryno;
2529 : 20 : offset = *offptr;
797 2530 : 20 : LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
2531 : :
3987 rhaas@postgresql.org 2532 : 20 : *result = offset;
2533 : 20 : return true;
2534 : : }
2535 : :
2536 : : /*
2537 : : * GetMultiXactInfo
2538 : : *
2539 : : * Returns information about the current MultiXact state, as of:
2540 : : * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2541 : : * nextOffset: Next-to-be-assigned offset
2542 : : * oldestMultiXactId: Oldest MultiXact ID still in use
2543 : : * oldestOffset: Oldest offset still in use
2544 : : */
2545 : : void
126 michael@paquier.xyz 2546 :GNC 16840 : GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset,
2547 : : MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2548 : : {
2549 : : MultiXactId nextMultiXactId;
2550 : :
4015 rhaas@postgresql.org 2551 :CBC 16840 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
126 michael@paquier.xyz 2552 :GNC 16840 : *nextOffset = MultiXactState->nextOffset;
259 2553 : 16840 : *oldestMultiXactId = MultiXactState->oldestMultiXactId;
4015 rhaas@postgresql.org 2554 :CBC 16840 : nextMultiXactId = MultiXactState->nextMXact;
259 michael@paquier.xyz 2555 :GNC 16840 : *oldestOffset = MultiXactState->oldestOffset;
4015 rhaas@postgresql.org 2556 :CBC 16840 : LWLockRelease(MultiXactGenLock);
2557 : :
259 michael@paquier.xyz 2558 :GNC 16840 : *multixacts = nextMultiXactId - *oldestMultiXactId;
4015 rhaas@postgresql.org 2559 :GIC 16840 : }
2560 : :
2561 : : /*
2562 : : * Multixact members can be removed once the multixacts that refer to them
2563 : : * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2564 : : * vacuum_multixact_freeze_table_age work together to make sure we never have
2565 : : * too many multixacts; we hope that, at least under normal circumstances,
2566 : : * this will also be sufficient to keep us from using too many offsets.
2567 : : * However, if the average multixact has many members, we might accumulate a
2568 : : * large amount of members, consuming disk space, while still using few enough
2569 : : * multixids that the multixid limits fail to trigger relminmxid advancement
2570 : : * by VACUUM.
2571 : : *
2572 : : * To prevent that, if the members space usage exceeds a threshold
2573 : : * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2574 : : * autovacuum_multixact_freeze_max_age to a value just less than the number of
2575 : : * multixacts in use. We hope that this will quickly trigger autovacuuming on
2576 : : * the table or tables with the oldest relminmxid, thus allowing datminmxid
2577 : : * values to advance and removing some members.
2578 : : *
2579 : : * As the amount of the member space in use grows, we become more aggressive
2580 : : * in clamping this value. That not only causes autovacuum to ramp up, but
2581 : : * also makes any manual vacuums the user issues more aggressive. This
2582 : : * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2583 : : * minimum freeze age cutoffs based on the effective
2584 : : * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2585 : : * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2586 : : * freeze_max_age to zero, and every vacuum of any table will freeze every
2587 : : * multixact.
2588 : : */
2589 : : int
4015 rhaas@postgresql.org 2590 :CBC 16825 : MultiXactMemberFreezeThreshold(void)
2591 : : {
2592 : : uint32 multixacts;
2593 : : uint32 victim_multixacts;
2594 : : double fraction;
2595 : : int result;
2596 : : MultiXactId oldestMultiXactId;
2597 : : MultiXactOffset oldestOffset;
2598 : : MultiXactOffset nextOffset;
2599 : : uint64 members;
2600 : :
2601 : : /* Read the current offsets and multixact usage. */
126 michael@paquier.xyz 2602 :GNC 16825 : GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset);
2603 : 16825 : members = nextOffset - oldestOffset;
2604 : :
2605 : : /* If member space utilization is low, no special action is required. */
147 heikki.linnakangas@i 2606 [ + - ]: 16825 : if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
4015 rhaas@postgresql.org 2607 :CBC 16825 : return autovacuum_multixact_freeze_max_age;
2608 : :
2609 : : /*
2610 : : * Compute a target for relminmxid advancement. The number of multixacts
2611 : : * we try to eliminate from the system is based on how far we are past
2612 : : * MULTIXACT_MEMBER_LOW_THRESHOLD.
2613 : : *
2614 : : * The way this formula works is that when members is exactly at the low
2615 : : * threshold, fraction = 0.0, and we set freeze_max_age equal to
2616 : : * mxid_age(oldestMultiXactId). As members grows further, towards the
2617 : : * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2618 : : * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2619 : : * threshold, fraction > 1.0 and the result is clamped to 0.
2620 : : */
147 heikki.linnakangas@i 2621 :UNC 0 : fraction = (double) (members - MULTIXACT_MEMBER_LOW_THRESHOLD) /
2622 : : (MULTIXACT_MEMBER_HIGH_THRESHOLD - MULTIXACT_MEMBER_LOW_THRESHOLD);
2623 : :
2624 : : /* fraction could be > 1.0, but lowest possible freeze age is zero */
2625 [ # # ]: 0 : if (fraction >= 1.0)
4015 rhaas@postgresql.org 2626 :UBC 0 : return 0;
2627 : :
147 heikki.linnakangas@i 2628 :UNC 0 : victim_multixacts = multixacts * fraction;
691 heikki.linnakangas@i 2629 :UBC 0 : result = multixacts - victim_multixacts;
2630 : :
2631 : : /*
2632 : : * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2633 : : * autovacuum less aggressive than it would otherwise be.
2634 : : */
2635 : 0 : return Min(result, autovacuum_multixact_freeze_max_age);
2636 : : }
2637 : :
2638 : :
2639 : : /*
2640 : : * Delete members segments older than newOldestOffset
2641 : : */
2642 : : static void
110 heikki.linnakangas@i 2643 :UNC 0 : PerformMembersTruncation(MultiXactOffset newOldestOffset)
2644 : : {
147 2645 : 0 : SimpleLruTruncate(MultiXactMemberCtl,
2646 : : MXOffsetToMemberPage(newOldestOffset));
3874 andres@anarazel.de 2647 :UBC 0 : }
2648 : :
2649 : : /*
2650 : : * Delete offsets segments older than newOldestMulti
2651 : : */
2652 : : static void
110 heikki.linnakangas@i 2653 :UNC 0 : PerformOffsetsTruncation(MultiXactId newOldestMulti)
2654 : : {
2655 : : /*
2656 : : * We step back one multixact to avoid passing a cutoff page that hasn't
2657 : : * been created yet in the rare case that oldestMulti would be the first
2658 : : * item on a page and oldestMulti == nextMulti. In that case, if we
2659 : : * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2660 : : * detection.
2661 : : */
3874 andres@anarazel.de 2662 :UBC 0 : SimpleLruTruncate(MultiXactOffsetCtl,
2663 : : MultiXactIdToOffsetPage(PreviousMultiXactId(newOldestMulti)));
2664 : 0 : }
2665 : :
2666 : : /*
2667 : : * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2668 : : * ones still of interest.
2669 : : *
2670 : : * This is only called on a primary as part of vacuum (via
2671 : : * vac_truncate_clog()). During recovery truncation is done by replaying
2672 : : * truncation WAL records logged here.
2673 : : *
2674 : : * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2675 : : * is one of the databases preventing newOldestMulti from increasing.
2676 : : */
2677 : : void
3874 andres@anarazel.de 2678 :CBC 114 : TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
2679 : : {
2680 : : MultiXactId oldestMulti;
2681 : : MultiXactId nextMulti;
2682 : : MultiXactOffset newOldestOffset;
2683 : : MultiXactOffset nextOffset;
2684 : :
2685 [ - + ]: 114 : Assert(!RecoveryInProgress());
2686 [ - + ]: 114 : Assert(MultiXactState->finishedStartup);
144 heikki.linnakangas@i 2687 [ - + ]:GNC 114 : Assert(MultiXactIdIsValid(newOldestMulti));
2688 : :
2689 : : /*
2690 : : * We can only allow one truncation to happen at once. Otherwise parts of
2691 : : * members might vanish while we're doing lookups or similar. There's no
2692 : : * need to have an interlock with creating new multis or such, since those
2693 : : * are constrained by the limits (which only grow, never shrink).
2694 : : */
3874 andres@anarazel.de 2695 :CBC 114 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2696 : :
4330 alvherre@alvh.no-ip. 2697 : 114 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
3874 andres@anarazel.de 2698 : 114 : nextMulti = MultiXactState->nextMXact;
3987 rhaas@postgresql.org 2699 : 114 : nextOffset = MultiXactState->nextOffset;
3874 andres@anarazel.de 2700 : 114 : oldestMulti = MultiXactState->oldestMultiXactId;
4330 alvherre@alvh.no-ip. 2701 : 114 : LWLockRelease(MultiXactGenLock);
2702 : :
2703 : : /*
2704 : : * Make sure to only attempt truncation if there's values to truncate
2705 : : * away. In normal processing values shouldn't go backwards, but there's
2706 : : * some corner cases (due to bugs) where that's possible.
2707 : : */
3874 andres@anarazel.de 2708 [ + - ]: 114 : if (MultiXactIdPrecedesOrEquals(newOldestMulti, oldestMulti))
2709 : : {
2710 : 114 : LWLockRelease(MultiXactTruncationLock);
2711 : 114 : return;
2712 : : }
2713 : :
2714 : : /*
2715 : : * Compute up to where to truncate MultiXactMember. Lookup the
2716 : : * corresponding member offset for newOldestMulti for that.
2717 : : */
3874 andres@anarazel.de 2718 [ # # ]:UBC 0 : if (newOldestMulti == nextMulti)
2719 : : {
2720 : : /* there are NO MultiXacts */
2721 : 0 : newOldestOffset = nextOffset;
2722 : : }
2723 [ # # ]: 0 : else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
2724 : : {
2725 [ # # ]: 0 : ereport(LOG,
2726 : : (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2727 : : newOldestMulti)));
2728 : 0 : LWLockRelease(MultiXactTruncationLock);
2729 : 0 : return;
2730 : : }
2731 : :
2732 : : /*
2733 : : * On crash, MultiXactIdCreateFromMembers() can leave behind multixids
2734 : : * that were not yet written out and hence have zero offset on disk. If
2735 : : * such a multixid becomes oldestMulti, we won't be able to look up its
2736 : : * offset. That should be rare, so we don't try to do anything smart about
2737 : : * it. Just skip the truncation, and hope that by the next truncation
2738 : : * attempt, oldestMulti has advanced to a valid multixid.
2739 : : */
110 heikki.linnakangas@i 2740 [ # # ]: 0 : if (newOldestOffset == 0)
2741 : : {
2742 [ # # ]: 0 : ereport(LOG,
2743 : : (errmsg("cannot truncate up to MultiXact %u because it has invalid offset, skipping truncation",
2744 : : newOldestMulti)));
2745 : 0 : LWLockRelease(MultiXactTruncationLock);
2746 : 0 : return;
2747 : : }
2748 : :
3874 andres@anarazel.de 2749 [ # # ]: 0 : elog(DEBUG1, "performing multixact truncation: "
2750 : : "oldestMulti %u (offsets segment %" PRIx64 "), "
2751 : : "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2752 : : newOldestMulti,
2753 : : MultiXactIdToOffsetSegment(newOldestMulti),
2754 : : newOldestOffset,
2755 : : MXOffsetToMemberSegment(newOldestOffset));
2756 : :
2757 : : /*
2758 : : * Do truncation, and the WAL logging of the truncation, in a critical
2759 : : * section. That way offsets/members cannot get out of sync anymore, i.e.
2760 : : * once consistent the newOldestMulti will always exist in members, even
2761 : : * if we crashed in the wrong moment.
2762 : : */
2763 : 0 : START_CRIT_SECTION();
2764 : :
2765 : : /*
2766 : : * Prevent checkpoints from being scheduled concurrently. This is critical
2767 : : * because otherwise a truncation record might not be replayed after a
2768 : : * crash/basebackup, even though the state of the data directory would
2769 : : * require it.
2770 : : */
1488 rhaas@postgresql.org 2771 [ # # ]: 0 : Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
2772 : 0 : MyProc->delayChkptFlags |= DELAY_CHKPT_START;
2773 : :
2774 : : /* WAL log truncation */
110 heikki.linnakangas@i 2775 :UNC 0 : WriteMTruncateXlogRec(newOldestMultiDB, newOldestMulti, newOldestOffset);
2776 : :
2777 : : /*
2778 : : * Update in-memory limits before performing the truncation, while inside
2779 : : * the critical section: Have to do it before truncation, to prevent
2780 : : * concurrent lookups of those values. Has to be inside the critical
2781 : : * section as otherwise a future call to this function would error out,
2782 : : * while looking up the oldest member in offsets, if our caller crashes
2783 : : * before updating the limits.
2784 : : */
3874 andres@anarazel.de 2785 :UBC 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2786 : 0 : MultiXactState->oldestMultiXactId = newOldestMulti;
2787 : 0 : MultiXactState->oldestMultiXactDB = newOldestMultiDB;
147 heikki.linnakangas@i 2788 :UNC 0 : MultiXactState->oldestOffset = newOldestOffset;
3874 andres@anarazel.de 2789 :UBC 0 : LWLockRelease(MultiXactGenLock);
2790 : :
2791 : : /* First truncate members */
110 heikki.linnakangas@i 2792 :UNC 0 : PerformMembersTruncation(newOldestOffset);
2793 : :
2794 : : /* Then offsets */
2795 : 0 : PerformOffsetsTruncation(newOldestMulti);
2796 : :
1488 rhaas@postgresql.org 2797 :UBC 0 : MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
2798 : :
3874 andres@anarazel.de 2799 [ # # ]: 0 : END_CRIT_SECTION();
2800 : 0 : LWLockRelease(MultiXactTruncationLock);
2801 : : }
2802 : :
2803 : : /*
2804 : : * Decide whether a MultiXactOffset page number is "older" for truncation
2805 : : * purposes. Analogous to CLOGPagePrecedes().
2806 : : *
2807 : : * Offsetting the values is optional, because MultiXactIdPrecedes() has
2808 : : * translational symmetry.
2809 : : */
2810 : : static bool
888 akorotkov@postgresql 2811 :CBC 48399 : MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
2812 : : {
2813 : : MultiXactId multi1;
2814 : : MultiXactId multi2;
2815 : :
7677 tgl@sss.pgh.pa.us 2816 : 48399 : multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
1935 noah@leadboat.com 2817 : 48399 : multi1 += FirstMultiXactId + 1;
7677 tgl@sss.pgh.pa.us 2818 : 48399 : multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
1935 noah@leadboat.com 2819 : 48399 : multi2 += FirstMultiXactId + 1;
2820 : :
2821 [ + + + + ]: 80665 : return (MultiXactIdPrecedes(multi1, multi2) &&
2822 : 32266 : MultiXactIdPrecedes(multi1,
2823 : : multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
2824 : : }
2825 : :
2826 : : /*
2827 : : * Decide whether a MultiXactMember page number is "older" for truncation
2828 : : * purposes. There is no "invalid offset number" and members never wrap
2829 : : * around, so use the numbers verbatim.
2830 : : */
2831 : : static bool
888 akorotkov@postgresql 2832 :UBC 0 : MultiXactMemberPagePrecedes(int64 page1, int64 page2)
2833 : : {
147 heikki.linnakangas@i 2834 :UNC 0 : return page1 < page2;
2835 : : }
2836 : :
2837 : : static int
53 2838 : 0 : MultiXactOffsetIoErrorDetail(const void *opaque_data)
2839 : : {
2840 : 0 : MultiXactId multixid = *(const MultiXactId *) opaque_data;
2841 : :
2842 : 0 : return errdetail("Could not access offset of multixact %u.", multixid);
2843 : : }
2844 : :
2845 : : static int
2846 : 0 : MultiXactMemberIoErrorDetail(const void *opaque_data)
2847 : : {
2848 : 0 : const MultiXactMemberSlruReadContext *context = opaque_data;
2849 : :
2850 [ # # ]: 0 : if (MultiXactIdIsValid(context->multi))
2851 : 0 : return errdetail("Could not access member of multixact %u at offset %" PRIu64 ".",
2852 : 0 : context->multi, context->offset);
2853 : : else
2854 : 0 : return errdetail("Could not access multixact member at offset %" PRIu64 ".",
2855 : 0 : context->offset);
2856 : : }
2857 : :
2858 : : /*
2859 : : * Decide which of two MultiXactIds is earlier.
2860 : : *
2861 : : * XXX do we need to do something special for InvalidMultiXactId?
2862 : : * (Doesn't look like it.)
2863 : : */
2864 : : bool
7677 tgl@sss.pgh.pa.us 2865 :CBC 1085212 : MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
2866 : : {
7507 bruce@momjian.us 2867 : 1085212 : int32 diff = (int32) (multi1 - multi2);
2868 : :
7677 tgl@sss.pgh.pa.us 2869 : 1085212 : return (diff < 0);
2870 : : }
2871 : :
2872 : : /*
2873 : : * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2874 : : *
2875 : : * XXX do we need to do something special for InvalidMultiXactId?
2876 : : * (Doesn't look like it.)
2877 : : */
2878 : : bool
4541 alvherre@alvh.no-ip. 2879 : 7234 : MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
2880 : : {
2881 : 7234 : int32 diff = (int32) (multi1 - multi2);
2882 : :
2883 : 7234 : return (diff <= 0);
2884 : : }
2885 : :
2886 : :
2887 : : /*
2888 : : * Write a TRUNCATE xlog record
2889 : : *
2890 : : * We must flush the xlog record to disk before returning --- see notes in
2891 : : * TruncateCLOG().
2892 : : */
2893 : : static void
3874 andres@anarazel.de 2894 :UBC 0 : WriteMTruncateXlogRec(Oid oldestMultiDB,
2895 : : MultiXactId oldestMulti,
2896 : : MultiXactOffset oldestOffset)
2897 : : {
2898 : : XLogRecPtr recptr;
2899 : : xl_multixact_truncate xlrec;
2900 : :
2901 : 0 : xlrec.oldestMultiDB = oldestMultiDB;
110 heikki.linnakangas@i 2902 :UNC 0 : xlrec.oldestMulti = oldestMulti;
2903 : 0 : xlrec.oldestOffset = oldestOffset;
2904 : :
3874 andres@anarazel.de 2905 :UBC 0 : XLogBeginInsert();
448 peter@eisentraut.org 2906 : 0 : XLogRegisterData(&xlrec, SizeOfMultiXactTruncate);
3874 andres@anarazel.de 2907 : 0 : recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
2908 : 0 : XLogFlush(recptr);
2909 : 0 : }
2910 : :
2911 : : /*
2912 : : * MULTIXACT resource manager's routines
2913 : : */
2914 : : void
4184 heikki.linnakangas@i 2915 :CBC 5 : multixact_redo(XLogReaderState *record)
2916 : : {
2917 : 5 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2918 : :
2919 : : /* Backup blocks are not used in multixact records */
2920 [ - + ]: 5 : Assert(!XLogRecHasAnyBlockRefs(record));
2921 : :
7636 tgl@sss.pgh.pa.us 2922 [ - + ]: 5 : if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2923 : : {
2924 : : int64 pageno;
2925 : :
888 akorotkov@postgresql 2926 :UBC 0 : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
302 alvherre@kurilemu.de 2927 :UNC 0 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
2928 : : }
7636 tgl@sss.pgh.pa.us 2929 [ - + ]:CBC 5 : else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2930 : : {
2931 : : int64 pageno;
2932 : :
888 akorotkov@postgresql 2933 :LBC (2) : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
302 alvherre@kurilemu.de 2934 :UNC 0 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
2935 : : }
7636 tgl@sss.pgh.pa.us 2936 [ + - ]:CBC 5 : else if (info == XLOG_MULTIXACT_CREATE_ID)
2937 : : {
4850 alvherre@alvh.no-ip. 2938 : 5 : xl_multixact_create *xlrec =
1082 tgl@sss.pgh.pa.us 2939 : 5 : (xl_multixact_create *) XLogRecGetData(record);
2940 : : TransactionId max_xid;
2941 : : int i;
2942 : :
2943 : : /* Store the data back into the SLRU files */
4850 alvherre@alvh.no-ip. 2944 : 5 : RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2945 : 5 : xlrec->members);
2946 : :
2947 : : /* Make sure nextMXact/nextOffset are beyond what this record has */
144 heikki.linnakangas@i 2948 :GNC 5 : MultiXactAdvanceNextMXact(NextMultiXactId(xlrec->mid),
4850 alvherre@alvh.no-ip. 2949 :CBC 5 : xlrec->moff + xlrec->nmembers);
2950 : :
2951 : : /*
2952 : : * Make sure nextXid is beyond any XID mentioned in the record. This
2953 : : * should be unnecessary, since any XID found here ought to have other
2954 : : * evidence in the XLOG, but let's be safe.
2955 : : */
4184 heikki.linnakangas@i 2956 : 5 : max_xid = XLogRecGetXid(record);
4850 alvherre@alvh.no-ip. 2957 [ + + ]: 15 : for (i = 0; i < xlrec->nmembers; i++)
2958 : : {
2959 [ - + ]: 10 : if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
4850 alvherre@alvh.no-ip. 2960 :UBC 0 : max_xid = xlrec->members[i].xid;
2961 : : }
2962 : :
2595 tmunro@postgresql.or 2963 :CBC 5 : AdvanceNextFullTransactionIdPastXid(max_xid);
2964 : : }
3874 andres@anarazel.de 2965 [ # # ]:UBC 0 : else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2966 : : {
2967 : : xl_multixact_truncate xlrec;
2968 : :
2969 : 0 : memcpy(&xlrec, XLogRecGetData(record),
2970 : : SizeOfMultiXactTruncate);
2971 : :
2972 [ # # ]: 0 : elog(DEBUG1, "replaying multixact truncation: "
2973 : : "oldestMulti %u (offsets segment %" PRIx64 "), "
2974 : : "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2975 : : xlrec.oldestMulti,
2976 : : MultiXactIdToOffsetSegment(xlrec.oldestMulti),
2977 : : xlrec.oldestOffset,
2978 : : MXOffsetToMemberSegment(xlrec.oldestOffset));
2979 : :
2980 : : /* should not be required, but more than cheap enough */
2981 : 0 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2982 : :
2983 : : /*
2984 : : * Advance the horizon values, so they're current at the end of
2985 : : * recovery.
2986 : : */
110 heikki.linnakangas@i 2987 :UNC 0 : SetMultiXactIdLimit(xlrec.oldestMulti, xlrec.oldestMultiDB);
2988 : :
2989 : 0 : PerformMembersTruncation(xlrec.oldestOffset);
2990 : 0 : PerformOffsetsTruncation(xlrec.oldestMulti);
2991 : :
3874 andres@anarazel.de 2992 :UBC 0 : LWLockRelease(MultiXactTruncationLock);
2993 : : }
2994 : : else
7636 tgl@sss.pgh.pa.us 2995 [ # # ]: 0 : elog(PANIC, "multixact_redo: unknown op code %u", info);
7636 tgl@sss.pgh.pa.us 2996 :CBC 5 : }
2997 : :
2998 : : /*
2999 : : * Entrypoint for sync.c to sync offsets files.
3000 : : */
3001 : : int
2048 tmunro@postgresql.or 3002 :UBC 0 : multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
3003 : : {
3004 : 0 : return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
3005 : : }
3006 : :
3007 : : /*
3008 : : * Entrypoint for sync.c to sync members files.
3009 : : */
3010 : : int
3011 : 0 : multixactmemberssyncfiletag(const FileTag *ftag, char *path)
3012 : : {
3013 : 0 : return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
3014 : : }
|