Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * multixact.c
4 : : * PostgreSQL multi-transaction-log manager
5 : : *
6 : : * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 : : * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 : : * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 : : * TransactionId and a set of flag bits. The name is a bit historical:
10 : : * originally, a MultiXactId consisted of more than one TransactionId (except
11 : : * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 : : * legitimate to have MultiXactIds that only include a single Xid.
13 : : *
14 : : * The meaning of the flag bits is opaque to this module, but they are mostly
15 : : * used in heapam.c to identify lock modes that each of the member transactions
16 : : * is holding on any given tuple. This module just contains support to store
17 : : * and retrieve the arrays.
18 : : *
19 : : * We use two SLRU areas, one for storing the offsets at which the data
20 : : * starts for each MultiXactId in the other one. This trick allows us to
21 : : * store variable length arrays of TransactionIds. (We could alternatively
22 : : * use one area containing counts and TransactionIds, with valid MultiXactId
23 : : * values pointing at slots containing counts; but that way seems less robust
24 : : * since it would get completely confused if someone inquired about a bogus
25 : : * MultiXactId that pointed to an intermediate slot containing an XID.)
26 : : *
27 : : * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 : : * MEMBERs page is initialized to zeroes, as well as an
29 : : * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 : : * This module ignores the WAL rule "write xlog before data," because it
31 : : * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 : : * rule. The only way for the MXID to be referenced from any data page is for
33 : : * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 : : * an XLOG record that must follow ours. The normal LSN interlock between the
35 : : * data page and that XLOG record will ensure that our XLOG record reaches
36 : : * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 : : * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 : : * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 : : * module's XLOG records completely rebuild the data entered since the last
40 : : * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 : : * before each checkpoint is considered complete.
42 : : *
43 : : * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 : : * crashes and ensure that MXID and offset numbering increases monotonically
45 : : * across a crash. We do this in the same way as it's done for transaction
46 : : * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 : : * could need to worry about, and we just make sure that at the end of
48 : : * replay, the next-MXID and next-offset counters are at least as large as
49 : : * anything we saw during replay.
50 : : *
51 : : * We are able to remove segments no longer necessary by carefully tracking
52 : : * each table's used values: during vacuum, any multixact older than a certain
53 : : * value is removed; the cutoff value is stored in pg_class. The minimum value
54 : : * across all tables in each database is stored in pg_database, and the global
55 : : * minimum across all databases is part of pg_control and is kept in shared
56 : : * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 : : *
58 : : * When new multixactid values are to be created, care is taken that the
59 : : * counter does not fall within the wraparound horizon considering the global
60 : : * minimum value.
61 : : *
62 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
63 : : * Portions Copyright (c) 1994, Regents of the University of California
64 : : *
65 : : * src/backend/access/transam/multixact.c
66 : : *
67 : : *-------------------------------------------------------------------------
68 : : */
69 : : #include "postgres.h"
70 : :
71 : : #include "access/multixact.h"
72 : : #include "access/multixact_internal.h"
73 : : #include "access/slru.h"
74 : : #include "access/twophase.h"
75 : : #include "access/twophase_rmgr.h"
76 : : #include "access/xlog.h"
77 : : #include "access/xloginsert.h"
78 : : #include "access/xlogutils.h"
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "postmaster/autovacuum.h"
83 : : #include "storage/pmsignal.h"
84 : : #include "storage/proc.h"
85 : : #include "storage/procarray.h"
86 : : #include "utils/guc_hooks.h"
87 : : #include "utils/injection_point.h"
88 : : #include "utils/lsyscache.h"
89 : : #include "utils/memutils.h"
90 : :
91 : :
92 : : /*
93 : : * Thresholds used to keep members disk usage in check when multixids have a
94 : : * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
95 : : * starts freezing multixids more aggressively, even if the normal multixid
96 : : * age limits haven't been reached yet.
97 : : */
98 : : #define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
99 : : #define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
100 : :
101 : : static inline MultiXactId
93 heikki.linnakangas@i 102 :GNC 107661 : NextMultiXactId(MultiXactId multi)
103 : : {
104 [ + + ]: 107661 : return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
105 : : }
106 : :
107 : : static inline MultiXactId
637 heikki.linnakangas@i 108 :UBC 0 : PreviousMultiXactId(MultiXactId multi)
109 : : {
110 [ # # ]: 0 : return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
111 : : }
112 : :
113 : : /*
114 : : * Links to shared-memory data structures for MultiXact control
115 : : */
116 : : static SlruCtlData MultiXactOffsetCtlData;
117 : : static SlruCtlData MultiXactMemberCtlData;
118 : :
119 : : #define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
120 : : #define MultiXactMemberCtl (&MultiXactMemberCtlData)
121 : :
122 : : /*
123 : : * MultiXact state shared across all backends. All this state is protected
124 : : * by MultiXactGenLock. (We also use SLRU bank's lock of MultiXactOffset and
125 : : * MultiXactMember to guard accesses to the two sets of SLRU buffers. For
126 : : * concurrency's sake, we avoid holding more than one of these locks at a
127 : : * time.)
128 : : */
129 : : typedef struct MultiXactStateData
130 : : {
131 : : /* next-to-be-assigned MultiXactId */
132 : : MultiXactId nextMXact;
133 : :
134 : : /* next-to-be-assigned offset */
135 : : MultiXactOffset nextOffset;
136 : :
137 : : /* Have we completed multixact startup? */
138 : : bool finishedStartup;
139 : :
140 : : /*
141 : : * Oldest multixact that is still potentially referenced by a relation.
142 : : * Anything older than this should not be consulted. These values are
143 : : * updated by vacuum.
144 : : */
145 : : MultiXactId oldestMultiXactId;
146 : : Oid oldestMultiXactDB;
147 : :
148 : : /*
149 : : * Oldest multixact offset that is potentially referenced by a multixact
150 : : * referenced by a relation.
151 : : */
152 : : MultiXactOffset oldestOffset;
153 : :
154 : : /* support for anti-wraparound measures */
155 : : MultiXactId multiVacLimit;
156 : : MultiXactId multiWarnLimit;
157 : : MultiXactId multiStopLimit;
158 : : MultiXactId multiWrapLimit;
159 : :
160 : : /*
161 : : * Per-backend data starts here. We have two arrays stored in the area
162 : : * immediately following the MultiXactStateData struct:
163 : : *
164 : : * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
165 : : * transaction(s) could possibly be a member of, or InvalidMultiXactId
166 : : * when the backend has no live transaction that could possibly be a
167 : : * member of a MultiXact. Each backend sets its entry to the current
168 : : * nextMXact counter just before first acquiring a shared lock in a given
169 : : * transaction, and clears it at transaction end. (This works because only
170 : : * during or after acquiring a shared lock could an XID possibly become a
171 : : * member of a MultiXact, and that MultiXact would have to be created
172 : : * during or after the lock acquisition.)
173 : : *
174 : : * In the OldestMemberMXactId array, there's a slot for all normal
175 : : * backends (0..MaxBackends-1) followed by a slot for max_prepared_xacts
176 : : * prepared transactions.
177 : : *
178 : : * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
179 : : * current transaction(s) think is potentially live, or InvalidMultiXactId
180 : : * when not in a transaction or not in a transaction that's paid any
181 : : * attention to MultiXacts yet. This is computed when first needed in a
182 : : * given transaction, and cleared at transaction end. We can compute it
183 : : * as the minimum of the valid OldestMemberMXactId[] entries at the time
184 : : * we compute it (using nextMXact if none are valid). Each backend is
185 : : * required not to attempt to access any SLRU data for MultiXactIds older
186 : : * than its own OldestVisibleMXactId[] setting; this is necessary because
187 : : * the relevant SLRU data can be concurrently truncated away.
188 : : *
189 : : * In the OldestVisibleMXactId array, there's a slot for all normal
190 : : * backends (0..MaxBackends-1) only. No slots for prepared transactions.
191 : : *
192 : : * The oldest valid value among all of the OldestMemberMXactId[] and
193 : : * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
194 : : * possible value still having any live member transaction -- OldestMxact.
195 : : * Any value older than that is typically removed from tuple headers, or
196 : : * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
197 : : * remove an individual MultiXact xmax whose value is >= its OldestMxact
198 : : * cutoff, though typically only when no individual member XID is still
199 : : * running. See FreezeMultiXactId for full details.
200 : : *
201 : : * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
202 : : * or the oldest extant Multi remaining in the table is used as the new
203 : : * pg_class.relminmxid value (whichever is earlier). The minimum of all
204 : : * relminmxid values in each database is stored in pg_database.datminmxid.
205 : : * In turn, the minimum of all of those values is stored in pg_control.
206 : : * This is used as the truncation point for pg_multixact when unneeded
207 : : * segments get removed by vac_truncate_clog() during vacuuming.
208 : : */
209 : : MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER];
210 : : } MultiXactStateData;
211 : :
212 : : /*
213 : : * Sizes of OldestMemberMXactId and OldestVisibleMXactId arrays.
214 : : */
215 : : #define NumMemberSlots (MaxBackends + max_prepared_xacts)
216 : : #define NumVisibleSlots MaxBackends
217 : :
218 : : /* Pointers to the state data in shared memory */
219 : : static MultiXactStateData *MultiXactState;
220 : : static MultiXactId *OldestMemberMXactId;
221 : : static MultiXactId *OldestVisibleMXactId;
222 : :
223 : :
224 : : static inline MultiXactId *
13 heikki.linnakangas@i 225 :CBC 2442402 : MyOldestMemberMXactIdSlot(void)
226 : : {
227 : : /*
228 : : * The first MaxBackends entries in the OldestMemberMXactId array are
229 : : * reserved for regular backends. MyProcNumber should index into one of
230 : : * them.
231 : : */
232 [ + - - + ]: 2442402 : Assert(MyProcNumber >= 0 && MyProcNumber < MaxBackends);
233 : 2442402 : return &OldestMemberMXactId[MyProcNumber];
234 : : }
235 : :
236 : : static inline MultiXactId *
237 : 147 : PreparedXactOldestMemberMXactIdSlot(ProcNumber procno)
238 : : {
239 : : int prepared_xact_idx;
240 : :
241 [ - + ]: 147 : Assert(procno >= FIRST_PREPARED_XACT_PROC_NUMBER);
242 : 147 : prepared_xact_idx = procno - FIRST_PREPARED_XACT_PROC_NUMBER;
243 : :
244 : : /*
245 : : * The first MaxBackends entries in the OldestMemberMXactId array are
246 : : * reserved for regular backends. Prepared xacts come after them.
247 : : */
248 [ - + ]: 147 : Assert(MaxBackends + prepared_xact_idx < NumMemberSlots);
249 : 147 : return &OldestMemberMXactId[MaxBackends + prepared_xact_idx];
250 : : }
251 : :
252 : : static inline MultiXactId *
253 : 436562 : MyOldestVisibleMXactIdSlot(void)
254 : : {
255 [ + - - + ]: 436562 : Assert(MyProcNumber >= 0 && MyProcNumber < NumVisibleSlots);
256 : 436562 : return &OldestVisibleMXactId[MyProcNumber];
257 : : }
258 : :
259 : : /*
260 : : * Definitions for the backend-local MultiXactId cache.
261 : : *
262 : : * We use this cache to store known MultiXacts, so we don't need to go to
263 : : * SLRU areas every time.
264 : : *
265 : : * The cache lasts for the duration of a single transaction, the rationale
266 : : * for this being that most entries will contain our own TransactionId and
267 : : * so they will be uninteresting by the time our next transaction starts.
268 : : * (XXX not clear that this is correct --- other members of the MultiXact
269 : : * could hang around longer than we did. However, it's not clear what a
270 : : * better policy for flushing old cache entries would be.) FIXME actually
271 : : * this is plain wrong now that multixact's may contain update Xids.
272 : : *
273 : : * We allocate the cache entries in a memory context that is deleted at
274 : : * transaction end, so we don't need to do retail freeing of entries.
275 : : */
276 : : typedef struct mXactCacheEnt
277 : : {
278 : : MultiXactId multi;
279 : : int nmembers;
280 : : dlist_node node;
281 : : MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
282 : : } mXactCacheEnt;
283 : :
284 : : #define MAX_CACHE_ENTRIES 256
285 : : static dclist_head MXactCache = DCLIST_STATIC_INIT(MXactCache);
286 : : static MemoryContext MXactContext = NULL;
287 : :
288 : : #ifdef MULTIXACT_DEBUG
289 : : #define debug_elog2(a,b) elog(a,b)
290 : : #define debug_elog3(a,b,c) elog(a,b,c)
291 : : #define debug_elog4(a,b,c,d) elog(a,b,c,d)
292 : : #define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
293 : : #define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
294 : : #else
295 : : #define debug_elog2(a,b)
296 : : #define debug_elog3(a,b,c)
297 : : #define debug_elog4(a,b,c,d)
298 : : #define debug_elog5(a,b,c,d,e)
299 : : #define debug_elog6(a,b,c,d,e,f)
300 : : #endif
301 : :
302 : : /* internal MultiXactId management */
303 : : static void MultiXactIdSetOldestVisible(void);
304 : : static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
305 : : int nmembers, MultiXactMember *members);
306 : : static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
307 : :
308 : : /* MultiXact cache management */
309 : : static int mxactMemberComparator(const void *arg1, const void *arg2);
310 : : static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
311 : : static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
312 : : static void mXactCachePut(MultiXactId multi, int nmembers,
313 : : MultiXactMember *members);
314 : :
315 : : /* management of SLRU infrastructure */
316 : :
317 : : /* opaque_data type for MultiXactMemberIoErrorDetail */
318 : : typedef struct MultiXactMemberSlruReadContext
319 : : {
320 : : MultiXactId multi;
321 : : MultiXactOffset offset;
322 : : } MultiXactMemberSlruReadContext;
323 : :
324 : : static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
325 : : static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
326 : : static int MultiXactOffsetIoErrorDetail(const void *opaque_data);
327 : : static int MultiXactMemberIoErrorDetail(const void *opaque_data);
328 : : static void ExtendMultiXactOffset(MultiXactId multi);
329 : : static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
330 : : static void SetOldestOffset(void);
331 : : static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
332 : : static void WriteMTruncateXlogRec(Oid oldestMultiDB,
333 : : MultiXactId endTruncOff,
334 : : MultiXactOffset endTruncMemb);
335 : :
336 : :
337 : : /*
338 : : * MultiXactIdCreate
339 : : * Construct a MultiXactId representing two TransactionIds.
340 : : *
341 : : * The two XIDs must be different, or be requesting different statuses.
342 : : *
343 : : * NB - we don't worry about our local MultiXactId cache here, because that
344 : : * is handled by the lower-level routines.
345 : : */
346 : : MultiXactId
4799 alvherre@alvh.no-ip. 347 : 1111 : MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
348 : : TransactionId xid2, MultiXactStatus status2)
349 : : {
350 : : MultiXactId newMulti;
351 : : MultiXactMember members[2];
352 : :
1234 peter@eisentraut.org 353 [ - + ]: 1111 : Assert(TransactionIdIsValid(xid1));
354 [ - + ]: 1111 : Assert(TransactionIdIsValid(xid2));
355 : :
4799 alvherre@alvh.no-ip. 356 [ + + - + ]: 1111 : Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
357 : :
358 : : /* MultiXactIdSetOldestMember() must have been called already. */
13 heikki.linnakangas@i 359 [ - + ]: 1111 : Assert(MultiXactIdIsValid(*MyOldestMemberMXactIdSlot()));
360 : :
361 : : /*
362 : : * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
363 : : * are still running. In typical usage, xid2 will be our own XID and the
364 : : * caller just did a check on xid1, so it'd be wasted effort.
365 : : */
366 : :
4799 alvherre@alvh.no-ip. 367 : 1111 : members[0].xid = xid1;
368 : 1111 : members[0].status = status1;
369 : 1111 : members[1].xid = xid2;
370 : 1111 : members[1].status = status2;
371 : :
4472 372 : 1111 : newMulti = MultiXactIdCreateFromMembers(2, members);
373 : :
374 : : debug_elog3(DEBUG2, "Create: %s",
375 : : mxid_to_string(newMulti, 2, members));
376 : :
7621 tgl@sss.pgh.pa.us 377 : 1111 : return newMulti;
378 : : }
379 : :
380 : : /*
381 : : * MultiXactIdExpand
382 : : * Add a TransactionId to a pre-existing MultiXactId.
383 : : *
384 : : * If the TransactionId is already a member of the passed MultiXactId with the
385 : : * same status, just return it as-is.
386 : : *
387 : : * Note that we do NOT actually modify the membership of a pre-existing
388 : : * MultiXactId; instead we create a new one. This is necessary to avoid
389 : : * a race condition against code trying to wait for one MultiXactId to finish;
390 : : * see notes in heapam.c.
391 : : *
392 : : * NB - we don't worry about our local MultiXactId cache here, because that
393 : : * is handled by the lower-level routines.
394 : : *
395 : : * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
396 : : * one upgraded by pg_upgrade from a cluster older than this feature) are not
397 : : * passed in.
398 : : */
399 : : MultiXactId
4799 alvherre@alvh.no-ip. 400 : 75542 : MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
401 : : {
402 : : MultiXactId newMulti;
403 : : MultiXactMember *members;
404 : : MultiXactMember *newMembers;
405 : : int nmembers;
406 : : int i;
407 : : int j;
408 : :
1234 peter@eisentraut.org 409 [ - + ]: 75542 : Assert(MultiXactIdIsValid(multi));
410 [ - + ]: 75542 : Assert(TransactionIdIsValid(xid));
411 : :
412 : : /* MultiXactIdSetOldestMember() must have been called already. */
13 heikki.linnakangas@i 413 [ - + ]: 75542 : Assert(MultiXactIdIsValid(*MyOldestMemberMXactIdSlot()));
414 : :
415 : : debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
416 : : multi, xid, mxstatus_to_string(status));
417 : :
418 : : /*
419 : : * Note: we don't allow for old multis here. The reason is that the only
420 : : * caller of this function does a check that the multixact is no longer
421 : : * running.
422 : : */
4247 alvherre@alvh.no-ip. 423 : 75542 : nmembers = GetMultiXactIdMembers(multi, &members, false, false);
424 : :
7626 tgl@sss.pgh.pa.us 425 [ - + ]: 75542 : if (nmembers < 0)
426 : : {
427 : : MultiXactMember member;
428 : :
429 : : /*
430 : : * The MultiXactId is obsolete. This can only happen if all the
431 : : * MultiXactId members stop running between the caller checking and
432 : : * passing it to us. It would be better to return that fact to the
433 : : * caller, but it would complicate the API and it's unlikely to happen
434 : : * too often, so just deal with it by creating a singleton MultiXact.
435 : : */
4799 alvherre@alvh.no-ip. 436 :UBC 0 : member.xid = xid;
437 : 0 : member.status = status;
4472 438 : 0 : newMulti = MultiXactIdCreateFromMembers(1, &member);
439 : :
440 : : debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
441 : : multi, newMulti);
7626 tgl@sss.pgh.pa.us 442 : 0 : return newMulti;
443 : : }
444 : :
445 : : /*
446 : : * If the TransactionId is already a member of the MultiXactId with the
447 : : * same status, just return the existing MultiXactId.
448 : : */
7626 tgl@sss.pgh.pa.us 449 [ + + ]:CBC 1465954 : for (i = 0; i < nmembers; i++)
450 : : {
4799 alvherre@alvh.no-ip. 451 [ + + ]: 1390412 : if (TransactionIdEquals(members[i].xid, xid) &&
452 [ - + ]: 54 : (members[i].status == status))
453 : : {
454 : : debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
455 : : xid, multi);
7617 tgl@sss.pgh.pa.us 456 :UBC 0 : pfree(members);
7626 457 : 0 : return multi;
458 : : }
459 : : }
460 : :
461 : : /*
462 : : * Determine which of the members of the MultiXactId are still of
463 : : * interest. This is any running transaction, and also any transaction
464 : : * that grabbed something stronger than just a lock and was committed. (An
465 : : * update that aborted is of no interest here; and having more than one
466 : : * update Xid in a multixact would cause errors elsewhere.)
467 : : *
468 : : * Removing dead members is not just an optimization: freezing of tuples
469 : : * whose Xmax are multis depends on this behavior.
470 : : *
471 : : * Note we have the same race condition here as above: j could be 0 at the
472 : : * end of the loop.
473 : : */
95 michael@paquier.xyz 474 :GNC 75542 : newMembers = palloc_array(MultiXactMember, nmembers + 1);
475 : :
7626 tgl@sss.pgh.pa.us 476 [ + + ]:CBC 1465954 : for (i = 0, j = 0; i < nmembers; i++)
477 : : {
4799 alvherre@alvh.no-ip. 478 [ + + ]: 1390412 : if (TransactionIdIsInProgress(members[i].xid) ||
4343 479 [ + + - + ]: 74689 : (ISUPDATE_from_mxstatus(members[i].status) &&
4799 480 : 17 : TransactionIdDidCommit(members[i].xid)))
481 : : {
482 : 1315740 : newMembers[j].xid = members[i].xid;
483 : 1315740 : newMembers[j++].status = members[i].status;
484 : : }
485 : : }
486 : :
487 : 75542 : newMembers[j].xid = xid;
488 : 75542 : newMembers[j++].status = status;
4472 489 : 75542 : newMulti = MultiXactIdCreateFromMembers(j, newMembers);
490 : :
7626 tgl@sss.pgh.pa.us 491 : 75542 : pfree(members);
492 : 75542 : pfree(newMembers);
493 : :
494 : : debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
495 : :
496 : 75542 : return newMulti;
497 : : }
498 : :
499 : : /*
500 : : * MultiXactIdIsRunning
501 : : * Returns whether a MultiXactId is "running".
502 : : *
503 : : * We return true if at least one member of the given MultiXactId is still
504 : : * running. Note that a "false" result is certain not to change,
505 : : * because it is not legal to add members to an existing MultiXactId.
506 : : *
507 : : * Caller is expected to have verified that the multixact does not come from
508 : : * a pg_upgraded share-locked tuple.
509 : : */
510 : : bool
4247 alvherre@alvh.no-ip. 511 : 149844 : MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
512 : : {
513 : : MultiXactMember *members;
514 : : int nmembers;
515 : : int i;
516 : :
517 : : debug_elog3(DEBUG2, "IsRunning %u?", multi);
518 : :
519 : : /*
520 : : * "false" here means we assume our callers have checked that the given
521 : : * multi cannot possibly come from a pg_upgraded database.
522 : : */
523 : 149844 : nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
524 : :
3992 525 [ + + ]: 149844 : if (nmembers <= 0)
526 : : {
527 : : debug_elog2(DEBUG2, "IsRunning: no members");
7626 tgl@sss.pgh.pa.us 528 : 735 : return false;
529 : : }
530 : :
531 : : /*
532 : : * Checking for myself is cheap compared to looking in shared memory;
533 : : * return true if any live subtransaction of the current top-level
534 : : * transaction is a member.
535 : : *
536 : : * This is not needed for correctness, it's just a fast path.
537 : : */
538 [ + + ]: 2891504 : for (i = 0; i < nmembers; i++)
539 : : {
4799 alvherre@alvh.no-ip. 540 [ + + ]: 2742551 : if (TransactionIdIsCurrentTransactionId(members[i].xid))
541 : : {
542 : : debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
7617 tgl@sss.pgh.pa.us 543 : 156 : pfree(members);
7626 544 : 156 : return true;
545 : : }
546 : : }
547 : :
548 : : /*
549 : : * This could be made faster by having another entry point in procarray.c,
550 : : * walking the PGPROC array only once for all the members. But in most
551 : : * cases nmembers should be small enough that it doesn't much matter.
552 : : */
553 [ + + ]: 296139 : for (i = 0; i < nmembers; i++)
554 : : {
4799 alvherre@alvh.no-ip. 555 [ + + ]: 296104 : if (TransactionIdIsInProgress(members[i].xid))
556 : : {
557 : : debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
558 : : i, members[i].xid);
7617 tgl@sss.pgh.pa.us 559 : 148918 : pfree(members);
7626 560 : 148918 : return true;
561 : : }
562 : : }
563 : :
564 : 35 : pfree(members);
565 : :
566 : : debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
567 : :
568 : 35 : return false;
569 : : }
570 : :
571 : : /*
572 : : * MultiXactIdSetOldestMember
573 : : * Save the oldest MultiXactId this transaction could be a member of.
574 : : *
575 : : * We set the OldestMemberMXactId for a given transaction the first time it's
576 : : * going to do some operation that might require a MultiXactId (tuple lock,
577 : : * update or delete). We need to do this even if we end up using a
578 : : * TransactionId instead of a MultiXactId, because there is a chance that
579 : : * another transaction would add our XID to a MultiXactId.
580 : : *
581 : : * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
582 : : * be called just before doing any such possibly-MultiXactId-able operation.
583 : : */
584 : : void
585 : 1957327 : MultiXactIdSetOldestMember(void)
586 : : {
13 heikki.linnakangas@i 587 [ + + ]: 1957327 : if (!MultiXactIdIsValid(*MyOldestMemberMXactIdSlot()))
588 : : {
589 : : MultiXactId nextMXact;
590 : :
591 : : /*
592 : : * You might think we don't need to acquire a lock here, since
593 : : * fetching and storing of TransactionIds is probably atomic, but in
594 : : * fact we do: suppose we pick up nextMXact and then lose the CPU for
595 : : * a long time. Someone else could advance nextMXact, and then
596 : : * another someone else could compute an OldestVisibleMXactId that
597 : : * would be after the value we are going to store when we get control
598 : : * back. Which would be wrong.
599 : : *
600 : : * Note that a shared lock is sufficient, because it's enough to stop
601 : : * someone from advancing nextMXact; and nobody else could be trying
602 : : * to write to our OldestMember entry, only reading (and we assume
603 : : * storing it is atomic.)
604 : : */
4455 alvherre@alvh.no-ip. 605 : 70863 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
606 : :
7626 tgl@sss.pgh.pa.us 607 : 70863 : nextMXact = MultiXactState->nextMXact;
608 : :
13 heikki.linnakangas@i 609 : 70863 : *MyOldestMemberMXactIdSlot() = nextMXact;
610 : :
7626 tgl@sss.pgh.pa.us 611 : 70863 : LWLockRelease(MultiXactGenLock);
612 : :
613 : : debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
614 : : MyProcNumber, nextMXact);
615 : : }
616 : 1957327 : }
617 : :
618 : : /*
619 : : * MultiXactIdSetOldestVisible
620 : : * Save the oldest MultiXactId this transaction considers possibly live.
621 : : *
622 : : * We set the OldestVisibleMXactId for a given transaction the first time
623 : : * it's going to inspect any MultiXactId. Once we have set this, we are
624 : : * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
625 : : * won't be truncated away.
626 : : *
627 : : * The value to set is the oldest of nextMXact and all the valid per-backend
628 : : * OldestMemberMXactId[] entries. Because of the locking we do, we can be
629 : : * certain that no subsequent call to MultiXactIdSetOldestMember can set
630 : : * an OldestMemberMXactId[] entry older than what we compute here. Therefore
631 : : * there is no live transaction, now or later, that can be a member of any
632 : : * MultiXactId older than the OldestVisibleMXactId we compute here.
633 : : */
634 : : static void
635 : 92469 : MultiXactIdSetOldestVisible(void)
636 : : {
13 heikki.linnakangas@i 637 [ + + ]: 92469 : if (!MultiXactIdIsValid(*MyOldestVisibleMXactIdSlot()))
638 : : {
639 : : MultiXactId oldestMXact;
640 : : int i;
641 : :
7626 tgl@sss.pgh.pa.us 642 : 3207 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
643 : :
644 : 3207 : oldestMXact = MultiXactState->nextMXact;
13 heikki.linnakangas@i 645 [ + + ]: 409153 : for (i = 0; i < NumMemberSlots; i++)
646 : : {
7626 tgl@sss.pgh.pa.us 647 : 405946 : MultiXactId thisoldest = OldestMemberMXactId[i];
648 : :
649 [ + + + + ]: 461875 : if (MultiXactIdIsValid(thisoldest) &&
650 : 55929 : MultiXactIdPrecedes(thisoldest, oldestMXact))
651 : 5676 : oldestMXact = thisoldest;
652 : : }
653 : :
13 heikki.linnakangas@i 654 : 3207 : *MyOldestVisibleMXactIdSlot() = oldestMXact;
655 : :
7626 tgl@sss.pgh.pa.us 656 : 3207 : LWLockRelease(MultiXactGenLock);
657 : :
658 : : debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
659 : : MyProcNumber, oldestMXact);
660 : : }
661 : 92469 : }
662 : :
663 : : /*
664 : : * ReadNextMultiXactId
665 : : * Return the next MultiXactId to be assigned, but don't allocate it
666 : : */
667 : : MultiXactId
4799 alvherre@alvh.no-ip. 668 : 31564 : ReadNextMultiXactId(void)
669 : : {
670 : : MultiXactId mxid;
671 : :
672 : : /* XXX we could presumably do this without a lock. */
673 : 31564 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
674 : 31564 : mxid = MultiXactState->nextMXact;
675 : 31564 : LWLockRelease(MultiXactGenLock);
676 : :
677 : 31564 : return mxid;
678 : : }
679 : :
680 : : /*
681 : : * ReadMultiXactIdRange
682 : : * Get the range of IDs that may still be referenced by a relation.
683 : : */
684 : : void
1970 rhaas@postgresql.org 685 : 1477 : ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
686 : : {
687 : 1477 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
688 : 1477 : *oldest = MultiXactState->oldestMultiXactId;
689 : 1477 : *next = MultiXactState->nextMXact;
690 : 1477 : LWLockRelease(MultiXactGenLock);
691 : 1477 : }
692 : :
693 : :
694 : : /*
695 : : * MultiXactIdCreateFromMembers
696 : : * Make a new MultiXactId from the specified set of members
697 : : *
698 : : * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
699 : : * given TransactionIds as members. Returns the newly created MultiXactId.
: : *
: : * If an identical member set is already present in this backend's local
: : * cache, the cached MultiXactId is returned instead and nothing is written.
: : * An ERROR is raised if more than one member has an updating status.
700 : : *
701 : : * NB: the passed members[] array will be sorted in-place.
702 : : */
703 : : MultiXactId
4472 alvherre@alvh.no-ip. 704 : 76654 : MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
705 : : {
706 : : MultiXactId multi;
707 : : MultiXactOffset offset;
708 : : xl_multixact_create xlrec;
709 : :
710 : : debug_elog3(DEBUG2, "Create: %s",
711 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
712 : :
713 : : /*
714 : : * See if the same set of members already exists in our cache; if so, just
715 : : * re-use that MultiXactId. (Note: it might seem that looking in our
716 : : * cache is insufficient, and we ought to search disk to see if a
717 : : * duplicate definition already exists. But since we only ever create
718 : : * MultiXacts containing our own XID, in most cases any such MultiXacts
719 : : * were in fact created by us, and so will be in our cache. There are
720 : : * corner cases where someone else added us to a MultiXact without our
721 : : * knowledge, but it's not worth checking for.)
722 : : */
4799 723 : 76654 : multi = mXactCacheGetBySet(nmembers, members);
7626 tgl@sss.pgh.pa.us 724 [ + + ]: 76654 : if (MultiXactIdIsValid(multi))
725 : : {
726 : : debug_elog2(DEBUG2, "Create: in cache!");
727 : 71348 : return multi;
728 : : }
729 : :
730 : : /* Verify that there is at most one update Xid among the given members. */
731 : : {
732 : : int i;
4343 alvherre@alvh.no-ip. 733 : 5306 : bool has_update = false;
734 : :
735 [ + + ]: 100042 : for (i = 0; i < nmembers; i++)
736 : : {
737 [ + + ]: 94736 : if (ISUPDATE_from_mxstatus(members[i].status))
738 : : {
739 [ - + ]: 2373 : if (has_update)
1207 alvherre@alvh.no-ip. 740 [ # # ]:UBC 0 : elog(ERROR, "new multixact has more than one updating member: %s",
741 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
4343 alvherre@alvh.no-ip. 742 :CBC 2373 : has_update = true;
743 : : }
744 : : }
745 : : }
746 : :
747 : : /* Load the injection point before entering the critical section */
569 michael@paquier.xyz 748 : 5306 : INJECTION_POINT_LOAD("multixact-create-from-members");
749 : :
750 : : /*
751 : : * Assign the MXID and offsets range to use, and make sure there is space
752 : : * in the OFFSETs and MEMBERs files. NB: this routine does
753 : : * START_CRIT_SECTION().
754 : : *
755 : : * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
756 : : * that we've called MultiXactIdSetOldestMember here. This is because
757 : : * this routine is used in some places to create new MultiXactIds of which
758 : : * the current backend is not a member, notably during freezing of multis
759 : : * in vacuum. During vacuum, in particular, it would be unacceptable to
760 : : * keep OldestMulti set, in case it runs for long.
761 : : */
4799 alvherre@alvh.no-ip. 762 : 5306 : multi = GetNewMultiXactId(nmembers, &offset);
763 : :
309 michael@paquier.xyz 764 : 5306 : INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
765 : :
766 : : /* Make an XLOG entry describing the new MXID. */
7585 tgl@sss.pgh.pa.us 767 : 5306 : xlrec.mid = multi;
768 : 5306 : xlrec.moff = offset;
4799 alvherre@alvh.no-ip. 769 : 5306 : xlrec.nmembers = nmembers;
770 : :
771 : : /*
772 : : * XXX Note: there's a lot of padding space in MultiXactMember. We could
773 : : * find a more compact representation of this Xlog record -- perhaps all
774 : : * the status flags in one XLogRecData, then all the xids in another one?
775 : : * Not clear that it's worth the trouble though.
776 : : */
4133 heikki.linnakangas@i 777 : 5306 : XLogBeginInsert();
397 peter@eisentraut.org 778 : 5306 : XLogRegisterData(&xlrec, SizeOfMultiXactCreate);
779 : 5306 : XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
780 : :
4133 heikki.linnakangas@i 781 : 5306 : (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
782 : :
783 : : /* Now enter the information into the OFFSETs and MEMBERs logs */
4799 alvherre@alvh.no-ip. 784 : 5306 : RecordNewMultiXact(multi, offset, nmembers, members);
785 : :
786 : : /* Done with critical section */
7443 tgl@sss.pgh.pa.us 787 [ - + ]: 5306 : END_CRIT_SECTION();
788 : :
789 : : /* Store the new MultiXactId in the local cache, too */
4799 alvherre@alvh.no-ip. 790 : 5306 : mXactCachePut(multi, nmembers, members);
791 : :
792 : : debug_elog2(DEBUG2, "Create: all done");
793 : :
7585 tgl@sss.pgh.pa.us 794 : 5306 : return multi;
795 : : }
796 : :
797 : : /*
798 : : * RecordNewMultiXact
799 : : * Write info about a new multixact into the offsets and members files
800 : : *
801 : : * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
802 : : * use it.
: : *
: : * "multi" is the MultiXactId being recorded, "offset" is the starting
: : * offset of its members, and members[0 .. nmembers-1] are the members to
: : * store. Two entries are written in the offsets SLRU: the start offset of
: : * "multi" itself and the start offset of the next multixid (offset +
: : * nmembers). The xid and status flag bits of every member are then
: : * written into the members SLRU.
: : *
: : * The offsets bank lock(s) are released before any members bank lock is
: : * taken; at most one SLRU bank lock is held at any time.
803 : : */
804 : : static void
805 : 5311 : RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
806 : : int nmembers, MultiXactMember *members)
807 : : {
808 : : int64 pageno;
809 : : int64 prev_pageno;
810 : : int entryno;
811 : : int slotno;
812 : : MultiXactOffset *offptr;
813 : : MultiXactId next;
814 : : int64 next_pageno;
815 : : int next_entryno;
816 : : MultiXactOffset *next_offptr;
817 : : MultiXactOffset next_offset;
818 : : LWLock *lock;
746 alvherre@alvh.no-ip. 819 : 5311 : LWLock *prevlock = NULL;
820 : :
821 : : /* position of this multixid in the offsets SLRU area */
7626 tgl@sss.pgh.pa.us 822 : 5311 : pageno = MultiXactIdToOffsetPage(multi);
823 : 5311 : entryno = MultiXactIdToOffsetEntry(multi);
824 : :
825 : : /* position of the next multixid */
93 heikki.linnakangas@i 826 :GNC 5311 : next = NextMultiXactId(multi);
102 heikki.linnakangas@i 827 :CBC 5311 : next_pageno = MultiXactIdToOffsetPage(next);
828 : 5311 : next_entryno = MultiXactIdToOffsetEntry(next);
829 : :
830 : : /*
831 : : * Set the starting offset of this multixid's members.
832 : : *
833 : : * In the common case, it was already set by the previous
834 : : * RecordNewMultiXact call, as this was the next multixid of the previous
835 : : * multixid. But if multiple backends are generating multixids
836 : : * concurrently, we might race ahead and get called before the previous
837 : : * multixid.
838 : : */
746 alvherre@alvh.no-ip. 839 : 5311 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
840 : 5311 : LWLockAcquire(lock, LW_EXCLUSIVE);
841 : :
2 heikki.linnakangas@i 842 :GNC 5311 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
7585 tgl@sss.pgh.pa.us 843 :CBC 5311 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7626 844 : 5311 : offptr += entryno;
845 : :
102 heikki.linnakangas@i 846 [ + + ]: 5311 : if (*offptr != offset)
847 : : {
848 : : /* should already be set to the correct value, or not at all */
849 [ - + ]: 1 : Assert(*offptr == 0);
850 : 1 : *offptr = offset;
851 : 1 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
852 : : }
853 : :
854 : : /*
855 : : * Set the next multixid's offset to the end of this multixid's members.
856 : : */
857 [ + + ]: 5311 : if (next_pageno == pageno)
858 : : {
859 : 5306 : next_offptr = offptr + 1;
860 : : }
861 : : else
862 : : {
863 : : /* must be the first entry on the page */
102 heikki.linnakangas@i 864 [ + + - + ]:GBC 5 : Assert(next_entryno == 0 || next == FirstMultiXactId);
865 : :
866 : : /* Swap the lock for a lock on the next page */
867 : 5 : LWLockRelease(lock);
868 : 5 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, next_pageno);
869 : 5 : LWLockAcquire(lock, LW_EXCLUSIVE);
870 : :
2 heikki.linnakangas@i 871 :GNC 5 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, next_pageno, true, &next);
102 heikki.linnakangas@i 872 :GBC 5 : next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
873 : 5 : next_offptr += next_entryno;
874 : : }
875 : :
876 : : /* Skip over offset 0: an offset of 0 is treated as "not set" (see the Asserts) */
100 heikki.linnakangas@i 877 :CBC 5311 : next_offset = offset + nmembers;
878 [ - + ]: 5311 : if (next_offset == 0)
100 heikki.linnakangas@i 879 :UBC 0 : next_offset = 1;
100 heikki.linnakangas@i 880 [ + - ]:CBC 5311 : if (*next_offptr != next_offset)
881 : : {
882 : : /* should already be set to the correct value, or not at all */
102 883 [ - + ]: 5311 : Assert(*next_offptr == 0);
100 884 : 5311 : *next_offptr = next_offset;
102 885 : 5311 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
886 : : }
887 : :
888 : : /* Release MultiXactOffset SLRU lock. */
746 alvherre@alvh.no-ip. 889 : 5311 : LWLockRelease(lock);
890 : :
7626 tgl@sss.pgh.pa.us 891 : 5311 : prev_pageno = -1;
892 : :
102 heikki.linnakangas@i 893 [ + + ]: 100057 : for (int i = 0; i < nmembers; i++, offset++)
894 : : {
895 : : TransactionId *memberptr;
896 : : uint32 *flagsptr;
897 : : uint32 flagsval;
898 : : int bshift;
899 : : int flagsoff;
900 : : int memberoff;
901 : :
4799 alvherre@alvh.no-ip. 902 [ - + ]: 94746 : Assert(members[i].status <= MultiXactStatusUpdate);
903 : :
7626 tgl@sss.pgh.pa.us 904 : 94746 : pageno = MXOffsetToMemberPage(offset);
4799 alvherre@alvh.no-ip. 905 : 94746 : memberoff = MXOffsetToMemberOffset(offset);
906 : 94746 : flagsoff = MXOffsetToFlagsOffset(offset);
907 : 94746 : bshift = MXOffsetToFlagsBitShift(offset);
908 : :
7626 tgl@sss.pgh.pa.us 909 [ + + ]: 94746 : if (pageno != prev_pageno)
910 : : {
2 heikki.linnakangas@i 911 :GNC 5365 : MultiXactMemberSlruReadContext slru_read_context = {multi, offset};
912 : :
913 : : /*
914 : : * The MultiXactMember SLRU page has changed; check whether the new
915 : : * page falls into a different SLRU bank and, if so, release the old
916 : : * bank's lock and acquire the lock of the new bank.
917 : : */
746 alvherre@alvh.no-ip. 918 :CBC 5365 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
919 [ + - ]: 5365 : if (lock != prevlock)
920 : : {
921 [ + + ]: 5365 : if (prevlock != NULL)
746 alvherre@alvh.no-ip. 922 :GBC 54 : LWLockRelease(prevlock);
923 : :
746 alvherre@alvh.no-ip. 924 :CBC 5365 : LWLockAcquire(lock, LW_EXCLUSIVE);
925 : 5365 : prevlock = lock;
926 : : }
2 heikki.linnakangas@i 927 :GNC 5365 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true,
928 : : &slru_read_context);
7626 tgl@sss.pgh.pa.us 929 :CBC 5365 : prev_pageno = pageno;
930 : : }
931 : :
932 : 94746 : memberptr = (TransactionId *)
4799 alvherre@alvh.no-ip. 933 : 94746 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
934 : :
935 : 94746 : *memberptr = members[i].xid;
936 : :
937 : 94746 : flagsptr = (uint32 *)
938 : 94746 : (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
939 : :
940 : 94746 : flagsval = *flagsptr;
941 : 94746 : flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
942 : 94746 : flagsval |= (members[i].status << bshift);
943 : 94746 : *flagsptr = flagsval;
944 : :
7435 tgl@sss.pgh.pa.us 945 : 94746 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
946 : : }
947 : :
746 alvherre@alvh.no-ip. 948 [ + - ]: 5311 : if (prevlock != NULL)
949 : 5311 : LWLockRelease(prevlock);
7626 tgl@sss.pgh.pa.us 950 : 5311 : }
951 : :
952 : : /*
953 : : * GetNewMultiXactId
954 : : * Get the next MultiXactId.
955 : : *
956 : : * Also, reserve the needed amount of space in the "members" area. The
957 : : * starting offset of the reserved space is returned in *offset.
958 : : *
959 : : * This may generate XLOG records for expansion of the offsets and/or members
960 : : * files. Unfortunately, we have to do that while holding MultiXactGenLock
961 : : * to avoid race conditions --- the XLOG record for zeroing a page must appear
962 : : * before any backend can possibly try to store data in that page!
963 : : *
964 : : * We start a critical section before advancing the shared counters. The
965 : : * caller must end the critical section after writing SLRU data.
: : *
: : * An ERROR is raised if recovery is in progress, if (under postmaster) the
: : * next MultiXactId is at or past multiStopLimit, or if reserving nmembers
: : * member slots would wrap the members offset around.
966 : : */
967 : : static MultiXactId
4799 alvherre@alvh.no-ip. 968 : 5306 : GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
969 : : {
970 : : MultiXactId result;
971 : : MultiXactOffset nextOffset;
972 : :
973 : : debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
974 : :
975 : : /* safety check, we should never get this far in a HS standby */
976 [ - + ]: 5306 : if (RecoveryInProgress())
4799 alvherre@alvh.no-ip. 977 [ # # ]:UBC 0 : elog(ERROR, "cannot assign MultiXactIds during recovery");
978 : :
7626 tgl@sss.pgh.pa.us 979 :CBC 5306 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
980 : :
981 : : /* Assign the MXID */
982 : 5306 : result = MultiXactState->nextMXact;
983 : :
984 : : /*----------
985 : : * Check to see if it's safe to assign another MultiXactId. This protects
986 : : * against catastrophic data loss due to multixact wraparound. The basic
987 : : * rules are:
988 : : *
989 : : * If we're past multiVacLimit or the safe threshold for member storage
990 : : * space, or we don't know what the safe threshold for member storage is,
991 : : * start trying to force autovacuum cycles.
992 : : * If we're past multiWarnLimit, start issuing warnings.
993 : : * If we're past multiStopLimit, refuse to create new MultiXactIds.
994 : : *
995 : : * Note these are pretty much the same protections in GetNewTransactionId.
996 : : *----------
997 : : */
3920 andres@anarazel.de 998 [ - + ]: 5306 : if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
999 : : {
1000 : : /*
1001 : : * For safety's sake, we release MultiXactGenLock while sending
1002 : : * signals, warnings, etc. This is not so much because we care about
1003 : : * preserving concurrency in this situation, as to avoid any
1004 : : * possibility of deadlock while doing get_database_name(). First,
1005 : : * copy all the shared values we'll need in this path.
1006 : : */
4799 alvherre@alvh.no-ip. 1007 :UBC 0 : MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
1008 : 0 : MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
1009 : 0 : MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
1010 : 0 : Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
1011 : :
1012 : 0 : LWLockRelease(MultiXactGenLock);
1013 : :
1014 [ # # ]: 0 : if (IsUnderPostmaster &&
1015 [ # # ]: 0 : !MultiXactIdPrecedes(result, multiStopLimit))
1016 : : {
1017 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1018 : :
1019 : : /*
1020 : : * Immediately kick autovacuum into action as we're already in
1021 : : * ERROR territory.
1022 : : */
3920 andres@anarazel.de 1023 : 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1024 : :
1025 : : /* complain even if that DB has disappeared */
4799 alvherre@alvh.no-ip. 1026 [ # # ]: 0 : if (oldest_datname)
1027 [ # # ]: 0 : ereport(ERROR,
1028 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1029 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
1030 : : oldest_datname),
1031 : : errhint("Execute a database-wide VACUUM in that database.\n"
1032 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1033 : : else
1034 [ # # ]: 0 : ereport(ERROR,
1035 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1036 : : errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
1037 : : oldest_datoid),
1038 : : errhint("Execute a database-wide VACUUM in that database.\n"
1039 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1040 : : }
1041 : :
1042 : : /*
1043 : : * To avoid swamping the postmaster with signals, we issue the autovac
1044 : : * request only once per 64K multis generated. This still gives
1045 : : * plenty of chances before we get into real trouble.
1046 : : */
93 heikki.linnakangas@i 1047 [ # # # # :UNC 0 : if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
# # ]
3920 andres@anarazel.de 1048 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
1049 : :
1050 [ # # ]: 0 : if (!MultiXactIdPrecedes(result, multiWarnLimit))
1051 : : {
4799 alvherre@alvh.no-ip. 1052 : 0 : char *oldest_datname = get_database_name(oldest_datoid);
1053 : :
1054 : : /* complain even if that DB has disappeared */
1055 [ # # ]: 0 : if (oldest_datname)
1056 [ # # ]: 0 : ereport(WARNING,
1057 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1058 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1059 : : multiWrapLimit - result,
1060 : : oldest_datname,
1061 : : multiWrapLimit - result),
1062 : : errhint("Execute a database-wide VACUUM in that database.\n"
1063 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1064 : : else
1065 [ # # ]: 0 : ereport(WARNING,
1066 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1067 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1068 : : multiWrapLimit - result,
1069 : : oldest_datoid,
1070 : : multiWrapLimit - result),
1071 : : errhint("Execute a database-wide VACUUM in that database.\n"
1072 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1073 : : }
1074 : :
1075 : : /* Re-acquire lock and re-read nextMXact, which may have advanced meanwhile */
1076 : 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1077 : 0 : result = MultiXactState->nextMXact;
1078 : : }
1079 : :
1080 : : /*
1081 : : * Make sure there is room for the next MXID in the file. Assigning this
1082 : : * MXID sets the next MXID's offset already.
1083 : : */
93 heikki.linnakangas@i 1084 :GNC 5306 : ExtendMultiXactOffset(NextMultiXactId(result));
1085 : :
1086 : : /*
1087 : : * Reserve the members space, similarly to above.
1088 : : */
7443 tgl@sss.pgh.pa.us 1089 :CBC 5306 : nextOffset = MultiXactState->nextOffset;
1090 : :
1091 : : /*
1092 : : * Offsets are 64-bit integers and will never wrap around. Firstly, it
1093 : : * would take an unrealistic amount of time and resources to consume 2^64
1094 : : * offsets. Secondly, multixid creation is WAL-logged, so you would run
1095 : : * out of LSNs before reaching offset wraparound. Nevertheless, check for
1096 : : * wraparound as a sanity check.
1097 : : */
96 heikki.linnakangas@i 1098 [ - + ]:GNC 5306 : if (nextOffset + nmembers < nextOffset)
96 heikki.linnakangas@i 1099 [ # # ]:UBC 0 : ereport(ERROR,
1100 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1101 : : errmsg("MultiXact members would wrap around")));
96 heikki.linnakangas@i 1102 :GNC 5306 : *offset = nextOffset;
1103 : :
4799 alvherre@alvh.no-ip. 1104 :CBC 5306 : ExtendMultiXactMember(nextOffset, nmembers);
1105 : :
1106 : : /*
1107 : : * Critical section from here until caller has written the data into the
1108 : : * just-reserved SLRU space; we don't want to error out with a partly
1109 : : * written MultiXact structure. (In particular, failing to write our
1110 : : * start offset after advancing nextMXact would effectively corrupt the
1111 : : * previous MultiXact.)
1112 : : */
7443 tgl@sss.pgh.pa.us 1113 : 5306 : START_CRIT_SECTION();
1114 : :
1115 : : /*
1116 : : * Advance counters. As in GetNewTransactionId(), this must not happen
1117 : : * until after file extension has succeeded!
1118 : : */
93 heikki.linnakangas@i 1119 :GNC 5306 : MultiXactState->nextMXact = NextMultiXactId(result);
4799 alvherre@alvh.no-ip. 1120 :CBC 5306 : MultiXactState->nextOffset += nmembers;
1121 : :
7626 tgl@sss.pgh.pa.us 1122 : 5306 : LWLockRelease(MultiXactGenLock);
1123 : :
1124 : : debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1125 : : result, *offset);
1126 : 5306 : return result;
1127 : : }
1128 : :
1129 : : /*
1130 : : * GetMultiXactIdMembers
1131 : : * Return the set of MultiXactMembers that make up a MultiXactId
1132 : : *
1133 : : * Return value is the number of members found, or -1 if there are none,
1134 : : * and *members is set to a newly palloc'ed array of members. It's the
1135 : : * caller's responsibility to free it when done with it.
: : * When -1 is returned, *members is set to NULL.
1136 : : *
1137 : : * from_pgupgrade must be passed as true if and only if the multixact
1138 : : * corresponds to a value from a tuple that was locked in a 9.2-or-older
1139 : : * installation and later pg_upgrade'd (that is, the infomask is
1140 : : * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1141 : : * can still be running, so we return -1 just like for an empty multixact
1142 : : * without any further checking. It would be wrong to try to resolve such a
1143 : : * multixact: either the multixact is within the current valid multixact
1144 : : * range, in which case the returned result would be bogus, or outside that
1145 : : * range, in which case an error would be raised.
1146 : : *
1147 : : * In all other cases, the passed multixact must be within the known valid
1148 : : * range, that is, greater than or equal to oldestMultiXactId, and less than
1149 : : * nextMXact. Otherwise, an error is raised.
1150 : : *
1151 : : * isLockOnly must be set to true if caller is certain that the given multi
1152 : : * is used only to lock tuples; can be false without loss of correctness,
1153 : : * but passing true means we can return quickly without checking for
1154 : : * old updates.
1155 : : */
1156 : : int
4799 alvherre@alvh.no-ip. 1157 : 549959 : GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
1158 : : bool from_pgupgrade, bool isLockOnly)
1159 : : {
1160 : : int64 pageno;
1161 : : int64 prev_pageno;
1162 : : int entryno;
1163 : : int slotno;
1164 : : MultiXactOffset *offptr;
1165 : : MultiXactOffset offset;
1166 : : MultiXactOffset nextMXOffset;
1167 : : int length;
1168 : : MultiXactId oldestMXact;
1169 : : MultiXactId nextMXact;
1170 : : MultiXactMember *ptr;
1171 : : LWLock *lock;
1172 : :
1173 : : debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1174 : :
3551 1175 [ + - - + ]: 549959 : if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1176 : : {
1732 heikki.linnakangas@i 1177 :UBC 0 : *members = NULL;
4490 alvherre@alvh.no-ip. 1178 : 0 : return -1;
1179 : : }
1180 : :
1181 : : /* See if the MultiXactId is in the local cache */
4799 alvherre@alvh.no-ip. 1182 :CBC 549959 : length = mXactCacheGetById(multi, members);
7626 tgl@sss.pgh.pa.us 1183 [ + + ]: 549959 : if (length >= 0)
1184 : : {
1185 : : debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1186 : : mxid_to_string(multi, length, *members));
1187 : 457490 : return length;
1188 : : }
1189 : :
1190 : : /* Set our OldestVisibleMXactId[] entry if we didn't already */
1191 : 92469 : MultiXactIdSetOldestVisible();
1192 : :
1193 : : /*
1194 : : * If we know the multi is used only for locking and not for updates, then
1195 : : * we can skip looking up its members when the value is older than our
1196 : : * oldest visible multi. It cannot possibly still be running.
1197 : : */
1273 pg@bowt.ie 1198 [ + + + + ]: 96175 : if (isLockOnly &&
13 heikki.linnakangas@i 1199 : 3706 : MultiXactIdPrecedes(multi, *MyOldestVisibleMXactIdSlot()))
1200 : : {
1201 : : debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
4247 alvherre@alvh.no-ip. 1202 : 736 : *members = NULL;
1203 : 736 : return -1;
1204 : : }
1205 : :
1206 : : /*
1207 : : * We check known limits on MultiXact before resorting to the SLRU area.
1208 : : *
1209 : : * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1210 : : * useful; it has already been removed, or will be removed shortly, by
1211 : : * truncation. If one is passed, an error is raised.
1212 : : *
1213 : : * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1214 : : * implies undetected ID wraparound has occurred. This raises a hard
1215 : : * error.
1216 : : *
1217 : : * Shared lock is enough here since we aren't modifying any global state.
1218 : : * Acquire it just long enough to grab the current counter values.
1219 : : */
7626 tgl@sss.pgh.pa.us 1220 : 91733 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1221 : :
4799 alvherre@alvh.no-ip. 1222 : 91733 : oldestMXact = MultiXactState->oldestMultiXactId;
7443 tgl@sss.pgh.pa.us 1223 : 91733 : nextMXact = MultiXactState->nextMXact;
1224 : :
1225 : 91733 : LWLockRelease(MultiXactGenLock);
1226 : :
4799 alvherre@alvh.no-ip. 1227 [ - + ]: 91733 : if (MultiXactIdPrecedes(multi, oldestMXact))
3551 alvherre@alvh.no-ip. 1228 [ # # ]:UBC 0 : ereport(ERROR,
1229 : : (errcode(ERRCODE_INTERNAL_ERROR),
1230 : : errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1231 : : multi)));
1232 : :
4799 alvherre@alvh.no-ip. 1233 [ - + ]:CBC 91733 : if (!MultiXactIdPrecedes(multi, nextMXact))
4799 alvherre@alvh.no-ip. 1234 [ # # ]:UBC 0 : ereport(ERROR,
1235 : : (errcode(ERRCODE_INTERNAL_ERROR),
1236 : : errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1237 : : multi)));
1238 : :
1239 : : /*
1240 : : * Find out the offset at which we need to start reading MultiXactMembers
1241 : : * and the number of members in the multixact. We determine the latter as
1242 : : * the difference between this multixact's starting offset and the next
1243 : : * one's.
1244 : : */
7626 tgl@sss.pgh.pa.us 1245 :CBC 91733 : pageno = MultiXactIdToOffsetPage(multi);
1246 : 91733 : entryno = MultiXactIdToOffsetEntry(multi);
1247 : :
1248 : : /* Acquire the bank lock for the page we need. */
746 alvherre@alvh.no-ip. 1249 : 91733 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
741 1250 : 91733 : LWLockAcquire(lock, LW_EXCLUSIVE);
1251 : :
1252 : : /* read this multi's offset */
2 heikki.linnakangas@i 1253 :GNC 91733 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &multi);
7585 tgl@sss.pgh.pa.us 1254 :CBC 91733 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7626 1255 : 91733 : offptr += entryno;
1256 : 91733 : offset = *offptr;
1257 : :
94 heikki.linnakangas@i 1258 [ - + ]:GNC 91733 : if (offset == 0)
94 heikki.linnakangas@i 1259 [ # # ]:UNC 0 : ereport(ERROR,
1260 : : (errcode(ERRCODE_DATA_CORRUPTED),
1261 : : errmsg("MultiXact %u has invalid offset", multi)));
1262 : :
1263 : : /* read next multi's offset */
1264 : : {
1265 : : MultiXactId tmpMXact;
1266 : :
1267 : : /* handle wraparound if needed */
93 heikki.linnakangas@i 1268 :GNC 91733 : tmpMXact = NextMultiXactId(multi);
1269 : :
7626 tgl@sss.pgh.pa.us 1270 :CBC 91733 : prev_pageno = pageno;
1271 : :
1272 : 91733 : pageno = MultiXactIdToOffsetPage(tmpMXact);
1273 : 91733 : entryno = MultiXactIdToOffsetEntry(tmpMXact);
1274 : :
1275 [ + + ]: 91733 : if (pageno != prev_pageno)
1276 : : {
1277 : : LWLock *newlock;
1278 : :
1279 : : /*
1280 : : * Since we're going to access a different SLRU page, if this page
1281 : : * falls under a different bank, release the old bank's lock and
1282 : : * acquire the lock of the new bank.
1283 : : */
741 alvherre@alvh.no-ip. 1284 :GBC 13 : newlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1285 [ - + ]: 13 : if (newlock != lock)
1286 : : {
741 alvherre@alvh.no-ip. 1287 :UBC 0 : LWLockRelease(lock);
1288 : 0 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1289 : 0 : lock = newlock;
1290 : : }
2 heikki.linnakangas@i 1291 :GNC 13 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &tmpMXact);
1292 : : }
1293 : :
7585 tgl@sss.pgh.pa.us 1294 :CBC 91733 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
7626 1295 : 91733 : offptr += entryno;
7443 1296 : 91733 : nextMXOffset = *offptr;
1297 : : }
1298 : :
741 alvherre@alvh.no-ip. 1299 : 91733 : LWLockRelease(lock);
1300 : 91733 : lock = NULL;
1301 : :
1302 : : /* Sanity check the next offset */
94 heikki.linnakangas@i 1303 [ - + ]:GNC 91733 : if (nextMXOffset == 0)
94 heikki.linnakangas@i 1304 [ # # ]:UNC 0 : ereport(ERROR,
1305 : : (errcode(ERRCODE_DATA_CORRUPTED),
1306 : : errmsg("MultiXact %u has invalid next offset", multi)));
90 heikki.linnakangas@i 1307 [ - + ]:GNC 91733 : if (nextMXOffset == offset)
90 heikki.linnakangas@i 1308 [ # # ]:UNC 0 : ereport(ERROR,
1309 : : (errcode(ERRCODE_DATA_CORRUPTED),
1310 : : errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1311 : : multi, offset)));
94 heikki.linnakangas@i 1312 [ - + ]:GNC 91733 : if (nextMXOffset < offset)
94 heikki.linnakangas@i 1313 [ # # ]:UNC 0 : ereport(ERROR,
1314 : : (errcode(ERRCODE_DATA_CORRUPTED),
1315 : : errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1316 : : multi, offset, nextMXOffset)));
94 heikki.linnakangas@i 1317 [ - + ]:GNC 91733 : if (nextMXOffset - offset > INT32_MAX)
94 heikki.linnakangas@i 1318 [ # # ]:UNC 0 : ereport(ERROR,
1319 : : (errcode(ERRCODE_DATA_CORRUPTED),
1320 : : errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1321 : : multi, nextMXOffset - offset)));
94 heikki.linnakangas@i 1322 :GNC 91733 : length = nextMXOffset - offset;
1323 : :
1324 : : /* read the members */
4799 alvherre@alvh.no-ip. 1325 : 91733 : ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
7626 tgl@sss.pgh.pa.us 1326 :CBC 91733 : prev_pageno = -1;
741 alvherre@alvh.no-ip. 1327 [ + + ]: 1827895 : for (int i = 0; i < length; i++, offset++)
1328 : : {
1329 : : TransactionId *xactptr;
1330 : : uint32 *flagsptr;
1331 : : int flagsoff;
1332 : : int bshift;
1333 : : int memberoff;
1334 : :
7626 tgl@sss.pgh.pa.us 1335 : 1736162 : pageno = MXOffsetToMemberPage(offset);
4799 alvherre@alvh.no-ip. 1336 : 1736162 : memberoff = MXOffsetToMemberOffset(offset);
1337 : :
7626 tgl@sss.pgh.pa.us 1338 [ + + ]: 1736162 : if (pageno != prev_pageno)
1339 : : {
2 heikki.linnakangas@i 1340 :GNC 91895 : MultiXactMemberSlruReadContext slru_read_context = {multi, offset};
1341 : : LWLock *newlock;
1342 : :
1343 : : /*
1344 : : * Since we're going to access a different SLRU page, if this page
1345 : : * falls under a different bank, release the old bank's lock and
1346 : : * acquire the lock of the new bank.
1347 : : */
741 alvherre@alvh.no-ip. 1348 :CBC 91895 : newlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1349 [ + - ]: 91895 : if (newlock != lock)
1350 : : {
1351 [ + + ]: 91895 : if (lock)
741 alvherre@alvh.no-ip. 1352 :GBC 162 : LWLockRelease(lock);
741 alvherre@alvh.no-ip. 1353 :CBC 91895 : LWLockAcquire(newlock, LW_EXCLUSIVE);
1354 : 91895 : lock = newlock;
1355 : : }
2 heikki.linnakangas@i 1356 :GNC 91895 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true,
1357 : : &slru_read_context);
7626 tgl@sss.pgh.pa.us 1358 :CBC 91895 : prev_pageno = pageno;
1359 : : }
1360 : :
1361 : 1736162 : xactptr = (TransactionId *)
4799 alvherre@alvh.no-ip. 1362 : 1736162 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
96 heikki.linnakangas@i 1363 [ - + ]:GNC 1736162 : Assert(TransactionIdIsValid(*xactptr));
1364 : :
4799 alvherre@alvh.no-ip. 1365 :CBC 1736162 : flagsoff = MXOffsetToFlagsOffset(offset);
1366 : 1736162 : bshift = MXOffsetToFlagsBitShift(offset);
1367 : 1736162 : flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1368 : :
96 heikki.linnakangas@i 1369 :GNC 1736162 : ptr[i].xid = *xactptr;
1370 : 1736162 : ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
1371 : : }
1372 : :
741 alvherre@alvh.no-ip. 1373 :CBC 91733 : LWLockRelease(lock);
1374 : :
1375 : : /*
1376 : : * Copy the result into the local cache.
1377 : : */
96 heikki.linnakangas@i 1378 :GNC 91733 : mXactCachePut(multi, length, ptr);
1379 : :
1380 : : debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1381 : : mxid_to_string(multi, length, ptr));
1732 heikki.linnakangas@i 1382 :CBC 91733 : *members = ptr;
96 heikki.linnakangas@i 1383 :GNC 91733 : return length;
1384 : : }
1385 : :
1386 : : /*
1387 : : * mxactMemberComparator
1388 : : * qsort comparison function for MultiXactMember
1389 : : *
1390 : : * We can't use wraparound comparison for XIDs because that does not respect
1391 : : * the triangle inequality! Any old sort order will do.
: : *
: : * Members are ordered by plain numeric comparison of xid first, then of
: : * status. Returns 1, -1 or 0 in the usual qsort convention; 0 is returned
: : * only when both xid and status are equal.
1392 : : */
1393 : : static int
4799 alvherre@alvh.no-ip. 1394 :CBC 3050724 : mxactMemberComparator(const void *arg1, const void *arg2)
1395 : : {
1396 : 3050724 : MultiXactMember member1 = *(const MultiXactMember *) arg1;
1397 : 3050724 : MultiXactMember member2 = *(const MultiXactMember *) arg2;
1398 : :
1399 [ + + ]: 3050724 : if (member1.xid > member2.xid)
1400 : 29 : return 1;
1401 [ + + ]: 3050695 : if (member1.xid < member2.xid)
1402 : 3050481 : return -1;
1403 [ + + ]: 214 : if (member1.status > member2.status)
4799 alvherre@alvh.no-ip. 1404 :GBC 16 : return 1;
4799 alvherre@alvh.no-ip. 1405 [ + - ]:CBC 198 : if (member1.status < member2.status)
1406 : 198 : return -1;
4799 alvherre@alvh.no-ip. 1407 :UBC 0 : return 0;
1408 : : }
1409 : :
1410 : : /*
1411 : : * mXactCacheGetBySet
1412 : : * returns a MultiXactId from the cache based on the set of
1413 : : * TransactionIds that compose it, or InvalidMultiXactId if
1414 : : * none matches.
1415 : : *
1416 : : * This is helpful, for example, if two transactions want to lock a huge
1417 : : * table. By using the cache, the second will use the same MultiXactId
1418 : : * for the majority of tuples, thus keeping MultiXactId usage low (saving
1419 : : * both I/O and wraparound issues).
: : *
: : * A cache entry matches only if it has the same number of members and its
: : * member array is bytewise identical to the sorted input, so the status
: : * of each member takes part in the comparison as well. On a hit, the
: : * matching entry is moved to the head of the cache list.
1420 : : *
1421 : : * NB: the passed members array will be sorted in-place.
1422 : : */
1423 : : static MultiXactId
4799 alvherre@alvh.no-ip. 1424 :CBC 76654 : mXactCacheGetBySet(int nmembers, MultiXactMember *members)
1425 : : {
1426 : : dlist_iter iter;
1427 : :
1428 : : debug_elog3(DEBUG2, "CacheGet: looking for %s",
1429 : : mxid_to_string(InvalidMultiXactId, nmembers, members));
1430 : :
1431 : : /* sort the array so comparison is easy */
1432 : 76654 : qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1433 : :
1229 drowley@postgresql.o 1434 [ + - + + ]: 308344 : dclist_foreach(iter, &MXactCache)
1435 : : {
1436 : 303038 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1437 : : iter.cur);
1438 : :
4799 alvherre@alvh.no-ip. 1439 [ + + ]: 303038 : if (entry->nmembers != nmembers)
7626 tgl@sss.pgh.pa.us 1440 : 85334 : continue;
1441 : :
1442 : : /*
1443 : : * We assume the cache entries are sorted, and that the unused bits in
1444 : : * "status" are zeroed.
1445 : : */
4799 alvherre@alvh.no-ip. 1446 [ + + ]: 217704 : if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1447 : : {
1448 : : debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1229 drowley@postgresql.o 1449 : 71348 : dclist_move_head(&MXactCache, iter.cur);
7626 tgl@sss.pgh.pa.us 1450 : 71348 : return entry->multi;
1451 : : }
1452 : : }
1453 : :
1454 : : debug_elog2(DEBUG2, "CacheGet: not found :-(");
1455 : 5306 : return InvalidMultiXactId;
1456 : : }
1457 : :
1458 : : /*
1459 : : * mXactCacheGetById
1460 : : * returns the composing MultiXactMember set from the cache for a
1461 : : * given MultiXactId, if present.
1462 : : *
1463 : : * If successful, *xids is set to the address of a palloc'd copy of the
1464 : : * MultiXactMember set. Return value is number of members, or -1 on failure.
1465 : : */
1466 : : static int
4799 alvherre@alvh.no-ip. 1467 : 549959 : mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
1468 : : {
1469 : : dlist_iter iter;
1470 : :
1471 : : debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1472 : :
1229 drowley@postgresql.o 1473 [ + - + + ]: 4905684 : dclist_foreach(iter, &MXactCache)
1474 : : {
1475 : 4813215 : mXactCacheEnt *entry = dclist_container(mXactCacheEnt, node,
1476 : : iter.cur);
1477 : :
7626 tgl@sss.pgh.pa.us 1478 [ + + ]: 4813215 : if (entry->multi == multi)
1479 : : {
1480 : : MultiXactMember *ptr;
1481 : : Size size;
1482 : :
4799 alvherre@alvh.no-ip. 1483 : 457490 : size = sizeof(MultiXactMember) * entry->nmembers;
1484 : 457490 : ptr = (MultiXactMember *) palloc(size);
1485 : :
1486 : 457490 : memcpy(ptr, entry->members, size);
1487 : :
1488 : : debug_elog3(DEBUG2, "CacheGet: found %s",
1489 : : mxid_to_string(multi,
1490 : : entry->nmembers,
1491 : : entry->members));
1492 : :
1493 : : /*
1494 : : * Note we modify the list while not using a modifiable iterator.
1495 : : * This is acceptable only because we exit the iteration
1496 : : * immediately afterwards.
1497 : : */
1229 drowley@postgresql.o 1498 : 457490 : dclist_move_head(&MXactCache, iter.cur);
1499 : :
1732 heikki.linnakangas@i 1500 : 457490 : *members = ptr;
4799 alvherre@alvh.no-ip. 1501 : 457490 : return entry->nmembers;
1502 : : }
1503 : : }
1504 : :
1505 : : debug_elog2(DEBUG2, "CacheGet: not found");
7626 tgl@sss.pgh.pa.us 1506 : 92469 : return -1;
1507 : : }
1508 : :
1509 : : /*
1510 : : * mXactCachePut
1511 : : * Add a new MultiXactId and its composing set into the local cache.
1512 : : */
1513 : : static void
4799 alvherre@alvh.no-ip. 1514 : 97039 : mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1515 : : {
1516 : : mXactCacheEnt *entry;
1517 : :
1518 : : debug_elog3(DEBUG2, "CachePut: storing %s",
1519 : : mxid_to_string(multi, nmembers, members));
1520 : :
7626 tgl@sss.pgh.pa.us 1521 [ + + ]: 97039 : if (MXactContext == NULL)
1522 : : {
1523 : : /* The cache only lives as long as the current transaction */
1524 : : debug_elog2(DEBUG2, "CachePut: initializing memory context");
1525 : 3340 : MXactContext = AllocSetContextCreate(TopTransactionContext,
1526 : : "MultiXact cache context",
1527 : : ALLOCSET_SMALL_SIZES);
1528 : : }
1529 : :
1530 : : entry = (mXactCacheEnt *)
1531 : 97039 : MemoryContextAlloc(MXactContext,
4799 alvherre@alvh.no-ip. 1532 : 97039 : offsetof(mXactCacheEnt, members) +
1533 : : nmembers * sizeof(MultiXactMember));
1534 : :
7626 tgl@sss.pgh.pa.us 1535 : 97039 : entry->multi = multi;
4799 alvherre@alvh.no-ip. 1536 : 97039 : entry->nmembers = nmembers;
1537 : 97039 : memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1538 : :
1539 : : /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1540 : 97039 : qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1541 : :
1229 drowley@postgresql.o 1542 : 97039 : dclist_push_head(&MXactCache, &entry->node);
1543 [ + + ]: 97039 : if (dclist_count(&MXactCache) > MAX_CACHE_ENTRIES)
1544 : : {
1545 : : dlist_node *node;
1546 : :
1229 drowley@postgresql.o 1547 :GBC 9478 : node = dclist_tail_node(&MXactCache);
1548 : 9478 : dclist_delete_from(&MXactCache, node);
1549 : :
1550 : 9478 : entry = dclist_container(mXactCacheEnt, node, node);
1551 : : debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1552 : : entry->multi);
1553 : :
4475 alvherre@alvh.no-ip. 1554 : 9478 : pfree(entry);
1555 : : }
7626 tgl@sss.pgh.pa.us 1556 :CBC 97039 : }
1557 : :
1558 : : char *
4799 alvherre@alvh.no-ip. 1559 :GBC 188088 : mxstatus_to_string(MultiXactStatus status)
1560 : : {
1561 [ + - - - : 188088 : switch (status)
+ - - ]
1562 : : {
1563 : 183634 : case MultiXactStatusForKeyShare:
1564 : 183634 : return "keysh";
4799 alvherre@alvh.no-ip. 1565 :UBC 0 : case MultiXactStatusForShare:
1566 : 0 : return "sh";
1567 : 0 : case MultiXactStatusForNoKeyUpdate:
1568 : 0 : return "fornokeyupd";
1569 : 0 : case MultiXactStatusForUpdate:
1570 : 0 : return "forupd";
4799 alvherre@alvh.no-ip. 1571 :GBC 4454 : case MultiXactStatusNoKeyUpdate:
1572 : 4454 : return "nokeyupd";
4799 alvherre@alvh.no-ip. 1573 :UBC 0 : case MultiXactStatusUpdate:
1574 : 0 : return "upd";
1575 : 0 : default:
1576 [ # # ]: 0 : elog(ERROR, "unrecognized multixact status %d", status);
1577 : : return "";
1578 : : }
1579 : : }
1580 : :
1581 : : char *
1582 : 0 : mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1583 : : {
1584 : : static char *str = NULL;
1585 : : StringInfoData buf;
1586 : : int i;
1587 : :
1588 [ # # ]: 0 : if (str != NULL)
1589 : 0 : pfree(str);
1590 : :
1591 : 0 : initStringInfo(&buf);
1592 : :
1593 : 0 : appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1594 : : mxstatus_to_string(members[0].status));
1595 : :
1596 [ # # ]: 0 : for (i = 1; i < nmembers; i++)
1597 : 0 : appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1598 : 0 : mxstatus_to_string(members[i].status));
1599 : :
1600 : 0 : appendStringInfoChar(&buf, ']');
1601 : 0 : str = MemoryContextStrdup(TopMemoryContext, buf.data);
1602 : 0 : pfree(buf.data);
7626 tgl@sss.pgh.pa.us 1603 : 0 : return str;
1604 : : }
1605 : :
1606 : : /*
1607 : : * AtEOXact_MultiXact
1608 : : * Handle transaction end for MultiXact
1609 : : *
1610 : : * This is called at top transaction commit or abort (we don't care which).
1611 : : */
1612 : : void
7626 tgl@sss.pgh.pa.us 1613 :CBC 336868 : AtEOXact_MultiXact(void)
1614 : : {
1615 : : /*
1616 : : * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1617 : : * which should only be valid while within a transaction.
1618 : : *
1619 : : * We assume that storing a MultiXactId is atomic and so we need not take
1620 : : * MultiXactGenLock to do this.
1621 : : */
13 heikki.linnakangas@i 1622 : 336868 : *MyOldestMemberMXactIdSlot() = InvalidMultiXactId;
1623 : 336868 : *MyOldestVisibleMXactIdSlot() = InvalidMultiXactId;
1624 : :
1625 : : /*
1626 : : * Discard the local MultiXactId cache. Since MXactContext was created as
1627 : : * a child of TopTransactionContext, we needn't delete it explicitly.
1628 : : */
7626 tgl@sss.pgh.pa.us 1629 : 336868 : MXactContext = NULL;
1229 drowley@postgresql.o 1630 : 336868 : dclist_init(&MXactCache);
7626 tgl@sss.pgh.pa.us 1631 : 336868 : }
1632 : :
1633 : : /*
1634 : : * AtPrepare_MultiXact
1635 : : * Save multixact state at 2PC transaction prepare
1636 : : *
1637 : : * In this phase, we only store our OldestMemberMXactId value in the two-phase
1638 : : * state file.
1639 : : */
1640 : : void
5956 heikki.linnakangas@i 1641 : 312 : AtPrepare_MultiXact(void)
1642 : : {
13 1643 : 312 : MultiXactId myOldestMember = *MyOldestMemberMXactIdSlot();
1644 : :
5956 1645 [ + + ]: 312 : if (MultiXactIdIsValid(myOldestMember))
1646 : 67 : RegisterTwoPhaseRecord(TWOPHASE_RM_MULTIXACT_ID, 0,
1647 : : &myOldestMember, sizeof(MultiXactId));
1648 : 312 : }
1649 : :
1650 : : /*
1651 : : * PostPrepare_MultiXact
1652 : : * Clean up after successful PREPARE TRANSACTION
1653 : : */
1654 : : void
251 michael@paquier.xyz 1655 :GNC 312 : PostPrepare_MultiXact(FullTransactionId fxid)
1656 : : {
1657 : : MultiXactId myOldestMember;
1658 : :
1659 : : /*
1660 : : * Transfer our OldestMemberMXactId value to the slot reserved for the
1661 : : * prepared transaction.
1662 : : */
13 heikki.linnakangas@i 1663 :CBC 312 : myOldestMember = *MyOldestMemberMXactIdSlot();
5956 1664 [ + + ]: 312 : if (MultiXactIdIsValid(myOldestMember))
1665 : : {
251 michael@paquier.xyz 1666 :GNC 67 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1667 : :
1668 : : /*
1669 : : * Even though storing MultiXactId is atomic, acquire lock to make
1670 : : * sure others see both changes, not just the reset of the slot of the
1671 : : * current backend. Using a volatile pointer might suffice, but this
1672 : : * isn't a hot spot.
1673 : : */
5956 heikki.linnakangas@i 1674 :CBC 67 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1675 : :
13 1676 : 67 : *PreparedXactOldestMemberMXactIdSlot(dummyProcNumber) = myOldestMember;
1677 : 67 : *MyOldestMemberMXactIdSlot() = InvalidMultiXactId;
1678 : :
5956 1679 : 67 : LWLockRelease(MultiXactGenLock);
1680 : : }
1681 : :
1682 : : /*
1683 : : * We don't need to transfer OldestVisibleMXactId value, because the
1684 : : * transaction is not going to be looking at any more multixacts once it's
1685 : : * prepared.
1686 : : *
1687 : : * We assume that storing a MultiXactId is atomic and so we need not take
1688 : : * MultiXactGenLock to do this.
1689 : : */
13 1690 : 312 : *MyOldestVisibleMXactIdSlot() = InvalidMultiXactId;
1691 : :
1692 : : /*
1693 : : * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1694 : : */
5956 1695 : 312 : MXactContext = NULL;
1229 drowley@postgresql.o 1696 : 312 : dclist_init(&MXactCache);
5956 heikki.linnakangas@i 1697 : 312 : }
1698 : :
1699 : : /*
1700 : : * multixact_twophase_recover
1701 : : * Recover the state of a prepared transaction at startup
1702 : : */
1703 : : void
251 michael@paquier.xyz 1704 :GNC 8 : multixact_twophase_recover(FullTransactionId fxid, uint16 info,
1705 : : void *recdata, uint32 len)
1706 : : {
1707 : 8 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1708 : : MultiXactId oldestMember;
1709 : :
1710 : : /*
1711 : : * Get the oldest member XID from the state file record, and set it in the
1712 : : * OldestMemberMXactId slot reserved for this prepared transaction.
1713 : : */
5956 heikki.linnakangas@i 1714 [ - + ]:CBC 8 : Assert(len == sizeof(MultiXactId));
5861 bruce@momjian.us 1715 : 8 : oldestMember = *((MultiXactId *) recdata);
1716 : :
13 heikki.linnakangas@i 1717 : 8 : *PreparedXactOldestMemberMXactIdSlot(dummyProcNumber) = oldestMember;
5956 1718 : 8 : }
1719 : :
1720 : : /*
1721 : : * multixact_twophase_postcommit
1722 : : * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1723 : : */
1724 : : void
251 michael@paquier.xyz 1725 :GNC 72 : multixact_twophase_postcommit(FullTransactionId fxid, uint16 info,
1726 : : void *recdata, uint32 len)
1727 : : {
1728 : 72 : ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
1729 : :
5956 heikki.linnakangas@i 1730 [ - + ]:CBC 72 : Assert(len == sizeof(MultiXactId));
1731 : :
13 1732 : 72 : *PreparedXactOldestMemberMXactIdSlot(dummyProcNumber) = InvalidMultiXactId;
5956 1733 : 72 : }
1734 : :
1735 : : /*
1736 : : * multixact_twophase_postabort
1737 : : * This is actually just the same as the COMMIT case.
1738 : : */
1739 : : void
251 michael@paquier.xyz 1740 :GNC 30 : multixact_twophase_postabort(FullTransactionId fxid, uint16 info,
1741 : : void *recdata, uint32 len)
1742 : : {
1743 : 30 : multixact_twophase_postcommit(fxid, info, recdata, len);
5956 heikki.linnakangas@i 1744 :CBC 30 : }
1745 : :
1746 : : /*
1747 : : * Initialization of shared memory for MultiXact.
1748 : : *
1749 : : * MultiXactSharedStateShmemSize() calculates the size of the MultiXactState
1750 : : * struct, and the two per-backend MultiXactId arrays. They are carved out of
1751 : : * the same allocation. MultiXactShmemSize() additionally includes the memory
1752 : : * needed for the two SLRU areas.
1753 : : */
1754 : : static Size
13 1755 : 4447 : MultiXactSharedStateShmemSize(void)
1756 : : {
1757 : : Size size;
1758 : :
1759 : 4447 : size = offsetof(MultiXactStateData, perBackendXactIds);
1760 : 4447 : size = add_size(size,
1761 : 4447 : mul_size(sizeof(MultiXactId), NumMemberSlots));
1762 : 4447 : size = add_size(size,
1763 : : mul_size(sizeof(MultiXactId), NumVisibleSlots));
1764 : 4447 : return size;
1765 : : }
1766 : :
1767 : : Size
7626 tgl@sss.pgh.pa.us 1768 : 2147 : MultiXactShmemSize(void)
1769 : : {
1770 : : Size size;
1771 : :
13 heikki.linnakangas@i 1772 : 2147 : size = MultiXactSharedStateShmemSize();
746 alvherre@alvh.no-ip. 1773 : 2147 : size = add_size(size, SimpleLruShmemSize(multixact_offset_buffers, 0));
1774 : 2147 : size = add_size(size, SimpleLruShmemSize(multixact_member_buffers, 0));
1775 : :
7512 tgl@sss.pgh.pa.us 1776 : 2147 : return size;
1777 : : }
1778 : :
1779 : : void
7626 1780 : 1150 : MultiXactShmemInit(void)
1781 : : {
1782 : : bool found;
1783 : :
1784 : : debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1785 : :
1786 : 1150 : MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
1787 : 1150 : MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
2 heikki.linnakangas@i 1788 :GNC 1150 : MultiXactOffsetCtl->errdetail_for_io_error = MultiXactOffsetIoErrorDetail;
1789 : 1150 : MultiXactMemberCtl->errdetail_for_io_error = MultiXactMemberIoErrorDetail;
1790 : :
7404 tgl@sss.pgh.pa.us 1791 :CBC 1150 : SimpleLruInit(MultiXactOffsetCtl,
1792 : : "multixact_offset", multixact_offset_buffers, 0,
1793 : : "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1794 : : LWTRANCHE_MULTIXACTOFFSET_SLRU,
1795 : : SYNC_HANDLER_MULTIXACT_OFFSET,
1796 : : false);
1884 noah@leadboat.com 1797 : 1150 : SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
7404 tgl@sss.pgh.pa.us 1798 : 1150 : SimpleLruInit(MultiXactMemberCtl,
1799 : : "multixact_member", multixact_member_buffers, 0,
1800 : : "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1801 : : LWTRANCHE_MULTIXACTMEMBER_SLRU,
1802 : : SYNC_HANDLER_MULTIXACT_MEMBER,
1803 : : true);
1804 : : /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1805 : :
1806 : : /* Initialize our shared state struct */
7626 1807 : 1150 : MultiXactState = ShmemInitStruct("Shared MultiXact State",
1808 : : MultiXactSharedStateShmemSize(),
1809 : : &found);
1810 [ + - ]: 1150 : if (!IsUnderPostmaster)
1811 : : {
1812 [ - + ]: 1150 : Assert(!found);
1813 : :
1814 : : /* Make sure we zero out the per-backend state */
13 heikki.linnakangas@i 1815 [ + - + + : 18921 : MemSet(MultiXactState, 0, MultiXactSharedStateShmemSize());
+ - + + +
+ ]
1816 : : }
1817 : : else
7626 tgl@sss.pgh.pa.us 1818 [ # # ]:UBC 0 : Assert(found);
1819 : :
1820 : : /*
1821 : : * Set up array pointers.
1822 : : */
7626 tgl@sss.pgh.pa.us 1823 :CBC 1150 : OldestMemberMXactId = MultiXactState->perBackendXactIds;
13 heikki.linnakangas@i 1824 : 1150 : OldestVisibleMXactId = OldestMemberMXactId + NumMemberSlots;
7626 tgl@sss.pgh.pa.us 1825 : 1150 : }
1826 : :
1827 : : /*
1828 : : * GUC check_hook for multixact_offset_buffers
1829 : : */
1830 : : bool
746 alvherre@alvh.no-ip. 1831 : 1184 : check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
1832 : : {
1833 : 1184 : return check_slru_buffers("multixact_offset_buffers", newval);
1834 : : }
1835 : :
1836 : : /*
1837 : : * GUC check_hook for multixact_member_buffers
1838 : : */
1839 : : bool
1840 : 1184 : check_multixact_member_buffers(int *newval, void **extra, GucSource source)
1841 : : {
1842 : 1184 : return check_slru_buffers("multixact_member_buffers", newval);
1843 : : }
1844 : :
1845 : : /*
1846 : : * This func must be called ONCE on system install. It creates the initial
1847 : : * MultiXact segments. (The MultiXacts directories are assumed to have been
1848 : : * created by initdb, and MultiXactShmemInit must have been called already.)
1849 : : */
1850 : : void
7626 tgl@sss.pgh.pa.us 1851 : 51 : BootStrapMultiXact(void)
1852 : : {
1853 : : /* Zero the initial pages and flush them to disk */
251 alvherre@kurilemu.de 1854 :GNC 51 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
1855 : 51 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
7626 tgl@sss.pgh.pa.us 1856 :CBC 51 : }
1857 : :
1858 : : /*
1859 : : * This must be called ONCE during postmaster or standalone-backend startup.
1860 : : *
1861 : : * StartupXLOG has already established nextMXact/nextOffset by calling
1862 : : * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1863 : : * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1864 : : * replayed WAL.
1865 : : */
1866 : : void
1867 : 1000 : StartupMultiXact(void)
1868 : : {
4489 alvherre@alvh.no-ip. 1869 : 1000 : MultiXactId multi = MultiXactState->nextMXact;
1870 : 1000 : MultiXactOffset offset = MultiXactState->nextOffset;
1871 : : int64 pageno;
1872 : :
1873 : : /*
1874 : : * Initialize offset's idea of the latest page number.
1875 : : */
1876 : 1000 : pageno = MultiXactIdToOffsetPage(multi);
768 1877 : 1000 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1878 : : pageno);
1879 : :
1880 : : /*
1881 : : * Initialize member's idea of the latest page number.
1882 : : */
4489 1883 : 1000 : pageno = MXOffsetToMemberPage(offset);
768 1884 : 1000 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1885 : : pageno);
4489 1886 : 1000 : }
1887 : :
1888 : : /*
1889 : : * This must be called ONCE at the end of startup/recovery.
1890 : : */
1891 : : void
1892 : 939 : TrimMultiXact(void)
1893 : : {
1894 : : MultiXactId nextMXact;
1895 : : MultiXactOffset offset;
1896 : : MultiXactId oldestMXact;
1897 : : Oid oldestMXactDB;
1898 : : int64 pageno;
1899 : : int entryno;
1900 : : int flagsoff;
1901 : :
3823 andres@anarazel.de 1902 : 939 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
1903 : 939 : nextMXact = MultiXactState->nextMXact;
1904 : 939 : offset = MultiXactState->nextOffset;
1905 : 939 : oldestMXact = MultiXactState->oldestMultiXactId;
1906 : 939 : oldestMXactDB = MultiXactState->oldestMultiXactDB;
1907 : 939 : LWLockRelease(MultiXactGenLock);
1908 : :
1909 : : /* Clean up offsets state */
1910 : :
1911 : : /*
1912 : : * (Re-)Initialize our idea of the latest page number for offsets.
1913 : : */
1914 : 939 : pageno = MultiXactIdToOffsetPage(nextMXact);
768 alvherre@alvh.no-ip. 1915 : 939 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1916 : : pageno);
1917 : :
1918 : : /*
1919 : : * Set the offset of nextMXact on the offsets page. This is normally done
1920 : : * in RecordNewMultiXact() of the previous multixact, but let's be sure
1921 : : * the next page exists, if the nextMXact was reset with pg_resetwal for
1922 : : * example.
1923 : : *
1924 : : * Zero out the remainder of the page. See notes in TrimCLOG() for
1925 : : * background. Unlike CLOG, some WAL record covers every pg_multixact
1926 : : * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1927 : : * xlog before data," nextMXact successors may carry obsolete, nonzero
1928 : : * offset values.
1929 : : */
3823 andres@anarazel.de 1930 : 939 : entryno = MultiXactIdToOffsetEntry(nextMXact);
1931 : : {
1932 : : int slotno;
1933 : : MultiXactOffset *offptr;
746 alvherre@alvh.no-ip. 1934 : 939 : LWLock *lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1935 : :
1936 : 939 : LWLockAcquire(lock, LW_EXCLUSIVE);
93 heikki.linnakangas@i 1937 [ + + + + ]:GNC 939 : if (entryno == 0 || nextMXact == FirstMultiXactId)
102 heikki.linnakangas@i 1938 :CBC 921 : slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
1939 : : else
2 heikki.linnakangas@i 1940 :GNC 18 : slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, &nextMXact);
7585 tgl@sss.pgh.pa.us 1941 :CBC 939 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1942 : 939 : offptr += entryno;
1943 : :
102 heikki.linnakangas@i 1944 : 939 : *offptr = offset;
1945 [ + + + - ]: 939 : if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1946 [ + - + - : 1560 : MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
+ - + + +
+ ]
1947 : :
7435 tgl@sss.pgh.pa.us 1948 : 939 : MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
746 alvherre@alvh.no-ip. 1949 : 939 : LWLockRelease(lock);
1950 : : }
1951 : :
1952 : : /*
1953 : : * And the same for members.
1954 : : *
1955 : : * (Re-)Initialize our idea of the latest page number for members.
1956 : : */
7585 tgl@sss.pgh.pa.us 1957 : 939 : pageno = MXOffsetToMemberPage(offset);
768 alvherre@alvh.no-ip. 1958 : 939 : pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1959 : : pageno);
1960 : :
1961 : : /*
1962 : : * Zero out the remainder of the current members page. See notes in
1963 : : * TrimCLOG() for motivation.
1964 : : */
4799 1965 : 939 : flagsoff = MXOffsetToFlagsOffset(offset);
1966 [ + + ]: 939 : if (flagsoff != 0)
1967 : : {
2 heikki.linnakangas@i 1968 :GNC 17 : MultiXactMemberSlruReadContext slru_read_context = {InvalidMultiXactId, offset};
1969 : : int slotno;
1970 : : TransactionId *xidptr;
1971 : : int memberoff;
746 alvherre@alvh.no-ip. 1972 :CBC 17 : LWLock *lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1973 : :
1974 : 17 : LWLockAcquire(lock, LW_EXCLUSIVE);
4799 1975 : 17 : memberoff = MXOffsetToMemberOffset(offset);
2 heikki.linnakangas@i 1976 :GNC 17 : slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, &slru_read_context);
4799 alvherre@alvh.no-ip. 1977 :CBC 17 : xidptr = (TransactionId *)
1978 : 17 : (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1979 : :
1980 [ + + + - : 17 : MemSet(xidptr, 0, BLCKSZ - memberoff);
+ - - + -
- ]
1981 : :
1982 : : /*
1983 : : * Note: we don't need to zero out the flag bits in the remaining
1984 : : * members of the current group, because they are always reset before
1985 : : * writing.
1986 : : */
1987 : :
7435 tgl@sss.pgh.pa.us 1988 : 17 : MultiXactMemberCtl->shared->page_dirty[slotno] = true;
746 alvherre@alvh.no-ip. 1989 : 17 : LWLockRelease(lock);
1990 : : }
1991 : :
1992 : : /* signal that we're officially up */
3823 andres@anarazel.de 1993 : 939 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1994 : 939 : MultiXactState->finishedStartup = true;
3936 rhaas@postgresql.org 1995 : 939 : LWLockRelease(MultiXactGenLock);
1996 : :
1997 : : /* Now compute how far away the next multixid wraparound is. */
96 heikki.linnakangas@i 1998 :GNC 939 : SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
7626 tgl@sss.pgh.pa.us 1999 :CBC 939 : }
2000 : :
2001 : : /*
2002 : : * Get the MultiXact data to save in a checkpoint record
2003 : : */
2004 : : void
7585 2005 : 1598 : MultiXactGetCheckptMulti(bool is_shutdown,
2006 : : MultiXactId *nextMulti,
2007 : : MultiXactOffset *nextMultiOffset,
2008 : : MultiXactId *oldestMulti,
2009 : : Oid *oldestMultiDB)
2010 : : {
7626 2011 : 1598 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
7585 2012 : 1598 : *nextMulti = MultiXactState->nextMXact;
2013 : 1598 : *nextMultiOffset = MultiXactState->nextOffset;
4799 alvherre@alvh.no-ip. 2014 : 1598 : *oldestMulti = MultiXactState->oldestMultiXactId;
2015 : 1598 : *oldestMultiDB = MultiXactState->oldestMultiXactDB;
7626 tgl@sss.pgh.pa.us 2016 : 1598 : LWLockRelease(MultiXactGenLock);
2017 : :
2018 : : debug_elog6(DEBUG2,
2019 : : "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
2020 : : *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
2021 : 1598 : }
2022 : :
2023 : : /*
2024 : : * Perform a checkpoint --- either during shutdown, or on-the-fly
2025 : : */
2026 : : void
2027 : 1802 : CheckPointMultiXact(void)
2028 : : {
2029 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
2030 : :
2031 : : /*
2032 : : * Write dirty MultiXact pages to disk. This may result in sync requests
2033 : : * queued for later handling by ProcessSyncRequests(), as part of the
2034 : : * checkpoint.
2035 : : */
1997 tmunro@postgresql.or 2036 : 1802 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2037 : 1802 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2038 : :
2039 : : TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
7626 tgl@sss.pgh.pa.us 2040 : 1802 : }
2041 : :
2042 : : /*
2043 : : * Set the next-to-be-assigned MultiXactId and offset
2044 : : *
2045 : : * This is used when we can determine the correct next ID/offset exactly
2046 : : * from a checkpoint record. Although this is only called during bootstrap
2047 : : * and XLog replay, we take the lock in case any hot-standby backends are
2048 : : * examining the values.
2049 : : */
2050 : : void
7585 2051 : 1095 : MultiXactSetNextMXact(MultiXactId nextMulti,
2052 : : MultiXactOffset nextMultiOffset)
2053 : : {
93 heikki.linnakangas@i 2054 [ - + ]:GNC 1095 : Assert(MultiXactIdIsValid(nextMulti));
2055 : : debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
2056 : : nextMulti, nextMultiOffset);
2057 : :
5151 tgl@sss.pgh.pa.us 2058 :CBC 1095 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7626 2059 : 1095 : MultiXactState->nextMXact = nextMulti;
7585 2060 : 1095 : MultiXactState->nextOffset = nextMultiOffset;
5151 2061 : 1095 : LWLockRelease(MultiXactGenLock);
7626 2062 : 1095 : }
2063 : :
2064 : : /*
2065 : : * Determine the last safe MultiXactId to allocate given the currently oldest
2066 : : * datminmxid (ie, the oldest MultiXactId that might exist in any database
2067 : : * of our cluster), and the OID of the (or a) database with that value.
2068 : : *
2069 : : * This also updates MultiXactState->oldestOffset, by looking up the offset of
2070 : : * MultiXactState->oldestMultiXactId.
2071 : : */
2072 : : void
96 heikki.linnakangas@i 2073 :GNC 2095 : SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
2074 : : {
2075 : : MultiXactId multiVacLimit;
2076 : : MultiXactId multiWarnLimit;
2077 : : MultiXactId multiStopLimit;
2078 : : MultiXactId multiWrapLimit;
2079 : : MultiXactId curMulti;
2080 : :
4799 alvherre@alvh.no-ip. 2081 [ - + ]:CBC 2095 : Assert(MultiXactIdIsValid(oldest_datminmxid));
2082 : :
2083 : : /*
2084 : : * We pretend that a wrap will happen halfway through the multixact ID
2085 : : * space, but that's not really true, because multixacts wrap differently
2086 : : * from transaction IDs.
2087 : : */
2088 : 2095 : multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2089 [ - + ]: 2095 : if (multiWrapLimit < FirstMultiXactId)
4799 alvherre@alvh.no-ip. 2090 :UBC 0 : multiWrapLimit += FirstMultiXactId;
2091 : :
2092 : : /*
2093 : : * We'll refuse to continue assigning MultiXactIds once we get within 3M
2094 : : * multi of data loss. See SetTransactionIdLimit.
2095 : : */
2052 noah@leadboat.com 2096 :CBC 2095 : multiStopLimit = multiWrapLimit - 3000000;
4799 alvherre@alvh.no-ip. 2097 [ - + ]: 2095 : if (multiStopLimit < FirstMultiXactId)
4799 alvherre@alvh.no-ip. 2098 :UBC 0 : multiStopLimit -= FirstMultiXactId;
2099 : :
2100 : : /*
2101 : : * We'll start complaining loudly when we get within 40M multis of data
2102 : : * loss. This is kind of arbitrary, but if you let your gas gauge get
2103 : : * down to 2% of full, would you be looking for the next gas station? We
2104 : : * need to be fairly liberal about this number because there are lots of
2105 : : * scenarios where most transactions are done by automatic clients that
2106 : : * won't pay attention to warnings. (No, we're not gonna make this
2107 : : * configurable. If you know enough to configure it, you know enough to
2108 : : * not get in this kind of trouble in the first place.)
2109 : : */
2052 noah@leadboat.com 2110 :CBC 2095 : multiWarnLimit = multiWrapLimit - 40000000;
4799 alvherre@alvh.no-ip. 2111 [ - + ]: 2095 : if (multiWarnLimit < FirstMultiXactId)
4799 alvherre@alvh.no-ip. 2112 :UBC 0 : multiWarnLimit -= FirstMultiXactId;
2113 : :
2114 : : /*
2115 : : * We'll start trying to force autovacuums when oldest_datminmxid gets to
2116 : : * be more than autovacuum_multixact_freeze_max_age mxids old.
2117 : : *
2118 : : * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2119 : : * so that we don't have to worry about dealing with on-the-fly changes in
2120 : : * its value. See SetTransactionIdLimit.
2121 : : */
4413 alvherre@alvh.no-ip. 2122 :CBC 2095 : multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
4799 2123 [ - + ]: 2095 : if (multiVacLimit < FirstMultiXactId)
4799 alvherre@alvh.no-ip. 2124 :UBC 0 : multiVacLimit += FirstMultiXactId;
2125 : :
2126 : : /* Grab lock for just long enough to set the new limit values */
4799 alvherre@alvh.no-ip. 2127 :CBC 2095 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2128 : 2095 : MultiXactState->oldestMultiXactId = oldest_datminmxid;
2129 : 2095 : MultiXactState->oldestMultiXactDB = oldest_datoid;
2130 : 2095 : MultiXactState->multiVacLimit = multiVacLimit;
2131 : 2095 : MultiXactState->multiWarnLimit = multiWarnLimit;
2132 : 2095 : MultiXactState->multiStopLimit = multiStopLimit;
2133 : 2095 : MultiXactState->multiWrapLimit = multiWrapLimit;
2134 : 2095 : curMulti = MultiXactState->nextMXact;
2135 : 2095 : LWLockRelease(MultiXactGenLock);
2136 : :
2137 : : /* Log the info */
2138 [ + + ]: 2095 : ereport(DEBUG1,
2139 : : (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2140 : : multiWrapLimit, oldest_datoid)));
2141 : :
2142 : : /*
2143 : : * Computing the actual limits is only possible once the data directory is
2144 : : * in a consistent state. There's no need to compute the limits while
2145 : : * still replaying WAL - no decisions about new multis are made even
2146 : : * though multixact creations might be replayed. So we'll only do further
2147 : : * checks after TrimMultiXact() has been called.
2148 : : */
3823 andres@anarazel.de 2149 [ + + ]: 2095 : if (!MultiXactState->finishedStartup)
2150 : 1053 : return;
2151 : :
2152 [ - + ]: 1042 : Assert(!InRecovery);
2153 : :
2154 : : /*
2155 : : * Offsets are 64-bits wide and never wrap around, so we don't need to
2156 : : * consider them for emergency autovacuum purposes. But now that we're in
2157 : : * a consistent state, determine MultiXactState->oldestOffset. It will be
2158 : : * used to adjust the freezing cutoff, to keep the offsets disk usage in
2159 : : * check.
2160 : : */
96 heikki.linnakangas@i 2161 :GNC 1042 : SetOldestOffset();
2162 : :
2163 : : /*
2164 : : * If past the autovacuum force point, immediately signal an autovac
2165 : : * request. The reason for this is that autovac only processes one
2166 : : * database per invocation. Once it's finished cleaning up the oldest
2167 : : * database, it'll call here, and we'll signal the postmaster to start
2168 : : * another iteration immediately if there are still any old databases.
2169 : : */
2170 [ - + - - ]: 1042 : if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
4799 alvherre@alvh.no-ip. 2171 :UBC 0 : SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
2172 : :
2173 : : /* Give an immediate warning if past the wrap warn point */
3823 andres@anarazel.de 2174 [ - + ]:CBC 1042 : if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2175 : : {
2176 : : char *oldest_datname;
2177 : :
2178 : : /*
2179 : : * We can be called when not inside a transaction, for example during
2180 : : * StartupXLOG(). In such a case we cannot do database access, so we
2181 : : * must just report the oldest DB's OID.
2182 : : *
2183 : : * Note: it's also possible that get_database_name fails and returns
2184 : : * NULL, for example because the database just got dropped. We'll
2185 : : * still warn, even though the warning might now be unnecessary.
2186 : : */
4799 alvherre@alvh.no-ip. 2187 [ # # ]:UBC 0 : if (IsTransactionState())
2188 : 0 : oldest_datname = get_database_name(oldest_datoid);
2189 : : else
2190 : 0 : oldest_datname = NULL;
2191 : :
2192 [ # # ]: 0 : if (oldest_datname)
2193 [ # # ]: 0 : ereport(WARNING,
2194 : : (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2195 : : "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2196 : : multiWrapLimit - curMulti,
2197 : : oldest_datname,
2198 : : multiWrapLimit - curMulti),
2199 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2200 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2201 : : else
2202 [ # # ]: 0 : ereport(WARNING,
2203 : : (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2204 : : "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2205 : : multiWrapLimit - curMulti,
2206 : : oldest_datoid,
2207 : : multiWrapLimit - curMulti),
2208 : : errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2209 : : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2210 : : }
2211 : : }
2212 : :
2213 : : /*
2214 : : * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2215 : : * and similarly nextOffset is at least minMultiOffset.
2216 : : *
2217 : : * This is used when we can determine minimum safe values from an XLog
2218 : : * record (either an on-line checkpoint or an mxact creation log entry).
2219 : : * Although this is only called during XLog replay, we take the lock in case
2220 : : * any hot-standby backends are examining the values.
2221 : : */
2222 : : void
7585 tgl@sss.pgh.pa.us 2223 :CBC 697 : MultiXactAdvanceNextMXact(MultiXactId minMulti,
2224 : : MultiXactOffset minMultiOffset)
2225 : : {
93 heikki.linnakangas@i 2226 [ - + ]:GNC 697 : Assert(MultiXactIdIsValid(minMulti));
2227 : :
5151 tgl@sss.pgh.pa.us 2228 :CBC 697 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
7626 2229 [ + + ]: 697 : if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
2230 : : {
2231 : : debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
2232 : 5 : MultiXactState->nextMXact = minMulti;
2233 : : }
96 heikki.linnakangas@i 2234 [ + + ]:GNC 697 : if (MultiXactState->nextOffset < minMultiOffset)
2235 : : {
2236 : : debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIu64,
2237 : : minMultiOffset);
7585 tgl@sss.pgh.pa.us 2238 :CBC 5 : MultiXactState->nextOffset = minMultiOffset;
2239 : : }
5151 2240 : 697 : LWLockRelease(MultiXactGenLock);
7626 2241 : 697 : }
2242 : :
2243 : : /*
2244 : : * Update our oldestMultiXactId value, but only if it's more recent than what
2245 : : * we had.
2246 : : *
2247 : : * This may only be called during WAL replay.
2248 : : */
2249 : : void
4799 alvherre@alvh.no-ip. 2250 : 734 : MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2251 : : {
3823 andres@anarazel.de 2252 [ - + ]: 734 : Assert(InRecovery);
2253 : :
4799 alvherre@alvh.no-ip. 2254 [ - + ]: 734 : if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
96 heikki.linnakangas@i 2255 :UNC 0 : SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
4279 alvherre@alvh.no-ip. 2256 :CBC 734 : }
2257 : :
2258 : : /*
2259 : : * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2260 : : *
2261 : : * NB: this is called while holding MultiXactGenLock. We want it to be very
2262 : : * fast most of the time; even when it's not so fast, no actual I/O need
2263 : : * happen unless we're forced to write out a dirty log or xlog page to make
2264 : : * room in shared memory.
2265 : : */
2266 : : static void
7626 tgl@sss.pgh.pa.us 2267 : 5306 : ExtendMultiXactOffset(MultiXactId multi)
2268 : : {
2269 : : int64 pageno;
2270 : : LWLock *lock;
2271 : :
2272 : : /*
2273 : : * No work except at first MultiXactId of a page. But beware: just after
2274 : : * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2275 : : */
2276 [ + + + + ]: 5306 : if (MultiXactIdToOffsetEntry(multi) != 0 &&
2277 : : multi != FirstMultiXactId)
2278 : 5301 : return;
2279 : :
7626 tgl@sss.pgh.pa.us 2280 :GBC 5 : pageno = MultiXactIdToOffsetPage(multi);
746 alvherre@alvh.no-ip. 2281 : 5 : lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
2282 : :
2283 : 5 : LWLockAcquire(lock, LW_EXCLUSIVE);
2284 : :
2285 : : /* Zero the page and make a WAL entry about it */
251 alvherre@kurilemu.de 2286 :GNC 5 : SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
2287 : 5 : XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
2288 : : pageno);
2289 : :
746 alvherre@alvh.no-ip. 2290 :GBC 5 : LWLockRelease(lock);
2291 : : }
2292 : :
2293 : : /*
2294 : : * Make sure that MultiXactMember has room for the members of a newly-
2295 : : * allocated MultiXactId.
2296 : : *
2297 : : * Like the above routine, this is called while holding MultiXactGenLock;
2298 : : * same comments apply.
2299 : : */
2300 : : static void
7585 tgl@sss.pgh.pa.us 2301 :CBC 5306 : ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
2302 : : {
2303 : : /*
2304 : : * It's possible that the members span more than one page of the members
2305 : : * file, so we loop to ensure we consider each page. The coding is not
2306 : : * optimal if the members span several pages, but that seems unusual
2307 : : * enough to not worry much about.
2308 : : */
2309 [ + + ]: 10666 : while (nmembers > 0)
2310 : : {
2311 : : int flagsoff;
2312 : : int flagsbit;
2313 : : uint32 difference;
2314 : :
2315 : : /*
2316 : : * Only zero when at first entry of a page.
2317 : : */
4799 alvherre@alvh.no-ip. 2318 : 5360 : flagsoff = MXOffsetToFlagsOffset(offset);
2319 : 5360 : flagsbit = MXOffsetToFlagsBitShift(offset);
2320 [ + + + + ]: 5360 : if (flagsoff == 0 && flagsbit == 0)
2321 : : {
2322 : : int64 pageno;
2323 : : LWLock *lock;
2324 : :
7585 tgl@sss.pgh.pa.us 2325 : 57 : pageno = MXOffsetToMemberPage(offset);
746 alvherre@alvh.no-ip. 2326 : 57 : lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
2327 : :
2328 : 57 : LWLockAcquire(lock, LW_EXCLUSIVE);
2329 : :
2330 : : /* Zero the page and make a WAL entry about it */
251 alvherre@kurilemu.de 2331 :GNC 57 : SimpleLruZeroPage(MultiXactMemberCtl, pageno);
2332 : 57 : XLogSimpleInsertInt64(RM_MULTIXACT_ID,
2333 : : XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
2334 : :
746 alvherre@alvh.no-ip. 2335 :CBC 57 : LWLockRelease(lock);
2336 : : }
2337 : :
2338 : : /* Compute the number of items till end of current page. */
96 heikki.linnakangas@i 2339 :GNC 5360 : difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
2340 : :
2341 : : /*
2342 : : * Advance to next page. OK if nmembers goes negative.
2343 : : */
4297 alvherre@alvh.no-ip. 2344 :CBC 5360 : nmembers -= difference;
2345 : 5360 : offset += difference;
2346 : : }
7626 tgl@sss.pgh.pa.us 2347 : 5306 : }
2348 : :
2349 : : /*
2350 : : * GetOldestMultiXactId
2351 : : *
2352 : : * Return the oldest MultiXactId that's still possibly still seen as live by
2353 : : * any running transaction. Older ones might still exist on disk, but they no
2354 : : * longer have any running member transaction.
2355 : : *
2356 : : * It's not safe to truncate MultiXact SLRU segments on the value returned by
2357 : : * this function; however, it can be set as the new relminmxid for any table
2358 : : * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2359 : : * to truncate SLRUs when no table can possibly still have a referencing MXID.
2360 : : */
2361 : : MultiXactId
4799 alvherre@alvh.no-ip. 2362 : 49036 : GetOldestMultiXactId(void)
2363 : : {
2364 : : MultiXactId oldestMXact;
2365 : :
2366 : : /*
2367 : : * This is the oldest valid value among all the OldestMemberMXactId[] and
2368 : : * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2369 : : */
7626 tgl@sss.pgh.pa.us 2370 : 49036 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
93 heikki.linnakangas@i 2371 :GNC 49036 : oldestMXact = MultiXactState->nextMXact;
13 heikki.linnakangas@i 2372 [ + + ]:CBC 5865888 : for (int i = 0; i < NumMemberSlots; i++)
2373 : : {
2374 : : MultiXactId thisoldest;
2375 : :
7626 tgl@sss.pgh.pa.us 2376 : 5816852 : thisoldest = OldestMemberMXactId[i];
2377 [ + + + + ]: 5846313 : if (MultiXactIdIsValid(thisoldest) &&
2378 : 29461 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2379 : 55 : oldestMXact = thisoldest;
2380 : : }
13 heikki.linnakangas@i 2381 [ + + ]: 5762727 : for (int i = 0; i < NumVisibleSlots; i++)
2382 : : {
2383 : : MultiXactId thisoldest;
2384 : :
7626 tgl@sss.pgh.pa.us 2385 : 5713691 : thisoldest = OldestVisibleMXactId[i];
2386 [ + + + + ]: 5713703 : if (MultiXactIdIsValid(thisoldest) &&
2387 : 12 : MultiXactIdPrecedes(thisoldest, oldestMXact))
2388 : 2 : oldestMXact = thisoldest;
2389 : : }
2390 : :
2391 : 49036 : LWLockRelease(MultiXactGenLock);
2392 : :
4799 alvherre@alvh.no-ip. 2393 : 49036 : return oldestMXact;
2394 : : }
2395 : :
2396 : : /*
2397 : : * Calculate the oldest member offset and install it in MultiXactState, where
2398 : : * it can be used to adjust multixid freezing cutoffs.
2399 : : */
2400 : : static void
96 heikki.linnakangas@i 2401 :GNC 1042 : SetOldestOffset(void)
2402 : : {
2403 : : MultiXactId oldestMultiXactId;
2404 : : MultiXactId nextMXact;
3823 andres@anarazel.de 2405 :CBC 1042 : MultiXactOffset oldestOffset = 0; /* placate compiler */
2406 : : MultiXactOffset nextOffset;
3936 rhaas@postgresql.org 2407 : 1042 : bool oldestOffsetKnown = false;
2408 : :
2409 : : /*
2410 : : * NB: Have to prevent concurrent truncation, we might otherwise try to
2411 : : * lookup an oldestMulti that's concurrently getting truncated away.
2412 : : */
3823 andres@anarazel.de 2413 : 1042 : LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
2414 : :
2415 : : /* Read relevant fields from shared memory. */
3936 rhaas@postgresql.org 2416 : 1042 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
2417 : 1042 : oldestMultiXactId = MultiXactState->oldestMultiXactId;
2418 : 1042 : nextMXact = MultiXactState->nextMXact;
2419 : 1042 : nextOffset = MultiXactState->nextOffset;
3823 andres@anarazel.de 2420 [ - + ]: 1042 : Assert(MultiXactState->finishedStartup);
3974 alvherre@alvh.no-ip. 2421 : 1042 : LWLockRelease(MultiXactGenLock);
2422 : :
2423 : : /*
2424 : : * Determine the offset of the oldest multixact. Normally, we can read
2425 : : * the offset from the multixact itself, but there's an important special
2426 : : * case: if there are no multixacts in existence at all, oldestMXact
2427 : : * obviously can't point to one. It will instead point to the multixact
2428 : : * ID that will be assigned the next time one is needed.
2429 : : */
3936 rhaas@postgresql.org 2430 [ + + ]: 1042 : if (oldestMultiXactId == nextMXact)
2431 : : {
2432 : : /*
2433 : : * When the next multixact gets created, it will be stored at the next
2434 : : * offset.
2435 : : */
2436 : 1022 : oldestOffset = nextOffset;
2437 : 1022 : oldestOffsetKnown = true;
2438 : : }
2439 : : else
2440 : : {
2441 : : /*
2442 : : * Look up the offset at which the oldest existing multixact's members
2443 : : * are stored. If we cannot find it, be careful not to fail, and
2444 : : * leave oldestOffset unchanged. oldestOffset is initialized to zero
2445 : : * at system startup, which prevents truncating members until a proper
2446 : : * value is calculated.
2447 : : *
2448 : : * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2449 : : * the supposedly-earliest multixact might not really exist. Those
2450 : : * should be long gone by now, so this should not fail, but let's
2451 : : * still be defensive.)
2452 : : */
2453 : : oldestOffsetKnown =
2454 : 20 : find_multixact_start(oldestMultiXactId, &oldestOffset);
2455 : :
2456 [ + - ]: 20 : if (oldestOffsetKnown)
2457 [ - + ]: 20 : ereport(DEBUG1,
2458 : : (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2459 : : oldestOffset)));
2460 : : else
3823 andres@anarazel.de 2461 [ # # ]:UBC 0 : ereport(LOG,
2462 : : (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2463 : : oldestMultiXactId)));
2464 : : }
2465 : :
3823 andres@anarazel.de 2466 :CBC 1042 : LWLockRelease(MultiXactTruncationLock);
2467 : :
2468 : : /* Install the computed value */
2469 [ + - ]: 1042 : if (oldestOffsetKnown)
2470 : : {
96 heikki.linnakangas@i 2471 :GNC 1042 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2472 : 1042 : MultiXactState->oldestOffset = oldestOffset;
2473 : 1042 : LWLockRelease(MultiXactGenLock);
2474 : : }
3974 alvherre@alvh.no-ip. 2475 :GIC 1042 : }
2476 : :
2477 : : /*
2478 : : * Find the starting offset of the given MultiXactId.
2479 : : *
2480 : : * Returns false if the file containing the multi does not exist on disk.
2481 : : * Otherwise, returns true and sets *result to the starting member offset.
2482 : : *
2483 : : * This function does not prevent concurrent truncation, so if that's
2484 : : * required, the caller has to protect against that.
2485 : : */
2486 : : static bool
3936 rhaas@postgresql.org 2487 :CBC 20 : find_multixact_start(MultiXactId multi, MultiXactOffset *result)
2488 : : {
2489 : : MultiXactOffset offset;
2490 : : int64 pageno;
2491 : : int entryno;
2492 : : int slotno;
2493 : : MultiXactOffset *offptr;
2494 : :
3823 andres@anarazel.de 2495 [ - + ]: 20 : Assert(MultiXactState->finishedStartup);
2496 : :
3974 alvherre@alvh.no-ip. 2497 : 20 : pageno = MultiXactIdToOffsetPage(multi);
2498 : 20 : entryno = MultiXactIdToOffsetEntry(multi);
2499 : :
2500 : : /*
2501 : : * Write out dirty data, so PhysicalPageExists can work correctly.
2502 : : */
1997 tmunro@postgresql.or 2503 : 20 : SimpleLruWriteAll(MultiXactOffsetCtl, true);
2504 : 20 : SimpleLruWriteAll(MultiXactMemberCtl, true);
2505 : :
3936 rhaas@postgresql.org 2506 [ - + ]: 20 : if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
3936 rhaas@postgresql.org 2507 :UBC 0 : return false;
2508 : :
2509 : : /* lock is acquired by SimpleLruReadPage_ReadOnly */
2 heikki.linnakangas@i 2510 :GNC 20 : slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, &multi);
3974 alvherre@alvh.no-ip. 2511 :CBC 20 : offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2512 : 20 : offptr += entryno;
2513 : 20 : offset = *offptr;
746 2514 : 20 : LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
2515 : :
3936 rhaas@postgresql.org 2516 : 20 : *result = offset;
2517 : 20 : return true;
2518 : : }
2519 : :
2520 : : /*
2521 : : * GetMultiXactInfo
2522 : : *
2523 : : * Returns information about the current MultiXact state, as of:
2524 : : * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2525 : : * nextOffset: Next-to-be-assigned offset
2526 : : * oldestMultiXactId: Oldest MultiXact ID still in use
2527 : : * oldestOffset: Oldest offset still in use
2528 : : */
2529 : : void
75 michael@paquier.xyz 2530 :GNC 14080 : GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset,
2531 : : MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2532 : : {
2533 : : MultiXactId nextMultiXactId;
2534 : :
3964 rhaas@postgresql.org 2535 :CBC 14080 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
75 michael@paquier.xyz 2536 :GNC 14080 : *nextOffset = MultiXactState->nextOffset;
208 2537 : 14080 : *oldestMultiXactId = MultiXactState->oldestMultiXactId;
3964 rhaas@postgresql.org 2538 :CBC 14080 : nextMultiXactId = MultiXactState->nextMXact;
208 michael@paquier.xyz 2539 :GNC 14080 : *oldestOffset = MultiXactState->oldestOffset;
3964 rhaas@postgresql.org 2540 :CBC 14080 : LWLockRelease(MultiXactGenLock);
2541 : :
208 michael@paquier.xyz 2542 :GNC 14080 : *multixacts = nextMultiXactId - *oldestMultiXactId;
3964 rhaas@postgresql.org 2543 :GIC 14080 : }
2544 : :
2545 : : /*
2546 : : * Multixact members can be removed once the multixacts that refer to them
2547 : : * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2548 : : * vacuum_multixact_freeze_table_age work together to make sure we never have
2549 : : * too many multixacts; we hope that, at least under normal circumstances,
2550 : : * this will also be sufficient to keep us from using too many offsets.
2551 : : * However, if the average multixact has many members, we might accumulate a
2552 : : * large amount of members, consuming disk space, while still using few enough
2553 : : * multixids that the multixid limits fail to trigger relminmxid advancement
2554 : : * by VACUUM.
2555 : : *
2556 : : * To prevent that, if the members space usage exceeds a threshold
2557 : : * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2558 : : * autovacuum_multixact_freeze_max_age to a value just less than the number of
2559 : : * multixacts in use. We hope that this will quickly trigger autovacuuming on
2560 : : * the table or tables with the oldest relminmxid, thus allowing datminmxid
2561 : : * values to advance and removing some members.
2562 : : *
2563 : : * As the amount of the member space in use grows, we become more aggressive
2564 : : * in clamping this value. That not only causes autovacuum to ramp up, but
2565 : : * also makes any manual vacuums the user issues more aggressive. This
2566 : : * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2567 : : * minimum freeze age cutoffs based on the effective
2568 : : * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2569 : : * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2570 : : * freeze_max_age to zero, and every vacuum of any table will freeze every
2571 : : * multixact.
2572 : : */
2573 : : int
3964 rhaas@postgresql.org 2574 :CBC 14068 : MultiXactMemberFreezeThreshold(void)
2575 : : {
2576 : : uint32 multixacts;
2577 : : uint32 victim_multixacts;
2578 : : double fraction;
2579 : : int result;
2580 : : MultiXactId oldestMultiXactId;
2581 : : MultiXactOffset oldestOffset;
2582 : : MultiXactOffset nextOffset;
2583 : : uint64 members;
2584 : :
2585 : : /* Read the current offsets and multixact usage. */
75 michael@paquier.xyz 2586 :GNC 14068 : GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset);
2587 : 14068 : members = nextOffset - oldestOffset;
2588 : :
2589 : : /* If member space utilization is low, no special action is required. */
96 heikki.linnakangas@i 2590 [ + - ]: 14068 : if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
3964 rhaas@postgresql.org 2591 :CBC 14068 : return autovacuum_multixact_freeze_max_age;
2592 : :
2593 : : /*
2594 : : * Compute a target for relminmxid advancement. The number of multixacts
2595 : : * we try to eliminate from the system is based on how far we are past
2596 : : * MULTIXACT_MEMBER_LOW_THRESHOLD.
2597 : : *
2598 : : * The way this formula works is that when members is exactly at the low
2599 : : * threshold, fraction = 0.0, and we set freeze_max_age equal to
2600 : : * mxid_age(oldestMultiXactId). As members grows further, towards the
2601 : : * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2602 : : * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2603 : : * threshold, fraction > 1.0 and the result is clamped to 0.
2604 : : */
96 heikki.linnakangas@i 2605 :UNC 0 : fraction = (double) (members - MULTIXACT_MEMBER_LOW_THRESHOLD) /
2606 : : (MULTIXACT_MEMBER_HIGH_THRESHOLD - MULTIXACT_MEMBER_LOW_THRESHOLD);
2607 : :
2608 : : /* fraction could be > 1.0, but lowest possible freeze age is zero */
2609 [ # # ]: 0 : if (fraction >= 1.0)
3964 rhaas@postgresql.org 2610 :UBC 0 : return 0;
2611 : :
96 heikki.linnakangas@i 2612 :UNC 0 : victim_multixacts = multixacts * fraction;
640 heikki.linnakangas@i 2613 :UBC 0 : result = multixacts - victim_multixacts;
2614 : :
2615 : : /*
2616 : : * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2617 : : * autovacuum less aggressive than it would otherwise be.
2618 : : */
2619 : 0 : return Min(result, autovacuum_multixact_freeze_max_age);
2620 : : }
2621 : :
2622 : :
2623 : : /*
2624 : : * Delete members segments older than newOldestOffset
2625 : : */
2626 : : static void
59 heikki.linnakangas@i 2627 :UNC 0 : PerformMembersTruncation(MultiXactOffset newOldestOffset)
2628 : : {
96 2629 : 0 : SimpleLruTruncate(MultiXactMemberCtl,
2630 : : MXOffsetToMemberPage(newOldestOffset));
3823 andres@anarazel.de 2631 :UBC 0 : }
2632 : :
2633 : : /*
2634 : : * Delete offsets segments older than newOldestMulti
2635 : : */
2636 : : static void
59 heikki.linnakangas@i 2637 :UNC 0 : PerformOffsetsTruncation(MultiXactId newOldestMulti)
2638 : : {
2639 : : /*
2640 : : * We step back one multixact to avoid passing a cutoff page that hasn't
2641 : : * been created yet in the rare case that oldestMulti would be the first
2642 : : * item on a page and oldestMulti == nextMulti. In that case, if we
2643 : : * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2644 : : * detection.
2645 : : */
3823 andres@anarazel.de 2646 :UBC 0 : SimpleLruTruncate(MultiXactOffsetCtl,
2647 : : MultiXactIdToOffsetPage(PreviousMultiXactId(newOldestMulti)));
2648 : 0 : }
2649 : :
2650 : : /*
2651 : : * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2652 : : * ones still of interest.
2653 : : *
2654 : : * This is only called on a primary as part of vacuum (via
2655 : : * vac_truncate_clog()). During recovery truncation is done by replaying
2656 : : * truncation WAL records logged here.
2657 : : *
2658 : : * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2659 : : * is one of the databases preventing newOldestMulti from increasing.
2660 : : */
2661 : : void
3823 andres@anarazel.de 2662 :CBC 103 : TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
2663 : : {
2664 : : MultiXactId oldestMulti;
2665 : : MultiXactId nextMulti;
2666 : : MultiXactOffset newOldestOffset;
2667 : : MultiXactOffset nextOffset;
2668 : :
2669 [ - + ]: 103 : Assert(!RecoveryInProgress());
2670 [ - + ]: 103 : Assert(MultiXactState->finishedStartup);
93 heikki.linnakangas@i 2671 [ - + ]:GNC 103 : Assert(MultiXactIdIsValid(newOldestMulti));
2672 : :
2673 : : /*
2674 : : * We can only allow one truncation to happen at once. Otherwise parts of
2675 : : * members might vanish while we're doing lookups or similar. There's no
2676 : : * need to have an interlock with creating new multis or such, since those
2677 : : * are constrained by the limits (which only grow, never shrink).
2678 : : */
3823 andres@anarazel.de 2679 :CBC 103 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2680 : :
4279 alvherre@alvh.no-ip. 2681 : 103 : LWLockAcquire(MultiXactGenLock, LW_SHARED);
3823 andres@anarazel.de 2682 : 103 : nextMulti = MultiXactState->nextMXact;
3936 rhaas@postgresql.org 2683 : 103 : nextOffset = MultiXactState->nextOffset;
3823 andres@anarazel.de 2684 : 103 : oldestMulti = MultiXactState->oldestMultiXactId;
4279 alvherre@alvh.no-ip. 2685 : 103 : LWLockRelease(MultiXactGenLock);
2686 : :
2687 : : /*
2688 : : * Make sure to only attempt truncation if there's values to truncate
2689 : : * away. In normal processing values shouldn't go backwards, but there's
2690 : : * some corner cases (due to bugs) where that's possible.
2691 : : */
3823 andres@anarazel.de 2692 [ + - ]: 103 : if (MultiXactIdPrecedesOrEquals(newOldestMulti, oldestMulti))
2693 : : {
2694 : 103 : LWLockRelease(MultiXactTruncationLock);
2695 : 103 : return;
2696 : : }
2697 : :
2698 : : /*
2699 : : * Compute up to where to truncate MultiXactMember. Lookup the
2700 : : * corresponding member offset for newOldestMulti for that.
2701 : : */
3823 andres@anarazel.de 2702 [ # # ]:UBC 0 : if (newOldestMulti == nextMulti)
2703 : : {
2704 : : /* there are NO MultiXacts */
2705 : 0 : newOldestOffset = nextOffset;
2706 : : }
2707 [ # # ]: 0 : else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
2708 : : {
2709 [ # # ]: 0 : ereport(LOG,
2710 : : (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2711 : : newOldestMulti)));
2712 : 0 : LWLockRelease(MultiXactTruncationLock);
2713 : 0 : return;
2714 : : }
2715 : :
2716 : : /*
2717 : : * On crash, MultiXactIdCreateFromMembers() can leave behind multixids
2718 : : * that were not yet written out and hence have zero offset on disk. If
2719 : : * such a multixid becomes oldestMulti, we won't be able to look up its
2720 : : * offset. That should be rare, so we don't try to do anything smart about
2721 : : * it. Just skip the truncation, and hope that by the next truncation
2722 : : * attempt, oldestMulti has advanced to a valid multixid.
2723 : : */
59 heikki.linnakangas@i 2724 [ # # ]: 0 : if (newOldestOffset == 0)
2725 : : {
2726 [ # # ]: 0 : ereport(LOG,
2727 : : (errmsg("cannot truncate up to MultiXact %u because it has invalid offset, skipping truncation",
2728 : : newOldestMulti)));
2729 : 0 : LWLockRelease(MultiXactTruncationLock);
2730 : 0 : return;
2731 : : }
2732 : :
3823 andres@anarazel.de 2733 [ # # ]: 0 : elog(DEBUG1, "performing multixact truncation: "
2734 : : "oldestMulti %u (offsets segment %" PRIx64 "), "
2735 : : "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2736 : : newOldestMulti,
2737 : : MultiXactIdToOffsetSegment(newOldestMulti),
2738 : : newOldestOffset,
2739 : : MXOffsetToMemberSegment(newOldestOffset));
2740 : :
2741 : : /*
2742 : : * Do truncation, and the WAL logging of the truncation, in a critical
2743 : : * section. That way offsets/members cannot get out of sync anymore, i.e.
2744 : : * once consistent the newOldestMulti will always exist in members, even
2745 : : * if we crashed in the wrong moment.
2746 : : */
2747 : 0 : START_CRIT_SECTION();
2748 : :
2749 : : /*
2750 : : * Prevent checkpoints from being scheduled concurrently. This is critical
2751 : : * because otherwise a truncation record might not be replayed after a
2752 : : * crash/basebackup, even though the state of the data directory would
2753 : : * require it.
2754 : : */
1437 rhaas@postgresql.org 2755 [ # # ]: 0 : Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
2756 : 0 : MyProc->delayChkptFlags |= DELAY_CHKPT_START;
2757 : :
2758 : : /* WAL log truncation */
59 heikki.linnakangas@i 2759 :UNC 0 : WriteMTruncateXlogRec(newOldestMultiDB, newOldestMulti, newOldestOffset);
2760 : :
2761 : : /*
2762 : : * Update in-memory limits before performing the truncation, while inside
2763 : : * the critical section: Have to do it before truncation, to prevent
2764 : : * concurrent lookups of those values. Has to be inside the critical
2765 : : * section as otherwise a future call to this function would error out,
2766 : : * while looking up the oldest member in offsets, if our caller crashes
2767 : : * before updating the limits.
2768 : : */
3823 andres@anarazel.de 2769 :UBC 0 : LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2770 : 0 : MultiXactState->oldestMultiXactId = newOldestMulti;
2771 : 0 : MultiXactState->oldestMultiXactDB = newOldestMultiDB;
96 heikki.linnakangas@i 2772 :UNC 0 : MultiXactState->oldestOffset = newOldestOffset;
3823 andres@anarazel.de 2773 :UBC 0 : LWLockRelease(MultiXactGenLock);
2774 : :
2775 : : /* First truncate members */
59 heikki.linnakangas@i 2776 :UNC 0 : PerformMembersTruncation(newOldestOffset);
2777 : :
2778 : : /* Then offsets */
2779 : 0 : PerformOffsetsTruncation(newOldestMulti);
2780 : :
1437 rhaas@postgresql.org 2781 :UBC 0 : MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
2782 : :
3823 andres@anarazel.de 2783 [ # # ]: 0 : END_CRIT_SECTION();
2784 : 0 : LWLockRelease(MultiXactTruncationLock);
2785 : : }
2786 : :
2787 : : /*
2788 : : * Decide whether a MultiXactOffset page number is "older" for truncation
2789 : : * purposes. Analogous to CLOGPagePrecedes().
2790 : : *
2791 : : * Offsetting the values is optional, because MultiXactIdPrecedes() has
2792 : : * translational symmetry.
2793 : : */
2794 : : static bool
837 akorotkov@postgresql 2795 :CBC 44850 : MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
2796 : : {
2797 : : MultiXactId multi1;
2798 : : MultiXactId multi2;
2799 : :
7626 tgl@sss.pgh.pa.us 2800 : 44850 : multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
1884 noah@leadboat.com 2801 : 44850 : multi1 += FirstMultiXactId + 1;
7626 tgl@sss.pgh.pa.us 2802 : 44850 : multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
1884 noah@leadboat.com 2803 : 44850 : multi2 += FirstMultiXactId + 1;
2804 : :
2805 [ + + + + ]: 74750 : return (MultiXactIdPrecedes(multi1, multi2) &&
2806 : 29900 : MultiXactIdPrecedes(multi1,
2807 : : multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
2808 : : }
2809 : :
2810 : : /*
2811 : : * Decide whether a MultiXactMember page number is "older" for truncation
2812 : : * purposes. There is no "invalid offset number" and members never wrap
2813 : : * around, so use the numbers verbatim.
2814 : : */
2815 : : static bool
837 akorotkov@postgresql 2816 :UBC 0 : MultiXactMemberPagePrecedes(int64 page1, int64 page2)
2817 : : {
96 heikki.linnakangas@i 2818 :UNC 0 : return page1 < page2;
2819 : : }
2820 : :
2821 : : static int
2 2822 : 0 : MultiXactOffsetIoErrorDetail(const void *opaque_data)
2823 : : {
2824 : 0 : MultiXactId multixid = *(const MultiXactId *) opaque_data;
2825 : :
2826 : 0 : return errdetail("Could not access offset of multixact %u.", multixid);
2827 : : }
2828 : :
2829 : : static int
2830 : 0 : MultiXactMemberIoErrorDetail(const void *opaque_data)
2831 : : {
2832 : 0 : const MultiXactMemberSlruReadContext *context = opaque_data;
2833 : :
2834 [ # # ]: 0 : if (MultiXactIdIsValid(context->multi))
2835 : 0 : return errdetail("Could not access member of multixact %u at offset %" PRIu64 ".",
2836 : 0 : context->multi, context->offset);
2837 : : else
2838 : 0 : return errdetail("Could not access multixact member at offset %" PRIu64 ".",
2839 : 0 : context->offset);
2840 : : }
2841 : :
2842 : : /*
2843 : : * Decide which of two MultiXactIds is earlier.
2844 : : *
2845 : : * XXX do we need to do something special for InvalidMultiXactId?
2846 : : * (Doesn't look like it.)
2847 : : */
2848 : : bool
7626 tgl@sss.pgh.pa.us 2849 :CBC 873055 : MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
2850 : : {
7456 bruce@momjian.us 2851 : 873055 : int32 diff = (int32) (multi1 - multi2);
2852 : :
7626 tgl@sss.pgh.pa.us 2853 : 873055 : return (diff < 0);
2854 : : }
2855 : :
2856 : : /*
2857 : : * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2858 : : *
2859 : : * XXX do we need to do something special for InvalidMultiXactId?
2860 : : * (Doesn't look like it.)
2861 : : */
2862 : : bool
4490 alvherre@alvh.no-ip. 2863 : 6012 : MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
2864 : : {
2865 : 6012 : int32 diff = (int32) (multi1 - multi2);
2866 : :
2867 : 6012 : return (diff <= 0);
2868 : : }
2869 : :
2870 : :
2871 : : /*
2872 : : * Write a TRUNCATE xlog record
2873 : : *
2874 : : * We must flush the xlog record to disk before returning --- see notes in
2875 : : * TruncateCLOG().
2876 : : */
2877 : : static void
3823 andres@anarazel.de 2878 :UBC 0 : WriteMTruncateXlogRec(Oid oldestMultiDB,
2879 : : MultiXactId oldestMulti,
2880 : : MultiXactOffset oldestOffset)
2881 : : {
2882 : : XLogRecPtr recptr;
2883 : : xl_multixact_truncate xlrec;
2884 : :
2885 : 0 : xlrec.oldestMultiDB = oldestMultiDB;
59 heikki.linnakangas@i 2886 :UNC 0 : xlrec.oldestMulti = oldestMulti;
2887 : 0 : xlrec.oldestOffset = oldestOffset;
2888 : :
3823 andres@anarazel.de 2889 :UBC 0 : XLogBeginInsert();
397 peter@eisentraut.org 2890 : 0 : XLogRegisterData(&xlrec, SizeOfMultiXactTruncate);
3823 andres@anarazel.de 2891 : 0 : recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
2892 : 0 : XLogFlush(recptr);
2893 : 0 : }
2894 : :
2895 : : /*
2896 : : * MULTIXACT resource manager's routines
2897 : : */
2898 : : void
4133 heikki.linnakangas@i 2899 :CBC 5 : multixact_redo(XLogReaderState *record)
2900 : : {
2901 : 5 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2902 : :
2903 : : /* Backup blocks are not used in multixact records */
2904 [ - + ]: 5 : Assert(!XLogRecHasAnyBlockRefs(record));
2905 : :
7585 tgl@sss.pgh.pa.us 2906 [ - + ]: 5 : if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2907 : : {
2908 : : int64 pageno;
2909 : :
837 akorotkov@postgresql 2910 :UBC 0 : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
251 alvherre@kurilemu.de 2911 :UNC 0 : SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
2912 : : }
7585 tgl@sss.pgh.pa.us 2913 [ - + ]:CBC 5 : else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2914 : : {
2915 : : int64 pageno;
2916 : :
837 akorotkov@postgresql 2917 :LBC (2) : memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
251 alvherre@kurilemu.de 2918 :UNC 0 : SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
2919 : : }
7585 tgl@sss.pgh.pa.us 2920 [ + - ]:CBC 5 : else if (info == XLOG_MULTIXACT_CREATE_ID)
2921 : : {
4799 alvherre@alvh.no-ip. 2922 : 5 : xl_multixact_create *xlrec =
1031 tgl@sss.pgh.pa.us 2923 : 5 : (xl_multixact_create *) XLogRecGetData(record);
2924 : : TransactionId max_xid;
2925 : : int i;
2926 : :
2927 : : /* Store the data back into the SLRU files */
4799 alvherre@alvh.no-ip. 2928 : 5 : RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2929 : 5 : xlrec->members);
2930 : :
2931 : : /* Make sure nextMXact/nextOffset are beyond what this record has */
93 heikki.linnakangas@i 2932 :GNC 5 : MultiXactAdvanceNextMXact(NextMultiXactId(xlrec->mid),
4799 alvherre@alvh.no-ip. 2933 :CBC 5 : xlrec->moff + xlrec->nmembers);
2934 : :
2935 : : /*
2936 : : * Make sure nextXid is beyond any XID mentioned in the record. This
2937 : : * should be unnecessary, since any XID found here ought to have other
2938 : : * evidence in the XLOG, but let's be safe.
2939 : : */
4133 heikki.linnakangas@i 2940 : 5 : max_xid = XLogRecGetXid(record);
4799 alvherre@alvh.no-ip. 2941 [ + + ]: 15 : for (i = 0; i < xlrec->nmembers; i++)
2942 : : {
2943 [ - + ]: 10 : if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
4799 alvherre@alvh.no-ip. 2944 :UBC 0 : max_xid = xlrec->members[i].xid;
2945 : : }
2946 : :
2544 tmunro@postgresql.or 2947 :CBC 5 : AdvanceNextFullTransactionIdPastXid(max_xid);
2948 : : }
3823 andres@anarazel.de 2949 [ # # ]:UBC 0 : else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2950 : : {
2951 : : xl_multixact_truncate xlrec;
2952 : : int64 pageno;
2953 : :
2954 : 0 : memcpy(&xlrec, XLogRecGetData(record),
2955 : : SizeOfMultiXactTruncate);
2956 : :
2957 [ # # ]: 0 : elog(DEBUG1, "replaying multixact truncation: "
2958 : : "oldestMulti %u (offsets segment %" PRIx64 "), "
2959 : : "oldestOffset %" PRIu64 " (members segment %" PRIx64 ")",
2960 : : xlrec.oldestMulti,
2961 : : MultiXactIdToOffsetSegment(xlrec.oldestMulti),
2962 : : xlrec.oldestOffset,
2963 : : MXOffsetToMemberSegment(xlrec.oldestOffset));
2964 : :
2965 : : /* should not be required, but more than cheap enough */
2966 : 0 : LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2967 : :
2968 : : /*
2969 : : * Advance the horizon values, so they're current at the end of
2970 : : * recovery.
2971 : : */
59 heikki.linnakangas@i 2972 :UNC 0 : SetMultiXactIdLimit(xlrec.oldestMulti, xlrec.oldestMultiDB);
2973 : :
2974 : 0 : PerformMembersTruncation(xlrec.oldestOffset);
2975 : :
2976 : : /*
2977 : : * During XLOG replay, latest_page_number isn't necessarily set up
2978 : : * yet; insert a suitable value to bypass the sanity test in
2979 : : * SimpleLruTruncate.
2980 : : */
2981 : 0 : pageno = MultiXactIdToOffsetPage(xlrec.oldestMulti);
768 alvherre@alvh.no-ip. 2982 : 0 : pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2983 : : pageno);
59 heikki.linnakangas@i 2984 : 0 : PerformOffsetsTruncation(xlrec.oldestMulti);
2985 : :
3823 andres@anarazel.de 2986 :UBC 0 : LWLockRelease(MultiXactTruncationLock);
2987 : : }
2988 : : else
7585 tgl@sss.pgh.pa.us 2989 [ # # ]: 0 : elog(PANIC, "multixact_redo: unknown op code %u", info);
7585 tgl@sss.pgh.pa.us 2990 :CBC 5 : }
2991 : :
2992 : : /*
2993 : : * Entrypoint for sync.c to sync offsets files.
2994 : : */
2995 : : int
1997 tmunro@postgresql.or 2996 :UBC 0 : multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
2997 : : {
2998 : 0 : return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
2999 : : }
3000 : :
3001 : : /*
3002 : : * Entrypoint for sync.c to sync members files.
3003 : : */
3004 : : int
3005 : 0 : multixactmemberssyncfiletag(const FileTag *ftag, char *path)
3006 : : {
3007 : 0 : return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
3008 : : }
|