Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * lwlock.c
4 : : * Lightweight lock manager
5 : : *
6 : : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : : * access to shared-memory data structures. Therefore, they offer both
8 : : * exclusive and shared lock modes (to support read/write and read-only
9 : : * access to a shared object). There are few other frammishes. User-level
10 : : * locking should be done with the full lock manager --- which depends on
11 : : * LWLocks to protect its shared state.
12 : : *
13 : : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : : * wait until a variable changes value. The variable is initially not set
15 : : * when the lock is acquired with LWLockAcquire, i.e. it remains set to the
16 : : * value it was set to when the lock was released last, and can be updated
17 : : * without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : : * waits for the variable to be updated, or until the lock is free. When
19 : : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : : * appropriate value for a free lock. The meaning of the variable is up to
21 : : * the caller, the lightweight lock code just assigns and compares it.
22 : : *
23 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
24 : : * Portions Copyright (c) 1994, Regents of the University of California
25 : : *
26 : : * IDENTIFICATION
27 : : * src/backend/storage/lmgr/lwlock.c
28 : : *
29 : : * NOTES:
30 : : *
31 : : * This used to be a pretty straight forward reader-writer lock
32 : : * implementation, in which the internal state was protected by a
33 : : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : : * too high for workloads/locks that were taken in shared mode very
35 : : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : : * while trying to acquire a shared lock that was actually free.
37 : : *
38 : : * Thus a new implementation was devised that provides wait-free shared lock
39 : : * acquisition for locks that aren't exclusively locked.
40 : : *
41 : : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : : * the formerly separate shared and exclusive counters and to use atomic
43 : : * operations to acquire the lock. That's fairly easy to do for plain
44 : : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : : * in the OS.
46 : : *
47 : : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : : * variable. For exclusive lock we swap in a sentinel value
49 : : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : : *
51 : : * To release the lock we use an atomic decrement to release the lock. If the
52 : : * new value is zero (we get that atomically), we know we can/have to release
53 : : * waiters.
54 : : *
55 : : * Obviously it is important that the sentinel value for exclusive locks
56 : : * doesn't conflict with the maximum number of possible share lockers -
57 : : * luckily MAX_BACKENDS makes that easily possible.
58 : : *
59 : : *
60 : : * The attentive reader might have noticed that naively doing the above has a
61 : : * glaring race condition: We try to lock using the atomic operations and
62 : : * notice that we have to wait. Unfortunately by the time we have finished
63 : : * queuing, the former locker very well might have already finished its
64 : : * work. That's problematic because we're now stuck waiting inside the OS.
65 : :
66 : : * To mitigate those races we use a two phased attempt at locking:
67 : : * Phase 1: Try to do it atomically, if we succeed, nice
68 : : * Phase 2: Add ourselves to the waitqueue of the lock
69 : : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : : * the queue
71 : : * Phase 4: Sleep till wake-up, goto Phase 1
72 : : *
73 : : * This protects us against the problem from above as nobody can release too
74 : : * quick, before we're queued, since after Phase 2 we're already queued.
75 : : * -------------------------------------------------------------------------
76 : : */
77 : : #include "postgres.h"
78 : :
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "port/pg_bitutils.h"
83 : : #include "storage/proc.h"
84 : : #include "storage/proclist.h"
85 : : #include "storage/procnumber.h"
86 : : #include "storage/spin.h"
87 : : #include "utils/memutils.h"
88 : : #include "utils/wait_event.h"
89 : :
90 : : #ifdef LWLOCK_STATS
91 : : #include "utils/hsearch.h"
92 : : #endif
93 : :
94 : :
95 : : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 31)
96 : : #define LW_FLAG_WAKE_IN_PROGRESS ((uint32) 1 << 30)
97 : : #define LW_FLAG_LOCKED ((uint32) 1 << 29)
98 : : #define LW_FLAG_BITS 3
99 : : #define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
100 : :
101 : : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
102 : : #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
103 : : #define LW_VAL_SHARED 1
104 : :
105 : : /* already (power of 2)-1, i.e. suitable for a mask */
106 : : #define LW_SHARED_MASK MAX_BACKENDS
107 : : #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
108 : :
109 : :
110 : : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
111 : : "MAX_BACKENDS + 1 needs to be a power of 2");
112 : :
113 : : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
114 : : "MAX_BACKENDS and LW_FLAG_MASK overlap");
115 : :
116 : : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
117 : : "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
118 : :
119 : : /*
120 : : * There are three sorts of LWLock "tranches":
121 : : *
122 : : * 1. The individually-named locks defined in lwlocklist.h each have their
123 : : * own tranche. We absorb the names of these tranches from there into
124 : : * BuiltinTrancheNames here.
125 : : *
126 : : * 2. There are some predefined tranches for built-in groups of locks defined
127 : : * in lwlocklist.h. We absorb the names of these tranches, too.
128 : : *
129 : : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
130 : : * or LWLockNewTrancheId. These names are stored in shared memory and can be
131 : : * accessed via LWLockTrancheNames.
132 : : *
133 : : * All these names are user-visible as wait event names, so choose with care
134 : : * ... and do not forget to update the documentation's list of wait events.
135 : : */
136 : : static const char *const BuiltinTrancheNames[] = {
137 : : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
138 : : #define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname),
139 : : #include "storage/lwlocklist.h"
140 : : #undef PG_LWLOCK
141 : : #undef PG_LWLOCKTRANCHE
142 : : };
143 : :
144 : : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
145 : : LWTRANCHE_FIRST_USER_DEFINED,
146 : : "missing entries in BuiltinTrancheNames[]");
147 : :
148 : : /*
149 : : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
150 : : * points to the shared memory locations of the names of all
151 : : * dynamically-created tranches. Backends inherit the pointer by fork from the
152 : : * postmaster (except in the EXEC_BACKEND case, where we have special measures
153 : : * to pass it down).
154 : : */
155 : : char **LWLockTrancheNames = NULL;
156 : :
157 : : /*
158 : : * This points to the main array of LWLocks in shared memory. Backends inherit
159 : : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
160 : : * where we have special measures to pass it down).
161 : : */
162 : : LWLockPadded *MainLWLockArray = NULL;
163 : :
164 : : /*
165 : : * We use this structure to keep track of locked LWLocks for release
166 : : * during error recovery. Normally, only a few will be held at once, but
167 : : * occasionally the number can be much higher.
168 : : */
169 : : #define MAX_SIMUL_LWLOCKS 200
170 : :
171 : : /* struct representing the LWLocks we're holding */
172 : : typedef struct LWLockHandle
173 : : {
174 : : LWLock *lock;
175 : : LWLockMode mode;
176 : : } LWLockHandle;
177 : :
178 : : static int num_held_lwlocks = 0;
179 : : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
180 : :
181 : : /* struct representing the LWLock tranche request for named tranche */
182 : : typedef struct NamedLWLockTrancheRequest
183 : : {
184 : : char tranche_name[NAMEDATALEN];
185 : : int num_lwlocks;
186 : : } NamedLWLockTrancheRequest;
187 : :
188 : : /*
189 : : * NamedLWLockTrancheRequests is the valid length of the request array. These
190 : : * variables are non-static so that launch_backend.c can copy them to child
191 : : * processes in EXEC_BACKEND builds.
192 : : */
193 : : int NamedLWLockTrancheRequests = 0;
194 : : NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
195 : :
196 : : /* postmaster's local copy of the request array */
197 : : static NamedLWLockTrancheRequest *LocalNamedLWLockTrancheRequestArray = NULL;
198 : :
199 : : /* shared memory counter of registered tranches */
200 : : int *LWLockCounter = NULL;
201 : :
202 : : /* backend-local counter of registered tranches */
203 : : static int LocalLWLockCounter;
204 : :
205 : : #define MAX_NAMED_TRANCHES 256
206 : :
207 : : static void InitializeLWLocks(void);
208 : : static inline void LWLockReportWaitStart(LWLock *lock);
209 : : static inline void LWLockReportWaitEnd(void);
210 : : static const char *GetLWTrancheName(uint16 trancheId);
211 : :
212 : : #define T_NAME(lock) \
213 : : GetLWTrancheName((lock)->tranche)
214 : :
215 : : #ifdef LWLOCK_STATS
216 : : typedef struct lwlock_stats_key
217 : : {
218 : : int tranche;
219 : : void *instance;
220 : : } lwlock_stats_key;
221 : :
222 : : typedef struct lwlock_stats
223 : : {
224 : : lwlock_stats_key key;
225 : : int sh_acquire_count;
226 : : int ex_acquire_count;
227 : : int block_count;
228 : : int dequeue_self_count;
229 : : int spin_delay_count;
230 : : } lwlock_stats;
231 : :
232 : : static HTAB *lwlock_stats_htab;
233 : : static lwlock_stats lwlock_stats_dummy;
234 : : #endif
235 : :
236 : : #ifdef LOCK_DEBUG
237 : : bool Trace_lwlocks = false;
238 : :
239 : : inline static void
240 : : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
241 : : {
242 : : /* hide statement & context here, otherwise the log is just too verbose */
243 : : if (Trace_lwlocks)
244 : : {
245 : : uint32 state = pg_atomic_read_u32(&lock->state);
246 : :
247 : : ereport(LOG,
248 : : (errhidestmt(true),
249 : : errhidecontext(true),
250 : : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u waking %d",
251 : : MyProcPid,
252 : : where, T_NAME(lock), lock,
253 : : (state & LW_VAL_EXCLUSIVE) != 0,
254 : : state & LW_SHARED_MASK,
255 : : (state & LW_FLAG_HAS_WAITERS) != 0,
256 : : pg_atomic_read_u32(&lock->nwaiters),
257 : : (state & LW_FLAG_WAKE_IN_PROGRESS) != 0)));
258 : : }
259 : : }
260 : :
261 : : inline static void
262 : : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
263 : : {
264 : : /* hide statement & context here, otherwise the log is just too verbose */
265 : : if (Trace_lwlocks)
266 : : {
267 : : ereport(LOG,
268 : : (errhidestmt(true),
269 : : errhidecontext(true),
270 : : errmsg_internal("%s(%s %p): %s", where,
271 : : T_NAME(lock), lock, msg)));
272 : : }
273 : : }
274 : :
275 : : #else /* not LOCK_DEBUG */
276 : : #define PRINT_LWDEBUG(a,b,c) ((void)0)
277 : : #define LOG_LWDEBUG(a,b,c) ((void)0)
278 : : #endif /* LOCK_DEBUG */
279 : :
280 : : #ifdef LWLOCK_STATS
281 : :
282 : : static void init_lwlock_stats(void);
283 : : static void print_lwlock_stats(int code, Datum arg);
284 : : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
285 : :
286 : : static void
287 : : init_lwlock_stats(void)
288 : : {
289 : : HASHCTL ctl;
290 : : static MemoryContext lwlock_stats_cxt = NULL;
291 : : static bool exit_registered = false;
292 : :
293 : : if (lwlock_stats_cxt != NULL)
294 : : MemoryContextDelete(lwlock_stats_cxt);
295 : :
296 : : /*
297 : : * The LWLock stats will be updated within a critical section, which
298 : : * requires allocating new hash entries. Allocations within a critical
299 : : * section are normally not allowed because running out of memory would
300 : : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
301 : : * turned on in production, so that's an acceptable risk. The hash entries
302 : : * are small, so the risk of running out of memory is minimal in practice.
303 : : */
304 : : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
305 : : "LWLock stats",
306 : : ALLOCSET_DEFAULT_SIZES);
307 : : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
308 : :
309 : : ctl.keysize = sizeof(lwlock_stats_key);
310 : : ctl.entrysize = sizeof(lwlock_stats);
311 : : ctl.hcxt = lwlock_stats_cxt;
312 : : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
313 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
314 : : if (!exit_registered)
315 : : {
316 : : on_shmem_exit(print_lwlock_stats, 0);
317 : : exit_registered = true;
318 : : }
319 : : }
320 : :
321 : : static void
322 : : print_lwlock_stats(int code, Datum arg)
323 : : {
324 : : HASH_SEQ_STATUS scan;
325 : : lwlock_stats *lwstats;
326 : :
327 : : hash_seq_init(&scan, lwlock_stats_htab);
328 : :
329 : : /* Grab an LWLock to keep different backends from mixing reports */
330 : : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
331 : :
332 : : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
333 : : {
334 : : fprintf(stderr,
335 : : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
336 : : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
337 : : lwstats->key.instance, lwstats->sh_acquire_count,
338 : : lwstats->ex_acquire_count, lwstats->block_count,
339 : : lwstats->spin_delay_count, lwstats->dequeue_self_count);
340 : : }
341 : :
342 : : LWLockRelease(&MainLWLockArray[0].lock);
343 : : }
344 : :
345 : : static lwlock_stats *
346 : : get_lwlock_stats_entry(LWLock *lock)
347 : : {
348 : : lwlock_stats_key key;
349 : : lwlock_stats *lwstats;
350 : : bool found;
351 : :
352 : : /*
353 : : * During shared memory initialization, the hash table doesn't exist yet.
354 : : * Stats of that phase aren't very interesting, so just collect operations
355 : : * on all locks in a single dummy entry.
356 : : */
357 : : if (lwlock_stats_htab == NULL)
358 : : return &lwlock_stats_dummy;
359 : :
360 : : /* Fetch or create the entry. */
361 : : MemSet(&key, 0, sizeof(key));
362 : : key.tranche = lock->tranche;
363 : : key.instance = lock;
364 : : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
365 : : if (!found)
366 : : {
367 : : lwstats->sh_acquire_count = 0;
368 : : lwstats->ex_acquire_count = 0;
369 : : lwstats->block_count = 0;
370 : : lwstats->dequeue_self_count = 0;
371 : : lwstats->spin_delay_count = 0;
372 : : }
373 : : return lwstats;
374 : : }
375 : : #endif /* LWLOCK_STATS */
376 : :
377 : :
378 : : /*
379 : : * Compute number of LWLocks required by named tranches. These will be
380 : : * allocated in the main array.
381 : : */
382 : : static int
2131 tgl@sss.pgh.pa.us 383 :CBC 3297 : NumLWLocksForNamedTranches(void)
384 : : {
3692 rhaas@postgresql.org 385 : 3297 : int numLocks = 0;
386 : : int i;
387 : :
388 [ + + ]: 3414 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
389 : 117 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
390 : :
391 : 3297 : return numLocks;
392 : : }
393 : :
394 : : /*
395 : : * Compute shmem space needed for LWLocks and named tranches.
396 : : */
397 : : Size
8933 tgl@sss.pgh.pa.us 398 : 3297 : LWLockShmemSize(void)
399 : : {
400 : : Size size;
3686 rhaas@postgresql.org 401 : 3297 : int numLocks = NUM_FIXED_LWLOCKS;
402 : :
403 : : /*
404 : : * If re-initializing shared memory, the request array will no longer be
405 : : * accessible, so switch to the copy in postmaster's local memory. We'll
406 : : * copy it back into shared memory later when CreateLWLocks() is called
407 : : * again.
408 : : */
178 nathan@postgresql.or 409 [ - + ]:GNC 3297 : if (LocalNamedLWLockTrancheRequestArray)
178 nathan@postgresql.or 410 :UNC 0 : NamedLWLockTrancheRequestArray = LocalNamedLWLockTrancheRequestArray;
411 : :
412 : : /* Calculate total number of locks needed in the main array. */
2131 tgl@sss.pgh.pa.us 413 :CBC 3297 : numLocks += NumLWLocksForNamedTranches();
414 : :
415 : : /* Space for dynamic allocation counter. */
193 nathan@postgresql.or 416 :GNC 3297 : size = MAXALIGN(sizeof(int));
417 : :
418 : : /* Space for named tranches. */
419 : 3297 : size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *)));
420 : 3297 : size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN));
421 : :
422 : : /*
423 : : * Make space for named tranche requests. This is done for the benefit of
424 : : * EXEC_BACKEND builds, which otherwise wouldn't be able to call
425 : : * GetNamedLWLockTranche() outside postmaster.
426 : : */
185 427 : 3297 : size = add_size(size, mul_size(NamedLWLockTrancheRequests,
428 : : sizeof(NamedLWLockTrancheRequest)));
429 : :
430 : : /* Space for the LWLock array, plus room for cache line alignment. */
193 431 : 3297 : size = add_size(size, LWLOCK_PADDED_SIZE);
432 : 3297 : size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded)));
433 : :
7512 tgl@sss.pgh.pa.us 434 :CBC 3297 : return size;
435 : : }
436 : :
437 : : /*
438 : : * Allocate shmem space for the main LWLock array and all tranches and
439 : : * initialize it.
440 : : */
441 : : void
8933 442 : 1150 : CreateLWLocks(void)
443 : : {
4430 rhaas@postgresql.org 444 [ + - ]: 1150 : if (!IsUnderPostmaster)
445 : : {
446 : 1150 : Size spaceLocks = LWLockShmemSize();
447 : : char *ptr;
448 : :
449 : : /* Allocate space */
450 : 1150 : ptr = (char *) ShmemAlloc(spaceLocks);
451 : :
452 : : /* Initialize the dynamic-allocation counter for tranches */
198 nathan@postgresql.or 453 :GNC 1150 : LWLockCounter = (int *) ptr;
454 : 1150 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
193 455 : 1150 : ptr += MAXALIGN(sizeof(int));
456 : :
457 : : /* Initialize tranche names */
458 : 1150 : LWLockTrancheNames = (char **) ptr;
459 : 1150 : ptr += MAX_NAMED_TRANCHES * sizeof(char *);
460 [ + + ]: 295550 : for (int i = 0; i < MAX_NAMED_TRANCHES; i++)
461 : : {
462 : 294400 : LWLockTrancheNames[i] = ptr;
463 : 294400 : ptr += NAMEDATALEN;
464 : : }
465 : :
466 : : /*
467 : : * Move named tranche requests to shared memory. This is done for the
468 : : * benefit of EXEC_BACKEND builds, which otherwise wouldn't be able to
469 : : * call GetNamedLWLockTranche() outside postmaster.
470 : : */
185 471 [ + + ]: 1150 : if (NamedLWLockTrancheRequests > 0)
472 : : {
473 : : /*
474 : : * Save the pointer to the request array in postmaster's local
475 : : * memory. We'll need it if we ever need to re-initialize shared
476 : : * memory after a crash.
477 : : */
178 478 : 8 : LocalNamedLWLockTrancheRequestArray = NamedLWLockTrancheRequestArray;
479 : :
185 480 : 8 : memcpy(ptr, NamedLWLockTrancheRequestArray,
481 : : NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest));
482 : 8 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *) ptr;
483 : 8 : ptr += NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest);
484 : : }
485 : :
486 : : /* Ensure desired alignment of LWLock array */
4430 rhaas@postgresql.org 487 : 1150 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
488 : 1150 : MainLWLockArray = (LWLockPadded *) ptr;
489 : :
490 : : /* Initialize all LWLocks */
3685 rhaas@postgresql.org 491 :CBC 1150 : InitializeLWLocks();
492 : : }
493 : 1150 : }
494 : :
495 : : /*
496 : : * Initialize LWLocks that are fixed and those belonging to named tranches.
497 : : */
498 : : static void
499 : 1150 : InitializeLWLocks(void)
500 : : {
501 : : int id;
502 : : int i;
503 : : int j;
504 : : LWLockPadded *lock;
505 : :
506 : : /* Initialize all individual LWLocks in main array */
507 [ + + ]: 65550 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
3376 508 : 64400 : LWLockInitialize(&lock->lock, id);
509 : :
510 : : /* Initialize buffer mapping LWLocks in main array */
1937 michael@paquier.xyz 511 : 1150 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
3685 rhaas@postgresql.org 512 [ + + ]: 148350 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
513 : 147200 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
514 : :
515 : : /* Initialize lmgrs' LWLocks in main array */
1937 michael@paquier.xyz 516 : 1150 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
3685 rhaas@postgresql.org 517 [ + + ]: 19550 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
518 : 18400 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
519 : :
520 : : /* Initialize predicate lmgrs' LWLocks in main array */
1937 michael@paquier.xyz 521 : 1150 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
3685 rhaas@postgresql.org 522 [ + + ]: 19550 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
523 : 18400 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
524 : :
525 : : /*
526 : : * Copy the info about any named tranches into shared memory (so that
527 : : * other processes can see it), and initialize the requested LWLocks.
528 : : */
529 [ + + ]: 1150 : if (NamedLWLockTrancheRequests > 0)
530 : : {
531 : 8 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
532 : :
533 [ + + ]: 47 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
534 : : {
535 : : NamedLWLockTrancheRequest *request;
536 : : int tranche;
537 : :
538 : 39 : request = &NamedLWLockTrancheRequestArray[i];
193 nathan@postgresql.or 539 :GNC 39 : tranche = LWLockNewTrancheId(request->tranche_name);
540 : :
3685 rhaas@postgresql.org 541 [ + + ]:CBC 78 : for (j = 0; j < request->num_lwlocks; j++, lock++)
193 nathan@postgresql.or 542 :GNC 39 : LWLockInitialize(&lock->lock, tranche);
543 : : }
544 : : }
3685 rhaas@postgresql.org 545 :CBC 1150 : }
546 : :
547 : : /*
548 : : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
549 : : */
550 : : void
4276 heikki.linnakangas@i 551 : 21562 : InitLWLockAccess(void)
552 : : {
553 : : #ifdef LWLOCK_STATS
554 : : init_lwlock_stats();
555 : : #endif
556 : 21562 : }
557 : :
558 : : /*
559 : : * GetNamedLWLockTranche - returns the base address of LWLock from the
560 : : * specified tranche.
561 : : *
562 : : * Caller needs to retrieve the requested number of LWLocks starting from
563 : : * the base lock address returned by this API. This can be used for
564 : : * tranches that are requested by using RequestNamedLWLockTranche() API.
565 : : */
566 : : LWLockPadded *
3692 rhaas@postgresql.org 567 : 9 : GetNamedLWLockTranche(const char *tranche_name)
568 : : {
569 : : int lock_pos;
570 : : int i;
571 : :
572 : : /*
573 : : * Obtain the position of base address of LWLock belonging to requested
574 : : * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
575 : : * in MainLWLockArray after fixed locks.
576 : : */
3686 577 : 9 : lock_pos = NUM_FIXED_LWLOCKS;
3692 578 [ + + ]: 41 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
579 : : {
580 [ + + ]: 40 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
581 : : tranche_name) == 0)
582 : 8 : return &MainLWLockArray[lock_pos];
583 : :
3692 rhaas@postgresql.org 584 :GBC 32 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
585 : : }
586 : :
2131 tgl@sss.pgh.pa.us 587 [ + - ]: 1 : elog(ERROR, "requested tranche is not registered");
588 : :
589 : : /* just to keep compiler quiet */
590 : : return NULL;
591 : : }
592 : :
593 : : /*
594 : : * Allocate a new tranche ID with the provided name.
595 : : */
596 : : int
193 nathan@postgresql.or 597 :GNC 283 : LWLockNewTrancheId(const char *name)
598 : : {
599 : : int result;
600 : :
601 [ + + ]: 283 : if (!name)
602 [ + - ]: 1 : ereport(ERROR,
603 : : (errcode(ERRCODE_INVALID_NAME),
604 : : errmsg("tranche name cannot be NULL")));
605 : :
606 [ + + ]: 282 : if (strlen(name) >= NAMEDATALEN)
607 [ + - ]: 1 : ereport(ERROR,
608 : : (errcode(ERRCODE_NAME_TOO_LONG),
609 : : errmsg("tranche name too long"),
610 : : errdetail("LWLock tranche names must be no longer than %d bytes.",
611 : : NAMEDATALEN - 1)));
612 : :
613 : : /*
614 : : * We use the ShmemLock spinlock to protect LWLockCounter and
615 : : * LWLockTrancheNames.
616 : : */
617 : 281 : SpinLockAcquire(ShmemLock);
618 : :
619 [ + + ]: 281 : if (*LWLockCounter - LWTRANCHE_FIRST_USER_DEFINED >= MAX_NAMED_TRANCHES)
620 : : {
621 : 1 : SpinLockRelease(ShmemLock);
622 [ + - ]: 1 : ereport(ERROR,
623 : : (errmsg("maximum number of tranches already registered"),
624 : : errdetail("No more than %d tranches may be registered.",
625 : : MAX_NAMED_TRANCHES)));
626 : : }
627 : :
628 : 280 : result = (*LWLockCounter)++;
629 : 280 : LocalLWLockCounter = *LWLockCounter;
630 : 280 : strlcpy(LWLockTrancheNames[result - LWTRANCHE_FIRST_USER_DEFINED], name, NAMEDATALEN);
631 : :
632 : 280 : SpinLockRelease(ShmemLock);
633 : :
634 : 280 : return result;
635 : : }
636 : :
637 : : /*
638 : : * RequestNamedLWLockTranche
639 : : * Request that extra LWLocks be allocated during postmaster
640 : : * startup.
641 : : *
642 : : * This may only be called via the shmem_request_hook of a library that is
643 : : * loaded into the postmaster via shared_preload_libraries. Calls from
644 : : * elsewhere will fail.
645 : : *
646 : : * The tranche name will be user-visible as a wait event name, so try to
647 : : * use a name that fits the style for those.
648 : : */
649 : : void
3692 rhaas@postgresql.org 650 :CBC 39 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
651 : : {
652 : : NamedLWLockTrancheRequest *request;
653 : : static int NamedLWLockTrancheRequestsAllocated;
654 : :
1402 655 [ - + ]: 39 : if (!process_shmem_requests_in_progress)
1402 rhaas@postgresql.org 656 [ # # ]:UBC 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
657 : :
193 nathan@postgresql.or 658 [ - + ]:GNC 39 : if (!tranche_name)
193 nathan@postgresql.or 659 [ # # ]:UNC 0 : ereport(ERROR,
660 : : (errcode(ERRCODE_INVALID_NAME),
661 : : errmsg("tranche name cannot be NULL")));
662 : :
193 nathan@postgresql.or 663 [ - + ]:GNC 39 : if (strlen(tranche_name) >= NAMEDATALEN)
193 nathan@postgresql.or 664 [ # # ]:UNC 0 : ereport(ERROR,
665 : : (errcode(ERRCODE_NAME_TOO_LONG),
666 : : errmsg("tranche name too long"),
667 : : errdetail("LWLock tranche names must be no longer than %d bytes.",
668 : : NAMEDATALEN - 1)));
669 : :
3692 rhaas@postgresql.org 670 [ + + ]:CBC 39 : if (NamedLWLockTrancheRequestArray == NULL)
671 : : {
192 nathan@postgresql.or 672 : 8 : NamedLWLockTrancheRequestsAllocated = 16;
3692 rhaas@postgresql.org 673 : 8 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
674 : 8 : MemoryContextAlloc(TopMemoryContext,
675 : : NamedLWLockTrancheRequestsAllocated
676 : : * sizeof(NamedLWLockTrancheRequest));
677 : : }
678 : :
192 nathan@postgresql.or 679 [ + + ]: 39 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
680 : : {
192 nathan@postgresql.or 681 :GBC 1 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
682 : :
683 : 1 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
684 : 1 : repalloc(NamedLWLockTrancheRequestArray,
685 : : i * sizeof(NamedLWLockTrancheRequest));
686 : 1 : NamedLWLockTrancheRequestsAllocated = i;
687 : : }
688 : :
3692 rhaas@postgresql.org 689 :CBC 39 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
2131 tgl@sss.pgh.pa.us 690 : 39 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
3692 rhaas@postgresql.org 691 : 39 : request->num_lwlocks = num_lwlocks;
692 : 39 : NamedLWLockTrancheRequests++;
693 : 39 : }
694 : :
695 : : /*
696 : : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
697 : : */
698 : : void
4430 699 : 2317725 : LWLockInitialize(LWLock *lock, int tranche_id)
700 : : {
701 : : /* verify the tranche_id is valid */
193 nathan@postgresql.or 702 :GNC 2317725 : (void) GetLWTrancheName(tranche_id);
703 : :
61 andres@anarazel.de 704 : 2317724 : pg_atomic_init_u32(&lock->state, 0);
705 : : #ifdef LOCK_DEBUG
706 : : pg_atomic_init_u32(&lock->nwaiters, 0);
707 : : #endif
4430 rhaas@postgresql.org 708 :CBC 2317724 : lock->tranche = tranche_id;
3499 709 : 2317724 : proclist_init(&lock->waiters);
4430 710 : 2317724 : }
711 : :
712 : : /*
713 : : * Report start of wait event for light-weight locks.
714 : : *
715 : : * This function will be used by all the light-weight lock calls which
716 : : * needs to wait to acquire the lock. This function distinguishes wait
717 : : * event based on tranche and lock id.
718 : : */
719 : : static inline void
3657 720 : 240318 : LWLockReportWaitStart(LWLock *lock)
721 : : {
3376 722 : 240318 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
3657 723 : 240318 : }
724 : :
725 : : /*
726 : : * Report end of wait event for light-weight locks.
727 : : */
728 : : static inline void
3640 andres@anarazel.de 729 : 240318 : LWLockReportWaitEnd(void)
730 : : {
3657 rhaas@postgresql.org 731 : 240318 : pgstat_report_wait_end();
732 : 240318 : }
733 : :
734 : : /*
735 : : * Return the name of an LWLock tranche.
736 : : */
737 : : static const char *
2131 tgl@sss.pgh.pa.us 738 : 2318013 : GetLWTrancheName(uint16 trancheId)
739 : : {
740 : : /* Built-in tranche or individual LWLock? */
741 [ + + ]: 2318013 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
781 alvherre@alvh.no-ip. 742 : 2316815 : return BuiltinTrancheNames[trancheId];
743 : :
744 : : /*
745 : : * We only ever add new entries to LWLockTrancheNames, so most lookups can
746 : : * avoid taking the spinlock as long as the backend-local counter
747 : : * (LocalLWLockCounter) is greater than the requested tranche ID. Else,
748 : : * we need to first update the backend-local counter with ShmemLock held
749 : : * before attempting the lookup again. In practice, the latter case is
750 : : * probably rare.
751 : : */
193 nathan@postgresql.or 752 [ + + ]:GNC 1198 : if (trancheId >= LocalLWLockCounter)
753 : : {
754 : 1 : SpinLockAcquire(ShmemLock);
755 : 1 : LocalLWLockCounter = *LWLockCounter;
756 : 1 : SpinLockRelease(ShmemLock);
757 : :
758 [ + - ]: 1 : if (trancheId >= LocalLWLockCounter)
759 [ + - ]: 1 : elog(ERROR, "tranche %d is not registered", trancheId);
760 : : }
761 : :
762 : : /*
763 : : * It's an extension tranche, so look in LWLockTrancheNames.
764 : : */
765 : 1197 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
766 : :
2131 tgl@sss.pgh.pa.us 767 :GBC 1197 : return LWLockTrancheNames[trancheId];
768 : : }
769 : :
770 : : /*
771 : : * Return an identifier for an LWLock based on the wait class and event.
772 : : */
773 : : const char *
2131 tgl@sss.pgh.pa.us 774 :CBC 288 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
775 : : {
776 [ - + ]: 288 : Assert(classId == PG_WAIT_LWLOCK);
777 : : /* The event IDs are just tranche numbers. */
778 : 288 : return GetLWTrancheName(eventId);
779 : : }
780 : :
781 : : /*
782 : : * Internal function that tries to atomically acquire the lwlock in the passed
783 : : * in mode.
784 : : *
785 : : * This function will not block waiting for a lock to become free - that's the
786 : : * caller's job.
787 : : *
788 : : * Returns true if the lock isn't free and we need to wait.
789 : : */
790 : : static bool
3949 bruce@momjian.us 791 : 175712423 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
792 : : {
793 : : uint32 old_state;
794 : :
1234 peter@eisentraut.org 795 [ + + - + ]: 175712423 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
796 : :
797 : : /*
798 : : * Read once outside the loop, later iterations will get the newer value
799 : : * via compare & exchange.
800 : : */
3880 andres@anarazel.de 801 : 175712423 : old_state = pg_atomic_read_u32(&lock->state);
802 : :
803 : : /* loop until we've determined whether we could acquire the lock or not */
804 : : while (true)
4098 805 : 30191 : {
806 : : uint32 desired_state;
807 : : bool lock_free;
808 : :
3880 809 : 175742614 : desired_state = old_state;
810 : :
4098 811 [ + + ]: 175742614 : if (mode == LW_EXCLUSIVE)
812 : : {
3880 813 : 100466441 : lock_free = (old_state & LW_LOCK_MASK) == 0;
4098 814 [ + + ]: 100466441 : if (lock_free)
815 : 100251356 : desired_state += LW_VAL_EXCLUSIVE;
816 : : }
817 : : else
818 : : {
3880 819 : 75276173 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
4098 820 [ + + ]: 75276173 : if (lock_free)
821 : 75275265 : desired_state += LW_VAL_SHARED;
822 : : }
823 : :
824 : : /*
825 : : * Attempt to swap in the state we are expecting. If we didn't see
826 : : * lock to be free, that's just the old value. If we saw it as free,
827 : : * we'll attempt to mark it acquired. The reason that we always swap
828 : : * in the value is that this doubles as a memory barrier. We could try
829 : : * to be smarter and only swap in values if we saw the lock as free,
830 : : * but benchmark haven't shown it as beneficial so far.
831 : : *
832 : : * Retry if the value changed since we last looked at it.
833 : : */
834 [ + + ]: 175742614 : if (pg_atomic_compare_exchange_u32(&lock->state,
835 : : &old_state, desired_state))
836 : : {
837 [ + + ]: 175712423 : if (lock_free)
838 : : {
839 : : /* Great! Got the lock. */
840 : : #ifdef LOCK_DEBUG
841 : : if (mode == LW_EXCLUSIVE)
842 : : lock->owner = MyProc;
843 : : #endif
844 : 175503722 : return false;
845 : : }
846 : : else
3324 heikki.linnakangas@i 847 : 208701 : return true; /* somebody else has the lock */
848 : : }
849 : : }
850 : : pg_unreachable();
851 : : }
852 : :
853 : : /*
854 : : * Lock the LWLock's wait list against concurrent activity.
855 : : *
856 : : * NB: even though the wait list is locked, non-conflicting lock operations
857 : : * may still happen concurrently.
858 : : *
859 : : * Time spent holding mutex should be short!
860 : : */
861 : : static void
3626 andres@anarazel.de 862 : 2976540 : LWLockWaitListLock(LWLock *lock)
863 : : {
864 : : uint32 old_state;
865 : : #ifdef LWLOCK_STATS
866 : : lwlock_stats *lwstats;
867 : : uint32 delays = 0;
868 : :
869 : : lwstats = get_lwlock_stats_entry(lock);
870 : : #endif
871 : :
872 : : while (true)
873 : : {
874 : : /*
875 : : * Always try once to acquire the lock directly, without setting up
876 : : * the spin-delay infrastructure. The work necessary for that shows up
877 : : * in profiles and is rarely necessary.
878 : : */
879 : 2983838 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
86 andres@anarazel.de 880 [ + + ]:GNC 2983838 : if (likely(!(old_state & LW_FLAG_LOCKED)))
3626 andres@anarazel.de 881 :CBC 2976540 : break; /* got lock */
882 : :
883 : : /* and then spin without atomic operations until lock is released */
884 : : {
885 : : SpinDelayStatus delayStatus;
886 : :
3622 887 : 7298 : init_local_spin_delay(&delayStatus);
888 : :
3626 889 [ + + ]: 79411 : while (old_state & LW_FLAG_LOCKED)
890 : : {
891 : 72113 : perform_spin_delay(&delayStatus);
892 : 72113 : old_state = pg_atomic_read_u32(&lock->state);
893 : : }
894 : : #ifdef LWLOCK_STATS
895 : : delays += delayStatus.delays;
896 : : #endif
897 : 7298 : finish_spin_delay(&delayStatus);
898 : : }
899 : :
900 : : /*
901 : : * Retry. The lock might obviously already be re-acquired by the time
902 : : * we're attempting to get it again.
903 : : */
904 : : }
905 : :
906 : : #ifdef LWLOCK_STATS
907 : : lwstats->spin_delay_count += delays;
908 : : #endif
909 : 2976540 : }
910 : :
911 : : /*
912 : : * Unlock the LWLock's wait list.
913 : : *
914 : : * Note that it can be more efficient to manipulate flags and release the
915 : : * locks in a single atomic operation.
916 : : */
917 : : static void
918 : 2824556 : LWLockWaitListUnlock(LWLock *lock)
919 : : {
920 : : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
921 : :
922 : 2824556 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
923 : :
924 [ - + ]: 2824556 : Assert(old_state & LW_FLAG_LOCKED);
925 : 2824556 : }
926 : :
927 : : /*
928 : : * Wakeup all the lockers that currently have a chance to acquire the lock.
929 : : */
930 : : static void
4098 931 : 151984 : LWLockWakeup(LWLock *lock)
932 : : {
60 andres@anarazel.de 933 :GNC 151984 : bool new_wake_in_progress = false;
4098 andres@anarazel.de 934 :CBC 151984 : bool wokeup_somebody = false;
935 : : proclist_head wakeup;
936 : : proclist_mutable_iter iter;
937 : :
3499 rhaas@postgresql.org 938 : 151984 : proclist_init(&wakeup);
939 : :
940 : : /* lock wait list while collecting backends to wake up */
3626 andres@anarazel.de 941 : 151984 : LWLockWaitListLock(lock);
942 : :
3499 rhaas@postgresql.org 943 [ + + + + : 300343 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
+ + ]
944 : : {
945 : 244675 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
946 : :
4098 andres@anarazel.de 947 [ + + + + ]: 244675 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
948 : 12 : continue;
949 : :
3499 rhaas@postgresql.org 950 : 244663 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
951 : 244663 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
952 : :
4098 andres@anarazel.de 953 [ + + ]: 244663 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
954 : : {
955 : : /*
956 : : * Prevent additional wakeups until retryer gets to run. Backends
957 : : * that are just waiting for the lock to become free don't retry
958 : : * automatically.
959 : : */
60 andres@anarazel.de 960 :GNC 96655 : new_wake_in_progress = true;
961 : :
962 : : /*
963 : : * Don't wakeup (further) exclusive locks.
964 : : */
4098 andres@anarazel.de 965 :CBC 96655 : wokeup_somebody = true;
966 : : }
967 : :
968 : : /*
969 : : * Signal that the process isn't on the wait list anymore. This allows
970 : : * LWLockDequeueSelf() to remove itself of the waitlist with a
971 : : * proclist_delete(), rather than having to check if it has been
972 : : * removed from the list.
973 : : */
1211 974 [ - + ]: 244663 : Assert(waiter->lwWaiting == LW_WS_WAITING);
975 : 244663 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
976 : :
977 : : /*
978 : : * Once we've woken up an exclusive lock, there's no point in waking
979 : : * up anybody else.
980 : : */
3949 bruce@momjian.us 981 [ + + ]: 244663 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
4098 andres@anarazel.de 982 : 96316 : break;
983 : : }
984 : :
3499 rhaas@postgresql.org 985 [ + + - + ]: 151984 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
986 : :
987 : : /* unset required flags, and release lock, in one fell swoop */
988 : : {
989 : : uint32 old_state;
990 : : uint32 desired_state;
991 : :
3626 andres@anarazel.de 992 : 151984 : old_state = pg_atomic_read_u32(&lock->state);
993 : : while (true)
994 : : {
995 : 152148 : desired_state = old_state;
996 : :
997 : : /* compute desired flags */
998 : :
60 andres@anarazel.de 999 [ + + ]:GNC 152148 : if (new_wake_in_progress)
61 1000 : 96632 : desired_state |= LW_FLAG_WAKE_IN_PROGRESS;
1001 : : else
1002 : 55516 : desired_state &= ~LW_FLAG_WAKE_IN_PROGRESS;
1003 : :
111 andres@anarazel.de 1004 [ + + ]:CBC 152148 : if (proclist_is_empty(&lock->waiters))
3626 1005 : 127218 : desired_state &= ~LW_FLAG_HAS_WAITERS;
1006 : :
1007 : 152148 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1008 : :
1009 [ + + ]: 152148 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1010 : : desired_state))
1011 : 151984 : break;
1012 : : }
1013 : : }
1014 : :
1015 : : /* Awaken any waiters I removed from the queue. */
3499 rhaas@postgresql.org 1016 [ + + + + : 396647 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1017 : : {
1018 : 244663 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1019 : :
1020 : : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1021 : 244663 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1022 : :
1023 : : /*
1024 : : * Guarantee that lwWaiting being unset only becomes visible once the
1025 : : * unlink from the link has completed. Otherwise the target backend
1026 : : * could be woken up for other reason and enqueue for a new lock - if
1027 : : * that happens before the list unlink happens, the list would end up
1028 : : * being corrupted.
1029 : : *
1030 : : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1031 : : * another lock.
1032 : : */
4098 andres@anarazel.de 1033 : 244663 : pg_write_barrier();
1211 1034 : 244663 : waiter->lwWaiting = LW_WS_NOT_WAITING;
3380 tgl@sss.pgh.pa.us 1035 : 244663 : PGSemaphoreUnlock(waiter->sem);
1036 : : }
4098 andres@anarazel.de 1037 : 151984 : }
1038 : :
1039 : : /*
1040 : : * Add ourselves to the end of the queue.
1041 : : *
1042 : : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1043 : : */
1044 : : static void
1045 : 260041 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1046 : : {
1047 : : /*
1048 : : * If we don't have a PGPROC structure, there's no way to wait. This
1049 : : * should never occur, since MyProc should only be null during shared
1050 : : * memory initialization.
1051 : : */
1052 [ - + ]: 260041 : if (MyProc == NULL)
4098 andres@anarazel.de 1053 [ # # ]:UBC 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1054 : :
1211 andres@anarazel.de 1055 [ - + ]:CBC 260041 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
4098 andres@anarazel.de 1056 [ # # ]:UBC 0 : elog(PANIC, "queueing for lock while waiting on another one");
1057 : :
3626 andres@anarazel.de 1058 :CBC 260041 : LWLockWaitListLock(lock);
1059 : :
1060 : : /* setting the flag is protected by the spinlock */
4098 1061 : 260041 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1062 : :
1211 1063 : 260041 : MyProc->lwWaiting = LW_WS_WAITING;
4098 1064 : 260041 : MyProc->lwWaitMode = mode;
1065 : :
1066 : : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1067 [ + + ]: 260041 : if (mode == LW_WAIT_UNTIL_FREE)
752 heikki.linnakangas@i 1068 : 149277 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1069 : : else
1070 : 110764 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1071 : :
1072 : : /* Can release the mutex now */
3626 andres@anarazel.de 1073 : 260041 : LWLockWaitListUnlock(lock);
1074 : :
1075 : : #ifdef LOCK_DEBUG
1076 : : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1077 : : #endif
4098 1078 : 260041 : }
1079 : :
1080 : : /*
1081 : : * Remove ourselves from the waitlist.
1082 : : *
1083 : : * This is used if we queued ourselves because we thought we needed to sleep
1084 : : * but, after further checking, we discovered that we don't actually need to
1085 : : * do so.
1086 : : */
1087 : : static void
1088 : 19723 : LWLockDequeueSelf(LWLock *lock)
1089 : : {
1090 : : bool on_waitlist;
1091 : :
1092 : : #ifdef LWLOCK_STATS
1093 : : lwlock_stats *lwstats;
1094 : :
1095 : : lwstats = get_lwlock_stats_entry(lock);
1096 : :
1097 : : lwstats->dequeue_self_count++;
1098 : : #endif
1099 : :
3626 1100 : 19723 : LWLockWaitListLock(lock);
1101 : :
1102 : : /*
1103 : : * Remove ourselves from the waitlist, unless we've already been removed.
1104 : : * The removal happens with the wait list lock held, so there's no race in
1105 : : * this check.
1106 : : */
1211 1107 : 19723 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1108 [ + + ]: 19723 : if (on_waitlist)
752 heikki.linnakangas@i 1109 : 15337 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1110 : :
3499 rhaas@postgresql.org 1111 [ + + ]: 19723 : if (proclist_is_empty(&lock->waiters) &&
4098 andres@anarazel.de 1112 [ + + ]: 19245 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1113 : : {
1114 : 15028 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1115 : : }
1116 : :
1117 : : /* XXX: combine with fetch_and above? */
3626 1118 : 19723 : LWLockWaitListUnlock(lock);
1119 : :
1120 : : /* clear waiting state again, nice for debugging */
1211 1121 [ + + ]: 19723 : if (on_waitlist)
1122 : 15337 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1123 : : else
1124 : : {
3949 bruce@momjian.us 1125 : 4386 : int extraWaits = 0;
1126 : :
1127 : : /*
1128 : : * Somebody else dequeued us and has or will wake us up. Deal with the
1129 : : * superfluous absorption of a wakeup.
1130 : : */
1131 : :
1132 : : /*
1133 : : * Clear LW_FLAG_WAKE_IN_PROGRESS if somebody woke us before we
1134 : : * removed ourselves - they'll have set it.
1135 : : */
61 andres@anarazel.de 1136 :GNC 4386 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_WAKE_IN_PROGRESS);
1137 : :
1138 : : /*
1139 : : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1140 : : * get reset at some inconvenient point later. Most of the time this
1141 : : * will immediately return.
1142 : : */
1143 : : for (;;)
1144 : : {
3380 tgl@sss.pgh.pa.us 1145 :CBC 4386 : PGSemaphoreLock(MyProc->sem);
1211 andres@anarazel.de 1146 [ + - ]: 4386 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
4098 1147 : 4386 : break;
4098 andres@anarazel.de 1148 :UBC 0 : extraWaits++;
1149 : : }
1150 : :
1151 : : /*
1152 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1153 : : */
4098 andres@anarazel.de 1154 [ - + ]:CBC 4386 : while (extraWaits-- > 0)
3380 tgl@sss.pgh.pa.us 1155 :UBC 0 : PGSemaphoreUnlock(MyProc->sem);
1156 : : }
1157 : :
1158 : : #ifdef LOCK_DEBUG
1159 : : {
1160 : : /* not waiting anymore */
1161 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1162 : :
1163 : : Assert(nwaiters < MAX_BACKENDS);
1164 : : }
1165 : : #endif
4098 andres@anarazel.de 1166 :CBC 19723 : }
1167 : :
1168 : : /*
1169 : : * LWLockAcquire - acquire a lightweight lock in the specified mode
1170 : : *
1171 : : * If the lock is not available, sleep until it is. Returns true if the lock
1172 : : * was available immediately, false if we had to sleep.
1173 : : *
1174 : : * Side effect: cancel/die interrupts are held off until lock release.
1175 : : */
1176 : : bool
3880 1177 : 174053311 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1178 : : {
8678 JanWieck@Yahoo.com 1179 : 174053311 : PGPROC *proc = MyProc;
4377 heikki.linnakangas@i 1180 : 174053311 : bool result = true;
8833 tgl@sss.pgh.pa.us 1181 : 174053311 : int extraWaits = 0;
1182 : : #ifdef LWLOCK_STATS
1183 : : lwlock_stats *lwstats;
1184 : :
1185 : : lwstats = get_lwlock_stats_entry(lock);
1186 : : #endif
1187 : :
1234 peter@eisentraut.org 1188 [ + + - + ]: 174053311 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1189 : :
1190 : : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1191 : :
1192 : : #ifdef LWLOCK_STATS
1193 : : /* Count lock acquisition attempts */
1194 : : if (mode == LW_EXCLUSIVE)
1195 : : lwstats->ex_acquire_count++;
1196 : : else
1197 : : lwstats->sh_acquire_count++;
1198 : : #endif /* LWLOCK_STATS */
1199 : :
1200 : : /*
1201 : : * We can't wait if we haven't got a PGPROC. This should only occur
1202 : : * during bootstrap or shared memory initialization. Put an Assert here
1203 : : * to catch unsafe coding practices.
1204 : : */
8572 tgl@sss.pgh.pa.us 1205 [ + + - + ]: 174053311 : Assert(!(proc == NULL && IsUnderPostmaster));
1206 : :
1207 : : /* Ensure we will have room to remember the lock */
7646 1208 [ - + ]: 174053311 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
7646 tgl@sss.pgh.pa.us 1209 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1210 : :
1211 : : /*
1212 : : * Lock out cancel/die interrupts until we exit the code section protected
1213 : : * by the LWLock. This ensures that interrupts will not interfere with
1214 : : * manipulations of data structures in shared memory.
1215 : : */
8933 tgl@sss.pgh.pa.us 1216 :CBC 174053311 : HOLD_INTERRUPTS();
1217 : :
1218 : : /*
1219 : : * Loop here to try to acquire lock after each time we are signaled by
1220 : : * LWLockRelease.
1221 : : *
1222 : : * NOTE: it might seem better to have LWLockRelease actually grant us the
1223 : : * lock, rather than retrying and possibly having to go back to sleep. But
1224 : : * in practice that is no good because it means a process swap for every
1225 : : * lock acquisition when two or more processes are contending for the same
1226 : : * lock. Since LWLocks are normally used to protect not-very-long
1227 : : * sections of computation, a process needs to be able to acquire and
1228 : : * release the same lock many times during a single CPU time slice, even
1229 : : * in the presence of contention. The efficiency of being able to do that
1230 : : * outweighs the inefficiency of sometimes wasting a process dispatch
1231 : : * cycle because the lock is not free when a released waiter finally gets
1232 : : * to run. See pgsql-hackers archives for 29-Dec-01.
1233 : : */
1234 : : for (;;)
8842 bruce@momjian.us 1235 : 91957 : {
1236 : : bool mustwait;
1237 : :
1238 : : /*
1239 : : * Try to grab the lock the first time, we're not in the waitqueue
1240 : : * yet/anymore.
1241 : : */
4098 andres@anarazel.de 1242 : 174145268 : mustwait = LWLockAttemptLock(lock, mode);
1243 : :
8833 tgl@sss.pgh.pa.us 1244 [ + + ]: 174145268 : if (!mustwait)
1245 : : {
1246 : : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1247 : 174034504 : break; /* got the lock */
1248 : : }
1249 : :
1250 : : /*
1251 : : * Ok, at this point we couldn't grab the lock on the first try. We
1252 : : * cannot simply queue ourselves to the end of the list and wait to be
1253 : : * woken up because by now the lock could long have been released.
1254 : : * Instead add us to the queue and try to grab the lock again. If we
1255 : : * succeed we need to revert the queuing and be happy, otherwise we
1256 : : * recheck the lock. If we still couldn't grab it, we know that the
1257 : : * other locker will see our queue entries when releasing since they
1258 : : * existed before we checked for the lock.
1259 : : */
1260 : :
1261 : : /* add to the queue */
4098 andres@anarazel.de 1262 : 110764 : LWLockQueueSelf(lock, mode);
1263 : :
1264 : : /* we're now guaranteed to be woken up if necessary */
1265 : 110764 : mustwait = LWLockAttemptLock(lock, mode);
1266 : :
1267 : : /* ok, grabbed the lock the second time round, need to undo queueing */
1268 [ + + ]: 110764 : if (!mustwait)
1269 : : {
1270 : : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1271 : :
1272 : 18807 : LWLockDequeueSelf(lock);
1273 : 18807 : break;
1274 : : }
1275 : :
1276 : : /*
1277 : : * Wait until awakened.
1278 : : *
1279 : : * It is possible that we get awakened for a reason other than being
1280 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1281 : : * we've gotten the LWLock, re-increment the sema by the number of
1282 : : * additional signals received.
1283 : : */
1284 : : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1285 : :
1286 : : #ifdef LWLOCK_STATS
1287 : : lwstats->block_count++;
1288 : : #endif
1289 : :
3657 rhaas@postgresql.org 1290 : 91957 : LWLockReportWaitStart(lock);
1291 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1292 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1293 : :
1294 : : for (;;)
1295 : : {
3380 tgl@sss.pgh.pa.us 1296 : 91957 : PGSemaphoreLock(proc->sem);
1211 andres@anarazel.de 1297 [ + - ]: 91957 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
8933 tgl@sss.pgh.pa.us 1298 : 91957 : break;
8933 tgl@sss.pgh.pa.us 1299 :UBC 0 : extraWaits++;
1300 : : }
1301 : :
1302 : : /* Retrying, allow LWLockRelease to release waiters again. */
61 andres@anarazel.de 1303 :GNC 91957 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_WAKE_IN_PROGRESS);
1304 : :
1305 : : #ifdef LOCK_DEBUG
1306 : : {
1307 : : /* not waiting anymore */
1308 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1309 : :
1310 : : Assert(nwaiters < MAX_BACKENDS);
1311 : : }
1312 : : #endif
1313 : :
1314 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1315 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
3657 rhaas@postgresql.org 1316 :CBC 91957 : LWLockReportWaitEnd();
1317 : :
1318 : : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1319 : :
1320 : : /* Now loop back and try to acquire lock again. */
4377 heikki.linnakangas@i 1321 : 91957 : result = false;
1322 : : }
1323 : :
1324 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1325 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1326 : :
1327 : : /* Add lock to list of locks held by this backend */
4098 andres@anarazel.de 1328 : 174053311 : held_lwlocks[num_held_lwlocks].lock = lock;
1329 : 174053311 : held_lwlocks[num_held_lwlocks++].mode = mode;
1330 : :
1331 : : /*
1332 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1333 : : */
8833 tgl@sss.pgh.pa.us 1334 [ - + ]: 174053311 : while (extraWaits-- > 0)
3380 tgl@sss.pgh.pa.us 1335 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1336 : :
4377 heikki.linnakangas@i 1337 :CBC 174053311 : return result;
1338 : : }
1339 : :
1340 : : /*
1341 : : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1342 : : *
1343 : : * If the lock is not available, return false with no side-effects.
1344 : : *
1345 : : * If successful, cancel/die interrupts are held off until lock release.
1346 : : */
1347 : : bool
4192 rhaas@postgresql.org 1348 : 1320016 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1349 : : {
1350 : : bool mustwait;
1351 : :
1234 peter@eisentraut.org 1352 [ + + - + ]: 1320016 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1353 : :
1354 : : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1355 : :
1356 : : /* Ensure we will have room to remember the lock */
7646 tgl@sss.pgh.pa.us 1357 [ - + ]: 1320016 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
7646 tgl@sss.pgh.pa.us 1358 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1359 : :
1360 : : /*
1361 : : * Lock out cancel/die interrupts until we exit the code section protected
1362 : : * by the LWLock. This ensures that interrupts will not interfere with
1363 : : * manipulations of data structures in shared memory.
1364 : : */
8933 tgl@sss.pgh.pa.us 1365 :CBC 1320016 : HOLD_INTERRUPTS();
1366 : :
1367 : : /* Check for the lock */
4098 andres@anarazel.de 1368 : 1320016 : mustwait = LWLockAttemptLock(lock, mode);
1369 : :
8933 tgl@sss.pgh.pa.us 1370 [ + + ]: 1320016 : if (mustwait)
1371 : : {
1372 : : /* Failed to get lock, so release interrupt holdoff */
1373 [ - + ]: 1607 : RESUME_INTERRUPTS();
1374 : :
1375 : : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1376 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1377 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1378 : : }
1379 : : else
1380 : : {
1381 : : /* Add lock to list of locks held by this backend */
4098 andres@anarazel.de 1382 : 1318409 : held_lwlocks[num_held_lwlocks].lock = lock;
1383 : 1318409 : held_lwlocks[num_held_lwlocks++].mode = mode;
1384 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1385 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1386 : : }
8933 tgl@sss.pgh.pa.us 1387 : 1320016 : return !mustwait;
1388 : : }
1389 : :
1390 : : /*
1391 : : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1392 : : *
1393 : : * The semantics of this function are a bit funky. If the lock is currently
1394 : : * free, it is acquired in the given mode, and the function returns true. If
1395 : : * the lock isn't immediately free, the function waits until it is released
1396 : : * and returns false, but does not acquire the lock.
1397 : : *
1398 : : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1399 : : * holding WALWriteLock, it can flush the commit records of many other
1400 : : * backends as a side-effect. Those other backends need to wait until the
1401 : : * flush finishes, but don't need to acquire the lock anymore. They can just
1402 : : * wake up, observe that their records have already been flushed, and return.
1403 : : */
1404 : : bool
4192 rhaas@postgresql.org 1405 : 134154 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1406 : : {
5158 heikki.linnakangas@i 1407 : 134154 : PGPROC *proc = MyProc;
1408 : : bool mustwait;
1409 : 134154 : int extraWaits = 0;
1410 : : #ifdef LWLOCK_STATS
1411 : : lwlock_stats *lwstats;
1412 : :
1413 : : lwstats = get_lwlock_stats_entry(lock);
1414 : : #endif
1415 : :
4098 andres@anarazel.de 1416 [ + - - + ]: 134154 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1417 : :
1418 : : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1419 : :
1420 : : /* Ensure we will have room to remember the lock */
5158 heikki.linnakangas@i 1421 [ - + ]: 134154 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
5158 heikki.linnakangas@i 1422 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1423 : :
1424 : : /*
1425 : : * Lock out cancel/die interrupts until we exit the code section protected
1426 : : * by the LWLock. This ensures that interrupts will not interfere with
1427 : : * manipulations of data structures in shared memory.
1428 : : */
5158 heikki.linnakangas@i 1429 :CBC 134154 : HOLD_INTERRUPTS();
1430 : :
1431 : : /*
1432 : : * NB: We're using nearly the same twice-in-a-row lock acquisition
1433 : : * protocol as LWLockAcquire(). Check its comments for details.
1434 : : */
4098 andres@anarazel.de 1435 : 134154 : mustwait = LWLockAttemptLock(lock, mode);
1436 : :
5158 heikki.linnakangas@i 1437 [ + + ]: 134154 : if (mustwait)
1438 : : {
4098 andres@anarazel.de 1439 : 2221 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1440 : :
1441 : 2221 : mustwait = LWLockAttemptLock(lock, mode);
1442 : :
1443 [ + + ]: 2221 : if (mustwait)
1444 : : {
1445 : : /*
1446 : : * Wait until awakened. Like in LWLockAcquire, be prepared for
1447 : : * bogus wakeups.
1448 : : */
1449 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1450 : :
1451 : : #ifdef LWLOCK_STATS
1452 : : lwstats->block_count++;
1453 : : #endif
1454 : :
3657 rhaas@postgresql.org 1455 : 2152 : LWLockReportWaitStart(lock);
1456 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1457 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1458 : :
1459 : : for (;;)
1460 : : {
3380 tgl@sss.pgh.pa.us 1461 : 2152 : PGSemaphoreLock(proc->sem);
1211 andres@anarazel.de 1462 [ + - ]: 2152 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
4098 1463 : 2152 : break;
4098 andres@anarazel.de 1464 :UBC 0 : extraWaits++;
1465 : : }
1466 : :
1467 : : #ifdef LOCK_DEBUG
1468 : : {
1469 : : /* not waiting anymore */
1470 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1471 : :
1472 : : Assert(nwaiters < MAX_BACKENDS);
1473 : : }
1474 : : #endif
1475 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1476 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
3657 rhaas@postgresql.org 1477 :CBC 2152 : LWLockReportWaitEnd();
1478 : :
1479 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1480 : : }
1481 : : else
1482 : : {
1483 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1484 : :
1485 : : /*
1486 : : * Got lock in the second attempt, undo queueing. We need to treat
1487 : : * this as having successfully acquired the lock, otherwise we'd
1488 : : * not necessarily wake up people we've prevented from acquiring
1489 : : * the lock.
1490 : : */
4098 andres@anarazel.de 1491 : 69 : LWLockDequeueSelf(lock);
1492 : : }
1493 : : }
1494 : :
1495 : : /*
1496 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1497 : : */
5158 heikki.linnakangas@i 1498 [ - + ]: 134154 : while (extraWaits-- > 0)
3380 tgl@sss.pgh.pa.us 1499 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1500 : :
5158 heikki.linnakangas@i 1501 [ + + ]:CBC 134154 : if (mustwait)
1502 : : {
1503 : : /* Failed to get lock, so release interrupt holdoff */
1504 [ - + ]: 2152 : RESUME_INTERRUPTS();
1505 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1506 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1507 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1508 : : }
1509 : : else
1510 : : {
1511 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1512 : : /* Add lock to list of locks held by this backend */
4098 andres@anarazel.de 1513 : 132002 : held_lwlocks[num_held_lwlocks].lock = lock;
1514 : 132002 : held_lwlocks[num_held_lwlocks++].mode = mode;
1515 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1516 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1517 : : }
1518 : :
5158 heikki.linnakangas@i 1519 : 134154 : return !mustwait;
1520 : : }
1521 : :
1522 : : /*
1523 : : * Does the lwlock in its current state need to wait for the variable value to
1524 : : * change?
1525 : : *
1526 : : * If we don't need to wait, and it's because the value of the variable has
1527 : : * changed, store the current value in newval.
1528 : : *
1529 : : * *result is set to true if the lock was free, and false otherwise.
1530 : : */
1531 : : static bool
964 michael@paquier.xyz 1532 : 4401974 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1533 : : uint64 *newval, bool *result)
1534 : : {
1535 : : bool mustwait;
1536 : : uint64 value;
1537 : :
1538 : : /*
1539 : : * Test first to see if it the slot is free right now.
1540 : : *
1541 : : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1542 : : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1543 : : * this, so we don't need a memory barrier here as far as the current
1544 : : * usage is concerned. But that might not be safe in general.
1545 : : */
3880 andres@anarazel.de 1546 : 4401974 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1547 : :
1548 [ + + ]: 4401974 : if (!mustwait)
1549 : : {
1550 : 2678645 : *result = true;
1551 : 2678645 : return false;
1552 : : }
1553 : :
1554 : 1723329 : *result = false;
1555 : :
1556 : : /*
1557 : : * Reading this value atomically is safe even on platforms where uint64
1558 : : * cannot be read without observing a torn value.
1559 : : */
964 michael@paquier.xyz 1560 : 1723329 : value = pg_atomic_read_u64(valptr);
1561 : :
3880 andres@anarazel.de 1562 [ + + ]: 1723329 : if (value != oldval)
1563 : : {
1564 : 1430064 : mustwait = false;
1565 : 1430064 : *newval = value;
1566 : : }
1567 : : else
1568 : : {
1569 : 293265 : mustwait = true;
1570 : : }
1571 : :
1572 : 1723329 : return mustwait;
1573 : : }
1574 : :
1575 : : /*
1576 : : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1577 : : *
1578 : : * If the lock is held and *valptr equals oldval, waits until the lock is
1579 : : * either freed, or the lock holder updates *valptr by calling
1580 : : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1581 : : * waiting), returns true. If the lock is still held, but *valptr no longer
1582 : : * matches oldval, returns false and sets *newval to the current value in
1583 : : * *valptr.
1584 : : *
1585 : : * Note: this function ignores shared lock holders; if the lock is held
1586 : : * in shared mode, returns 'true'.
1587 : : *
1588 : : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1589 : : * hence the caller of this function may want to rely on an explicit barrier or
1590 : : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1591 : : */
1592 : : bool
964 michael@paquier.xyz 1593 : 4108709 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1594 : : uint64 *newval)
1595 : : {
4377 heikki.linnakangas@i 1596 : 4108709 : PGPROC *proc = MyProc;
1597 : 4108709 : int extraWaits = 0;
1598 : 4108709 : bool result = false;
1599 : : #ifdef LWLOCK_STATS
1600 : : lwlock_stats *lwstats;
1601 : :
1602 : : lwstats = get_lwlock_stats_entry(lock);
1603 : : #endif
1604 : :
1605 : : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1606 : :
1607 : : /*
1608 : : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1609 : : * cleanup mechanism to remove us from the wait queue if we got
1610 : : * interrupted.
1611 : : */
1612 : 4108709 : HOLD_INTERRUPTS();
1613 : :
1614 : : /*
1615 : : * Loop here to check the lock's status after each time we are signaled.
1616 : : */
1617 : : for (;;)
1618 : 146209 : {
1619 : : bool mustwait;
1620 : :
3880 andres@anarazel.de 1621 : 4254918 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1622 : : &result);
1623 : :
4377 heikki.linnakangas@i 1624 [ + + ]: 4254918 : if (!mustwait)
1625 : 4107862 : break; /* the lock was free or value didn't match */
1626 : :
1627 : : /*
1628 : : * Add myself to wait queue. Note that this is racy, somebody else
1629 : : * could wakeup before we're finished queuing. NB: We're using nearly
1630 : : * the same twice-in-a-row lock acquisition protocol as
1631 : : * LWLockAcquire(). Check its comments for details. The only
1632 : : * difference is that we also have to check the variable's values when
1633 : : * checking the state of the lock.
1634 : : */
4098 andres@anarazel.de 1635 : 147056 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1636 : :
1637 : : /*
1638 : : * Clear LW_FLAG_WAKE_IN_PROGRESS flag, to make sure we get woken up
1639 : : * as soon as the lock is released.
1640 : : */
61 andres@anarazel.de 1641 :GNC 147056 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_WAKE_IN_PROGRESS);
1642 : :
1643 : : /*
1644 : : * We're now guaranteed to be woken up if necessary. Recheck the lock
1645 : : * and variables state.
1646 : : */
3880 andres@anarazel.de 1647 :CBC 147056 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1648 : : &result);
1649 : :
1650 : : /* Ok, no conflict after we queued ourselves. Undo queueing. */
4098 1651 [ + + ]: 147056 : if (!mustwait)
1652 : : {
1653 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1654 : :
1655 : 847 : LWLockDequeueSelf(lock);
1656 : 847 : break;
1657 : : }
1658 : :
1659 : : /*
1660 : : * Wait until awakened.
1661 : : *
1662 : : * It is possible that we get awakened for a reason other than being
1663 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1664 : : * we've gotten the LWLock, re-increment the sema by the number of
1665 : : * additional signals received.
1666 : : */
1667 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1668 : :
1669 : : #ifdef LWLOCK_STATS
1670 : : lwstats->block_count++;
1671 : : #endif
1672 : :
3657 rhaas@postgresql.org 1673 : 146209 : LWLockReportWaitStart(lock);
1674 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1675 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1676 : :
1677 : : for (;;)
1678 : : {
3380 tgl@sss.pgh.pa.us 1679 : 146209 : PGSemaphoreLock(proc->sem);
1211 andres@anarazel.de 1680 [ + - ]: 146209 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
4377 heikki.linnakangas@i 1681 : 146209 : break;
4377 heikki.linnakangas@i 1682 :UBC 0 : extraWaits++;
1683 : : }
1684 : :
1685 : : #ifdef LOCK_DEBUG
1686 : : {
1687 : : /* not waiting anymore */
1688 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1689 : :
1690 : : Assert(nwaiters < MAX_BACKENDS);
1691 : : }
1692 : : #endif
1693 : :
1694 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1695 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
3657 rhaas@postgresql.org 1696 :CBC 146209 : LWLockReportWaitEnd();
1697 : :
1698 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1699 : :
1700 : : /* Now loop back and check the status of the lock again. */
1701 : : }
1702 : :
1703 : : /*
1704 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1705 : : */
4377 heikki.linnakangas@i 1706 [ - + ]: 4108709 : while (extraWaits-- > 0)
3380 tgl@sss.pgh.pa.us 1707 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1708 : :
1709 : : /*
1710 : : * Now okay to allow cancel/die interrupts.
1711 : : */
4377 heikki.linnakangas@i 1712 [ - + ]:CBC 4108709 : RESUME_INTERRUPTS();
1713 : :
1714 : 4108709 : return result;
1715 : : }
1716 : :
1717 : :
1718 : : /*
1719 : : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1720 : : *
1721 : : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1722 : : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1723 : : * waiting processes so that any process calling LWLockWaitForVar() on the same
1724 : : * lock is guaranteed to see the new value, and act accordingly.
1725 : : *
1726 : : * The caller must be holding the lock in exclusive mode.
1727 : : */
1728 : : void
964 michael@paquier.xyz 1729 : 2544792 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1730 : : {
1731 : : proclist_head wakeup;
1732 : : proclist_mutable_iter iter;
1733 : :
1734 : : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1735 : :
1736 : : /*
1737 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1738 : : * that the variable is updated before waking up waiters.
1739 : : */
1740 : 2544792 : pg_atomic_exchange_u64(valptr, val);
1741 : :
3499 rhaas@postgresql.org 1742 : 2544792 : proclist_init(&wakeup);
1743 : :
3626 andres@anarazel.de 1744 : 2544792 : LWLockWaitListLock(lock);
1745 : :
4098 1746 [ - + ]: 2544792 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1747 : :
1748 : : /*
1749 : : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1750 : : * up. They are always in the front of the queue.
1751 : : */
3499 rhaas@postgresql.org 1752 [ + + + + ]: 2545399 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1753 : : {
1754 : 58836 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1755 : :
4098 andres@anarazel.de 1756 [ + + ]: 58836 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1757 : 58229 : break;
1758 : :
3499 rhaas@postgresql.org 1759 : 607 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1760 : 607 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1761 : :
1762 : : /* see LWLockWakeup() */
1211 andres@anarazel.de 1763 [ - + ]: 607 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1764 [ + + ]: 607 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1765 : : }
1766 : :
1767 : : /* We are done updating shared state of the lock itself. */
3626 1768 : 2544792 : LWLockWaitListUnlock(lock);
1769 : :
1770 : : /*
1771 : : * Awaken any waiters I removed from the queue.
1772 : : */
3499 rhaas@postgresql.org 1773 [ + + + + : 2545399 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1774 : : {
1775 : 607 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1776 : :
1777 : 607 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1778 : : /* check comment in LWLockWakeup() about this barrier */
4104 andres@anarazel.de 1779 : 607 : pg_write_barrier();
1211 1780 : 607 : waiter->lwWaiting = LW_WS_NOT_WAITING;
3380 tgl@sss.pgh.pa.us 1781 : 607 : PGSemaphoreUnlock(waiter->sem);
1782 : : }
4377 heikki.linnakangas@i 1783 : 2544792 : }
1784 : :
1785 : :
1786 : : /*
1787 : : * LWLockRelease - release a previously acquired lock
1788 : : *
1789 : : * NB: This will leave lock->owner pointing to the current backend (if
1790 : : * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
1791 : : * debug cases of missing wakeups during lock release.
1792 : : */
1793 : : void
59 andres@anarazel.de 1794 :GNC 175503722 : LWLockRelease(LWLock *lock)
1795 : : {
1796 : : LWLockMode mode;
1797 : : uint32 oldstate;
1798 : : bool check_waiters;
1799 : : int i;
1800 : :
1801 : : /*
1802 : : * Remove lock from list of locks held. Usually, but not always, it will
1803 : : * be the latest-acquired lock; so search array backwards.
1804 : : */
8907 bruce@momjian.us 1805 [ + - ]:CBC 202686543 : for (i = num_held_lwlocks; --i >= 0;)
4098 andres@anarazel.de 1806 [ + + ]: 202686543 : if (lock == held_lwlocks[i].lock)
8933 tgl@sss.pgh.pa.us 1807 : 175503722 : break;
1808 : :
1809 [ - + ]: 175503722 : if (i < 0)
3376 rhaas@postgresql.org 1810 [ # # ]:UBC 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1811 : :
3386 sfrost@snowman.net 1812 :CBC 175503722 : mode = held_lwlocks[i].mode;
1813 : :
8933 tgl@sss.pgh.pa.us 1814 : 175503722 : num_held_lwlocks--;
1815 [ + + ]: 202686543 : for (; i < num_held_lwlocks; i++)
8907 bruce@momjian.us 1816 : 27182821 : held_lwlocks[i] = held_lwlocks[i + 1];
1817 : :
1818 : : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1819 : :
1820 : : /*
1821 : : * Release my hold on lock, after that it can immediately be acquired by
1822 : : * others, even if we still have to wakeup other waiters.
1823 : : */
4098 andres@anarazel.de 1824 [ + + ]: 175503722 : if (mode == LW_EXCLUSIVE)
1825 : 100242278 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1826 : : else
1827 : 75261444 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1828 : :
1829 : : /* nobody else can have that kind of lock */
1830 [ - + ]: 175503722 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1831 : :
1832 : : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1833 : : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1834 : :
1835 : : /*
1836 : : * Check if we're still waiting for backends to get scheduled, if so,
1837 : : * don't wake them up again.
1838 : : */
61 andres@anarazel.de 1839 [ + + ]:GNC 175503722 : if ((oldstate & LW_FLAG_HAS_WAITERS) &&
1840 [ + + ]: 268995 : !(oldstate & LW_FLAG_WAKE_IN_PROGRESS) &&
4098 andres@anarazel.de 1841 [ + + ]:CBC 154367 : (oldstate & LW_LOCK_MASK) == 0)
1842 : 151984 : check_waiters = true;
1843 : : else
1844 : 175351738 : check_waiters = false;
1845 : :
1846 : : /*
1847 : : * As waking up waiters requires the spinlock to be acquired, only do so
1848 : : * if necessary.
1849 : : */
1850 [ + + ]: 175503722 : if (check_waiters)
1851 : : {
1852 : : /* XXX: remove before commit? */
1853 : : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1854 : 151984 : LWLockWakeup(lock);
1855 : : }
1856 : :
1857 : : /*
1858 : : * Now okay to allow cancel/die interrupts.
1859 : : */
8933 tgl@sss.pgh.pa.us 1860 [ - + ]: 175503722 : RESUME_INTERRUPTS();
1861 : 175503722 : }
1862 : :
1863 : : /*
1864 : : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1865 : : */
1866 : : void
964 michael@paquier.xyz 1867 : 16107699 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1868 : : {
1869 : : /*
1870 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1871 : : * that the variable is updated before releasing the lock.
1872 : : */
1873 : 16107699 : pg_atomic_exchange_u64(valptr, val);
1874 : :
3880 andres@anarazel.de 1875 : 16107699 : LWLockRelease(lock);
1876 : 16107699 : }
1877 : :
1878 : :
1879 : : /*
1880 : : * LWLockReleaseAll - release all currently-held locks
1881 : : *
1882 : : * Used to clean up after ereport(ERROR). An important difference between this
1883 : : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1884 : : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1885 : : * has been set to an appropriate level earlier in error recovery. We could
1886 : : * decrement it below zero if we allow it to drop for each released lock!
1887 : : *
1888 : : * Note that this function must be safe to call even before the LWLock
1889 : : * subsystem has been initialized (e.g., during early startup failures).
1890 : : * In that case, num_held_lwlocks will be 0 and we do nothing.
1891 : : */
1892 : : void
8933 tgl@sss.pgh.pa.us 1893 : 103608 : LWLockReleaseAll(void)
1894 : : {
1895 [ + + ]: 103706 : while (num_held_lwlocks > 0)
1896 : : {
1897 : 98 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1898 : :
4098 andres@anarazel.de 1899 : 98 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1900 : : }
1901 : :
58 amitlan@postgresql.o 1902 [ - + ]: 103608 : Assert(num_held_lwlocks == 0);
8933 tgl@sss.pgh.pa.us 1903 : 103608 : }
1904 : :
1905 : :
1906 : : /*
1907 : : * LWLockHeldByMe - test whether my process holds a lock in any mode
1908 : : *
1909 : : * This is meant as debug support only.
1910 : : */
1911 : : bool
1273 pg@bowt.ie 1912 : 4804848 : LWLockHeldByMe(LWLock *lock)
1913 : : {
1914 : : int i;
1915 : :
7947 tgl@sss.pgh.pa.us 1916 [ + + ]: 36227431 : for (i = 0; i < num_held_lwlocks; i++)
1917 : : {
1273 pg@bowt.ie 1918 [ + + ]: 35086751 : if (held_lwlocks[i].lock == lock)
7947 tgl@sss.pgh.pa.us 1919 : 3664168 : return true;
1920 : : }
1921 : 1140680 : return false;
1922 : : }
1923 : :
1924 : : /*
1925 : : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1926 : : *
1927 : : * This is meant as debug support only.
1928 : : */
1929 : : bool
1273 pg@bowt.ie 1930 : 1297837 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1931 : : {
1932 : : char *held_lock_addr;
1933 : : char *begin;
1934 : : char *end;
1935 : : int i;
1936 : :
1937 : 1297837 : begin = (char *) lock;
1343 tmunro@postgresql.or 1938 : 1297837 : end = begin + nlocks * stride;
1939 [ + + ]: 1299499 : for (i = 0; i < num_held_lwlocks; i++)
1940 : : {
1941 : 1662 : held_lock_addr = (char *) held_lwlocks[i].lock;
1942 [ + + - + ]: 1662 : if (held_lock_addr >= begin &&
1343 tmunro@postgresql.or 1943 :UBC 0 : held_lock_addr < end &&
1944 [ # # ]: 0 : (held_lock_addr - begin) % stride == 0)
1945 : 0 : return true;
1946 : : }
1343 tmunro@postgresql.or 1947 :CBC 1297837 : return false;
1948 : : }
1949 : :
1950 : : /*
1951 : : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1952 : : *
1953 : : * This is meant as debug support only.
1954 : : */
1955 : : bool
1273 pg@bowt.ie 1956 : 1160888 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1957 : : {
1958 : : int i;
1959 : :
3478 simon@2ndQuadrant.co 1960 [ + - ]: 1163056 : for (i = 0; i < num_held_lwlocks; i++)
1961 : : {
1273 pg@bowt.ie 1962 [ + + + - ]: 1163056 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
3478 simon@2ndQuadrant.co 1963 : 1160888 : return true;
1964 : : }
3478 simon@2ndQuadrant.co 1965 :UBC 0 : return false;
1966 : : }
|