Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * lwlock.c
4 : : * Lightweight lock manager
5 : : *
6 : : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : : * access to shared-memory data structures. Therefore, they offer both
8 : : * exclusive and shared lock modes (to support read/write and read-only
9 : : * access to a shared object). There are few other frammishes. User-level
10 : : * locking should be done with the full lock manager --- which depends on
11 : : * LWLocks to protect its shared state.
12 : : *
13 : : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : : * wait until a variable changes value. Acquiring the lock with LWLockAcquire
15 : : * does not reset the variable, i.e. it keeps the value it was set to when
16 : : * the lock was last released, and it can be updated without releasing the
17 : : * lock by calling LWLockUpdateVar. LWLockWaitForVar waits for the variable
18 : : * to be updated, or until the lock is free. When releasing the lock with
19 : : * LWLockReleaseClearVar() the value can be set to an appropriate value for
20 : : * a free lock. The meaning of the variable is up to the caller; the
21 : : * lightweight lock code just assigns and compares it.
22 : : *
23 : : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 : : * Portions Copyright (c) 1994, Regents of the University of California
25 : : *
26 : : * IDENTIFICATION
27 : : * src/backend/storage/lmgr/lwlock.c
28 : : *
29 : : * NOTES:
30 : : *
31 : : * This used to be a pretty straightforward reader-writer lock
32 : : * implementation, in which the internal state was protected by a
33 : : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : : * too high for workloads/locks that were taken in shared mode very
35 : : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : : * while trying to acquire a shared lock that was actually free.
37 : : *
38 : : * Thus a new implementation was devised that provides wait-free shared lock
39 : : * acquisition for locks that aren't exclusively locked.
40 : : *
41 : : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : : * the formerly separate shared and exclusive counters and to use atomic
43 : : * operations to acquire the lock. That's fairly easy to do for plain
44 : : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : : * in the OS.
46 : : *
47 : : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : : * variable. For exclusive lock we swap in a sentinel value
49 : : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : : *
51 : : * To release the lock we use an atomic decrement. If the new value
52 : : * is zero (we get that atomically), we know we can/have to release
53 : : * waiters.
54 : : *
55 : : * Obviously it is important that the sentinel value for exclusive locks
56 : : * doesn't conflict with the maximum number of possible share lockers -
57 : : * luckily MAX_BACKENDS makes that easily possible.
58 : : *
59 : : *
60 : : * The attentive reader might have noticed that naively doing the above has a
61 : : * glaring race condition: We try to lock using the atomic operations and
62 : : * notice that we have to wait. Unfortunately by the time we have finished
63 : : * queuing, the former locker very well might have already finished its
64 : : * work. That's problematic because we're now stuck waiting inside the OS.
65 : : *
66 : : * To mitigate those races we use a four-phase attempt at locking:
67 : : * Phase 1: Try to do it atomically, if we succeed, nice
68 : : * Phase 2: Add ourselves to the waitqueue of the lock
69 : : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : : * the queue
71 : : * Phase 4: Sleep till wake-up, goto Phase 1
72 : : *
73 : : * This protects us against the problem above: a release cannot go
74 : : * unnoticed, since after Phase 2 we are already on the wait queue.
75 : : * -------------------------------------------------------------------------
76 : : */
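/*
 * A minimal, self-contained sketch of the Phase-1 fast path described in the
 * NOTES above, using C11 atomics in place of the pg_atomic wrappers. The
 * constants and names are illustrative only, and the queueing phases (2-4)
 * are left out.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define VAL_EXCLUSIVE ((uint32_t) 1 << 24)          /* sentinel > max shared holders */
#define LOCK_MASK     ((VAL_EXCLUSIVE << 1) - 1)    /* exclusive bit + shared count */

/* One compare-and-exchange attempt; returns true if the caller must queue. */
static bool
try_lock(_Atomic uint32_t *state, bool exclusive)
{
    uint32_t old = atomic_load(state);

    for (;;)
    {
        uint32_t desired = old;
        bool     lock_free;

        if (exclusive)
        {
            lock_free = (old & LOCK_MASK) == 0;     /* no holders at all */
            if (lock_free)
                desired += VAL_EXCLUSIVE;
        }
        else
        {
            lock_free = (old & VAL_EXCLUSIVE) == 0; /* no exclusive holder */
            if (lock_free)
                desired += 1;                       /* one more shared holder */
        }

        /* Always CAS, even when not free, so the attempt doubles as a
         * memory barrier; on failure 'old' is refreshed and we loop. */
        if (atomic_compare_exchange_strong(state, &old, desired))
            return !lock_free;
    }
}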
77 : : #include "postgres.h"
78 : :
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "port/pg_bitutils.h"
83 : : #include "storage/proc.h"
84 : : #include "storage/proclist.h"
85 : : #include "storage/procnumber.h"
86 : : #include "storage/spin.h"
87 : : #include "utils/memutils.h"
88 : :
89 : : #ifdef LWLOCK_STATS
90 : : #include "utils/hsearch.h"
91 : : #endif
92 : :
93 : :
94 : : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 31)
95 : : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 30)
96 : : #define LW_FLAG_LOCKED ((uint32) 1 << 29)
97 : : #define LW_FLAG_BITS 3
98 : : #define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
99 : :
100 : : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
101 : : #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
102 : : #define LW_VAL_SHARED 1
103 : :
104 : : /* already (power of 2)-1, i.e. suitable for a mask */
105 : : #define LW_SHARED_MASK MAX_BACKENDS
106 : : #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
107 : :
108 : :
109 : : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
110 : : "MAX_BACKENDS + 1 needs to be a power of 2");
111 : :
112 : : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
113 : : "MAX_BACKENDS and LW_FLAG_MASK overlap");
114 : :
115 : : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
116 : : "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
117 : :
118 : : /*
119 : : * There are three sorts of LWLock "tranches":
120 : : *
121 : : * 1. The individually-named locks defined in lwlocklist.h each have their
122 : : * own tranche. We absorb the names of these tranches from there into
123 : : * BuiltinTrancheNames here.
124 : : *
125 : : * 2. There are some predefined tranches for built-in groups of locks defined
126 : : * in lwlocklist.h. We absorb the names of these tranches, too.
127 : : *
128 : : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
129 : : * or LWLockNewTrancheId. These names are stored in shared memory and can be
130 : : * accessed via LWLockTrancheNames.
131 : : *
132 : : * All these names are user-visible as wait event names, so choose with care
133 : : * ... and do not forget to update the documentation's list of wait events.
134 : : */
135 : : static const char *const BuiltinTrancheNames[] = {
136 : : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
137 : : #define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname),
138 : : #include "storage/lwlocklist.h"
139 : : #undef PG_LWLOCK
140 : : #undef PG_LWLOCKTRANCHE
141 : : };
142 : :
143 : : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
144 : : LWTRANCHE_FIRST_USER_DEFINED,
145 : : "missing entries in BuiltinTrancheNames[]");
146 : :
147 : : /*
148 : : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
149 : : * points to the shared memory locations of the names of all
150 : : * dynamically-created tranches. Backends inherit the pointer by fork from the
151 : : * postmaster (except in the EXEC_BACKEND case, where we have special measures
152 : : * to pass it down).
153 : : */
154 : : char **LWLockTrancheNames = NULL;
155 : :
156 : : /*
157 : : * This points to the main array of LWLocks in shared memory. Backends inherit
158 : : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
159 : : * where we have special measures to pass it down).
160 : : */
161 : : LWLockPadded *MainLWLockArray = NULL;
162 : :
163 : : /*
164 : : * We use this structure to keep track of locked LWLocks for release
165 : : * during error recovery. Normally, only a few will be held at once, but
166 : : * occasionally the number can be much higher.
167 : : */
168 : : #define MAX_SIMUL_LWLOCKS 200
169 : :
170 : : /* struct representing the LWLocks we're holding */
171 : : typedef struct LWLockHandle
172 : : {
173 : : LWLock *lock;
174 : : LWLockMode mode;
175 : : } LWLockHandle;
176 : :
177 : : static int num_held_lwlocks = 0;
178 : : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
179 : :
180 : : /* struct representing the LWLock tranche request for named tranche */
181 : : typedef struct NamedLWLockTrancheRequest
182 : : {
183 : : char tranche_name[NAMEDATALEN];
184 : : int num_lwlocks;
185 : : } NamedLWLockTrancheRequest;
186 : :
187 : : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
188 : :
189 : : /*
190 : : * NamedLWLockTrancheRequests is the valid length of the request array. This
191 : : * variable is non-static so that postmaster.c can copy it to child processes
192 : : * in EXEC_BACKEND builds.
193 : : */
194 : : int NamedLWLockTrancheRequests = 0;
195 : :
196 : : /* shared memory counter of registered tranches */
197 : : int *LWLockCounter = NULL;
198 : :
199 : : /* backend-local counter of registered tranches */
200 : : static int LocalLWLockCounter;
201 : :
202 : : #define MAX_NAMED_TRANCHES 256
203 : :
204 : : static void InitializeLWLocks(void);
205 : : static inline void LWLockReportWaitStart(LWLock *lock);
206 : : static inline void LWLockReportWaitEnd(void);
207 : : static const char *GetLWTrancheName(uint16 trancheId);
208 : :
209 : : #define T_NAME(lock) \
210 : : GetLWTrancheName((lock)->tranche)
211 : :
212 : : #ifdef LWLOCK_STATS
213 : : typedef struct lwlock_stats_key
214 : : {
215 : : int tranche;
216 : : void *instance;
217 : : } lwlock_stats_key;
218 : :
219 : : typedef struct lwlock_stats
220 : : {
221 : : lwlock_stats_key key;
222 : : int sh_acquire_count;
223 : : int ex_acquire_count;
224 : : int block_count;
225 : : int dequeue_self_count;
226 : : int spin_delay_count;
227 : : } lwlock_stats;
228 : :
229 : : static HTAB *lwlock_stats_htab;
230 : : static lwlock_stats lwlock_stats_dummy;
231 : : #endif
232 : :
233 : : #ifdef LOCK_DEBUG
234 : : bool Trace_lwlocks = false;
235 : :
236 : : inline static void
237 : : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
238 : : {
239 : : /* hide statement & context here, otherwise the log is just too verbose */
240 : : if (Trace_lwlocks)
241 : : {
242 : : uint32 state = pg_atomic_read_u32(&lock->state);
243 : :
244 : : ereport(LOG,
245 : : (errhidestmt(true),
246 : : errhidecontext(true),
247 : : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
248 : : MyProcPid,
249 : : where, T_NAME(lock), lock,
250 : : (state & LW_VAL_EXCLUSIVE) != 0,
251 : : state & LW_SHARED_MASK,
252 : : (state & LW_FLAG_HAS_WAITERS) != 0,
253 : : pg_atomic_read_u32(&lock->nwaiters),
254 : : (state & LW_FLAG_RELEASE_OK) != 0)));
255 : : }
256 : : }
257 : :
258 : : inline static void
259 : : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
260 : : {
261 : : /* hide statement & context here, otherwise the log is just too verbose */
262 : : if (Trace_lwlocks)
263 : : {
264 : : ereport(LOG,
265 : : (errhidestmt(true),
266 : : errhidecontext(true),
267 : : errmsg_internal("%s(%s %p): %s", where,
268 : : T_NAME(lock), lock, msg)));
269 : : }
270 : : }
271 : :
272 : : #else /* not LOCK_DEBUG */
273 : : #define PRINT_LWDEBUG(a,b,c) ((void)0)
274 : : #define LOG_LWDEBUG(a,b,c) ((void)0)
275 : : #endif /* LOCK_DEBUG */
276 : :
277 : : #ifdef LWLOCK_STATS
278 : :
279 : : static void init_lwlock_stats(void);
280 : : static void print_lwlock_stats(int code, Datum arg);
281 : : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
282 : :
283 : : static void
284 : : init_lwlock_stats(void)
285 : : {
286 : : HASHCTL ctl;
287 : : static MemoryContext lwlock_stats_cxt = NULL;
288 : : static bool exit_registered = false;
289 : :
290 : : if (lwlock_stats_cxt != NULL)
291 : : MemoryContextDelete(lwlock_stats_cxt);
292 : :
293 : : /*
294 : : * The LWLock stats will be updated within a critical section, which
295 : : * requires allocating new hash entries. Allocations within a critical
296 : : * section are normally not allowed because running out of memory would
297 : : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
298 : : * turned on in production, so that's an acceptable risk. The hash entries
299 : : * are small, so the risk of running out of memory is minimal in practice.
300 : : */
301 : : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
302 : : "LWLock stats",
303 : : ALLOCSET_DEFAULT_SIZES);
304 : : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
305 : :
306 : : ctl.keysize = sizeof(lwlock_stats_key);
307 : : ctl.entrysize = sizeof(lwlock_stats);
308 : : ctl.hcxt = lwlock_stats_cxt;
309 : : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
310 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
311 : : if (!exit_registered)
312 : : {
313 : : on_shmem_exit(print_lwlock_stats, 0);
314 : : exit_registered = true;
315 : : }
316 : : }
317 : :
318 : : static void
319 : : print_lwlock_stats(int code, Datum arg)
320 : : {
321 : : HASH_SEQ_STATUS scan;
322 : : lwlock_stats *lwstats;
323 : :
324 : : hash_seq_init(&scan, lwlock_stats_htab);
325 : :
326 : : /* Grab an LWLock to keep different backends from mixing reports */
327 : : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
328 : :
329 : : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
330 : : {
331 : : fprintf(stderr,
332 : : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
333 : : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
334 : : lwstats->key.instance, lwstats->sh_acquire_count,
335 : : lwstats->ex_acquire_count, lwstats->block_count,
336 : : lwstats->spin_delay_count, lwstats->dequeue_self_count);
337 : : }
338 : :
339 : : LWLockRelease(&MainLWLockArray[0].lock);
340 : : }
341 : :
342 : : static lwlock_stats *
343 : : get_lwlock_stats_entry(LWLock *lock)
344 : : {
345 : : lwlock_stats_key key;
346 : : lwlock_stats *lwstats;
347 : : bool found;
348 : :
349 : : /*
350 : : * During shared memory initialization, the hash table doesn't exist yet.
351 : : * Stats of that phase aren't very interesting, so just collect operations
352 : : * on all locks in a single dummy entry.
353 : : */
354 : : if (lwlock_stats_htab == NULL)
355 : : return &lwlock_stats_dummy;
356 : :
357 : : /* Fetch or create the entry. */
358 : : MemSet(&key, 0, sizeof(key));
359 : : key.tranche = lock->tranche;
360 : : key.instance = lock;
361 : : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
362 : : if (!found)
363 : : {
364 : : lwstats->sh_acquire_count = 0;
365 : : lwstats->ex_acquire_count = 0;
366 : : lwstats->block_count = 0;
367 : : lwstats->dequeue_self_count = 0;
368 : : lwstats->spin_delay_count = 0;
369 : : }
370 : : return lwstats;
371 : : }
372 : : #endif /* LWLOCK_STATS */
373 : :
374 : :
375 : : /*
376 : : * Compute number of LWLocks required by named tranches. These will be
377 : : * allocated in the main array.
378 : : */
379 : : static int
1941 tgl@sss.pgh.pa.us 380 :CBC 2938 : NumLWLocksForNamedTranches(void)
381 : : {
3502 rhaas@postgresql.org 382 : 2938 : int numLocks = 0;
383 : : int i;
384 : :
385 [ + + ]: 2959 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
386 : 21 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
387 : :
388 : 2938 : return numLocks;
389 : : }
390 : :
391 : : /*
392 : : * Compute shmem space needed for LWLocks and named tranches.
393 : : */
394 : : Size
8743 tgl@sss.pgh.pa.us 395 : 2938 : LWLockShmemSize(void)
396 : : {
397 : : Size size;
3496 rhaas@postgresql.org 398 : 2938 : int numLocks = NUM_FIXED_LWLOCKS;
399 : :
400 : : /* Calculate total number of locks needed in the main array. */
1941 tgl@sss.pgh.pa.us 401 : 2938 : numLocks += NumLWLocksForNamedTranches();
402 : :
403 : : /* Space for dynamic allocation counter. */
3 nathan@postgresql.or 404 :GNC 2938 : size = MAXALIGN(sizeof(int));
405 : :
406 : : /* Space for named tranches. */
407 : 2938 : size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *)));
408 : 2938 : size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN));
409 : :
410 : : /* Space for the LWLock array, plus room for cache line alignment. */
411 : 2938 : size = add_size(size, LWLOCK_PADDED_SIZE);
412 : 2938 : size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded)));
413 : :
7322 tgl@sss.pgh.pa.us 414 :CBC 2938 : return size;
415 : : }
416 : :
417 : : /*
418 : : * Allocate shmem space for the main LWLock array and all tranches and
419 : : * initialize it.
420 : : */
421 : : void
8743 422 : 1029 : CreateLWLocks(void)
423 : : {
4240 rhaas@postgresql.org 424 [ + - ]: 1029 : if (!IsUnderPostmaster)
425 : : {
426 : 1029 : Size spaceLocks = LWLockShmemSize();
427 : : char *ptr;
428 : :
429 : : /* Allocate space */
430 : 1029 : ptr = (char *) ShmemAlloc(spaceLocks);
431 : :
432 : : /* Initialize the dynamic-allocation counter for tranches */
8 nathan@postgresql.or 433 :GNC 1029 : LWLockCounter = (int *) ptr;
434 : 1029 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
3 435 : 1029 : ptr += MAXALIGN(sizeof(int));
436 : :
437 : : /* Initialize tranche names */
438 : 1029 : LWLockTrancheNames = (char **) ptr;
439 : 1029 : ptr += MAX_NAMED_TRANCHES * sizeof(char *);
440 [ + + ]: 264453 : for (int i = 0; i < MAX_NAMED_TRANCHES; i++)
441 : : {
442 : 263424 : LWLockTrancheNames[i] = ptr;
443 : 263424 : ptr += NAMEDATALEN;
444 : : }
445 : :
446 : : /* Ensure desired alignment of LWLock array */
4240 rhaas@postgresql.org 447 :CBC 1029 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
448 : 1029 : MainLWLockArray = (LWLockPadded *) ptr;
449 : :
450 : : /* Initialize all LWLocks */
3495 451 : 1029 : InitializeLWLocks();
452 : : }
453 : 1029 : }
454 : :
455 : : /*
456 : : * Initialize LWLocks that are fixed and those belonging to named tranches.
457 : : */
458 : : static void
459 : 1029 : InitializeLWLocks(void)
460 : : {
461 : : int id;
462 : : int i;
463 : : int j;
464 : : LWLockPadded *lock;
465 : :
466 : : /* Initialize all individual LWLocks in main array */
467 [ + + ]: 56595 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
3186 468 : 55566 : LWLockInitialize(&lock->lock, id);
469 : :
470 : : /* Initialize buffer mapping LWLocks in main array */
1747 michael@paquier.xyz 471 : 1029 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
3495 rhaas@postgresql.org 472 [ + + ]: 132741 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
473 : 131712 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
474 : :
475 : : /* Initialize lmgrs' LWLocks in main array */
1747 michael@paquier.xyz 476 : 1029 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
3495 rhaas@postgresql.org 477 [ + + ]: 17493 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
478 : 16464 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
479 : :
480 : : /* Initialize predicate lmgrs' LWLocks in main array */
1747 michael@paquier.xyz 481 : 1029 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
3495 rhaas@postgresql.org 482 [ + + ]: 17493 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
483 : 16464 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
484 : :
485 : : /*
486 : : * Copy the info about any named tranches into shared memory (so that
487 : : * other processes can see it), and initialize the requested LWLocks.
488 : : */
489 [ + + ]: 1029 : if (NamedLWLockTrancheRequests > 0)
490 : : {
491 : 7 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
492 : :
493 [ + + ]: 14 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
494 : : {
495 : : NamedLWLockTrancheRequest *request;
496 : : int tranche;
497 : :
498 : 7 : request = &NamedLWLockTrancheRequestArray[i];
3 nathan@postgresql.or 499 :GNC 7 : tranche = LWLockNewTrancheId(request->tranche_name);
500 : :
3495 rhaas@postgresql.org 501 [ + + ]:CBC 14 : for (j = 0; j < request->num_lwlocks; j++, lock++)
3 nathan@postgresql.or 502 :GNC 7 : LWLockInitialize(&lock->lock, tranche);
503 : : }
504 : : }
3495 rhaas@postgresql.org 505 :CBC 1029 : }
506 : :
507 : : /*
508 : : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
509 : : */
510 : : void
4086 heikki.linnakangas@i 511 : 18767 : InitLWLockAccess(void)
512 : : {
513 : : #ifdef LWLOCK_STATS
514 : : init_lwlock_stats();
515 : : #endif
516 : 18767 : }
517 : :
518 : : /*
519 : : * GetNamedLWLockTranche - returns the base address of the LWLocks
520 : : * belonging to the specified tranche.
521 : : *
522 : : * Caller needs to retrieve the requested number of LWLocks starting from
523 : : * the base lock address returned by this API. This can be used for
524 : : * tranches that are requested by using RequestNamedLWLockTranche() API.
525 : : */
526 : : LWLockPadded *
3502 rhaas@postgresql.org 527 : 7 : GetNamedLWLockTranche(const char *tranche_name)
528 : : {
529 : : int lock_pos;
530 : : int i;
531 : :
532 : : /*
533 : : * Obtain the position of the base address of the LWLocks belonging to
534 : : * the requested tranche_name in MainLWLockArray. LWLocks for named
535 : : * tranches are placed in MainLWLockArray after the fixed locks.
536 : : */
3496 537 : 7 : lock_pos = NUM_FIXED_LWLOCKS;
3502 538 [ + - ]: 7 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
539 : : {
540 [ + - ]: 7 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
541 : : tranche_name) == 0)
542 : 7 : return &MainLWLockArray[lock_pos];
543 : :
3502 rhaas@postgresql.org 544 :UBC 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
545 : : }
546 : :
1941 tgl@sss.pgh.pa.us 547 [ # # ]: 0 : elog(ERROR, "requested tranche is not registered");
548 : :
549 : : /* just to keep compiler quiet */
550 : : return NULL;
551 : : }
552 : :
553 : : /*
554 : : * Allocate a new tranche ID with the provided name.
555 : : */
556 : : int
3 nathan@postgresql.or 557 :GNC 17 : LWLockNewTrancheId(const char *name)
558 : : {
559 : : int result;
560 : :
561 [ - + ]: 17 : if (!name)
3 nathan@postgresql.or 562 [ # # ]:UNC 0 : ereport(ERROR,
563 : : (errcode(ERRCODE_INVALID_NAME),
564 : : errmsg("tranche name cannot be NULL")));
565 : :
3 nathan@postgresql.or 566 [ - + ]:GNC 17 : if (strlen(name) >= NAMEDATALEN)
3 nathan@postgresql.or 567 [ # # ]:UNC 0 : ereport(ERROR,
568 : : (errcode(ERRCODE_NAME_TOO_LONG),
569 : : errmsg("tranche name too long"),
570 : : errdetail("LWLock tranche names must be no longer than %d bytes.",
571 : : NAMEDATALEN - 1)));
572 : :
573 : : /*
574 : : * We use the ShmemLock spinlock to protect LWLockCounter and
575 : : * LWLockTrancheNames.
576 : : */
3 nathan@postgresql.or 577 [ - + ]:GNC 17 : SpinLockAcquire(ShmemLock);
578 : :
579 [ - + ]: 17 : if (*LWLockCounter - LWTRANCHE_FIRST_USER_DEFINED >= MAX_NAMED_TRANCHES)
580 : : {
3 nathan@postgresql.or 581 :UNC 0 : SpinLockRelease(ShmemLock);
582 [ # # ]: 0 : ereport(ERROR,
583 : : (errmsg("maximum number of tranches already registered"),
584 : : errdetail("No more than %d tranches may be registered.",
585 : : MAX_NAMED_TRANCHES)));
586 : : }
587 : :
3 nathan@postgresql.or 588 :GNC 17 : result = (*LWLockCounter)++;
589 : 17 : LocalLWLockCounter = *LWLockCounter;
590 : 17 : strlcpy(LWLockTrancheNames[result - LWTRANCHE_FIRST_USER_DEFINED], name, NAMEDATALEN);
591 : :
592 : 17 : SpinLockRelease(ShmemLock);
593 : :
594 : 17 : return result;
595 : : }
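/*
 * A hedged usage sketch: register a runtime tranche once and stamp each
 * lock in extension-owned shared memory with the returned ID. MySharedState
 * and its placement in shared memory are hypothetical.
 */
typedef struct MySharedState
{
    LWLock      lock;
    int         counter;
} MySharedState;

static void
my_init_shared_state(MySharedState *state)
{
    /* The tranche name becomes user-visible as a wait event name. */
    int         tranche_id = LWLockNewTrancheId("MyExtensionState");

    LWLockInitialize(&state->lock, tranche_id);
    state->counter = 0;
}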
596 : :
597 : : /*
598 : : * RequestNamedLWLockTranche
599 : : * Request that extra LWLocks be allocated during postmaster
600 : : * startup.
601 : : *
602 : : * This may only be called via the shmem_request_hook of a library that is
603 : : * loaded into the postmaster via shared_preload_libraries. Calls from
604 : : * elsewhere will fail.
605 : : *
606 : : * The tranche name will be user-visible as a wait event name, so try to
607 : : * use a name that fits the style for those.
608 : : */
609 : : void
3502 rhaas@postgresql.org 610 :CBC 7 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
611 : : {
612 : : NamedLWLockTrancheRequest *request;
613 : : static int NamedLWLockTrancheRequestsAllocated;
614 : :
1212 615 [ - + ]: 7 : if (!process_shmem_requests_in_progress)
1212 rhaas@postgresql.org 616 [ # # ]:UBC 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
617 : :
3 nathan@postgresql.or 618 [ - + ]:GNC 7 : if (!tranche_name)
3 nathan@postgresql.or 619 [ # # ]:UNC 0 : ereport(ERROR,
620 : : (errcode(ERRCODE_INVALID_NAME),
621 : : errmsg("tranche name cannot be NULL")));
622 : :
3 nathan@postgresql.or 623 [ - + ]:GNC 7 : if (strlen(tranche_name) >= NAMEDATALEN)
3 nathan@postgresql.or 624 [ # # ]:UNC 0 : ereport(ERROR,
625 : : (errcode(ERRCODE_NAME_TOO_LONG),
626 : : errmsg("tranche name too long"),
627 : : errdetail("LWLock tranche names must be no longer than %d bytes.",
628 : : NAMEDATALEN - 1)));
629 : :
3502 rhaas@postgresql.org 630 [ + - ]:CBC 7 : if (NamedLWLockTrancheRequestArray == NULL)
631 : : {
2 nathan@postgresql.or 632 : 7 : NamedLWLockTrancheRequestsAllocated = 16;
3502 rhaas@postgresql.org 633 : 7 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
634 : 7 : MemoryContextAlloc(TopMemoryContext,
635 : : NamedLWLockTrancheRequestsAllocated
636 : : * sizeof(NamedLWLockTrancheRequest));
637 : : }
638 : :
2 nathan@postgresql.or 639 [ - + ]: 7 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
640 : : {
2 nathan@postgresql.or 641 :UBC 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
642 : :
643 : 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
644 : 0 : repalloc(NamedLWLockTrancheRequestArray,
645 : : i * sizeof(NamedLWLockTrancheRequest));
646 : 0 : NamedLWLockTrancheRequestsAllocated = i;
647 : : }
648 : :
3502 rhaas@postgresql.org 649 :CBC 7 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
1941 tgl@sss.pgh.pa.us 650 : 7 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
3502 rhaas@postgresql.org 651 : 7 : request->num_lwlocks = num_lwlocks;
652 : 7 : NamedLWLockTrancheRequests++;
653 : 7 : }
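/*
 * Paired with GetNamedLWLockTranche() above, a library loaded via
 * shared_preload_libraries typically uses this as sketched below; the hook
 * wiring and names are illustrative, not part of this file.
 */
static shmem_request_hook_type prev_shmem_request_hook = NULL;

/* Installed from _PG_init(); runs while shmem requests are being collected. */
static void
my_shmem_request(void)
{
    if (prev_shmem_request_hook)
        prev_shmem_request_hook();
    RequestNamedLWLockTranche("my_extension", 4);
}

/* Once shared memory exists, fetch the base address of our locks. */
static void
my_attach(void)
{
    LWLockPadded *locks = GetNamedLWLockTranche("my_extension");

    /* locks[0].lock .. locks[3].lock belong to this tranche. */
    LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
    LWLockRelease(&locks[0].lock);
}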
654 : :
655 : : /*
656 : : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
657 : : */
658 : : void
4240 659 : 11470816 : LWLockInitialize(LWLock *lock, int tranche_id)
660 : : {
661 : : /* verify the tranche_id is valid */
3 nathan@postgresql.or 662 :GNC 11470816 : (void) GetLWTrancheName(tranche_id);
663 : :
3908 andres@anarazel.de 664 :CBC 11470816 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
665 : : #ifdef LOCK_DEBUG
666 : : pg_atomic_init_u32(&lock->nwaiters, 0);
667 : : #endif
4240 rhaas@postgresql.org 668 : 11470816 : lock->tranche = tranche_id;
3309 669 : 11470816 : proclist_init(&lock->waiters);
4240 670 : 11470816 : }
671 : :
672 : : /*
673 : : * Report start of wait event for light-weight locks.
674 : : *
675 : : * This function is used by all the light-weight lock calls that need
676 : : * to wait to acquire the lock. It distinguishes the wait
677 : : * event based on tranche and lock id.
678 : : */
679 : : static inline void
3467 680 : 223933 : LWLockReportWaitStart(LWLock *lock)
681 : : {
3186 682 : 223933 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
3467 683 : 223933 : }
684 : :
685 : : /*
686 : : * Report end of wait event for light-weight locks.
687 : : */
688 : : static inline void
3450 andres@anarazel.de 689 : 223933 : LWLockReportWaitEnd(void)
690 : : {
3467 rhaas@postgresql.org 691 : 223933 : pgstat_report_wait_end();
692 : 223933 : }
693 : :
694 : : /*
695 : : * Return the name of an LWLock tranche.
696 : : */
697 : : static const char *
1941 tgl@sss.pgh.pa.us 698 : 11470836 : GetLWTrancheName(uint16 trancheId)
699 : : {
700 : : /* Built-in tranche or individual LWLock? */
701 [ + + ]: 11470836 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
591 alvherre@alvh.no-ip. 702 : 11470485 : return BuiltinTrancheNames[trancheId];
703 : :
704 : : /*
705 : : * We only ever add new entries to LWLockTrancheNames, so most lookups can
706 : : * avoid taking the spinlock as long as the backend-local counter
707 : : * (LocalLWLockCounter) is greater than the requested tranche ID. Else,
708 : : * we need to first update the backend-local counter with ShmemLock held
709 : : * before attempting the lookup again. In practice, the latter case is
710 : : * probably rare.
711 : : */
3 nathan@postgresql.or 712 [ - + ]:GNC 351 : if (trancheId >= LocalLWLockCounter)
713 : : {
3 nathan@postgresql.or 714 [ # # ]:UNC 0 : SpinLockAcquire(ShmemLock);
715 : 0 : LocalLWLockCounter = *LWLockCounter;
716 : 0 : SpinLockRelease(ShmemLock);
717 : :
718 [ # # ]: 0 : if (trancheId >= LocalLWLockCounter)
719 [ # # ]: 0 : elog(ERROR, "tranche %d is not registered", trancheId);
720 : : }
721 : :
722 : : /*
723 : : * It's an extension tranche, so look in LWLockTrancheNames.
724 : : */
3 nathan@postgresql.or 725 :GNC 351 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
726 : :
1941 tgl@sss.pgh.pa.us 727 :GBC 351 : return LWLockTrancheNames[trancheId];
728 : : }
729 : :
730 : : /*
731 : : * Return an identifier for an LWLock based on the wait class and event.
732 : : */
733 : : const char *
1941 tgl@sss.pgh.pa.us 734 :CBC 20 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
735 : : {
736 [ - + ]: 20 : Assert(classId == PG_WAIT_LWLOCK);
737 : : /* The event IDs are just tranche numbers. */
738 : 20 : return GetLWTrancheName(eventId);
739 : : }
740 : :
741 : : /*
742 : : * Internal function that tries to atomically acquire the lwlock in the passed
743 : : * in mode.
744 : : *
745 : : * This function will not block waiting for a lock to become free - that's the
746 : : * caller's job.
747 : : *
748 : : * Returns true if the lock isn't free and we need to wait.
749 : : */
750 : : static bool
3759 bruce@momjian.us 751 : 231190998 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
752 : : {
753 : : uint32 old_state;
754 : :
1044 peter@eisentraut.org 755 [ + + - + ]: 231190998 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
756 : :
757 : : /*
758 : : * Read once outside the loop, later iterations will get the newer value
759 : : * via compare & exchange.
760 : : */
3690 andres@anarazel.de 761 : 231190998 : old_state = pg_atomic_read_u32(&lock->state);
762 : :
763 : : /* loop until we've determined whether we could acquire the lock or not */
764 : : while (true)
3908 765 : 36446 : {
766 : : uint32 desired_state;
767 : : bool lock_free;
768 : :
3690 769 : 231227444 : desired_state = old_state;
770 : :
3908 771 [ + + ]: 231227444 : if (mode == LW_EXCLUSIVE)
772 : : {
3690 773 : 114457431 : lock_free = (old_state & LW_LOCK_MASK) == 0;
3908 774 [ + + ]: 114457431 : if (lock_free)
775 : 114240607 : desired_state += LW_VAL_EXCLUSIVE;
776 : : }
777 : : else
778 : : {
3690 779 : 116770013 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
3908 780 [ + + ]: 116770013 : if (lock_free)
781 : 116750762 : desired_state += LW_VAL_SHARED;
782 : : }
783 : :
784 : : /*
785 : : * Attempt to swap in the state we are expecting. If we didn't see the
786 : : * lock as free, that's just the old value. If we saw it as free,
787 : : * we'll attempt to mark it acquired. The reason that we always swap
788 : : * in the value is that this doubles as a memory barrier. We could try
789 : : * to be smarter and only swap in values if we saw the lock as free,
790 : : * but benchmarks haven't shown it as beneficial so far.
791 : : *
792 : : * Retry if the value changed since we last looked at it.
793 : : */
794 [ + + ]: 231227444 : if (pg_atomic_compare_exchange_u32(&lock->state,
795 : : &old_state, desired_state))
796 : : {
797 [ + + ]: 231190998 : if (lock_free)
798 : : {
799 : : /* Great! Got the lock. */
800 : : #ifdef LOCK_DEBUG
801 : : if (mode == LW_EXCLUSIVE)
802 : : lock->owner = MyProc;
803 : : #endif
804 : 230961014 : return false;
805 : : }
806 : : else
3134 heikki.linnakangas@i 807 : 229984 : return true; /* somebody else has the lock */
808 : : }
809 : : }
810 : : pg_unreachable();
811 : : }
812 : :
813 : : /*
814 : : * Lock the LWLock's wait list against concurrent activity.
815 : : *
816 : : * NB: even though the wait list is locked, non-conflicting lock operations
817 : : * may still happen concurrently.
818 : : *
819 : : * Time spent holding mutex should be short!
820 : : */
821 : : static void
3436 andres@anarazel.de 822 : 2781694 : LWLockWaitListLock(LWLock *lock)
823 : : {
824 : : uint32 old_state;
825 : : #ifdef LWLOCK_STATS
826 : : lwlock_stats *lwstats;
827 : : uint32 delays = 0;
828 : :
829 : : lwstats = get_lwlock_stats_entry(lock);
830 : : #endif
831 : :
832 : : while (true)
833 : : {
834 : : /* always try once to acquire lock directly */
835 : 2790162 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
836 [ + + ]: 2790162 : if (!(old_state & LW_FLAG_LOCKED))
837 : 2781694 : break; /* got lock */
838 : :
839 : : /* and then spin without atomic operations until lock is released */
840 : : {
841 : : SpinDelayStatus delayStatus;
842 : :
3432 843 : 8468 : init_local_spin_delay(&delayStatus);
844 : :
3436 845 [ + + ]: 88219 : while (old_state & LW_FLAG_LOCKED)
846 : : {
847 : 79751 : perform_spin_delay(&delayStatus);
848 : 79751 : old_state = pg_atomic_read_u32(&lock->state);
849 : : }
850 : : #ifdef LWLOCK_STATS
851 : : delays += delayStatus.delays;
852 : : #endif
853 : 8468 : finish_spin_delay(&delayStatus);
854 : : }
855 : :
856 : : /*
857 : : * Retry. The lock might well have been re-acquired by the time we
858 : : * attempt to get it again.
859 : : */
860 : : }
861 : :
862 : : #ifdef LWLOCK_STATS
863 : : lwstats->spin_delay_count += delays;
864 : : #endif
865 : 2781694 : }
866 : :
867 : : /*
868 : : * Unlock the LWLock's wait list.
869 : : *
870 : : * Note that it can be more efficient to manipulate flags and release the
871 : : * locks in a single atomic operation.
872 : : */
873 : : static void
874 : 2535361 : LWLockWaitListUnlock(LWLock *lock)
875 : : {
876 : : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
877 : :
878 : 2535361 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
879 : :
880 [ - + ]: 2535361 : Assert(old_state & LW_FLAG_LOCKED);
881 : 2535361 : }
882 : :
883 : : /*
884 : : * Wakeup all the lockers that currently have a chance to acquire the lock.
885 : : */
886 : : static void
3908 887 : 246333 : LWLockWakeup(LWLock *lock)
888 : : {
889 : : bool new_release_ok;
890 : 246333 : bool wokeup_somebody = false;
891 : : proclist_head wakeup;
892 : : proclist_mutable_iter iter;
893 : :
3309 rhaas@postgresql.org 894 : 246333 : proclist_init(&wakeup);
895 : :
3908 andres@anarazel.de 896 : 246333 : new_release_ok = true;
897 : :
898 : : /* lock wait list while collecting backends to wake up */
3436 899 : 246333 : LWLockWaitListLock(lock);
900 : :
3309 rhaas@postgresql.org 901 [ + + + + : 377848 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
+ + ]
902 : : {
903 : 229277 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
904 : :
3908 andres@anarazel.de 905 [ + + + + ]: 229277 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
906 : 1055 : continue;
907 : :
3309 rhaas@postgresql.org 908 : 228222 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
909 : 228222 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
910 : :
3908 andres@anarazel.de 911 [ + + ]: 228222 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
912 : : {
913 : : /*
914 : : * Prevent additional wakeups until retryer gets to run. Backends
915 : : * that are just waiting for the lock to become free don't retry
916 : : * automatically.
917 : : */
918 : 107270 : new_release_ok = false;
919 : :
920 : : /*
921 : : * Don't wakeup (further) exclusive locks.
922 : : */
923 : 107270 : wokeup_somebody = true;
924 : : }
925 : :
926 : : /*
927 : : * Signal that the process isn't on the wait list anymore. This allows
928 : : * LWLockDequeueSelf() to remove itself from the waitlist with a
929 : : * proclist_delete(), rather than having to check if it has been
930 : : * removed from the list.
931 : : */
1021 932 [ - + ]: 228222 : Assert(waiter->lwWaiting == LW_WS_WAITING);
933 : 228222 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
934 : :
935 : : /*
936 : : * Once we've woken up an exclusive lock, there's no point in waking
937 : : * up anybody else.
938 : : */
3759 bruce@momjian.us 939 [ + + ]: 228222 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
3908 andres@anarazel.de 940 : 97762 : break;
941 : : }
942 : :
3309 rhaas@postgresql.org 943 [ + + - + ]: 246333 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
944 : :
945 : : /* unset required flags, and release lock, in one fell swoop */
946 : : {
947 : : uint32 old_state;
948 : : uint32 desired_state;
949 : :
3436 andres@anarazel.de 950 : 246333 : old_state = pg_atomic_read_u32(&lock->state);
951 : : while (true)
952 : : {
953 : 246722 : desired_state = old_state;
954 : :
955 : : /* compute desired flags */
956 : :
957 [ + + ]: 246722 : if (new_release_ok)
958 : 140878 : desired_state |= LW_FLAG_RELEASE_OK;
959 : : else
960 : 105844 : desired_state &= ~LW_FLAG_RELEASE_OK;
961 : :
3309 rhaas@postgresql.org 962 [ + + ]: 246722 : if (proclist_is_empty(&wakeup))
3436 andres@anarazel.de 963 : 95951 : desired_state &= ~LW_FLAG_HAS_WAITERS;
964 : :
965 : 246722 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
966 : :
967 [ + + ]: 246722 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
968 : : desired_state))
969 : 246333 : break;
970 : : }
971 : : }
972 : :
973 : : /* Awaken any waiters I removed from the queue. */
3309 rhaas@postgresql.org 974 [ + + + + : 474555 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
975 : : {
976 : 228222 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
977 : :
978 : : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
979 : 228222 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
980 : :
981 : : /*
982 : : * Guarantee that lwWaiting being unset only becomes visible once the
983 : : * unlink from the link has completed. Otherwise the target backend
984 : : * could be woken up for other reason and enqueue for a new lock - if
985 : : * that happens before the list unlink happens, the list would end up
986 : : * being corrupted.
987 : : *
988 : : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
989 : : * another lock.
990 : : */
3908 andres@anarazel.de 991 : 228222 : pg_write_barrier();
1021 992 : 228222 : waiter->lwWaiting = LW_WS_NOT_WAITING;
3190 tgl@sss.pgh.pa.us 993 : 228222 : PGSemaphoreUnlock(waiter->sem);
994 : : }
3908 andres@anarazel.de 995 : 246333 : }
996 : :
997 : : /*
998 : : * Add ourselves to the end of the queue.
999 : : *
1000 : : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1001 : : */
1002 : : static void
1003 : 243775 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1004 : : {
1005 : : /*
1006 : : * If we don't have a PGPROC structure, there's no way to wait. This
1007 : : * should never occur, since MyProc should only be null during shared
1008 : : * memory initialization.
1009 : : */
1010 [ - + ]: 243775 : if (MyProc == NULL)
3908 andres@anarazel.de 1011 [ # # ]:UBC 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1012 : :
1021 andres@anarazel.de 1013 [ - + ]:CBC 243775 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
3908 andres@anarazel.de 1014 [ # # ]:UBC 0 : elog(PANIC, "queueing for lock while waiting on another one");
1015 : :
3436 andres@anarazel.de 1016 :CBC 243775 : LWLockWaitListLock(lock);
1017 : :
1018 : : /* setting the flag is protected by the spinlock */
3908 1019 : 243775 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1020 : :
1021 1021 : 243775 : MyProc->lwWaiting = LW_WS_WAITING;
3908 1022 : 243775 : MyProc->lwWaitMode = mode;
1023 : :
1024 : : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1025 [ + + ]: 243775 : if (mode == LW_WAIT_UNTIL_FREE)
562 heikki.linnakangas@i 1026 : 122007 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1027 : : else
1028 : 121768 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1029 : :
1030 : : /* Can release the mutex now */
3436 andres@anarazel.de 1031 : 243775 : LWLockWaitListUnlock(lock);
1032 : :
1033 : : #ifdef LOCK_DEBUG
1034 : : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1035 : : #endif
3908 1036 : 243775 : }
1037 : :
1038 : : /*
1039 : : * Remove ourselves from the waitlist.
1040 : : *
1041 : : * This is used if we queued ourselves because we thought we needed to sleep
1042 : : * but, after further checking, we discovered that we don't actually need to
1043 : : * do so.
1044 : : */
1045 : : static void
1046 : 19842 : LWLockDequeueSelf(LWLock *lock)
1047 : : {
1048 : : bool on_waitlist;
1049 : :
1050 : : #ifdef LWLOCK_STATS
1051 : : lwlock_stats *lwstats;
1052 : :
1053 : : lwstats = get_lwlock_stats_entry(lock);
1054 : :
1055 : : lwstats->dequeue_self_count++;
1056 : : #endif
1057 : :
3436 1058 : 19842 : LWLockWaitListLock(lock);
1059 : :
1060 : : /*
1061 : : * Remove ourselves from the waitlist, unless we've already been removed.
1062 : : * The removal happens with the wait list lock held, so there's no race in
1063 : : * this check.
1064 : : */
1021 1065 : 19842 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1066 [ + + ]: 19842 : if (on_waitlist)
562 heikki.linnakangas@i 1067 : 15076 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1068 : :
3309 rhaas@postgresql.org 1069 [ + + ]: 19842 : if (proclist_is_empty(&lock->waiters) &&
3908 andres@anarazel.de 1070 [ + + ]: 19470 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1071 : : {
1072 : 19469 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1073 : : }
1074 : :
1075 : : /* XXX: combine with fetch_and above? */
3436 1076 : 19842 : LWLockWaitListUnlock(lock);
1077 : :
1078 : : /* clear waiting state again, nice for debugging */
1021 1079 [ + + ]: 19842 : if (on_waitlist)
1080 : 15076 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1081 : : else
1082 : : {
3759 bruce@momjian.us 1083 : 4766 : int extraWaits = 0;
1084 : :
1085 : : /*
1086 : : * Somebody else dequeued us and has or will wake us up. Deal with the
1087 : : * superfluous absorption of a wakeup.
1088 : : */
1089 : :
1090 : : /*
1091 : : * Reset RELEASE_OK flag if somebody woke us before we removed
1092 : : * ourselves - they'll have set it to false.
1093 : : */
3908 andres@anarazel.de 1094 : 4766 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1095 : :
1096 : : /*
1097 : : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1098 : : * get reset at some inconvenient point later. Most of the time this
1099 : : * will immediately return.
1100 : : */
1101 : : for (;;)
1102 : : {
3190 tgl@sss.pgh.pa.us 1103 : 4766 : PGSemaphoreLock(MyProc->sem);
1021 andres@anarazel.de 1104 [ + - ]: 4766 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
3908 1105 : 4766 : break;
3908 andres@anarazel.de 1106 :UBC 0 : extraWaits++;
1107 : : }
1108 : :
1109 : : /*
1110 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1111 : : */
3908 andres@anarazel.de 1112 [ - + ]:CBC 4766 : while (extraWaits-- > 0)
3190 tgl@sss.pgh.pa.us 1113 :UBC 0 : PGSemaphoreUnlock(MyProc->sem);
1114 : : }
1115 : :
1116 : : #ifdef LOCK_DEBUG
1117 : : {
1118 : : /* not waiting anymore */
1119 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1120 : :
1121 : : Assert(nwaiters < MAX_BACKENDS);
1122 : : }
1123 : : #endif
3908 andres@anarazel.de 1124 :CBC 19842 : }
1125 : :
1126 : : /*
1127 : : * LWLockAcquire - acquire a lightweight lock in the specified mode
1128 : : *
1129 : : * If the lock is not available, sleep until it is. Returns true if the lock
1130 : : * was available immediately, false if we had to sleep.
1131 : : *
1132 : : * Side effect: cancel/die interrupts are held off until lock release.
1133 : : */
1134 : : bool
3690 1135 : 228934748 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1136 : : {
8488 JanWieck@Yahoo.com 1137 : 228934748 : PGPROC *proc = MyProc;
4187 heikki.linnakangas@i 1138 : 228934748 : bool result = true;
8643 tgl@sss.pgh.pa.us 1139 : 228934748 : int extraWaits = 0;
1140 : : #ifdef LWLOCK_STATS
1141 : : lwlock_stats *lwstats;
1142 : :
1143 : : lwstats = get_lwlock_stats_entry(lock);
1144 : : #endif
1145 : :
1044 peter@eisentraut.org 1146 [ + + - + ]: 228934748 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1147 : :
1148 : : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1149 : :
1150 : : #ifdef LWLOCK_STATS
1151 : : /* Count lock acquisition attempts */
1152 : : if (mode == LW_EXCLUSIVE)
1153 : : lwstats->ex_acquire_count++;
1154 : : else
1155 : : lwstats->sh_acquire_count++;
1156 : : #endif /* LWLOCK_STATS */
1157 : :
1158 : : /*
1159 : : * We can't wait if we haven't got a PGPROC. This should only occur
1160 : : * during bootstrap or shared memory initialization. Put an Assert here
1161 : : * to catch unsafe coding practices.
1162 : : */
8382 tgl@sss.pgh.pa.us 1163 [ + + - + ]: 228934748 : Assert(!(proc == NULL && IsUnderPostmaster));
1164 : :
1165 : : /* Ensure we will have room to remember the lock */
7456 1166 [ - + ]: 228934748 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
7456 tgl@sss.pgh.pa.us 1167 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1168 : :
1169 : : /*
1170 : : * Lock out cancel/die interrupts until we exit the code section protected
1171 : : * by the LWLock. This ensures that interrupts will not interfere with
1172 : : * manipulations of data structures in shared memory.
1173 : : */
8743 tgl@sss.pgh.pa.us 1174 :CBC 228934748 : HOLD_INTERRUPTS();
1175 : :
1176 : : /*
1177 : : * Loop here to try to acquire lock after each time we are signaled by
1178 : : * LWLockRelease.
1179 : : *
1180 : : * NOTE: it might seem better to have LWLockRelease actually grant us the
1181 : : * lock, rather than retrying and possibly having to go back to sleep. But
1182 : : * in practice that is no good because it means a process swap for every
1183 : : * lock acquisition when two or more processes are contending for the same
1184 : : * lock. Since LWLocks are normally used to protect not-very-long
1185 : : * sections of computation, a process needs to be able to acquire and
1186 : : * release the same lock many times during a single CPU time slice, even
1187 : : * in the presence of contention. The efficiency of being able to do that
1188 : : * outweighs the inefficiency of sometimes wasting a process dispatch
1189 : : * cycle because the lock is not free when a released waiter finally gets
1190 : : * to run. See pgsql-hackers archives for 29-Dec-01.
1191 : : */
1192 : : for (;;)
8652 bruce@momjian.us 1193 : 102765 : {
1194 : : bool mustwait;
1195 : :
1196 : : /*
1197 : : * Try to grab the lock the first time, we're not in the waitqueue
1198 : : * yet/anymore.
1199 : : */
3908 andres@anarazel.de 1200 : 229037513 : mustwait = LWLockAttemptLock(lock, mode);
1201 : :
8643 tgl@sss.pgh.pa.us 1202 [ + + ]: 229037513 : if (!mustwait)
1203 : : {
1204 : : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1205 : 228915745 : break; /* got the lock */
1206 : : }
1207 : :
1208 : : /*
1209 : : * Ok, at this point we couldn't grab the lock on the first try. We
1210 : : * cannot simply queue ourselves to the end of the list and wait to be
1211 : : * woken up because by now the lock could long have been released.
1212 : : * Instead add us to the queue and try to grab the lock again. If we
1213 : : * succeed we need to revert the queuing and be happy, otherwise we
1214 : : * recheck the lock. If we still couldn't grab it, we know that the
1215 : : * other locker will see our queue entries when releasing since they
1216 : : * existed before we checked for the lock.
1217 : : */
1218 : :
1219 : : /* add to the queue */
3908 andres@anarazel.de 1220 : 121768 : LWLockQueueSelf(lock, mode);
1221 : :
1222 : : /* we're now guaranteed to be woken up if necessary */
1223 : 121768 : mustwait = LWLockAttemptLock(lock, mode);
1224 : :
1225 : : /* ok, grabbed the lock the second time round, need to undo queueing */
1226 [ + + ]: 121768 : if (!mustwait)
1227 : : {
1228 : : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1229 : :
1230 : 19003 : LWLockDequeueSelf(lock);
1231 : 19003 : break;
1232 : : }
1233 : :
1234 : : /*
1235 : : * Wait until awakened.
1236 : : *
1237 : : * It is possible that we get awakened for a reason other than being
1238 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1239 : : * we've gotten the LWLock, re-increment the sema by the number of
1240 : : * additional signals received.
1241 : : */
1242 : : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1243 : :
1244 : : #ifdef LWLOCK_STATS
1245 : : lwstats->block_count++;
1246 : : #endif
1247 : :
3467 rhaas@postgresql.org 1248 : 102765 : LWLockReportWaitStart(lock);
1249 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1250 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1251 : :
1252 : : for (;;)
1253 : : {
3190 tgl@sss.pgh.pa.us 1254 : 102765 : PGSemaphoreLock(proc->sem);
1021 andres@anarazel.de 1255 [ + - ]: 102765 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
8743 tgl@sss.pgh.pa.us 1256 : 102765 : break;
8743 tgl@sss.pgh.pa.us 1257 :UBC 0 : extraWaits++;
1258 : : }
1259 : :
1260 : : /* Retrying, allow LWLockRelease to release waiters again. */
3908 andres@anarazel.de 1261 :CBC 102765 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1262 : :
1263 : : #ifdef LOCK_DEBUG
1264 : : {
1265 : : /* not waiting anymore */
1266 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1267 : :
1268 : : Assert(nwaiters < MAX_BACKENDS);
1269 : : }
1270 : : #endif
1271 : :
1272 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1273 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
3467 rhaas@postgresql.org 1274 : 102765 : LWLockReportWaitEnd();
1275 : :
1276 : : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1277 : :
1278 : : /* Now loop back and try to acquire lock again. */
4187 heikki.linnakangas@i 1279 : 102765 : result = false;
1280 : : }
1281 : :
1282 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1283 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1284 : :
1285 : : /* Add lock to list of locks held by this backend */
3908 andres@anarazel.de 1286 : 228934748 : held_lwlocks[num_held_lwlocks].lock = lock;
1287 : 228934748 : held_lwlocks[num_held_lwlocks++].mode = mode;
1288 : :
1289 : : /*
1290 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1291 : : */
8643 tgl@sss.pgh.pa.us 1292 [ - + ]: 228934748 : while (extraWaits-- > 0)
3190 tgl@sss.pgh.pa.us 1293 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1294 : :
4187 heikki.linnakangas@i 1295 :CBC 228934748 : return result;
1296 : : }
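/*
 * The canonical calling pattern: a short critical section between acquire
 * and release. A minimal hedged sketch against an individually-named lock
 * (the reader body is illustrative):
 */
static void
example_read_shared_state(void)
{
    LWLockAcquire(ProcArrayLock, LW_SHARED);
    /* ... read data structures protected by ProcArrayLock ... */
    LWLockRelease(ProcArrayLock);
}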
1297 : :
1298 : : /*
1299 : : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1300 : : *
1301 : : * If the lock is not available, return false with no side-effects.
1302 : : *
1303 : : * If successful, cancel/die interrupts are held off until lock release.
1304 : : */
1305 : : bool
4002 rhaas@postgresql.org 1306 : 1899502 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1307 : : {
1308 : : bool mustwait;
1309 : :
1044 peter@eisentraut.org 1310 [ + + - + ]: 1899502 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1311 : :
1312 : : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1313 : :
1314 : : /* Ensure we will have room to remember the lock */
7456 tgl@sss.pgh.pa.us 1315 [ - + ]: 1899502 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
7456 tgl@sss.pgh.pa.us 1316 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1317 : :
1318 : : /*
1319 : : * Lock out cancel/die interrupts until we exit the code section protected
1320 : : * by the LWLock. This ensures that interrupts will not interfere with
1321 : : * manipulations of data structures in shared memory.
1322 : : */
8743 tgl@sss.pgh.pa.us 1323 :CBC 1899502 : HOLD_INTERRUPTS();
1324 : :
1325 : : /* Check for the lock */
3908 andres@anarazel.de 1326 : 1899502 : mustwait = LWLockAttemptLock(lock, mode);
1327 : :
8743 tgl@sss.pgh.pa.us 1328 [ + + ]: 1899502 : if (mustwait)
1329 : : {
1330 : : /* Failed to get lock, so release interrupt holdoff */
1331 [ - + ]: 882 : RESUME_INTERRUPTS();
1332 : :
1333 : : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1334 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1335 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1336 : : }
1337 : : else
1338 : : {
1339 : : /* Add lock to list of locks held by this backend */
3908 andres@anarazel.de 1340 : 1898620 : held_lwlocks[num_held_lwlocks].lock = lock;
1341 : 1898620 : held_lwlocks[num_held_lwlocks++].mode = mode;
1342 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1343 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1344 : : }
8743 tgl@sss.pgh.pa.us 1345 : 1899502 : return !mustwait;
1346 : : }
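/*
 * Because failure has no side effects, LWLockConditionalAcquire() suits
 * optional work that can simply be skipped under contention; a hedged
 * sketch (the lock and the work are illustrative):
 */
static bool
example_try_optional_work(LWLock *lock)
{
    if (!LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
        return false;           /* busy: skip rather than block */

    /* ... do the optional work under the lock ... */
    LWLockRelease(lock);
    return true;
}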
1347 : :
1348 : : /*
1349 : : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1350 : : *
1351 : : * The semantics of this function are a bit funky. If the lock is currently
1352 : : * free, it is acquired in the given mode, and the function returns true. If
1353 : : * the lock isn't immediately free, the function waits until it is released
1354 : : * and returns false, but does not acquire the lock.
1355 : : *
1356 : : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1357 : : * holding WALWriteLock, it can flush the commit records of many other
1358 : : * backends as a side-effect. Those other backends need to wait until the
1359 : : * flush finishes, but don't need to acquire the lock anymore. They can just
1360 : : * wake up, observe that their records have already been flushed, and return.
1361 : : */
1362 : : bool
4002 rhaas@postgresql.org 1363 : 129896 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1364 : : {
4968 heikki.linnakangas@i 1365 : 129896 : PGPROC *proc = MyProc;
1366 : : bool mustwait;
1367 : 129896 : int extraWaits = 0;
1368 : : #ifdef LWLOCK_STATS
1369 : : lwlock_stats *lwstats;
1370 : :
1371 : : lwstats = get_lwlock_stats_entry(lock);
1372 : : #endif
1373 : :
3908 andres@anarazel.de 1374 [ + - - + ]: 129896 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1375 : :
1376 : : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1377 : :
1378 : : /* Ensure we will have room to remember the lock */
4968 heikki.linnakangas@i 1379 [ - + ]: 129896 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
4968 heikki.linnakangas@i 1380 [ # # ]:UBC 0 : elog(ERROR, "too many LWLocks taken");
1381 : :
1382 : : /*
1383 : : * Lock out cancel/die interrupts until we exit the code section protected
1384 : : * by the LWLock. This ensures that interrupts will not interfere with
1385 : : * manipulations of data structures in shared memory.
1386 : : */
4968 heikki.linnakangas@i 1387 :CBC 129896 : HOLD_INTERRUPTS();
1388 : :
1389 : : /*
1390 : : * NB: We're using nearly the same twice-in-a-row lock acquisition
1391 : : * protocol as LWLockAcquire(). Check its comments for details.
1392 : : */
3908 andres@anarazel.de 1393 : 129896 : mustwait = LWLockAttemptLock(lock, mode);
1394 : :
4968 heikki.linnakangas@i 1395 [ + + ]: 129896 : if (mustwait)
1396 : : {
3908 andres@anarazel.de 1397 : 2319 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1398 : :
1399 : 2319 : mustwait = LWLockAttemptLock(lock, mode);
1400 : :
1401 [ + + ]: 2319 : if (mustwait)
1402 : : {
1403 : : /*
1404 : : * Wait until awakened. Like in LWLockAcquire, be prepared for
1405 : : * bogus wakeups.
1406 : : */
1407 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1408 : :
1409 : : #ifdef LWLOCK_STATS
1410 : : lwstats->block_count++;
1411 : : #endif
1412 : :
3467 rhaas@postgresql.org 1413 : 2250 : LWLockReportWaitStart(lock);
1414 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1415 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1416 : :
1417 : : for (;;)
1418 : : {
3190 tgl@sss.pgh.pa.us 1419 : 2250 : PGSemaphoreLock(proc->sem);
1021 andres@anarazel.de 1420 [ + - ]: 2250 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
3908 1421 : 2250 : break;
3908 andres@anarazel.de 1422 :UBC 0 : extraWaits++;
1423 : : }
1424 : :
1425 : : #ifdef LOCK_DEBUG
1426 : : {
1427 : : /* not waiting anymore */
1428 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1429 : :
1430 : : Assert(nwaiters < MAX_BACKENDS);
1431 : : }
1432 : : #endif
1433 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1434 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
3467 rhaas@postgresql.org 1435 :CBC 2250 : LWLockReportWaitEnd();
1436 : :
1437 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1438 : : }
1439 : : else
1440 : : {
1441 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1442 : :
1443 : : /*
1444 : : * Got lock in the second attempt, undo queueing. We need to treat
1445 : : * this as having successfully acquired the lock, otherwise we'd
1446 : : * not necessarily wake up people we've prevented from acquiring
1447 : : * the lock.
1448 : : */
3908 andres@anarazel.de 1449 : 69 : LWLockDequeueSelf(lock);
1450 : : }
1451 : : }
1452 : :
1453 : : /*
1454 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1455 : : */
4968 heikki.linnakangas@i 1456 [ - + ]: 129896 : while (extraWaits-- > 0)
3190 tgl@sss.pgh.pa.us 1457 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1458 : :
4968 heikki.linnakangas@i 1459 [ + + ]:CBC 129896 : if (mustwait)
1460 : : {
1461 : : /* Failed to get lock, so release interrupt holdoff */
1462 [ - + ]: 2250 : RESUME_INTERRUPTS();
1463 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1464 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1465 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1466 : : }
1467 : : else
1468 : : {
1469 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1470 : : /* Add lock to list of locks held by this backend */
3908 andres@anarazel.de 1471 : 127646 : held_lwlocks[num_held_lwlocks].lock = lock;
1472 : 127646 : held_lwlocks[num_held_lwlocks++].mode = mode;
1473 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1474 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1475 : : }
1476 : :
4968 heikki.linnakangas@i 1477 : 129896 : return !mustwait;
1478 : : }
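
/*
 * Illustrative sketch (not part of lwlock.c): the flush pattern described
 * in the comment above LWLockAcquireOrWait(), in the style of XLogFlush().
 * FlushedUpTo() and PerformFlush() are hypothetical helpers standing in for
 * xlog.c's real bookkeeping, and WriteRqstPtr is a stand-in for the
 * requested flush position; this is a hedged approximation, not the actual
 * xlog.c code.
 */
for (;;)
{
    /* Done if another backend's flush already covered our records. */
    if (FlushedUpTo(WriteRqstPtr))
        break;

    if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
    {
        /* Got the lock; flush up to the requested point ourselves. */
        PerformFlush(WriteRqstPtr);
        LWLockRelease(WALWriteLock);
        break;
    }

    /*
     * The lock was held; we slept until the holder released it.  Loop
     * back to check whether its flush covered our records already.
     */
}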
1479 : :
1480 : : /*
1481 : : * In the lwlock's current state, does the caller need to wait for the
1482 : : * variable's value to change?
1483 : : *
1484 : : * If we don't need to wait, and it's because the value of the variable has
1485 : : * changed, store the current value in *newval.
1486 : : *
1487 : : * *result is set to true if the lock was free, and false otherwise.
1488 : : */
1489 : : static bool
774 michael@paquier.xyz 1490 : 3936899 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1491 : : uint64 *newval, bool *result)
1492 : : {
1493 : : bool mustwait;
1494 : : uint64 value;
1495 : :
1496 : : /*
1497 : : * Test first to see if the lock is free right now.
1498 : : *
1499 : : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1500 : : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1501 : : * this, so we don't need a memory barrier here as far as the current
1502 : : * usage is concerned. But that might not be safe in general.
1503 : : */
3690 andres@anarazel.de 1504 : 3936899 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1505 : :
1506 [ + + ]: 3936899 : if (!mustwait)
1507 : : {
1508 : 2552346 : *result = true;
1509 : 2552346 : return false;
1510 : : }
1511 : :
1512 : 1384553 : *result = false;
1513 : :
1514 : : /*
1515 : : * Reading this value atomically is safe even on platforms where uint64
1516 : : * cannot be read without observing a torn value.
1517 : : */
774 michael@paquier.xyz 1518 : 1384553 : value = pg_atomic_read_u64(valptr);
1519 : :
3690 andres@anarazel.de 1520 [ + + ]: 1384553 : if (value != oldval)
1521 : : {
1522 : 1145947 : mustwait = false;
1523 : 1145947 : *newval = value;
1524 : : }
1525 : : else
1526 : : {
1527 : 238606 : mustwait = true;
1528 : : }
1529 : :
1530 : 1384553 : return mustwait;
1531 : : }
1532 : :
1533 : : /*
1534 : : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1535 : : *
1536 : : * If the lock is held and *valptr equals oldval, waits until the lock is
1537 : : * either freed, or the lock holder updates *valptr by calling
1538 : : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1539 : : * waiting), returns true. If the lock is still held, but *valptr no longer
1540 : : * matches oldval, returns false and sets *newval to the current value in
1541 : : * *valptr.
1542 : : *
1543 : : * Note: this function ignores shared lock holders; if the lock is held
1544 : : * in shared mode, returns 'true'.
1545 : : *
1546 : : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1547 : : * hence the caller of this function may want to rely on an explicit barrier or
1548 : : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1549 : : */
1550 : : bool
774 michael@paquier.xyz 1551 : 3698293 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1552 : : uint64 *newval)
1553 : : {
4187 heikki.linnakangas@i 1554 : 3698293 : PGPROC *proc = MyProc;
1555 : 3698293 : int extraWaits = 0;
1556 : 3698293 : bool result = false;
1557 : : #ifdef LWLOCK_STATS
1558 : : lwlock_stats *lwstats;
1559 : :
1560 : : lwstats = get_lwlock_stats_entry(lock);
1561 : : #endif
1562 : :
1563 : : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1564 : :
1565 : : /*
1566 : : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1567 : : * cleanup mechanism to remove us from the wait queue if we got
1568 : : * interrupted.
1569 : : */
1570 : 3698293 : HOLD_INTERRUPTS();
1571 : :
1572 : : /*
1573 : : * Loop here to check the lock's status after each time we are signaled.
1574 : : */
1575 : : for (;;)
1576 : 118918 : {
1577 : : bool mustwait;
1578 : :
3690 andres@anarazel.de 1579 : 3817211 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1580 : : &result);
1581 : :
4187 heikki.linnakangas@i 1582 [ + + ]: 3817211 : if (!mustwait)
1583 : 3697523 : break; /* the lock was free or value didn't match */
1584 : :
1585 : : /*
1586 : : * Add myself to wait queue. Note that this is racy, somebody else
1587 : : * could wake us up before we're finished queuing. NB: We're using nearly
1588 : : * the same twice-in-a-row lock acquisition protocol as
1589 : : * LWLockAcquire(). Check its comments for details. The only
1590 : : * difference is that we also have to check the variable's values when
1591 : : * checking the state of the lock.
1592 : : */
3908 andres@anarazel.de 1593 : 119688 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1594 : :
1595 : : /*
1596 : : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1597 : : * lock is released.
1598 : : */
1599 : 119688 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1600 : :
1601 : : /*
1602 : : * We're now guaranteed to be woken up if necessary. Recheck the lock
1603 : : * and variable's state.
1604 : : */
3690 1605 : 119688 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1606 : : &result);
1607 : :
1608 : : /* Ok, no conflict after we queued ourselves. Undo queueing. */
3908 1609 [ + + ]: 119688 : if (!mustwait)
1610 : : {
1611 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1612 : :
1613 : 770 : LWLockDequeueSelf(lock);
1614 : 770 : break;
1615 : : }
1616 : :
1617 : : /*
1618 : : * Wait until awakened.
1619 : : *
1620 : : * It is possible that we get awakened for a reason other than being
1621 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1622 : : * we're done waiting, re-increment the sema by the number of
1623 : : * additional signals received.
1624 : : */
1625 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1626 : :
1627 : : #ifdef LWLOCK_STATS
1628 : : lwstats->block_count++;
1629 : : #endif
1630 : :
3467 rhaas@postgresql.org 1631 : 118918 : LWLockReportWaitStart(lock);
1632 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1633 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1634 : :
1635 : : for (;;)
1636 : : {
3190 tgl@sss.pgh.pa.us 1637 : 118918 : PGSemaphoreLock(proc->sem);
1021 andres@anarazel.de 1638 [ + - ]: 118918 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
4187 heikki.linnakangas@i 1639 : 118918 : break;
4187 heikki.linnakangas@i 1640 :UBC 0 : extraWaits++;
1641 : : }
1642 : :
1643 : : #ifdef LOCK_DEBUG
1644 : : {
1645 : : /* not waiting anymore */
1646 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1647 : :
1648 : : Assert(nwaiters < MAX_BACKENDS);
1649 : : }
1650 : : #endif
1651 : :
1652 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1653 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
3467 rhaas@postgresql.org 1654 :CBC 118918 : LWLockReportWaitEnd();
1655 : :
1656 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1657 : :
1658 : : /* Now loop back and check the status of the lock again. */
1659 : : }
1660 : :
1661 : : /*
1662 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1663 : : */
4187 heikki.linnakangas@i 1664 [ - + ]: 3698293 : while (extraWaits-- > 0)
3190 tgl@sss.pgh.pa.us 1665 :UBC 0 : PGSemaphoreUnlock(proc->sem);
1666 : :
1667 : : /*
1668 : : * Now okay to allow cancel/die interrupts.
1669 : : */
4187 heikki.linnakangas@i 1670 [ - + ]:CBC 3698293 : RESUME_INTERRUPTS();
1671 : :
1672 : 3698293 : return result;
1673 : : }
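
/*
 * Illustrative sketch (not part of lwlock.c): a waiter-side loop in the
 * style of WaitXLogInsertionsToFinish().  'insertlock' is a hypothetical
 * struct embedding an LWLock and a pg_atomic_uint64 position, and 'upto'
 * is a hypothetical bound; a hedged approximation, not the actual xlog.c
 * code.
 */
uint64      insertingat = 0;

do
{
    uint64      newval;

    if (LWLockWaitForVar(&insertlock->lock, &insertlock->insertingAt,
                         insertingat, &newval))
    {
        /* Lock is free: the holder is done, nothing left to wait for. */
        break;
    }

    /* Holder advanced the variable; resume waiting from the new value. */
    insertingat = newval;
} while (insertingat < upto);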
1674 : :
1675 : :
1676 : : /*
1677 : : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1678 : : *
1679 : : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1680 : : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1681 : : * waiting processes so that any process calling LWLockWaitForVar() on the same
1682 : : * lock is guaranteed to see the new value, and act accordingly.
1683 : : *
1684 : : * The caller must be holding the lock in exclusive mode.
1685 : : */
1686 : : void
774 michael@paquier.xyz 1687 : 2271744 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1688 : : {
1689 : : proclist_head wakeup;
1690 : : proclist_mutable_iter iter;
1691 : :
1692 : : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1693 : :
1694 : : /*
1695 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1696 : : * that the variable is updated before waking up waiters.
1697 : : */
1698 : 2271744 : pg_atomic_exchange_u64(valptr, val);
1699 : :
3309 rhaas@postgresql.org 1700 : 2271744 : proclist_init(&wakeup);
1701 : :
3436 andres@anarazel.de 1702 : 2271744 : LWLockWaitListLock(lock);
1703 : :
3908 1704 [ - + ]: 2271744 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1705 : :
1706 : : /*
1707 : : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1708 : : * up. They are always in the front of the queue.
1709 : : */
3309 rhaas@postgresql.org 1710 [ + + + + ]: 2272249 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1711 : : {
1712 : 57518 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1713 : :
3908 andres@anarazel.de 1714 [ + + ]: 57518 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1715 : 57013 : break;
1716 : :
3309 rhaas@postgresql.org 1717 : 505 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1718 : 505 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1719 : :
1720 : : /* see LWLockWakeup() */
1021 andres@anarazel.de 1721 [ - + ]: 505 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1722 [ + + ]: 505 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1723 : : }
1724 : :
1725 : : /* We are done updating shared state of the lock itself. */
3436 1726 : 2271744 : LWLockWaitListUnlock(lock);
1727 : :
1728 : : /*
1729 : : * Awaken any waiters I removed from the queue.
1730 : : */
3309 rhaas@postgresql.org 1731 [ + + + + : 2272249 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1732 : : {
1733 : 505 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1734 : :
1735 : 505 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1736 : : /* check comment in LWLockWakeup() about this barrier */
3914 andres@anarazel.de 1737 : 505 : pg_write_barrier();
1021 1738 : 505 : waiter->lwWaiting = LW_WS_NOT_WAITING;
3190 tgl@sss.pgh.pa.us 1739 : 505 : PGSemaphoreUnlock(waiter->sem);
1740 : : }
4187 heikki.linnakangas@i 1741 : 2271744 : }
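
/*
 * Illustrative sketch (not part of lwlock.c): the holder's side of the
 * variable protocol, pairing with the LWLockWaitForVar() sketch above.
 * 'mylock' and its 'progress' member are hypothetical.
 */
LWLockAcquire(&mylock->lock, LW_EXCLUSIVE);

/*
 * Publish progress.  Any LWLockWaitForVar() caller whose 'oldval' is not
 * 1000 returns false immediately with *newval set to 1000; callers that
 * passed 1000 keep sleeping until the next update or until release.
 */
LWLockUpdateVar(&mylock->lock, &mylock->progress, 1000);

/* ... more work, possibly further updates ... */

LWLockRelease(&mylock->lock);   /* remaining waiters wake on release */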
1742 : :
1743 : :
1744 : : /*
1745 : : * Stop treating lock as held by current backend.
1746 : : *
1747 : : * This is the code that can be shared between actually releasing a lock
1748 : : * (LWLockRelease()) and just not tracking ownership of the lock anymore
1749 : : * without releasing the lock (LWLockDisown()).
1750 : : *
1751 : : * Returns the mode in which the lock was held by the current backend.
1752 : : *
1753 : : * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
1754 : : * to the caller.
1755 : : *
1756 : : * NB: This will leave lock->owner pointing to the current backend (if
1757 : : * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
1758 : : * debug cases of missing wakeups during lock release.
1759 : : */
1760 : : static inline LWLockMode
197 andres@anarazel.de 1761 : 230961014 : LWLockDisownInternal(LWLock *lock)
1762 : : {
1763 : : LWLockMode mode;
1764 : : int i;
1765 : :
1766 : : /*
1767 : : * Remove lock from list of locks held. Usually, but not always, it will
1768 : : * be the latest-acquired lock; so search array backwards.
1769 : : */
8717 bruce@momjian.us 1770 [ + - ]: 256152484 : for (i = num_held_lwlocks; --i >= 0;)
3908 andres@anarazel.de 1771 [ + + ]: 256152484 : if (lock == held_lwlocks[i].lock)
8743 tgl@sss.pgh.pa.us 1772 : 230961014 : break;
1773 : :
1774 [ - + ]: 230961014 : if (i < 0)
3186 rhaas@postgresql.org 1775 [ # # ]:UBC 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1776 : :
3196 sfrost@snowman.net 1777 :CBC 230961014 : mode = held_lwlocks[i].mode;
1778 : :
8743 tgl@sss.pgh.pa.us 1779 : 230961014 : num_held_lwlocks--;
1780 [ + + ]: 256152484 : for (; i < num_held_lwlocks; i++)
8717 bruce@momjian.us 1781 : 25191470 : held_lwlocks[i] = held_lwlocks[i + 1];
1782 : :
197 andres@anarazel.de 1783 : 230961014 : return mode;
1784 : : }
1785 : :
1786 : : /*
1787 : : * Helper function to release lock, shared between LWLockRelease() and
1788 : : * LWLockReleaseDisowned().
1789 : : */
1790 : : static void
1791 : 230961014 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
1792 : : {
1793 : : uint32 oldstate;
1794 : : bool check_waiters;
1795 : :
1796 : : /*
1797 : : * Release my hold on lock, after that it can immediately be acquired by
1798 : : * others, even if we still have to wakeup other waiters.
1799 : : */
3908 1800 [ + + ]: 230961014 : if (mode == LW_EXCLUSIVE)
1801 : 114232619 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1802 : : else
1803 : 116728395 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1804 : :
1805 : : /* nobody else can have that kind of lock */
1806 [ - + ]: 230961014 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1807 : :
1808 : : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1809 : : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1810 : :
1811 : : /*
1812 : : * We're still waiting for backends to get scheduled, don't wake them up
1813 : : * again.
1814 : : */
1815 [ + + ]: 230961014 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1816 : 273120 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1817 [ + + ]: 273120 : (oldstate & LW_LOCK_MASK) == 0)
1818 : 246333 : check_waiters = true;
1819 : : else
1820 : 230714681 : check_waiters = false;
1821 : :
1822 : : /*
1823 : : * As waking up waiters requires the spinlock to be acquired, only do so
1824 : : * if necessary.
1825 : : */
1826 [ + + ]: 230961014 : if (check_waiters)
1827 : : {
1828 : : /* XXX: remove before commit? */
1829 : : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1830 : 246333 : LWLockWakeup(lock);
1831 : : }
197 1832 : 230961014 : }
1833 : :
1834 : :
1835 : : /*
1836 : : * Stop treating lock as held by current backend.
1837 : : *
1838 : : * After calling this function it's the caller's responsibility to ensure that
1839 : : * the lock gets released (via LWLockReleaseDisowned()), even in case of an
1840 : : * error. This is only desirable if the lock is going to be released in a
1841 : : * different process than the process that acquired it.
1842 : : */
1843 : : void
197 andres@anarazel.de 1844 :UBC 0 : LWLockDisown(LWLock *lock)
1845 : : {
1846 : 0 : LWLockDisownInternal(lock);
1847 : :
1848 [ # # ]: 0 : RESUME_INTERRUPTS();
1849 : 0 : }
1850 : :
1851 : : /*
1852 : : * LWLockRelease - release a previously acquired lock
1853 : : */
1854 : : void
197 andres@anarazel.de 1855 :CBC 230961014 : LWLockRelease(LWLock *lock)
1856 : : {
1857 : : LWLockMode mode;
1858 : :
1859 : 230961014 : mode = LWLockDisownInternal(lock);
1860 : :
1861 : : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1862 : :
1863 : 230961014 : LWLockReleaseInternal(lock, mode);
1864 : :
1865 : : /*
1866 : : * Now okay to allow cancel/die interrupts.
1867 : : */
8743 tgl@sss.pgh.pa.us 1868 [ - + ]: 230961014 : RESUME_INTERRUPTS();
1869 : 230961014 : }
1870 : :
1871 : : /*
1872 : : * Release lock previously disowned with LWLockDisown().
1873 : : */
1874 : : void
197 andres@anarazel.de 1875 :UBC 0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
1876 : : {
1877 : 0 : LWLockReleaseInternal(lock, mode);
1878 : 0 : }
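
/*
 * Illustrative sketch (not part of lwlock.c): handing a lock off across
 * processes with the disown protocol.  How the lock pointer and mode reach
 * the releasing process is up to the caller; the shared-memory plumbing is
 * omitted here.
 */

/* In the process that acquires the lock: */
LWLockAcquire(lock, LW_EXCLUSIVE);
LWLockDisown(lock);             /* stop tracking it as held by us */

/* Later, in whichever process finishes the work: */
LWLockReleaseDisowned(lock, LW_EXCLUSIVE);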
1879 : :
1880 : : /*
1881 : : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1882 : : */
1883 : : void
774 michael@paquier.xyz 1884 :CBC 13891012 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1885 : : {
1886 : : /*
1887 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1888 : : * that the variable is updated before releasing the lock.
1889 : : */
1890 : 13891012 : pg_atomic_exchange_u64(valptr, val);
1891 : :
3690 andres@anarazel.de 1892 : 13891012 : LWLockRelease(lock);
1893 : 13891012 : }
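
/*
 * Illustrative sketch (not part of lwlock.c): in the holder-side sketch
 * after LWLockUpdateVar() above, the final LWLockRelease() could instead
 * reset the (hypothetical) variable to its "free" value while releasing,
 * so later waiters don't observe a stale progress value:
 */
LWLockReleaseClearVar(&mylock->lock, &mylock->progress, 0);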
1894 : :
1895 : :
1896 : : /*
1897 : : * LWLockReleaseAll - release all currently-held locks
1898 : : *
1899 : : * Used to clean up after ereport(ERROR). An important difference between this
1900 : : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1901 : : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1902 : : * has been set to an appropriate level earlier in error recovery. We could
1903 : : * decrement it below zero if we allow it to drop for each released lock!
1904 : : */
1905 : : void
8743 tgl@sss.pgh.pa.us 1906 : 52344 : LWLockReleaseAll(void)
1907 : : {
1908 [ + + ]: 52546 : while (num_held_lwlocks > 0)
1909 : : {
1910 : 202 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1911 : :
3908 andres@anarazel.de 1912 : 202 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1913 : : }
8743 tgl@sss.pgh.pa.us 1914 : 52344 : }
1915 : :
1916 : :
1917 : : /*
1918 : : * ForEachLWLockHeldByMe - run a callback for each held lock
1919 : : *
1920 : : * This is meant as debug support only.
1921 : : */
1922 : : void
142 noah@leadboat.com 1923 : 101211031 : ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
1924 : : void *context)
1925 : : {
1926 : : int i;
1927 : :
1928 [ + + ]: 101338473 : for (i = 0; i < num_held_lwlocks; i++)
1929 : 127442 : callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context);
1930 : 101211031 : }
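
/*
 * Illustrative sketch (not part of lwlock.c): a hypothetical debugging
 * callback that logs every lock the current backend holds.
 */
static void
log_held_lwlock(LWLock *lock, LWLockMode mode, void *context)
{
    elog(DEBUG1, "holding LWLock %s in mode %d", T_NAME(lock), (int) mode);
}

/* ... then, at some point of interest ... */
ForEachLWLockHeldByMe(log_held_lwlock, NULL);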
1931 : :
1932 : : /*
1933 : : * LWLockHeldByMe - test whether my process holds a lock in any mode
1934 : : *
1935 : : * This is meant as debug support only.
1936 : : */
1937 : : bool
1083 pg@bowt.ie 1938 : 66919500 : LWLockHeldByMe(LWLock *lock)
1939 : : {
1940 : : int i;
1941 : :
7757 tgl@sss.pgh.pa.us 1942 [ + + ]: 103806977 : for (i = 0; i < num_held_lwlocks; i++)
1943 : : {
1083 pg@bowt.ie 1944 [ + + ]: 48988054 : if (held_lwlocks[i].lock == lock)
7757 tgl@sss.pgh.pa.us 1945 : 12100577 : return true;
1946 : : }
1947 : 54818923 : return false;
1948 : : }
1949 : :
1950 : : /*
1951 : : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1952 : : *
1953 : : * This is meant as debug support only.
1954 : : */
1955 : : bool
1083 pg@bowt.ie 1956 : 1152238 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1957 : : {
1958 : : char *held_lock_addr;
1959 : : char *begin;
1960 : : char *end;
1961 : : int i;
1962 : :
1963 : 1152238 : begin = (char *) lock;
1153 tmunro@postgresql.or 1964 : 1152238 : end = begin + nlocks * stride;
1965 [ + + ]: 1155100 : for (i = 0; i < num_held_lwlocks; i++)
1966 : : {
1967 : 2862 : held_lock_addr = (char *) held_lwlocks[i].lock;
1968 [ - + - - ]: 2862 : if (held_lock_addr >= begin &&
1153 tmunro@postgresql.or 1969 :UBC 0 : held_lock_addr < end &&
1970 [ # # ]: 0 : (held_lock_addr - begin) % stride == 0)
1971 : 0 : return true;
1972 : : }
1153 tmunro@postgresql.or 1973 :CBC 1152238 : return false;
1974 : : }
1975 : :
1976 : : /*
1977 : : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1978 : : *
1979 : : * This is meant as debug support only.
1980 : : */
1981 : : bool
1083 pg@bowt.ie 1982 : 49941353 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1983 : : {
1984 : : int i;
1985 : :
3288 simon@2ndQuadrant.co 1986 [ + - ]: 55818547 : for (i = 0; i < num_held_lwlocks; i++)
1987 : : {
1083 pg@bowt.ie 1988 [ + + + - ]: 55818547 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
3288 simon@2ndQuadrant.co 1989 : 49941353 : return true;
1990 : : }
3288 simon@2ndQuadrant.co 1991 :UBC 0 : return false;
1992 : : }
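
/*
 * Illustrative sketch (not part of lwlock.c): the common use of
 * LWLockHeldByMeInMode() is asserting a function's locking contract on
 * entry.
 */
Assert(LWLockHeldByMeInMode(lock, LW_EXCLUSIVE));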