Skip to content

Commit 0f647dc

Browse files
committed
Fix GH-13437: FPM: ERROR: scoreboard: failed to lock (already locked)
This changes locking for scoreboard to reduce contention between readers and adds retries for acquiring scoreboard for read.
1 parent 5fc37b1 commit 0f647dc

File tree

3 files changed

+83
-8
lines changed

3 files changed

+83
-8
lines changed

sapi/fpm/fpm/fpm_atomic.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,25 @@ static inline int fpm_spinlock(atomic_t *lock, int try_once) /* {{{ */
156156
}
157157
/* }}} */
158158

159+
static inline int fpm_spinlock_with_max_retries(atomic_t *lock, unsigned int max_retries)
160+
{
161+
unsigned int retries = 0;
162+
163+
for (;;) {
164+
if (atomic_cmp_set(lock, 0, 1)) {
165+
return 1;
166+
}
167+
168+
sched_yield();
169+
170+
if (++retries > max_retries) {
171+
return 0;
172+
}
173+
}
174+
175+
return 1;
176+
}
177+
159178
#define fpm_unlock(lock) lock = 0
160179

161180
#endif

sapi/fpm/fpm/fpm_scoreboard.c

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,17 @@ int fpm_scoreboard_init_main(void)
7373
return 0;
7474
}
7575

76+
static inline void fpm_scoreboard_readers_decrement(struct fpm_scoreboard_s *scoreboard)
77+
{
78+
fpm_spinlock(&scoreboard->lock, 1);
79+
if (scoreboard->reader_count > 0) {
80+
scoreboard->reader_count -= 1;
81+
}
82+
unsigned int current_reader_count = (unsigned) scoreboard->reader_count;
83+
fpm_unlock(scoreboard->lock);
84+
zlog(ZLOG_DEBUG, "scoreboard: for proc %d reader decremented to value %u", getpid(), current_reader_count);
85+
}
86+
7687
static struct fpm_scoreboard_s *fpm_scoreboard_get_for_update(struct fpm_scoreboard_s *scoreboard) /* {{{ */
7788
{
7889
if (!scoreboard) {
@@ -93,7 +104,30 @@ void fpm_scoreboard_update_begin(struct fpm_scoreboard_s *scoreboard) /* {{{ */
93104
return;
94105
}
95106

96-
fpm_spinlock(&scoreboard->lock, 0);
107+
int retries = 0;
108+
while (1) {
109+
fpm_spinlock(&scoreboard->lock, 1);
110+
if (scoreboard->reader_count == 0) {
111+
if (!fpm_spinlock_with_max_retries(&scoreboard->writer_active, FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES)) {
112+
/* in this case the writer might have crashed so just warn and continue as the lock was acquired */
113+
zlog(ZLOG_WARNING, "scoreboard: writer %d waited too long for another writer to release lock.", getpid());
114+
} else {
115+
zlog(ZLOG_DEBUG, "scoreboard: writer lock acquired by writer %d", getpid());
116+
}
117+
fpm_unlock(scoreboard->lock);
118+
break;
119+
}
120+
fpm_unlock(scoreboard->lock);
121+
122+
if (++retries > FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES) {
123+
/* decrement reader count by 1 (assuming a killed or crashed reader) */
124+
fpm_scoreboard_readers_decrement(scoreboard);
125+
zlog(ZLOG_WARNING, "scoreboard: writer detected a potential crashed reader, decrementing reader count.");
126+
retries = 0;
127+
}
128+
129+
sched_yield();
130+
}
97131
}
98132
/* }}} */
99133

@@ -170,7 +204,8 @@ void fpm_scoreboard_update_commit(
170204
scoreboard->active_max = scoreboard->active;
171205
}
172206

173-
fpm_unlock(scoreboard->lock);
207+
fpm_unlock(scoreboard->writer_active);
208+
zlog(ZLOG_DEBUG, "scoreboard: writer lock released by writer %d", getpid());
174209
}
175210
/* }}} */
176211

@@ -234,16 +269,33 @@ struct fpm_scoreboard_proc_s *fpm_scoreboard_proc_get_from_child(struct fpm_chil
234269

235270
struct fpm_scoreboard_s *fpm_scoreboard_acquire(struct fpm_scoreboard_s *scoreboard, int nohang) /* {{{ */
236271
{
237-
struct fpm_scoreboard_s *s;
238-
239-
s = scoreboard ? scoreboard : fpm_scoreboard;
272+
struct fpm_scoreboard_s *s = scoreboard ? scoreboard : fpm_scoreboard;
240273
if (!s) {
241274
return NULL;
242275
}
243276

244-
if (!fpm_spinlock(&s->lock, nohang)) {
245-
return NULL;
277+
int retries = 0;
278+
while (1) {
279+
/* increment reader if no writer active */
280+
fpm_spinlock(&scoreboard->lock, 1);
281+
if (!s->writer_active) {
282+
scoreboard->reader_count += 1;
283+
unsigned int current_reader_count = (unsigned) scoreboard->reader_count;
284+
fpm_unlock(scoreboard->lock);
285+
zlog(ZLOG_DEBUG, "scoreboard: for proc %d reader incremented to value %u", getpid(), current_reader_count);
286+
break;
287+
}
288+
fpm_unlock(scoreboard->lock);
289+
290+
sched_yield();
291+
292+
if (++retries > FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES) {
293+
zlog(ZLOG_WARNING, "scoreboard: reader waited too long for writer to release lock.");
294+
fpm_scoreboard_readers_decrement(s);
295+
return NULL;
296+
}
246297
}
298+
247299
return s;
248300
}
249301
/* }}} */
@@ -253,7 +305,7 @@ void fpm_scoreboard_release(struct fpm_scoreboard_s *scoreboard) {
253305
return;
254306
}
255307

256-
scoreboard->lock = 0;
308+
fpm_scoreboard_readers_decrement(scoreboard);
257309
}
258310

259311
struct fpm_scoreboard_s *fpm_scoreboard_copy(struct fpm_scoreboard_s *scoreboard, int copy_procs)

sapi/fpm/fpm/fpm_scoreboard.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#define FPM_SCOREBOARD_LOCK_HANG 0
1919
#define FPM_SCOREBOARD_LOCK_NOHANG 1
2020

21+
#define FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES 50000
22+
2123
struct fpm_scoreboard_proc_s {
2224
union {
2325
atomic_t lock;
@@ -52,6 +54,8 @@ struct fpm_scoreboard_s {
5254
atomic_t lock;
5355
char dummy[16];
5456
};
57+
atomic_t writer_active;
58+
atomic_t reader_count;
5559
char pool[32];
5660
int pm;
5761
time_t start_epoch;

0 commit comments

Comments
 (0)