Description
Hello,
We are using OpenResty 1.13.6.1 (1.el7.centos) to terminate HTTPS traffic for multiple domains, using the lua-resty-auto-ssl module.
One of our instances randomly stopped responding to traffic, and did not cover on its own. It turned out that both nginx worker processes appear to be wedged waiting for a futex.
Stacktraces on both processes look rather similar, although the details vary. Here is one: ngx_http_lua_ssl_cert_handler -> ngx_http_lua_ssl_cert_by_chunk -> ngx_http_lua_run_thread -> ngx_http_lua_shdict_get_helper -> lua_pushlstring -> lj_gc_step -> gc_onestep -> gc_finalize -> gc_call_finalizer -> lj_BC_FUNCC -> ngx_http_lua_shdict_set_helper -> ngx_shmtx_lock -> sem_wait@@GLIBC_2.2.5
.
I am wondering whether the apparent recursion between ngx_http_lua_shdict_get_helper
and ngx_http_lua_shdict_set_helper
might have caused an issue, as I see both functions acquiring the same lock? Or is there some subtle race condition between the two worker processes?
Here is the stacktrace of the first worker process:
#0 0x00007f1ace2fda0b in do_futex_wait.constprop.1 ()
from /lib64/libpthread.so.0
No symbol table info available.
#1 0x00007f1ace2fda9f in __new_sem_wait_slow.constprop.0 ()
from /lib64/libpthread.so.0
No symbol table info available.
#2 0x00007f1ace2fdb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0
No symbol table info available.
#3 0x000000000042e5f8 in ngx_shmtx_lock (mtx=0x7f1ac5de1068)
at src/core/ngx_shmtx.c:111
i = <optimized out>
n = <optimized out>
#4 0x00000000004ef772 in ngx_http_lua_shdict_set_helper (L=0x413061a8,
flags=0) at ../ngx_lua-0.10.11/src/ngx_http_lua_shdict.c:1016
i = <optimized out>
n = <optimized out>
key = {len = 31, data = 0x40dc1d70 "issue_cert:www.domainA.com"}
rc = <optimized out>
ctx = 0x23df390
sd = 0x5a8279995a827999
value = {len = 0, data = 0x0}
value_type = 0
num = 6.9047205714551806e-310
c = 0 '\000'
exptime = <optimized out>
p = <optimized out>
node = <optimized out>
tp = <optimized out>
zone = 0x23df410
forcible = 0
user_flags = 0
q = <optimized out>
#5 0x00007f1acde48ae3 in lj_BC_FUNCC ()
from /usr/local/openresty/luajit/lib/libluajit-5.1.so.2
No symbol table info available.
#6 0x00007f1acde4ad06 in gc_call_finalizer (g=g@entry=0x402703b8,
L=L@entry=0x413061a8, mo=mo@entry=0x7ffd578b22b0, o=o@entry=0x40dc1d18)
at lj_gc.c:475
oldh = 0 '\000'
oldt = 435326
errcode = <optimized out>
top = <optimized out>
#7 0x00007f1acde4ae68 in gc_finalize (L=0x413061a8) at lj_gc.c:509
tmp = {u64 = 18446744036148519760, n = -nan(0xffff741302350), {{gcr = {
gcptr32 = 1093673808}, i = 1093673808}, it = 4294967287},
fr = {func = {gcptr32 = 1093673808}, tp = {ftsz = -9, pcr = {
ptr32 = 4294967287}}}, u32 = {lo = 1093673808,
hi = 4294967287}}
tv = <optimized out>
g = 0x402703b8
o = 0x40dc1d18
mo = <optimized out>
#8 0x00007f1acde4b75f in gc_onestep (L=L@entry=0x413061a8) at lj_gc.c:659
g = 0x402703b8
#9 0x00007f1acde4bd4c in lj_gc_step (L=0x413061a8) at lj_gc.c:689
g = 0x402703b8
lim = 1300
ostate = -2
#10 0x00007f1acde581b3 in lua_pushlstring (L=L@entry=0x413061a8,
str=0x7f1ac6f0306a "removed", len=2463) at lj_api.c:639
s = <optimized out>
#11 0x00000000004f034c in ngx_http_lua_shdict_get_helper (L=0x413061a8,
get_stale=0) at ../ngx_lua-0.10.11/src/ngx_http_lua_shdict.c:538
key = {len = 38,
data = 0x4028d458 "domain:fullchain_der:domainB.com"}
ctx = 0x23df390
value = <optimized out>
sd = 0x7f1ac6f03020
num = <optimized out>
zone = 0x23df400
name = {len = <optimized out>, data = 0x23df34a "auto_ssl"}
rc = <optimized out>
value_type = 4
c = <optimized out>
user_flags = <optimized out>
get_stale = 0
L = 0x413061a8
n = <optimized out>
#12 0x00007f1acde48ae3 in lj_BC_FUNCC ()
from /usr/local/openresty/luajit/lib/libluajit-5.1.so.2
No symbol table info available.
#13 0x00000000004e9279 in ngx_http_lua_run_thread (L=L@entry=0x40270378,
r=r@entry=0x245d2f0, ctx=ctx@entry=0x263e030, nrets=nrets@entry=0)
at ../ngx_lua-0.10.11/src/ngx_http_lua_util.c:1010
next_coctx = <optimized out>
parent_coctx = <optimized out>
orig_coctx = 0x263e058
rv = <optimized out>
success = <optimized out>
next_co = <optimized out>
old_co = <optimized out>
err = <optimized out>
msg = <optimized out>
trace = <optimized out>
rc = <optimized out>
old_pool = 0x0
#14 0x0000000000502c45 in ngx_http_lua_ssl_cert_by_chunk (L=0x40270378,
r=0x245d2f0) at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:527
co_ref = 1
rc = <optimized out>
co = 0x413061a8
ctx = 0x263e030
cln = <optimized out>
#15 0x00000000005034a0 in ngx_http_lua_ssl_cert_handler (
ssl_conn=<optimized out>, data=<optimized out>)
at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:315
rc = <optimized out>
c = 0x7f1ace8ef130
fc = 0x7f1ace8da910
r = 0x245d2f0
cln = <optimized out>
hc = 0x24a64a0
lscf = <optimized out>
cctx = 0x24a6558
cscf = <optimized out>
#16 0x00007f1acd674ce0 in ssl3_get_client_hello ()
from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#17 0x00007f1acd679ba6 in ssl3_accept ()
from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#18 0x00007f1acd68a514 in ssl23_accept ()
from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#19 0x000000000044abdf in ngx_ssl_handshake (c=c@entry=0x7f1ace8ef130)
at src/event/ngx_event_openssl.c:1232
n = <optimized out>
sslerr = <optimized out>
err = <optimized out>
#20 0x000000000045dd2a in ngx_http_ssl_handshake (rev=0x241c3e0)
at src/http/ngx_http_request.c:743
p = <optimized out>
buf = "removed", <incomplete sequence \316>
size = <optimized out>
n = <optimized out>
err = <optimized out>
rc = <optimized out>
c = 0x7f1ace8ef130
hc = 0x24a64a0
#21 0x0000000000446ab7 in ngx_epoll_process_events (cycle=<optimized out>,
timer=<optimized out>, flags=<optimized out>)
at src/event/modules/ngx_epoll_module.c:902
events = 1
revents = 1
instance = <optimized out>
i = 0
level = <optimized out>
err = <optimized out>
rev = <optimized out>
wev = <optimized out>
queue = <optimized out>
c = 0x7f1ace8ef130
#22 0x000000000043df3b in ngx_process_events_and_timers (
cycle=cycle@entry=0x23c8550) at src/event/ngx_event.c:252
flags = <optimized out>
timer = <optimized out>
delta = 1512955930530
q = <optimized out>
ev = <optimized out>
#23 0x00000000004449d2 in ngx_worker_process_cycle (
cycle=cycle@entry=0x23c8550, data=data@entry=0x0)
at src/os/unix/ngx_process_cycle.c:815
worker = 0
#24 0x00000000004434b4 in ngx_spawn_process (cycle=cycle@entry=0x23c8550,
proc=proc@entry=0x444960 <ngx_worker_process_cycle>, data=data@entry=0x0,
name=name@entry=0x542645 "worker process", respawn=respawn@entry=-3)
at src/os/unix/ngx_process.c:198
on = 1
pid = 0
s = 0
#25 0x0000000000444e34 in ngx_start_worker_processes (
cycle=cycle@entry=0x23c8550, n=2, type=type@entry=-3)
at src/os/unix/ngx_process_cycle.c:396
i = 0
ch = {command = 1, pid = 0, slot = 0, fd = 0}
#26 0x0000000000445748 in ngx_master_process_cycle (
cycle=cycle@entry=0x23c8550) at src/os/unix/ngx_process_cycle.c:135
title = 0x23fef6a "master process /usr/local/openresty/nginx/sbin/nginx -p /etc/nginx/"
p = <optimized out>
size = <optimized out>
i = <optimized out>
n = <optimized out>
sigio = <optimized out>
set = {__val = {0 <repeats 16 times>}}
itv = {it_interval = {tv_sec = 37744344, tv_usec = 0}, it_value = {
tv_sec = 0, tv_usec = 0}}
live = <optimized out>
delay = <optimized out>
ls = <optimized out>
ccf = 0x23ca3a0
Here is the stacktrace of the second worker process:
#0 0x00007f1ace2fda0b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0
No symbol table info available.
#1 0x00007f1ace2fda9f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0
No symbol table info available.
#2 0x00007f1ace2fdb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0
No symbol table info available.
#3 0x000000000042e5f8 in ngx_shmtx_lock (mtx=0x7f1ac5de1068) at src/core/ngx_shmtx.c:111
i = <optimized out>
n = <optimized out>
#4 0x00000000004f0222 in ngx_http_lua_shdict_get_helper (L=0x40ae7bb0, get_stale=0)
at ../ngx_lua-0.10.11/src/ngx_http_lua_shdict.c:506
key = {len = 34, data = 0x40f87f58 "domain:fullchain_der:domainC.org"}
ctx = 0x23df390
value = <optimized out>
sd = 0x40ae7bb0
num = <optimized out>
zone = 0x23df410
name = {len = <optimized out>, data = 0x23df34a "auto_ssl"}
rc = <optimized out>
value_type = <optimized out>
c = <optimized out>
user_flags = <optimized out>
get_stale = 0
L = 0x40ae7bb0
n = <optimized out>
#5 0x00007f1acde48ae3 in lj_BC_FUNCC () from /usr/local/openresty/luajit/lib/libluajit-5.1.so.2
No symbol table info available.
#6 0x00000000004e9279 in ngx_http_lua_run_thread (L=L@entry=0x40270378, r=r@entry=0x26a28a0,
ctx=ctx@entry=0x2491f80, nrets=nrets@entry=0) at ../ngx_lua-0.10.11/src/ngx_http_lua_util.c:1010
next_coctx = <optimized out>
parent_coctx = <optimized out>
orig_coctx = 0x2491fa8
rv = <optimized out>
success = <optimized out>
next_co = <optimized out>
old_co = <optimized out>
err = <optimized out>
msg = <optimized out>
trace = <optimized out>
rc = <optimized out>
old_pool = 0x0
#7 0x0000000000502c45 in ngx_http_lua_ssl_cert_by_chunk (L=0x40270378, r=0x26a28a0)
at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:527
co_ref = 2
rc = <optimized out>
co = 0x40ae7bb0
ctx = 0x2491f80
cln = <optimized out>
#8 0x00000000005034a0 in ngx_http_lua_ssl_cert_handler (ssl_conn=<optimized out>, data=<optimized out>)
at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:315
rc = <optimized out>
c = 0x7f1ace8e9460
fc = 0x7f1ace902e10
r = 0x26a28a0
cln = <optimized out>
hc = 0x2572b40
lscf = <optimized out>
cctx = 0x2572bf0
cscf = <optimized out>
#9 0x00007f1acd674ce0 in ssl3_get_client_hello () from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#10 0x00007f1acd679ba6 in ssl3_accept () from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#11 0x00007f1acd68a514 in ssl23_accept () from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#12 0x000000000044abdf in ngx_ssl_handshake (c=c@entry=0x7f1ace8e9460) at src/event/ngx_event_openssl.c:1232
n = <optimized out>
sslerr = <optimized out>
err = <optimized out>
#13 0x000000000045dd2a in ngx_http_ssl_handshake (rev=0x2419ec0) at src/http/ngx_http_request.c:743
p = <optimized out>
buf = "removed"
size = <optimized out>
n = <optimized out>
err = <optimized out>
rc = <optimized out>
c = 0x7f1ace8e9460
hc = 0x2572b40
#14 0x0000000000446ab7 in ngx_epoll_process_events (cycle=<optimized out>, timer=<optimized out>,
flags=<optimized out>) at src/event/modules/ngx_epoll_module.c:902
events = 1
revents = 1
instance = <optimized out>
i = 0
level = <optimized out>
err = <optimized out>
rev = <optimized out>
wev = <optimized out>
queue = <optimized out>
c = 0x7f1ace8e9460
#15 0x000000000043df3b in ngx_process_events_and_timers (cycle=cycle@entry=0x23c8550)
at src/event/ngx_event.c:252
flags = <optimized out>
timer = <optimized out>
delta = 1512955931246
q = <optimized out>
ev = <optimized out>
#16 0x00000000004449d2 in ngx_worker_process_cycle (cycle=cycle@entry=0x23c8550, data=data@entry=0x1)
at src/os/unix/ngx_process_cycle.c:815
worker = 1
#17 0x00000000004434b4 in ngx_spawn_process (cycle=cycle@entry=0x23c8550,
proc=proc@entry=0x444960 <ngx_worker_process_cycle>, data=data@entry=0x1,
name=name@entry=0x542645 "worker process", respawn=respawn@entry=-3) at src/os/unix/ngx_process.c:198
on = 1
pid = 0
s = 1
#18 0x0000000000444e34 in ngx_start_worker_processes (cycle=cycle@entry=0x23c8550, n=2, type=type@entry=-3)
at src/os/unix/ngx_process_cycle.c:396
i = 1
ch = {command = 1, pid = 7980, slot = 0, fd = 3}
#19 0x0000000000445748 in ngx_master_process_cycle (cycle=cycle@entry=0x23c8550)
at src/os/unix/ngx_process_cycle.c:135
title = 0x23fef6a "master process /usr/local/openresty/nginx/sbin/nginx -p /etc/nginx/"
p = <optimized out>
size = <optimized out>
i = <optimized out>
n = <optimized out>
sigio = <optimized out>
set = {__val = {0 <repeats 16 times>}}
itv = {it_interval = {tv_sec = 37744344, tv_usec = 0}, it_value = {tv_sec = 0, tv_usec = 0}}
live = <optimized out>
delay = <optimized out>
ls = <optimized out>
ccf = 0x23ca3a0
#20 0x000000000041f162 in main (argc=<optimized out>, argv=<optimized out>) at src/core/nginx.c:381
b = <optimized out>
log = 0x7aeea0 <ngx_log>
i = <optimized out>
cycle = 0x23c8550
init_cycle = {conf_ctx = 0x0, pool = 0x23c7f70, log = 0x7aeea0 <ngx_log>, new_log = {log_level = 0,
file = 0x0, connection = 0, disk_full_time = 0, handler = 0x0, data = 0x0, writer = 0x0,
wdata = 0x0, action = 0x0, next = 0x0}, log_use_stderr = 0, files = 0x0, free_connections = 0x0,
free_connection_n = 0, modules = 0x0, modules_n = 0, modules_used = 0, reusable_connections_queue = {
prev = 0x0, next = 0x0}, reusable_connections_n = 0, listening = {elts = 0x0, nelts = 0, size = 0,
nalloc = 0, pool = 0x0}, paths = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0},
config_dump = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, config_dump_rbtree = {
root = 0x0, sentinel = 0x0, insert = 0x0}, config_dump_sentinel = {key = 0, left = 0x0, right = 0x0,
parent = 0x0, color = 0 '\000', data = 0 '\000'}, open_files = {last = 0x0, part = {elts = 0x0,
nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, shared_memory = {last = 0x0, part = {
elts = 0x0, nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, connection_n = 0,
files_n = 0, connections = 0x0, read_events = 0x0, write_events = 0x0, old_cycle = 0x0, conf_file = {
len = 26, data = 0x23c7fc0 ""}, conf_param = {len = 0, data = 0x0}, conf_prefix = {len = 16,
data = 0x23c7fc0 ""}, prefix = {len = 11, data = 0x7ffd578b4f7e ""}, lock_file = {len = 0,
data = 0x0}, hostname = {len = 0, data = 0x0}, intercept_error_log_handler = 0x0,
intercept_error_log_data = 0x0, entered_logger = 0}
cd = <optimized out>
ccf = <optimized out>
Please let me know if there is anything we can help to fix this. Thank you!