Skip to content

Deadlock in ngx_http_lua_shdict_set_helper #1207

Closed
@gohai

Description

@gohai

Hello,

We are using OpenResty 1.13.6.1 (1.el7.centos) to terminate HTTPS traffic for multiple domains, using the lua-resty-auto-ssl module.

One of our instances randomly stopped responding to traffic, and did not cover on its own. It turned out that both nginx worker processes appear to be wedged waiting for a futex.

Stacktraces on both processes look rather similar, although the details vary. Here is one: ngx_http_lua_ssl_cert_handler -> ngx_http_lua_ssl_cert_by_chunk -> ngx_http_lua_run_thread -> ngx_http_lua_shdict_get_helper -> lua_pushlstring -> lj_gc_step -> gc_onestep -> gc_finalize -> gc_call_finalizer -> lj_BC_FUNCC -> ngx_http_lua_shdict_set_helper -> ngx_shmtx_lock -> sem_wait@@GLIBC_2.2.5.

I am wondering whether the apparent recursion between ngx_http_lua_shdict_get_helper and ngx_http_lua_shdict_set_helper might have caused an issue, as I see both functions acquiring the same lock? Or is there some subtle race condition between the two worker processes?

Here is the stacktrace of the first worker process:

#0  0x00007f1ace2fda0b in do_futex_wait.constprop.1 ()
   from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007f1ace2fda9f in __new_sem_wait_slow.constprop.0 ()
   from /lib64/libpthread.so.0
No symbol table info available.
#2  0x00007f1ace2fdb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x000000000042e5f8 in ngx_shmtx_lock (mtx=0x7f1ac5de1068)
    at src/core/ngx_shmtx.c:111
        i = <optimized out>
        n = <optimized out>
#4  0x00000000004ef772 in ngx_http_lua_shdict_set_helper (L=0x413061a8, 
    flags=0) at ../ngx_lua-0.10.11/src/ngx_http_lua_shdict.c:1016
        i = <optimized out>
        n = <optimized out>
        key = {len = 31, data = 0x40dc1d70 "issue_cert:www.domainA.com"}
        rc = <optimized out>
        ctx = 0x23df390
        sd = 0x5a8279995a827999
        value = {len = 0, data = 0x0}
        value_type = 0
        num = 6.9047205714551806e-310
        c = 0 '\000'
        exptime = <optimized out>
        p = <optimized out>
        node = <optimized out>
        tp = <optimized out>
        zone = 0x23df410
        forcible = 0
        user_flags = 0
        q = <optimized out>
#5  0x00007f1acde48ae3 in lj_BC_FUNCC ()
   from /usr/local/openresty/luajit/lib/libluajit-5.1.so.2
No symbol table info available.
#6  0x00007f1acde4ad06 in gc_call_finalizer (g=g@entry=0x402703b8, 
    L=L@entry=0x413061a8, mo=mo@entry=0x7ffd578b22b0, o=o@entry=0x40dc1d18)
    at lj_gc.c:475
        oldh = 0 '\000'
        oldt = 435326
        errcode = <optimized out>
        top = <optimized out>
#7  0x00007f1acde4ae68 in gc_finalize (L=0x413061a8) at lj_gc.c:509
        tmp = {u64 = 18446744036148519760, n = -nan(0xffff741302350), {{gcr = {
                gcptr32 = 1093673808}, i = 1093673808}, it = 4294967287}, 
          fr = {func = {gcptr32 = 1093673808}, tp = {ftsz = -9, pcr = {
                ptr32 = 4294967287}}}, u32 = {lo = 1093673808, 
            hi = 4294967287}}
        tv = <optimized out>
        g = 0x402703b8
        o = 0x40dc1d18
        mo = <optimized out>
#8  0x00007f1acde4b75f in gc_onestep (L=L@entry=0x413061a8) at lj_gc.c:659
        g = 0x402703b8
#9  0x00007f1acde4bd4c in lj_gc_step (L=0x413061a8) at lj_gc.c:689
        g = 0x402703b8
        lim = 1300
        ostate = -2
#10 0x00007f1acde581b3 in lua_pushlstring (L=L@entry=0x413061a8, 
    str=0x7f1ac6f0306a "removed", len=2463) at lj_api.c:639
        s = <optimized out>
#11 0x00000000004f034c in ngx_http_lua_shdict_get_helper (L=0x413061a8, 
    get_stale=0) at ../ngx_lua-0.10.11/src/ngx_http_lua_shdict.c:538
        key = {len = 38, 
          data = 0x4028d458 "domain:fullchain_der:domainB.com"}
        ctx = 0x23df390
        value = <optimized out>
        sd = 0x7f1ac6f03020
        num = <optimized out>
        zone = 0x23df400
        name = {len = <optimized out>, data = 0x23df34a "auto_ssl"}
        rc = <optimized out>
        value_type = 4
        c = <optimized out>
        user_flags = <optimized out>
        get_stale = 0
        L = 0x413061a8
        n = <optimized out>
#12 0x00007f1acde48ae3 in lj_BC_FUNCC ()
   from /usr/local/openresty/luajit/lib/libluajit-5.1.so.2
No symbol table info available.
#13 0x00000000004e9279 in ngx_http_lua_run_thread (L=L@entry=0x40270378, 
    r=r@entry=0x245d2f0, ctx=ctx@entry=0x263e030, nrets=nrets@entry=0)
    at ../ngx_lua-0.10.11/src/ngx_http_lua_util.c:1010
        next_coctx = <optimized out>
        parent_coctx = <optimized out>
        orig_coctx = 0x263e058
        rv = <optimized out>
        success = <optimized out>
        next_co = <optimized out>
        old_co = <optimized out>
        err = <optimized out>
        msg = <optimized out>
        trace = <optimized out>
        rc = <optimized out>
        old_pool = 0x0
#14 0x0000000000502c45 in ngx_http_lua_ssl_cert_by_chunk (L=0x40270378, 
    r=0x245d2f0) at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:527
        co_ref = 1
        rc = <optimized out>
        co = 0x413061a8
        ctx = 0x263e030
        cln = <optimized out>
#15 0x00000000005034a0 in ngx_http_lua_ssl_cert_handler (
    ssl_conn=<optimized out>, data=<optimized out>)
    at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:315
        rc = <optimized out>
        c = 0x7f1ace8ef130
        fc = 0x7f1ace8da910
        r = 0x245d2f0
        cln = <optimized out>
        hc = 0x24a64a0
        lscf = <optimized out>
        cctx = 0x24a6558
        cscf = <optimized out>
#16 0x00007f1acd674ce0 in ssl3_get_client_hello ()
   from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#17 0x00007f1acd679ba6 in ssl3_accept ()
   from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#18 0x00007f1acd68a514 in ssl23_accept ()
   from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#19 0x000000000044abdf in ngx_ssl_handshake (c=c@entry=0x7f1ace8ef130)
    at src/event/ngx_event_openssl.c:1232
        n = <optimized out>
        sslerr = <optimized out>
        err = <optimized out>
#20 0x000000000045dd2a in ngx_http_ssl_handshake (rev=0x241c3e0)
    at src/http/ngx_http_request.c:743
        p = <optimized out>
        buf = "removed", <incomplete sequence \316>
        size = <optimized out>
        n = <optimized out>
        err = <optimized out>
        rc = <optimized out>
        c = 0x7f1ace8ef130
        hc = 0x24a64a0
#21 0x0000000000446ab7 in ngx_epoll_process_events (cycle=<optimized out>, 
    timer=<optimized out>, flags=<optimized out>)
    at src/event/modules/ngx_epoll_module.c:902
        events = 1
        revents = 1
        instance = <optimized out>
        i = 0
        level = <optimized out>
        err = <optimized out>
        rev = <optimized out>
        wev = <optimized out>
        queue = <optimized out>
        c = 0x7f1ace8ef130
#22 0x000000000043df3b in ngx_process_events_and_timers (
    cycle=cycle@entry=0x23c8550) at src/event/ngx_event.c:252
        flags = <optimized out>
        timer = <optimized out>
        delta = 1512955930530
        q = <optimized out>
        ev = <optimized out>
#23 0x00000000004449d2 in ngx_worker_process_cycle (
    cycle=cycle@entry=0x23c8550, data=data@entry=0x0)
    at src/os/unix/ngx_process_cycle.c:815
        worker = 0
#24 0x00000000004434b4 in ngx_spawn_process (cycle=cycle@entry=0x23c8550, 
    proc=proc@entry=0x444960 <ngx_worker_process_cycle>, data=data@entry=0x0, 
    name=name@entry=0x542645 "worker process", respawn=respawn@entry=-3)
    at src/os/unix/ngx_process.c:198
        on = 1
        pid = 0
        s = 0
#25 0x0000000000444e34 in ngx_start_worker_processes (
    cycle=cycle@entry=0x23c8550, n=2, type=type@entry=-3)
    at src/os/unix/ngx_process_cycle.c:396
        i = 0
        ch = {command = 1, pid = 0, slot = 0, fd = 0}
#26 0x0000000000445748 in ngx_master_process_cycle (
    cycle=cycle@entry=0x23c8550) at src/os/unix/ngx_process_cycle.c:135
        title = 0x23fef6a "master process /usr/local/openresty/nginx/sbin/nginx -p /etc/nginx/"
        p = <optimized out>
        size = <optimized out>
        i = <optimized out>
        n = <optimized out>
        sigio = <optimized out>
        set = {__val = {0 <repeats 16 times>}}
        itv = {it_interval = {tv_sec = 37744344, tv_usec = 0}, it_value = {
            tv_sec = 0, tv_usec = 0}}
        live = <optimized out>
        delay = <optimized out>
        ls = <optimized out>
        ccf = 0x23ca3a0

Here is the stacktrace of the second worker process:

#0  0x00007f1ace2fda0b in do_futex_wait.constprop.1 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007f1ace2fda9f in __new_sem_wait_slow.constprop.0 () from /lib64/libpthread.so.0
No symbol table info available.
#2  0x00007f1ace2fdb3b in sem_wait@@GLIBC_2.2.5 () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x000000000042e5f8 in ngx_shmtx_lock (mtx=0x7f1ac5de1068) at src/core/ngx_shmtx.c:111
        i = <optimized out>
        n = <optimized out>
#4  0x00000000004f0222 in ngx_http_lua_shdict_get_helper (L=0x40ae7bb0, get_stale=0)
    at ../ngx_lua-0.10.11/src/ngx_http_lua_shdict.c:506
        key = {len = 34, data = 0x40f87f58 "domain:fullchain_der:domainC.org"}
        ctx = 0x23df390
        value = <optimized out>
        sd = 0x40ae7bb0
        num = <optimized out>
        zone = 0x23df410
        name = {len = <optimized out>, data = 0x23df34a "auto_ssl"}
        rc = <optimized out>
        value_type = <optimized out>
        c = <optimized out>
        user_flags = <optimized out>
        get_stale = 0
        L = 0x40ae7bb0
        n = <optimized out>
#5  0x00007f1acde48ae3 in lj_BC_FUNCC () from /usr/local/openresty/luajit/lib/libluajit-5.1.so.2
No symbol table info available.
#6  0x00000000004e9279 in ngx_http_lua_run_thread (L=L@entry=0x40270378, r=r@entry=0x26a28a0, 
    ctx=ctx@entry=0x2491f80, nrets=nrets@entry=0) at ../ngx_lua-0.10.11/src/ngx_http_lua_util.c:1010
        next_coctx = <optimized out>
        parent_coctx = <optimized out>
        orig_coctx = 0x2491fa8
        rv = <optimized out>
        success = <optimized out>
        next_co = <optimized out>
        old_co = <optimized out>
        err = <optimized out>
        msg = <optimized out>
        trace = <optimized out>
        rc = <optimized out>
        old_pool = 0x0
#7  0x0000000000502c45 in ngx_http_lua_ssl_cert_by_chunk (L=0x40270378, r=0x26a28a0)
    at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:527
        co_ref = 2
        rc = <optimized out>
        co = 0x40ae7bb0
        ctx = 0x2491f80
        cln = <optimized out>
#8  0x00000000005034a0 in ngx_http_lua_ssl_cert_handler (ssl_conn=<optimized out>, data=<optimized out>)
    at ../ngx_lua-0.10.11/src/ngx_http_lua_ssl_certby.c:315
        rc = <optimized out>
        c = 0x7f1ace8e9460
        fc = 0x7f1ace902e10
        r = 0x26a28a0
        cln = <optimized out>
        hc = 0x2572b40
        lscf = <optimized out>
        cctx = 0x2572bf0
        cscf = <optimized out>
#9  0x00007f1acd674ce0 in ssl3_get_client_hello () from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#10 0x00007f1acd679ba6 in ssl3_accept () from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#11 0x00007f1acd68a514 in ssl23_accept () from /usr/local/openresty/openssl/lib/libssl.so.1.0.0
No symbol table info available.
#12 0x000000000044abdf in ngx_ssl_handshake (c=c@entry=0x7f1ace8e9460) at src/event/ngx_event_openssl.c:1232
        n = <optimized out>
        sslerr = <optimized out>
        err = <optimized out>
#13 0x000000000045dd2a in ngx_http_ssl_handshake (rev=0x2419ec0) at src/http/ngx_http_request.c:743
        p = <optimized out>
        buf = "removed"
        size = <optimized out>
        n = <optimized out>
        err = <optimized out>
        rc = <optimized out>
        c = 0x7f1ace8e9460
        hc = 0x2572b40
#14 0x0000000000446ab7 in ngx_epoll_process_events (cycle=<optimized out>, timer=<optimized out>, 
    flags=<optimized out>) at src/event/modules/ngx_epoll_module.c:902
        events = 1
        revents = 1
        instance = <optimized out>
        i = 0
        level = <optimized out>
        err = <optimized out>
        rev = <optimized out>
        wev = <optimized out>
        queue = <optimized out>
        c = 0x7f1ace8e9460
#15 0x000000000043df3b in ngx_process_events_and_timers (cycle=cycle@entry=0x23c8550)
    at src/event/ngx_event.c:252
        flags = <optimized out>
        timer = <optimized out>
        delta = 1512955931246
        q = <optimized out>
        ev = <optimized out>
#16 0x00000000004449d2 in ngx_worker_process_cycle (cycle=cycle@entry=0x23c8550, data=data@entry=0x1)
    at src/os/unix/ngx_process_cycle.c:815
        worker = 1
#17 0x00000000004434b4 in ngx_spawn_process (cycle=cycle@entry=0x23c8550, 
    proc=proc@entry=0x444960 <ngx_worker_process_cycle>, data=data@entry=0x1, 
    name=name@entry=0x542645 "worker process", respawn=respawn@entry=-3) at src/os/unix/ngx_process.c:198
        on = 1
        pid = 0
        s = 1
#18 0x0000000000444e34 in ngx_start_worker_processes (cycle=cycle@entry=0x23c8550, n=2, type=type@entry=-3)
    at src/os/unix/ngx_process_cycle.c:396
        i = 1
        ch = {command = 1, pid = 7980, slot = 0, fd = 3}
#19 0x0000000000445748 in ngx_master_process_cycle (cycle=cycle@entry=0x23c8550)
    at src/os/unix/ngx_process_cycle.c:135
        title = 0x23fef6a "master process /usr/local/openresty/nginx/sbin/nginx -p /etc/nginx/"
        p = <optimized out>
        size = <optimized out>
        i = <optimized out>
        n = <optimized out>
        sigio = <optimized out>
        set = {__val = {0 <repeats 16 times>}}
        itv = {it_interval = {tv_sec = 37744344, tv_usec = 0}, it_value = {tv_sec = 0, tv_usec = 0}}
        live = <optimized out>
        delay = <optimized out>
        ls = <optimized out>
        ccf = 0x23ca3a0
#20 0x000000000041f162 in main (argc=<optimized out>, argv=<optimized out>) at src/core/nginx.c:381
        b = <optimized out>
        log = 0x7aeea0 <ngx_log>
        i = <optimized out>
        cycle = 0x23c8550
        init_cycle = {conf_ctx = 0x0, pool = 0x23c7f70, log = 0x7aeea0 <ngx_log>, new_log = {log_level = 0, 
            file = 0x0, connection = 0, disk_full_time = 0, handler = 0x0, data = 0x0, writer = 0x0, 
            wdata = 0x0, action = 0x0, next = 0x0}, log_use_stderr = 0, files = 0x0, free_connections = 0x0, 
          free_connection_n = 0, modules = 0x0, modules_n = 0, modules_used = 0, reusable_connections_queue = {
            prev = 0x0, next = 0x0}, reusable_connections_n = 0, listening = {elts = 0x0, nelts = 0, size = 0, 
            nalloc = 0, pool = 0x0}, paths = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, 
          config_dump = {elts = 0x0, nelts = 0, size = 0, nalloc = 0, pool = 0x0}, config_dump_rbtree = {
            root = 0x0, sentinel = 0x0, insert = 0x0}, config_dump_sentinel = {key = 0, left = 0x0, right = 0x0, 
            parent = 0x0, color = 0 '\000', data = 0 '\000'}, open_files = {last = 0x0, part = {elts = 0x0, 
              nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, shared_memory = {last = 0x0, part = {
              elts = 0x0, nelts = 0, next = 0x0}, size = 0, nalloc = 0, pool = 0x0}, connection_n = 0, 
          files_n = 0, connections = 0x0, read_events = 0x0, write_events = 0x0, old_cycle = 0x0, conf_file = {
            len = 26, data = 0x23c7fc0 ""}, conf_param = {len = 0, data = 0x0}, conf_prefix = {len = 16, 
            data = 0x23c7fc0 ""}, prefix = {len = 11, data = 0x7ffd578b4f7e ""}, lock_file = {len = 0, 
            data = 0x0}, hostname = {len = 0, data = 0x0}, intercept_error_log_handler = 0x0, 
          intercept_error_log_data = 0x0, entered_logger = 0}
        cd = <optimized out>
        ccf = <optimized out>

Please let me know if there is anything we can help to fix this. Thank you!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions