
Commit 75858ee

Ensure all HTLCs for a claimed payment are claimed on startup
While the HTLC-claim process happens across all MPP parts under one lock, this doesn't imply that they are claimed fully atomically on disk. Ultimately, an application can crash after persisting one `ChannelMonitorUpdate` out of multiple monitor updates needed for the full claim.

Previously, this would leave us in a very bad state - because of the all-channels-available check in `claim_funds` we'd refuse to claim the payment again on restart (even though the `PaymentReceived` event will be passed to the user again), and we'd end up having partially claimed the payment!

The fix for the consistency part of this issue is pretty straightforward - just check for this condition on startup and complete the claim across all channels/`ChannelMonitor`s if we detect it.

This still leaves us in a confused state from the perspective of the user, however - we've actually claimed a payment but when they call `claim_funds` we return `false` indicating it could not be claimed.
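In outline, the recovery pass added to `ChannelManager` deserialization (shown in full in the lightning/src/ln/channelmanager.rs hunk below; the following is a simplified sketch of that same loop) walks every deserialized `ChannelMonitor`, and for each stored preimage that still matches a claimable HTLC it re-queues the claim in the channel's holding cell without generating a monitor update and hands the preimage to the originating monitor out-of-band:

    for (_, monitor) in args.channel_monitors.iter() {
        for (payment_hash, payment_preimage) in monitor.get_stored_preimages() {
            // A stored preimage for a still-claimable payment means the claim was only
            // partially persisted before the crash - finish it now.
            if let Some(htlcs) = claimable_htlcs.remove(&payment_hash) {
                for htlc in htlcs {
                    // Queue the claim in the channel's holding cell, deliberately dropping
                    // the ChannelMonitorUpdate (the peer is disconnected at this point)...
                    if let Some(channel) = by_id.get_mut(&htlc.prev_hop.outpoint.to_channel_id()) {
                        channel.claim_htlc_while_disconnected_dropping_mon_update(htlc.prev_hop.htlc_id, payment_preimage, &args.logger);
                    }
                    // ...and hand the preimage to the corresponding monitor out-of-band.
                    if let Some(prev_monitor) = args.channel_monitors.get(&htlc.prev_hop.outpoint) {
                        prev_monitor.provide_payment_preimage(&payment_hash, &payment_preimage, &args.tx_broadcaster, &args.fee_estimator, &args.logger);
                    }
                }
            }
        }
    }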
1 parent 24150ee commit 75858ee

File tree

5 files changed: +230 −6 lines changed


lightning/src/chain/channelmonitor.rs

Lines changed: 6 additions & 1 deletion
@@ -1085,7 +1085,8 @@ impl<Signer: Sign> ChannelMonitor<Signer> {
         self.inner.lock().unwrap().provide_latest_holder_commitment_tx(holder_commitment_tx, htlc_outputs).map_err(|_| ())
     }
 
-    #[cfg(test)]
+    /// This is used to provide a payment preimage(s) out-of-band during startup without updating
+    /// the off-chain state with a new commitment transaction.
     pub(crate) fn provide_payment_preimage<B: Deref, F: Deref, L: Deref>(
         &self,
         payment_hash: &PaymentHash,
@@ -1631,6 +1632,10 @@ impl<Signer: Sign> ChannelMonitor<Signer> {
 
         res
     }
+
+    pub(crate) fn get_stored_preimages(&self) -> HashMap<PaymentHash, PaymentPreimage> {
+        self.inner.lock().unwrap().payment_preimages.clone()
+    }
 }
 
 /// Compares a broadcasted commitment transaction's HTLCs with those in the latest state,

lightning/src/ln/channel.rs

Lines changed: 18 additions & 0 deletions
@@ -1661,6 +1661,24 @@ impl<Signer: Sign> Channel<Signer> {
         make_funding_redeemscript(&self.get_holder_pubkeys().funding_pubkey, self.counterparty_funding_pubkey())
     }
 
+    /// Claims an HTLC while we're disconnected from a peer, dropping the ChannelMonitorUpdate
+    /// entirely.
+    ///
+    /// The ChannelMonitor for this channel MUST be updated out-of-band with the preimage provided.
+    /// The HTLC claim will end up in the holding cell (because the caller must ensure the peer is
+    /// disconnected).
+    pub fn claim_htlc_while_disconnected_dropping_mon_update<L: Deref>
+        (&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L)
+    where L::Target: Logger {
+        assert!(self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32) != 0);
+        let mon_update_id = self.latest_monitor_update_id; // Forget the ChannelMonitor update
+        let fulfill_resp = self.get_update_fulfill_htlc(htlc_id_arg, payment_preimage_arg, logger);
+        self.latest_monitor_update_id = mon_update_id;
+        if let UpdateFulfillFetch::NewClaim { msg, .. } = fulfill_resp {
+            assert!(msg.is_none()); // The HTLC must have ended up in the holding cell.
+        }
+    }
+
     fn get_update_fulfill_htlc<L: Deref>(&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) -> UpdateFulfillFetch where L::Target: Logger {
         // Either ChannelFunded got set (which means it won't be unset) or there is no way any
         // caller thought we could have something claimed (cause we wouldn't have accepted in an

lightning/src/ln/channelmanager.rs

Lines changed: 19 additions & 2 deletions
@@ -6568,7 +6568,7 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
                 let previous_hops_len: u64 = Readable::read(reader)?;
                 let mut previous_hops = Vec::with_capacity(cmp::min(previous_hops_len as usize, MAX_ALLOC_SIZE/mem::size_of::<ClaimableHTLC>()));
                 for _ in 0..previous_hops_len {
-                    previous_hops.push(Readable::read(reader)?);
+                    previous_hops.push(<ClaimableHTLC as Readable>::read(reader)?);
                 }
                 claimable_htlcs.insert(payment_hash, previous_hops);
             }
@@ -6667,7 +6667,7 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
         // payments which are still in-flight via their on-chain state.
         // We only rebuild the pending payments map if we were most recently serialized by
         // 0.0.102+
-        for (_, monitor) in args.channel_monitors {
+        for (_, monitor) in args.channel_monitors.iter() {
             if by_id.get(&monitor.get_funding_txo().0.to_channel_id()).is_none() {
                 for (htlc_source, htlc) in monitor.get_pending_outbound_htlcs() {
                     if let HTLCSource::OutboundRoute { payment_id, session_priv, path, payment_secret, .. } = htlc_source {
@@ -6750,6 +6750,23 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
             }
         }
 
+        for (_, monitor) in args.channel_monitors.iter() {
+            for (payment_hash, payment_preimage) in monitor.get_stored_preimages() {
+                if let Some(claimable_htlcs) = claimable_htlcs.remove(&payment_hash) {
+                    log_info!(args.logger, "Re-claiming HTLCs with payment hash {} due to partial-claim.", log_bytes!(payment_hash.0));
+                    for claimable_htlc in claimable_htlcs {
+                        let previous_channel_id = claimable_htlc.prev_hop.outpoint.to_channel_id();
+                        if let Some(channel) = by_id.get_mut(&previous_channel_id) {
+                            channel.claim_htlc_while_disconnected_dropping_mon_update(claimable_htlc.prev_hop.htlc_id, payment_preimage, &args.logger);
+                        }
+                        if let Some(previous_hop_monitor) = args.channel_monitors.get(&claimable_htlc.prev_hop.outpoint) {
+                            previous_hop_monitor.provide_payment_preimage(&payment_hash, &payment_preimage, &args.tx_broadcaster, &args.fee_estimator, &args.logger);
+                        }
+                    }
+                }
+            }
+        }
+
         let inbound_pmt_key_material = args.keys_manager.get_inbound_payment_key_material();
         let expanded_inbound_key = inbound_payment::ExpandedKey::new(&inbound_pmt_key_material);
         let channel_manager = ChannelManager {

lightning/src/ln/functional_test_utils.rs

Lines changed: 7 additions & 3 deletions
@@ -1459,7 +1459,7 @@ pub fn send_along_route_with_secret<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>,
     payment_id
 }
 
-pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, expected_preimage: Option<PaymentPreimage>) {
+pub fn do_pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, clear_recipient_events: bool, expected_preimage: Option<PaymentPreimage>) {
     let mut payment_event = SendEvent::from_event(ev);
     let mut prev_node = origin_node;
 
@@ -1472,7 +1472,7 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path
 
         expect_pending_htlcs_forwardable!(node);
 
-        if idx == expected_path.len() - 1 {
+        if idx == expected_path.len() - 1 && clear_recipient_events {
            let events_2 = node.node.get_and_clear_pending_events();
            if payment_received_expected {
                assert_eq!(events_2.len(), 1);
@@ -1496,7 +1496,7 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path
            } else {
                assert!(events_2.is_empty());
            }
-        } else {
+        } else if idx != expected_path.len() - 1 {
            let mut events_2 = node.node.get_and_clear_pending_msg_events();
            assert_eq!(events_2.len(), 1);
            check_added_monitors!(node, 1);
@@ -1508,6 +1508,10 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path
     }
 }
 
+pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, expected_preimage: Option<PaymentPreimage>) {
+    do_pass_along_path(origin_node, expected_path, recv_value, our_payment_hash, our_payment_secret, ev, payment_received_expected, true, expected_preimage);
+}
+
 pub fn pass_along_route<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_route: &[&[&Node<'a, 'b, 'c>]], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: PaymentSecret) {
     let mut events = origin_node.node.get_and_clear_pending_msg_events();
     assert_eq!(events.len(), expected_route.len());

lightning/src/ln/functional_tests.rs

Lines changed: 180 additions & 0 deletions
@@ -9562,6 +9562,186 @@ fn test_keysend_payments_to_private_node() {
     claim_payment(&nodes[0], &path, test_preimage);
 }
 
+fn do_test_partial_claim_before_restart(persist_both_monitors: bool) {
+    // Test what happens if a node receives an MPP payment, claims it, but crashes before
+    // persisting the ChannelManager. If `persist_both_monitors` is false, also crash after only
+    // updating one of the two channels' ChannelMonitors. As a result, on startup, we'll (a) still
+    // have the PaymentReceivedEvent, (b) have one (or two) channel(s) that goes on chain with the
+    // HTLC preimage in them, and (c) optionally have one channel that is live off-chain but does
+    // not have the preimage tied to the still-pending HTLC.
+    //
+    // To get to the correct state, on startup we should propagate the preimage to the
+    // still-off-chain channel, claiming the HTLC as soon as the peer connects, with the monitor
+    // receiving the preimage without a state update.
+    let chanmon_cfgs = create_chanmon_cfgs(4);
+    let node_cfgs = create_node_cfgs(4, &chanmon_cfgs);
+    let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]);
+
+    let persister: test_utils::TestPersister;
+    let new_chain_monitor: test_utils::TestChainMonitor;
+    let nodes_3_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+
+    let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs);
+
+    create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 100_000, 0, InitFeatures::known(), InitFeatures::known());
+    create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 100_000, 0, InitFeatures::known(), InitFeatures::known());
+    let chan_id_persisted = create_announced_chan_between_nodes_with_value(&nodes, 1, 3, 100_000, 0, InitFeatures::known(), InitFeatures::known()).2;
+    let chan_id_not_persisted = create_announced_chan_between_nodes_with_value(&nodes, 2, 3, 100_000, 0, InitFeatures::known(), InitFeatures::known()).2;
+
+    // Create an MPP route for 15k sats, more than the default htlc-max of 10%
+    let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[3], 15_000_000);
+    assert_eq!(route.paths.len(), 2);
+    route.paths.sort_by(|path_a, _| {
+        // Sort the path so that the path through nodes[1] comes first
+        if path_a[0].pubkey == nodes[1].node.get_our_node_id() {
+            core::cmp::Ordering::Less } else { core::cmp::Ordering::Greater }
+    });
+
+    nodes[0].node.send_payment(&route, payment_hash, &Some(payment_secret)).unwrap();
+    check_added_monitors!(nodes[0], 2);
+
+    // Send the payment through to nodes[3] *without* clearing the PaymentReceived event
+    let mut send_events = nodes[0].node.get_and_clear_pending_msg_events();
+    assert_eq!(send_events.len(), 2);
+    do_pass_along_path(&nodes[0], &[&nodes[1], &nodes[3]], 15_000_000, payment_hash, Some(payment_secret), send_events[0].clone(), true, false, None);
+    do_pass_along_path(&nodes[0], &[&nodes[2], &nodes[3]], 15_000_000, payment_hash, Some(payment_secret), send_events[1].clone(), true, false, None);
+
+    // Now that we have an MPP payment pending, get the latest encoded copies of nodes[3]'s
+    // monitors and ChannelManager, for use later, if we don't want to persist both monitors.
+    let mut original_monitor = test_utils::TestVecWriter(Vec::new());
+    if !persist_both_monitors {
+        for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() {
+            if outpoint.to_channel_id() == chan_id_not_persisted {
+                assert!(original_monitor.0.is_empty());
+                nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut original_monitor).unwrap();
+            }
+        }
+    }
+
+    let mut original_manager = test_utils::TestVecWriter(Vec::new());
+    nodes[3].node.write(&mut original_manager).unwrap();
+
+    expect_payment_received!(nodes[3], payment_hash, payment_secret, 15_000_000);
+
+    nodes[3].node.claim_funds(payment_preimage);
+    check_added_monitors!(nodes[3], 2);
+
+    // Now fetch one of the two updated ChannelMonitors from nodes[3], and restart pretending we
+    // crashed in between the two persistence calls - using one old ChannelMonitor and one new one,
+    // with the old ChannelManager.
+    let mut updated_monitor = test_utils::TestVecWriter(Vec::new());
+    for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() {
+        if outpoint.to_channel_id() == chan_id_persisted {
+            assert!(updated_monitor.0.is_empty());
+            nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut updated_monitor).unwrap();
+        }
+    }
+    // If `persist_both_monitors` is set, get the second monitor here as well
+    if persist_both_monitors {
+        for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() {
+            if outpoint.to_channel_id() == chan_id_not_persisted {
+                assert!(original_monitor.0.is_empty());
+                nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut original_monitor).unwrap();
+            }
+        }
+    }
+
+    // Now restart nodes[3].
+    persister = test_utils::TestPersister::new();
+    let keys_manager = &chanmon_cfgs[3].keys_manager;
+    new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[3].chain_source), nodes[3].tx_broadcaster.clone(), nodes[3].logger, node_cfgs[3].fee_estimator, &persister, keys_manager);
+    nodes[3].chain_monitor = &new_chain_monitor;
+    let mut monitors = Vec::new();
+    for mut monitor_data in [original_monitor, updated_monitor] {
+        let (_, mut deserialized_monitor) = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(&mut &monitor_data.0[..], keys_manager).unwrap();
+        monitors.push(deserialized_monitor);
+    }
+
+    let config = UserConfig::default();
+    nodes_3_deserialized = {
+        let mut channel_monitors = HashMap::new();
+        for monitor in monitors.iter_mut() {
+            channel_monitors.insert(monitor.get_funding_txo().0, monitor);
+        }
+        <(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut &original_manager.0[..], ChannelManagerReadArgs {
+            default_config: config,
+            keys_manager,
+            fee_estimator: node_cfgs[3].fee_estimator,
+            chain_monitor: nodes[3].chain_monitor,
+            tx_broadcaster: nodes[3].tx_broadcaster.clone(),
+            logger: nodes[3].logger,
+            channel_monitors,
+        }).unwrap().1
+    };
+    nodes[3].node = &nodes_3_deserialized;
+
+    for monitor in monitors {
+        // On startup the preimage should have been copied into the non-persisted monitor:
+        assert!(monitor.get_stored_preimages().contains_key(&payment_hash));
+        nodes[3].chain_monitor.watch_channel(monitor.get_funding_txo().0.clone(), monitor).unwrap();
+    }
+    check_added_monitors!(nodes[3], 2);
+
+    nodes[1].node.peer_disconnected(&nodes[3].node.get_our_node_id(), false);
+    nodes[2].node.peer_disconnected(&nodes[3].node.get_our_node_id(), false);
+
+    // During deserialization, we should have closed one channel and broadcast its latest
+    // commitment transaction. We should also still have the original PaymentReceived event we
+    // never finished processing.
+    let events = nodes[3].node.get_and_clear_pending_events();
+    assert_eq!(events.len(), if persist_both_monitors { 3 } else { 2 });
+    if let Event::PaymentReceived { amt: 15_000_000, .. } = events[0] { } else { panic!(); }
+    if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[1] { } else { panic!(); }
+    if persist_both_monitors {
+        if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[2] { } else { panic!(); }
+    }
+
+    assert_eq!(nodes[3].node.list_channels().len(), if persist_both_monitors { 0 } else { 1 });
+    if !persist_both_monitors {
+        // If one of the two channels is still live, reveal the payment preimage over it.
+
+        nodes[3].node.peer_connected(&nodes[2].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty(), remote_network_address: None });
+        let reestablish_1 = get_chan_reestablish_msgs!(nodes[3], nodes[2]);
+        nodes[2].node.peer_connected(&nodes[3].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty(), remote_network_address: None });
+        let reestablish_2 = get_chan_reestablish_msgs!(nodes[2], nodes[3]);
+
+        nodes[2].node.handle_channel_reestablish(&nodes[3].node.get_our_node_id(), &reestablish_1[0]);
+        get_event_msg!(nodes[2], MessageSendEvent::SendChannelUpdate, nodes[3].node.get_our_node_id());
+        assert!(nodes[2].node.get_and_clear_pending_msg_events().is_empty());
+
+        nodes[3].node.handle_channel_reestablish(&nodes[2].node.get_our_node_id(), &reestablish_2[0]);
+
+        // Once we call `get_and_clear_pending_msg_events` the holding cell is cleared and the HTLC
+        // claim should fly.
+        let ds_msgs = nodes[3].node.get_and_clear_pending_msg_events();
+        check_added_monitors!(nodes[3], 1);
+        assert_eq!(ds_msgs.len(), 2);
+        if let MessageSendEvent::SendChannelUpdate { .. } = ds_msgs[1] {} else { panic!(); }
+
+        let cs_updates = match ds_msgs[0] {
+            MessageSendEvent::UpdateHTLCs { ref updates, .. } => {
+                nodes[2].node.handle_update_fulfill_htlc(&nodes[3].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
+                check_added_monitors!(nodes[2], 1);
+                let cs_updates = get_htlc_update_msgs!(nodes[2], nodes[0].node.get_our_node_id());
+                expect_payment_forwarded!(nodes[2], Some(1000), false);
+                commitment_signed_dance!(nodes[2], nodes[3], updates.commitment_signed, false, true);
+                cs_updates
+            }
+            _ => panic!(),
+        };
+
+        nodes[0].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]);
+        commitment_signed_dance!(nodes[0], nodes[2], cs_updates.commitment_signed, false, true);
+        expect_payment_sent!(nodes[0], payment_preimage);
+    }
+}
+
+#[test]
+fn test_partial_claim_before_restart() {
+    do_test_partial_claim_before_restart(false);
+    do_test_partial_claim_before_restart(true);
+}
+
 /// The possible events which may trigger a `max_dust_htlc_exposure` breach
 #[derive(Clone, Copy, PartialEq)]
 enum ExposureEvent {
