Skip to content

Commit 3c163f3

Browse files
Kalesh APkuba-moo
Kalesh AP
authored andcommitted
bnxt_en: Optimize recovery path ULP locking in the driver
In the error recovery path (AER, firmware recovery, etc), the driver notifies the RoCE driver via ULP_STOP before the reset and via ULP_START after the reset, all under RTNL_LOCK. The RoCE driver can take a long time if there are a lot of QPs to destroy, so it is not ideal to hold the global RTNL lock. Rely on the new en_dev_lock mutex instead for ULP_STOP and ULP_START. For the most part, we move the ULP_STOP call before we take the RTNL lock and move the ULP_START after RTNL unlock. Note that SRIOV re-enablement must be done after ULP_START or RoCE on the VFs will not resume properly after reset. The one scenario in bnxt_hwrm_if_change() where the RTNL lock is already taken in the .ndo_open() context requires the ULP restart to be deferred to the bnxt_sp_task() workqueue. Reviewed-by: Selvin Thyparampil Xavier <selvin.xavier@broadcom.com> Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com> Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com> Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Michael Chan <michael.chan@broadcom.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/20240501003056.100607-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent de21ec4 commit 3c163f3

File tree

3 files changed

+44
-26
lines changed

3 files changed

+44
-26
lines changed

drivers/net/ethernet/broadcom/bnxt/bnxt.c

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11556,7 +11556,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
1155611556
if (fw_reset) {
1155711557
set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
1155811558
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
11559-
bnxt_ulp_stop(bp);
11559+
bnxt_ulp_irq_stop(bp);
1156011560
bnxt_free_ctx_mem(bp);
1156111561
bnxt_dcb_free(bp);
1156211562
rc = bnxt_fw_init_one(bp);
@@ -12111,10 +12111,9 @@ static int bnxt_open(struct net_device *dev)
1211112111
bnxt_hwrm_if_change(bp, false);
1211212112
} else {
1211312113
if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) {
12114-
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
12115-
bnxt_ulp_start(bp, 0);
12116-
bnxt_reenable_sriov(bp);
12117-
}
12114+
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
12115+
bnxt_queue_sp_work(bp,
12116+
BNXT_RESTART_ULP_SP_EVENT);
1211812117
}
1211912118
}
1212012119

@@ -13270,7 +13269,6 @@ static void bnxt_fw_fatal_close(struct bnxt *bp)
1327013269

1327113270
static void bnxt_fw_reset_close(struct bnxt *bp)
1327213271
{
13273-
bnxt_ulp_stop(bp);
1327413272
/* When firmware is in fatal state, quiesce device and disable
1327513273
* bus master to prevent any potential bad DMAs before freeing
1327613274
* kernel memory.
@@ -13351,6 +13349,7 @@ void bnxt_fw_exception(struct bnxt *bp)
1335113349
{
1335213350
netdev_warn(bp->dev, "Detected firmware fatal condition, initiating reset\n");
1335313351
set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
13352+
bnxt_ulp_stop(bp);
1335413353
bnxt_rtnl_lock_sp(bp);
1335513354
bnxt_force_fw_reset(bp);
1335613355
bnxt_rtnl_unlock_sp(bp);
@@ -13382,6 +13381,7 @@ static int bnxt_get_registered_vfs(struct bnxt *bp)
1338213381

1338313382
void bnxt_fw_reset(struct bnxt *bp)
1338413383
{
13384+
bnxt_ulp_stop(bp);
1338513385
bnxt_rtnl_lock_sp(bp);
1338613386
if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
1338713387
!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
@@ -13506,6 +13506,12 @@ static void bnxt_fw_echo_reply(struct bnxt *bp)
1350613506
hwrm_req_send(bp, req);
1350713507
}
1350813508

13509+
static void bnxt_ulp_restart(struct bnxt *bp)
13510+
{
13511+
bnxt_ulp_stop(bp);
13512+
bnxt_ulp_start(bp, 0);
13513+
}
13514+
1350913515
static void bnxt_sp_task(struct work_struct *work)
1351013516
{
1351113517
struct bnxt *bp = container_of(work, struct bnxt, sp_task);
@@ -13517,6 +13523,11 @@ static void bnxt_sp_task(struct work_struct *work)
1351713523
return;
1351813524
}
1351913525

13526+
if (test_and_clear_bit(BNXT_RESTART_ULP_SP_EVENT, &bp->sp_event)) {
13527+
bnxt_ulp_restart(bp);
13528+
bnxt_reenable_sriov(bp);
13529+
}
13530+
1352013531
if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event))
1352113532
bnxt_cfg_rx_mode(bp);
1352213533

@@ -13973,10 +13984,8 @@ static bool bnxt_fw_reset_timeout(struct bnxt *bp)
1397313984
static void bnxt_fw_reset_abort(struct bnxt *bp, int rc)
1397413985
{
1397513986
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
13976-
if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) {
13977-
bnxt_ulp_start(bp, rc);
13987+
if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF)
1397813988
bnxt_dl_health_fw_status_update(bp, false);
13979-
}
1398013989
bp->fw_reset_state = 0;
1398113990
dev_close(bp->dev);
1398213991
}
@@ -14007,7 +14016,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
1400714016
bp->fw_reset_state = 0;
1400814017
netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n",
1400914018
n);
14010-
return;
14019+
goto ulp_start;
1401114020
}
1401214021
bnxt_queue_fw_reset_work(bp, HZ / 10);
1401314022
return;
@@ -14017,7 +14026,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
1401714026
if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) {
1401814027
bnxt_fw_reset_abort(bp, rc);
1401914028
rtnl_unlock();
14020-
return;
14029+
goto ulp_start;
1402114030
}
1402214031
bnxt_fw_reset_close(bp);
1402314032
if (bp->fw_cap & BNXT_FW_CAP_ERR_RECOVER_RELOAD) {
@@ -14110,7 +14119,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
1411014119
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
1411114120
bnxt_fw_reset_abort(bp, rc);
1411214121
rtnl_unlock();
14113-
return;
14122+
goto ulp_start;
1411414123
}
1411514124

1411614125
if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) &&
@@ -14122,17 +14131,19 @@ static void bnxt_fw_reset_task(struct work_struct *work)
1412214131
/* Make sure fw_reset_state is 0 before clearing the flag */
1412314132
smp_mb__before_atomic();
1412414133
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
14125-
bnxt_ulp_start(bp, 0);
14126-
bnxt_reenable_sriov(bp);
14127-
bnxt_vf_reps_alloc(bp);
14128-
bnxt_vf_reps_open(bp);
1412914134
bnxt_ptp_reapply_pps(bp);
1413014135
clear_bit(BNXT_STATE_FW_ACTIVATE, &bp->state);
1413114136
if (test_and_clear_bit(BNXT_STATE_RECOVER, &bp->state)) {
1413214137
bnxt_dl_health_fw_recovery_done(bp);
1413314138
bnxt_dl_health_fw_status_update(bp, true);
1413414139
}
1413514140
rtnl_unlock();
14141+
bnxt_ulp_start(bp, 0);
14142+
bnxt_reenable_sriov(bp);
14143+
rtnl_lock();
14144+
bnxt_vf_reps_alloc(bp);
14145+
bnxt_vf_reps_open(bp);
14146+
rtnl_unlock();
1413614147
break;
1413714148
}
1413814149
return;
@@ -14148,6 +14159,8 @@ static void bnxt_fw_reset_task(struct work_struct *work)
1414814159
rtnl_lock();
1414914160
bnxt_fw_reset_abort(bp, rc);
1415014161
rtnl_unlock();
14162+
ulp_start:
14163+
bnxt_ulp_start(bp, rc);
1415114164
}
1415214165

1415314166
static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
@@ -15534,8 +15547,9 @@ static int bnxt_suspend(struct device *device)
1553415547
struct bnxt *bp = netdev_priv(dev);
1553515548
int rc = 0;
1553615549

15537-
rtnl_lock();
1553815550
bnxt_ulp_stop(bp);
15551+
15552+
rtnl_lock();
1553915553
if (netif_running(dev)) {
1554015554
netif_device_detach(dev);
1554115555
rc = bnxt_close(dev);
@@ -15590,10 +15604,10 @@ static int bnxt_resume(struct device *device)
1559015604
}
1559115605

1559215606
resume_exit:
15607+
rtnl_unlock();
1559315608
bnxt_ulp_start(bp, rc);
1559415609
if (!rc)
1559515610
bnxt_reenable_sriov(bp);
15596-
rtnl_unlock();
1559715611
return rc;
1559815612
}
1559915613

@@ -15623,11 +15637,11 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
1562315637

1562415638
netdev_info(netdev, "PCI I/O error detected\n");
1562515639

15640+
bnxt_ulp_stop(bp);
15641+
1562615642
rtnl_lock();
1562715643
netif_device_detach(netdev);
1562815644

15629-
bnxt_ulp_stop(bp);
15630-
1563115645
if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
1563215646
netdev_err(bp->dev, "Firmware reset already in progress\n");
1563315647
abort = true;
@@ -15763,13 +15777,13 @@ static void bnxt_io_resume(struct pci_dev *pdev)
1576315777
if (!err && netif_running(netdev))
1576415778
err = bnxt_open(netdev);
1576515779

15766-
bnxt_ulp_start(bp, err);
15767-
if (!err) {
15768-
bnxt_reenable_sriov(bp);
15780+
if (!err)
1576915781
netif_device_attach(netdev);
15770-
}
1577115782

1577215783
rtnl_unlock();
15784+
bnxt_ulp_start(bp, err);
15785+
if (!err)
15786+
bnxt_reenable_sriov(bp);
1577315787
}
1577415788

1577515789
static const struct pci_error_handlers bnxt_err_handler = {

drivers/net/ethernet/broadcom/bnxt/bnxt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2440,6 +2440,7 @@ struct bnxt {
24402440
#define BNXT_LINK_CFG_CHANGE_SP_EVENT 21
24412441
#define BNXT_THERMAL_THRESHOLD_SP_EVENT 22
24422442
#define BNXT_FW_ECHO_REQUEST_SP_EVENT 23
2443+
#define BNXT_RESTART_ULP_SP_EVENT 24
24432444

24442445
struct delayed_work fw_reset_task;
24452446
int fw_reset_state;

drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -437,18 +437,20 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change,
437437

438438
switch (action) {
439439
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: {
440+
bnxt_ulp_stop(bp);
440441
rtnl_lock();
441442
if (bnxt_sriov_cfg(bp)) {
442443
NL_SET_ERR_MSG_MOD(extack,
443444
"reload is unsupported while VFs are allocated or being configured");
444445
rtnl_unlock();
446+
bnxt_ulp_start(bp, 0);
445447
return -EOPNOTSUPP;
446448
}
447449
if (bp->dev->reg_state == NETREG_UNREGISTERED) {
448450
rtnl_unlock();
451+
bnxt_ulp_start(bp, 0);
449452
return -ENODEV;
450453
}
451-
bnxt_ulp_stop(bp);
452454
if (netif_running(bp->dev))
453455
bnxt_close_nic(bp, true, true);
454456
bnxt_vf_reps_free(bp);
@@ -516,7 +518,6 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti
516518
bnxt_vf_reps_alloc(bp);
517519
if (netif_running(bp->dev))
518520
rc = bnxt_open_nic(bp, true, true);
519-
bnxt_ulp_start(bp, rc);
520521
if (!rc) {
521522
bnxt_reenable_sriov(bp);
522523
bnxt_ptp_reapply_pps(bp);
@@ -570,6 +571,8 @@ static int bnxt_dl_reload_up(struct devlink *dl, enum devlink_reload_action acti
570571
dev_close(bp->dev);
571572
}
572573
rtnl_unlock();
574+
if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
575+
bnxt_ulp_start(bp, rc);
573576
return rc;
574577
}
575578

0 commit comments

Comments
 (0)