Skip to content

Commit 519ca6f

Browse files
committed
Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says: ==================== Intel Wired LAN Driver Updates 2022-02-25 This series contains updates to iavf driver only. Slawomir fixes stability issues that can be seen when stressing the driver using a large number of VFs with a multitude of operations. Among the fixes are reworking mutexes to provide more effective locking, ensuring initialization is complete before teardown, preventing operations which could race while removing the driver, stopping certain tasks from being queued when the device is down, and adding a missing mutex unlock. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 328e765 + 14756b2 commit 519ca6f

File tree

3 files changed

+114
-75
lines changed

3 files changed

+114
-75
lines changed

drivers/net/ethernet/intel/iavf/iavf.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ enum iavf_state_t {
201201
__IAVF_RUNNING, /* opened, working */
202202
};
203203

204+
enum iavf_critical_section_t {
205+
__IAVF_IN_REMOVE_TASK, /* device being removed */
206+
};
207+
204208
#define IAVF_CLOUD_FIELD_OMAC 0x01
205209
#define IAVF_CLOUD_FIELD_IMAC 0x02
206210
#define IAVF_CLOUD_FIELD_IVLAN 0x04
@@ -246,7 +250,6 @@ struct iavf_adapter {
246250
struct list_head mac_filter_list;
247251
struct mutex crit_lock;
248252
struct mutex client_lock;
249-
struct mutex remove_lock;
250253
/* Lock to protect accesses to MAC and VLAN lists */
251254
spinlock_t mac_vlan_list_lock;
252255
char misc_vector_name[IFNAMSIZ + 9];
@@ -284,6 +287,7 @@ struct iavf_adapter {
284287
#define IAVF_FLAG_LEGACY_RX BIT(15)
285288
#define IAVF_FLAG_REINIT_ITR_NEEDED BIT(16)
286289
#define IAVF_FLAG_QUEUES_DISABLED BIT(17)
290+
#define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18)
287291
/* duplicates for common code */
288292
#define IAVF_FLAG_DCB_ENABLED 0
289293
/* flags for admin queue service task */

drivers/net/ethernet/intel/iavf/iavf_main.c

+108-51
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,9 @@ static irqreturn_t iavf_msix_aq(int irq, void *data)
302302
rd32(hw, IAVF_VFINT_ICR01);
303303
rd32(hw, IAVF_VFINT_ICR0_ENA1);
304304

305-
/* schedule work on the private workqueue */
306-
queue_work(iavf_wq, &adapter->adminq_task);
305+
if (adapter->state != __IAVF_REMOVE)
306+
/* schedule work on the private workqueue */
307+
queue_work(iavf_wq, &adapter->adminq_task);
307308

308309
return IRQ_HANDLED;
309310
}
@@ -1136,8 +1137,7 @@ void iavf_down(struct iavf_adapter *adapter)
11361137
rss->state = IAVF_ADV_RSS_DEL_REQUEST;
11371138
spin_unlock_bh(&adapter->adv_rss_lock);
11381139

1139-
if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
1140-
adapter->state != __IAVF_RESETTING) {
1140+
if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) {
11411141
/* cancel any current operation */
11421142
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
11431143
/* Schedule operations to close down the HW. Don't wait
@@ -2374,17 +2374,22 @@ static void iavf_watchdog_task(struct work_struct *work)
23742374
struct iavf_hw *hw = &adapter->hw;
23752375
u32 reg_val;
23762376

2377-
if (!mutex_trylock(&adapter->crit_lock))
2377+
if (!mutex_trylock(&adapter->crit_lock)) {
2378+
if (adapter->state == __IAVF_REMOVE)
2379+
return;
2380+
23782381
goto restart_watchdog;
2382+
}
23792383

23802384
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
23812385
iavf_change_state(adapter, __IAVF_COMM_FAILED);
23822386

2383-
if (adapter->flags & IAVF_FLAG_RESET_NEEDED &&
2384-
adapter->state != __IAVF_RESETTING) {
2385-
iavf_change_state(adapter, __IAVF_RESETTING);
2387+
if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
23862388
adapter->aq_required = 0;
23872389
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
2390+
mutex_unlock(&adapter->crit_lock);
2391+
queue_work(iavf_wq, &adapter->reset_task);
2392+
return;
23882393
}
23892394

23902395
switch (adapter->state) {
@@ -2419,6 +2424,15 @@ static void iavf_watchdog_task(struct work_struct *work)
24192424
msecs_to_jiffies(1));
24202425
return;
24212426
case __IAVF_INIT_FAILED:
2427+
if (test_bit(__IAVF_IN_REMOVE_TASK,
2428+
&adapter->crit_section)) {
2429+
/* Do not update the state and do not reschedule
2430+
* watchdog task, iavf_remove should handle this state
2431+
* as it can loop forever
2432+
*/
2433+
mutex_unlock(&adapter->crit_lock);
2434+
return;
2435+
}
24222436
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
24232437
dev_err(&adapter->pdev->dev,
24242438
"Failed to communicate with PF; waiting before retry\n");
@@ -2435,6 +2449,17 @@ static void iavf_watchdog_task(struct work_struct *work)
24352449
queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ);
24362450
return;
24372451
case __IAVF_COMM_FAILED:
2452+
if (test_bit(__IAVF_IN_REMOVE_TASK,
2453+
&adapter->crit_section)) {
2454+
/* Set state to __IAVF_INIT_FAILED and perform remove
2455+
* steps. Remove IAVF_FLAG_PF_COMMS_FAILED so the task
2456+
* doesn't bring the state back to __IAVF_COMM_FAILED.
2457+
*/
2458+
iavf_change_state(adapter, __IAVF_INIT_FAILED);
2459+
adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
2460+
mutex_unlock(&adapter->crit_lock);
2461+
return;
2462+
}
24382463
reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
24392464
IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
24402465
if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
@@ -2507,7 +2532,8 @@ static void iavf_watchdog_task(struct work_struct *work)
25072532
schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
25082533
mutex_unlock(&adapter->crit_lock);
25092534
restart_watchdog:
2510-
queue_work(iavf_wq, &adapter->adminq_task);
2535+
if (adapter->state >= __IAVF_DOWN)
2536+
queue_work(iavf_wq, &adapter->adminq_task);
25112537
if (adapter->aq_required)
25122538
queue_delayed_work(iavf_wq, &adapter->watchdog_task,
25132539
msecs_to_jiffies(20));
@@ -2601,13 +2627,13 @@ static void iavf_reset_task(struct work_struct *work)
26012627
/* When device is being removed it doesn't make sense to run the reset
26022628
* task, just return in such a case.
26032629
*/
2604-
if (mutex_is_locked(&adapter->remove_lock))
2605-
return;
2630+
if (!mutex_trylock(&adapter->crit_lock)) {
2631+
if (adapter->state != __IAVF_REMOVE)
2632+
queue_work(iavf_wq, &adapter->reset_task);
26062633

2607-
if (iavf_lock_timeout(&adapter->crit_lock, 200)) {
2608-
schedule_work(&adapter->reset_task);
26092634
return;
26102635
}
2636+
26112637
while (!mutex_trylock(&adapter->client_lock))
26122638
usleep_range(500, 1000);
26132639
if (CLIENT_ENABLED(adapter)) {
@@ -2662,6 +2688,7 @@ static void iavf_reset_task(struct work_struct *work)
26622688
reg_val);
26632689
iavf_disable_vf(adapter);
26642690
mutex_unlock(&adapter->client_lock);
2691+
mutex_unlock(&adapter->crit_lock);
26652692
return; /* Do not attempt to reinit. It's dead, Jim. */
26662693
}
26672694

@@ -2670,8 +2697,7 @@ static void iavf_reset_task(struct work_struct *work)
26702697
* ndo_open() returning, so we can't assume it means all our open
26712698
* tasks have finished, since we're not holding the rtnl_lock here.
26722699
*/
2673-
running = ((adapter->state == __IAVF_RUNNING) ||
2674-
(adapter->state == __IAVF_RESETTING));
2700+
running = adapter->state == __IAVF_RUNNING;
26752701

26762702
if (running) {
26772703
netdev->flags &= ~IFF_UP;
@@ -2826,13 +2852,19 @@ static void iavf_adminq_task(struct work_struct *work)
28262852
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
28272853
goto out;
28282854

2855+
if (!mutex_trylock(&adapter->crit_lock)) {
2856+
if (adapter->state == __IAVF_REMOVE)
2857+
return;
2858+
2859+
queue_work(iavf_wq, &adapter->adminq_task);
2860+
goto out;
2861+
}
2862+
28292863
event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
28302864
event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
28312865
if (!event.msg_buf)
28322866
goto out;
28332867

2834-
if (iavf_lock_timeout(&adapter->crit_lock, 200))
2835-
goto freedom;
28362868
do {
28372869
ret = iavf_clean_arq_element(hw, &event, &pending);
28382870
v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
@@ -2848,6 +2880,24 @@ static void iavf_adminq_task(struct work_struct *work)
28482880
} while (pending);
28492881
mutex_unlock(&adapter->crit_lock);
28502882

2883+
if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) {
2884+
if (adapter->netdev_registered ||
2885+
!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
2886+
struct net_device *netdev = adapter->netdev;
2887+
2888+
rtnl_lock();
2889+
netdev_update_features(netdev);
2890+
rtnl_unlock();
2891+
/* Request VLAN offload settings */
2892+
if (VLAN_V2_ALLOWED(adapter))
2893+
iavf_set_vlan_offload_features
2894+
(adapter, 0, netdev->features);
2895+
2896+
iavf_set_queue_vlan_tag_loc(adapter);
2897+
}
2898+
2899+
adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
2900+
}
28512901
if ((adapter->flags &
28522902
(IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
28532903
adapter->state == __IAVF_RESETTING)
@@ -3800,11 +3850,12 @@ static int iavf_close(struct net_device *netdev)
38003850
struct iavf_adapter *adapter = netdev_priv(netdev);
38013851
int status;
38023852

3803-
if (adapter->state <= __IAVF_DOWN_PENDING)
3804-
return 0;
3853+
mutex_lock(&adapter->crit_lock);
38053854

3806-
while (!mutex_trylock(&adapter->crit_lock))
3807-
usleep_range(500, 1000);
3855+
if (adapter->state <= __IAVF_DOWN_PENDING) {
3856+
mutex_unlock(&adapter->crit_lock);
3857+
return 0;
3858+
}
38083859

38093860
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
38103861
if (CLIENT_ENABLED(adapter))
@@ -3853,8 +3904,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
38533904
iavf_notify_client_l2_params(&adapter->vsi);
38543905
adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
38553906
}
3856-
adapter->flags |= IAVF_FLAG_RESET_NEEDED;
3857-
queue_work(iavf_wq, &adapter->reset_task);
3907+
3908+
if (netif_running(netdev)) {
3909+
adapter->flags |= IAVF_FLAG_RESET_NEEDED;
3910+
queue_work(iavf_wq, &adapter->reset_task);
3911+
}
38583912

38593913
return 0;
38603914
}
@@ -4431,7 +4485,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
44314485
*/
44324486
mutex_init(&adapter->crit_lock);
44334487
mutex_init(&adapter->client_lock);
4434-
mutex_init(&adapter->remove_lock);
44354488
mutex_init(&hw->aq.asq_mutex);
44364489
mutex_init(&hw->aq.arq_mutex);
44374490

@@ -4547,7 +4600,6 @@ static int __maybe_unused iavf_resume(struct device *dev_d)
45474600
static void iavf_remove(struct pci_dev *pdev)
45484601
{
45494602
struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev);
4550-
enum iavf_state_t prev_state = adapter->last_state;
45514603
struct net_device *netdev = adapter->netdev;
45524604
struct iavf_fdir_fltr *fdir, *fdirtmp;
45534605
struct iavf_vlan_filter *vlf, *vlftmp;
@@ -4556,14 +4608,30 @@ static void iavf_remove(struct pci_dev *pdev)
45564608
struct iavf_cloud_filter *cf, *cftmp;
45574609
struct iavf_hw *hw = &adapter->hw;
45584610
int err;
4559-
/* Indicate we are in remove and not to run reset_task */
4560-
mutex_lock(&adapter->remove_lock);
4561-
cancel_work_sync(&adapter->reset_task);
4611+
4612+
set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
4613+
/* Wait until port initialization is complete.
4614+
* There are flows where register/unregister netdev may race.
4615+
*/
4616+
while (1) {
4617+
mutex_lock(&adapter->crit_lock);
4618+
if (adapter->state == __IAVF_RUNNING ||
4619+
adapter->state == __IAVF_DOWN ||
4620+
adapter->state == __IAVF_INIT_FAILED) {
4621+
mutex_unlock(&adapter->crit_lock);
4622+
break;
4623+
}
4624+
4625+
mutex_unlock(&adapter->crit_lock);
4626+
usleep_range(500, 1000);
4627+
}
45624628
cancel_delayed_work_sync(&adapter->watchdog_task);
4563-
cancel_delayed_work_sync(&adapter->client_task);
4629+
45644630
if (adapter->netdev_registered) {
4565-
unregister_netdev(netdev);
4631+
rtnl_lock();
4632+
unregister_netdevice(netdev);
45664633
adapter->netdev_registered = false;
4634+
rtnl_unlock();
45674635
}
45684636
if (CLIENT_ALLOWED(adapter)) {
45694637
err = iavf_lan_del_device(adapter);
@@ -4572,44 +4640,35 @@ static void iavf_remove(struct pci_dev *pdev)
45724640
err);
45734641
}
45744642

4643+
mutex_lock(&adapter->crit_lock);
4644+
dev_info(&adapter->pdev->dev, "Remove device\n");
4645+
iavf_change_state(adapter, __IAVF_REMOVE);
4646+
45754647
iavf_request_reset(adapter);
45764648
msleep(50);
45774649
/* If the FW isn't responding, kick it once, but only once. */
45784650
if (!iavf_asq_done(hw)) {
45794651
iavf_request_reset(adapter);
45804652
msleep(50);
45814653
}
4582-
if (iavf_lock_timeout(&adapter->crit_lock, 5000))
4583-
dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
45844654

4585-
dev_info(&adapter->pdev->dev, "Removing device\n");
4655+
iavf_misc_irq_disable(adapter);
45864656
/* Shut down all the garbage mashers on the detention level */
4587-
iavf_change_state(adapter, __IAVF_REMOVE);
4657+
cancel_work_sync(&adapter->reset_task);
4658+
cancel_delayed_work_sync(&adapter->watchdog_task);
4659+
cancel_work_sync(&adapter->adminq_task);
4660+
cancel_delayed_work_sync(&adapter->client_task);
4661+
45884662
adapter->aq_required = 0;
45894663
adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
45904664

45914665
iavf_free_all_tx_resources(adapter);
45924666
iavf_free_all_rx_resources(adapter);
4593-
iavf_misc_irq_disable(adapter);
45944667
iavf_free_misc_irq(adapter);
45954668

4596-
/* In case we enter iavf_remove from erroneous state, free traffic irqs
4597-
* here, so as to not cause a kernel crash, when calling
4598-
* iavf_reset_interrupt_capability.
4599-
*/
4600-
if ((adapter->last_state == __IAVF_RESETTING &&
4601-
prev_state != __IAVF_DOWN) ||
4602-
(adapter->last_state == __IAVF_RUNNING &&
4603-
!(netdev->flags & IFF_UP)))
4604-
iavf_free_traffic_irqs(adapter);
4605-
46064669
iavf_reset_interrupt_capability(adapter);
46074670
iavf_free_q_vectors(adapter);
46084671

4609-
cancel_delayed_work_sync(&adapter->watchdog_task);
4610-
4611-
cancel_work_sync(&adapter->adminq_task);
4612-
46134672
iavf_free_rss(adapter);
46144673

46154674
if (hw->aq.asq.count)
@@ -4621,8 +4680,6 @@ static void iavf_remove(struct pci_dev *pdev)
46214680
mutex_destroy(&adapter->client_lock);
46224681
mutex_unlock(&adapter->crit_lock);
46234682
mutex_destroy(&adapter->crit_lock);
4624-
mutex_unlock(&adapter->remove_lock);
4625-
mutex_destroy(&adapter->remove_lock);
46264683

46274684
iounmap(hw->hw_addr);
46284685
pci_release_regions(pdev);

drivers/net/ethernet/intel/iavf/iavf_virtchnl.c

+1-23
Original file line numberDiff line numberDiff line change
@@ -2146,29 +2146,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
21462146
sizeof(adapter->vlan_v2_caps)));
21472147

21482148
iavf_process_config(adapter);
2149-
2150-
/* unlock crit_lock before acquiring rtnl_lock as other
2151-
* processes holding rtnl_lock could be waiting for the same
2152-
* crit_lock
2153-
*/
2154-
mutex_unlock(&adapter->crit_lock);
2155-
/* VLAN capabilities can change during VFR, so make sure to
2156-
* update the netdev features with the new capabilities
2157-
*/
2158-
rtnl_lock();
2159-
netdev_update_features(netdev);
2160-
rtnl_unlock();
2161-
if (iavf_lock_timeout(&adapter->crit_lock, 10000))
2162-
dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n",
2163-
__FUNCTION__);
2164-
2165-
/* Request VLAN offload settings */
2166-
if (VLAN_V2_ALLOWED(adapter))
2167-
iavf_set_vlan_offload_features(adapter, 0,
2168-
netdev->features);
2169-
2170-
iavf_set_queue_vlan_tag_loc(adapter);
2171-
2149+
adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
21722150
}
21732151
break;
21742152
case VIRTCHNL_OP_ENABLE_QUEUES:

0 commit comments

Comments
 (0)