diff --git a/configure.ac b/configure.ac index 4d66c55d0a96..77a90b88dcd1 100644 --- a/configure.ac +++ b/configure.ac @@ -1384,6 +1384,7 @@ ]) # DPDK support + enable_dpdk_bond_pmd="no" AC_ARG_ENABLE(dpdk, AS_HELP_STRING([--enable-dpdk], [Enable DPDK support [default=no]]), [enable_dpdk=$enableval],[enable_dpdk=no]) @@ -1415,6 +1416,23 @@ fi CFLAGS="${CFLAGS} `pkg-config --cflags libdpdk`" LIBS="${LIBS} -Wl,-R,`pkg-config --libs-only-L libdpdk | cut -c 3-` -lnuma `pkg-config --libs libdpdk`" + + if test ! -z "$(ldconfig -p | grep librte_net_bond)"; then + AC_DEFINE([HAVE_DPDK_BOND],[1],(DPDK Bond PMD support enabled)) + enable_dpdk_bond_pmd="yes" + LIBS="${LIBS} -lrte_net_bond" # 20.11+ + elif test ! -z "$(ldconfig -p | grep librte_pmd_bond)"; then + AC_DEFINE([HAVE_DPDK_BOND],[1],(DPDK Bond PMD support enabled)) + enable_dpdk_bond_pmd="yes" + LIBS="${LIBS} -lrte_pmd_bond" + else + echo + echo " WARNING: DPDK Bond PMD was not found on your system, " + echo " you will be unable to use DPDK Bond PMD." + echo " You can try to \"sudo ldconfig\" and reconfigure again" + echo " or compile and install DPDK with Bond support enabled." + echo + fi ]) # Netmap support @@ -2629,6 +2647,7 @@ SURICATA_BUILD_CONF="Suricata Configuration: Profiling rules enabled: ${enable_profiling_rules} Plugin support (experimental): ${plugin_support} + DPDK Bond PMD: ${enable_dpdk_bond_pmd} Development settings: Coccinelle / spatch: ${enable_coccinelle} diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst new file mode 100644 index 000000000000..91ae1c876ca9 --- /dev/null +++ b/doc/userguide/capture-hardware/dpdk.rst @@ -0,0 +1,97 @@ +.. _dpdk: + +DPDK +==== + +Introduction +------------- + +The Data Plane Development Kit (DPDK) is a set of libraries and drivers that +enhance and speed up packet processing in the data plane. 
Its primary use is to +provide faster packet processing by bypassing the kernel network stack, which +can provide significant performance improvements. For detailed instructions on +how to set up DPDK, please refer to :doc:`../configuration/suricata-yaml`, which +describes the basic setup for DPDK. +The following sections contain examples of how to set up DPDK and Suricata for +more obscure use-cases. + +Bond interface +-------------- + +Link Bonding Poll Mode Driver (Bond PMD) is a software +mechanism provided by the Data Plane Development Kit (DPDK) for aggregating +multiple physical network interfaces into a single logical interface. +Bonding can be used, for example, to: + +* deliver bidirectional flows of tapped interfaces to the same worker, +* establish redundancy by monitoring multiple links, +* improve network performance by load-balancing traffic across multiple links. + +Bond PMD is essentially a virtual driver that manages multiple +physical network interfaces. It can operate in multiple modes as described +in the `DPDK docs +<https://doc.dpdk.org/guides/prog_guide/link_bonding_poll_mode_drv_lib.html>`_. +The individual bonding modes can accommodate different user needs. +DPDK Bond PMD has a requirement that the aggregated interfaces must be +of the same device type - e.g. both physical ports run on mlx5 PMD. +Bond PMD supports multiple queues and therefore can work in workers runmode. +It should have no effect on traffic distribution of the individual ports and +flows should be distributed by physical ports according to the RSS +configuration the same way as if they were configured independently. + +As an example of Bond PMD, we can set up Suricata to monitor 2 interfaces +that receive TAP traffic from optical interfaces. This means that Suricata +receives one direction of the communication on one interface and the other +direction is received on the other interface. + +:: + + ... 
+ dpdk: + eal-params: + proc-type: primary + vdev: 'net_bonding0,mode=0,slave=0000:04:00.0,slave=0000:04:00.1' + + # DPDK capture support + # RX queues (and TX queues in IPS mode) are assigned to cores in 1:1 ratio + interfaces: + - interface: net_bonding0 # name of the bonding device defined in eal-params vdev + # Threading: possible values are either "auto" or number of threads + # - auto takes all cores + # in IPS mode it is required to specify the number of cores and the + # numbers on both interfaces must match + threads: 4 + ... + +In the DPDK part of suricata.yaml we have added a new parameter to the +eal-params section for virtual devices - `vdev`. +DPDK Environment Abstraction Layer (EAL) can initialize some virtual devices +during the initialization of EAL. +In this case, EAL creates a new device of type `net_bonding`. The suffix appended to +`net_bonding` distinguishes the interface (in this case zero, giving `net_bonding0`). +Extra arguments are passed after the device name, such as the bonding mode +(`mode=0`). This is the round-robin mode as described in the DPDK +documentation of Bond PMD. +Members (slaves) of the `net_bonding0` interface are appended after +the bonding mode parameter. + +When the device is specified within EAL parameters, it can be used within the +Suricata `interfaces` list. Note that the list doesn't contain PCIe addresses +of the physical ports but instead the `net_bonding0` interface. +The threading section is also adjusted according to the items in the interfaces +list by enabling set-cpu-affinity and listing the CPUs that should be used in +the management and worker CPU sets. + +:: + + ... + threading: + set-cpu-affinity: yes + cpu-affinity: + - management-cpu-set: + cpu: [ 0 ] # include only these CPUs in affinity settings + - receive-cpu-set: + cpu: [ 0 ] # include only these CPUs in affinity settings + - worker-cpu-set: + cpu: [ 2,4,6,8 ] + ... 
diff --git a/doc/userguide/capture-hardware/index.rst b/doc/userguide/capture-hardware/index.rst index f121508cb49d..992bd07f614b 100644 --- a/doc/userguide/capture-hardware/index.rst +++ b/doc/userguide/capture-hardware/index.rst @@ -9,3 +9,4 @@ Using Capture Hardware ebpf-xdp netmap af-xdp + dpdk diff --git a/src/Makefile.am b/src/Makefile.am index 737b6a6a71fa..4ca8dc7bdbc3 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -532,6 +532,7 @@ noinst_HEADERS = \ util-dpdk-i40e.h \ util-dpdk-ice.h \ util-dpdk-ixgbe.h \ + util-dpdk-bonding.h \ util-ebpf.h \ util-enum.h \ util-error.h \ @@ -1127,6 +1128,7 @@ libsuricata_c_a_SOURCES = \ util-dpdk-i40e.c \ util-dpdk-ice.c \ util-dpdk-ixgbe.c \ + util-dpdk-bonding.c \ util-ebpf.c \ util-enum.c \ util-error.c \ diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index 72e3ed2a51e8..82e6aee57f27 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -44,6 +44,7 @@ #include "util-dpdk-i40e.h" #include "util-dpdk-ice.h" #include "util-dpdk-ixgbe.h" +#include "util-dpdk-bonding.h" #include "util-time.h" #include "util-conf.h" #include "suricata.h" @@ -765,7 +766,7 @@ static void DeviceSetPMDSpecificRSS(struct rte_eth_rss_conf *rss_conf, const cha { // RSS is configured in a specific way for a driver i40e and DPDK version <= 19.xx if (strcmp(driver_name, "net_i40e") == 0) - i40eDeviceSetRSSHashFunction(&rss_conf->rss_hf); + i40eDeviceSetRSSConf(rss_conf); if (strcmp(driver_name, "net_ice") == 0) iceDeviceSetRSSHashFunction(&rss_conf->rss_hf); if (strcmp(driver_name, "net_ixgbe") == 0) @@ -921,6 +922,52 @@ static void DumpRSSFlags(const uint64_t requested, const uint64_t actual) SCLogConfig("RTE_ETH_RSS_L4_DST_ONLY %sset", (actual & RTE_ETH_RSS_L4_DST_ONLY) ? "" : "NOT "); } +static void DumpRXOffloadCapabilities(const uint64_t rx_offld_capa) +{ + SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_STRIP - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP ? 
"" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_IPV4_CKSUM - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_UDP_CKSUM - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_TCP_CKSUM - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_TCP_LRO - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_QINQ_STRIP - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_QINQ_STRIP ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_MACSEC_STRIP - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_MACSEC_STRIP ? "" : "NOT "); +#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0) + SCLogConfig("RTE_ETH_RX_OFFLOAD_HEADER_SPLIT - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT ? "" : "NOT "); +#endif + SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_FILTER - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_FILTER ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_VLAN_EXTEND - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_SCATTER - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_SCATTER ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_TIMESTAMP - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_TIMESTAMP ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_SECURITY - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_SECURITY ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_KEEP_CRC - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_KEEP_CRC ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_SCTP_CKSUM - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_SCTP_CKSUM ? 
"" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM ? "" : "NOT "); + SCLogConfig("RTE_ETH_RX_OFFLOAD_RSS_HASH - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_RSS_HASH ? "" : "NOT "); +#if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0) + SCLogConfig("RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT - %savailable", + rx_offld_capa & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT ? "" : "NOT "); +#endif +} + static int DeviceValidateMTU(const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info) { if (iconf->mtu > dev_info->max_mtu || iconf->mtu < dev_info->min_mtu) { @@ -975,6 +1022,7 @@ static int32_t DeviceSetSocketID(uint16_t port_id, int32_t *socket_id) static void DeviceInitPortConf(const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info, struct rte_eth_conf *port_conf) { + DumpRXOffloadCapabilities(dev_info->rx_offload_capa); *port_conf = (struct rte_eth_conf){ .rxmode = { .mq_mode = RTE_ETH_MQ_RX_NONE, @@ -996,7 +1044,12 @@ static void DeviceInitPortConf(const DPDKIfaceConfig *iconf, .rss_hf = iconf->rss_hf, }; - DeviceSetPMDSpecificRSS(&port_conf->rx_adv_conf.rss_conf, dev_info->driver_name); + const char *dev_driver = dev_info->driver_name; + if (strcmp(dev_info->driver_name, "net_bonding") == 0) { + dev_driver = BondingDeviceDriverGet(iconf->port_id); + } + + DeviceSetPMDSpecificRSS(&port_conf->rx_adv_conf.rss_conf, dev_driver); uint64_t rss_hf_tmp = port_conf->rx_adv_conf.rss_conf.rss_hf & dev_info->flow_type_rss_offloads; @@ -1197,17 +1250,51 @@ static int DeviceConfigureIPS(DPDKIfaceConfig *iconf) SCReturnInt(0); } +/** + * Verifies that the device info has not changed after the device was + * configured. Some drivers (e.g. DPDK Bond PMD with Intel NICs i40e/ixgbe) update + * device info only after the device configuration. For Bond PMD the member ports are also checked with BondingAllDevicesSameDriver(). 
+ * @param iconf interface configuration, provides the port id and interface name + * @param dev_info device info obtained before the device was configured + * @return 0 on success, -EAGAIN when reconfiguration is needed, <0 on failure + */ +static int32_t DeviceVerifyPostConfigure( + const DPDKIfaceConfig *iconf, const struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_dev_info post_conf_dev_info = { 0 }; + int32_t ret = rte_eth_dev_info_get(iconf->port_id, &post_conf_dev_info); + if (ret < 0) { + SCLogError("%s: getting device info failed (err: %s)", iconf->iface, rte_strerror(-ret)); + SCReturnInt(ret); + } + + if (dev_info->flow_type_rss_offloads != post_conf_dev_info.flow_type_rss_offloads || + dev_info->rx_offload_capa != post_conf_dev_info.rx_offload_capa || + dev_info->tx_offload_capa != post_conf_dev_info.tx_offload_capa || + dev_info->max_rx_queues != post_conf_dev_info.max_rx_queues || + dev_info->max_tx_queues != post_conf_dev_info.max_tx_queues || + dev_info->max_mtu != post_conf_dev_info.max_mtu) { + SCLogWarning("Device information severely changed after configuration, reconfiguring"); + return -EAGAIN; + } + + if (strcmp(dev_info->driver_name, "net_bonding") == 0) { + ret = BondingAllDevicesSameDriver(iconf->port_id); + if (ret < 0) { + SCLogError("%s: bond port uses port with different DPDK drivers", iconf->iface); + SCReturnInt(ret); + } + } + + return 0; +} + static int DeviceConfigure(DPDKIfaceConfig *iconf) { SCEnter(); - // configure device - int retval; - struct rte_eth_dev_info dev_info; - struct rte_eth_conf port_conf; - - retval = rte_eth_dev_get_port_by_name(iconf->iface, &(iconf->port_id)); + int32_t retval = rte_eth_dev_get_port_by_name(iconf->iface, &(iconf->port_id)); if (retval < 0) { - SCLogError("%s: getting port id failed (err=%d). 
Is device enabled?", iconf->iface, retval); + SCLogError("%s: getting port id failed (err: %s)", iconf->iface, rte_strerror(-retval)); SCReturnInt(retval); } @@ -1218,13 +1305,14 @@ static int DeviceConfigure(DPDKIfaceConfig *iconf) retval = DeviceSetSocketID(iconf->port_id, &iconf->socket_id); if (retval < 0) { - SCLogError("%s: invalid socket id (err=%d)", iconf->iface, retval); + SCLogError("%s: invalid socket id (err: %s)", iconf->iface, rte_strerror(-retval)); SCReturnInt(retval); } + struct rte_eth_dev_info dev_info = { 0 }; retval = rte_eth_dev_info_get(iconf->port_id, &dev_info); - if (retval != 0) { - SCLogError("%s: getting device info failed (err=%d)", iconf->iface, retval); + if (retval < 0) { + SCLogError("%s: getting device info failed (err: %s)", iconf->iface, rte_strerror(-retval)); SCReturnInt(retval); } @@ -1241,9 +1329,10 @@ static int DeviceConfigure(DPDKIfaceConfig *iconf) } retval = DeviceValidateMTU(iconf, &dev_info); - if (retval != 0) + if (retval < 0) return retval; + struct rte_eth_conf port_conf = { 0 }; DeviceInitPortConf(iconf, &dev_info, &port_conf); if (port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM) { // Suricata does not need recalc checksums now @@ -1252,12 +1341,16 @@ static int DeviceConfigure(DPDKIfaceConfig *iconf) retval = rte_eth_dev_configure( iconf->port_id, iconf->nb_rx_queues, iconf->nb_tx_queues, &port_conf); - if (retval != 0) { - SCLogError("%s: failed to configure the device (port %u, err %d)", iconf->iface, - iconf->port_id, retval); + if (retval < 0) { + SCLogError("%s: failed to configure the device (port %u, err %s)", iconf->iface, + iconf->port_id, rte_strerror(-retval)); SCReturnInt(retval); } + retval = DeviceVerifyPostConfigure(iconf, &dev_info); + if (retval < 0) + return retval; + retval = rte_eth_dev_adjust_nb_rx_tx_desc( iconf->port_id, &iconf->nb_rx_desc, &iconf->nb_tx_desc); if (retval != 0) { @@ -1348,7 +1441,13 @@ static void *ParseDpdkConfigAndConfigureDevice(const char *iface) 
FatalError("DPDK configuration could not be parsed"); } - if (DeviceConfigure(iconf) != 0) { + retval = DeviceConfigure(iconf); + if (retval == -EAGAIN) { + // device info changed during the first configure (e.g. Bond PMD), configure once more + retval = DeviceConfigure(iconf); + } + + if (retval < 0) { // handles both configure attempts iconf->DerefFunc(iconf); retval = rte_eal_cleanup(); if (retval != 0) diff --git a/src/source-dpdk.c b/src/source-dpdk.c index 14dd9f807125..cc9dcd6f6e62 100644 --- a/src/source-dpdk.c +++ b/src/source-dpdk.c @@ -87,6 +87,7 @@ TmEcode NoDPDKSupportExit(ThreadVars *tv, const void *initdata, void **data) #include "util-dpdk.h" #include "util-dpdk-i40e.h" +#include "util-dpdk-bonding.h" #include #define BURST_SIZE 32 @@ -194,6 +195,10 @@ static uint64_t DPDKGetSeconds(void) static void DevicePostStartPMDSpecificActions(DPDKThreadVars *ptv, const char *driver_name) { + if (strcmp(driver_name, "net_bonding") == 0) { + driver_name = BondingDeviceDriverGet(ptv->port_id); + } + // The PMD Driver i40e has a special way to set the RSS, it can be set via rte_flow rules // and only after the start of the port if (strcmp(driver_name, "net_i40e") == 0) @@ -202,16 +207,20 @@ static void DevicePostStartPMDSpecificActions(DPDKThreadVars *ptv, const char *d static void DevicePreStopPMDSpecificActions(DPDKThreadVars *ptv, const char *driver_name) { - int retval; + if (strcmp(driver_name, "net_bonding") == 0) { + driver_name = BondingDeviceDriverGet(ptv->port_id); + } if (strcmp(driver_name, "net_i40e") == 0) { +#if RTE_VERSION > RTE_VERSION_NUM(20, 0, 0, 0) // Flush the RSS rules that have been inserted in the post start section struct rte_flow_error flush_error = { 0 }; - retval = rte_flow_flush(ptv->port_id, &flush_error); + int32_t retval = rte_flow_flush(ptv->port_id, &flush_error); if (retval != 0) { SCLogError("Unable to flush rte_flow rules: %s Flush error msg: %s", rte_strerror(-retval), flush_error.message); } +#endif /* RTE_VERSION > 
RTE_VERSION_NUM(20, 0, 0, 0) */ } } diff 
--git a/src/util-dpdk-bonding.c b/src/util-dpdk-bonding.c new file mode 100644 index 000000000000..2dda0927a735 --- /dev/null +++ b/src/util-dpdk-bonding.c @@ -0,0 +1,120 @@ +/* Copyright (C) 2023 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Lukas Sismis + */ + +#ifndef UTIL_DPDK_BONDING_C +#define UTIL_DPDK_BONDING_C + +#include "suricata-common.h" +#include "util-dpdk-bonding.h" + +#ifdef HAVE_DPDK + +#include "util-dpdk.h" +#include "util-debug.h" + +/** + * Determines if the port is Bond or not by evaluating device driver name + * @param pid port ID + * @return 0 - the device is Bond PMD, 1 - regular device, <0 error + */ +int32_t BondingIsBond(uint16_t pid) +{ + struct rte_eth_dev_info di; + int32_t ret = rte_eth_dev_info_get(pid, &di); + if (ret < 0) { + SCLogError("%s: unable to get device info (err: %s)", DPDKGetPortNameByPortID(pid), + rte_strerror(-ret)); + return ret; + } + + return strcmp(di.driver_name, "net_bonding") == 0 ? 
0 : 1; +} + +uint16_t BondingMemberDevicesGet( + uint16_t bond_pid, uint16_t bonded_devs[], uint16_t bonded_devs_length) +{ +#ifdef HAVE_DPDK_BOND + int32_t len = rte_eth_bond_slaves_get(bond_pid, bonded_devs, bonded_devs_length); + if (len == 0) + FatalError("%s: no bonded devices found", DPDKGetPortNameByPortID(bond_pid)); + else if (len < 0) + FatalError("%s: unable to get bonded devices (err: %s)", DPDKGetPortNameByPortID(bond_pid), + rte_strerror(-len)); + + return len; +#else + FatalError( + "%s: bond port not supported in DPDK installation", DPDKGetPortNameByPortID(bond_pid)); +#endif +} + +int32_t BondingAllDevicesSameDriver(uint16_t bond_pid) +{ + uint16_t bonded_devs[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t len = BondingMemberDevicesGet(bond_pid, bonded_devs, RTE_MAX_ETHPORTS); + + const char *driver_name = NULL, *first_driver_name = NULL; + struct rte_eth_dev_info di = { 0 }; + + for (uint16_t i = 0; i < len; i++) { + int32_t ret = rte_eth_dev_info_get(bonded_devs[i], &di); + if (ret < 0) + FatalError("%s: unable to get device info (err: %s)", + DPDKGetPortNameByPortID(bonded_devs[i]), rte_strerror(-ret)); + + if (i == 0) { + first_driver_name = di.driver_name; + } else { + driver_name = di.driver_name; + if (strncmp(first_driver_name, driver_name, + MIN(strlen(first_driver_name), strlen(driver_name))) != 0) { + return -EINVAL; // inconsistent drivers; NOTE(review): only the shorter name's length is compared, so a name that is a prefix of the other passes - confirm this is intended + } + } + } + + return 0; +} + +/** + * Translates to the driver that is actually used by the bonded ports + * \param bond_pid port ID of the Bond PMD device + * \return driver name of the first member port, FatalError otherwise + */ +const char *BondingDeviceDriverGet(uint16_t bond_pid) +{ + uint16_t bonded_devs[RTE_MAX_ETHPORTS] = { 0 }; + BondingMemberDevicesGet(bond_pid, bonded_devs, RTE_MAX_ETHPORTS); + + struct rte_eth_dev_info di = { 0 }; + int32_t ret = 
rte_eth_dev_info_get(bonded_devs[0], &di); + if (ret < 0) + FatalError("%s: unable to get device info (err: %s)", + DPDKGetPortNameByPortID(bonded_devs[0]), rte_strerror(-ret)); + + return di.driver_name; +} 
+ +#endif /* HAVE_DPDK */ + +#endif /* UTIL_DPDK_BONDING_C */ diff --git a/src/util-dpdk-bonding.h b/src/util-dpdk-bonding.h new file mode 100644 index 000000000000..f7fad5e64852 --- /dev/null +++ b/src/util-dpdk-bonding.h @@ -0,0 +1,39 @@ +/* Copyright (C) 2023 Open Information Security Foundation + * + * You can copy, redistribute or modify this Program under the terms of + * the GNU General Public License version 2 as published by the Free + * Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/** + * \file + * + * \author Lukas Sismis + */ + +#ifndef UTIL_DPDK_BONDING_H +#define UTIL_DPDK_BONDING_H + +#include "suricata-common.h" + +#ifdef HAVE_DPDK + +int32_t BondingIsBond(uint16_t pid); +uint16_t BondingMemberDevicesGet( + uint16_t bond_pid, uint16_t bonded_devs[], uint16_t bonded_devs_length); +int32_t BondingAllDevicesSameDriver(uint16_t bond_pid); +const char *BondingDeviceDriverGet(uint16_t bond_pid); + +#endif /* HAVE_DPDK */ + +#endif /* UTIL_DPDK_BONDING_H */ diff --git a/src/util-dpdk-i40e.c b/src/util-dpdk-i40e.c index 4191b7750c82..2484b45c793b 100644 --- a/src/util-dpdk-i40e.c +++ b/src/util-dpdk-i40e.c @@ -33,6 +33,7 @@ #include "util-dpdk-i40e.h" #include "util-dpdk.h" #include "util-debug.h" +#include "util-dpdk-bonding.h" #ifdef HAVE_DPDK @@ -110,7 +111,7 @@ static int i40eDeviceSetSymHash(int port_id, const char *port_name, int enable) return 0; } -static int i40eDeviceSetRSSWithFilter(int port_id, const char *port_name) +static int i40eDeviceApplyRSSFilter(int port_id, const char 
*port_name) { int retval = 0; @@ -142,6 +143,35 @@ static int i40eDeviceSetRSSWithFilter(int port_id, const char *port_name) return retval; } +/** + * Applies the RSS filter to a regular port or, for a Bond PMD port, to each of its member ports + * \param port_id ID of the port (regular or Bond PMD) + * \param port_name port name passed on to i40eDeviceApplyRSSFilter() + * \return 0 on success, negative error code of BondingIsBond() or the first non-zero + * status returned by i40eDeviceApplyRSSFilter() otherwise + */ +static int32_t i40eDeviceSetRSSWithFilter(int port_id, const char *port_name) +{ + int32_t ret = BondingIsBond(port_id); + if (ret < 0) + return ret; // already a negative error code, must not be negated + + if (ret == 1) // regular device + return i40eDeviceApplyRSSFilter(port_id, port_name); + + // the device is Bond PMD, apply the filter on every member port + uint16_t bonded_devs[RTE_MAX_ETHPORTS] = { 0 }; + uint16_t len = BondingMemberDevicesGet(port_id, bonded_devs, RTE_MAX_ETHPORTS); + int32_t status = 0; + for (uint16_t i = 0; i < len; i++) { + ret = i40eDeviceApplyRSSFilter(bonded_devs[i], port_name); + if (ret != 0 && status == 0) + status = ret; // keep the first failure, still try the remaining members + } + + return status; +} + #else static int i40eDeviceSetRSSFlowQueues( @@ -372,16 +402,19 @@ int i40eDeviceSetRSS(int port_id, int nb_rx_queues) return 0; } -void i40eDeviceSetRSSHashFunction(uint64_t *rss_hf) +void i40eDeviceSetRSSConf(struct rte_eth_rss_conf *rss_conf) { #if RTE_VERSION >= RTE_VERSION_NUM(20, 0, 0, 0) - *rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_FRAG_IPV6 | - RTE_ETH_RSS_NONFRAG_IPV6_OTHER; + rss_conf->rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | + RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_NONFRAG_IPV6_OTHER; + rss_conf->rss_key = NULL; + rss_conf->rss_key_len = 0; #else - *rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV4_UDP | - 
RTE_ETH_RSS_NONFRAG_IPV4_SCTP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | - RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_NONFRAG_IPV6_TCP | RTE_ETH_RSS_NONFRAG_IPV6_UDP | - RTE_ETH_RSS_NONFRAG_IPV6_SCTP | RTE_ETH_RSS_NONFRAG_IPV6_OTHER | RTE_ETH_RSS_SCTP; + rss_conf->rss_hf = + RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV4_UDP | + RTE_ETH_RSS_NONFRAG_IPV4_SCTP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_FRAG_IPV6 | + RTE_ETH_RSS_NONFRAG_IPV6_TCP | RTE_ETH_RSS_NONFRAG_IPV6_UDP | + RTE_ETH_RSS_NONFRAG_IPV6_SCTP | 
RTE_ETH_RSS_NONFRAG_IPV6_OTHER | RTE_ETH_RSS_SCTP; #endif } diff --git a/src/util-dpdk-i40e.h b/src/util-dpdk-i40e.h index 6b1eb7fd13cf..6133aed5d771 100644 --- a/src/util-dpdk-i40e.h +++ b/src/util-dpdk-i40e.h @@ -28,8 +28,10 @@ #ifdef HAVE_DPDK +#include "util-dpdk.h" + int i40eDeviceSetRSS(int port_id, int nb_rx_queues); -void i40eDeviceSetRSSHashFunction(uint64_t *rss_conf); +void i40eDeviceSetRSSConf(struct rte_eth_rss_conf *rss_conf); #endif /* HAVE_DPDK */ diff --git a/src/util-dpdk.c b/src/util-dpdk.c index 83284411fe2a..291ab075e08f 100644 --- a/src/util-dpdk.c +++ b/src/util-dpdk.c @@ -57,3 +57,23 @@ void DPDKCloseDevice(LiveDevice *ldev) } #endif } + +#ifdef HAVE_DPDK + +/** + * Retrieves name of the port from port id + * Not thread-safe + * @param pid + * @return static dev_name on success + */ +const char *DPDKGetPortNameByPortID(uint16_t pid) +{ + static char dev_name[RTE_ETH_NAME_MAX_LEN]; + int32_t ret = rte_eth_dev_get_name_by_port(pid, dev_name); + if (ret < 0) { + FatalError("Port %d: Failed to obtain port name (err: %s)", pid, rte_strerror(-ret)); + } + return dev_name; +} + +#endif /* HAVE_DPDK */ diff --git a/src/util-dpdk.h b/src/util-dpdk.h index e711eaf6cecb..f6a54a8323e4 100644 --- a/src/util-dpdk.h +++ b/src/util-dpdk.h @@ -28,6 +28,9 @@ #include #include +#ifdef HAVE_DPDK_BOND +#include <rte_eth_bond.h> +#endif #include #include #include @@ -44,6 +47,24 @@ #define RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE DEV_TX_OFFLOAD_MBUF_FAST_FREE #define RTE_ETH_RX_OFFLOAD_CHECKSUM DEV_RX_OFFLOAD_CHECKSUM + +#define RTE_ETH_RX_OFFLOAD_VLAN_STRIP DEV_RX_OFFLOAD_VLAN_STRIP +#define RTE_ETH_RX_OFFLOAD_IPV4_CKSUM DEV_RX_OFFLOAD_IPV4_CKSUM +#define RTE_ETH_RX_OFFLOAD_UDP_CKSUM DEV_RX_OFFLOAD_UDP_CKSUM +#define RTE_ETH_RX_OFFLOAD_TCP_CKSUM DEV_RX_OFFLOAD_TCP_CKSUM +#define RTE_ETH_RX_OFFLOAD_TCP_LRO DEV_RX_OFFLOAD_TCP_LRO +#define RTE_ETH_RX_OFFLOAD_QINQ_STRIP DEV_RX_OFFLOAD_QINQ_STRIP +#define RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM +#define 
RTE_ETH_RX_OFFLOAD_MACSEC_STRIP DEV_RX_OFFLOAD_MACSEC_STRIP +#define RTE_ETH_RX_OFFLOAD_HEADER_SPLIT DEV_RX_OFFLOAD_HEADER_SPLIT +#define RTE_ETH_RX_OFFLOAD_VLAN_FILTER DEV_RX_OFFLOAD_VLAN_FILTER +#define RTE_ETH_RX_OFFLOAD_VLAN_EXTEND DEV_RX_OFFLOAD_VLAN_EXTEND +#define RTE_ETH_RX_OFFLOAD_SCATTER DEV_RX_OFFLOAD_SCATTER +#define RTE_ETH_RX_OFFLOAD_TIMESTAMP DEV_RX_OFFLOAD_TIMESTAMP +#define RTE_ETH_RX_OFFLOAD_SECURITY DEV_RX_OFFLOAD_SECURITY +#define RTE_ETH_RX_OFFLOAD_KEEP_CRC DEV_RX_OFFLOAD_KEEP_CRC +#define RTE_ETH_RX_OFFLOAD_SCTP_CKSUM DEV_RX_OFFLOAD_SCTP_CKSUM +#define RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM DEV_RX_OFFLOAD_OUTER_UDP_CKSUM #define RTE_ETH_RX_OFFLOAD_RSS_HASH DEV_RX_OFFLOAD_RSS_HASH #define RTE_ETH_MQ_TX_NONE ETH_MQ_TX_NONE @@ -100,4 +121,8 @@ void DPDKCleanupEAL(void); void DPDKCloseDevice(LiveDevice *ldev); +#ifdef HAVE_DPDK +const char *DPDKGetPortNameByPortID(uint16_t pid); +#endif /* HAVE_DPDK */ + #endif /* UTIL_DPDK_H */