- Add IP checksum offloading support for DPDK.
- Add support for disabling Ethernet flow control for DPDK.
- Enable support for mlx4/mlx5 NICs.
diff --git a/mtcp/src/dpdk_module.c b/mtcp/src/dpdk_module.c
index 33d349e..a29a87d 100644
--- a/mtcp/src/dpdk_module.c
+++ b/mtcp/src/dpdk_module.c
@@ -24,6 +24,8 @@
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
+/* for ip pseudo-chksum */
+#include <rte_ip.h>
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE MAX_CPUS
@@ -193,9 +195,9 @@
#ifdef ENABLE_STATS_IOCTL
dpc->fd = open("/dev/dpdk-iface", O_RDWR);
if (dpc->fd == -1) {
- TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d!\n",
+ TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! "
+ "Are you using mlx4/mlx5 driver?\n",
ctxt->cpu);
- exit(EXIT_FAILURE);
}
#endif /* !ENABLE_STATS_IOCTL */
}
@@ -239,13 +241,15 @@
#ifdef NETSTAT
mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
- ss.tx_pkts = mtcp->nstat.tx_packets[nif];
- ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
- ss.rx_pkts = mtcp->nstat.rx_packets[nif];
- ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
- ss.qid = ctxt->cpu;
- ss.dev = nif;
- ioctl(dpc->fd, 0, &ss);
+ if (likely(dpc->fd >= 0)) {
+ ss.tx_pkts = mtcp->nstat.tx_packets[nif];
+ ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
+ ss.rx_pkts = mtcp->nstat.rx_packets[nif];
+ ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
+ ss.qid = ctxt->cpu;
+ ss.dev = nif;
+ ioctl(dpc->fd, 0, &ss);
+ }
#endif /* !ENABLE_STATS_IOCTL */
#endif
do {
@@ -385,7 +389,8 @@
#ifdef ENABLE_STATS_IOCTL
/* free fd */
- close(dpc->fd);
+ if (dpc->fd >= 0)
+ close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */
/* free it all up */
@@ -498,6 +503,8 @@
dpdk_load_module(void)
{
int portid, rxlcore_id, ret;
+ /* for Ethernet flow control settings */
+ struct rte_eth_fc_conf fc_conf;
/* setting the rss key */
static const uint8_t key[] = {
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
@@ -570,6 +577,19 @@
printf("done: \n");
rte_eth_promiscuous_enable(portid);
+ /* retrieve current flow control settings per port */
+ memset(&fc_conf, 0, sizeof(fc_conf));
+ ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
+
+ /* and just disable the rx/tx flow control */
+ fc_conf.mode = RTE_FC_NONE;
+ ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
+ ret);
+
#ifdef DEBUG
printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
(unsigned) portid,
@@ -605,6 +625,40 @@
check_all_ports_link_status(num_devices_attached, 0xFFFFFFFF);
}
/*----------------------------------------------------------------------------*/
+/* Flag the last mbuf queued on `nif' for IP/TCP TX checksum offload; argp is its
+ * IPv4 header. Returns 0, or -1 so the caller falls back to a S/W checksum. */
+int32_t
+dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
+{
+	struct dpdk_private_context *dpc;
+	struct iphdr *iph = (struct iphdr *)argp;
+	struct tcphdr *tcph;
+	struct rte_mbuf *m;
+	int len_of_mbuf;
+
+	dpc = (struct dpdk_private_context *)ctx->io_private_context;
+	len_of_mbuf = dpc->wmbufs[nif].len;
+	if (iph == NULL || len_of_mbuf <= 0)	/* m_table[len - 1] would be invalid */
+		return -1;
+	m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
+
+	switch (cmd) {
+	case PKT_TX_IP_CSUM:
+		m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
+		m->l2_len = sizeof(struct ether_hdr);
+		m->l3_len = (iph->ihl<<2);
+		break;
+	case PKT_TX_TCP_CSUM:
+		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
+		m->ol_flags |= PKT_TX_TCP_CKSUM;	/* check field is seeded with the pseudo-header sum */
+		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
+		break;
+	default:
+		return -1;
+	}
+	return 0;
+}
+/*----------------------------------------------------------------------------*/
io_module_func dpdk_module_func = {
.load_module = dpdk_load_module,
.init_handle = dpdk_init_handle,
@@ -615,7 +669,8 @@
.recv_pkts = dpdk_recv_pkts,
.get_rptr = dpdk_get_rptr,
.select = dpdk_select,
- .destroy_handle = dpdk_destroy_handle
+ .destroy_handle = dpdk_destroy_handle,
+ .dev_ioctl = dpdk_dev_ioctl
};
/*----------------------------------------------------------------------------*/
#else
@@ -629,7 +684,8 @@
.recv_pkts = NULL,
.get_rptr = NULL,
.select = NULL,
- .destroy_handle = NULL
+ .destroy_handle = NULL,
+ .dev_ioctl = NULL
};
/*----------------------------------------------------------------------------*/
#endif /* !DISABLE_DPDK */
diff --git a/mtcp/src/include/io_module.h b/mtcp/src/include/io_module.h
index 432cc8f..1364628 100644
--- a/mtcp/src/include/io_module.h
+++ b/mtcp/src/include/io_module.h
@@ -49,6 +49,8 @@
* destroy_handle() : free up resources allocated during
* init_handle(). Normally called during
* process termination.
+ *
+ * dev_ioctl() : optional driver-specific commands for select drivers, e.g. TX checksum offload (NULL if unsupported)
*
*/
typedef struct io_module_func {
@@ -62,6 +64,7 @@
int32_t (*recv_pkts)(struct mtcp_thread_context *ctx, int ifidx);
int32_t (*select)(struct mtcp_thread_context *ctx);
void (*destroy_handle)(struct mtcp_thread_context *ctx);
+ int32_t (*dev_ioctl)(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp);
} io_module_func __attribute__((aligned(__WORDSIZE)));
/*----------------------------------------------------------------------------*/
/* set I/O module context */
@@ -70,6 +73,10 @@
/* ptr to the `running' I/O module context */
extern io_module_func *current_iomodule_func;
+/* dev_ioctl related macros */
+#define PKT_TX_IP_CSUM 0x01
+#define PKT_TX_TCP_CSUM 0x02
+
/* registered psio context */
#ifdef DISABLE_PSIO
#define ps_list_devices(x) 0
diff --git a/mtcp/src/ip_out.c b/mtcp/src/ip_out.c
index 2473112..42b25f9 100644
--- a/mtcp/src/ip_out.c
+++ b/mtcp/src/ip_out.c
@@ -39,6 +39,7 @@
struct iphdr *iph;
int nif;
unsigned char * haddr;
+ int rc = -1;
nif = GetOutputInterface(daddr);
if (nif < 0)
@@ -73,7 +74,13 @@
iph->saddr = saddr;
iph->daddr = daddr;
iph->check = 0;
- iph->check = ip_fast_csum(iph, iph->ihl);
+
+ /* offload IP checksum if possible */
+ if (mtcp->iom->dev_ioctl != NULL)
+ rc = mtcp->iom->dev_ioctl(mtcp->ctx, nif, PKT_TX_IP_CSUM, iph);
+ /* otherwise calculate IP checksum in S/W */
+ if (rc == -1)
+ iph->check = ip_fast_csum(iph, iph->ihl);
return (uint8_t *)(iph + 1);
}
@@ -84,6 +91,7 @@
struct iphdr *iph;
int nif;
unsigned char *haddr;
+ int rc = -1;
if (stream->sndvar->nif_out >= 0) {
nif = stream->sndvar->nif_out;
@@ -123,7 +131,13 @@
iph->saddr = stream->saddr;
iph->daddr = stream->daddr;
iph->check = 0;
- iph->check = ip_fast_csum(iph, iph->ihl);
+
+ /* offload IP checksum if possible */
+ if (mtcp->iom->dev_ioctl != NULL)
+ rc = mtcp->iom->dev_ioctl(mtcp->ctx, nif, PKT_TX_IP_CSUM, iph);
+ /* otherwise calculate IP checksum in S/W */
+ if (rc == -1)
+ iph->check = ip_fast_csum(iph, iph->ihl);
return (uint8_t *)(iph + 1);
}
diff --git a/mtcp/src/netmap_module.c b/mtcp/src/netmap_module.c
index 032a938..dac671a 100644
--- a/mtcp/src/netmap_module.c
+++ b/mtcp/src/netmap_module.c
@@ -254,7 +254,8 @@
.recv_pkts = netmap_recv_pkts,
.get_rptr = netmap_get_rptr,
.select = netmap_select,
- .destroy_handle = netmap_destroy_handle
+ .destroy_handle = netmap_destroy_handle,
+ .dev_ioctl = NULL
};
/*----------------------------------------------------------------------------*/
#else
@@ -268,7 +269,8 @@
.recv_pkts = NULL,
.get_rptr = NULL,
.select = NULL,
- .destroy_handle = NULL
+ .destroy_handle = NULL,
+ .dev_ioctl = NULL
};
/*----------------------------------------------------------------------------*/
#endif /* !DISABLE_NETMAP */
diff --git a/mtcp/src/psio_module.c b/mtcp/src/psio_module.c
index ab9b726..e986f8f 100644
--- a/mtcp/src/psio_module.c
+++ b/mtcp/src/psio_module.c
@@ -383,7 +383,8 @@
.recv_pkts = psio_recv_pkts,
.get_rptr = psio_get_rptr,
.select = psio_select,
- .destroy_handle = psio_destroy_handle
+ .destroy_handle = psio_destroy_handle,
+ .dev_ioctl = NULL
};
#else
io_module_func ps_module_func = {
@@ -396,7 +397,8 @@
.recv_pkts = NULL,
.get_rptr = NULL,
.select = NULL,
- .destroy_handle = NULL
+ .destroy_handle = NULL,
+ .dev_ioctl = NULL
};
/*----------------------------------------------------------------------------*/
#endif /* !DISABLE_PSIO */