
Commit c1badf7

[ena] Add support for low latency transmit queues
Newer generations of the ENA hardware require the use of low latency transmit queues, where the submission queue entries and the initial portion of each transmitted packet are written to on-device memory via BAR2, instead of being read from host memory.

Detect support for low latency queues and set the placement policy appropriately. We attempt to use low latency queues only if the device reports support for inline headers, 128-byte entries, and two descriptors prior to the inlined header, on the basis that we do not care about using low latency queues on older generations of the hardware, since those will support normal host memory submission queues anyway.

We reuse the (now redundant) host memory allocated for the submission queue as the bounce buffer within which the descriptors and inlined packet data are constructed, since this avoids needing a separate allocation just for the bounce buffer.

We construct a metadata submission queue entry prior to the actual submission queue entry, since experimentation suggests that newer generations of the hardware require this entry to be present, even though it conveys no information beyond its own existence.

Signed-off-by: Michael Brown <[email protected]>
1 parent 0d15d7f commit c1badf7
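For orientation, the 128-byte low latency queue entry implied by this commit (two descriptors followed by the inlined start of the packet) can be sketched as below. This is a minimal sketch only: the "_sketch" names are hypothetical, and the 16-byte descriptor size and 96-byte inline region are assumptions inferred from ENA_LLQ_SIZE_128 and ENA_LLQ_DESC_2; the real definitions live in the src/drivers/net/ena.h half of this commit, which is not shown in the diff below.

#include <stdint.h>

/* Hypothetical stand-ins for the real descriptor layouts in ena.h */
struct ena_tx_meta_sketch { uint8_t raw[16]; };	/* metadata descriptor */
struct ena_tx_sqe_sketch { uint8_t raw[16]; };	/* transmit descriptor */

/* One low latency transmit queue entry: a metadata descriptor and a
 * transmit descriptor, then the inlined initial portion of the packet
 */
struct ena_tx_llqe_sketch {
	struct ena_tx_meta_sketch meta;
	struct ena_tx_sqe_sketch sqe;
	uint8_t inlined[96];
};

/* The whole entry must match the negotiated 128-byte entry size */
_Static_assert ( sizeof ( struct ena_tx_llqe_sketch ) == 128,
		 "LLQ entry size mismatch" );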

File tree: 2 files changed, 263 additions & 17 deletions

src/drivers/net/ena.c

Lines changed: 163 additions & 9 deletions
@@ -450,6 +450,7 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
 	union ena_aq_req *req;
 	union ena_acq_rsp *rsp;
 	unsigned int i;
+	size_t llqe;
 	int rc;
 
 	/* Allocate submission queue entries */
@@ -464,8 +465,7 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
 	req = ena_admin_req ( ena );
 	req->header.opcode = ENA_CREATE_SQ;
 	req->create_sq.direction = sq->direction;
-	req->create_sq.policy = cpu_to_le16 ( ENA_SQ_HOST_MEMORY |
-					      ENA_SQ_CONTIGUOUS );
+	req->create_sq.policy = cpu_to_le16 ( sq->policy );
 	req->create_sq.cq_id = cpu_to_le16 ( cq->id );
 	req->create_sq.count = cpu_to_le16 ( sq->count );
 	req->create_sq.address = cpu_to_le64 ( virt_to_bus ( sq->sqe.raw ) );
@@ -480,6 +480,14 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
 	/* Parse response */
 	sq->id = le16_to_cpu ( rsp->create_sq.id );
 	sq->doorbell = le32_to_cpu ( rsp->create_sq.doorbell );
+	llqe = le32_to_cpu ( rsp->create_sq.llqe );
+	if ( sq->policy & ENA_SQ_DEVICE_MEMORY ) {
+		assert ( ena->mem != NULL );
+		assert ( sq->len >= sizeof ( *sq->sqe.llq ) );
+		sq->llqe = ( ena->mem + llqe );
+	} else {
+		sq->llqe = NULL;
+	}
 
 	/* Reset producer counter and phase */
 	sq->prod = 0;
@@ -494,10 +502,16 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
 	for ( i = 0 ; i < sq->count ; i++ )
 		sq->ids[i] = i;
 
-	DBGC ( ena, "ENA %p %s SQ%d at [%08lx,%08lx) fill %d db +%04x CQ%d\n",
-	       ena, ena_direction ( sq->direction ), sq->id,
-	       virt_to_phys ( sq->sqe.raw ),
-	       ( virt_to_phys ( sq->sqe.raw ) + sq->len ),
+	DBGC ( ena, "ENA %p %s SQ%d at ",
+	       ena, ena_direction ( sq->direction ), sq->id );
+	if ( sq->policy & ENA_SQ_DEVICE_MEMORY ) {
+		DBGC ( ena, "LLQ [+%08zx,+%08zx)", llqe,
+		       ( llqe + ( sq->count * sizeof ( sq->sqe.llq[0] ) ) ) );
+	} else {
+		DBGC ( ena, "[%08lx,%08lx)", virt_to_phys ( sq->sqe.raw ),
+		       ( virt_to_phys ( sq->sqe.raw ) + sq->len ) );
+	}
+	DBGC ( ena, " fill %d db +%04x CQ%d\n",
 	       sq->fill, sq->doorbell, cq->id );
 	return 0;
 
@@ -744,6 +758,101 @@ static int ena_set_host_attributes ( struct ena_nic *ena ) {
 	return 0;
 }
 
+/**
+ * Configure low latency queues
+ *
+ * @v ena		ENA device
+ * @ret rc		Return status code
+ */
+static int ena_llq_config ( struct ena_nic *ena ) {
+	union ena_aq_req *req;
+	union ena_acq_rsp *rsp;
+	union ena_feature *feature;
+	uint16_t header;
+	uint16_t size;
+	uint16_t desc;
+	uint16_t stride;
+	uint16_t mode;
+	int rc;
+
+	/* Construct request */
+	req = ena_admin_req ( ena );
+	req->header.opcode = ENA_GET_FEATURE;
+	req->get_feature.id = ENA_LLQ_CONFIG;
+
+	/* Issue request */
+	if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) {
+		DBGC ( ena, "ENA %p could not get LLQ configuration: %s\n",
+		       ena, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Parse response */
+	feature = &rsp->get_feature.feature;
+	header = le16_to_cpu ( feature->llq.header.supported );
+	size = le16_to_cpu ( feature->llq.size.supported );
+	desc = le16_to_cpu ( feature->llq.desc.supported );
+	stride = le16_to_cpu ( feature->llq.stride.supported );
+	mode = le16_to_cpu ( feature->llq.mode );
+	DBGC ( ena, "ENA %p LLQ supports %02x:%02x:%02x:%02x:%02x with %dx%d "
+	       "entries\n", ena, header, size, desc, stride, mode,
+	       le32_to_cpu ( feature->llq.queues ),
+	       le32_to_cpu ( feature->llq.count ) );
+
+	/* Check for a supported configuration */
+	if ( ! feature->llq.queues ) {
+		DBGC ( ena, "ENA %p LLQ has no queues\n", ena );
+		return -ENOTSUP;
+	}
+	if ( ! ( header & ENA_LLQ_HEADER_INLINE ) ) {
+		DBGC ( ena, "ENA %p LLQ does not support inline headers\n",
+		       ena );
+		return -ENOTSUP;
+	}
+	if ( ! ( size & ENA_LLQ_SIZE_128 ) ) {
+		DBGC ( ena, "ENA %p LLQ does not support 128-byte entries\n",
+		       ena );
+		return -ENOTSUP;
+	}
+	if ( ! ( desc & ENA_LLQ_DESC_2 ) ) {
+		DBGC ( ena, "ENA %p LLQ does not support two-descriptor "
+		       "entries\n", ena );
+		return -ENOTSUP;
+	}
+
+	/* Enable a minimal configuration */
+	header = ENA_LLQ_HEADER_INLINE;
+	size = ENA_LLQ_SIZE_128;
+	desc = ENA_LLQ_DESC_2;
+	stride &= ( -stride ); /* Don't care: use first supported option */
+	DBGC ( ena, "ENA %p LLQ enabling %02x:%02x:%02x:%02x:%02x\n",
+	       ena, header, size, desc, stride, mode );
+
+	/* Construct request */
+	req = ena_admin_req ( ena );
+	req->header.opcode = ENA_SET_FEATURE;
+	req->set_feature.id = ENA_LLQ_CONFIG;
+	feature = &req->set_feature.feature;
+	feature->llq.header.enabled = cpu_to_le16 ( header );
+	feature->llq.size.enabled = cpu_to_le16 ( size );
+	feature->llq.desc.enabled = cpu_to_le16 ( desc );
+	feature->llq.stride.enabled = cpu_to_le16 ( stride );
+	feature->llq.mode = cpu_to_le16 ( mode );
+
+	/* Issue request */
+	if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) {
+		DBGC ( ena, "ENA %p could not set LLQ configuration: %s\n",
+		       ena, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Use on-device memory for transmit queue */
+	ena->tx.sq.policy |= ENA_SQ_DEVICE_MEMORY;
+	ena->tx.sq.inlined = sizeof ( ena->tx.sq.sqe.llq->inlined );
+
+	return 0;
+}
+
 /**
  * Get statistics (for debugging)
  *
@@ -954,9 +1063,15 @@ static void ena_close ( struct net_device *netdev ) {
 static int ena_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
 	struct ena_nic *ena = netdev->priv;
 	struct ena_tx_sqe *sqe;
+	struct ena_tx_llqe *llqe;
+	const uint64_t *src;
+	uint64_t *dest;
 	physaddr_t address;
 	unsigned int index;
 	unsigned int id;
+	unsigned int i;
+	uint8_t flags;
+	size_t inlined;
 	size_t len;
 
 	/* Get next submission queue entry */
@@ -968,17 +1083,50 @@ static int ena_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
 	sqe = &ena->tx.sq.sqe.tx[index];
 	id = ena->tx_ids[index];
 
-	/* Construct submission queue entry */
+	/* Construct submission queue entry values */
 	address = virt_to_bus ( iobuf->data );
 	len = iob_len ( iobuf );
+	inlined = ena->tx.sq.inlined;
+	if ( inlined > len )
+		inlined = len;
+	len -= inlined;
+	address += inlined;
+	flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL |
+		  ena->tx.sq.phase );
+
+	/* Prepare low-latency queue bounce buffer, if applicable */
+	llqe = ena->tx.sq.sqe.llq;
+	if ( ena->tx.sq.llqe ) {
+
+		/* Construct zero-information metadata queue entry */
+		llqe->meta.meta = ENA_TX_SQE_META;
+		llqe->meta.flags = ( flags & ~( ENA_SQE_LAST | ENA_SQE_CPL ) );
+
+		/* Copy inlined data */
+		memcpy ( llqe->inlined, iobuf->data, inlined );
+
+		/* Place submission queue entry within bounce buffer */
+		sqe = &llqe->sqe;
+		flags &= ~ENA_SQE_FIRST;
+	}
+
+	/* Construct submission queue entry */
 	sqe->len = cpu_to_le16 ( len );
 	sqe->id = cpu_to_le16 ( id );
 	sqe->address = cpu_to_le64 ( address );
+	sqe->inlined = inlined;
 	wmb();
-	sqe->flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL |
-		       ena->tx.sq.phase );
+	sqe->flags = flags;
 	wmb();
 
+	/* Copy bounce buffer to on-device memory, if applicable */
+	if ( ena->tx.sq.llqe ) {
+		src = ( ( const void * ) llqe );
+		dest = ( ena->tx.sq.llqe + ( index * sizeof ( *llqe ) ) );
+		for ( i = 0 ; i < ( sizeof ( *llqe ) / sizeof ( *src ) ); i++ )
+			writeq ( *(src++), dest++ );
+	}
+
 	/* Increment producer counter */
 	ena->tx.sq.prod++;
 	if ( ( ena->tx.sq.prod % ENA_TX_COUNT ) == 0 )
@@ -1282,6 +1430,12 @@ static int ena_probe ( struct pci_device *pci ) {
 	if ( ( rc = ena_get_device_attributes ( netdev ) ) != 0 )
 		goto err_get_device_attributes;
 
+	/* Attempt to configure low latency queues, if applicable.
+	 * Ignore any errors and continue without using LLQs.
+	 */
+	if ( ena->mem && ( ena->features & ENA_FEATURE_LLQ ) )
+		ena_llq_config ( ena );
+
 	/* Register network device */
 	if ( ( rc = register_netdev ( netdev ) ) != 0 )
 		goto err_register_netdev;
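A note on one idiom in ena_llq_config() above: "stride &= ( -stride );" relies on the two's complement identity that x & -x isolates the lowest set bit of x, which is how the driver selects the first supported stride option without caring which one it is. A standalone illustration (the 0x06 bitmask is an arbitrary example, not a value reported by real hardware):

#include <stdint.h>
#include <stdio.h>

int main ( void ) {
	uint16_t stride = 0x06;	/* example bitmask: two options supported */

	/* x & -x keeps only the lowest set bit: 0x06 becomes 0x02 */
	stride &= ( -stride );
	printf ( "selected stride option: %#04x\n",
		 ( unsigned int ) stride );
	return 0;
}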
