aboutsummaryrefslogtreecommitdiffstats
path: root/gpxe/src/net/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'gpxe/src/net/infiniband')
-rw-r--r--gpxe/src/net/infiniband/ib_cm.c413
-rw-r--r--gpxe/src/net/infiniband/ib_cmrc.c436
-rw-r--r--gpxe/src/net/infiniband/ib_mcast.c218
-rw-r--r--gpxe/src/net/infiniband/ib_mi.c406
-rw-r--r--gpxe/src/net/infiniband/ib_packet.c244
-rw-r--r--gpxe/src/net/infiniband/ib_pathrec.c296
-rw-r--r--gpxe/src/net/infiniband/ib_sma.c369
-rw-r--r--gpxe/src/net/infiniband/ib_smc.c179
-rw-r--r--gpxe/src/net/infiniband/ib_srp.c406
9 files changed, 2967 insertions, 0 deletions
diff --git a/gpxe/src/net/infiniband/ib_cm.c b/gpxe/src/net/infiniband/ib_cm.c
new file mode 100644
index 00000000..ebe65b33
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_cm.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_mi.h>
+#include <gpxe/ib_pathrec.h>
+#include <gpxe/ib_cm.h>
+
+/**
+ * @file
+ *
+ * Infiniband communication management
+ *
+ */
+
+/** List of connections created via ib_create_conn() */
+static LIST_HEAD ( ib_cm_conns );
+
+/**
+ * Transmit a "ready to use" MAD for a connection
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v conn		Connection
+ * @v av		Address vector
+ * @ret rc		Return status code
+ */
+static int ib_cm_send_rtu ( struct ib_device *ibdev,
+			    struct ib_mad_interface *mi,
+			    struct ib_connection *conn,
+			    struct ib_address_vector *av ) {
+	union ib_mad rtu;
+	struct ib_cm_ready_to_use *body = &rtu.cm.cm_data.ready_to_use;
+	int rc;
+
+	/* Build a CM-class SEND carrying the "ready to use" attribute */
+	memset ( &rtu, 0, sizeof ( rtu ) );
+	rtu.hdr.mgmt_class = IB_MGMT_CLASS_CM;
+	rtu.hdr.class_version = IB_CM_CLASS_VERSION;
+	rtu.hdr.method = IB_MGMT_METHOD_SEND;
+	rtu.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
+	body->local_id = htonl ( conn->local_id );
+	body->remote_id = htonl ( conn->remote_id );
+
+	/* Transmit it, logging (and passing back) any failure */
+	rc = ib_mi_send ( ibdev, mi, &rtu, av );
+	if ( rc != 0 ) {
+		DBGC ( conn, "CM %p could not send RTU: %s\n",
+		       conn, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Handle duplicate connection replies
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD
+ * @v av		Source address vector
+ *
+ * A peer that never saw our "ready to use" MAD may resend its
+ * connection reply.  Each such duplicate is answered with another
+ * "ready to use" MAD, as otherwise the peer may time out and drop
+ * the connection.
+ *
+ * Nothing is returned, and a failed resend is ignored.
+ */
+static void ib_cm_connect_rep ( struct ib_device *ibdev,
+				struct ib_mad_interface *mi,
+				union ib_mad *mad,
+				struct ib_address_vector *av ) {
+	struct ib_cm_connect_reply *reply = &mad->cm.cm_data.connect_reply;
+	struct ib_connection *match;
+
+	/* Look for the connection whose local ID the peer addressed */
+	list_for_each_entry ( match, &ib_cm_conns, list ) {
+		if ( ntohl ( reply->remote_id ) == match->local_id ) {
+			/* Resend "ready to use"; any failure is
+			 * deliberately ignored
+			 */
+			ib_cm_send_rtu ( ibdev, mi, match, av );
+			return;
+		}
+	}
+
+	/* No connection on our list owns this communication ID */
+	DBG ( "CM unidentified connection %08x\n",
+	      ntohl ( reply->remote_id ) );
+}
+
+/** Communication management agents (connection replies go to ib_cm_connect_rep()) */
+struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
+{
+.mgmt_class = IB_MGMT_CLASS_CM,
+.class_version = IB_CM_CLASS_VERSION,
+.attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
+.handle = ib_cm_connect_rep,
+},
+};
+
+/**
+ * Map a connection rejection reason onto a return status code
+ *
+ * @v reason		Rejection reason (in network byte order)
+ * @ret rc		Return status code
+ */
+static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
+	/* The reason is compared as received, without byte-swapping it */
+	if ( reason == htons ( IB_CM_REJECT_BAD_SERVICE_ID ) )
+		return -ENODEV;
+	if ( reason == htons ( IB_CM_REJECT_STALE_CONN ) )
+		return -EALREADY;
+	if ( reason == htons ( IB_CM_REJECT_CONSUMER ) )
+		return -ENOTTY;
+
+	/* Every other reason is reported as a permission failure */
+	return -EPERM;
+}
+
+/**
+ * Complete a connection request transaction
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v madx		Management transaction
+ * @v rc		Status code
+ * @v mad		Received MAD (or NULL on error)
+ * @v av		Source address vector (or NULL on error)
+ */
+static void ib_cm_req_complete ( struct ib_device *ibdev,
+				 struct ib_mad_interface *mi,
+				 struct ib_mad_transaction *madx,
+				 int rc, union ib_mad *mad,
+				 struct ib_address_vector *av ) {
+	struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
+	struct ib_queue_pair *qp = conn->qp;
+	struct ib_cm_connect_reply *rep =
+		&mad->cm.cm_data.connect_reply;
+	struct ib_cm_connect_reject *rej =
+		&mad->cm.cm_data.connect_reject;
+	void *payload = NULL;
+	size_t payload_len = 0;
+
+	/* A completed transaction may still carry a MAD-level error */
+	if ( ( rc == 0 ) &&
+	     ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ) )
+		rc = -EIO;
+	if ( rc != 0 ) {
+		DBGC ( conn, "CM %p connection request failed: %s\n",
+		       conn, strerror ( rc ) );
+		goto out;
+	}
+
+	/* Remember the communication ID chosen by the peer */
+	conn->remote_id = ntohl ( mad->cm.cm_data.common.local_id );
+
+	/* Dispatch on the attribute carried by the response */
+	if ( mad->hdr.attr_id == htons ( IB_CM_ATTR_CONNECT_REPLY ) ) {
+
+		/* Accepted: pick up the peer's QPN and starting PSN,
+		 * discarding the low 8 bits of each field
+		 */
+		qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
+		qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
+		payload = &rep->private_data;
+		payload_len = sizeof ( rep->private_data );
+		DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
+		       conn, qp->av.qpn, qp->send.psn );
+
+		/* Apply the new parameters to the queue pair */
+		rc = ib_modify_qp ( ibdev, qp );
+		if ( rc != 0 ) {
+			DBGC ( conn, "CM %p could not modify queue pair: %s\n",
+			       conn, strerror ( rc ) );
+			goto out;
+		}
+
+		/* Acknowledge with "ready to use"; a failed send is
+		 * treated as non-fatal, so rc stays zero
+		 */
+		ib_cm_send_rtu ( ibdev, mi, conn, av );
+
+	} else if ( mad->hdr.attr_id == htons ( IB_CM_ATTR_CONNECT_REJECT ) ) {
+
+		/* Rejected: translate the reason into a status code */
+		DBGC ( conn, "CM %p connection rejected (reason %d)\n",
+		       conn, ntohs ( rej->reason ) );
+		/* Private data is valid only for a Consumer Reject */
+		if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
+			payload = &rej->private_data;
+			payload_len = sizeof ( rej->private_data );
+		}
+		rc = ib_cm_rejection_reason_to_rc ( rej->reason );
+
+	} else {
+
+		DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
+		       conn, ntohs ( mad->hdr.attr_id ) );
+		rc = -ENOTSUP;
+	}
+
+ out:
+	/* The transaction is finished, whatever the outcome */
+	ib_destroy_madx ( ibdev, ibdev->gsi, madx );
+	conn->madx = NULL;
+
+	/* Report the result (and any private data) to the owner */
+	conn->op->changed ( ibdev, qp, conn, rc, payload,
+			    payload_len );
+}
+
+/** Connection request transaction operations (completed by ib_cm_req_complete()) */
+static struct ib_mad_transaction_operations ib_cm_req_op = {
+.complete = ib_cm_req_complete,
+};
+
+/**
+ * Handle path lookup completion by sending the connection request
+ *
+ * @v ibdev		Infiniband device
+ * @v path		Path
+ * @v rc		Status code
+ * @v av		Address vector, or NULL on error
+ */
+static void ib_cm_path_complete ( struct ib_device *ibdev,
+				  struct ib_path *path, int rc,
+				  struct ib_address_vector *av ) {
+	struct ib_connection *conn = ib_path_get_ownerdata ( path );
+	struct ib_queue_pair *qp = conn->qp;
+	union ib_mad mad;
+	struct ib_cm_connect_request *connect_req =
+		&mad.cm.cm_data.connect_request;
+	size_t private_data_len;
+
+	/* Report path lookup failures to the connection owner */
+	if ( rc != 0 ) {
+		DBGC ( conn, "CM %p path lookup failed: %s\n",
+		       conn, strerror ( rc ) );
+		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
+		goto out;
+	}
+
+	/* Update queue pair peer path */
+	memcpy ( &qp->av, av, sizeof ( qp->av ) );
+
+	/* Construct connection request */
+	memset ( &mad, 0, sizeof ( mad ) );
+	mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
+	mad.hdr.class_version = IB_CM_CLASS_VERSION;
+	mad.hdr.method = IB_MGMT_METHOD_SEND;
+	mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
+	connect_req->local_id = htonl ( conn->local_id );
+	memcpy ( &connect_req->service_id, &conn->service_id,
+		 sizeof ( connect_req->service_id ) );
+	ib_get_hca_info ( ibdev, &connect_req->local_ca );
+	connect_req->local_qpn__responder_resources =
+		htonl ( ( qp->qpn << 8 ) | 1 );
+	connect_req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
+	connect_req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
+		htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
+			( 0 << 0 ) );
+	connect_req->starting_psn__local_timeout__retry_count =
+		htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) | ( 0x07 << 0 ) );
+	connect_req->pkey = htons ( ibdev->pkey );
+	connect_req->payload_mtu__rdc_exists__rnr_retry =
+		( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
+	connect_req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
+	connect_req->primary.local_lid = htons ( ibdev->lid );
+	connect_req->primary.remote_lid = htons ( conn->qp->av.lid );
+	memcpy ( &connect_req->primary.local_gid, &ibdev->gid,
+		 sizeof ( connect_req->primary.local_gid ) );
+	memcpy ( &connect_req->primary.remote_gid, &conn->qp->av.gid,
+		 sizeof ( connect_req->primary.remote_gid ) );
+	connect_req->primary.flow_label__rate =
+		htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
+	connect_req->primary.hop_limit = 0;
+	connect_req->primary.sl__subnet_local =
+		( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
+	connect_req->primary.local_ack_timeout = ( 0x13 << 3 );
+	private_data_len = conn->private_data_len;
+	if ( private_data_len > sizeof ( connect_req->private_data ) )
+		private_data_len = sizeof ( connect_req->private_data );
+	memcpy ( &connect_req->private_data, &conn->private_data,
+		 private_data_len );
+
+	/* Create connection request, addressed to the peer's GSI */
+	av->qpn = IB_QPN_GSI;
+	av->qkey = IB_QKEY_GSI;
+	conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
+				      &ib_cm_req_op );
+	if ( ! conn->madx ) {
+		DBGC ( conn, "CM %p could not create connection request\n",
+		       conn );
+		/* rc is still zero here: report a genuine error instead */
+		rc = -ENOMEM;
+		conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
+		goto out;
+	}
+	ib_madx_set_ownerdata ( conn->madx, conn );
+
+ out:
+	/* Destroy the completed path lookup */
+	ib_destroy_path ( ibdev, path );
+	conn->path = NULL;
+}
+
+/** Connection path operations (completed by ib_cm_path_complete()) */
+static struct ib_path_operations ib_cm_path_op = {
+.complete = ib_cm_path_complete,
+};
+
+/**
+ * Create connection to remote QP
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v dgid		Target GID
+ * @v service_id	Target service ID
+ * @v private_data	Connection request private data
+ * @v private_data_len	Length of connection request private data
+ * @v op		Connection operations
+ * @ret conn		Connection, or NULL on failure
+ */
+struct ib_connection *
+ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		 struct ib_gid *dgid, struct ib_gid_half *service_id,
+		 void *private_data, size_t private_data_len,
+		 struct ib_connection_operations *op ) {
+	struct ib_connection *conn;
+
+	/* Allocate the connection, with room for a private data copy */
+	conn = zalloc ( sizeof ( *conn ) + private_data_len );
+	if ( ! conn )
+		goto err_alloc_conn;
+	conn->ibdev = ibdev;
+	conn->qp = qp;
+	conn->op = op;
+	conn->local_id = random();
+	memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
+	conn->private_data_len = private_data_len;
+	memcpy ( &conn->private_data, private_data, private_data_len );
+
+	/* Reset the QP's address vector to hold only the target GID */
+	memset ( &qp->av, 0, sizeof ( qp->av ) );
+	qp->av.gid_present = 1;
+	memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
+
+	/* Start the path lookup, then add to the list of connections */
+	conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
+	if ( ! conn->path )
+		goto err_create_path;
+	ib_path_set_ownerdata ( conn->path, conn );
+	list_add ( &conn->list, &ib_cm_conns );
+
+	DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
+	       conn, ibdev, qp->qpn );
+	DBGC ( conn, "CM %p connecting to %08x:%08x:%08x:%08x %08x:%08x\n",
+	       conn, ntohl ( dgid->u.dwords[0] ), ntohl ( dgid->u.dwords[1] ),
+	       ntohl ( dgid->u.dwords[2] ), ntohl ( dgid->u.dwords[3] ),
+	       ntohl ( service_id->u.dwords[0] ),
+	       ntohl ( service_id->u.dwords[1] ) );
+
+	return conn;
+
+	ib_destroy_path ( ibdev, conn->path );
+ err_create_path:
+	free ( conn );
+ err_alloc_conn:
+	return NULL;
+}
+
+/**
+ * Destroy connection to remote QP
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair (not used)
+ * @v conn		Connection
+ */
+void ib_destroy_conn ( struct ib_device *ibdev,
+		       struct ib_queue_pair *qp __unused,
+		       struct ib_connection *conn ) {
+	list_del ( &conn->list );
+	/* Cancel any connection request or path lookup still pending */
+	if ( conn->madx != NULL )
+		ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
+	if ( conn->path != NULL )
+		ib_destroy_path ( ibdev, conn->path );
+	free ( conn );
+}
diff --git a/gpxe/src/net/infiniband/ib_cmrc.c b/gpxe/src/net/infiniband/ib_cmrc.c
new file mode 100644
index 00000000..2d648115
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_cmrc.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+FILE_LICENCE ( BSD2 );
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/xfer.h>
+#include <gpxe/process.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_cm.h>
+#include <gpxe/ib_cmrc.h>
+
+/**
+ * @file
+ *
+ * Infiniband Communication-managed Reliable Connections
+ *
+ */
+
+/** CMRC number of send WQEs
+ *
+ * This is a policy decision; passed to ib_create_qp() by ib_cmrc_open().
+ */
+#define IB_CMRC_NUM_SEND_WQES 4
+
+/** CMRC number of receive WQEs
+ *
+ * This is a policy decision; passed to ib_create_qp() by ib_cmrc_open().
+ */
+#define IB_CMRC_NUM_RECV_WQES 2
+
+/** CMRC number of completion queue entries
+ *
+ * This is a policy decision; passed to ib_create_cq() by ib_cmrc_open().
+ */
+#define IB_CMRC_NUM_CQES 8
+
+/** An Infiniband Communication-Managed Reliable Connection */
+struct ib_cmrc_connection {
+/** Reference count (handed to both xfer_init() and the shutdown process) */
+struct refcnt refcnt;
+/** Data transfer interface (plugged into the opener's interface) */
+struct xfer_interface xfer;
+/** Infiniband device */
+struct ib_device *ibdev;
+/** Completion queue (used for both the send and the receive queue) */
+struct ib_completion_queue *cq;
+/** Queue pair (created with type IB_QPT_RC) */
+struct ib_queue_pair *qp;
+/** Connection (NULL until a CM connection request has been created) */
+struct ib_connection *conn;
+/** Destination GID */
+struct ib_gid dgid;
+/** Service ID */
+struct ib_gid_half service_id;
+/** QP is connected (maintained by ib_cmrc_changed()) */
+int connected;
+/** Shutdown process (runs ib_cmrc_shutdown()) */
+struct process shutdown;
+};
+
+/**
+ * Shut down CMRC connection gracefully
+ *
+ * @v process		Process
+ *
+ * The Infiniband data structures are not reference-counted or
+ * guarded.  It is therefore unsafe to shut them down while we may be
+ * in the middle of a callback from the Infiniband stack (e.g. in a
+ * receive completion handler).
+ *
+ * This shutdown process will run some time after the call to
+ * ib_cmrc_close(), after control has returned out of the Infiniband
+ * core, and will shut down the Infiniband interfaces cleanly.  The
+ * CM connection is destroyed only if one was ever created.
+ *
+ * The shutdown process holds an implicit reference on the CMRC
+ * connection, ensuring that the structure is not freed before the
+ * shutdown process has run.
+ */
+static void ib_cmrc_shutdown ( struct process *process ) {
+	struct ib_cmrc_connection *cmrc =
+		container_of ( process, struct ib_cmrc_connection, shutdown );
+
+	DBGC ( cmrc, "CMRC %p shutting down\n", cmrc );
+
+	/* Shut down Infiniband interface (conn is NULL if never created) */
+	if ( cmrc->conn )
+		ib_destroy_conn ( cmrc->ibdev, cmrc->qp, cmrc->conn );
+	ib_destroy_qp ( cmrc->ibdev, cmrc->qp );
+	ib_destroy_cq ( cmrc->ibdev, cmrc->cq );
+	ib_close ( cmrc->ibdev );
+
+	/* Remove process from run queue and drop the remaining reference */
+	process_del ( &cmrc->shutdown );
+	ref_put ( &cmrc->refcnt );
+}
+
+/**
+ * Close CMRC connection
+ *
+ * @v cmrc Communication-Managed Reliable Connection
+ * @v rc Reason for close (passed on to xfer_close())
+ */
+static void ib_cmrc_close ( struct ib_cmrc_connection *cmrc, int rc ) {
+
+/* Close data transfer interface */
+xfer_nullify ( &cmrc->xfer );
+xfer_close ( &cmrc->xfer, rc );
+
+/* Schedule ib_cmrc_shutdown() to tear down the Infiniband objects later */
+process_add ( &cmrc->shutdown );
+}
+
+/**
+ * React to a change in CM connection status
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v conn		Connection
+ * @v rc_cm		Connection status code
+ * @v private_data	Private data, if available
+ * @v private_data_len	Length of private data
+ */
+static void ib_cmrc_changed ( struct ib_device *ibdev __unused,
+			      struct ib_queue_pair *qp,
+			      struct ib_connection *conn __unused, int rc_cm,
+			      void *private_data, size_t private_data_len ) {
+	struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp );
+	int rc_xfer;
+
+	/* Track whether the connection is currently usable */
+	cmrc->connected = ( rc_cm == 0 );
+	if ( rc_cm != 0 ) {
+		DBGC ( cmrc, "CMRC %p disconnected: %s\n",
+		       cmrc, strerror ( rc_cm ) );
+	} else {
+		DBGC ( cmrc, "CMRC %p connected\n", cmrc );
+	}
+
+	/* Forward any private data to the upper layer */
+	DBGC2 ( cmrc, "CMRC %p received private data:\n", cmrc );
+	DBGC2_HDA ( cmrc, 0, private_data, private_data_len );
+	if ( private_data ) {
+		rc_xfer = xfer_deliver_raw ( &cmrc->xfer, private_data,
+					     private_data_len );
+		if ( rc_xfer != 0 ) {
+			DBGC ( cmrc, "CMRC %p could not deliver private "
+			       "data: %s\n", cmrc, strerror ( rc_xfer ) );
+			ib_cmrc_close ( cmrc, rc_xfer );
+			return;
+		}
+	}
+
+	/* A failed connection also closes the upper connection */
+	if ( rc_cm != 0 ) {
+		ib_cmrc_close ( cmrc, rc_cm );
+	}
+}
+
+/** CMRC connection operations (passed to ib_create_conn()) */
+static struct ib_connection_operations ib_cmrc_conn_op = {
+.changed = ib_cmrc_changed,
+};
+
+/**
+ * Complete a CMRC send
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v iobuf		I/O buffer
+ * @v rc		Completion status code
+ */
+static void ib_cmrc_complete_send ( struct ib_device *ibdev __unused,
+				    struct ib_queue_pair *qp,
+				    struct io_buffer *iobuf, int rc ) {
+	struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp );
+
+	/* The buffer is finished with, whether or not the send worked */
+	free_iob ( iobuf );
+
+	/* Nothing more to do after a successful send */
+	if ( rc == 0 )
+		return;
+
+	/* Any send error closes the connection */
+	DBGC ( cmrc, "CMRC %p send error: %s\n", cmrc, strerror ( rc ) );
+	ib_cmrc_close ( cmrc, rc );
+}
+
+/**
+ * Complete a CMRC receive
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v av		Address vector, or NULL
+ * @v iobuf		I/O buffer
+ * @v rc		Completion status code
+ */
+static void ib_cmrc_complete_recv ( struct ib_device *ibdev __unused,
+				    struct ib_queue_pair *qp,
+				    struct ib_address_vector *av __unused,
+				    struct io_buffer *iobuf, int rc ) {
+	struct ib_cmrc_connection *cmrc = ib_qp_get_ownerdata ( qp );
+
+	/* A failed receive discards the buffer and closes the connection */
+	if ( rc != 0 ) {
+		DBGC ( cmrc, "CMRC %p receive error: %s\n",
+		       cmrc, strerror ( rc ) );
+		free_iob ( iobuf );
+		ib_cmrc_close ( cmrc, rc );
+		return;
+	}
+
+	DBGC2 ( cmrc, "CMRC %p received:\n", cmrc );
+	DBGC2_HDA ( cmrc, 0, iobuf->data, iob_len ( iobuf ) );
+
+	/* Pass the buffer up; it is not touched again here afterwards */
+	rc = xfer_deliver_iob ( &cmrc->xfer, iobuf );
+	if ( rc != 0 ) {
+		DBGC ( cmrc, "CMRC %p could not deliver data: %s\n",
+		       cmrc, strerror ( rc ) );
+		ib_cmrc_close ( cmrc, rc );
+	}
+}
+
+/** Infiniband CMRC completion operations (passed to ib_create_cq()) */
+static struct ib_completion_queue_operations ib_cmrc_completion_ops = {
+.complete_send = ib_cmrc_complete_send,
+.complete_recv = ib_cmrc_complete_recv,
+};
+
+/**
+ * Send data via CMRC
+ *
+ * @v xfer		Data transfer interface
+ * @v iobuf		Datagram I/O buffer (ownership is taken)
+ * @v meta		Data transfer metadata
+ * @ret rc		Return status code
+ */
+static int ib_cmrc_xfer_deliver_iob ( struct xfer_interface *xfer,
+				      struct io_buffer *iobuf,
+				      struct xfer_metadata *meta __unused ) {
+	struct ib_cmrc_connection *cmrc =
+		container_of ( xfer, struct ib_cmrc_connection, xfer );
+	int rc;
+
+	/* If no connection has yet been attempted, send this datagram
+	 * as the CM REQ private data.  Otherwise, send it via the QP.
+	 */
+	if ( ! cmrc->connected ) {
+		/* Abort if we have already sent a CM connection request */
+		if ( cmrc->conn ) {
+			DBGC ( cmrc, "CMRC %p attempt to send before "
+			       "connection is complete\n", cmrc );
+			rc = -EIO;
+			goto out;
+		}
+
+		/* Send via CM connection request */
+		cmrc->conn = ib_create_conn ( cmrc->ibdev, cmrc->qp,
+					      &cmrc->dgid, &cmrc->service_id,
+					      iobuf->data, iob_len ( iobuf ),
+					      &ib_cmrc_conn_op );
+		if ( ! cmrc->conn ) {
+			DBGC ( cmrc, "CMRC %p could not connect\n", cmrc );
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		/* The data was copied, so our I/O buffer must still be freed */
+		rc = 0;
+		goto out;
+	}
+
+	/* Send via QP, disowning the I/O buffer */
+	if ( ( rc = ib_post_send ( cmrc->ibdev, cmrc->qp, NULL,
+				   iob_disown ( iobuf ) ) ) != 0 ) {
+		DBGC ( cmrc, "CMRC %p could not send: %s\n",
+		       cmrc, strerror ( rc ) );
+		goto out;
+	}
+	return 0;
+
+ out:
+	/* Free the I/O buffer if necessary */
+	free_iob ( iobuf );
+
+	/* Close the connection on any errors */
+	if ( rc != 0 )
+		ib_cmrc_close ( cmrc, rc );
+
+	return rc;
+}
+
+/**
+ * Report the CMRC flow control window
+ *
+ * @v xfer		Data transfer interface
+ * @ret len		Length of window
+ */
+static size_t ib_cmrc_xfer_window ( struct xfer_interface *xfer ) {
+	struct ib_cmrc_connection *cmrc =
+		container_of ( xfer, struct ib_cmrc_connection, xfer );
+
+	/* No window is offered until we are successfully connected */
+	if ( ! cmrc->connected )
+		return 0;
+	return IB_MAX_PAYLOAD_SIZE;
+}
+
+/**
+ * Handle closure of the CMRC data-transfer interface
+ *
+ * @v xfer		Data transfer interface
+ * @v rc		Reason for close
+ */
+static void ib_cmrc_xfer_close ( struct xfer_interface *xfer, int rc ) {
+	struct ib_cmrc_connection *closing =
+		container_of ( xfer, struct ib_cmrc_connection, xfer );
+
+	DBGC ( closing, "CMRC %p closed: %s\n", closing, strerror ( rc ) );
+	ib_cmrc_close ( closing, rc );
+}
+
+/** CMRC data transfer interface operations (installed by ib_cmrc_open()) */
+static struct xfer_interface_operations ib_cmrc_xfer_operations = {
+.close = ib_cmrc_xfer_close,
+.vredirect = ignore_xfer_vredirect,
+.window = ib_cmrc_xfer_window,
+.alloc_iob = default_xfer_alloc_iob,
+.deliver_iob = ib_cmrc_xfer_deliver_iob,
+.deliver_raw = xfer_deliver_as_iob,
+};
+
+/**
+ * Open CMRC connection
+ *
+ * @v xfer		Data transfer interface
+ * @v ibdev		Infiniband device
+ * @v dgid		Destination GID
+ * @v service_id	Service ID
+ * @ret rc		Return status code
+ */
+int ib_cmrc_open ( struct xfer_interface *xfer, struct ib_device *ibdev,
+		   struct ib_gid *dgid, struct ib_gid_half *service_id ) {
+	struct ib_cmrc_connection *cmrc;
+	int rc;
+
+	/* Allocate the structure and record the caller's parameters */
+	cmrc = zalloc ( sizeof ( *cmrc ) );
+	if ( ! cmrc )
+		return -ENOMEM;
+	xfer_init ( &cmrc->xfer, &ib_cmrc_xfer_operations, &cmrc->refcnt );
+	cmrc->ibdev = ibdev;
+	memcpy ( &cmrc->dgid, dgid, sizeof ( cmrc->dgid ) );
+	memcpy ( &cmrc->service_id, service_id, sizeof ( cmrc->service_id ) );
+	process_init_stopped ( &cmrc->shutdown, ib_cmrc_shutdown,
+			       &cmrc->refcnt );
+
+	/* Open Infiniband device */
+	rc = ib_open ( ibdev );
+	if ( rc != 0 ) {
+		DBGC ( cmrc, "CMRC %p could not open device: %s\n",
+		       cmrc, strerror ( rc ) );
+		goto err_open;
+	}
+
+	/* Create the completion queue shared by sends and receives */
+	cmrc->cq = ib_create_cq ( ibdev, IB_CMRC_NUM_CQES,
+				  &ib_cmrc_completion_ops );
+	if ( ! cmrc->cq ) {
+		DBGC ( cmrc, "CMRC %p could not create completion queue\n",
+		       cmrc );
+		rc = -ENOMEM;
+		goto err_create_cq;
+	}
+
+	/* Create the reliable-connected queue pair */
+	cmrc->qp = ib_create_qp ( ibdev, IB_QPT_RC, IB_CMRC_NUM_SEND_WQES,
+				  cmrc->cq, IB_CMRC_NUM_RECV_WQES, cmrc->cq );
+	if ( ! cmrc->qp ) {
+		DBGC ( cmrc, "CMRC %p could not create queue pair\n", cmrc );
+		rc = -ENOMEM;
+		goto err_create_qp;
+	}
+	/* Let the completion handlers find us from the queue pair */
+	ib_qp_set_ownerdata ( cmrc->qp, cmrc );
+	DBGC ( cmrc, "CMRC %p using QPN %lx\n", cmrc, cmrc->qp->qpn );
+
+	/* Attach to parent interface, transfer reference (implicitly)
+	 * to our shutdown process, and return.
+	 */
+	xfer_plug_plug ( &cmrc->xfer, xfer );
+	return 0;
+
+	/* Error unwinding, in reverse order of construction */
+	ib_destroy_qp ( ibdev, cmrc->qp );
+ err_create_qp:
+	ib_destroy_cq ( ibdev, cmrc->cq );
+ err_create_cq:
+	ib_close ( ibdev );
+ err_open:
+	ref_put ( &cmrc->refcnt );
+	return rc;
+}
diff --git a/gpxe/src/net/infiniband/ib_mcast.c b/gpxe/src/net/infiniband/ib_mcast.c
new file mode 100644
index 00000000..5cb395de
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_mcast.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/list.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_mi.h>
+#include <gpxe/ib_mcast.h>
+
+/** @file
+ *
+ * Infiniband multicast groups
+ *
+ */
+
+/**
+ * Fill in a multicast membership MAD
+ *
+ * @v ibdev		Infiniband device
+ * @v gid		Multicast GID
+ * @v join		Join (rather than leave) group
+ * @v mad		MAD to fill in
+ */
+static void ib_mcast_mad ( struct ib_device *ibdev, struct ib_gid *gid,
+			   int join, union ib_mad *mad ) {
+	struct ib_mad_sa *sa = &mad->sa;
+	struct ib_mc_member_record *rec = &sa->sa_data.mc_member_record;
+
+	/* Header: a SET joins the group, a DELETE leaves it */
+	memset ( sa, 0, sizeof ( *sa ) );
+	sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+	sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+	sa->mad_hdr.method =
+		( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
+	sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
+	sa->sa_hdr.comp_mask[1] =
+		htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+			IB_SA_MCMEMBER_REC_JOIN_STATE );
+	/* Membership record: join state, the group, and our port */
+	rec->scope__join_state = 1;
+	memcpy ( &rec->mgid, gid, sizeof ( rec->mgid ) );
+	memcpy ( &rec->port_gid, &ibdev->gid, sizeof ( rec->port_gid ) );
+}
+
+/**
+ * Handle multicast membership record join response
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface (owns the transaction)
+ * @v madx		Management transaction
+ * @v rc		Status code
+ * @v mad		Received MAD (or NULL on error)
+ * @v av		Source address vector (or NULL on error)
+ */
+static void ib_mcast_complete ( struct ib_device *ibdev,
+				struct ib_mad_interface *mi,
+				struct ib_mad_transaction *madx,
+				int rc, union ib_mad *mad,
+				struct ib_address_vector *av __unused ) {
+	struct ib_mc_membership *membership = ib_madx_get_ownerdata ( madx );
+	struct ib_queue_pair *qp = membership->qp;
+	struct ib_gid *gid = &membership->gid;
+	struct ib_mc_member_record *mc_member_record =
+		&mad->sa.sa_data.mc_member_record;
+	int joined;
+	unsigned long qkey;
+
+	/* Report failures (including a MAD-level error status) */
+	if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
+		rc = -ENOTCONN;
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx join failed: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		goto out;
+	}
+
+	/* Extract values from MAD */
+	joined = ( mad->hdr.method == IB_MGMT_METHOD_GET_RESP );
+	qkey = ntohl ( mc_member_record->qkey );
+	DBGC ( ibdev, "IBDEV %p QPN %lx %s %08x:%08x:%08x:%08x qkey %lx\n",
+	       ibdev, qp->qpn, ( joined ? "joined" : "left" ),
+	       ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ),
+	       ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ),
+	       qkey );
+
+	/* Set queue key to the one reported for the group */
+	qp->qkey = qkey;
+	if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not modify qkey: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		goto out;
+	}
+
+ out:
+	/* Destroy the completed transaction */
+	ib_destroy_madx ( ibdev, mi, madx );
+	membership->madx = NULL;
+
+	/* Hand off to upper completion handler */
+	membership->complete ( ibdev, qp, membership, rc, mad );
+}
+
+/** Multicast membership join transaction operations */
+static struct ib_mad_transaction_operations ib_mcast_op = {
+.complete = ib_mcast_complete,
+};
+
+/**
+ * Join multicast group
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v membership	Multicast group membership
+ * @v gid		Multicast GID to join
+ * @v complete		Join completion handler
+ * @ret rc		Return status code
+ */
+int ib_mcast_join ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		    struct ib_mc_membership *membership, struct ib_gid *gid,
+		    void ( * complete ) ( struct ib_device *ibdev,
+					  struct ib_queue_pair *qp,
+					  struct ib_mc_membership *membership,
+					  int rc, union ib_mad *mad ) ) {
+	union ib_mad join_mad;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p QPN %lx joining %08x:%08x:%08x:%08x\n",
+	       ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ),
+	       ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+	       ntohl ( gid->u.dwords[3] ) );
+
+	/* Record what the completion handler will need later */
+	membership->qp = qp;
+	memcpy ( &membership->gid, gid, sizeof ( membership->gid ) );
+	membership->complete = complete;
+
+	/* Attach queue pair to multicast GID */
+	rc = ib_mcast_attach ( ibdev, qp, gid );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not attach: %s\n",
+		       ibdev, qp->qpn, strerror ( rc ) );
+		return rc;
+	}
+
+	/* Start the membership join transaction on the GSI */
+	ib_mcast_mad ( ibdev, gid, 1, &join_mad );
+	membership->madx = ib_create_madx ( ibdev, ibdev->gsi, &join_mad,
+					    NULL, &ib_mcast_op );
+	if ( ! membership->madx ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not create join "
+		       "transaction\n", ibdev, qp->qpn );
+		rc = -ENOMEM;
+		goto err_create_madx;
+	}
+	ib_madx_set_ownerdata ( membership->madx, membership );
+
+	return 0;
+
+	ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx );
+ err_create_madx:
+	ib_mcast_detach ( ibdev, qp, gid );
+	return rc;
+}
+
+/**
+ * Leave multicast group
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v membership	Multicast group membership
+ */
+void ib_mcast_leave ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+		      struct ib_mc_membership *membership ) {
+	struct ib_gid *gid = &membership->gid;
+	union ib_mad leave_mad;
+	int rc;
+
+	DBGC ( ibdev, "IBDEV %p QPN %lx leaving %08x:%08x:%08x:%08x\n",
+	       ibdev, qp->qpn, ntohl ( gid->u.dwords[0] ),
+	       ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+	       ntohl ( gid->u.dwords[3] ) );
+
+	/* Detach the queue pair from the multicast GID */
+	ib_mcast_detach ( ibdev, qp, gid );
+	/* Abandon a join transaction that has not yet completed */
+	if ( membership->madx ) {
+		ib_destroy_madx ( ibdev, ibdev->gsi, membership->madx );
+		membership->madx = NULL;
+	}
+
+	/* Send one leave MAD; a failure is logged but not reported */
+	ib_mcast_mad ( ibdev, gid, 0, &leave_mad );
+	rc = ib_mi_send ( ibdev, ibdev->gsi, &leave_mad, NULL );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p QPN %lx could not send leave request: "
+		       "%s\n", ibdev, qp->qpn, strerror ( rc ) );
+	}
+}
diff --git a/gpxe/src/net/infiniband/ib_mi.c b/gpxe/src/net/infiniband/ib_mi.c
new file mode 100644
index 00000000..7511fd87
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_mi.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/ib_mi.h>
+
+/**
+ * @file
+ *
+ * Infiniband management interfaces
+ *
+ */
+
+/** Management interface number of send WQEs
+ *
+ * This is a policy decision; passed to ib_create_qp() by ib_create_mi().
+ */
+#define IB_MI_NUM_SEND_WQES 4
+
+/** Management interface number of receive WQEs
+ *
+ * This is a policy decision; passed to ib_create_qp() by ib_create_mi().
+ */
+#define IB_MI_NUM_RECV_WQES 2
+
+/** Management interface number of completion queue entries
+ *
+ * This is a policy decision; passed to ib_create_cq() by ib_create_mi().
+ */
+#define IB_MI_NUM_CQES 8
+
+/** TID magic signature (ASCII "gPXE"); ib_mi_send() stores it in tid[0] */
+#define IB_MI_TID_MAGIC ( ( 'g' << 24 ) | ( 'P' << 16 ) | ( 'X' << 8 ) | 'E' )
+
+/** TID to use for next MAD (pre-incremented into tid[1] by ib_mi_send()) */
+static unsigned int next_tid;
+
+/**
+ * Dispatch a received MAD
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD
+ * @v av		Source address vector
+ * @ret rc		Return status code
+ */
+static int ib_mi_handle ( struct ib_device *ibdev,
+			  struct ib_mad_interface *mi,
+			  union ib_mad *mad,
+			  struct ib_address_vector *av ) {
+	struct ib_mad_hdr *rx_hdr = &mad->hdr;
+	struct ib_mad_transaction *xact;
+	struct ib_mad_agent *agent;
+
+	/* First preference: a pending transaction with the same TID */
+	list_for_each_entry ( xact, &mi->madx, list ) {
+		if ( memcmp ( &rx_hdr->tid, &xact->mad.hdr.tid,
+			      sizeof ( rx_hdr->tid ) ) == 0 ) {
+			/* Complete it successfully with this MAD */
+			xact->op->complete ( ibdev, mi, xact, 0, mad, av );
+			return 0;
+		}
+	}
+
+	/* Otherwise try agents listening for class/version/attribute */
+	for_each_table_entry ( agent, IB_MAD_AGENTS ) {
+		if ( ( ( agent->mgmt_class & IB_MGMT_CLASS_MASK ) ==
+		       ( rx_hdr->mgmt_class & IB_MGMT_CLASS_MASK ) ) &&
+		     ( agent->class_version == rx_hdr->class_version ) &&
+		     ( agent->attr_id == rx_hdr->attr_id ) ) {
+			/* First matching agent handles the MAD */
+			agent->handle ( ibdev, mi, mad, av );
+			return 0;
+		}
+	}
+
+	/* Neither a transaction nor an agent matched */
+	DBGC ( mi, "MI %p RX TID %08x%08x ignored\n",
+	       mi, ntohl ( rx_hdr->tid[0] ), ntohl ( rx_hdr->tid[1] ) );
+	return -ENOTSUP;
+}
+
+/**
+ * Handle a receive completion on a management interface
+ *
+ * Validates the received MAD, dispatches it, then frees the buffer.
+ *
+ * @v ibdev		Infiniband device
+ * @v qp		Queue pair
+ * @v av		Address vector
+ * @v iobuf		I/O buffer
+ * @v rc		Completion status code
+ */
+static void ib_mi_complete_recv ( struct ib_device *ibdev,
+		struct ib_queue_pair *qp, struct ib_address_vector *av,
+		struct io_buffer *iobuf, int rc ) {
+	struct ib_mad_interface *mi = ib_qp_get_ownerdata ( qp );
+	union ib_mad *mad;
+	struct ib_mad_hdr *mhdr;
+
+	/* A failed completion is logged and otherwise ignored */
+	if ( rc != 0 ) {
+		DBGC ( mi, "MI %p RX error: %s\n", mi, strerror ( rc ) );
+		goto out;
+	}
+
+	/* Only accept buffers that are exactly one MAD long */
+	if ( iob_len ( iobuf ) != sizeof ( *mad ) ) {
+		DBGC ( mi, "MI %p RX bad size (%zd bytes)\n",
+		       mi, iob_len ( iobuf ) );
+		DBGC_HDA ( mi, 0, iobuf->data, iob_len ( iobuf ) );
+		goto out;
+	}
+	mad = iobuf->data;
+	mhdr = &mad->hdr;
+	if ( mhdr->base_version != IB_MGMT_BASE_VERSION ) {
+		DBGC ( mi, "MI %p RX unsupported base version %x\n",
+		       mi, mhdr->base_version );
+		DBGC_HDA ( mi, 0, mad, sizeof ( *mad ) );
+		goto out;
+	}
+	DBGC ( mi, "MI %p RX TID %08x%08x (%02x,%02x,%02x,%04x) status "
+	       "%04x\n", mi, ntohl ( mhdr->tid[0] ), ntohl ( mhdr->tid[1] ),
+	       mhdr->mgmt_class, mhdr->class_version, mhdr->method,
+	       ntohs ( mhdr->attr_id ), ntohs ( mhdr->status ) );
+	DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
+
+	/* Dispatch the MAD.  The result is not acted upon: the buffer
+	 * is freed below whether or not anything handled it. */
+	ib_mi_handle ( ibdev, mi, mad, av );
+
+ out:
+	free_iob ( iobuf );
+}
+
+/** Management interface completion operations (used by ib_create_mi()) */
+static struct ib_completion_queue_operations ib_mi_completion_ops = {
+ .complete_recv = ib_mi_complete_recv, /* only receive handler is set */
+};
+
+/**
+ * Transmit MAD via a management interface's queue pair
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @v mad MAD (header fields are filled in in place before sending)
+ * @v av Destination address vector
+ * @ret rc Return status code (-EINVAL, -ENOMEM or ib_post_send() error)
+ */
+int ib_mi_send ( struct ib_device *ibdev, struct ib_mad_interface *mi,
+ union ib_mad *mad, struct ib_address_vector *av ) {
+ struct ib_mad_hdr *hdr = &mad->hdr;
+ struct io_buffer *iobuf;
+ int rc;
+
+ /* Set common fields; a TID is assigned only if caller left it zero */
+ hdr->base_version = IB_MGMT_BASE_VERSION;
+ if ( ( hdr->tid[0] == 0 ) && ( hdr->tid[1] == 0 ) ) {
+ hdr->tid[0] = htonl ( IB_MI_TID_MAGIC );
+ hdr->tid[1] = htonl ( ++next_tid );
+ }
+ DBGC ( mi, "MI %p TX TID %08x%08x (%02x,%02x,%02x,%04x) status "
+ "%04x\n", mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
+ hdr->mgmt_class, hdr->class_version, hdr->method,
+ ntohs ( hdr->attr_id ), ntohs ( hdr->status ) );
+ DBGC2_HDA ( mi, 0, mad, sizeof ( *mad ) );
+
+ /* Construct directed route portion of response, if necessary */
+ if ( hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ) {
+ struct ib_mad_smp *smp = &mad->smp;
+ unsigned int hop_pointer;
+ unsigned int hop_count;
+
+ smp->mad_hdr.status |= htons ( IB_SMP_STATUS_D_INBOUND );
+ hop_pointer = smp->mad_hdr.class_specific.smp.hop_pointer;
+ hop_count = smp->mad_hdr.class_specific.smp.hop_count;
+ assert ( hop_count == hop_pointer );
+ if ( hop_pointer < ( sizeof ( smp->return_path.hops ) /
+ sizeof ( smp->return_path.hops[0] ) ) ) {
+ smp->return_path.hops[hop_pointer] = ibdev->port; /* our port */
+ } else {
+ DBGC ( mi, "MI %p TX TID %08x%08x invalid hop pointer "
+ "%d\n", mi, ntohl ( hdr->tid[0] ),
+ ntohl ( hdr->tid[1] ), hop_pointer );
+ return -EINVAL;
+ }
+ }
+
+ /* Construct I/O buffer holding a copy of the MAD */
+ iobuf = alloc_iob ( sizeof ( *mad ) );
+ if ( ! iobuf ) {
+ DBGC ( mi, "MI %p could not allocate buffer for TID "
+ "%08x%08x\n",
+ mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ) );
+ return -ENOMEM;
+ }
+ memcpy ( iob_put ( iobuf, sizeof ( *mad ) ), mad, sizeof ( *mad ) );
+
+ /* Send I/O buffer */
+ if ( ( rc = ib_post_send ( ibdev, mi->qp, av, iobuf ) ) != 0 ) {
+ DBGC ( mi, "MI %p TX TID %08x%08x failed: %s\n",
+ mi, ntohl ( hdr->tid[0] ), ntohl ( hdr->tid[1] ),
+ strerror ( rc ) );
+ free_iob ( iobuf ); /* not posted, so release it here */
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * Handle management transaction retry timer expiry
+ *
+ * @v timer		Retry timer
+ * @v expired		Failure indicator
+ */
+static void ib_mi_timer_expired ( struct retry_timer *timer, int expired ) {
+	struct ib_mad_transaction *xact =
+		container_of ( timer, struct ib_mad_transaction, timer );
+	struct ib_mad_interface *mi = xact->mi;
+	struct ib_device *ibdev = mi->ibdev;
+	struct ib_mad_hdr *tx_hdr = &xact->mad.hdr;
+
+	if ( ! expired ) {
+		/* Not yet given up: re-arm the timer first, then
+		 * (re)transmit the stored MAD.
+		 */
+		start_timer ( timer );
+		ib_mi_send ( ibdev, mi, &xact->mad, &xact->av );
+		return;
+	}
+
+	/* Tried too many times: report a timeout to the owner */
+	DBGC ( mi, "MI %p abandoning TID %08x%08x\n",
+	       mi, ntohl ( tx_hdr->tid[0] ), ntohl ( tx_hdr->tid[1] ) );
+	xact->op->complete ( ibdev, mi, xact, -ETIMEDOUT, NULL, NULL );
+}
+
+/**
+ * Create management transaction
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		MAD to send
+ * @v av		Destination address, or NULL to use SM's GSI
+ * @v op		Management transaction operations
+ * @ret madx		Management transaction, or NULL
+ */
+struct ib_mad_transaction *
+ib_create_madx ( struct ib_device *ibdev, struct ib_mad_interface *mi,
+		 union ib_mad *mad, struct ib_address_vector *av,
+		 struct ib_mad_transaction_operations *op ) {
+	struct ib_mad_transaction *xact;
+
+	/* Allocate zeroed transaction and record its owners */
+	xact = zalloc ( sizeof ( *xact ) );
+	if ( xact == NULL )
+		return NULL;
+	xact->op = op;
+	xact->mi = mi;
+	xact->timer.expired = ib_mi_timer_expired;
+
+	/* Keep a private copy of the MAD for (re)transmission */
+	memcpy ( &xact->mad, mad, sizeof ( xact->mad ) );
+
+	/* Use caller's destination, or default to the SM's GSI */
+	if ( av == NULL ) {
+		xact->av.lid = ibdev->sm_lid;
+		xact->av.sl = ibdev->sm_sl;
+		xact->av.qpn = IB_QPN_GSI;
+		xact->av.qkey = IB_QKEY_GSI;
+	} else {
+		memcpy ( &xact->av, av, sizeof ( xact->av ) );
+	}
+
+	/* Queue the transaction; the zero-delay timer sends the MAD */
+	list_add ( &xact->list, &mi->madx );
+	start_timer_nodelay ( &xact->timer );
+
+	return xact;
+}
+
+/**
+ * Destroy management transaction
+ *
+ * @v ibdev		Infiniband device (unused)
+ * @v mi		Management interface (unused)
+ * @v madx		Management transaction
+ */
+void ib_destroy_madx ( struct ib_device *ibdev __unused,
+		       struct ib_mad_interface *mi __unused,
+		       struct ib_mad_transaction *madx ) {
+
+	/* Cancel retransmission and unlink from the transaction list */
+	stop_timer ( &madx->timer );
+	list_del ( &madx->list );
+
+	/* Release the transaction itself */
+	free ( madx );
+}
+
+/**
+ * Create management interface
+ *
+ * @v ibdev		Infiniband device
+ * @v type		Queue pair type
+ * @ret mi		Management interface, or NULL
+ */
+struct ib_mad_interface * ib_create_mi ( struct ib_device *ibdev,
+					 enum ib_queue_pair_type type ) {
+	struct ib_mad_interface *mi;
+	int rc;
+
+	/* Allocate zeroed interface with an empty transaction list */
+	mi = zalloc ( sizeof ( *mi ) );
+	if ( mi == NULL )
+		goto err_alloc;
+	mi->ibdev = ibdev;
+	INIT_LIST_HEAD ( &mi->madx );
+
+	/* Create the completion queue handed to both work queues below */
+	mi->cq = ib_create_cq ( ibdev, IB_MI_NUM_CQES, &ib_mi_completion_ops );
+	if ( mi->cq == NULL ) {
+		DBGC ( mi, "MI %p could not allocate completion queue\n", mi );
+		goto err_create_cq;
+	}
+
+	/* Create queue pair of the requested type */
+	mi->qp = ib_create_qp ( ibdev, type, IB_MI_NUM_SEND_WQES, mi->cq,
+				IB_MI_NUM_RECV_WQES, mi->cq );
+	if ( mi->qp == NULL ) {
+		DBGC ( mi, "MI %p could not allocate queue pair\n", mi );
+		goto err_create_qp;
+	}
+	ib_qp_set_ownerdata ( mi->qp, mi );
+	DBGC ( mi, "MI %p (%s) running on QPN %#lx\n",
+	       mi, ( ( type == IB_QPT_SMI ) ? "SMI" : "GSI" ), mi->qp->qpn );
+
+	/* Apply the queue key matching the interface type */
+	mi->qp->qkey = ( ( type == IB_QPT_SMI ) ? IB_QKEY_SMI : IB_QKEY_GSI );
+	if ( ( rc = ib_modify_qp ( ibdev, mi->qp ) ) != 0 ) {
+		DBGC ( mi, "MI %p could not set queue key: %s\n",
+		       mi, strerror ( rc ) );
+		goto err_modify_qp;
+	}
+
+	/* Fill the receive ring before handing the interface back */
+	ib_refill_recv ( ibdev, mi->qp );
+	return mi;
+
+ err_modify_qp:
+	ib_destroy_qp ( ibdev, mi->qp );
+ err_create_qp:
+	ib_destroy_cq ( ibdev, mi->cq );
+ err_create_cq:
+	free ( mi );
+ err_alloc:
+	return NULL;
+}
+
+/**
+ * Destroy management interface
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ */
+void ib_destroy_mi ( struct ib_device *ibdev, struct ib_mad_interface *mi ) {
+	struct ib_mad_transaction *xact, *next;
+
+	/* Complete every outstanding transaction with -ECANCELED */
+	list_for_each_entry_safe ( xact, next, &mi->madx, list ) {
+		DBGC ( mi, "MI %p destroyed while TID %08x%08x in progress\n",
+		       mi, ntohl ( xact->mad.hdr.tid[0] ),
+		       ntohl ( xact->mad.hdr.tid[1] ) );
+		xact->op->complete ( ibdev, mi, xact, -ECANCELED, NULL, NULL );
+	}
+
+	ib_destroy_qp ( ibdev, mi->qp );
+	ib_destroy_cq ( ibdev, mi->cq );
+	free ( mi );
+}
diff --git a/gpxe/src/net/infiniband/ib_packet.c b/gpxe/src/net/infiniband/ib_packet.c
new file mode 100644
index 00000000..08820ef3
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_packet.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_packet.h>
+
+/**
+ * @file
+ *
+ * Infiniband Packet Formats
+ *
+ */
+
+/**
+ * Add IB headers
+ *
+ * @v ibdev		Infiniband device
+ * @v iobuf		I/O buffer to contain headers
+ * @v qp		Queue pair
+ * @v payload_len	Payload length
+ * @v av		Address vector
+ * @ret rc		Return status code (always zero at present)
+ */
+int ib_push ( struct ib_device *ibdev, struct io_buffer *iobuf,
+	      struct ib_queue_pair *qp, size_t payload_len,
+	      const struct ib_address_vector *av ) {
+	struct ib_local_route_header *lrh;
+	struct ib_global_route_header *grh;
+	struct ib_base_transport_header *bth;
+	struct ib_datagram_extended_transport_header *deth;
+	size_t orig_iob_len = iob_len ( iobuf );
+	size_t pad_len;
+	size_t lrh_len;
+	size_t grh_len;
+	unsigned int vl;
+	unsigned int lnh;
+
+	DBGC2 ( ibdev, "IBDEV %p TX %04x:%08lx => %04x:%08lx (key %08lx)\n",
+		ibdev, ibdev->lid, qp->ext_qpn, av->lid, av->qpn, av->qkey );
+
+	/* Calculate packet length */
+	pad_len = ( (-payload_len) & 0x3 );
+	payload_len += pad_len;
+	payload_len += 4; /* ICRC */
+
+	/* Reserve space for headers; lengths are sampled as each is pushed */
+	deth = iob_push ( iobuf, sizeof ( *deth ) );
+	bth = iob_push ( iobuf, sizeof ( *bth ) );
+	grh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len );
+	grh = ( av->gid_present ?
+		iob_push ( iobuf, sizeof ( *grh ) ) : NULL );
+	lrh = iob_push ( iobuf, sizeof ( *lrh ) );
+	lrh_len = ( payload_len + iob_len ( iobuf ) - orig_iob_len );
+
+	/* Construct LRH */
+	vl = ( ( qp->ext_qpn == IB_QPN_SMI ) ? IB_VL_SMP : IB_VL_DEFAULT );
+	lrh->vl__lver = ( vl << 4 );
+	lnh = ( grh ? IB_LNH_GRH : IB_LNH_BTH );
+	lrh->sl__lnh = ( ( av->sl << 4 ) | lnh );
+	lrh->dlid = htons ( av->lid );
+	lrh->length = htons ( lrh_len >> 2 );
+	lrh->slid = htons ( ibdev->lid );
+
+	/* Construct GRH, if required */
+	if ( grh ) {
+		grh->ipver__tclass__flowlabel =
+			htonl ( IB_GRH_IPVER_IPv6 << 28 );
+		grh->paylen = htons ( grh_len );
+		grh->nxthdr = IB_GRH_NXTHDR_IBA;
+		grh->hoplmt = 0;
+		memcpy ( &grh->sgid, &ibdev->gid, sizeof ( grh->sgid ) );
+		memcpy ( &grh->dgid, &av->gid, sizeof ( grh->dgid ) );
+	}
+
+	/* Construct BTH */
+	bth->opcode = BTH_OPCODE_UD_SEND;
+	bth->se__m__padcnt__tver = ( pad_len << 4 );
+	bth->pkey = htons ( ibdev->pkey );
+	bth->dest_qp = htonl ( av->qpn );
+	bth->ack__psn = htonl ( ( qp->send.psn++ ) & 0xffffffUL );
+
+	/* Construct DETH */
+	deth->qkey = htonl ( av->qkey );
+	deth->src_qp = htonl ( qp->ext_qpn );
+
+	DBGCP_HDA ( ibdev, 0, iobuf->data,
+		    ( iob_len ( iobuf ) - orig_iob_len ) );
+
+	return 0;
+}
+
+/**
+ * Remove IB headers (LRH, optional GRH, BTH and DETH)
+ *
+ * @v ibdev Infiniband device
+ * @v iobuf I/O buffer containing headers
+ * @v qp Queue pair to fill in, or NULL
+ * @v payload_len Payload length to fill in, or NULL
+ * @v av Address vector to fill in (describes the packet's source)
+ */
+int ib_pull ( struct ib_device *ibdev, struct io_buffer *iobuf,
+ struct ib_queue_pair **qp, size_t *payload_len,
+ struct ib_address_vector *av ) {
+ struct ib_local_route_header *lrh;
+ struct ib_global_route_header *grh;
+ struct ib_base_transport_header *bth;
+ struct ib_datagram_extended_transport_header *deth;
+ size_t orig_iob_len = iob_len ( iobuf );
+ unsigned int lnh;
+ size_t pad_len;
+ unsigned long qpn;
+ unsigned int lid;
+
+ /* Clear return values before parsing */
+ if ( qp )
+ *qp = NULL;
+ if ( payload_len )
+ *payload_len = 0;
+ memset ( av, 0, sizeof ( *av ) );
+
+ /* Extract LRH */
+ if ( iob_len ( iobuf ) < sizeof ( *lrh ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for LRH\n",
+ ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ lrh = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *lrh ) );
+ av->lid = ntohs ( lrh->slid );
+ av->sl = ( lrh->sl__lnh >> 4 );
+ lnh = ( lrh->sl__lnh & 0x3 ); /* low two bits: link next header */
+ lid = ntohs ( lrh->dlid );
+
+ /* Reject unsupported packets */
+ if ( ! ( ( lnh == IB_LNH_BTH ) || ( lnh == IB_LNH_GRH ) ) ) {
+ DBGC ( ibdev, "IBDEV %p RX unsupported LNH %x\n",
+ ibdev, lnh );
+ return -ENOTSUP;
+ }
+
+ /* Extract GRH, if present */
+ if ( lnh == IB_LNH_GRH ) {
+ if ( iob_len ( iobuf ) < sizeof ( *grh ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) "
+ "for GRH\n", ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ grh = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *grh ) );
+ av->gid_present = 1;
+ memcpy ( &av->gid, &grh->sgid, sizeof ( av->gid ) );
+ } else {
+ grh = NULL;
+ }
+
+ /* Extract BTH */
+ if ( iob_len ( iobuf ) < sizeof ( *bth ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for BTH\n",
+ ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ bth = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *bth ) );
+ if ( bth->opcode != BTH_OPCODE_UD_SEND ) {
+ DBGC ( ibdev, "IBDEV %p unsupported BTH opcode %x\n",
+ ibdev, bth->opcode );
+ return -ENOTSUP;
+ }
+ qpn = ntohl ( bth->dest_qp );
+
+ /* Extract DETH */
+ if ( iob_len ( iobuf ) < sizeof ( *deth ) ) {
+ DBGC ( ibdev, "IBDEV %p RX too short (%zd bytes) for DETH\n",
+ ibdev, iob_len ( iobuf ) );
+ return -EINVAL;
+ }
+ deth = iobuf->data;
+ iob_pull ( iobuf, sizeof ( *deth ) );
+ av->qpn = ntohl ( deth->src_qp );
+ av->qkey = ntohl ( deth->qkey );
+
+ /* Calculate payload length; NOTE(review): no underflow check here */
+ if ( payload_len ) {
+ pad_len = ( ( bth->se__m__padcnt__tver >> 4 ) & 0x3 );
+ *payload_len = ( ( ntohs ( lrh->length ) << 2 )
+ - ( orig_iob_len - iob_len ( iobuf ) )
+ - pad_len - 4 /* ICRC */ );
+ }
+
+ /* Determine destination QP, if applicable (by MGID, else by QPN) */
+ if ( qp ) {
+ if ( IB_LID_MULTICAST ( lid ) && grh ) {
+ if ( ! ( *qp = ib_find_qp_mgid ( ibdev, &grh->dgid ))){
+ DBGC ( ibdev, "IBDEV %p RX for unknown MGID "
+ "%08x:%08x:%08x:%08x\n", ibdev,
+ ntohl ( grh->dgid.u.dwords[0] ),
+ ntohl ( grh->dgid.u.dwords[1] ),
+ ntohl ( grh->dgid.u.dwords[2] ),
+ ntohl ( grh->dgid.u.dwords[3] ) );
+ return -ENODEV;
+ }
+ } else {
+ if ( ! ( *qp = ib_find_qp_qpn ( ibdev, qpn ) ) ) {
+ DBGC ( ibdev, "IBDEV %p RX for nonexistent "
+ "QPN %lx\n", ibdev, qpn );
+ return -ENODEV;
+ }
+ }
+ assert ( *qp );
+ }
+
+ DBGC2 ( ibdev, "IBDEV %p RX %04x:%08lx <= %04x:%08lx (key %08x)\n",
+ ibdev, lid, ( IB_LID_MULTICAST( lid ) ?
+ ( qp ? (*qp)->ext_qpn : -1UL ) : qpn ),
+ av->lid, av->qpn, ntohl ( deth->qkey ) );
+ DBGCP_HDA ( ibdev, 0,
+ ( iobuf->data - ( orig_iob_len - iob_len ( iobuf ) ) ),
+ ( orig_iob_len - iob_len ( iobuf ) ) );
+
+ return 0;
+}
diff --git a/gpxe/src/net/infiniband/ib_pathrec.c b/gpxe/src/net/infiniband/ib_pathrec.c
new file mode 100644
index 00000000..136e628d
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_pathrec.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_mi.h>
+#include <gpxe/ib_pathrec.h>
+
+/** @file
+ *
+ * Infiniband path lookups
+ *
+ */
+
+/**
+ * Handle path transaction completion
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v madx		Management transaction
+ * @v rc		Status code
+ * @v mad		Received MAD (or NULL on error)
+ * @v av		Source address vector (or NULL on error)
+ */
+static void ib_path_complete ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       struct ib_mad_transaction *madx,
+			       int rc, union ib_mad *mad,
+			       struct ib_address_vector *av __unused ) {
+	struct ib_path *path = ib_madx_get_ownerdata ( madx );
+	struct ib_gid *dgid = &path->av.gid;
+	struct ib_path_record *pathrec;
+
+	/* Report failures (mad is only dereferenced when rc is zero) */
+	if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
+		rc = -ENETUNREACH;
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p path lookup for %08x:%08x:%08x:%08x "
+		       "failed: %s\n", ibdev, ntohl ( dgid->u.dwords[0] ),
+		       ntohl ( dgid->u.dwords[1] ), ntohl ( dgid->u.dwords[2] ),
+		       ntohl ( dgid->u.dwords[3] ), strerror ( rc ) );
+		goto out;
+	}
+
+	/* Extract values from MAD, which is known to be present here */
+	pathrec = &mad->sa.sa_data.path_record;
+	path->av.lid = ntohs ( pathrec->dlid );
+	path->av.sl = ( pathrec->reserved__sl & 0x0f );
+	path->av.rate = ( pathrec->rate_selector__rate & 0x3f );
+	DBGC ( ibdev, "IBDEV %p path to %08x:%08x:%08x:%08x is %04x sl %d "
+	       "rate %d\n", ibdev, ntohl ( dgid->u.dwords[0] ),
+	       ntohl ( dgid->u.dwords[1] ), ntohl ( dgid->u.dwords[2] ),
+	       ntohl ( dgid->u.dwords[3] ), path->av.lid, path->av.sl,
+	       path->av.rate );
+
+ out:
+	/* Destroy the completed transaction */
+	ib_destroy_madx ( ibdev, mi, madx );
+	path->madx = NULL;
+
+	/* Hand off to upper completion handler */
+	path->op->complete ( ibdev, path, rc, &path->av );
+}
+
+/** Path transaction completion operations (used by ib_create_path()) */
+static struct ib_mad_transaction_operations ib_path_op = {
+ .complete = ib_path_complete,
+};
+
+/**
+ * Create path
+ *
+ * @v ibdev		Infiniband device
+ * @v av		Address vector to complete
+ * @v op		Path operations
+ * @ret path		Path, or NULL on failure
+ */
+struct ib_path *
+ib_create_path ( struct ib_device *ibdev, struct ib_address_vector *av,
+		 struct ib_path_operations *op ) {
+	struct ib_path *path;
+	union ib_mad mad;
+	struct ib_mad_sa *sa = &mad.sa;
+
+	/* Allocate and initialise structure */
+	path = zalloc ( sizeof ( *path ) );
+	if ( ! path )
+		goto err_alloc_path;
+	path->ibdev = ibdev;
+	memcpy ( &path->av, av, sizeof ( path->av ) );
+	path->op = op;
+
+	/* Construct path request; zero the whole MAD since all is copied */
+	memset ( &mad, 0, sizeof ( mad ) );
+	sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+	sa->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+	sa->mad_hdr.method = IB_MGMT_METHOD_GET;
+	sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
+	sa->sa_hdr.comp_mask[1] =
+		htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
+	memcpy ( &sa->sa_data.path_record.dgid, &path->av.gid,
+		 sizeof ( sa->sa_data.path_record.dgid ) );
+	memcpy ( &sa->sa_data.path_record.sgid, &ibdev->gid,
+		 sizeof ( sa->sa_data.path_record.sgid ) );
+
+	/* Create management transaction */
+	path->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, NULL,
+				      &ib_path_op );
+	if ( ! path->madx )
+		goto err_create_madx;
+	ib_madx_set_ownerdata ( path->madx, path );
+
+	return path;
+	/* Not reached at present: nothing fails after the transaction */
+	ib_destroy_madx ( ibdev, ibdev->gsi, path->madx );
+ err_create_madx:
+	free ( path );
+ err_alloc_path:
+	return NULL;
+}
+
+/**
+ * Destroy path
+ *
+ * @v ibdev		Infiniband device
+ * @v path		Path
+ */
+void ib_destroy_path ( struct ib_device *ibdev, struct ib_path *path ) {
+	/* Cancel the lookup transaction if it has not yet completed */
+	if ( path->madx != NULL )
+		ib_destroy_madx ( ibdev, ibdev->gsi, path->madx );
+	free ( path );
+}
+
+/** Number of path cache entries
+ *
+ * Must be a power of two.
+ */
+#define IB_NUM_CACHED_PATHS 4
+
+/** A cached path */
+struct ib_cached_path {
+ /** Path (NULL while the slot is empty) */
+ struct ib_path *path;
+};
+
+/** Path cache (one table for all devices; each path records its device) */
+static struct ib_cached_path ib_path_cache[IB_NUM_CACHED_PATHS];
+
+/** Oldest path cache entry index (free-running; reduced modulo on use) */
+static unsigned int ib_path_cache_idx;
+
+/**
+ * Find path cache entry
+ *
+ * @v ibdev		Infiniband device
+ * @v dgid		Destination GID
+ * @ret path		Path cache entry, or NULL
+ */
+static struct ib_cached_path *
+ib_find_path_cache_entry ( struct ib_device *ibdev, struct ib_gid *dgid ) {
+	struct ib_cached_path *entry;
+	unsigned int i;
+
+	for ( i = 0 ; i < IB_NUM_CACHED_PATHS ; i++ ) {
+		entry = &ib_path_cache[i];
+		/* Skip empty slots and entries for other devices */
+		if ( ( entry->path == NULL ) ||
+		     ( entry->path->ibdev != ibdev ) )
+			continue;
+		/* Match on destination GID */
+		if ( memcmp ( &entry->path->av.gid, dgid,
+			      sizeof ( entry->path->av.gid ) ) == 0 )
+			return entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * Handle cached path transaction completion
+ *
+ * @v ibdev		Infiniband device
+ * @v path		Path
+ * @v rc		Status code
+ * @v av		Address vector, or NULL on error
+ */
+static void ib_cached_path_complete ( struct ib_device *ibdev,
+				      struct ib_path *path, int rc,
+				      struct ib_address_vector *av __unused ) {
+	struct ib_cached_path *entry = ib_path_get_ownerdata ( path );
+
+	/* On success keep the path: it holds the resolved address
+	 * that later cache lookups will read.
+	 */
+	if ( rc == 0 )
+		return;
+
+	/* The lookup failed: destroy the path and erase its cache
+	 * entry.
+	 */
+	ib_destroy_path ( ibdev, path );
+	memset ( entry, 0, sizeof ( *entry ) );
+}
+
+/** Cached path transaction completion operations (used by ib_resolve_path()) */
+static struct ib_path_operations ib_cached_path_op = {
+ .complete = ib_cached_path_complete,
+};
+
+/**
+ * Resolve path
+ *
+ * @v ibdev		Infiniband device
+ * @v av		Address vector to complete
+ * @ret rc		Return status code
+ *
+ * Non-transactional, ARP-like path resolution via a small cache: returns
+ * zero on a cache hit, or -ENOENT while a lookup is started or pending.
+ */
+int ib_resolve_path ( struct ib_device *ibdev, struct ib_address_vector *av ) {
+	struct ib_gid *gid = &av->gid;
+	struct ib_cached_path *cached;
+	unsigned int cache_idx;
+
+	/* Sanity check */
+	if ( ! av->gid_present ) {
+		DBGC ( ibdev, "IBDEV %p attempt to look up path "
+		       "without GID\n", ibdev );
+		return -EINVAL;
+	}
+
+	/* Look in cache for a matching entry */
+	cached = ib_find_path_cache_entry ( ibdev, gid );
+	if ( cached && cached->path->av.lid ) {
+		/* Populated entry found */
+		av->lid = cached->path->av.lid;
+		av->rate = cached->path->av.rate;
+		av->sl = cached->path->av.sl;
+		DBGC2 ( ibdev, "IBDEV %p cache hit for %08x:%08x:%08x:%08x\n",
+			ibdev, ntohl ( gid->u.dwords[0] ),
+			ntohl ( gid->u.dwords[1] ), ntohl ( gid->u.dwords[2] ),
+			ntohl ( gid->u.dwords[3] ) );
+		return 0;
+	}
+	DBGC ( ibdev, "IBDEV %p cache miss for %08x:%08x:%08x:%08x%s\n",
+	       ibdev, ntohl ( gid->u.dwords[0] ), ntohl ( gid->u.dwords[1] ),
+	       ntohl ( gid->u.dwords[2] ), ntohl ( gid->u.dwords[3] ),
+	       ( cached ? " (in progress)" : "" ) );
+
+	/* If lookup is already in progress, do nothing */
+	if ( cached )
+		return -ENOENT;
+
+	/* Locate a new cache entry to use */
+	cache_idx = ( (ib_path_cache_idx++) % IB_NUM_CACHED_PATHS );
+	cached = &ib_path_cache[cache_idx];
+
+	/* Destroy the old cache entry, using the device that owns it */
+	if ( cached->path )
+		ib_destroy_path ( cached->path->ibdev, cached->path );
+	memset ( cached, 0, sizeof ( *cached ) );
+
+	/* Create new path */
+	cached->path = ib_create_path ( ibdev, av, &ib_cached_path_op );
+	if ( ! cached->path ) {
+		DBGC ( ibdev, "IBDEV %p could not create path\n",
+		       ibdev );
+		return -ENOMEM;
+	}
+	ib_path_set_ownerdata ( cached->path, cached );
+
+	/* Not found yet */
+	return -ENOENT;
+}
diff --git a/gpxe/src/net/infiniband/ib_sma.c b/gpxe/src/net/infiniband/ib_sma.c
new file mode 100644
index 00000000..ff4cbbf6
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_sma.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2009 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <gpxe/settings.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/ib_mi.h>
+#include <gpxe/ib_sma.h>
+
+/**
+ * @file
+ *
+ * Infiniband Subnet Management Agent
+ *
+ */
+
+/**
+ * Handle a NodeInfo request
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD (reused as the response)
+ * @v av		Source address vector
+ */
+static void ib_sma_node_info ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_node_info *info = &mad->smp.smp_data.node_info;
+	int rc;
+
+	/* Build the NodeInfo attribute from scratch */
+	memset ( info, 0, sizeof ( *info ) );
+	info->base_version = IB_MGMT_BASE_VERSION;
+	info->class_version = IB_SMP_CLASS_VERSION;
+	info->node_type = IB_NODE_TYPE_HCA;
+	info->num_ports = ib_get_hca_info ( ibdev, &info->sys_guid );
+	memcpy ( &info->node_guid, &info->sys_guid,
+		 sizeof ( info->node_guid ) );
+	memcpy ( &info->port_guid, &ibdev->gid.u.half[1],
+		 sizeof ( info->port_guid ) );
+	info->partition_cap = htons ( 1 );
+	info->local_port_num = ibdev->port;
+
+	/* Turn the MAD into a GetResponse and send it */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	rc = ib_mi_send ( ibdev, mi, mad, av );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not send NodeInfo GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+	}
+}
+
+/**
+ * Handle a NodeDescription request
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD (reused as the response)
+ * @v av		Source address vector
+ */
+static void ib_sma_node_desc ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_node_desc *desc = &mad->smp.smp_data.node_desc;
+	char hostname[ sizeof ( desc->node_string ) ];
+	struct ib_gid_half guid;
+	int hostname_len;
+	int rc;
+
+	/* Describe the node as "gPXE [hostname ]<HCA GUID> (<device>)" */
+	memset ( desc, 0, sizeof ( *desc ) );
+	ib_get_hca_info ( ibdev, &guid );
+	hostname_len = fetch_string_setting ( NULL, &hostname_setting,
+					      hostname, sizeof ( hostname ) );
+	snprintf ( desc->node_string, sizeof ( desc->node_string ),
+		   "gPXE %s%s%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x (%s)",
+		   hostname, ( ( hostname_len >= 0 ) ? " " : "" ),
+		   guid.u.bytes[0], guid.u.bytes[1], guid.u.bytes[2],
+		   guid.u.bytes[3], guid.u.bytes[4], guid.u.bytes[5],
+		   guid.u.bytes[6], guid.u.bytes[7], ibdev->dev->name );
+
+	/* Turn the MAD into a GetResponse and send it */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	rc = ib_mi_send ( ibdev, mi, mad, av );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not send NodeDesc GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+	}
+}
+
+/**
+ * Handle a GuidInfo request
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD (reused as the response)
+ * @v av		Source address vector
+ */
+static void ib_sma_guid_info ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_guid_info *info = &mad->smp.smp_data.guid_info;
+	int rc;
+
+	/* Entry 0 carries the second half of the device's GID */
+	memset ( info, 0, sizeof ( *info ) );
+	memcpy ( info->guid[0], &ibdev->gid.u.half[1],
+		 sizeof ( info->guid[0] ) );
+
+	/* Turn the MAD into a GetResponse and send it */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	rc = ib_mi_send ( ibdev, mi, mad, av );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not send GuidInfo GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+	}
+}
+
+/**
+ * Apply a PortInfo Set request to the device
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD
+ * @ret rc		Return status code
+ */
+static int ib_sma_set_port_info ( struct ib_device *ibdev,
+				  struct ib_mad_interface *mi,
+				  union ib_mad *mad ) {
+	const struct ib_port_info *port_info = &mad->smp.smp_data.port_info;
+	unsigned int width;
+	unsigned int speed;
+	int rc;
+
+	/* Adopt new parameters; a zero width/speed leaves it unchanged */
+	memcpy ( &ibdev->gid.u.half[0], port_info->gid_prefix,
+		 sizeof ( ibdev->gid.u.half[0] ) );
+	ibdev->lid = ntohs ( port_info->lid );
+	ibdev->sm_lid = ntohs ( port_info->mastersm_lid );
+	ibdev->sm_sl = ( port_info->neighbour_mtu__mastersm_sl & 0xf );
+	width = port_info->link_width_enabled;
+	if ( width != 0 )
+		ibdev->link_width_enabled = width;
+	speed = ( port_info->link_speed_active__link_speed_enabled & 0xf );
+	if ( speed != 0 )
+		ibdev->link_speed_enabled = speed;
+	DBGC ( mi, "SMA %p set LID %04x SMLID %04x link width %02x speed "
+	       "%02x\n", mi, ibdev->lid, ibdev->sm_lid,
+	       ibdev->link_width_enabled, ibdev->link_speed_enabled );
+
+	/* Pass the same MAD on to the device */
+	rc = ib_set_port_info ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not set port information: %s\n",
+		       mi, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Handle a PortInfo Get or Set request
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD (reused as the response)
+ * @v av		Source address vector
+ */
+static void ib_sma_port_info ( struct ib_device *ibdev,
+			       struct ib_mad_interface *mi,
+			       union ib_mad *mad,
+			       struct ib_address_vector *av ) {
+	struct ib_port_info *port_info = &mad->smp.smp_data.port_info;
+	int rc;
+
+	/* Apply the new parameters first if this is a Set */
+	if ( mad->hdr.method == IB_MGMT_METHOD_SET ) {
+		if ( ( rc = ib_sma_set_port_info ( ibdev, mi, mad ) ) != 0 ) {
+			mad->hdr.status =
+			      htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR );
+			/* Fall through to generate GetResponse */
+		}
+	}
+
+	/* Report current state; 16-bit fields go out in network order */
+	memset ( port_info, 0, sizeof ( *port_info ) );
+	memcpy ( port_info->gid_prefix, &ibdev->gid.u.half[0],
+		 sizeof ( port_info->gid_prefix ) );
+	port_info->lid = htons ( ibdev->lid );
+	port_info->mastersm_lid = htons ( ibdev->sm_lid );
+	port_info->local_port_num = ibdev->port;
+	port_info->link_width_enabled = ibdev->link_width_enabled;
+	port_info->link_width_supported = ibdev->link_width_supported;
+	port_info->link_width_active = ibdev->link_width_active;
+	port_info->link_speed_supported__port_state =
+		( ( ibdev->link_speed_supported << 4 ) | ibdev->port_state );
+	port_info->port_phys_state__link_down_def_state =
+		( ( IB_PORT_PHYS_STATE_POLLING << 4 ) |
+		  IB_PORT_PHYS_STATE_POLLING );
+	port_info->link_speed_active__link_speed_enabled =
+		( ( ibdev->link_speed_active << 4 ) |
+		  ibdev->link_speed_enabled );
+	port_info->neighbour_mtu__mastersm_sl =
+		( ( IB_MTU_2048 << 4 ) | ibdev->sm_sl );
+	port_info->vl_cap__init_type = ( IB_VL_0 << 4 );
+	port_info->init_type_reply__mtu_cap = IB_MTU_2048;
+	port_info->operational_vls__enforcement = ( IB_VL_0 << 4 );
+	port_info->guid_cap = 1;
+
+	/* Send GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) {
+		DBGC ( mi, "SMA %p could not send PortInfo GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+		return;
+	}
+}
+
+/**
+ * Apply a PKeyTable Set request to the device
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD
+ * @ret rc		Return status code
+ */
+static int ib_sma_set_pkey_table ( struct ib_device *ibdev,
+				   struct ib_mad_interface *mi,
+				   union ib_mad *mad ) {
+	struct ib_pkey_table *table = &mad->smp.smp_data.pkey_table;
+	int rc;
+
+	/* Only table entry 0 is recorded */
+	ibdev->pkey = ntohs ( table->pkey[0] );
+	DBGC ( mi, "SMA %p set pkey %04x\n", mi, ibdev->pkey );
+
+	/* Pass the same MAD on to the device */
+	rc = ib_set_pkey_table ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( mi, "SMA %p could not set pkey table: %s\n",
+		       mi, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/**
+ * Handle a PKeyTable Get or Set request
+ *
+ * @v ibdev		Infiniband device
+ * @v mi		Management interface
+ * @v mad		Received MAD (reused as the response)
+ * @v av		Source address vector
+ */
+static void ib_sma_pkey_table ( struct ib_device *ibdev,
+				struct ib_mad_interface *mi,
+				union ib_mad *mad,
+				struct ib_address_vector *av ) {
+	struct ib_pkey_table *pkey_table = &mad->smp.smp_data.pkey_table;
+	int rc;
+
+	/* Apply the new partition key first if this is a Set */
+	if ( mad->hdr.method == IB_MGMT_METHOD_SET ) {
+		if ( ( rc = ib_sma_set_pkey_table ( ibdev, mi, mad ) ) != 0 ) {
+			mad->hdr.status =
+			      htons ( IB_MGMT_STATUS_UNSUPPORTED_METHOD_ATTR );
+			/* Fall through to generate GetResponse */
+		}
+	}
+
+	/* Report the current partition key in table entry 0; all
+	 * other entries are left zeroed
+	 */
+	memset ( pkey_table, 0, sizeof ( *pkey_table ) );
+	pkey_table->pkey[0] = htons ( ibdev->pkey );
+
+	/* Send GetResponse */
+	mad->hdr.method = IB_MGMT_METHOD_GET_RESP;
+	if ( ( rc = ib_mi_send ( ibdev, mi, mad, av ) ) != 0 ) {
+		DBGC ( mi, "SMA %p could not send PKeyTable GetResponse: %s\n",
+		       mi, strerror ( rc ) );
+	}
+}
+
+/** Subnet management agent: one handler per supported SMP attribute */
+struct ib_mad_agent ib_sma_agent[] __ib_mad_agent = {
+ { /* NodeInfo */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_NODE_INFO ),
+ .handle = ib_sma_node_info,
+ },
+ { /* NodeDescription */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_NODE_DESC ),
+ .handle = ib_sma_node_desc,
+ },
+ { /* GuidInfo */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_GUID_INFO ),
+ .handle = ib_sma_guid_info,
+ },
+ { /* PortInfo (Get and Set) */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_PORT_INFO ),
+ .handle = ib_sma_port_info,
+ },
+ { /* PKeyTable (Get and Set) */
+ .mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED,
+ .class_version = IB_SMP_CLASS_VERSION,
+ .attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ),
+ .handle = ib_sma_pkey_table,
+ },
+};
+
+/**
+ * Create subnet management agent and interface
+ *
+ * @v ibdev Infiniband device
+ * @v mi Management interface
+ * @ret rc Return status code (always 0)
+ */
+int ib_create_sma ( struct ib_device *ibdev, struct ib_mad_interface *mi ) {
+
+ /* Nothing to create; just log which management interface is used */
+ DBGC ( ibdev, "IBDEV %p SMA using SMI %p\n", ibdev, mi );
+
+ return 0;
+}
+
+/**
+ * Destroy subnet management agent and interface
+ *
+ * @v ibdev Infiniband device (unused)
+ * @v mi Management interface (unused)
+ */
+void ib_destroy_sma ( struct ib_device *ibdev __unused,
+ struct ib_mad_interface *mi __unused ) {
+ /* Nothing to do: ib_create_sma() acquires no resources */
+}
diff --git a/gpxe/src/net/infiniband/ib_smc.c b/gpxe/src/net/infiniband/ib_smc.c
new file mode 100644
index 00000000..d308dd9d
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_smc.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2008 Michael Brown <mbrown@fensystems.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER );
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_smc.h>
+
+/**
+ * @file
+ *
+ * Infiniband Subnet Management Client
+ *
+ */
+
+/**
+ * Issue a local Get(PortInfo) MAD
+ *
+ * @v ibdev		Infiniband device
+ * @v local_mad		Method for issuing local MADs
+ * @v mad		Management datagram to fill in
+ * @ret rc		Return status code
+ */
+static int ib_smc_get_port_info ( struct ib_device *ibdev,
+				  ib_local_mad_t local_mad,
+				  union ib_mad *mad ) {
+	int rc;
+
+	/* Build a Get(PortInfo) request for this device's port */
+	memset ( mad, 0, sizeof ( *mad ) );
+	mad->hdr.method = IB_MGMT_METHOD_GET;
+	mad->hdr.attr_id = htons ( IB_SMP_ATTR_PORT_INFO );
+	mad->hdr.attr_mod = htonl ( ibdev->port );
+	mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad->hdr.base_version = IB_MGMT_BASE_VERSION;
+	mad->hdr.class_version = 1;
+
+	rc = local_mad ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get port info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Issue a local Get(GuidInfo) MAD
+ *
+ * @v ibdev		Infiniband device
+ * @v local_mad		Method for issuing local MADs
+ * @v mad		Management datagram to fill in
+ * @ret rc		Return status code
+ */
+static int ib_smc_get_guid_info ( struct ib_device *ibdev,
+				  ib_local_mad_t local_mad,
+				  union ib_mad *mad ) {
+	int rc;
+
+	/* Build a Get(GuidInfo) request */
+	memset ( mad, 0, sizeof ( *mad ) );
+	mad->hdr.method = IB_MGMT_METHOD_GET;
+	mad->hdr.attr_id = htons ( IB_SMP_ATTR_GUID_INFO );
+	mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad->hdr.base_version = IB_MGMT_BASE_VERSION;
+	mad->hdr.class_version = 1;
+
+	rc = local_mad ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get GUID info: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Issue a local Get(PKeyTable) MAD
+ *
+ * @v ibdev		Infiniband device
+ * @v local_mad		Method for issuing local MADs
+ * @v mad		Management datagram to fill in
+ * @ret rc		Return status code
+ */
+static int ib_smc_get_pkey_table ( struct ib_device *ibdev,
+				   ib_local_mad_t local_mad,
+				   union ib_mad *mad ) {
+	int rc;
+
+	/* Build a Get(PKeyTable) request */
+	memset ( mad, 0, sizeof ( *mad ) );
+	mad->hdr.method = IB_MGMT_METHOD_GET;
+	mad->hdr.attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE );
+	mad->hdr.mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+	mad->hdr.base_version = IB_MGMT_BASE_VERSION;
+	mad->hdr.class_version = 1;
+
+	rc = local_mad ( ibdev, mad );
+	if ( rc != 0 ) {
+		DBGC ( ibdev, "IBDEV %p could not get pkey table: %s\n",
+		       ibdev, strerror ( rc ) );
+	}
+	return rc;
+}
+
+/**
+ * Refresh the device's cached parameters using local MADs
+ *
+ * @v ibdev		Infiniband device
+ * @v local_mad		Method for issuing local MADs
+ * @ret rc		Return status code
+ */
+int ib_smc_update ( struct ib_device *ibdev, ib_local_mad_t local_mad ) {
+	union ib_mad mad;
+	struct ib_port_info *port = &mad.smp.smp_data.port_info;
+	struct ib_guid_info *guids = &mad.smp.smp_data.guid_info;
+	struct ib_pkey_table *pkeys = &mad.smp.smp_data.pkey_table;
+	int rc;
+
+	/* PortInfo: port state, GID prefix, LIDs, SM SL and link settings */
+	rc = ib_smc_get_port_info ( ibdev, local_mad, &mad );
+	if ( rc != 0 )
+		return rc;
+	memcpy ( &ibdev->gid.u.half[0], port->gid_prefix,
+		 sizeof ( ibdev->gid.u.half[0] ) );
+	ibdev->lid = ntohs ( port->lid );
+	ibdev->sm_lid = ntohs ( port->mastersm_lid );
+	ibdev->sm_sl = ( port->neighbour_mtu__mastersm_sl & 0xf );
+	ibdev->link_width_enabled = port->link_width_enabled;
+	ibdev->link_width_supported = port->link_width_supported;
+	ibdev->link_width_active = port->link_width_active;
+	ibdev->link_speed_supported =
+		( port->link_speed_supported__port_state >> 4 );
+	ibdev->port_state = ( port->link_speed_supported__port_state & 0xf );
+	ibdev->link_speed_active =
+		( port->link_speed_active__link_speed_enabled >> 4 );
+	ibdev->link_speed_enabled =
+		( port->link_speed_active__link_speed_enabled & 0xf );
+
+	/* GuidInfo: entry 0 supplies the second half of the port GID */
+	rc = ib_smc_get_guid_info ( ibdev, local_mad, &mad );
+	if ( rc != 0 )
+		return rc;
+	memcpy ( &ibdev->gid.u.half[1], guids->guid[0],
+		 sizeof ( ibdev->gid.u.half[1] ) );
+
+	/* PKeyTable: entry 0 is the partition key */
+	rc = ib_smc_get_pkey_table ( ibdev, local_mad, &mad );
+	if ( rc != 0 )
+		return rc;
+	ibdev->pkey = ntohs ( pkeys->pkey[0] );
+
+	DBGC ( ibdev, "IBDEV %p port GID is %08x:%08x:%08x:%08x\n", ibdev,
+	       htonl ( ibdev->gid.u.dwords[0] ),
+	       htonl ( ibdev->gid.u.dwords[1] ),
+	       htonl ( ibdev->gid.u.dwords[2] ),
+	       htonl ( ibdev->gid.u.dwords[3] ) );
+
+	return 0;
+}
diff --git a/gpxe/src/net/infiniband/ib_srp.c b/gpxe/src/net/infiniband/ib_srp.c
new file mode 100644
index 00000000..c156d3ae
--- /dev/null
+++ b/gpxe/src/net/infiniband/ib_srp.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2009 Fen Systems Ltd <mbrown@fensystems.co.uk>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+FILE_LICENCE ( BSD2 );
+
+#include <stdlib.h>
+#include <errno.h>
+#include <gpxe/srp.h>
+#include <gpxe/infiniband.h>
+#include <gpxe/ib_cmrc.h>
+#include <gpxe/ib_srp.h>
+
+/**
+ * @file
+ *
+ * SCSI RDMA Protocol over Infiniband
+ *
+ */
+
+/* Disambiguate the various possible EINVALs returned by root path parsing */
+#define EINVAL_BYTE_STRING_LEN ( EINVAL | EUNIQ_01 ) /* wrong length */
+#define EINVAL_BYTE_STRING ( EINVAL | EUNIQ_02 ) /* not a hex byte pair */
+#define EINVAL_INTEGER ( EINVAL | EUNIQ_03 ) /* trailing characters */
+#define EINVAL_RP_TOO_SHORT ( EINVAL | EUNIQ_04 ) /* too few components */
+
+/** IB SRP parse flags (ORed into the size passed to the byte-string parser) */
+enum ib_srp_parse_flags {
+ IB_SRP_PARSE_REQUIRED = 0x0000, /* no flag: empty is not exempted */
+ IB_SRP_PARSE_OPTIONAL = 0x8000, /* an empty component is accepted */
+ IB_SRP_PARSE_FLAG_MASK = 0xf000, /* bits stripped to obtain the size */
+};
+
+/** IB SRP root path parameters (destinations written by the parsers) */
+struct ib_srp_root_path {
+ /** SCSI LUN */
+ struct scsi_lun *lun;
+ /** SRP port IDs (initiator and target) */
+ struct srp_port_ids *port_ids;
+ /** IB SRP parameters */
+ struct ib_srp_parameters *ib;
+};
+
+/**
+ * Parse IB SRP root path byte-string value
+ *
+ * @v rp_comp		Root path component string (two hex digits per byte)
+ * @v bytes		Buffer to fill (may be partly written on error)
+ * @v size_flags	Expected byte count, ORed with IB SRP parse flags
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_byte_string ( const char *rp_comp, uint8_t *bytes,
+				      unsigned int size_flags ) {
+	size_t size = ( size_flags & ~IB_SRP_PARSE_FLAG_MASK );
+	size_t rp_comp_len = strlen ( rp_comp );
+	char buf[3];
+	char *buf_end;
+
+	/* Optional components may be empty; otherwise length must match */
+	if ( ( rp_comp_len == 0 ) && ( size_flags & IB_SRP_PARSE_OPTIONAL ) )
+		return 0;
+	if ( rp_comp_len != ( 2 * size ) )
+		return -EINVAL_BYTE_STRING_LEN;
+
+	/* Parse digit pairs; strtoul() alone would also accept a leading
+	 * whitespace or sign character, so reject those explicitly */
+	for ( ; size ; size--, rp_comp += 2, bytes++ ) {
+		memcpy ( buf, rp_comp, 2 );
+		buf[2] = '\0';
+		*bytes = strtoul ( buf, &buf_end, 16 );
+		if ( ( buf_end != &buf[2] ) ||
+		     ( strchr ( " \t\n\v\f\r+-", buf[0] ) != NULL ) )
+			return -EINVAL_BYTE_STRING;
+	}
+	return 0;
+}
+
+/**
+ * Parse IB SRP root path integer value
+ *
+ * @v rp_comp		Root path component string (hexadecimal)
+ * @v default_value	Default value to use if component string is empty
+ * @ret value		Value, or negative error
+ */
+static int ib_srp_parse_integer ( const char *rp_comp, int default_value ) {
+	char *end;
+	int value = strtoul ( rp_comp, &end, 16 );
+
+	/* Anything left over after the number is an error */
+	if ( *end != '\0' )
+		return -EINVAL_INTEGER;
+
+	/* Nothing consumed means the component was empty */
+	if ( end != rp_comp )
+		return value;
+	return default_value;
+}
+
+/**
+ * Parse IB SRP root path literal component
+ *
+ * @v rp_comp Root path component string (unused)
+ * @v rp IB SRP root path (unused)
+ * @ret rc Return status code (always 0)
+ */
+static int ib_srp_parse_literal ( const char *rp_comp __unused,
+ struct ib_srp_root_path *rp __unused ) {
+ /* Accept the component unconditionally; its content is ignored */
+ return 0;
+}
+
+/**
+ * Parse the (optional) source GID from an IB SRP root path
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_sgid ( const char *rp_comp,
+			       struct ib_srp_root_path *rp ) {
+	struct ib_device *ibdev = last_opened_ibdev();
+	unsigned int size_flags =
+		( sizeof ( rp->ib->sgid ) | IB_SRP_PARSE_OPTIONAL );
+
+	/* Pre-load the GID of the last opened Infiniband device, if any */
+	if ( ibdev != NULL )
+		memcpy ( &rp->ib->sgid, &ibdev->gid, sizeof ( rp->ib->sgid ) );
+	return ib_srp_parse_byte_string ( rp_comp, rp->ib->sgid.u.bytes,
+					  size_flags );
+}
+
+/**
+ * Parse the (optional) initiator identifier extension
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_initiator_id_ext ( const char *rp_comp,
+					   struct ib_srp_root_path *rp ) {
+	struct ib_srp_initiator_port_id *initiator;
+
+	initiator = ib_srp_initiator_port_id ( rp->port_ids );
+	return ib_srp_parse_byte_string ( rp_comp, initiator->id_ext.u.bytes,
+					  ( sizeof ( initiator->id_ext ) |
+					    IB_SRP_PARSE_OPTIONAL ) );
+}
+
+/**
+ * Parse the (optional) initiator HCA GUID
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_initiator_hca_guid ( const char *rp_comp,
+					     struct ib_srp_root_path *rp ) {
+	struct ib_srp_initiator_port_id *initiator;
+
+	/* Pre-load the second half of the source GID as the default */
+	initiator = ib_srp_initiator_port_id ( rp->port_ids );
+	memcpy ( &initiator->hca_guid, &rp->ib->sgid.u.half[1],
+		 sizeof ( initiator->hca_guid ) );
+
+	return ib_srp_parse_byte_string ( rp_comp, initiator->hca_guid.u.bytes,
+					  ( sizeof ( initiator->hca_guid ) |
+					    IB_SRP_PARSE_OPTIONAL ) );
+}
+
+/**
+ * Parse the (mandatory) destination GID from an IB SRP root path
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_dgid ( const char *rp_comp,
+			       struct ib_srp_root_path *rp ) {
+	struct ib_srp_parameters *ib = rp->ib;
+	return ib_srp_parse_byte_string ( rp_comp, ib->dgid.u.bytes,
+			( sizeof ( ib->dgid ) | IB_SRP_PARSE_REQUIRED ) );
+}
+
+/**
+ * Parse the partition key from an IB SRP root path
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_pkey ( const char *rp_comp,
+			       struct ib_srp_root_path *rp ) {
+	int pkey = ib_srp_parse_integer ( rp_comp, IB_PKEY_DEFAULT );
+
+	if ( pkey < 0 )
+		return pkey;
+	rp->ib->pkey = pkey;
+	return 0;
+}
+
+/**
+ * Parse the (mandatory) service ID from an IB SRP root path
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_service_id ( const char *rp_comp,
+				     struct ib_srp_root_path *rp ) {
+	struct ib_srp_parameters *ib = rp->ib;
+	return ib_srp_parse_byte_string ( rp_comp, ib->service_id.u.bytes,
+			( sizeof ( ib->service_id ) | IB_SRP_PARSE_REQUIRED ) );
+}
+
+/**
+ * Parse IB SRP root path LUN
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code (as returned by scsi_parse_lun())
+ */
+static int ib_srp_parse_lun ( const char *rp_comp,
+ struct ib_srp_root_path *rp ) {
+ return scsi_parse_lun ( rp_comp, rp->lun );
+}
+
+/**
+ * Parse the (mandatory) target identifier extension
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_target_id_ext ( const char *rp_comp,
+					struct ib_srp_root_path *rp ) {
+	struct ib_srp_target_port_id *target;
+
+	target = ib_srp_target_port_id ( rp->port_ids );
+	return ib_srp_parse_byte_string ( rp_comp, target->id_ext.u.bytes,
+					  ( sizeof ( target->id_ext ) |
+					    IB_SRP_PARSE_REQUIRED ) );
+}
+
+/**
+ * Parse the (mandatory) target I/O controller GUID
+ *
+ * @v rp_comp		Root path component string
+ * @v rp		IB SRP root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_target_ioc_guid ( const char *rp_comp,
+					  struct ib_srp_root_path *rp ) {
+	struct ib_srp_target_port_id *target;
+
+	target = ib_srp_target_port_id ( rp->port_ids );
+	return ib_srp_parse_byte_string ( rp_comp, target->ioc_guid.u.bytes,
+					  ( sizeof ( target->ioc_guid ) |
+					    IB_SRP_PARSE_REQUIRED ) );
+}
+
+/** IB SRP root path component parser */
+struct ib_srp_root_path_parser {
+ /**
+ * Parse IB SRP root path component
+ *
+ * @v rp_comp Root path component string
+ * @v rp IB SRP root path
+ * @ret rc Return status code (non-zero aborts root path parsing)
+ */
+ int ( * parse ) ( const char *rp_comp, struct ib_srp_root_path *rp );
+};
+
+/** IB SRP root path components, in the order they appear in the root path */
+static struct ib_srp_root_path_parser ib_srp_rp_parser[] = {
+ { ib_srp_parse_literal }, /* leading literal, ignored */
+ { ib_srp_parse_sgid }, /* source GID (optional) */
+ { ib_srp_parse_initiator_id_ext }, /* initiator ID ext (optional) */
+ { ib_srp_parse_initiator_hca_guid }, /* initiator HCA GUID (optional) */
+ { ib_srp_parse_dgid }, /* destination GID */
+ { ib_srp_parse_pkey }, /* partition key */
+ { ib_srp_parse_service_id }, /* service ID */
+ { ib_srp_parse_lun }, /* SCSI LUN */
+ { ib_srp_parse_target_id_ext }, /* target ID extension */
+ { ib_srp_parse_target_ioc_guid }, /* target I/O controller GUID */
+};
+
+/** Number of IB SRP root path components (one per ib_srp_rp_parser entry) */
+#define IB_SRP_NUM_RP_COMPONENTS \
+ ( sizeof ( ib_srp_rp_parser ) / sizeof ( ib_srp_rp_parser[0] ) )
+
+/**
+ * Parse an IB SRP root path into the SRP device's parameters
+ *
+ * @v srp		SRP device
+ * @v rp_string		Root path
+ * @ret rc		Return status code
+ */
+static int ib_srp_parse_root_path ( struct srp_device *srp,
+				    const char *rp_string ) {
+	struct ib_srp_root_path rp = {
+		.lun = &srp->lun,
+		.port_ids = &srp->port_ids,
+		.ib = ib_srp_params ( srp ),
+	};
+	char rp_string_copy[ strlen ( rp_string ) + 1 ];
+	char *rp_comp[IB_SRP_NUM_RP_COMPONENTS];
+	char *cursor = rp_string_copy;
+	unsigned int i;
+	int rc;
+
+	/* Split a private copy at each ':' (last component keeps the rest) */
+	strcpy ( rp_string_copy, rp_string );
+	for ( i = 0 ; i < IB_SRP_NUM_RP_COMPONENTS ; i++ ) {
+		rp_comp[i] = cursor;
+		if ( i == ( IB_SRP_NUM_RP_COMPONENTS - 1 ) )
+			break;
+		while ( *cursor != ':' ) {
+			if ( *cursor == '\0' ) {
+				DBGC ( srp, "SRP %p root path \"%s\" too "
+				       "short\n", srp, rp_string );
+				return -EINVAL_RP_TOO_SHORT;
+			}
+			cursor++;
+		}
+		*(cursor++) = '\0';
+	}
+
+	/* Hand each component to its parser, stopping at the first error */
+	for ( i = 0 ; i < IB_SRP_NUM_RP_COMPONENTS ; i++ ) {
+		rc = ib_srp_rp_parser[i].parse ( rp_comp[i], &rp );
+		if ( rc == 0 )
+			continue;
+		DBGC ( srp, "SRP %p could not parse \"%s\" in root "
+		       "path \"%s\": %s\n", srp, rp_comp[i],
+		       rp_string, strerror ( rc ) );
+		return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * Connect IB SRP session
+ *
+ * @v srp		SRP device
+ * @ret rc		Return status code
+ */
+static int ib_srp_connect ( struct srp_device *srp ) {
+	struct ib_srp_parameters *params = ib_srp_params ( srp );
+	struct ib_device *ibdev;
+	int rc;
+
+	/* The source GID selects the Infiniband device */
+	ibdev = find_ibdev ( &params->sgid );
+	if ( ibdev == NULL ) {
+		DBGC ( srp, "SRP %p could not identify Infiniband device\n",
+		       srp );
+		return -ENODEV;
+	}
+
+	/* The SRP memory handle is the device's RDMA key */
+	srp->memory_handle = ibdev->rdma_key;
+
+	/* Open a CMRC socket to the destination GID and service ID */
+	rc = ib_cmrc_open ( &srp->socket, ibdev, &params->dgid,
+			    &params->service_id );
+	if ( rc != 0 ) {
+		DBGC ( srp, "SRP %p could not open CMRC socket: %s\n",
+		       srp, strerror ( rc ) );
+	}
+
+	return rc;
+}
+
+/** IB SRP transport type */
+struct srp_transport_type ib_srp_transport = {
+ .priv_len = sizeof ( struct ib_srp_parameters ), /* per-device params */
+ .parse_root_path = ib_srp_parse_root_path, /* fills those params */
+ .connect = ib_srp_connect, /* opens the CMRC socket */
+};