about summary refs log tree commit diff stats
path: root/gpxe/src/net
diff options
context:
space:
mode:
Diffstat (limited to 'gpxe/src/net')
-rw-r--r-- gpxe/src/net/infiniband.c 146
-rw-r--r-- gpxe/src/net/ipv4.c 2
-rw-r--r-- gpxe/src/net/tcp.c 67
-rw-r--r-- gpxe/src/net/tcp/iscsi.c 46
4 files changed, 186 insertions, 75 deletions
diff --git a/gpxe/src/net/infiniband.c b/gpxe/src/net/infiniband.c
index 39d11285..ab76742e 100644
--- a/gpxe/src/net/infiniband.c
+++ b/gpxe/src/net/infiniband.c
@@ -29,6 +29,7 @@
#include <gpxe/netdevice.h>
#include <gpxe/iobuf.h>
#include <gpxe/ipoib.h>
+#include <gpxe/process.h>
#include <gpxe/infiniband.h>
/** @file
@@ -37,6 +38,9 @@
*
*/
+/** List of Infiniband devices
+ *
+ * register_ibdev() appends a device to this list (and removes it
+ * again on its error path), unregister_ibdev() removes it, and
+ * ib_step() walks the list to poll each device's event queue.
+ */
+struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
+
/**
* Create completion queue
*
@@ -153,14 +157,40 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
}
/**
+ * Modify queue pair
+ *
+ * @v ibdev Infiniband device
+ * @v qp Queue pair
+ * @v mod_list Modification list
+ * @v qkey New queue key, if applicable
+ * @ret rc Return status code
+ *
+ * The new queue key is recorded in the queue pair only when
+ * IB_MODIFY_QKEY is set in the modification list; otherwise the
+ * qkey argument is ignored. The device's modify_qp operation is
+ * then invoked with the same modification list, and its status code
+ * is returned unchanged on failure.
+ *
+ * Note that the queue key is stored before the device operation is
+ * attempted and is not restored if that operation fails.
+ */
+int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
+ unsigned long mod_list, unsigned long qkey ) {
+ int rc;
+
+ DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
+
+ if ( mod_list & IB_MODIFY_QKEY )
+ qp->qkey = qkey;
+
+ if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
+ ibdev, qp->qpn, strerror ( rc ) );
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
* Destroy queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
*/
-void ib_destroy_qp ( struct ib_device *ibdev,
- struct ib_queue_pair *qp ) {
- DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n",
+void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
+ DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
ibdev, qp->qpn );
ibdev->op->destroy_qp ( ibdev, qp );
list_del ( &qp->send.list );
@@ -280,38 +310,6 @@ static int ib_get_pkey_table ( struct ib_device *ibdev,
}
/**
- * Wait for link up
- *
- * @v ibdev Infiniband device
- * @ret rc Return status code
- *
- * This function shouldn't really exist. Unfortunately, IB links take
- * a long time to come up, and we can't get various key parameters
- * e.g. our own IPoIB MAC address without information from the subnet
- * manager). We should eventually make link-up an asynchronous event.
- */
-static int ib_wait_for_link ( struct ib_device *ibdev ) {
- struct ib_mad_port_info port_info;
- unsigned int retries;
- int rc;
-
- printf ( "Waiting for Infiniband link-up..." );
- for ( retries = 20 ; retries ; retries-- ) {
- if ( ( rc = ib_get_port_info ( ibdev, &port_info ) ) != 0 )
- continue;
- if ( ( ( port_info.port_state__link_speed_supported ) & 0xf )
- == 4 ) {
- printf ( "ok\n" );
- return 0;
- }
- printf ( "." );
- sleep ( 1 );
- }
- printf ( "failed\n" );
- return -ENODEV;
-};
-
-/**
* Get MAD parameters
*
* @v ibdev Infiniband device
@@ -326,9 +324,13 @@ static int ib_get_mad_params ( struct ib_device *ibdev ) {
} u;
int rc;
- /* Port info gives us the first half of the port GID and the SM LID */
+ /* Port info gives us the link state, the first half of the
+ * port GID and the SM LID.
+ */
if ( ( rc = ib_get_port_info ( ibdev, &u.port_info ) ) != 0 )
return rc;
+ ibdev->link_up = ( ( u.port_info.port_state__link_speed_supported
+ & 0xf ) == 4 );
memcpy ( &ibdev->port_gid.u.bytes[0], u.port_info.gid_prefix, 8 );
ibdev->sm_lid = ntohs ( u.port_info.mastersm_lid );
@@ -353,6 +355,50 @@ static int ib_get_mad_params ( struct ib_device *ibdev ) {
/***************************************************************************
*
+ * Event queues
+ *
+ ***************************************************************************
+ */
+
+/**
+ * Handle Infiniband link state change
+ *
+ * @v ibdev Infiniband device
+ *
+ * Re-reads the MAD parameters via ib_get_mad_params() and then
+ * passes the event on to IPoIB. There is no return value: if the
+ * MAD parameters cannot be refreshed, the failure is reported only
+ * through the debug log and IPoIB is not notified.
+ */
+void ib_link_state_changed ( struct ib_device *ibdev ) {
+ int rc;
+
+ /* Update MAD parameters; give up without notifying on failure */
+ if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 ) {
+ DBGC ( ibdev, "IBDEV %p could not update MAD parameters: %s\n",
+ ibdev, strerror ( rc ) );
+ return;
+ }
+
+ /* Notify IPoIB of link state change */
+ ipoib_link_state_changed ( ibdev );
+}
+
+/**
+ * Single-step the Infiniband event queue
+ *
+ * @v process Infiniband event queue process
+ *
+ * Invokes the poll_eq operation of every device currently on the
+ * ib_devices list, in list order. The process argument is unused.
+ */
+static void ib_step ( struct process *process __unused ) {
+ struct ib_device *ibdev;
+
+ list_for_each_entry ( ibdev, &ib_devices, list ) {
+ ibdev->op->poll_eq ( ibdev );
+ }
+}
+
+/** Infiniband event queue process
+ *
+ * Uses ib_step() as its step method.
+ */
+struct process ib_process __permanent_process = {
+ .step = ib_step,
+};
+
+/***************************************************************************
+ *
* Infiniband device creation/destruction
*
***************************************************************************
@@ -387,14 +433,14 @@ struct ib_device * alloc_ibdev ( size_t priv_size ) {
int register_ibdev ( struct ib_device *ibdev ) {
int rc;
+ /* Add to device list */
+ ibdev_get ( ibdev );
+ list_add_tail ( &ibdev->list, &ib_devices );
+
/* Open link */
if ( ( rc = ib_open ( ibdev ) ) != 0 )
goto err_open;
- /* Wait for link */
- if ( ( rc = ib_wait_for_link ( ibdev ) ) != 0 )
- goto err_wait_for_link;
-
/* Get MAD parameters */
if ( ( rc = ib_get_mad_params ( ibdev ) ) != 0 )
goto err_get_mad_params;
@@ -406,13 +452,16 @@ int register_ibdev ( struct ib_device *ibdev ) {
goto err_ipoib_probe;
}
+ DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
+ ibdev->dev->name );
return 0;
err_ipoib_probe:
err_get_mad_params:
- err_wait_for_link:
ib_close ( ibdev );
err_open:
+ list_del ( &ibdev->list );
+ ibdev_put ( ibdev );
return rc;
}
@@ -422,16 +471,13 @@ int register_ibdev ( struct ib_device *ibdev ) {
* @v ibdev Infiniband device
*/
void unregister_ibdev ( struct ib_device *ibdev ) {
+
+ /* Close device */
ipoib_remove ( ibdev );
ib_close ( ibdev );
-}
-/**
- * Free Infiniband device
- *
- * @v ibdev Infiniband device
- */
-void free_ibdev ( struct ib_device *ibdev ) {
- free ( ibdev );
+ /* Remove from device list */
+ list_del ( &ibdev->list );
+ ibdev_put ( ibdev );
+ DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );
}
-
diff --git a/gpxe/src/net/ipv4.c b/gpxe/src/net/ipv4.c
index 591293b7..82a13c33 100644
--- a/gpxe/src/net/ipv4.c
+++ b/gpxe/src/net/ipv4.c
@@ -273,7 +273,7 @@ static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
memcpy ( ll_dest, ll_protocol->ll_broadcast,
ll_protocol->ll_addr_len );
return 0;
- } else if ( IN_MULTICAST ( dest.s_addr ) ) {
+ } else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) {
/* Special case: IPv4 multicast over Ethernet. This
* code may need to be generalised once we find out
* what happens for other link layers.
diff --git a/gpxe/src/net/tcp.c b/gpxe/src/net/tcp.c
index da8e87b4..df87fc14 100644
--- a/gpxe/src/net/tcp.c
+++ b/gpxe/src/net/tcp.c
@@ -65,6 +65,11 @@ struct tcp_connection {
* Equivalent to RCV.NXT in RFC 793 terminology.
*/
uint32_t rcv_ack;
+ /** Receive window
+ *
+ * Equivalent to RCV.WND in RFC 793 terminology.
+ */
+ uint32_t rcv_win;
/** Most recent received timestamp
*
* Equivalent to TS.Recent in RFC 1323 terminology.
@@ -394,7 +399,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
size_t len = 0;
size_t seq_len;
size_t app_win;
- size_t rcv_win;
+ size_t max_rcv_win;
/* If retransmission timer is already running, do nothing */
if ( timer_running ( &tcp->timer ) )
@@ -439,14 +444,16 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
/* Fill data payload from transmit queue */
tcp_process_queue ( tcp, len, iobuf, 0 );
- /* Estimate window size */
- rcv_win = ( ( freemem * 3 ) / 4 );
- if ( rcv_win > TCP_MAX_WINDOW_SIZE )
- rcv_win = TCP_MAX_WINDOW_SIZE;
+ /* Expand receive window if possible */
+ max_rcv_win = ( ( freemem * 3 ) / 4 );
+ if ( max_rcv_win > TCP_MAX_WINDOW_SIZE )
+ max_rcv_win = TCP_MAX_WINDOW_SIZE;
app_win = xfer_window ( &tcp->xfer );
- if ( rcv_win > app_win )
- rcv_win = app_win;
- rcv_win &= ~0x03; /* Keep everything dword-aligned */
+ if ( max_rcv_win > app_win )
+ max_rcv_win = app_win;
+ max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
+ if ( tcp->rcv_win < max_rcv_win )
+ tcp->rcv_win = max_rcv_win;
/* Fill up the TCP header */
payload = iobuf->data;
@@ -472,7 +479,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
tcphdr->ack = htonl ( tcp->rcv_ack );
tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
tcphdr->flags = flags;
- tcphdr->win = htons ( rcv_win );
+ tcphdr->win = htons ( tcp->rcv_win );
tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
/* Dump header */
@@ -633,6 +640,21 @@ static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
}
/**
+ * Consume received sequence space
+ *
+ * @v tcp TCP connection
+ * @v seq_len Sequence space length to consume
+ *
+ * Advances the connection's rcv_ack by seq_len and shrinks its
+ * rcv_win by the same amount. If seq_len is at least as large as
+ * the current rcv_win, the window is set to zero rather than being
+ * allowed to wrap around.
+ */
+static void tcp_rx_seq ( struct tcp_connection *tcp, size_t seq_len ) {
+ tcp->rcv_ack += seq_len;
+ if ( tcp->rcv_win > seq_len ) {
+ tcp->rcv_win -= seq_len;
+ } else {
+ tcp->rcv_win = 0;
+ }
+}
+
+/**
* Handle TCP received SYN
*
* @v tcp TCP connection
@@ -659,7 +681,7 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
TCP_STATE_RCVD ( TCP_SYN ) );
/* Acknowledge SYN */
- tcp->rcv_ack++;
+ tcp_rx_seq ( tcp, 1 );
return 0;
}
@@ -747,7 +769,8 @@ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
return rc;
/* Acknowledge new data */
- tcp->rcv_ack += len;
+ tcp_rx_seq ( tcp, len );
+
return 0;
}
@@ -766,7 +789,7 @@ static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
/* Mark FIN as received and acknowledge it */
tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
- tcp->rcv_ack++;
+ tcp_rx_seq ( tcp, 1 );
/* Close connection */
tcp_close ( tcp, 0 );
@@ -789,7 +812,7 @@ static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
* ACKed.
*/
if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
- if ( ( tcp->rcv_ack - seq ) > 0 )
+ if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win )
return 0;
} else {
if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
@@ -850,7 +873,8 @@ static int tcp_rx ( struct io_buffer *iobuf,
rc = -EINVAL;
goto discard;
}
- csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, iob_len ( iobuf ));
+ csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
+ iob_len ( iobuf ) );
if ( csum != 0 ) {
DBG ( "TCP checksum incorrect (is %04x including checksum "
"field, should be 0000)\n", csum );
@@ -922,10 +946,19 @@ static int tcp_rx ( struct io_buffer *iobuf,
/* Dump out any state change as a result of the received packet */
tcp_dump_state ( tcp );
- /* Send out any pending data. If peer is expecting an ACK for
- * this packet then force sending a reply.
+ /* Send out any pending data. We force sending a reply if either
+ *
+ * a) the peer is expecting an ACK (i.e. consumed sequence space), or
+ * b) either end of the packet was outside the receive window
+ *
+ * Case (b) enables us to support TCP keepalives using
+ * zero-length packets, which we would otherwise ignore. Note
+ * that for case (b), we need *only* consider zero-length
+ * packets, since non-zero-length packets will already be
+ * caught by case (a).
*/
- tcp_xmit ( tcp, ( start_seq != seq ) );
+ tcp_xmit ( tcp, ( ( start_seq != seq ) ||
+ ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
/* If this packet was the last we expect to receive, set up
* timer to expire and cause the connection to be freed.
diff --git a/gpxe/src/net/tcp/iscsi.c b/gpxe/src/net/tcp/iscsi.c
index c01ca44b..a12fca85 100644
--- a/gpxe/src/net/tcp/iscsi.c
+++ b/gpxe/src/net/tcp/iscsi.c
@@ -456,17 +456,18 @@ static int iscsi_build_login_request_strings ( struct iscsi_session *iscsi,
"InitiatorName=%s%c"
"TargetName=%s%c"
"SessionType=Normal%c"
- "AuthMethod=CHAP,None%c",
+ "AuthMethod=%sNone%c",
iscsi_initiator_iqn(), 0,
- iscsi->target_iqn, 0, 0, 0 );
+ iscsi->target_iqn, 0, 0,
+ ( ( iscsi->username && iscsi->password ) ?
+ "CHAP," : "" ), 0 );
}
if ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_ALGORITHM ) {
used += ssnprintf ( data + used, len - used, "CHAP_A=5%c", 0 );
}
- if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_RESPONSE ) &&
- iscsi->username ) {
+ if ( ( iscsi->status & ISCSI_STATUS_STRINGS_CHAP_RESPONSE ) ) {
used += ssnprintf ( data + used, len - used,
"CHAP_N=%s%cCHAP_R=0x",
iscsi->username, 0 );
@@ -830,6 +831,35 @@ static int iscsi_rx_buffered_data ( struct iscsi_session *iscsi,
}
/**
+ * Convert iSCSI response status to return status code
+ *
+ * @v status_class iSCSI status class
+ * @v status_detail iSCSI status detail
+ * @ret rc Return status code
+ *
+ * The mapping is:
+ *
+ * - initiator error, authentication failure: -EACCES
+ * - initiator error, authorisation failure: -EPERM
+ * - initiator error, target not found or removed: -ENODEV
+ * - initiator error, any other detail: -ENOTSUP
+ * - target error (any detail): -EIO
+ * - any other status class: -EINVAL
+ *
+ * The status detail is examined only for the initiator error class.
+ * This function never returns zero.
+ */
+static int iscsi_status_to_rc ( unsigned int status_class,
+ unsigned int status_detail ) {
+ switch ( status_class ) {
+ case ISCSI_STATUS_INITIATOR_ERROR :
+ switch ( status_detail ) {
+ case ISCSI_STATUS_INITIATOR_ERROR_AUTHENTICATION :
+ return -EACCES;
+ case ISCSI_STATUS_INITIATOR_ERROR_AUTHORISATION :
+ return -EPERM;
+ case ISCSI_STATUS_INITIATOR_ERROR_NOT_FOUND :
+ case ISCSI_STATUS_INITIATOR_ERROR_REMOVED :
+ return -ENODEV;
+ default :
+ return -ENOTSUP;
+ }
+ case ISCSI_STATUS_TARGET_ERROR :
+ return -EIO;
+ default :
+ return -EINVAL;
+ }
+}
+
+/**
* Receive data segment of an iSCSI login response PDU
*
* @v iscsi iSCSI session
@@ -876,8 +906,10 @@ static int iscsi_rx_login_response ( struct iscsi_session *iscsi,
if ( response->status_class != 0 ) {
DBGC ( iscsi, "iSCSI login failure: class %02x detail %02x\n",
response->status_class, response->status_detail );
- iscsi->instant_rc = -EPERM;
- return -EPERM;
+ rc = iscsi_status_to_rc ( response->status_class,
+ response->status_detail );
+ iscsi->instant_rc = rc;
+ return rc;
}
/* Handle login transitions */
@@ -1176,7 +1208,7 @@ static int iscsi_rx_data ( struct iscsi_session *iscsi, const void *data,
return 0;
DBGC ( iscsi, "iSCSI %p unknown opcode %02x\n", iscsi,
response->opcode );
- return -EOPNOTSUPP;
+ return -ENOTSUP;
}
}