summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2015-05-31 21:47:23 -0700
committer David S. Miller <davem@davemloft.net> 2015-05-31 21:47:23 -0700
commit f7f35c0209b579256accb16c63f56510773f6e3e (patch)
tree 006c24e39896b5d0911c957533b02defc4e4b12b /net
parent f16e9d86ae435d7ee6e9eaceb8ba1cf3b1895b72 (diff)
parent 8ba38460f363e4d26d666aae9bc7fd3afa5f8e43 (diff)
Merge branch 'rds-next'
Sowmini Varadhan says:

====================
net/rds: SOL_RDS socket option to explicitly select transport

Today the underlying transport (TCP or IB) for a PF_RDS socket is implicitly selected based on the local address used to bind(2) the PF_RDS socket. This results in some non-deterministic behavior when there are un-numbered and IPoIB interfaces sharing the same IP address. It also places the constraint that the IB interface must have an IP address (and thus, IPoIB) configured on it.

The non-determinism may be avoided by providing the user-space application a socket option that allows it to explicitly select the transport prior to bind(2).

Patch 1 of this series provides the constant definitions needed by the application via <linux/rds.h>. Patch 2 provides the setsockopt support, and Patch 3 provides the getsockopt support.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/rds/af_rds.c 41
-rw-r--r-- net/rds/bind.c 4
-rw-r--r-- net/rds/rds.h 6
-rw-r--r-- net/rds/transport.c 21
4 files changed, 67 insertions, 5 deletions
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 3d83641f2861..2ad9032372b2 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -270,6 +270,28 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
return ret;
}
+static int rds_set_transport(struct rds_sock *rs, char __user *optval,
+ int optlen)
+{ /* Set rs->rs_transport from a user-supplied transport type; returns 0 or a negative errno. */
+ int t_type; /* transport type value copied in from user space */
+
+ if (rs->rs_transport)
+ return -EOPNOTSUPP; /* previously attached to transport */
+
+ if (optlen != sizeof(int)) /* option value must be exactly one int */
+ return -EINVAL;
+
+ if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
+ return -EFAULT; /* user buffer could not be read */
+
+ if (t_type < 0 || t_type >= RDS_TRANS_COUNT) /* only values in [0, RDS_TRANS_COUNT) are accepted */
+ return -EINVAL;
+
+ rs->rs_transport = rds_trans_get(t_type); /* stores the lookup result, which may be NULL */
+
+ return rs->rs_transport ? 0 : -ENOPROTOOPT; /* a NULL lookup result is reported as -ENOPROTOOPT */
+}
+
static int rds_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -300,6 +322,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
case RDS_CONG_MONITOR:
ret = rds_cong_monitor(rs, optval, optlen);
break;
+ case SO_RDS_TRANSPORT:
+ lock_sock(sock->sk);
+ ret = rds_set_transport(rs, optval, optlen);
+ release_sock(sock->sk);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -312,6 +339,7 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret = -ENOPROTOOPT, len;
+ int trans;
if (level != SOL_RDS)
goto out;
@@ -337,6 +365,19 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
else
ret = 0;
break;
+ case SO_RDS_TRANSPORT:
+ if (len < sizeof(int)) {
+ ret = -EINVAL;
+ break;
+ }
+ trans = (rs->rs_transport ? rs->rs_transport->t_type :
+ RDS_TRANS_NONE); /* unbound */
+ if (put_user(trans, (int __user *)optval) ||
+ put_user(sizeof(int), optlen))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
default:
break;
}
diff --git a/net/rds/bind.c b/net/rds/bind.c
index a2e6562da751..4ebd29c128b6 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,6 +181,10 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (ret)
goto out;
+ if (rs->rs_transport) { /* previously bound */
+ ret = 0;
+ goto out;
+ }
trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d41155a2258..a33fb4ad3535 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -408,11 +408,6 @@ struct rds_notifier {
* should try hard not to block.
*/
-#define RDS_TRANS_IB 0
-#define RDS_TRANS_IWARP 1
-#define RDS_TRANS_TCP 2
-#define RDS_TRANS_COUNT 3
-
struct rds_transport {
char t_name[TRANSNAMSIZ];
struct list_head t_item;
@@ -803,6 +798,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
+struct rds_transport *rds_trans_get(int t_type);
int rds_trans_init(void);
void rds_trans_exit(void);
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7f2ac4fec367..8b4a6cd2c3a7 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -101,6 +101,27 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
return ret;
}
+struct rds_transport *rds_trans_get(int t_type)
+{ /* Find the entry in transports[] whose t_type matches; returns it, or NULL if none qualifies. */
+ struct rds_transport *ret = NULL; /* stays NULL unless a match is accepted below */
+ struct rds_transport *trans;
+ unsigned int i;
+
+ down_read(&rds_trans_sem); /* read-lock rds_trans_sem for the duration of the scan */
+ for (i = 0; i < RDS_TRANS_COUNT; i++) {
+ trans = transports[i];
+
+ if (trans && trans->t_type == t_type && /* skip empty slots and non-matching types */
+ (!trans->t_owner || try_module_get(trans->t_owner))) { /* accept if there is no owner, or try_module_get() on the owner succeeds */
+ ret = trans;
+ break; /* first accepted match wins */
+ }
+ }
+ up_read(&rds_trans_sem);
+
+ return ret;
+}
+
/*
* This returns the number of stats entries in the snapshot and only
* copies them using the iter if there is enough space for them. The