summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-10-20 08:42:09 +0100
committerDavid S. Miller <davem@davemloft.net>2017-10-20 08:42:09 +0100
commit9854d758f7215ceea7da625cb421e0674846b53c (patch)
tree08ed51abc3e56216076e43efbf496e816b64b454
parent37320537318934878cc02d8e6103e1be5696fe9f (diff)
parentbc5e3a546d553e5223851fc199e69040eb70f68b (diff)
Merge tag 'rxrpc-next-20171018' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
David Howells says: ==================== rxrpc: Add bits for kernel services Here are some patches that add a few things for kernel services to use: (1) Allow service upgrade to be requested and allow the resultant actual service ID to be obtained. (2) Allow the RTT time of a call to be obtained. (3) Allow a kernel service to find out if a call is still alive on a server between transmitting a request and getting the reply. (4) Allow data transmission to ignore signals if transmission progress is being made in reasonable time. This is also usable by userspace by passing MSG_WAITALL to sendmsg()[*]. [*] I'm not sure this is the right interface for this or whether a sockopt should be used instead. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/rxrpc.txt53
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/rxrpc.c42
-rw-r--r--include/net/af_rxrpc.h7
-rw-r--r--net/rxrpc/af_rxrpc.c24
-rw-r--r--net/rxrpc/peer_object.c13
-rw-r--r--net/rxrpc/recvmsg.c5
-rw-r--r--net/rxrpc/sendmsg.c107
8 files changed, 211 insertions, 41 deletions
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 810620153a44..b5407163d53b 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -280,6 +280,18 @@ Interaction with the user of the RxRPC socket:
nominated by a socket option.
+Notes on sendmsg:
+
+ (*) MSG_WAITALL can be set to tell sendmsg to ignore signals if the peer is
+ making progress at accepting packets within a reasonable time such that we
+ manage to queue up all the data for transmission. This requires the
+ client to accept at least one packet per 2*RTT time period.
+
+ If this isn't set, sendmsg() will return immediately, either returning
+ EINTR/ERESTARTSYS if nothing was consumed or returning the amount of data
+ consumed.
+
+
Notes on recvmsg:
(*) If there's a sequence of data messages belonging to a particular call on
@@ -782,7 +794,9 @@ The kernel interface functions are as follows:
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
- gfp_t gfp);
+ gfp_t gfp,
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade);
This allocates the infrastructure to make a new RxRPC call and assigns
call and connection numbers. The call will be made on the UDP port that
@@ -803,6 +817,13 @@ The kernel interface functions are as follows:
allows the kernel to encrypt directly to the packet buffers, thereby
saving a copy. The value may not be less than -1.
+ notify_rx is a pointer to a function to be called when events such as
+ incoming data packets or remote aborts happen.
+
+ upgrade should be set to true if a client operation should request that
+ the server upgrade the service to a better one. The resultant service ID
+ is returned by rxrpc_kernel_recv_data().
+
If this function is successful, an opaque reference to the RxRPC call is
returned. The caller now holds a reference on this and it must be
properly ended.
@@ -850,7 +871,8 @@ The kernel interface functions are as follows:
size_t size,
size_t *_offset,
bool want_more,
- u32 *_abort)
+ u32 *_abort,
+ u16 *_service)
This is used to receive data from either the reply part of a client call
or the request part of a service call. buf and size specify how much
@@ -873,6 +895,9 @@ The kernel interface functions are as follows:
If a remote ABORT is detected, the abort code received will be stored in
*_abort and ECONNABORTED will be returned.
+ The service ID that the call ended up with is returned into *_service.
+ This can be used to see if a call got a service upgrade.
+
(*) Abort a call.
void rxrpc_kernel_abort_call(struct socket *sock,
@@ -1020,6 +1045,30 @@ The kernel interface functions are as follows:
It returns 0 if the call was requeued and an error otherwise.
+ (*) Get call RTT.
+
+ u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);
+
+ Get the RTT time to the peer in use by a call. The value returned is in
+ nanoseconds.
+
+ (*) Check call still alive.
+
+ u32 rxrpc_kernel_check_life(struct socket *sock,
+ struct rxrpc_call *call);
+
+ This returns a number that is updated when ACKs are received from the peer
+ (notably including PING RESPONSE ACKs which we can elicit by sending PING
+ ACKs to see if the call still exists on the server). The caller should
+ compare the numbers of two calls to see if the call is still alive after
+ waiting for a suitable interval.
+
+ This allows the caller to work out if the server is still contactable and
+ if the call is still alive on the server whilst waiting for the server to
+ process a client operation.
+
+ This function may transmit a PING ACK.
+
=======================
CONFIGURABLE PARAMETERS
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 82e16556afea..3f03f7888302 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -100,6 +100,7 @@ struct afs_call {
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
+ bool upgrade; /* T to request service upgrade */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
u32 operation_ID; /* operation ID for an incoming call */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 0bf191f0dbaf..bb1e2caa1720 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -387,7 +387,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
tx_total_len, gfp,
(async ?
afs_wake_up_async_call :
- afs_wake_up_call_waiter));
+ afs_wake_up_call_waiter),
+ call->upgrade);
call->key = NULL;
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -406,7 +407,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
+ msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
/* We have to change the state *before* sending the last packet as
* rxrpc might give us the reply before it returns from sending the
@@ -443,7 +444,7 @@ error_do_abort:
abort_code = 0;
offset = 0;
rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
- false, &abort_code);
+ false, &abort_code, &call->service_id);
ret = call->type->abort_to_error(abort_code);
}
error_kill_call:
@@ -471,7 +472,8 @@ static void afs_deliver_to_call(struct afs_call *call)
size_t offset = 0;
ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
NULL, 0, &offset, false,
- &call->abort_code);
+ &call->abort_code,
+ &call->service_id);
trace_afs_recv_data(call, 0, offset, false, ret);
if (ret == -EINPROGRESS || ret == -EAGAIN)
@@ -536,15 +538,26 @@ call_complete:
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
+ signed long rtt2, timeout;
int ret;
+ u64 rtt;
+ u32 life, last_life;
DECLARE_WAITQUEUE(myself, current);
_enter("");
+ rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall);
+ rtt2 = nsecs_to_jiffies64(rtt) * 2;
+ if (rtt2 < 2)
+ rtt2 = 2;
+
+ timeout = rtt2;
+ last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+
add_wait_queue(&call->waitq, &myself);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
/* deliver any messages that are in the queue */
if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
@@ -554,10 +567,20 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
continue;
}
- if (call->state == AFS_CALL_COMPLETE ||
- signal_pending(current))
+ if (call->state == AFS_CALL_COMPLETE)
break;
- schedule();
+
+ life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+ if (timeout == 0 &&
+ life == last_life && signal_pending(current))
+ break;
+
+ if (life != last_life) {
+ timeout = rtt2;
+ last_life = life;
+ }
+
+ timeout = schedule_timeout(timeout);
}
remove_wait_queue(&call->waitq, &myself);
@@ -851,7 +874,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
buf, count, &call->offset,
- want_more, &call->abort_code);
+ want_more, &call->abort_code,
+ &call->service_id);
trace_afs_recv_data(call, count, call->offset, want_more, ret);
if (ret == 0 || ret == -EAGAIN)
return ret;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 3ac79150291f..2b3a6eec4570 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -49,17 +49,19 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
unsigned long,
s64,
gfp_t,
- rxrpc_notify_rx_t);
+ rxrpc_notify_rx_t,
+ bool);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
rxrpc_notify_end_tx_t);
int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
- void *, size_t, size_t *, bool, u32 *);
+ void *, size_t, size_t *, bool, u32 *, u16 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, const char *);
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
+u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
@@ -67,5 +69,6 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *, struct key *);
int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
enum rxrpc_call_completion *, u32 *);
+u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
#endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fb17552fd292..73c980e26581 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -265,6 +265,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
@@ -279,7 +280,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
unsigned long user_call_ID,
s64 tx_total_len,
gfp_t gfp,
- rxrpc_notify_rx_t notify_rx)
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -304,6 +306,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.key = key;
cp.security_level = 0;
cp.exclusive = false;
+ cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
gfp);
@@ -337,6 +340,25 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
EXPORT_SYMBOL(rxrpc_kernel_end_call);
/**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - ie. we're
+ * getting ACKs from the server. Returns a number representing the life state
+ * which can be compared to that returned by a previous call.
+ *
+ * If this is a client call, ping ACKs will be sent to the server to find out
+ * whether it's still responsive and whether the call is still alive on the
+ * server.
+ */
+u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->acks_latest;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
* rxrpc_kernel_check_call - Check a call's state
* @sock: The socket the call is on
* @call: The call to check
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97f5330..d02a99f37f5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
*_srx = call->peer->srx;
}
EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ *
+ * Get the call's peer RTT.
+ */
+u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->peer->rtt;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21f313d..e4937b3f3685 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -607,6 +607,7 @@ wait_error:
* @_offset: The running offset into the buffer.
* @want_more: True if more data is expected to be read
* @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
*
* Allow a kernel service to receive data and pick up information about the
* state of a call. Returns 0 if got what was asked for and there's more
@@ -624,7 +625,7 @@ wait_error:
*/
int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
void *buf, size_t size, size_t *_offset,
- bool want_more, u32 *_abort)
+ bool want_more, u32 *_abort, u16 *_service)
{
struct iov_iter iter;
struct kvec iov;
@@ -680,6 +681,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
+ if (_service)
+ *_service = call->service_id;
mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
return ret;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9ea6f972767e..2d9edc656ca3 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
};
/*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (call->tx_top - call->tx_hard_ack <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (signal_pending(current))
+ return sock_intr_errno(*timeo);
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ mutex_unlock(&call->user_mutex);
+ *timeo = schedule_timeout(*timeo);
+ if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ return sock_intr_errno(*timeo);
+ }
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call)
+{
+ rxrpc_seq_t tx_start, tx_win;
+ signed long rtt2, timeout;
+ u64 rtt;
+
+ rtt = READ_ONCE(call->peer->rtt);
+ rtt2 = nsecs_to_jiffies64(rtt) * 2;
+ if (rtt2 < 1)
+ rtt2 = 1;
+
+ timeout = rtt2;
+ tx_start = READ_ONCE(call->tx_hard_ack);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ tx_win = READ_ONCE(call->tx_hard_ack);
+ if (call->tx_top - tx_win <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (timeout == 0 &&
+ tx_win == tx_start && signal_pending(current))
+ return -EINTR;
+
+ if (tx_win != tx_start) {
+ timeout = rtt2;
+ tx_start = tx_win;
+ }
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ timeout = schedule_timeout(timeout);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- long *timeo)
+ long *timeo,
+ bool waitall)
{
DECLARE_WAITQUEUE(myself, current);
int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- ret = 0;
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
- break;
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = call->error;
- break;
- }
- if (signal_pending(current)) {
- ret = sock_intr_errno(*timeo);
- break;
- }
-
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
- *timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0) {
- ret = sock_intr_errno(*timeo);
- break;
- }
- }
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -254,7 +308,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo);
+ &timeo,
+ msg->msg_flags & MSG_WAITALL);
if (ret < 0)
goto maybe_error;
}