summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-06-21 22:23:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-06-21 22:23:35 -0700
commitc356dc4b540edd6c02b409dd8cf3208ba2804c38 (patch)
tree457d971da033bfb11c85aaee260d9811937fa2c4 /net
parent121bddf39a8e39baf0df9ef1d688392c179935cd (diff)
parentb6653b3629e5b88202be3c9abc44713973f5c4b4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Fix leak of unqueued fragments in ipv6 nf_defrag, from Guillaume Nault. 2) Don't access the DDM interface unless the transceiver implements it in bnx2x, from Mauro S. M. Rodrigues. 3) Don't double fetch 'len' from userspace in sock_getsockopt(), from JingYi Hou. 4) Sign extension overflow in lio_core, from Colin Ian King. 5) Various netem bug fixes wrt. corrupted packets from Jakub Kicinski. 6) Fix epollout hang in hvsock, from Sunil Muthuswamy. 7) Fix regression in default fib6_type, from David Ahern. 8) Handle memory limits in tcp_fragment more appropriately, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (24 commits) tcp: refine memory limit test in tcp_fragment() inet: clear num_timeout reqsk_alloc() net: mvpp2: debugfs: Add pmap to fs dump ipv6: Default fib6_type to RTN_UNICAST when not set net: hns3: Fix inconsistent indenting net/af_iucv: always register net_device notifier net/af_iucv: build proper skbs for HiperTransport net/af_iucv: remove GFP_DMA restriction for HiperTransport net: dsa: mv88e6xxx: fix shift of FID bits in mv88e6185_g1_vtu_loadpurge() hvsock: fix epollout hang from race condition net/udp_gso: Allow TX timestamp with UDP GSO net: netem: fix use after free and double free with packet corruption net: netem: fix backlog accounting for corrupted GSO frames net: lio_core: fix potential sign-extension overflow on large shift tipc: pass tunnel dev as NULL to udp_tunnel(6)_xmit_skb ip6_tunnel: allow not to count pkts on tstats by passing dev as NULL ip_tunnel: allow not to count pkts on tstats by setting skb's dev to NULL tun: wake up waitqueues after IFF_UP is set net: remove duplicate fetch in sock_getsockopt tipc: fix issues with early FAILOVER_MSG from peer ...
Diffstat (limited to 'net')
-rw-r--r--net/core/sock.c3
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ip_tunnel_core.c9
-rw-r--r--net/ipv4/tcp_fastopen.c4
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv4/udp_offload.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c22
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/iucv/af_iucv.c49
-rw-r--r--net/netfilter/nft_masq.c3
-rw-r--r--net/netfilter/nft_redir.c3
-rw-r--r--net/sched/sch_netem.c26
-rw-r--r--net/tipc/link.c1
-rw-r--r--net/tipc/node.c10
-rw-r--r--net/tipc/udp_media.c8
-rw-r--r--net/vmw_vsock/hyperv_transport.c39
16 files changed, 96 insertions, 95 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index af09a23e4822..aa4a00d381e3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1477,9 +1477,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
{
u32 meminfo[SK_MEMINFO_VARS];
- if (get_user(len, optlen))
- return -EFAULT;
-
sk_get_meminfo(sk, meminfo);
len = min_t(unsigned int, len, sizeof(meminfo));
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 13ec7c3a9c49..7fd6db3fe366 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -752,10 +752,6 @@ drop:
static void reqsk_queue_hash_req(struct request_sock *req,
unsigned long timeout)
{
- req->num_retrans = 0;
- req->num_timeout = 0;
- req->sk = NULL;
-
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 9e3846388fb3..1452a97914a0 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -76,9 +76,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out(net, sk, skb);
- if (unlikely(net_xmit_eval(err)))
- pkt_len = 0;
- iptunnel_xmit_stats(dev, pkt_len);
+
+ if (dev) {
+ if (unlikely(net_xmit_eval(err)))
+ pkt_len = 0;
+ iptunnel_xmit_stats(dev, pkt_len);
+ }
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a48477355..f5a45e1e1182 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -221,10 +221,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sock *child;
bool own_req;
- req->num_retrans = 0;
- req->num_timeout = 0;
- req->sk = NULL;
-
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
NULL, &own_req);
if (!child)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 00c01a01b547..0ebc33d1c9e5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1296,7 +1296,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (nsize < 0)
nsize = 0;
- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
+ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 06b3e2c1fcdc..9763464a75d7 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -224,6 +224,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
seg = segs;
uh = udp_hdr(seg);
+ /* preserve TX timestamp flags and TS key for first segment */
+ skb_shinfo(seg)->tskey = skb_shinfo(gso_skb)->tskey;
+ skb_shinfo(seg)->tx_flags |=
+ (skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP);
+
/* compute checksum adjustment based on old length versus new */
newlen = htons(sizeof(*uh) + mss);
check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6f3abbb9e093..84322ce81d70 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -261,8 +261,14 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
prev = fq->q.fragments_tail;
err = inet_frag_queue_insert(&fq->q, skb, offset, end);
- if (err)
+ if (err) {
+ if (err == IPFRAG_DUP) {
+ /* No error for duplicates, pretend they got queued. */
+ kfree_skb(skb);
+ return -EINPROGRESS;
+ }
goto insert_error;
+ }
if (dev)
fq->iif = dev->ifindex;
@@ -289,15 +295,17 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
skb->_skb_refdst = 0UL;
err = nf_ct_frag6_reasm(fq, skb, prev, dev);
skb->_skb_refdst = orefdst;
- return err;
+
+ /* After queue has assumed skb ownership, only 0 or
+ * -EINPROGRESS must be returned.
+ */
+ return err ? -EINPROGRESS : 0;
}
skb_dst_drop(skb);
return -EINPROGRESS;
insert_error:
- if (err == IPFRAG_DUP)
- goto err;
inet_frag_kill(&fq->q);
err:
skb_dst_drop(skb);
@@ -476,12 +484,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
ret = 0;
}
- /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
- * must be returned.
- */
- if (ret)
- ret = -EINPROGRESS;
-
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0f60eb3a2873..11ad62effd56 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3184,7 +3184,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->fib6_table = table;
rt->fib6_metric = cfg->fc_metric;
- rt->fib6_type = cfg->fc_type;
+ rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 36eb8d1d9128..09e1694b6d34 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
+#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/errno.h>
@@ -347,14 +348,14 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
if (imsg)
memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
- skb_push(skb, ETH_HLEN);
- memset(skb->data, 0, ETH_HLEN);
-
skb->dev = iucv->hs_dev;
if (!skb->dev) {
err = -ENODEV;
goto err_free;
}
+
+ dev_hard_header(skb, skb->dev, ETH_P_AF_IUCV, NULL, NULL, skb->len);
+
if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) {
err = -ENETDOWN;
goto err_free;
@@ -367,6 +368,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
skb_trim(skb, skb->dev->mtu);
}
skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
+
+ __skb_header_release(skb);
nskb = skb_clone(skb, GFP_ATOMIC);
if (!nskb) {
err = -ENOMEM;
@@ -466,12 +469,14 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
/* Send controlling flags through an IUCV socket for HIPER transport */
static int iucv_send_ctrl(struct sock *sk, u8 flags)
{
+ struct iucv_sock *iucv = iucv_sk(sk);
int err = 0;
int blen;
struct sk_buff *skb;
u8 shutdown = 0;
- blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+ blen = sizeof(struct af_iucv_trans_hdr) +
+ LL_RESERVED_SPACE(iucv->hs_dev);
if (sk->sk_shutdown & SEND_SHUTDOWN) {
/* controlling flags should be sent anyway */
shutdown = sk->sk_shutdown;
@@ -588,7 +593,6 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
sk->sk_destruct = iucv_sock_destruct;
sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
- sk->sk_allocation = GFP_DMA;
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -782,6 +786,7 @@ vm_bind:
memcpy(iucv->src_user_id, iucv_userid, 8);
sk->sk_state = IUCV_BOUND;
iucv->transport = AF_IUCV_TRANS_IUCV;
+ sk->sk_allocation |= GFP_DMA;
if (!iucv->msglimit)
iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
goto done_unlock;
@@ -806,6 +811,8 @@ static int iucv_sock_autobind(struct sock *sk)
return -EPROTO;
memcpy(iucv->src_user_id, iucv_userid, 8);
+ iucv->transport = AF_IUCV_TRANS_IUCV;
+ sk->sk_allocation |= GFP_DMA;
write_lock_bh(&iucv_sk_list.lock);
__iucv_auto_name(iucv);
@@ -1131,7 +1138,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
* segmented records using the MSG_EOR flag), but
* for SOCK_STREAM we might want to improve it in future */
if (iucv->transport == AF_IUCV_TRANS_HIPER) {
- headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+ headroom = sizeof(struct af_iucv_trans_hdr) +
+ LL_RESERVED_SPACE(iucv->hs_dev);
linear = len;
} else {
if (len < PAGE_SIZE) {
@@ -1781,6 +1789,8 @@ static int iucv_callback_connreq(struct iucv_path *path,
niucv = iucv_sk(nsk);
iucv_sock_init(nsk, sk);
+ niucv->transport = AF_IUCV_TRANS_IUCV;
+ nsk->sk_allocation |= GFP_DMA;
/* Set the new iucv_sock */
memcpy(niucv->dst_name, ipuser + 8, 8);
@@ -2430,6 +2440,13 @@ out:
return err;
}
+static void afiucv_iucv_exit(void)
+{
+ device_unregister(af_iucv_dev);
+ driver_unregister(&af_iucv_driver);
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+}
+
static int __init afiucv_init(void)
{
int err;
@@ -2463,11 +2480,18 @@ static int __init afiucv_init(void)
err = afiucv_iucv_init();
if (err)
goto out_sock;
- } else
- register_netdevice_notifier(&afiucv_netdev_notifier);
+ }
+
+ err = register_netdevice_notifier(&afiucv_netdev_notifier);
+ if (err)
+ goto out_notifier;
+
dev_add_pack(&iucv_packet_type);
return 0;
+out_notifier:
+ if (pr_iucv)
+ afiucv_iucv_exit();
out_sock:
sock_unregister(PF_IUCV);
out_proto:
@@ -2481,12 +2505,11 @@ out:
static void __exit afiucv_exit(void)
{
if (pr_iucv) {
- device_unregister(af_iucv_dev);
- driver_unregister(&af_iucv_driver);
- pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+ afiucv_iucv_exit();
symbol_put(iucv_if);
- } else
- unregister_netdevice_notifier(&afiucv_netdev_notifier);
+ }
+
+ unregister_netdevice_notifier(&afiucv_netdev_notifier);
dev_remove_pack(&iucv_packet_type);
sock_unregister(PF_IUCV);
proto_unregister(&iucv_proto);
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 922d47081080..39dc94f2491e 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -304,5 +304,4 @@ module_exit(nft_masq_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
+MODULE_ALIAS_NFT_EXPR("masq");
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 96930b429d5f..8487eeff5c0e 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -291,5 +291,4 @@ module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 956ff3da81f4..b17f2ed970e2 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -439,8 +439,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct netem_skb_cb *cb;
struct sk_buff *skb2;
struct sk_buff *segs = NULL;
- unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
- int nb = 0;
+ unsigned int prev_len = qdisc_pkt_len(skb);
int count = 1;
int rc = NET_XMIT_SUCCESS;
int rc_drop = NET_XMIT_DROP;
@@ -494,16 +493,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
if (skb_is_gso(skb)) {
- segs = netem_segment(skb, sch, to_free);
- if (!segs)
+ skb = netem_segment(skb, sch, to_free);
+ if (!skb)
return rc_drop;
- } else {
- segs = skb;
+ segs = skb->next;
+ skb_mark_not_on_list(skb);
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
}
- skb = segs;
- segs = segs->next;
-
skb = skb_unshare(skb, GFP_ATOMIC);
if (unlikely(!skb)) {
qdisc_qstats_drop(sch);
@@ -520,6 +517,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
if (unlikely(sch->q.qlen >= sch->limit)) {
+ /* re-link segs, so that qdisc_drop_all() frees them all */
+ skb->next = segs;
qdisc_drop_all(skb, sch, to_free);
return rc_drop;
}
@@ -593,6 +592,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
finish_segs:
if (segs) {
+ unsigned int len, last_len;
+ int nb = 0;
+
+ len = skb->len;
+
while (segs) {
skb2 = segs->next;
skb_mark_not_on_list(segs);
@@ -608,9 +612,7 @@ finish_segs:
}
segs = skb2;
}
- sch->q.qlen += nb;
- if (nb > 1)
- qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+ qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
}
return NET_XMIT_SUCCESS;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f5cd986e1e50..2050fd386642 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1728,7 +1728,6 @@ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
* node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
* would have to start over from scratch instead.
*/
- WARN_ON(l && tipc_link_is_up(l));
tnl->drop_point = 1;
tnl->failover_reasm_skb = NULL;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9e106d3ed187..550581d47d51 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -766,9 +766,9 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
* disturbance, wrong session, etc.)
* 3. Link <1B-2B> up
* 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
- * 5. Node B starts failover onto link <1B-2B>
+ * 5. Node 2 starts failover onto link <1B-2B>
*
- * ==> Node A does never start link/node failover!
+ * ==> Node 1 does never start link/node failover!
*
* @n: tipc node structure
* @l: link peer endpoint failingover (- can be NULL)
@@ -783,6 +783,10 @@ static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
if (!tipc_link_is_up(tnl))
return;
+ /* Don't rush, failure link may be in the process of resetting */
+ if (l && !tipc_link_is_reset(l))
+ return;
+
tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
@@ -1706,7 +1710,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
/* Initiate or update failover mode if applicable */
if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
syncpt = oseqno + exp_pkts - 1;
- if (pl && tipc_link_is_up(pl)) {
+ if (pl && !tipc_link_is_reset(pl)) {
__tipc_node_link_down(n, &pb_id, xmitq, &maddr);
trace_tipc_node_link_down(n, true,
"node link down <- failover!");
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 7fc02d84c4f1..1405ccc9101c 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -176,7 +176,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
goto tx_error;
}
- skb->dev = rt->dst.dev;
ttl = ip4_dst_hoplimit(&rt->dst);
udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0, src->port,
@@ -195,10 +194,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
if (err)
goto tx_error;
ttl = ip6_dst_hoplimit(ndst);
- err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
- ndst->dev, &src->ipv6,
- &dst->ipv6, 0, ttl, 0, src->port,
- dst->port, false);
+ err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+ &src->ipv6, &dst->ipv6, 0, ttl, 0,
+ src->port, dst->port, false);
#endif
}
return err;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index fb2df6e068fa..62dcdf082349 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -211,18 +211,6 @@ static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
set_channel_pending_send_size(chan,
HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
- /* See hvs_stream_has_space(): we must make sure the host has seen
- * the new pending send size, before we can re-check the writable
- * bytes.
- */
- virt_mb();
-}
-
-static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
-{
- set_channel_pending_send_size(chan, 0);
-
- /* Ditto */
virt_mb();
}
@@ -292,9 +280,6 @@ static void hvs_channel_cb(void *ctx)
if (hvs_channel_readable(chan))
sk->sk_data_ready(sk);
- /* See hvs_stream_has_space(): when we reach here, the writable bytes
- * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
- */
if (hv_get_bytes_to_write(&chan->outbound) > 0)
sk->sk_write_space(sk);
}
@@ -395,6 +380,13 @@ static void hvs_open_connection(struct vmbus_channel *chan)
set_per_channel_state(chan, conn_from_host ? new : sk);
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
+ /* Set the pending send size to max packet size to always get
+ * notifications from the host when there is enough writable space.
+ * The host is optimized to send notifications only when the pending
+ * size boundary is crossed, and not always.
+ */
+ hvs_set_channel_pending_send_size(chan);
+
if (conn_from_host) {
new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
@@ -688,23 +680,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk)
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
struct hvsock *hvs = vsk->trans;
- struct vmbus_channel *chan = hvs->chan;
- s64 ret;
-
- ret = hvs_channel_writable_bytes(chan);
- if (ret > 0) {
- hvs_clear_channel_pending_send_size(chan);
- } else {
- /* See hvs_channel_cb() */
- hvs_set_channel_pending_send_size(chan);
-
- /* Re-check the writable bytes to avoid race */
- ret = hvs_channel_writable_bytes(chan);
- if (ret > 0)
- hvs_clear_channel_pending_send_size(chan);
- }
- return ret;
+ return hvs_channel_writable_bytes(hvs->chan);
}
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)