diff options
Diffstat (limited to 'net/ipv4')
49 files changed, 493 insertions, 689 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 4670683b4688..591ea23639ca 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -356,10 +356,8 @@ config INET_ESP config INET_IPCOMP tristate "IP: IPComp transformation" - select XFRM select INET_XFRM_TUNNEL - select CRYPTO - select CRYPTO_DEFLATE + select XFRM_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dd919d84285f..8a3ac1fa71a9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -148,10 +148,10 @@ void inet_sock_destruct(struct sock *sk) return; } - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); + WARN_ON(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(sk->sk_wmem_queued); + WARN_ON(sk->sk_forward_alloc); kfree(inet->opt); dst_release(sk->sk_dst_cache); @@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol) static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; - struct list_head *p; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; @@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ - answer = NULL; lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); - list_for_each_rcu(p, &inetsw[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); + list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { + err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) @@ -302,10 +300,9 @@ lookup_protocol: break; } err = -EPROTONOSUPPORT; - answer = NULL; } - if (unlikely(answer == NULL)) { + if (unlikely(err)) { if (try_loading_module < 2) { rcu_read_unlock(); /* @@ -341,7 +338,7 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - BUG_TRAP(answer_prot->slab != NULL); + WARN_ON(answer_prot->slab == NULL); err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); @@ -661,8 +658,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); - BUG_TRAP((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); + WARN_ON(!((1 << sk2->sk_state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); @@ -1442,6 +1439,10 @@ static int __init inet_init(void) (void)sock_register(&inet_family_ops); +#ifdef CONFIG_SYSCTL + ip_static_sysctl_init(); +#endif + /* * Add all the base protocols. */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2e667e2f90df..b12dae2b0b2d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -138,8 +138,8 @@ void in_dev_finish_destroy(struct in_device *idev) { struct net_device *dev = idev->dev; - BUG_TRAP(!idev->ifa_list); - BUG_TRAP(!idev->mc_list); + WARN_ON(idev->ifa_list); + WARN_ON(idev->mc_list); #ifdef NET_REFCNT_DEBUG printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? dev->name : "NIL"); @@ -399,7 +399,7 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) } ipv4_devconf_setall(in_dev); if (ifa->ifa_dev != in_dev) { - BUG_TRAP(!ifa->ifa_dev); + WARN_ON(ifa->ifa_dev); in_dev_hold(in_dev); ifa->ifa_dev = in_dev; } @@ -1029,6 +1029,11 @@ skip: } } +static inline bool inetdev_valid_mtu(unsigned mtu) +{ + return mtu >= 68; +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1048,6 +1053,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } + } else if (event == NETDEV_CHANGEMTU) { + /* Re-enabling IP */ + if (inetdev_valid_mtu(dev->mtu)) + in_dev = inetdev_init(dev); } goto out; } @@ -1058,7 +1067,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, dev->ip_ptr = NULL; break; case NETDEV_UP: - if (dev->mtu < 68) + if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa; @@ -1080,9 +1089,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_down(in_dev); break; case NETDEV_CHANGEMTU: - if (dev->mtu >= 68) + if (inetdev_valid_mtu(dev->mtu)) break; - /* MTU falled under 68, disable IP */ + /* disable IP when MTU is not enough */ case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4e73e5708e70..21515d4c49eb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -575,7 +575,7 @@ static int esp_init_state(struct xfrm_state *x) crypto_aead_ivsize(aead); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); - else if (x->props.mode == XFRM_MODE_BEET) + else if (x->props.mode == XFRM_MODE_BEET && x->sel.family != AF_INET6) x->props.header_len += IPV4_BEET_PHMAXLEN; if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 860558633b2c..55c355e63234 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -204,18 +204,22 @@ static struct sock *icmp_sk(struct net *net) return net->ipv4.icmp_sk[smp_processor_id()]; } -static inline int icmp_xmit_lock(struct sock *sk) +static inline struct sock *icmp_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmp_sk(net); + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static inline void icmp_xmit_unlock(struct sock *sk) @@ -354,15 +358,17 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb->rtable; struct net *net = dev_net(rt->u.dst.dev); - struct sock *sk = icmp_sk(net); - struct inet_sock *inet = inet_sk(sk); + struct sock *sk; + struct inet_sock *inet; __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; + inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; @@ -419,7 +425,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; net = dev_net(rt->u.dst.dev); - sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. @@ -483,7 +488,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6203ece53606..f70fac612596 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -289,6 +289,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct rtable *rt; struct iphdr *pip; struct igmpv3_report *pig; + struct net *net = dev_net(dev); skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) @@ -299,7 +300,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) .nl_u = { .ip4_u = { .daddr = IGMPV3_ALL_MCR } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); return NULL; } @@ -629,6 +630,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; + struct net *net = dev_net(dev); __be32 group = pmc ? pmc->multiaddr : 0; __be32 dst; @@ -643,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct flowi fl = { .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = dst } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) return -1; } if (rt->rt_src == 0) { @@ -1196,9 +1198,6 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (im=in_dev->mc_list; im; im=im->next) { if (im->multiaddr == addr) { im->users++; @@ -1278,9 +1277,6 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { if (i->multiaddr==addr) { if (--i->users == 0) { @@ -1308,9 +1304,6 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (i=in_dev->mc_list; i; i=i->next) igmp_group_dropped(i); @@ -1331,9 +1324,6 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST in_dev->mr_gq_running = 0; @@ -1357,9 +1347,6 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev->mc_list; i; i=i->next) @@ -1376,9 +1363,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - /* Deactivate timers */ ip_mc_down(in_dev); @@ -1395,7 +1379,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_unlock_bh(&in_dev->mc_list_lock); } -static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } }; @@ -1404,19 +1388,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(&init_net, imr->imr_ifindex); + idev = inetdev_by_index(net, imr->imr_ifindex); if (idev) __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(&init_net, imr->imr_address.s_addr); + dev = ip_dev_find(net, imr->imr_address.s_addr); if (!dev) return NULL; dev_put(dev); } - if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) { + if (!dev && !ip_route_output_key(net, &rt, &fl)) { dev = rt->u.dst.dev; ip_rt_put(rt); } @@ -1754,18 +1738,16 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int ifindex; int count = 0; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { iml = NULL; @@ -1827,15 +1809,13 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; + struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { if (iml->multi.imr_multiaddr.s_addr != group) @@ -1873,21 +1853,19 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); int leavegroup = 0; int i, j, rv; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2007,6 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; + struct net *net = sock_net(sk); int leavegroup = 0; if (!ipv4_is_multicast(addr)) @@ -2015,15 +1994,12 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2094,19 +2070,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2163,9 +2137,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); err = -EADDRNOTAVAIL; @@ -2246,19 +2217,17 @@ void ip_mc_drop_socket(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; + struct net *net = sock_net(sk); if (inet->mc_list == NULL) return; - if (!net_eq(sock_net(sk), &init_net)) - return; - rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; inet->mc_list = iml->next; - in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); + in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bb81c958b744..0c1ae68ee84b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -167,7 +167,7 @@ tb_not_found: success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); ret = 0; fail_unlock: @@ -260,7 +260,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) } newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); + WARN_ON(newsk->sk_state == TCP_SYN_RECV); out: release_sock(sk); return newsk; @@ -386,7 +386,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, ireq->rmt_addr == raddr && ireq->loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { - BUG_TRAP(!req->sk); + WARN_ON(req->sk); *prevp = prev; break; } @@ -539,14 +539,14 @@ EXPORT_SYMBOL_GPL(inet_csk_clone); */ void inet_csk_destroy_sock(struct sock *sk) { - BUG_TRAP(sk->sk_state == TCP_CLOSE); - BUG_TRAP(sock_flag(sk, SOCK_DEAD)); + WARN_ON(sk->sk_state != TCP_CLOSE); + WARN_ON(!sock_flag(sk, SOCK_DEAD)); /* It cannot be in hash table! */ - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); + WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -629,7 +629,7 @@ void inet_csk_listen_stop(struct sock *sk) local_bh_disable(); bh_lock_sock(child); - BUG_TRAP(!sock_owned_by_user(child)); + WARN_ON(sock_owned_by_user(child)); sock_hold(child); sk->sk_prot->disconnect(child, O_NONBLOCK); @@ -647,7 +647,7 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } - BUG_TRAP(!sk->sk_ack_backlog); + WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 0546a0bc97ea..6c52e08f786e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -134,8 +134,8 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, struct sk_buff *fp; struct netns_frags *nf; - BUG_TRAP(q->last_in & INET_FRAG_COMPLETE); - BUG_TRAP(del_timer(&q->timer) == 0); + WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); + WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ fp = q->fragments; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 115f53722d20..44981906fb91 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -305,7 +305,7 @@ unique: inet->num = lport; inet->sport = htons(lport); sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); @@ -342,7 +342,7 @@ void __inet_hash_nolisten(struct sock *sk) rwlock_t *lock; struct inet_ehash_bucket *head; - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); @@ -367,7 +367,7 @@ static void __inet_hash(struct sock *sk) return; } - BUG_TRAP(sk_unhashed(sk)); + WARN_ON(!sk_unhashed(sk)); list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; @@ -450,7 +450,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->ib_net == net && tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); + WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; if (!check_established(death_row, sk, diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 75c2def8f9a0..743f011b9a84 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -86,7 +86,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; - BUG_TRAP(icsk->icsk_bind_hash); + WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); @@ -409,3 +409,38 @@ out: } EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); + +void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_node *node; + int h; + + local_bh_disable(); + for (h = 0; h < (hashinfo->ehash_size); h++) { + struct inet_ehash_bucket *head = + inet_ehash_bucket(hashinfo, h); + rwlock_t *lock = inet_ehash_lockp(hashinfo, h); +restart: + write_lock(lock); + sk_for_each(sk, node, &head->twchain) { + + tw = inet_twsk(sk); + if (!net_eq(twsk_net(tw), net) || + tw->tw_family != family) + continue; + + atomic_inc(&tw->tw_refcnt); + write_unlock(lock); + inet_twsk_deschedule(tw, twdr); + inet_twsk_put(tw); + + goto restart; + } + write_unlock(lock); + } + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 38d38f058018..2152d222b954 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -488,8 +488,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, qp->q.fragments = head; } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG_CB(head)->offset != 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 465544f6281a..d533a89e08de 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -118,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) __skb_pull(newskb, skb_network_offset(newskb)); newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; - BUG_TRAP(newskb->dst); + WARN_ON(!newskb->dst); netif_rx(newskb); return 0; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a75807b971b3..38ccb6dfb02e 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -14,153 +14,14 @@ * - Adaptive compression. */ #include <linux/module.h> -#include <linux/crypto.h> #include <linux/err.h> -#include <linux/pfkeyv2.h> -#include <linux/percpu.h> -#include <linux/smp.h> -#include <linux/list.h> -#include <linux/vmalloc.h> #include <linux/rtnetlink.h> -#include <linux/mutex.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/icmp.h> #include <net/ipcomp.h> #include <net/protocol.h> - -struct ipcomp_tfms { - struct list_head list; - struct crypto_comp **tfms; - int users; -}; - -static DEFINE_MUTEX(ipcomp_resource_mutex); -static void **ipcomp_scratches; -static int ipcomp_scratch_users; -static LIST_HEAD(ipcomp_tfms_list); - -static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - const u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen); - - if (err) - goto out; - - if (dlen < (plen + sizeof(struct ip_comp_hdr))) { - err = -EINVAL; - goto out; - } - - err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC); - if (err) - goto out; - - skb->truesize += dlen - plen; - __skb_put(skb, dlen - plen); - skb_copy_to_linear_data(skb, scratch, dlen); -out: - put_cpu(); - return err; -} - -static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int nexthdr; - int err = -ENOMEM; - struct ip_comp_hdr *ipch; - - if (skb_linearize_cow(skb)) - goto out; - - skb->ip_summed = CHECKSUM_NONE; - - /* Remove ipcomp header and decompress original payload */ - ipch = (void *)skb->data; - nexthdr = ipch->nexthdr; - - skb->transport_header = skb->network_header + sizeof(*ipch); - __skb_pull(skb, sizeof(*ipch)); - err = ipcomp_decompress(x, skb); - if (err) - goto out; - - err = nexthdr; - -out: - return err; -} - -static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipcomp_data *ipcd = x->data; - const int plen = skb->len; - int dlen = IPCOMP_SCRATCH_SIZE; - u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); - int err; - - local_bh_disable(); - err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); - if (err) - goto out; - - if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) { - err = -EMSGSIZE; - goto out; - } - - memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); - - pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); - return 0; - -out: - put_cpu(); - return err; -} - -static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - struct ip_comp_hdr *ipch; - struct ipcomp_data *ipcd = x->data; - - if (skb->len < ipcd->threshold) { - /* Don't bother compressing */ - goto out_ok; - } - - if (skb_linearize_cow(skb)) - goto out_ok; - - err = ipcomp_compress(x, skb); - - if (err) { - goto out_ok; - } - - /* Install ipcomp header, convert into ipcomp datagram. */ - ipch = ip_comp_hdr(skb); - ipch->nexthdr = *skb_mac_header(skb); - ipch->flags = 0; - ipch->cpi = htons((u16 )ntohl(x->id.spi)); - *skb_mac_header(skb) = IPPROTO_COMP; -out_ok: - skb_push(skb, -skb_network_offset(skb)); - return 0; -} +#include <net/sock.h> static void ipcomp4_err(struct sk_buff *skb, u32 info) { @@ -241,155 +102,9 @@ out: return err; } -static void ipcomp_free_scratches(void) -{ - int i; - void **scratches; - - if (--ipcomp_scratch_users) - return; - - scratches = ipcomp_scratches; - if (!scratches) - return; - - for_each_possible_cpu(i) - vfree(*per_cpu_ptr(scratches, i)); - - free_percpu(scratches); -} - -static void **ipcomp_alloc_scratches(void) -{ - int i; - void **scratches; - - if (ipcomp_scratch_users++) - return ipcomp_scratches; - - scratches = alloc_percpu(void *); - if (!scratches) - return NULL; - - ipcomp_scratches = scratches; - - for_each_possible_cpu(i) { - void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE); - if (!scratch) - return NULL; - *per_cpu_ptr(scratches, i) = scratch; - } - - return scratches; -} - -static void ipcomp_free_tfms(struct crypto_comp **tfms) -{ - struct ipcomp_tfms *pos; - int cpu; - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - if (pos->tfms == tfms) - break; - } - - BUG_TRAP(pos); - - if (--pos->users) - return; - - list_del(&pos->list); - kfree(pos); - - if (!tfms) - return; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_comp(tfm); - } - free_percpu(tfms); -} - -static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) -{ - struct ipcomp_tfms *pos; - struct crypto_comp **tfms; - int cpu; - - /* This can be any valid CPU ID so we don't need locking. */ - cpu = raw_smp_processor_id(); - - list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_comp *tfm; - - tfms = pos->tfms; - tfm = *per_cpu_ptr(tfms, cpu); - - if (!strcmp(crypto_comp_name(tfm), alg_name)) { - pos->users++; - return tfms; - } - } - - pos = kmalloc(sizeof(*pos), GFP_KERNEL); - if (!pos) - return NULL; - - pos->users = 1; - INIT_LIST_HEAD(&pos->list); - list_add(&pos->list, &ipcomp_tfms_list); - - pos->tfms = tfms = alloc_percpu(struct crypto_comp *); - if (!tfms) - goto error; - - for_each_possible_cpu(cpu) { - struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - *per_cpu_ptr(tfms, cpu) = tfm; - } - - return tfms; - -error: - ipcomp_free_tfms(tfms); - return NULL; -} - -static void ipcomp_free_data(struct ipcomp_data *ipcd) +static int ipcomp4_init_state(struct xfrm_state *x) { - if (ipcd->tfms) - ipcomp_free_tfms(ipcd->tfms); - ipcomp_free_scratches(); -} - -static void ipcomp_destroy(struct xfrm_state *x) -{ - struct ipcomp_data *ipcd = x->data; - if (!ipcd) - return; - xfrm_state_delete_tunnel(x); - mutex_lock(&ipcomp_resource_mutex); - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); -} - -static int ipcomp_init_state(struct xfrm_state *x) -{ - int err; - struct ipcomp_data *ipcd; - struct xfrm_algo_desc *calg_desc; - - err = -EINVAL; - if (!x->calg) - goto out; - - if (x->encap) - goto out; + int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { @@ -402,40 +117,22 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; } - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) + err = ipcomp_init_state(x); + if (err) goto out; - mutex_lock(&ipcomp_resource_mutex); - if (!ipcomp_alloc_scratches()) - goto error; - - ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name); - if (!ipcd->tfms) - goto error; - mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; } - calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0); - BUG_ON(!calg_desc); - ipcd->threshold = calg_desc->uinfo.comp.threshold; - x->data = ipcd; err = 0; out: return err; error_tunnel: - mutex_lock(&ipcomp_resource_mutex); -error: - ipcomp_free_data(ipcd); - mutex_unlock(&ipcomp_resource_mutex); - kfree(ipcd); + ipcomp_destroy(x); goto out; } @@ -443,7 +140,7 @@ static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, - .init_state = ipcomp_init_state, + .init_state = ipcomp4_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output @@ -481,7 +178,7 @@ module_init(ipcomp4_init); module_exit(ipcomp4_fini); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); +MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp/IPv4) - RFC3173"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 1f1897a1a702..201b8ea3020d 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -608,7 +608,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, } -int ip_vs_app_init(void) +int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index f8bdae47a77f..44a6872dc245 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -965,7 +965,7 @@ static void ip_vs_conn_flush(void) } -int ip_vs_conn_init(void) +int __init ip_vs_conn_init(void) { int idx; diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9a5ace0b4dd6..6379705a8dcb 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -683,9 +683,22 @@ static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(stats, 0, (char *)&stats->lock - (char *)stats); - spin_unlock_bh(&stats->lock); + + stats->conns = 0; + stats->inpkts = 0; + stats->outpkts = 0; + stats->inbytes = 0; + stats->outbytes = 0; + + stats->cps = 0; + stats->inpps = 0; + stats->outpps = 0; + stats->inbps = 0; + stats->outbps = 0; + ip_vs_zero_estimator(stats); + + spin_unlock_bh(&stats->lock); } /* @@ -1589,7 +1602,7 @@ static struct ctl_table vs_vars[] = { { .ctl_name = 0 } }; -struct ctl_path net_vs_ctl_path[] = { +const struct ctl_path net_vs_ctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, { .procname = "vs", }, @@ -1784,7 +1797,9 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats; +struct ip_vs_stats ip_vs_stats = { + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), +}; #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) @@ -2306,7 +2321,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { }; -int ip_vs_control_init(void) +int __init ip_vs_control_init(void) { int ret; int idx; @@ -2333,8 +2348,6 @@ int ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_rtable[idx]); } - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); - spin_lock_init(&ip_vs_stats.lock); ip_vs_new_estimator(&ip_vs_stats); /* Hook the defense timer */ diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 8afc1503ed20..fa66824d264f 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -233,6 +233,7 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .name = "dh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, @@ -242,7 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = static int __init ip_vs_dh_init(void) { - INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_dh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index bc04eedd6dbb..5a20f93bd7f9 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/sysctl.h> +#include <linux/list.h> #include <net/ip_vs.h> @@ -44,28 +45,11 @@ */ -struct ip_vs_estimator -{ - struct ip_vs_estimator *next; - struct ip_vs_stats *stats; - - u32 last_conns; - u32 last_inpkts; - u32 last_outpkts; - u64 last_inbytes; - u64 last_outbytes; - - u32 cps; - u32 inpps; - u32 outpps; - u32 inbps; - u32 outbps; -}; - +static void estimation_timer(unsigned long arg); -static struct ip_vs_estimator *est_list = NULL; -static DEFINE_RWLOCK(est_lock); -static struct timer_list est_timer; +static LIST_HEAD(est_list); +static DEFINE_SPINLOCK(est_lock); +static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); static void estimation_timer(unsigned long arg) { @@ -76,9 +60,9 @@ static void estimation_timer(unsigned long arg) u64 n_inbytes, n_outbytes; u32 rate; - read_lock(&est_lock); - for (e = est_list; e; e = e->next) { - s = e->stats; + spin_lock(&est_lock); + list_for_each_entry(e, &est_list, list) { + s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); n_conns = s->conns; @@ -114,19 +98,16 @@ static void estimation_timer(unsigned long arg) s->outbps = (e->outbps+0xF)>>5; spin_unlock(&s->lock); } - read_unlock(&est_lock); + spin_unlock(&est_lock); mod_timer(&est_timer, jiffies + 2*HZ); } -int ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est; + struct ip_vs_estimator *est = &stats->est; - est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) - return -ENOMEM; + INIT_LIST_HEAD(&est->list); - est->stats = stats; est->last_conns = stats->conns; est->cps = stats->cps<<10; @@ -142,59 +123,40 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats) est->last_outbytes = stats->outbytes; est->outbps = stats->outbps<<5; - write_lock_bh(&est_lock); - est->next = est_list; - if (est->next == NULL) { - setup_timer(&est_timer, estimation_timer, 0); - est_timer.expires = jiffies + 2*HZ; - add_timer(&est_timer); - } - est_list = est; - write_unlock_bh(&est_lock); - return 0; + spin_lock_bh(&est_lock); + if (list_empty(&est_list)) + mod_timer(&est_timer, jiffies + 2 * HZ); + list_add(&est->list, &est_list); + spin_unlock_bh(&est_lock); } void ip_vs_kill_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est, **pest; - int killed = 0; - - write_lock_bh(&est_lock); - pest = &est_list; - while ((est=*pest) != NULL) { - if (est->stats != stats) { - pest = &est->next; - continue; - } - *pest = est->next; - kfree(est); - killed++; + struct ip_vs_estimator *est = &stats->est; + + spin_lock_bh(&est_lock); + list_del(&est->list); + while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { + spin_unlock_bh(&est_lock); + cpu_relax(); + spin_lock_bh(&est_lock); } - if (killed && est_list == NULL) - del_timer_sync(&est_timer); - write_unlock_bh(&est_lock); + spin_unlock_bh(&est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *e; - - write_lock_bh(&est_lock); - for (e = est_list; e; e = e->next) { - if (e->stats != stats) - continue; - - /* set counters zero */ - e->last_conns = 0; - e->last_inpkts = 0; - e->last_outpkts = 0; - e->last_inbytes = 0; - e->last_outbytes = 0; - e->cps = 0; - e->inpps = 0; - e->outpps = 0; - e->inbps = 0; - e->outbps = 0; - } - write_unlock_bh(&est_lock); + struct ip_vs_estimator *est = &stats->est; + + /* set counters zero, caller must hold the stats->lock lock */ + est->last_inbytes = 0; + est->last_outbytes = 0; + est->last_conns = 0; + est->last_inpkts = 0; + est->last_outpkts = 0; + est->cps = 0; + est->inpps = 0; + est->outpps = 0; + est->inbps = 0; + est->outbps = 0; } diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 0efa3db4b180..7a6a319f544a 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -539,6 +539,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .name = "lblc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, .update_service = ip_vs_lblc_update_svc, @@ -550,7 +551,6 @@ static int __init ip_vs_lblc_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 8e3bbeb45138..c234e73968a6 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -728,6 +728,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .name = "lblcr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, .update_service = ip_vs_lblcr_update_svc, @@ -739,7 +740,6 @@ static int __init ip_vs_lblcr_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index ac9f08e065d5..ebcdbf75ac65 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -98,6 +98,7 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .name = "lc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), .init_service = ip_vs_lc_init_svc, .done_service = ip_vs_lc_done_svc, .update_service = ip_vs_lc_update_svc, @@ -107,7 +108,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { static int __init ip_vs_lc_init(void) { - INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; } diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index a46bf258d420..92f3a6770031 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -136,6 +136,7 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .name = "nq", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), .init_service = ip_vs_nq_init_svc, .done_service = ip_vs_nq_done_svc, .update_service = ip_vs_nq_update_svc, @@ -145,7 +146,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = static int __init ip_vs_nq_init(void) { - INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_nq_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 876714f23d65..6099a88fc200 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -43,7 +43,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; /* * register an ipvs protocol */ -static int __used register_ip_vs_protocol(struct ip_vs_protocol *pp) +static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) { unsigned hash = IP_VS_PROTO_HASH(pp->protocol); @@ -190,7 +190,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, } -int ip_vs_protocol_init(void) +int __init ip_vs_protocol_init(void) { char protocols[64]; #define REGISTER_PROTOCOL(p) \ diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index c8db12d39e61..358110d17e59 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -94,6 +94,7 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .name = "rr", /* name */ .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, .done_service = ip_vs_rr_done_svc, .update_service = ip_vs_rr_update_svc, @@ -102,7 +103,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { static int __init ip_vs_rr_init(void) { - INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_rr_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c index b64767309855..a46ad9e35016 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/ipv4/ipvs/ip_vs_sched.c @@ -184,7 +184,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next != &scheduler->n_list) { + if (!list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); ip_vs_use_count_dec(); IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " @@ -229,7 +229,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next == &scheduler->n_list) { + if (list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " "is not in the list. failed\n", scheduler->name); diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 2a7d31358181..77663d84cbd1 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -138,6 +138,7 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .name = "sed", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), .init_service = ip_vs_sed_init_svc, .done_service = ip_vs_sed_done_svc, .update_service = ip_vs_sed_update_svc, @@ -147,7 +148,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = static int __init ip_vs_sed_init(void) { - INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sed_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index b8fdfac65001..7b979e228056 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -230,6 +230,7 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .name = "sh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, @@ -239,7 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = static int __init ip_vs_sh_init(void) { - INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 45e9bd96c286..a652da2c3200 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -904,9 +904,9 @@ int stop_sync_thread(int state) * progress of stopping the master sync daemon. */ - spin_lock(&ip_vs_sync_lock); + spin_lock_bh(&ip_vs_sync_lock); ip_vs_sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock(&ip_vs_sync_lock); + spin_unlock_bh(&ip_vs_sync_lock); kthread_stop(sync_master_thread); sync_master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 772c3cb4eca1..9b0ef86bb1f7 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -126,6 +126,7 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .name = "wlc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), .init_service = ip_vs_wlc_init_svc, .done_service = ip_vs_wlc_done_svc, .update_service = ip_vs_wlc_update_svc, @@ -135,7 +136,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = static int __init ip_vs_wlc_init(void) { - INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 1d6932d7dc97..0d86a79b87b5 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -212,6 +212,7 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .name = "wrr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, @@ -220,7 +221,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { static int __init ip_vs_wrr_init(void) { - INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; } diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f23e60c93ef9..90eb7cb47e77 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -369,7 +369,7 @@ config IP_NF_SECURITY tristate "Security table" depends on IP_NF_IPTABLES depends on SECURITY - default m if NETFILTER_ADVANCED=n + depends on NETFILTER_ADVANCED help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 3be4d07e7ed9..082f5dd3156c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -55,32 +55,53 @@ static struct xt_table packet_filter = { }; /* The work comes in here from netfilter.c */ -static unsigned int arpt_hook(unsigned int hook, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int arpt_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); +} + +static unsigned int arpt_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(out)->ipv4.arptable_filter); +} + +static unsigned int arpt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return arpt_do_table(skb, hook, in, out, + dev_net(in)->ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { { - .hook = arpt_hook, + .hook = arpt_in_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_out_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { - .hook = arpt_hook, + .hook = arpt_forward_hook, .owner = THIS_MODULE, .pf = NF_ARP, .hooknum = NF_ARP_FORWARD, diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1819ad7ab910..fafe8ebb4c55 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -475,11 +475,10 @@ static void arp_print(struct arp_payload *payload) #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; int j,k; - const char hexbuf[]= "0123456789abcdef"; for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { - hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; - hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); hbuffer[k++]=':'; } hbuffer[--k]='\0'; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 49587a497229..462a22c97877 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type(dev, iph->daddr, info->dest) ^ - (info->flags & IPT_ADDRTYPE_INVERT_DEST); + !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 21cb053f5d7d..3974d7cae5c0 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -305,10 +305,10 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); - recent_table_flush(t); #ifdef CONFIG_PROC_FS remove_proc_entry(t->name, proc_dir); #endif + recent_table_flush(t); kfree(t); } mutex_unlock(&recent_mutex); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 2b472ac2263a..db6d312128e1 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -32,7 +32,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 91537f11273f..6c4f11f51446 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -73,9 +73,13 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - off = *rover; if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) - off = net_random(); + off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, + maniptype == IP_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; for (i = 0; i < range_size; i++, off++) { *portptr = htons(min + off % range_size); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 834356ea99df..8f5a403f6f6b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), + SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4ab0ac94f92..6ee5354c9aa1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1502,21 +1502,21 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->fl.iif != 0 || dst_metric_locked(&rth->u.dst, RTAX_MTU) || !net_eq(dev_net(rth->u.dst.dev), net) || - !rt_is_expired(rth)) + rt_is_expired(rth)) continue; if (new_mtu < 68 || new_mtu >= old_mtu) { /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) && + old_mtu >= dst_mtu(&rth->u.dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) { - if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) { + if (mtu <= dst_mtu(&rth->u.dst)) { + if (mtu < dst_mtu(&rth->u.dst)) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1538,7 +1538,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 && + if (dst_mtu(dst) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1667,7 +1667,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU) + if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, @@ -2914,7 +2914,69 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } -ctl_table ipv4_route_table[] = { +static void rt_secret_reschedule(int old) +{ + struct net *net; + int new = ip_rt_secret_interval; + int diff = new - old; + + if (!diff) + return; + + rtnl_lock(); + for_each_net(net) { + int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + + if (!new) + continue; + + if (deleted) { + long time = net->ipv4.rt_secret_timer.expires - jiffies; + + if (time <= 0 || (time += diff) <= 0) + time = 0; + + net->ipv4.rt_secret_timer.expires = time; + } else + net->ipv4.rt_secret_timer.expires = new; + + net->ipv4.rt_secret_timer.expires += jiffies; + add_timer(&net->ipv4.rt_secret_timer); + } + rtnl_unlock(); +} + +static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old = ip_rt_secret_interval; + int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + + rt_secret_reschedule(old); + + return ret; +} + +static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, + int __user *name, + int nlen, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, + size_t newlen) +{ + int old = ip_rt_secret_interval; + int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, + newlen); + + rt_secret_reschedule(old); + + return ret; +} + +static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3048,20 +3110,29 @@ ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = &ipv4_sysctl_rt_secret_interval, + .strategy = &ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; -static __net_initdata struct ctl_path ipv4_route_path[] = { +static struct ctl_table empty[1]; + +static struct ctl_table ipv4_skeleton[] = +{ + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + .mode = 0555, .child = ipv4_route_table}, + { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + .mode = 0555, .child = empty}, + { } +}; + +static __net_initdata struct ctl_path ipv4_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, { }, }; - static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = NET_IPV4_ROUTE_FLUSH, @@ -3074,6 +3145,13 @@ static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = 0 }, }; +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; @@ -3126,10 +3204,12 @@ static __net_init int rt_secret_timer_init(struct net *net) net->ipv4.rt_secret_timer.data = (unsigned long)net; init_timer_deferrable(&net->ipv4.rt_secret_timer); - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } return 0; } @@ -3216,6 +3296,17 @@ int __init ip_rt_init(void) return rc; } +#ifdef CONFIG_SYSCTL +/* + * We really need to sanitize the damn ipv4 init order, then all + * this nonsense will go away. + */ +void __init ip_static_sysctl_init(void) +{ + register_sysctl_paths(ipv4_path, ipv4_skeleton); +} +#endif + EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 51bc24d3b8a7..9d38005abbac 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -299,6 +299,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ecn_ok = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 14ef202a2254..e0689fd7b798 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -232,6 +232,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &ipv4_doint_and_flush, .strategy = &ipv4_doint_and_flush_strategy, + .extra2 = &init_net, }, { .ctl_name = NET_IPV4_NO_PMTU_DISC, @@ -401,13 +402,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, }, - { - .ctl_name = NET_IPV4_ROUTE, - .procname = "route", - .maxlen = 0, - .mode = 0555, - .child = ipv4_route_table - }, #ifdef CONFIG_IP_MULTICAST { .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0b491bf03db4..1ab341e5d3e0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1096,7 +1096,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) #if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); + WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif if (inet_csk_ack_scheduled(sk)) { @@ -1358,7 +1358,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - BUG_TRAP(flags & MSG_PEEK); + WARN_ON(!(flags & MSG_PEEK)); skb = skb->next; } while (skb != (struct sk_buff *)&sk->sk_receive_queue); @@ -1421,8 +1421,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.len = len; - BUG_TRAP(tp->copied_seq == tp->rcv_nxt || - (flags & (MSG_PEEK | MSG_TRUNC))); + WARN_ON(tp->copied_seq != tp->rcv_nxt && + !(flags & (MSG_PEEK | MSG_TRUNC))); /* Ugly... If prequeue is not empty, we have to * process it before releasing socket, otherwise @@ -1844,7 +1844,7 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -1973,7 +1973,7 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1f5e6049883e..67ccce2a96bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1629,10 +1629,10 @@ advance_sp: out: #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); + WARN_ON((int)tcp_packets_in_flight(tp) < 0); #endif return flag; } @@ -2181,7 +2181,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) int err; unsigned int mss; - BUG_TRAP(packets <= tp->packets_out); + WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { skb = tp->lost_skb_hint; cnt = tp->lost_cnt_hint; @@ -2610,7 +2610,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { - BUG_TRAP(tp->retrans_out == 0); + WARN_ON(tp->retrans_out != 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { @@ -2972,9 +2972,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); + WARN_ON((int)tp->sacked_out < 0); + WARN_ON((int)tp->lost_out < 0); + WARN_ON((int)tp->retrans_out < 0); if (!tp->packets_out && tcp_is_sack(tp)) { icsk = inet_csk(sk); if (tp->lost_out) { @@ -3292,6 +3292,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * log. Something worked... */ sk->sk_err_soft = 0; + icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_time_stamp; prior_packets = tp->packets_out; if (!prior_packets) @@ -3324,8 +3325,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - icsk->icsk_probes_out = 0; - /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3878,7 +3877,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int i; /* RCV.NXT must cover all the block! */ - BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq)); + WARN_ON(before(tp->rcv_nxt, sp->end_seq)); /* Zap this SACK, by moving forward any other SACKS. */ for (i=this_sack+1; i < num_sacks; i++) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a82df6307567..1b4fee20fc93 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -418,7 +418,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) /* ICMPs are not backlogged, hence we cannot get an established socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); @@ -655,8 +655,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, rep.th.doff = arg.iov[0].iov_len/4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], - key, ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, &rep.th); + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, @@ -687,14 +687,14 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, +static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr)); + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); } /* @@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } @@ -2382,6 +2376,7 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 204c42162660..f976fc57892c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -609,7 +609,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_ack(skb, req); + req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; @@ -618,89 +618,87 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* In sequence, PAWS is OK. */ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) - req->ts_recent = tmp_opt.rcv_tsval; + req->ts_recent = tmp_opt.rcv_tsval; - if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { - /* Truncate SYN, it is out of window starting - at tcp_rsk(req)->rcv_isn + 1. */ - flg &= ~TCP_FLAG_SYN; - } + if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { + /* Truncate SYN, it is out of window starting + at tcp_rsk(req)->rcv_isn + 1. */ + flg &= ~TCP_FLAG_SYN; + } - /* RFC793: "second check the RST bit" and - * "fourth, check the SYN bit" - */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); - goto embryonic_reset; - } + /* RFC793: "second check the RST bit" and + * "fourth, check the SYN bit" + */ + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + goto embryonic_reset; + } - /* ACK sequence verified above, just make sure ACK is - * set. If ACK not set, just silently drop the packet. - */ - if (!(flg & TCP_FLAG_ACK)) - return NULL; - - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - inet_rsk(req)->acked = 1; - return NULL; - } + /* ACK sequence verified above, just make sure ACK is + * set. If ACK not set, just silently drop the packet. + */ + if (!(flg & TCP_FLAG_ACK)) + return NULL; - /* OK, ACK is valid, create big socket and - * feed this segment to it. It will repeat all - * the tests. THIS SEGMENT MUST MOVE SOCKET TO - * ESTABLISHED STATE. If it will be dropped after - * socket is created, wait for troubles. - */ - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, - req, NULL); - if (child == NULL) - goto listen_overflow; + /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_rsk(req)->acked = 1; + return NULL; + } + + /* OK, ACK is valid, create big socket and + * feed this segment to it. It will repeat all + * the tests. THIS SEGMENT MUST MOVE SOCKET TO + * ESTABLISHED STATE. If it will be dropped after + * socket is created, wait for troubles. + */ + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) + goto listen_overflow; #ifdef CONFIG_TCP_MD5SIG - else { - /* Copy over the MD5 key from the original socket */ - struct tcp_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - key = tp->af_specific->md5_lookup(sk, child); - if (key != NULL) { - /* - * We're using one, so create a matching key on the - * newsk structure. If we fail to get memory then we - * end up not copying the key across. Shucks. - */ - char *newkey = kmemdup(key->key, key->keylen, - GFP_ATOMIC); - if (newkey) { - if (!tcp_alloc_md5sig_pool()) - BUG(); - tp->af_specific->md5_add(child, child, - newkey, - key->keylen); - } + else { + /* Copy over the MD5 key from the original socket */ + struct tcp_md5sig_key *key; + struct tcp_sock *tp = tcp_sk(sk); + key = tp->af_specific->md5_lookup(sk, child); + if (key != NULL) { + /* + * We're using one, so create a matching key on the + * newsk structure. If we fail to get memory then we + * end up not copying the key across. Shucks. + */ + char *newkey = kmemdup(key->key, key->keylen, + GFP_ATOMIC); + if (newkey) { + if (!tcp_alloc_md5sig_pool()) + BUG(); + tp->af_specific->md5_add(child, child, newkey, + key->keylen); } } + } #endif - inet_csk_reqsk_queue_unlink(sk, req, prev); - inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); - return child; + inet_csk_reqsk_queue_add(sk, req, child); + return child; - listen_overflow: - if (!sysctl_tcp_abort_on_overflow) { - inet_rsk(req)->acked = 1; - return NULL; - } +listen_overflow: + if (!sysctl_tcp_abort_on_overflow) { + inet_rsk(req)->acked = 1; + return NULL; + } - embryonic_reset: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); - if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_reset(sk, skb); +embryonic_reset: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + if (!(flg & TCP_FLAG_RST)) + req->rsk_ops->send_reset(sk, skb); - inet_csk_reqsk_queue_drop(sk, req, prev); - return NULL; + inet_csk_reqsk_queue_drop(sk, req, prev); + return NULL; } /* diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a00532de2a8c..8165f5aa8c71 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -468,7 +468,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; @@ -509,7 +510,8 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 328e0cf42b3c..5ab6ba19c3ce 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -287,7 +287,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - BUG_TRAP(!tcp_write_queue_empty(sk)); + WARN_ON(tcp_write_queue_empty(sk)); if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 383d17359d01..57e26fa66185 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -1042,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1090,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1193,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1492,7 +1498,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 9c798abce736..63418185f524 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -47,8 +47,10 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - skb_set_network_header(skb, IPV4_BEET_PHMAXLEN - x->props.header_len - - hdrlen); + skb_set_network_header(skb, -x->props.header_len - + hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); skb->transport_header = skb->network_header + sizeof(*top_iph); |