From b7aa0bf70c4afb9e38be25f5c0922498d0f8684c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Apr 2007 16:16:32 -0700 Subject: [NET]: convert network timestamps to ktime_t We currently use a special structure (struct skb_timeval) and plain 'struct timeval' to store packet timestamps in sk_buffs and struct sock. This has some drawbacks: - Fixed resolution of one microsecond. - Waste of space on 64bit platforms where sizeof(struct timeval)=16 I suggest using ktime_t that is a nice abstraction of high resolution time services, currently capable of nanosecond resolution. As sizeof(ktime_t) is 8 bytes, using ktime_t in 'struct sock' permits an 8 byte shrink of this structure on 64bit architectures. Some other structures also benefit from this size reduction (struct ipq in ipv4/ip_fragment.c, struct frag_queue in ipv6/reassembly.c, ...) Once this ktime infrastructure is adopted, we can more easily provide nanosecond resolution on top of it. (ioctl SIOCGSTAMPNS and/or SO_TIMESTAMPNS/SCM_TIMESTAMPNS) Note: this patch includes a bug correction in compat_sock_get_timestamp() where an "err = 0;" was missing (so this syscall returned -ENOENT instead of 0) Signed-off-by: Eric Dumazet CC: Stephen Hemminger CC: John find Signed-off-by: David S. 
Miller --- net/core/dev.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4dc93cc4d5b7..582db646cc54 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1031,23 +1031,12 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed); } -void __net_timestamp(struct sk_buff *skb) -{ - struct timeval tv; - - do_gettimeofday(&tv); - skb_set_timestamp(skb, &tv); -} -EXPORT_SYMBOL(__net_timestamp); - static inline void net_timestamp(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) __net_timestamp(skb); - else { - skb->tstamp.off_sec = 0; - skb->tstamp.off_usec = 0; - } + else + skb->tstamp.tv64 = 0; } /* @@ -1577,7 +1566,7 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->tstamp.off_sec) + if (!skb->tstamp.tv64) net_timestamp(skb); /* @@ -1769,7 +1758,7 @@ int netif_receive_skb(struct sk_buff *skb) if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->tstamp.off_sec) + if (!skb->tstamp.tv64) net_timestamp(skb); if (!skb->iif) -- cgit v1.2.3 From 6f05f629716a71d4c9c82813f45d3e9a6e90d146 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 Mar 2007 20:46:03 -0800 Subject: [NET]: deinline some functions Several functions are marked inline or forced inline, but it would be better to let the compiler decide. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 582db646cc54..424d6d0e98f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1673,9 +1673,9 @@ static void net_tx_action(struct softirq_action *h) } } -static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, - struct net_device *orig_dev) +static inline int deliver_skb(struct sk_buff *skb, + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); @@ -2065,7 +2065,7 @@ static int dev_ifconf(char __user *arg) * This is invoked by the /proc filesystem handler to display a device * in detail. */ -static __inline__ struct net_device *dev_get_idx(loff_t pos) +static struct net_device *dev_get_idx(loff_t pos) { struct net_device *dev; loff_t i; @@ -2836,7 +2836,7 @@ static int dev_boot_phase = 1; static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); -static inline void net_set_todo(struct net_device *dev) +static void net_set_todo(struct net_device *dev) { spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); -- cgit v1.2.3 From 459a98ed881802dee55897441bc7f77af614368e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Mar 2007 15:30:44 -0700 Subject: [SK_BUFF]: Introduce skb_reset_mac_header(skb) For the common, open coded 'skb->mac.raw = skb->data' operation, so that we can later turn skb->mac.raw into a offset, reducing the size of struct sk_buff in 64bit land while possibly keeping it as a pointer on 32bit. This one touches just the most simple case, next will handle the slightly more "complex" cases. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 424d6d0e98f8..2fcaf5bc4a9c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1066,7 +1066,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) set by sender, so that the second statement is just protection against buggy protocols. */ - skb2->mac.raw = skb2->data; + skb_reset_mac_header(skb2); if (skb2->nh.raw < skb2->data || skb2->nh.raw > skb2->tail) { @@ -1206,7 +1206,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) BUG_ON(skb_shinfo(skb)->frag_list); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); -- cgit v1.2.3 From 98e399f82ab3a6d863d1d4a7ea48925cc91c830e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Mar 2007 15:33:04 -0700 Subject: [SK_BUFF]: Introduce skb_mac_header() For the places where we need a pointer to the mac header, it is still legal to touch skb->mac.raw directly if just adding to, subtracting from or setting it to another layer header. This one also converts some more cases to skb_reset_mac_header() that my regex missed as it had no spaces before nor after '=', ugh. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2fcaf5bc4a9c..560560fe3064 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) } rcu_read_unlock(); - __skb_push(skb, skb->data - skb->mac.raw); + __skb_push(skb, skb->data - skb_mac_header(skb)); return segs; } -- cgit v1.2.3 From c1d2bbe1cd6c7bbdc6d532cefebb66c7efb789ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Apr 2007 20:45:18 -0700 Subject: [SK_BUFF]: Introduce skb_reset_network_header(skb) For the common, open coded 'skb->nh.raw = skb->data' operation, so that we can later turn skb->nh.raw into a offset, reducing the size of struct sk_buff in 64bit land while possibly keeping it as a pointer on 32bit. This one touches just the most simple case, next will handle the slightly more "complex" cases. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 560560fe3064..1b0758254ba0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1074,7 +1074,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) printk(KERN_CRIT "protocol %04x is " "buggy, dev %s\n", skb2->protocol, dev->name); - skb2->nh.raw = skb2->data; + skb_reset_network_header(skb2); } skb2->h.raw = skb2->nh.raw; @@ -1771,7 +1771,8 @@ int netif_receive_skb(struct sk_buff *skb) __get_cpu_var(netdev_rx_stat).total++; - skb->h.raw = skb->nh.raw = skb->data; + skb_reset_network_header(skb); + skb->h.raw = skb->data; skb->mac_len = skb->nh.raw - skb->mac.raw; pt_prev = NULL; -- cgit v1.2.3 From d56f90a7c96da5187f0cdf07ee7434fe6aa78bbc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Apr 2007 20:50:43 -0700 Subject: [SK_BUFF]: Introduce skb_network_header() For the places where we need a pointer to the network header, it is still legal to touch skb->nh.raw directly if just adding to, subtracting from or setting it to another layer header. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 1b0758254ba0..54ffe9db9b02 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1068,8 +1068,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ skb_reset_mac_header(skb2); - if (skb2->nh.raw < skb2->data || - skb2->nh.raw > skb2->tail) { + if (skb_network_header(skb2) < skb2->data || + skb_network_header(skb2) > skb2->tail) { if (net_ratelimit()) printk(KERN_CRIT "protocol %04x is " "buggy, dev %s\n", @@ -1207,7 +1207,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) BUG_ON(skb_shinfo(skb)->frag_list); skb_reset_mac_header(skb); - skb->mac_len = skb->nh.raw - skb->data; + skb->mac_len = skb->nh.raw - skb->mac.raw; __skb_pull(skb, skb->mac_len); if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { @@ -1224,7 +1224,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) break; - __skb_push(skb, skb->data - skb->nh.raw); + __skb_push(skb, (skb->data - + skb_network_header(skb))); } segs = ptype->gso_segment(skb, features); break; -- cgit v1.2.3 From 6b2bedc3a659ba228a93afc8e3f008e152abf18a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 12 Mar 2007 14:33:50 -0700 Subject: [NET]: network dev read_mostly For Eric, mark packet type and network device watermarks as read mostly. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 54ffe9db9b02..f9d2b0f0bd58 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,8 +146,8 @@ */ static DEFINE_SPINLOCK(ptype_lock); -static struct list_head ptype_base[16]; /* 16 way hashed list */ -static struct list_head ptype_all; /* Taps */ +static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ +static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA static struct dma_client *net_dma_client; @@ -1533,9 +1533,9 @@ out: Receiver routines =======================================================================*/ -int netdev_max_backlog = 1000; -int netdev_budget = 300; -int weight_p = 64; /* old backlog weight */ +int netdev_max_backlog __read_mostly = 1000; +int netdev_budget __read_mostly = 300; +int weight_p __read_mostly = 64; /* old backlog weight */ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; -- cgit v1.2.3 From f690808e17925fc45217eb22e8670902ecee5c1b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 12 Mar 2007 14:34:29 -0700 Subject: [NET]: make seq_operations const The seq_file operations stuff can be marked constant to get it out of dirty cache. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f9d2b0f0bd58..8ddc2ab23142 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2176,7 +2176,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations dev_seq_ops = { +static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, @@ -2196,7 +2196,7 @@ static const struct file_operations dev_seq_fops = { .release = seq_release, }; -static struct seq_operations softnet_seq_ops = { +static const struct seq_operations softnet_seq_ops = { .start = softnet_seq_start, .next = softnet_seq_next, .stop = softnet_seq_stop, -- cgit v1.2.3 From 0e1256ffd1ec654b35e023c66f6b262d4cba91e9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 12 Mar 2007 14:35:37 -0700 Subject: [NET]: show bound packet types Show what protocols are bound to what packet types in /proc/net/ptype Uses kallsyms to decode function pointers if possible. Example: Type Device Function ALL eth1 packet_rcv_spkt+0x0 0800 ip_rcv+0x0 0806 arp_rcv+0x0 86dd :ipv6:ipv6_rcv+0x0 Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/dev.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8ddc2ab23142..3af0bdc86491 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2216,6 +2216,135 @@ static const struct file_operations softnet_seq_fops = { .release = seq_release, }; +static void *ptype_get_idx(loff_t pos) +{ + struct packet_type *pt = NULL; + loff_t i = 0; + int t; + + list_for_each_entry_rcu(pt, &ptype_all, list) { + if (i == pos) + return pt; + ++i; + } + + for (t = 0; t < 16; t++) { + list_for_each_entry_rcu(pt, &ptype_base[t], list) { + if (i == pos) + return pt; + ++i; + } + } + return NULL; +} + +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) +{ + rcu_read_lock(); + return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; +} + +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct packet_type *pt; + struct list_head *nxt; + int hash; + + ++*pos; + if (v == SEQ_START_TOKEN) + return ptype_get_idx(0); + + pt = v; + nxt = pt->list.next; + if (pt->type == htons(ETH_P_ALL)) { + if (nxt != &ptype_all) + goto found; + hash = 0; + nxt = ptype_base[0].next; + } else + hash = ntohs(pt->type) & 15; + + while (nxt == &ptype_base[hash]) { + if (++hash >= 16) + return NULL; + nxt = ptype_base[hash].next; + } +found: + return list_entry(nxt, struct packet_type, list); +} + +static void ptype_seq_stop(struct seq_file *seq, void *v) +{ + rcu_read_unlock(); +} + +static void ptype_seq_decode(struct seq_file *seq, void *sym) +{ +#ifdef CONFIG_KALLSYMS + unsigned long offset = 0, symsize; + const char *symname; + char *modname; + char namebuf[128]; + + symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset, + &modname, namebuf); + + if (symname) { + char *delim = ":"; + + if (!modname) + modname = delim = ""; + seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim, + symname, offset); + return; + } 
+#endif + + seq_printf(seq, "[%p]", sym); +} + +static int ptype_seq_show(struct seq_file *seq, void *v) +{ + struct packet_type *pt = v; + + if (v == SEQ_START_TOKEN) + seq_puts(seq, "Type Device Function\n"); + else { + if (pt->type == htons(ETH_P_ALL)) + seq_puts(seq, "ALL "); + else + seq_printf(seq, "%04x", ntohs(pt->type)); + + seq_printf(seq, " %-8s ", + pt->dev ? pt->dev->name : ""); + ptype_seq_decode(seq, pt->func); + seq_putc(seq, '\n'); + } + + return 0; +} + +static const struct seq_operations ptype_seq_ops = { + .start = ptype_seq_start, + .next = ptype_seq_next, + .stop = ptype_seq_stop, + .show = ptype_seq_show, +}; + +static int ptype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ptype_seq_ops); +} + +static const struct file_operations ptype_seq_fops = { + .owner = THIS_MODULE, + .open = ptype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + + #ifdef CONFIG_WIRELESS_EXT extern int wireless_proc_init(void); #else @@ -2230,6 +2359,9 @@ static int __init dev_proc_init(void) goto out; if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; + if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) + goto out_dev2; + if (wireless_proc_init()) goto out_softnet; rc = 0; @@ -2237,6 +2369,8 @@ out: return rc; out_softnet: proc_net_remove("softnet_stat"); +out_dev2: + proc_net_remove("ptype"); out_dev: proc_net_remove("dev"); goto out; -- cgit v1.2.3 From badff6d01a8589a1c828b0bf118903ca38627f4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Mar 2007 13:06:52 -0300 Subject: [SK_BUFF]: Introduce skb_reset_transport_header(skb) For the common, open coded 'skb->h.raw = skb->data' operation, so that we can later turn skb->h.raw into a offset, reducing the size of struct sk_buff in 64bit land while possibly keeping it as a pointer on 32bit. 
This one touches just the most simple cases: skb->h.raw = skb->data; skb->h.raw = {skb_push|[__]skb_pull}() The next ones will handle the slightly more "complex" cases. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3af0bdc86491..99f15728d9cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1773,7 +1773,7 @@ int netif_receive_skb(struct sk_buff *skb) __get_cpu_var(netdev_rx_stat).total++; skb_reset_network_header(skb); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); skb->mac_len = skb->nh.raw - skb->mac.raw; pt_prev = NULL; -- cgit v1.2.3 From ea2ae17d6443abddc79480dc9f7af8feacabddc4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 25 Apr 2007 17:55:53 -0700 Subject: [SK_BUFF]: Introduce skb_transport_offset() For the quite common 'skb->h.raw - skb->data' sequence. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 99f15728d9cb..f7f7e5687e46 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1156,7 +1156,7 @@ EXPORT_SYMBOL(netif_device_attach); int skb_checksum_help(struct sk_buff *skb) { __wsum csum; - int ret = 0, offset = skb->h.raw - skb->data; + int ret = 0, offset = skb_transport_offset(skb); if (skb->ip_summed == CHECKSUM_COMPLETE) goto out_set_summed; -- cgit v1.2.3 From 9c70220b73908f64792422a2c39c593c4792f2c5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 25 Apr 2007 18:04:18 -0700 Subject: [SK_BUFF]: Introduce skb_transport_header(skb) For the places where we need a pointer to the transport header, it is still legal to touch skb->h.raw directly if just adding to, subtracting from or setting it to another layer header. 
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f7f7e5687e46..30fcc7f9d4ed 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1175,12 +1175,12 @@ int skb_checksum_help(struct sk_buff *skb) BUG_ON(offset > (int)skb->len); csum = skb_checksum(skb, offset, skb->len-offset, 0); - offset = skb->tail - skb->h.raw; + offset = skb->tail - skb_transport_header(skb); BUG_ON(offset <= 0); BUG_ON(skb->csum_offset + 2 > offset); - *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); - + *(__sum16 *)(skb_transport_header(skb) + + skb->csum_offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: -- cgit v1.2.3 From b0e380b1d8a8e0aca215df97702f99815f05c094 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Apr 2007 21:21:55 -0700 Subject: [SK_BUFF]: unions of just one member don't get anything done, kill them Renaming skb->h to skb->transport_header, skb->nh to skb->network_header and skb->mac to skb->mac_header, to match the names of the associated helpers (skb[_[re]set]_{transport,network,mac}_header). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 30fcc7f9d4ed..6562e5736e2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1077,7 +1077,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_network_header(skb2); } - skb2->h.raw = skb2->nh.raw; + skb2->transport_header = skb2->network_header; skb2->pkt_type = PACKET_OUTGOING; ptype->func(skb2, skb->dev, ptype, skb->dev); } @@ -1207,7 +1207,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) BUG_ON(skb_shinfo(skb)->frag_list); skb_reset_mac_header(skb); - skb->mac_len = skb->nh.raw - skb->mac.raw; + skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { @@ -1774,7 +1774,7 @@ int netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->nh.raw - skb->mac.raw; + skb->mac_len = skb->network_header - skb->mac_header; pt_prev = NULL; -- cgit v1.2.3 From 27a884dc3cb63b93c2b3b643f5b31eed5f8a4d26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Apr 2007 20:29:13 -0700 Subject: [SK_BUFF]: Convert skb->tail to sk_buff_data_t So that it is also an offset from skb->head, reduces its size from 8 to 4 bytes on 64bit architectures, allowing us to combine the 4 bytes hole left by the layer headers conversion, reducing struct sk_buff size to 256 bytes, i.e. 4 64byte cachelines, and since the sk_buff slab cache is SLAB_HWCACHE_ALIGN... :-) Many calculations that previously required that skb->{transport,network, mac}_header be first converted to a pointer now can be done directly, being meaningful as offsets or pointers. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6562e5736e2f..86dc9f693f66 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1069,7 +1069,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb2->tail) { + skb2->network_header > skb2->tail) { if (net_ratelimit()) printk(KERN_CRIT "protocol %04x is " "buggy, dev %s\n", @@ -1175,7 +1175,7 @@ int skb_checksum_help(struct sk_buff *skb) BUG_ON(offset > (int)skb->len); csum = skb_checksum(skb, offset, skb->len-offset, 0); - offset = skb->tail - skb_transport_header(skb); + offset = skb->tail - skb->transport_header; BUG_ON(offset <= 0); BUG_ON(skb->csum_offset + 2 > offset); -- cgit v1.2.3 From c45d286e72dd72c0229dc9e2849743ba427fee84 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 28 Mar 2007 14:29:08 -0700 Subject: [NET]: Inline net_device_stats Network drivers which keep stats allocate their own stats structure then write a get_stats() function to return them. It would be nice if this were done by default. 1) Add a new "stats" field to "struct net_device". 2) Add a new feature field to say "this driver uses the internal one" 3) Have a default "get_stats" which returns NULL if that feature not set. 4) Change callers to check result of get_stats call for NULL, not if ->get_stats is set. This should not break backwards compatibility with older drivers, yet allow modern drivers to shed some boilerplate code. Lightly tested: works for a modified lguest network driver. Signed-off-by: Rusty Russell Signed-off-by: David S. 
Miller --- net/core/dev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 86dc9f693f66..fec8cf27f75d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -817,7 +817,6 @@ static int default_rebuild_header(struct sk_buff *skb) return 1; } - /** * dev_open - prepare an interface for use. * @dev: device to open @@ -2096,9 +2095,9 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) { seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", dev->name, stats->rx_bytes, stats->rx_packets, @@ -3282,6 +3281,13 @@ out: mutex_unlock(&net_todo_run_mutex); } +static struct net_device_stats *maybe_internal_stats(struct net_device *dev) +{ + if (dev->features & NETIF_F_INTERNAL_STATS) + return &dev->stats; + return NULL; +} + /** * alloc_netdev - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -3317,6 +3323,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, if (sizeof_priv) dev->priv = netdev_priv(dev); + dev->get_stats = maybe_internal_stats; setup(dev); strcpy(dev->name, name); return dev; -- cgit v1.2.3 From 663ead3bb8d5b561e70fc3bb3861c9220b5a77eb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 9 Apr 2007 11:59:07 -0700 Subject: [NET]: Use csum_start offset instead of skb_transport_header The skb transport pointer is currently used to specify the start of the checksum region for transmit checksum offload. Unfortunately, the same pointer is also used during receive side processing. This creates a problem when we want to retransmit a received packet with partial checksums since the skb transport pointer would be overwritten. 
This patch solves this problem by creating a new 16-bit csum_start offset value to replace the skb transport header for the purpose of checksums. This offset is calculated from skb->head so that it does not have to change when skb->data changes. No extra space is required since csum_offset itself fits within a 16-bit word so we can use the other 16 bits for csum_start. For backwards compatibility, just before we push a packet with partial checksums off into the device driver, we set the skb transport header to what it would have been under the old scheme. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index fec8cf27f75d..d23972f56fc7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1155,7 +1155,7 @@ EXPORT_SYMBOL(netif_device_attach); int skb_checksum_help(struct sk_buff *skb) { __wsum csum; - int ret = 0, offset = skb_transport_offset(skb); + int ret = 0, offset; if (skb->ip_summed == CHECKSUM_COMPLETE) goto out_set_summed; @@ -1171,15 +1171,16 @@ int skb_checksum_help(struct sk_buff *skb) goto out; } + offset = skb->csum_start - skb_headroom(skb); BUG_ON(offset > (int)skb->len); csum = skb_checksum(skb, offset, skb->len-offset, 0); - offset = skb->tail - skb->transport_header; + offset = skb_headlen(skb) - offset; BUG_ON(offset <= 0); BUG_ON(skb->csum_offset + 2 > offset); - *(__sum16 *)(skb_transport_header(skb) + - skb->csum_offset) = csum_fold(csum); + *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = + csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: @@ -1431,12 +1432,16 @@ int dev_queue_xmit(struct sk_buff *skb) /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. 
*/ - if (skb->ip_summed == CHECKSUM_PARTIAL && - (!(dev->features & NETIF_F_GEN_CSUM) && - (!(dev->features & NETIF_F_IP_CSUM) || - skb->protocol != htons(ETH_P_IP)))) - if (skb_checksum_help(skb)) - goto out_kfree_skb; + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_set_transport_header(skb, skb->csum_start - + skb_headroom(skb)); + + if (!(dev->features & NETIF_F_GEN_CSUM) && + (!(dev->features & NETIF_F_IP_CSUM) || + skb->protocol != htons(ETH_P_IP))) + if (skb_checksum_help(skb)) + goto out_kfree_skb; + } gso: spin_lock_prefetch(&dev->queue_lock); -- cgit v1.2.3 From 6229e362dd49b9e8387126bd4483ab0574d23e9c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Mar 2007 13:38:47 -0700 Subject: bridge: eliminate call by reference Change the bridging hook to be simple function with return value rather than modifying the skb argument. This could generate better code and is cleaner. Signed-off-by: Stephen Hemminger --- net/core/dev.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d23972f56fc7..7f31d0f88424 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1687,31 +1687,37 @@ static inline int deliver_skb(struct sk_buff *skb, } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) -int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); +/* These hooks defined here for ATM */ struct net_bridge; struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, unsigned char *addr); -void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); +void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly; -static __inline__ int handle_bridge(struct sk_buff **pskb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) +/* + * If bridge module is loaded call bridging hook. + * returns NULL if packet was consumed. 
+ */ +struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, + struct sk_buff *skb) __read_mostly; +static inline struct sk_buff *handle_bridge(struct sk_buff *skb, + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { struct net_bridge_port *port; - if ((*pskb)->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) - return 0; + if (skb->pkt_type == PACKET_LOOPBACK || + (port = rcu_dereference(skb->dev->br_port)) == NULL) + return skb; if (*pt_prev) { - *ret = deliver_skb(*pskb, *pt_prev, orig_dev); + *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - return br_handle_frame_hook(port, pskb); + return br_handle_frame_hook(port, skb); } #else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) +#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif #ifdef CONFIG_NET_CLS_ACT @@ -1818,7 +1824,8 @@ int netif_receive_skb(struct sk_buff *skb) ncls: #endif - if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) + skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); + if (!skb) goto out; type = skb->protocol; -- cgit v1.2.3 From fd44de7cc1d430caef91ad9aecec9ff000fe86f8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 16 Apr 2007 17:07:08 -0700 Subject: [NET_SCHED]: ingress: switch back to using ingress_lock Switch ingress queueing back to use ingress_lock. qdisc_lock_tree now locks both the ingress and egress qdiscs on the device. All changes to data that might be used on both ingress and egress needs to be protected by using qdisc_lock_tree instead of manually taking dev->queue_lock. Additionally the qdisc stats_lock needs to be initialized to ingress_lock for ingress qdiscs. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7f31d0f88424..c8f5ea9aea81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1747,10 +1747,10 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); - spin_lock(&dev->queue_lock); + spin_lock(&dev->ingress_lock); if ((q = dev->qdisc_ingress) != NULL) result = q->enqueue(skb, q); - spin_unlock(&dev->queue_lock); + spin_unlock(&dev->ingress_lock); } -- cgit v1.2.3 From 9be9a6b983314dd57e2c5ba548dee8b53d338ac3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Apr 2007 17:02:45 -0700 Subject: [NET]: Get rid of netdev_nit It isn't any faster to test a boolean global variable than do a simple check for empty list. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c8f5ea9aea81..431998d9cee9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -225,12 +225,6 @@ extern void netdev_unregister_sysfs(struct net_device *); *******************************************************************************/ -/* - * For efficiency - */ - -static int netdev_nit; - /* * Add a protocol ID to the list. 
Now that the input handler is * smarter we can dispense with all the messy stuff that used to be @@ -265,10 +259,9 @@ void dev_add_pack(struct packet_type *pt) int hash; spin_lock_bh(&ptype_lock); - if (pt->type == htons(ETH_P_ALL)) { - netdev_nit++; + if (pt->type == htons(ETH_P_ALL)) list_add_rcu(&pt->list, &ptype_all); - } else { + else { hash = ntohs(pt->type) & 15; list_add_rcu(&pt->list, &ptype_base[hash]); } @@ -295,10 +288,9 @@ void __dev_remove_pack(struct packet_type *pt) spin_lock_bh(&ptype_lock); - if (pt->type == htons(ETH_P_ALL)) { - netdev_nit--; + if (pt->type == htons(ETH_P_ALL)) head = &ptype_all; - } else + else head = &ptype_base[ntohs(pt->type) & 15]; list_for_each_entry(pt1, head, list) { @@ -1330,7 +1322,7 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { if (likely(!skb->next)) { - if (netdev_nit) + if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); if (netif_needs_gso(dev, skb)) { -- cgit v1.2.3 From 38b4da383705394788aa09208917ba200792de4b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Apr 2007 22:14:10 -0700 Subject: [NET]: Fix comments for register_netdev(). Correct the function name in the comments supplied with register_netdev() Signed-off-by: Borislav Petkov Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 431998d9cee9..f3b99701da5b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,7 +3130,7 @@ out: * chain. 0 is returned on success. A negative errno code is returned * on a failure to set up the device, or if the name is a duplicate. * - * This is a wrapper around register_netdev that takes the rtnl semaphore + * This is a wrapper around register_netdevice that takes the rtnl semaphore * and expands the device name if you passed a format string to * alloc_netdev. 
*/ -- cgit v1.2.3 From 372cc74c8b41d808af0a3fa8b11795cba79e7299 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 22 Apr 2007 23:22:24 -0700 Subject: [NET]: Prevent much sadness in qdisc_lock_tree(). Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f3b99701da5b..18c51b40f665 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3016,9 +3016,7 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); dev->xmit_lock_owner = -1; -#ifdef CONFIG_NET_CLS_ACT spin_lock_init(&dev->ingress_lock); -#endif dev->iflink = -1; -- cgit v1.2.3 From f9d106a6d53b57b78eae5544f9582c643343a764 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 23 Apr 2007 22:36:13 -0700 Subject: [NET]: Warn about GSO/checksum abuse Now that Patrick has added the code to deal with GSO in netfilter, we no longer need the crutch that computes partial checksums just before transmission. This patch turns this into a warning again. If this goes OK, we can then turn it into a BUG_ON and remove the gso_send_check cruft. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 18c51b40f665..d82d00f5451f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1202,7 +1202,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); -- cgit v1.2.3 From 11433ee450eb4a320f46ce5ed51410b52803ffcc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Apr 2007 20:42:51 -0700 Subject: [WEXT]: Move to net/wireless This patch moves net/core/wireless.c to net/wireless/wext.c. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d82d00f5451f..700e4b5081b6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2936,7 +2936,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) } dev_load(ifr.ifr_name); rtnl_lock(); - /* Follow me in net/core/wireless.c */ + /* Follow me in net/wireless/wext.c */ ret = wireless_process_ioctl(&ifr, cmd); rtnl_unlock(); if (IW_IS_GET(cmd) && -- cgit v1.2.3 From 295f4a1fa3ecdf816b18393ef7bcd37c032df2fa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Apr 2007 20:43:56 -0700 Subject: [WEXT]: Clean up how wext is called. This patch cleans up the call paths from the core code into wext. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller --- net/core/dev.c | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 700e4b5081b6..d5e42d13bd67 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -109,7 +109,7 @@ #include #include #include -#include +#include #include #include #include @@ -2348,12 +2348,6 @@ static const struct file_operations ptype_seq_fops = { }; -#ifdef CONFIG_WIRELESS_EXT -extern int wireless_proc_init(void); -#else -#define wireless_proc_init() 0 -#endif - static int __init dev_proc_init(void) { int rc = -ENOMEM; @@ -2365,7 +2359,7 @@ static int __init dev_proc_init(void) if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) goto out_dev2; - if (wireless_proc_init()) + if (wext_proc_init()) goto out_softnet; rc = 0; out: @@ -2923,29 +2917,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg) ret = -EFAULT; return ret; } -#ifdef CONFIG_WIRELESS_EXT /* Take care of Wireless Extensions */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - /* If command is `set a parameter', or - * `get the encoding parameters', check if - * the user has the right to do it */ - if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE - || cmd == SIOCGIWENCODEEXT) { - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - } - dev_load(ifr.ifr_name); - rtnl_lock(); - /* Follow me in net/wireless/wext.c */ - ret = wireless_process_ioctl(&ifr, cmd); - rtnl_unlock(); - if (IW_IS_GET(cmd) && - copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - return ret; - } -#endif /* CONFIG_WIRELESS_EXT */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) + return wext_handle_ioctl(&ifr, cmd, arg); return -EINVAL; } } -- cgit v1.2.3