diff options
author | Heiko Carstens <hca@linux.ibm.com> | 2020-08-11 15:30:30 +0200 |
---|---|---|
committer | Vasily Gorbik <gor@linux.ibm.com> | 2020-08-26 18:47:20 +0200 |
commit | bb4644b14accb05663847277002e3efa9fa3cd3b (patch) | |
tree | ba7d76538d9a476b7e40f5681e3f5905802442d6 /arch/s390/include | |
parent | b064904c509decf9e038f29f903a2304851a913b (diff) |
s390/checksum: rewrite csum_tcpudp_nofold()
Rewrite csum_tcpudp_nofold() so that the generated code will not
contain branches. The old implementation was also optimized for
machines which came with "add logical with carry" instructions,
however the compiler doesn't generate them anymore. This is most
likely because those instructions are slower.
However with the old code the compiler generates a lot of branches,
which isn't too helpful usually. Therefore rewrite the code.
In a tight loop this doesn't make any difference since the branch
prediction unit does its job.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Diffstat (limited to 'arch/s390/include')
-rw-r--r-- | arch/s390/include/asm/checksum.h | 22 |
1 files changed, 7 insertions, 15 deletions
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index f4b42db5d007..961c25c5124b 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -73,25 +73,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * computes the checksum of the TCP/UDP pseudo-header * returns a 32-bit checksum */ -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { - __u32 csum = (__force __u32)sum; + __u64 csum = (__force __u64)sum; csum += (__force __u32)saddr; - if (csum < (__force __u32)saddr) - csum++; - csum += (__force __u32)daddr; - if (csum < (__force __u32)daddr) - csum++; - - csum += len + proto; - if (csum < len + proto) - csum++; - - return (__force __wsum)csum; + csum += len; + csum += proto; + csum += (csum >> 32) | (csum << 32); + return (__force __wsum)(csum >> 32); } /* |