From 9067931236651c8bde847d17a2f862d052e672b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 30 May 2021 22:53:43 -0400 Subject: ntfs_copy_from_user_iter(): don't bother with copying iov_iter Advance the original, let the caller revert if it needs to. Don't mess with iov_iter_single_seg_count() in the caller - if we got a (non-zero) short copy, use the amount actually copied for the next pass, limit it to "up to the end of page" if nothing got copied at all. Originally fault-in only read the first iovec; back then it used to make sense to limit to just the one iovec for the pass after a short copy. These days it's no longer true. Signed-off-by: Al Viro --- fs/ntfs/file.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index e5aab265dff1..0666d4578137 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1684,20 +1684,19 @@ static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, { struct page **last_page = pages + nr_pages; size_t total = 0; - struct iov_iter data = *i; unsigned len, copied; do { len = PAGE_SIZE - ofs; if (len > bytes) len = bytes; - copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs, + copied = iov_iter_copy_from_user_atomic(*pages, i, ofs, len); + iov_iter_advance(i, copied); total += copied; bytes -= copied; if (!bytes) break; - iov_iter_advance(&data, copied); if (copied < len) goto err; ofs = 0; @@ -1866,34 +1865,24 @@ again: if (likely(copied == bytes)) { status = ntfs_commit_pages_after_write(pages, do_pages, pos, bytes); - if (!status) - status = bytes; } do { unlock_page(pages[--do_pages]); put_page(pages[do_pages]); } while (do_pages); - if (unlikely(status < 0)) + if (unlikely(status < 0)) { + iov_iter_revert(i, copied); break; - copied = status; + } cond_resched(); - if (unlikely(!copied)) { - size_t sc; - - /* - * We failed to copy anything. Fall back to single - * segment length write. - * - * This is needed to avoid possible livelock in the - * case that all segments in the iov cannot be copied - * at once without a pagefault. - */ - sc = iov_iter_single_seg_count(i); - if (bytes > sc) - bytes = sc; + if (unlikely(copied < bytes)) { + iov_iter_revert(i, copied); + if (copied) + bytes = copied; + else if (bytes > PAGE_SIZE - ofs) + bytes = PAGE_SIZE - ofs; goto again; } - iov_iter_advance(i, copied); pos += copied; written += copied; balance_dirty_pages_ratelimited(mapping); -- cgit v1.2.3 From bc1bb416bbb9203e250f5c49aaf1d11b5d9c8adb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 31 May 2021 00:32:44 -0400 Subject: generic_perform_write()/iomap_write_actor(): saner logics for short copy If we run into a short copy and ->write_end() refuses to advance at all, use the amount we'd managed to copy for the next iteration to handle. Signed-off-by: Al Viro --- fs/iomap/buffered-io.c | 25 ++++++++++--------------- mm/filemap.c | 24 +++++++++--------------- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f2cd2034a87b..354b41d20e5d 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -771,10 +771,6 @@ again: * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below.
*/ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; @@ -791,25 +787,24 @@ again: copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - copied = iomap_write_end(inode, pos, bytes, copied, page, iomap, + status = iomap_write_end(inode, pos, bytes, copied, page, iomap, srcmap); cond_resched(); - iov_iter_advance(i, copied); - if (unlikely(copied == 0)) { + if (unlikely(status == 0)) { /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. + * A short copy made iomap_write_end() reject the + * thing entirely. Might be memory poisoning + * halfway through, might be a race with munmap, + * might be severe memory pressure. */ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(i)); + if (copied) + bytes = copied; goto again; } + copied = status; + iov_iter_advance(i, copied); pos += copied; written += copied; length -= copied; diff --git a/mm/filemap.c b/mm/filemap.c index 66f7e9fdfbc4..0be24942bf8e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3642,10 +3642,6 @@ again: * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. - * - * Not only is this an optimisation, but it is also required - * to check that the address is actually valid, when atomic - * usercopies are used, below. */ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; @@ -3672,24 +3668,22 @@ again: page, fsdata); if (unlikely(status < 0)) break; - copied = status; cond_resched(); - iov_iter_advance(i, copied); - if (unlikely(copied == 0)) { + if (unlikely(status == 0)) { /* - * If we were unable to copy any data at all, we must - * fall back to a single segment length write. - * - * If we didn't fallback here, we could livelock - * because not all segments in the iov can be copied at - * once without a pagefault. + * A short copy made ->write_end() reject the + * thing entirely. Might be memory poisoning + * halfway through, might be a race with munmap, + * might be severe memory pressure. */ - bytes = min_t(unsigned long, PAGE_SIZE - offset, - iov_iter_single_seg_count(i)); + if (copied) + bytes = copied; goto again; } + copied = status; + iov_iter_advance(i, copied); pos += copied; written += copied; -- cgit v1.2.3 From 8959a239242754054e70391c05c06d8c4eb2bc77 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Jun 2021 10:34:55 -0400 Subject: fuse_fill_write_pages(): don't bother with iov_iter_single_seg_count() another rudiment of fault-in originally having been limited to the first segment, same as in generic_perform_write() and friends. Signed-off-by: Al Viro --- fs/fuse/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 09ef2a4d25ed..44bd301fa4fb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1178,7 +1178,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, if (!tmp) { unlock_page(page); put_page(page); - bytes = min(bytes, iov_iter_single_seg_count(ii)); goto again; } -- cgit v1.2.3 From 66cd071a1f839b4834d45aa7dde622151041b1a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Apr 2021 19:10:53 +0100 Subject: iov_iter: Remove iov_iter_for_each_range() Remove iov_iter_for_each_range() as it's no longer used with the removal of lustre. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/uio.h | 4 ---- lib/iov_iter.c | 27 --------------------------- 2 files changed, 31 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index d3ec87706d75..74a401f04bd3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -294,8 +294,4 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec, int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); -int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, - int (*f)(struct kvec *vec, void *context), - void *context); - #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c701b7a187f2..8f5ce5b1ff91 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -2093,30 +2093,3 @@ int import_single_range(int rw, void __user *buf, size_t len, return 0; } EXPORT_SYMBOL(import_single_range); - -int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, - int (*f)(struct kvec *vec, void *context), - void *context) -{ - struct kvec w; - int err = -EINVAL; - if (!bytes) - return 0; - - iterate_all_kinds(i, bytes, v, -EINVAL, ({ - w.iov_base = kmap(v.bv_page) + v.bv_offset; - w.iov_len = v.bv_len; - err = f(&w, context); - kunmap(v.bv_page); - err;}), ({ - w = v; - err = f(&w, context);}), ({ - w.iov_base = kmap(v.bv_page) + v.bv_offset; - w.iov_len = v.bv_len; - err = f(&w, context); - kunmap(v.bv_page); - err;}) - ) - return err; -} -EXPORT_SYMBOL(iov_iter_for_each_range); -- cgit v1.2.3 From 08aa64796016cb47b2ef3d0924653b4d944b0d65 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Apr 2021 20:42:25 -0400 Subject: teach copy_page_to_iter() to handle compound pages In situations when copy_page_to_iter() got a compound page, the current code would only work on systems with no CONFIG_HIGHMEM. That *is* the majority of real-world setups, or we would've drowned in bug reports by now. Still needs fixing. The current variant works for a solitary page; rename that to __copy_page_to_iter() and turn the handling of compound pages into a loop over subpages.
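[editor's note: a caller-side sketch with hypothetical variable names, to make the new semantics concrete - the offset may now index into a tail page of a compound page (e.g. a THP), and a single call may cross subpage boundaries:

	/* 'page' is the head of a compound page; 'off' may exceed PAGE_SIZE */
	size_t copied = copy_page_to_iter(page, off, len, iter);

Internally each pass still hands __copy_page_to_iter() one subpage at a time, which is what keeps the kmap_atomic() path safe on CONFIG_HIGHMEM setups.]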
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- lib/iov_iter.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8f5ce5b1ff91..12fb04b23143 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -957,11 +957,9 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) return false; } -size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, +static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (unlikely(!page_copy_sane(page, offset, bytes))) - return 0; if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) { void *kaddr = kmap_atomic(page); size_t wanted = copy_to_iter(kaddr + offset, bytes, i); @@ -974,6 +972,30 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, else return copy_page_to_iter_pipe(page, offset, bytes, i); } + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t res = 0; + if (unlikely(!page_copy_sane(page, offset, bytes))) + return 0; + page += offset / PAGE_SIZE; // first subpage + offset %= PAGE_SIZE; + while (1) { + size_t n = __copy_page_to_iter(page, offset, + min(bytes, (size_t)PAGE_SIZE - offset), i); + res += n; + bytes -= n; + if (!bytes || !n) + break; + offset += n; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } + } + return res; +} EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, -- cgit v1.2.3 From a506abc7b644d71966a75337d5a534f531b3cdc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 27 Apr 2021 12:34:04 -0400 Subject: copy_page_to_iter(): fix ITER_DISCARD case we need to advance the iterator... Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- lib/iov_iter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 12fb04b23143..c8877cffb7bc 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -965,9 +965,12 @@ static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes size_t wanted = copy_to_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; - } else if (unlikely(iov_iter_is_discard(i))) + } else if (unlikely(iov_iter_is_discard(i))) { + if (unlikely(i->count < bytes)) + bytes = i->count; + i->count -= bytes; return bytes; - else if (likely(!iov_iter_is_pipe(i))) + } else if (likely(!iov_iter_is_pipe(i))) return copy_page_to_iter_iovec(page, offset, bytes, i); else return copy_page_to_iter_pipe(page, offset, bytes, i); -- cgit v1.2.3 From 0e8f0d67401589a141950856902c7d0ec8d9c985 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Jun 2021 14:48:21 -0400 Subject: [xarray] iov_iter_fault_in_readable() should do nothing in xarray case ... and actually should just check it's given an iovec-backed iterator in the first place. 
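[editor's note: for context, the caller-side pattern this keeps working - it appears verbatim in the write loops earlier in this series; after the fix, non-iovec iterators (bvec, kvec, xarray) simply get 0 back, since there is no userland memory to fault in:

	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
		status = -EFAULT;
		break;
	}
]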
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index c8877cffb7bc..a3aabeda945b 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -476,7 +476,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) int err; struct iovec v; - if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + if (iter_is_iovec(i)) { iterate_iovec(i, bytes, v, iov, skip, ({ err = fault_in_pages_readable(v.iov_base, v.iov_len); if (unlikely(err)) -- cgit v1.2.3 From 3b3fc051cd2cba42bf736fa62780857d251a1236 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Apr 2021 22:24:08 -0400 Subject: iov_iter_advance(): use consistent semantics for move past the end asking to advance by more than we have left in the iov_iter should move to the very end; it should *not* leave negative i->count and it should not spew into syslog, etc. - it's a legitimate operation. Signed-off-by: Al Viro --- lib/iov_iter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a3aabeda945b..bdbe6691457d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1117,8 +1117,6 @@ static inline void pipe_truncate(struct iov_iter *i) static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - if (unlikely(i->count < size)) - size = i->count; if (size) { struct pipe_buffer *buf; unsigned int p_mask = pipe->ring_size - 1; @@ -1159,6 +1157,8 @@ static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) void iov_iter_advance(struct iov_iter *i, size_t size) { + if (unlikely(i->count < size)) + size = i->count; if (unlikely(iov_iter_is_pipe(i))) { pipe_advance(i, size); return; @@ -1168,7 +1168,6 @@ void iov_iter_advance(struct iov_iter *i, size_t size) return; } if (unlikely(iov_iter_is_xarray(i))) { - size = min(size, i->count); i->iov_offset += size; i->count -= size; return; -- cgit v1.2.3 From 4b6c132b7da6430cf5dcc96948b04849dea0a32a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Apr 2021 21:16:56 -0400 Subject: iov_iter: switch ..._full() variants of primitives to use of iov_iter_revert() Use corresponding plain variants, revert on short copy. That's the way it should've been done from the very beginning, except that we didn't have iov_iter_revert() back then... 
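[editor's note: a hypothetical caller, to illustrate the contract the revert provides - on failure the iterator is back where it started, so the caller needs no positional bookkeeping:

	struct foo_hdr hdr;	/* hypothetical on-wire header */

	if (!copy_from_iter_full(&hdr, sizeof(hdr), iter))
		return -EFAULT;	/* iter unmoved; safe to retry or bail out */
]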
[fixed another braino caught by Qian Cai ] Signed-off-by: Al Viro --- include/linux/uio.h | 32 ++++++++++------ lib/iov_iter.c | 104 ---------------------------------------------------- 2 files changed, 21 insertions(+), 115 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 74a401f04bd3..68079e2f34eb 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -132,9 +132,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); -bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); -bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) @@ -157,10 +155,11 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) static __always_inline __must_check bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return false; - else - return _copy_from_iter_full(addr, bytes, i); + size_t copied = copy_from_iter(addr, bytes, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; } static __always_inline __must_check @@ -175,10 +174,11 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) static __always_inline __must_check bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(!check_copy_size(addr, bytes, false))) - return false; - else - return _copy_from_iter_full_nocache(addr, bytes, i); + size_t copied = copy_from_iter_nocache(addr, bytes, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; } #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE @@ -278,7 +278,17 @@ struct csum_state { size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); + +static __always_inline __must_check +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, + __wsum *csum, struct iov_iter *i) +{ + size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index bdbe6691457d..ce9f8b9168ea 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -819,35 +819,6 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_from_iter); -bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) -{ - char *to = addr; - if (unlikely(iov_iter_is_pipe(i))) { - WARN_ON(1); - return false; - } - if (unlikely(i->count < bytes)) - return false; - - if (iter_is_iovec(i)) - might_fault(); - iterate_all_kinds(i, bytes, v, ({ - if (copyin((to += v.iov_len) - v.iov_len, - v.iov_base, v.iov_len)) - return false; - 0;}), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((to += 
v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len) - ) - - iov_iter_advance(i, bytes); - return true; -} -EXPORT_SYMBOL(_copy_from_iter_full); - size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; @@ -907,32 +878,6 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); #endif -bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) -{ - char *to = addr; - if (unlikely(iov_iter_is_pipe(i))) { - WARN_ON(1); - return false; - } - if (unlikely(i->count < bytes)) - return false; - iterate_all_kinds(i, bytes, v, ({ - if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len, - v.iov_base, v.iov_len)) - return false; - 0;}), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len) - ) - - iov_iter_advance(i, bytes); - return true; -} -EXPORT_SYMBOL(_copy_from_iter_full_nocache); - static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { struct page *head; @@ -1740,55 +1685,6 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, } EXPORT_SYMBOL(csum_and_copy_from_iter); -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, - struct iov_iter *i) -{ - char *to = addr; - __wsum sum, next; - size_t off = 0; - sum = *csum; - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { - WARN_ON(1); - return false; - } - if (unlikely(i->count < bytes)) - return false; - iterate_all_kinds(i, bytes, v, ({ - next = csum_and_copy_from_user(v.iov_base, - (to += v.iov_len) - v.iov_len, - v.iov_len); - if (!next) - return false; - sum = csum_block_add(sum, next, off); - off += v.iov_len; - 0; - }), ({ - char *p = kmap_atomic(v.bv_page); - sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, - p + v.bv_offset, v.bv_len, - sum, off); - kunmap_atomic(p); - off += v.bv_len; - }),({ - sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, - v.iov_base, v.iov_len, - sum, off); - off += v.iov_len; - }), ({ - char *p = kmap_atomic(v.bv_page); - sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, - p + v.bv_offset, v.bv_len, - sum, off); - kunmap_atomic(p); - off += v.bv_len; - }) - ) - *csum = sum; - iov_iter_advance(i, bytes); - return true; -} -EXPORT_SYMBOL(csum_and_copy_from_iter_full); - size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { -- cgit v1.2.3 From 28f38db7edbfa6d7736cd7a3a7aec76660bfef57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 Jun 2021 17:25:59 -0400 Subject: iov_iter: reorder handling of flavours in primitives iovec is the most common one; test it first and test explicitly, rather than "not anything else". Replace all flavour checks with use of iov_iter_is_...() helpers. 
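[editor's note: the shape of the conversion, shown on a hypothetical open-coded check (do_iovec()/do_bvec() are made-up per-flavour handlers, not kernel functions):

	/* before: bit tests against the mixed type field */
	if (unlikely(i->type & ITER_BVEC))
		do_bvec(i);
	/* after: named helpers, with iovec tested first as the common case */
	if (likely(iter_is_iovec(i)))
		do_iovec(i);
	else if (iov_iter_is_bvec(i))
		do_bvec(i);
]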
Signed-off-by: Al Viro --- lib/iov_iter.c | 91 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ce9f8b9168ea..bdcf1fbeb2db 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -117,22 +117,21 @@ #define iterate_all_kinds(i, n, v, I, B, K, X) { \ if (likely(n)) { \ size_t skip = i->iov_offset; \ - if (unlikely(i->type & ITER_BVEC)) { \ + if (likely(iter_is_iovec(i))) { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + } else if (iov_iter_is_bvec(i)) { \ struct bio_vec v; \ struct bvec_iter __bi; \ iterate_bvec(i, n, v, __bi, skip, (B)) \ - } else if (unlikely(i->type & ITER_KVEC)) { \ + } else if (iov_iter_is_kvec(i)) { \ const struct kvec *kvec; \ struct kvec v; \ iterate_kvec(i, n, v, kvec, skip, (K)) \ - } else if (unlikely(i->type & ITER_DISCARD)) { \ - } else if (unlikely(i->type & ITER_XARRAY)) { \ + } else if (iov_iter_is_xarray(i)) { \ struct bio_vec v; \ iterate_xarray(i, n, v, skip, (X)); \ - } else { \ - const struct iovec *iov; \ - struct iovec v; \ - iterate_iovec(i, n, v, iov, skip, (I)) \ } \ } \ } @@ -142,7 +141,17 @@ n = i->count; \ if (i->count) { \ size_t skip = i->iov_offset; \ - if (unlikely(i->type & ITER_BVEC)) { \ + if (likely(iter_is_iovec(i))) { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + if (skip == iov->iov_len) { \ + iov++; \ + skip = 0; \ + } \ + i->nr_segs -= iov - i->iov; \ + i->iov = iov; \ + } else if (iov_iter_is_bvec(i)) { \ const struct bio_vec *bvec = i->bvec; \ struct bio_vec v; \ struct bvec_iter __bi; \ @@ -150,7 +159,7 @@ i->bvec = __bvec_iter_bvec(i->bvec, __bi); \ i->nr_segs -= i->bvec - bvec; \ skip = __bi.bi_bvec_done; \ - } else if (unlikely(i->type & ITER_KVEC)) { \ + } else if (iov_iter_is_kvec(i)) { \ const struct kvec *kvec; \ struct kvec v; \ iterate_kvec(i, n, v, kvec, skip, (K)) \ @@ -160,21 +169,11 @@ } \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ - } else if (unlikely(i->type & ITER_DISCARD)) { \ - skip += n; \ - } else if (unlikely(i->type & ITER_XARRAY)) { \ + } else if (iov_iter_is_xarray(i)) { \ struct bio_vec v; \ iterate_xarray(i, n, v, skip, (X)) \ - } else { \ - const struct iovec *iov; \ - struct iovec v; \ - iterate_iovec(i, n, v, iov, skip, (I)) \ - if (skip == iov->iov_len) { \ - iov++; \ - skip = 0; \ - } \ - i->nr_segs -= iov - i->iov; \ - i->iov = iov; \ + } else if (iov_iter_is_discard(i)) { \ + skip += n; \ } \ i->count -= n; \ i->iov_offset = skip; \ @@ -905,20 +904,24 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) { + if (likely(iter_is_iovec(i))) + return copy_page_to_iter_iovec(page, offset, bytes, i); + if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { void *kaddr = kmap_atomic(page); size_t wanted = copy_to_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; - } else if (unlikely(iov_iter_is_discard(i))) { + } + if (iov_iter_is_pipe(i)) + return copy_page_to_iter_pipe(page, offset, bytes, i); + if (unlikely(iov_iter_is_discard(i))) { if (unlikely(i->count < bytes)) bytes = i->count; i->count -= bytes; return bytes; - } else if (likely(!iov_iter_is_pipe(i))) - return copy_page_to_iter_iovec(page, offset, bytes, i); - else - return copy_page_to_iter_pipe(page, offset, 
bytes, i); + } + WARN_ON(1); + return 0; } size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, @@ -951,17 +954,16 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, { if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { - WARN_ON(1); - return 0; - } - if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) { + if (likely(iter_is_iovec(i))) + return copy_page_from_iter_iovec(page, offset, bytes, i); + if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { void *kaddr = kmap_atomic(page); size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; - } else - return copy_page_from_iter_iovec(page, offset, bytes, i); + } + WARN_ON(1); + return 0; } EXPORT_SYMBOL(copy_page_from_iter); @@ -1203,16 +1205,13 @@ EXPORT_SYMBOL(iov_iter_revert); */ size_t iov_iter_single_seg_count(const struct iov_iter *i) { - if (unlikely(iov_iter_is_pipe(i))) - return i->count; // it is a silly place, anyway - if (i->nr_segs == 1) - return i->count; - if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i))) - return i->count; - if (iov_iter_is_bvec(i)) - return min(i->count, i->bvec->bv_len - i->iov_offset); - else - return min(i->count, i->iov->iov_len - i->iov_offset); + if (i->nr_segs > 1) { + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) + return min(i->count, i->iov->iov_len - i->iov_offset); + if (iov_iter_is_bvec(i)) + return min(i->count, i->bvec->bv_len - i->iov_offset); + } + return i->count; } EXPORT_SYMBOL(iov_iter_single_seg_count); -- cgit v1.2.3 From 556351c1c09ad6511bc2eaa2c214992192f50410 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 17:01:22 -0400 Subject: iov_iter_advance(): don't modify ->iov_offset for ITER_DISCARD the field is not used for that flavour Signed-off-by: Al Viro --- lib/iov_iter.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index bdcf1fbeb2db..e6c5834da32d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -172,8 +172,6 @@ } else if (iov_iter_is_xarray(i)) { \ struct bio_vec v; \ iterate_xarray(i, n, v, skip, (X)) \ - } else if (iov_iter_is_discard(i)) { \ - skip += n; \ } \ i->count -= n; \ i->iov_offset = skip; \ -- cgit v1.2.3 From 8cd54c1c848031a87820e58d772166ffdf8c08c0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 22 Apr 2021 14:50:39 -0400 Subject: iov_iter: separate direction from flavour Instead of having them mixed in iter->type, use separate ->iter_type and ->data_source (u8 and bool resp.) And don't bother with (pseudo-) bitmap for the former - microoptimizations from being able to check if the flavour is one of two values are not worth the confusion for optimizer. It can't prove that we never get e.g. ITER_IOVEC | ITER_PIPE, so we end up with extra headache. 
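[editor's note: a sketch of constructing an iterator after this change; iov_iter_kvec() is shown, the other constructors are analogous, and 'buf'/'len' are hypothetical:

	struct kvec kv = { .iov_base = buf, .iov_len = len };
	struct iov_iter iter;

	iov_iter_kvec(&iter, WRITE, &kv, 1, len);	/* WRITE: iter is a data source */
	/* iov_iter_rw(&iter) now derives READ/WRITE from ->data_source */
]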
Signed-off-by: Al Viro --- include/linux/uio.h | 24 +++++++-------- lib/iov_iter.c | 85 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 58 insertions(+), 51 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 68079e2f34eb..ad76eef356b0 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -19,21 +19,17 @@ struct kvec { enum iter_type { /* iter types */ - ITER_IOVEC = 4, - ITER_KVEC = 8, - ITER_BVEC = 16, - ITER_PIPE = 32, - ITER_DISCARD = 64, - ITER_XARRAY = 128, + ITER_IOVEC, + ITER_KVEC, + ITER_BVEC, + ITER_PIPE, + ITER_XARRAY, + ITER_DISCARD, }; struct iov_iter { - /* - * Bit 0 is the read/write bit, set if we're writing. - * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and - * the caller isn't expecting to drop a page reference when done. - */ - unsigned int type; + u8 iter_type; + bool data_source; size_t iov_offset; size_t count; union { @@ -55,7 +51,7 @@ struct iov_iter { static inline enum iter_type iov_iter_type(const struct iov_iter *i) { - return i->type & ~(READ | WRITE); + return i->iter_type; } static inline bool iter_is_iovec(const struct iov_iter *i) @@ -90,7 +86,7 @@ static inline bool iov_iter_is_xarray(const struct iov_iter *i) static inline unsigned char iov_iter_rw(const struct iov_iter *i) { - return i->type & (READ | WRITE); + return i->data_source ? WRITE : READ; } /* diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e6c5834da32d..03c4e677b075 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -489,19 +489,15 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, size_t count) { WARN_ON(direction & ~(READ | WRITE)); - direction &= READ | WRITE; - - /* It will get better. Eventually... */ - if (uaccess_kernel()) { - i->type = ITER_KVEC | direction; - i->kvec = (struct kvec *)iov; - } else { - i->type = ITER_IOVEC | direction; - i->iov = iov; - } - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count; + WARN_ON_ONCE(uaccess_kernel()); + *i = (struct iov_iter) { + .iter_type = ITER_IOVEC, + .data_source = direction, + .iov = iov, + .nr_segs = nr_segs, + .iov_offset = 0, + .count = count + }; } EXPORT_SYMBOL(iov_iter_init); @@ -1218,11 +1214,14 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, size_t count) { WARN_ON(direction & ~(READ | WRITE)); - i->type = ITER_KVEC | (direction & (READ | WRITE)); - i->kvec = kvec; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count; + *i = (struct iov_iter){ + .iter_type = ITER_KVEC, + .data_source = direction, + .kvec = kvec, + .nr_segs = nr_segs, + .iov_offset = 0, + .count = count + }; } EXPORT_SYMBOL(iov_iter_kvec); @@ -1231,11 +1230,14 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, size_t count) { WARN_ON(direction & ~(READ | WRITE)); - i->type = ITER_BVEC | (direction & (READ | WRITE)); - i->bvec = bvec; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count; + *i = (struct iov_iter){ + .iter_type = ITER_BVEC, + .data_source = direction, + .bvec = bvec, + .nr_segs = nr_segs, + .iov_offset = 0, + .count = count + }; } EXPORT_SYMBOL(iov_iter_bvec); @@ -1245,12 +1247,15 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, { BUG_ON(direction != READ); WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size)); - i->type = ITER_PIPE | READ; - i->pipe = pipe; - i->head = pipe->head; - i->iov_offset = 0; - i->count = count; - i->start_head = i->head; + *i = (struct iov_iter){ + .iter_type = ITER_PIPE, + .data_source = false, + .pipe = pipe, + .head = pipe->head, + .start_head = 
pipe->head, + .iov_offset = 0, + .count = count + }; } EXPORT_SYMBOL(iov_iter_pipe); @@ -1271,11 +1276,14 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count) { BUG_ON(direction & ~1); - i->type = ITER_XARRAY | (direction & (READ | WRITE)); - i->xarray = xarray; - i->xarray_start = start; - i->count = count; - i->iov_offset = 0; + *i = (struct iov_iter) { + .iter_type = ITER_XARRAY, + .data_source = direction, + .xarray = xarray, + .xarray_start = start, + .count = count, + .iov_offset = 0 + }; } EXPORT_SYMBOL(iov_iter_xarray); @@ -1291,9 +1299,12 @@ EXPORT_SYMBOL(iov_iter_xarray); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) { BUG_ON(direction != READ); - i->type = ITER_DISCARD | READ; - i->count = count; - i->iov_offset = 0; + *i = (struct iov_iter){ + .iter_type = ITER_DISCARD, + .data_source = false, + .count = count, + .iov_offset = 0 + }; } EXPORT_SYMBOL(iov_iter_discard); -- cgit v1.2.3 From 185ac4d43669314f31c9c27d1ffc5ebcad791351 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 Apr 2021 12:58:53 -0400 Subject: iov_iter: optimize iov_iter_advance() for iovec and kvec We can do better than generic iterate_and_advance() for this one; inspired by bvec_iter_advance() (and massaged into that form by equivalent transformations). [fixed a braino caught by kernel test robot ] Signed-off-by: Al Viro --- lib/iov_iter.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 03c4e677b075..cd23c79acb94 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1096,28 +1096,42 @@ static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) i->iov_offset = bi.bi_bvec_done; } +static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) +{ + const struct iovec *iov, *end; + + if (!i->count) + return; + i->count -= size; + + size += i->iov_offset; // from beginning of current segment + for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) { + if (likely(size < iov->iov_len)) + break; + size -= iov->iov_len; + } + i->iov_offset = size; + i->nr_segs -= iov - i->iov; + i->iov = iov; +} + void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) size = i->count; - if (unlikely(iov_iter_is_pipe(i))) { + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { + /* iovec and kvec have identical layouts */ + iov_iter_iovec_advance(i, size); + } else if (iov_iter_is_bvec(i)) { + iov_iter_bvec_advance(i, size); + } else if (iov_iter_is_pipe(i)) { pipe_advance(i, size); - return; - } - if (unlikely(iov_iter_is_discard(i))) { - i->count -= size; - return; - } - if (unlikely(iov_iter_is_xarray(i))) { + } else if (unlikely(iov_iter_is_xarray(i))) { i->iov_offset += size; i->count -= size; - return; - } - if (iov_iter_is_bvec(i)) { - iov_iter_bvec_advance(i, size); - return; + } else if (iov_iter_is_discard(i)) { + i->count -= size; } - iterate_and_advance(i, size, v, 0, 0, 0, 0) } EXPORT_SYMBOL(iov_iter_advance); -- cgit v1.2.3 From 8409a0d261e20180361e7afe6d89847d1bad4ce8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 11:57:37 -0400 Subject: sanitize iov_iter_fault_in_readable() 1) constify iov_iter argument; we are not advancing it in this primitive. 2) cap the amount requested by the amount of data in iov_iter. 
All existing callers should've been safe, but the check is really cheap and doing it here makes for easier analysis, as well as more consistent semantics among the primitives. 3) don't bother with iterate_iovec(). Explicit loop is not any harder to follow, and we get rid of standalone iterate_iovec() users - it's only used by iterate_and_advance() and (soon to be gone) iterate_all_kinds(). Signed-off-by: Al Viro --- include/linux/uio.h | 2 +- lib/iov_iter.c | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index ad76eef356b0..b5cf54859109 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -119,7 +119,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index cd23c79acb94..2b543bea1e0d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -466,19 +466,25 @@ out: * Return 0 on success, or non-zero if the memory could not be accessed (i.e. * because it is an invalid address). */ -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) +int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes) { - size_t skip = i->iov_offset; - const struct iovec *iov; - int err; - struct iovec v; - if (iter_is_iovec(i)) { - iterate_iovec(i, bytes, v, iov, skip, ({ - err = fault_in_pages_readable(v.iov_base, v.iov_len); + const struct iovec *p; + size_t skip; + + if (bytes > i->count) + bytes = i->count; + for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) { + size_t len = min(bytes, p->iov_len - skip); + int err; + + if (unlikely(!len)) + continue; + err = fault_in_pages_readable(p->iov_base + skip, len); if (unlikely(err)) - return err; - 0;})) + return err; + bytes -= len; + } } return 0; } -- cgit v1.2.3 From 9221d2e37b729077797e6d02012289892dbdb859 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Apr 2021 00:44:35 -0400 Subject: iov_iter_alignment(): don't bother with iterate_all_kinds() It's easier to go over the array manually. We need to watch out for truncated iov_iter, though - iovec array might cover more than i->count. 
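[editor's note: a typical consumer, modelled loosely on the direct-I/O paths (the variable names are hypothetical): because the result ORs together every base address and length, a single mask test catches both kinds of misalignment:

	/* reject iterators misaligned for the device's logical block size */
	if (iov_iter_alignment(iter) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;
]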
Signed-off-by: Al Viro --- lib/iov_iter.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 2b543bea1e0d..ed9318358b68 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1328,27 +1328,70 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) } EXPORT_SYMBOL(iov_iter_discard); -unsigned long iov_iter_alignment(const struct iov_iter *i) +static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { unsigned long res = 0; size_t size = i->count; + size_t skip = i->iov_offset; + unsigned k; + + for (k = 0; k < i->nr_segs; k++, skip = 0) { + size_t len = i->iov[k].iov_len - skip; + if (len) { + res |= (unsigned long)i->iov[k].iov_base + skip; + if (len > size) + len = size; + res |= len; + size -= len; + if (!size) + break; + } + } + return res; +} - if (unlikely(iov_iter_is_pipe(i))) { +static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) +{ + unsigned res = 0; + size_t size = i->count; + unsigned skip = i->iov_offset; + unsigned k; + + for (k = 0; k < i->nr_segs; k++, skip = 0) { + size_t len = i->bvec[k].bv_len - skip; + res |= (unsigned long)i->bvec[k].bv_offset + skip; + if (len > size) + len = size; + res |= len; + size -= len; + if (!size) + break; + } + return res; +} + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + /* iovec and kvec have identical layouts */ + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) + return iov_iter_alignment_iovec(i); + + if (iov_iter_is_bvec(i)) + return iov_iter_alignment_bvec(i); + + if (iov_iter_is_pipe(i)) { unsigned int p_mask = i->pipe->ring_size - 1; + size_t size = i->count; if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) return size | i->iov_offset; return size; } - if (unlikely(iov_iter_is_xarray(i))) + + if (iov_iter_is_xarray(i)) return (i->xarray_start + i->iov_offset) | i->count; - iterate_all_kinds(i, size, v, - (res |= (unsigned long)v.iov_base | v.iov_len, 0), - res |= v.bv_offset | v.bv_len, - res |= (unsigned long)v.iov_base | v.iov_len, - res |= v.bv_offset | v.bv_len - ) - return res; + + return 0; } EXPORT_SYMBOL(iov_iter_alignment); -- cgit v1.2.3 From 610c7a71543df32fcecf64004f974905f5881fb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Apr 2021 01:03:16 -0400 Subject: iov_iter_gap_alignment(): get rid of iterate_all_kinds() For one thing, it's only used for iovec (and makes sense only for those). For another, here we don't care about iov_offset, since the beginning of the first segment and the end of the last one are ignored. So it makes a lot more sense to just walk through the iovec array... We need to deal with the case of truncated iov_iter, but unlike the situation with iov_iter_alignment() we don't care where the last segment ends - just which segment is the last one. 
[fixed a braino spotted by Qian Cai ] Signed-off-by: Al Viro --- lib/iov_iter.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ed9318358b68..6569e3f5d01d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1398,23 +1398,24 @@ EXPORT_SYMBOL(iov_iter_alignment); unsigned long iov_iter_gap_alignment(const struct iov_iter *i) { unsigned long res = 0; + unsigned long v = 0; size_t size = i->count; + unsigned k; - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { - WARN_ON(1); + if (WARN_ON(!iter_is_iovec(i))) return ~0U; - } - iterate_all_kinds(i, size, v, - (res |= (!res ? 0 : (unsigned long)v.iov_base) | - (size != v.iov_len ? size : 0), 0), - (res |= (!res ? 0 : (unsigned long)v.bv_offset) | - (size != v.bv_len ? size : 0)), - (res |= (!res ? 0 : (unsigned long)v.iov_base) | - (size != v.iov_len ? size : 0)), - (res |= (!res ? 0 : (unsigned long)v.bv_offset) | - (size != v.bv_len ? size : 0)) - ); + for (k = 0; k < i->nr_segs; k++) { + if (i->iov[k].iov_len) { + unsigned long base = (unsigned long)i->iov[k].iov_base; + if (v) // if not the first one + res |= base | v; // this start | previous end + v = base + i->iov[k].iov_len; + if (size <= i->iov[k].iov_len) + break; + size -= i->iov[k].iov_len; + } + } return res; } EXPORT_SYMBOL(iov_iter_gap_alignment); -- cgit v1.2.3 From 3d671ca62a08114810321a2a5e9d3523de5fb1b4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Apr 2021 09:14:44 -0400 Subject: get rid of iterate_all_kinds() in iov_iter_get_pages()/iov_iter_get_pages_alloc() Here iterate_all_kinds() is used just to find the first (non-empty, in case of iovec) segment. Which can be easily done explicitly. Note that in bvec case we now can get more than PAGE_SIZE worth of them, in case when we have a compound page in bvec and a range that crosses a subpage boundary. Older behaviour had been to stop on that boundary; we used to get the right first page (for_each_bvec() took care of that), but that was all we'd got. 
Signed-off-by: Al Viro --- lib/iov_iter.c | 147 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 91 insertions(+), 56 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 6569e3f5d01d..3fd331b3b8f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1450,9 +1450,6 @@ static ssize_t pipe_get_pages(struct iov_iter *i, unsigned int iter_head, npages; size_t capacity; - if (!maxsize) - return 0; - if (!sanity(i)) return -EFAULT; @@ -1533,29 +1530,67 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i, return actual; } +/* must be done on non-empty ITER_IOVEC one */ +static unsigned long first_iovec_segment(const struct iov_iter *i, + size_t *size, size_t *start, + size_t maxsize, unsigned maxpages) +{ + size_t skip; + long k; + + for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) { + unsigned long addr = (unsigned long)i->iov[k].iov_base + skip; + size_t len = i->iov[k].iov_len - skip; + + if (unlikely(!len)) + continue; + if (len > maxsize) + len = maxsize; + len += (*start = addr % PAGE_SIZE); + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; + *size = len; + return addr & PAGE_MASK; + } + BUG(); // if it had been empty, we wouldn't get called +} + +/* must be done on non-empty ITER_BVEC one */ +static struct page *first_bvec_segment(const struct iov_iter *i, + size_t *size, size_t *start, + size_t maxsize, unsigned maxpages) +{ + struct page *page; + size_t skip = i->iov_offset, len; + + len = i->bvec->bv_len - skip; + if (len > maxsize) + len = maxsize; + skip += i->bvec->bv_offset; + page = i->bvec->bv_page + skip / PAGE_SIZE; + len += (*start = skip % PAGE_SIZE); + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; + *size = len; + return page; +} + ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start) { + size_t len; + int n, res; + if (maxsize > i->count) maxsize = i->count; + if (!maxsize) + return 0; - if (unlikely(iov_iter_is_pipe(i))) - return pipe_get_pages(i, pages, maxsize, maxpages, start); - if (unlikely(iov_iter_is_xarray(i))) - return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); - if (unlikely(iov_iter_is_discard(i))) - return -EFAULT; - - iterate_all_kinds(i, maxsize, v, ({ - unsigned long addr = (unsigned long)v.iov_base; - size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); - int n; - int res; + if (likely(iter_is_iovec(i))) { + unsigned long addr; - if (len > maxpages * PAGE_SIZE) - len = maxpages * PAGE_SIZE; - addr &= ~(PAGE_SIZE - 1); + addr = first_iovec_segment(i, &len, start, maxsize, maxpages); n = DIV_ROUND_UP(len, PAGE_SIZE); res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, @@ -1563,17 +1598,21 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, if (unlikely(res < 0)) return res; return (res == n ? 
len : res * PAGE_SIZE) - *start; - 0;}),({ - /* can't be more than PAGE_SIZE */ - *start = v.bv_offset; - get_page(*pages = v.bv_page); - return v.bv_len; - }),({ - return -EFAULT; - }), - 0 - ) - return 0; + } + if (iov_iter_is_bvec(i)) { + struct page *page; + + page = first_bvec_segment(i, &len, start, maxsize, maxpages); + n = DIV_ROUND_UP(len, PAGE_SIZE); + while (n--) + get_page(*pages++ = page++); + return len - *start; + } + if (iov_iter_is_pipe(i)) + return pipe_get_pages(i, pages, maxsize, maxpages, start); + if (iov_iter_is_xarray(i)) + return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); + return -EFAULT; } EXPORT_SYMBOL(iov_iter_get_pages); @@ -1590,9 +1629,6 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, unsigned int iter_head, npages; ssize_t n; - if (!maxsize) - return 0; - if (!sanity(i)) return -EFAULT; @@ -1665,24 +1701,18 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; + size_t len; + int n, res; if (maxsize > i->count) maxsize = i->count; + if (!maxsize) + return 0; - if (unlikely(iov_iter_is_pipe(i))) - return pipe_get_pages_alloc(i, pages, maxsize, start); - if (unlikely(iov_iter_is_xarray(i))) - return iter_xarray_get_pages_alloc(i, pages, maxsize, start); - if (unlikely(iov_iter_is_discard(i))) - return -EFAULT; - - iterate_all_kinds(i, maxsize, v, ({ - unsigned long addr = (unsigned long)v.iov_base; - size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); - int n; - int res; + if (likely(iter_is_iovec(i))) { + unsigned long addr; - addr &= ~(PAGE_SIZE - 1); + addr = first_iovec_segment(i, &len, start, maxsize, ~0U); n = DIV_ROUND_UP(len, PAGE_SIZE); p = get_pages_array(n); if (!p) @@ -1695,19 +1725,24 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, } *pages = p; return (res == n ? 
len : res * PAGE_SIZE) - *start; - 0;}),({ - /* can't be more than PAGE_SIZE */ - *start = v.bv_offset; - *pages = p = get_pages_array(1); + } + if (iov_iter_is_bvec(i)) { + struct page *page; + + page = first_bvec_segment(i, &len, start, maxsize, ~0U); + n = DIV_ROUND_UP(len, PAGE_SIZE); + *pages = p = get_pages_array(n); if (!p) return -ENOMEM; - get_page(*p = v.bv_page); - return v.bv_len; - }),({ - return -EFAULT; - }), 0 - ) - return 0; + while (n--) + get_page(*p++ = page++); + return len - *start; + } + if (iov_iter_is_pipe(i)) + return pipe_get_pages_alloc(i, pages, maxsize, start); + if (iov_iter_is_xarray(i)) + return iter_xarray_get_pages_alloc(i, pages, maxsize, start); + return -EFAULT; } EXPORT_SYMBOL(iov_iter_get_pages_alloc); -- cgit v1.2.3 From 66531c65aa254e77c935785036beb50985d0fe89 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Apr 2021 16:00:48 -0400 Subject: iov_iter_npages(): don't bother with iterate_all_kinds() note that in bvec case pages can be compound ones - we can't just assume that each segment is covered by one (sub)page Signed-off-by: Al Viro --- lib/iov_iter.c | 88 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 3fd331b3b8f2..1c65de175371 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1864,19 +1864,56 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, } EXPORT_SYMBOL(hash_and_copy_to_iter); -int iov_iter_npages(const struct iov_iter *i, int maxpages) +static int iov_npages(const struct iov_iter *i, int maxpages) { - size_t size = i->count; + size_t skip = i->iov_offset, size = i->count; + const struct iovec *p; int npages = 0; - if (!size) - return 0; - if (unlikely(iov_iter_is_discard(i))) - return 0; + for (p = i->iov; size; skip = 0, p++) { + unsigned offs = offset_in_page(p->iov_base + skip); + size_t len = min(p->iov_len - skip, size); - if (unlikely(iov_iter_is_pipe(i))) { - struct pipe_inode_info *pipe = i->pipe; + if (len) { + size -= len; + npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); + if (unlikely(npages > maxpages)) + return maxpages; + } + } + return npages; +} + +static int bvec_npages(const struct iov_iter *i, int maxpages) +{ + size_t skip = i->iov_offset, size = i->count; + const struct bio_vec *p; + int npages = 0; + + for (p = i->bvec; size; skip = 0, p++) { + unsigned offs = (p->bv_offset + skip) % PAGE_SIZE; + size_t len = min(p->bv_len - skip, size); + + size -= len; + npages += DIV_ROUND_UP(offs + len, PAGE_SIZE); + if (unlikely(npages > maxpages)) + return maxpages; + } + return npages; +} + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + if (unlikely(!i->count)) + return 0; + /* iovec and kvec have identical layouts */ + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) + return iov_npages(i, maxpages); + if (iov_iter_is_bvec(i)) + return bvec_npages(i, maxpages); + if (iov_iter_is_pipe(i)) { unsigned int iter_head; + int npages; size_t off; if (!sanity(i)) @@ -1884,11 +1921,13 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) data_start(i, &iter_head, &off); /* some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, pipe->tail, pipe); - if (npages >= maxpages) - return maxpages; - } else if (unlikely(iov_iter_is_xarray(i))) { + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); + return min(npages, maxpages); + } + if (iov_iter_is_xarray(i)) { + size_t size = i->count; unsigned offset; + int npages; offset = 
(i->xarray_start + i->iov_offset) & ~PAGE_MASK; @@ -1900,28 +1939,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (size) npages++; } - if (npages >= maxpages) - return maxpages; - } else iterate_all_kinds(i, size, v, ({ - unsigned long p = (unsigned long)v.iov_base; - npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) - - p / PAGE_SIZE; - if (npages >= maxpages) - return maxpages; - 0;}),({ - npages++; - if (npages >= maxpages) - return maxpages; - }),({ - unsigned long p = (unsigned long)v.iov_base; - npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) - - p / PAGE_SIZE; - if (npages >= maxpages) - return maxpages; - }), - 0 - ) - return npages; + return min(npages, maxpages); + } + return 0; } EXPORT_SYMBOL(iov_iter_npages); -- cgit v1.2.3 From e4f8df86798aea60aff6cfff40252b709431f850 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 May 2021 11:05:29 -0400 Subject: [xarray] iov_iter_npages(): just use DIV_ROUND_UP() Compiler is capable of recognizing division by power of 2 and turning it into shifts. Signed-off-by: Al Viro --- lib/iov_iter.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1c65de175371..72c5bb794e8d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1925,20 +1925,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) return min(npages, maxpages); } if (iov_iter_is_xarray(i)) { - size_t size = i->count; - unsigned offset; - int npages; - - offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK; - - npages = 1; - if (size > PAGE_SIZE - offset) { - size -= PAGE_SIZE - offset; - npages += size >> PAGE_SHIFT; - size &= ~PAGE_MASK; - if (size) - npages++; - } + unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE; + int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE); return min(npages, maxpages); } return 0; -- cgit v1.2.3 From f0b65f39ac505e8f1dcdaa165aa7b8c0bd6fd454 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Apr 2021 10:26:41 -0400 Subject: iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant Replacement is called copy_page_from_iter_atomic(); unlike the old primitive the callers do *not* need to do iov_iter_advance() after it. In case when they end up consuming less than they'd been given they need to do iov_iter_revert() on everything they had not consumed. That, however, needs to be done only on slow paths. All in-tree callers converted. And that kills the last user of iterate_all_kinds() Signed-off-by: Al Viro --- Documentation/filesystems/porting.rst | 9 +++++++++ fs/btrfs/file.c | 23 +++++++++++------------ fs/fuse/file.c | 3 +-- fs/iomap/buffered-io.c | 14 +++++++------- fs/ntfs/file.c | 4 +--- include/linux/uio.h | 4 ++-- lib/iov_iter.c | 30 ++++-------------------------- mm/filemap.c | 16 ++++++++-------- 8 files changed, 43 insertions(+), 60 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 0302035781be..43b492d08dec 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -890,3 +890,12 @@ been called or returned with non -EIOCBQUEUED code. mnt_want_write_file() can now only be paired with mnt_drop_write_file(), whereas previously it could be paired with mnt_drop_write() as well. + +--- + +**mandatory** + +iov_iter_copy_from_user_atomic() is gone; use copy_page_from_iter_atomic(). +The difference is copy_page_from_iter_atomic() advances the iterator and +you don't need iov_iter_advance() after it. 
However, if you decide to use +only a part of obtained data, you should do iov_iter_revert(). diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 864c08d08a35..78cb8f9eaa6b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -398,7 +398,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, /* * Copy data from userspace to the current page */ - copied = iov_iter_copy_from_user_atomic(page, i, offset, count); + copied = copy_page_from_iter_atomic(page, offset, count, i); /* Flush processor's dcache for this page */ flush_dcache_page(page); @@ -412,20 +412,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, * The rest of the btrfs_file_write code will fall * back to page at a time copies after we return 0. */ - if (!PageUptodate(page) && copied < count) - copied = 0; + if (unlikely(copied < count)) { + if (!PageUptodate(page)) { + iov_iter_revert(i, copied); + copied = 0; + } + if (!copied) + break; + } - iov_iter_advance(i, copied); write_bytes -= copied; total_copied += copied; - - /* Return to btrfs_file_write_iter to fault page */ - if (unlikely(copied == 0)) - break; - - if (copied < PAGE_SIZE - offset) { - offset += copied; - } else { + offset += copied; + if (offset == PAGE_SIZE) { pg++; offset = 0; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 44bd301fa4fb..4722fa31a185 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1171,10 +1171,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, if (mapping_writably_mapped(mapping)) flush_dcache_page(page); - tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); + tmp = copy_page_from_iter_atomic(page, offset, bytes, ii); flush_dcache_page(page); - iov_iter_advance(ii, tmp); if (!tmp) { unlock_page(page); put_page(page); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 354b41d20e5d..c5ff13e0e7cf 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -785,13 +785,15 @@ again: if (mapping_writably_mapped(inode->i_mapping)) flush_dcache_page(page); - copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); + copied = copy_page_from_iter_atomic(page, offset, bytes, i); status = iomap_write_end(inode, pos, bytes, copied, page, iomap, srcmap); - cond_resched(); + if (unlikely(copied != status)) + iov_iter_revert(i, copied - status); + cond_resched(); if (unlikely(status == 0)) { /* * A short copy made iomap_write_end() reject the @@ -803,11 +805,9 @@ again: bytes = copied; goto again; } - copied = status; - iov_iter_advance(i, copied); - pos += copied; - written += copied; - length -= copied; + pos += status; + written += status; + length -= status; balance_dirty_pages_ratelimited(inode->i_mapping); } while (iov_iter_count(i) && length); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 0666d4578137..ab4f3362466d 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1690,9 +1690,7 @@ static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, len = PAGE_SIZE - ofs; if (len > bytes) len = bytes; - copied = iov_iter_copy_from_user_atomic(*pages, i, ofs, - len); - iov_iter_advance(i, copied); + copied = copy_page_from_iter_atomic(*pages, ofs, len, i); total += copied; bytes -= copied; if (!bytes) diff --git a/include/linux/uio.h b/include/linux/uio.h index b5cf54859109..82c3c3e819e0 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -115,8 +115,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) }; } -size_t iov_iter_copy_from_user_atomic(struct page *page, - struct iov_iter 
*i, unsigned long offset, size_t bytes); +size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, + size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 72c5bb794e8d..362e8b5a5dc5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -114,28 +114,6 @@ n = wanted - n; \ } -#define iterate_all_kinds(i, n, v, I, B, K, X) { \ - if (likely(n)) { \ - size_t skip = i->iov_offset; \ - if (likely(iter_is_iovec(i))) { \ - const struct iovec *iov; \ - struct iovec v; \ - iterate_iovec(i, n, v, iov, skip, (I)) \ - } else if (iov_iter_is_bvec(i)) { \ - struct bio_vec v; \ - struct bvec_iter __bi; \ - iterate_bvec(i, n, v, __bi, skip, (B)) \ - } else if (iov_iter_is_kvec(i)) { \ - const struct kvec *kvec; \ - struct kvec v; \ - iterate_kvec(i, n, v, kvec, skip, (K)) \ - } else if (iov_iter_is_xarray(i)) { \ - struct bio_vec v; \ - iterate_xarray(i, n, v, skip, (X)); \ - } \ - } \ -} - #define iterate_and_advance(i, n, v, I, B, K, X) { \ if (unlikely(i->count < n)) \ n = i->count; \ @@ -1009,8 +987,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_zero); -size_t iov_iter_copy_from_user_atomic(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes) +size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes, + struct iov_iter *i) { char *kaddr = kmap_atomic(page), *p = kaddr + offset; if (unlikely(!page_copy_sane(page, offset, bytes))) { @@ -1022,7 +1000,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, WARN_ON(1); return 0; } - iterate_all_kinds(i, bytes, v, + iterate_and_advance(i, bytes, v, copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), @@ -1033,7 +1011,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, kunmap_atomic(kaddr); return bytes; } -EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); +EXPORT_SYMBOL(copy_page_from_iter_atomic); static inline void pipe_truncate(struct iov_iter *i) { diff --git a/mm/filemap.c b/mm/filemap.c index 0be24942bf8e..cf9de790f493 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3661,14 +3661,16 @@ again: if (mapping_writably_mapped(mapping)) flush_dcache_page(page); - copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); + copied = copy_page_from_iter_atomic(page, offset, bytes, i); flush_dcache_page(page); status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); - if (unlikely(status < 0)) - break; - + if (unlikely(status != copied)) { + iov_iter_revert(i, copied - max(status, 0L)); + if (unlikely(status < 0)) + break; + } cond_resched(); if (unlikely(status == 0)) { @@ -3682,10 +3684,8 @@ again: bytes = copied; goto again; } - copied = status; - iov_iter_advance(i, copied); - pos += copied; - written += copied; + pos += status; + written += status; balance_dirty_pages_ratelimited(mapping); } while (iov_iter_count(i)); -- cgit v1.2.3 From 594e450b3f4435a9d663df3d48d7fa34e685cbd1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jun 2021 10:19:30 -0400 Subject: csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter() Namely, have off counted starting from 0 rather than from csstate->off. 
To compensate we need to shift the initial value (csstate->sum) (rotate by 8 bits, as usual for csum) and do the same after we are finished adding the pieces up. What we get out of that is a bit more redundancy in our variables - from is always equal to addr + off, which will be useful several commits down the road. Signed-off-by: Al Viro --- include/net/checksum.h | 14 ++++++++------ lib/iov_iter.c | 8 ++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/net/checksum.h b/include/net/checksum.h index 0d05b9e8690b..5b96d5bd6e54 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -80,16 +80,18 @@ static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) return csum16_add(csum, ~addend); } -static inline __wsum -csum_block_add(__wsum csum, __wsum csum2, int offset) +static inline __wsum csum_shift(__wsum sum, int offset) { - u32 sum = (__force u32)csum2; - /* rotate sum to align it with a 16b boundary */ if (offset & 1) - sum = ror32(sum, 8); + return (__force __wsum)ror32((__force u32)sum, 8); + return sum; +} - return csum_add(csum, (__force __wsum)sum); +static inline __wsum +csum_block_add(__wsum csum, __wsum csum2, int offset) +{ + return csum_add(csum, csum_shift(csum2, offset)); } static inline __wsum diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 362e8b5a5dc5..93ae0c2c8d66 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1781,8 +1781,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, if (unlikely(iov_iter_is_pipe(i))) return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); - sum = csstate->csum; - off = csstate->off; + sum = csum_shift(csstate->csum, csstate->off); + off = 0; if (unlikely(iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; @@ -1817,8 +1817,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, off += v.bv_len; }) ) - csstate->csum = sum; - csstate->off = off; + csstate->csum = csum_shift(sum, csstate->off); + csstate->off += bytes; return bytes; } EXPORT_SYMBOL(csum_and_copy_to_iter); -- cgit v1.2.3 From f5da83545f4ed2c1a1648b7d760a6fc358798e52 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 Apr 2021 20:59:08 -0400 Subject: iterate_and_advance(): get rid of magic in case when n is 0 iov_iter_advance() needs to do some non-trivial work when it's given 0 as argument (skip all empty iovecs, mostly). We used to implement it via iterate_and_advance(); we no longer do so and for all other users of iterate_and_advance() zero length is a no-op. Signed-off-by: Al Viro --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 93ae0c2c8d66..763114a754c5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -117,7 +117,7 @@ #define iterate_and_advance(i, n, v, I, B, K, X) { \ if (unlikely(i->count < n)) \ n = i->count; \ - if (i->count) { \ + if (likely(n)) { \ size_t skip = i->iov_offset; \ if (likely(iter_is_iovec(i))) { \ const struct iovec *iov; \ -- cgit v1.2.3 From 7a1bcb5d255d4fd8b9725c3cf7ee0880a6369d2f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Apr 2021 23:46:09 -0400 Subject: iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec Premature optimization is the root of all evil... Trying to unroll the first pass through the loop makes it harder to follow and not just for readers - compiler ends up generating worse code than it would on a "non-optimized" loop. 
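To make the shape concrete, here is a stand-alone userland sketch (invented names and step() contract; this is not the kernel macro itself) of the non-unrolled loop, with the first segment going through the same do/while body as every later one:

    #include <stdio.h>
    #include <string.h>

    struct seg { char *base; size_t len; };

    static char out[32];
    static size_t out_pos;

    /* a step that cannot fail: returns 0 bytes left over */
    static size_t gather(char *p, size_t len)
    {
            memcpy(out + out_pos, p, len);
            out_pos += len;
            return 0;
    }

    /* caller guarantees the segments hold at least n bytes past skip */
    static size_t walk(const struct seg *p, size_t skip, size_t n,
                       size_t (*step)(char *, size_t))
    {
            size_t wanted = n;

            do {
                    size_t len = n < p->len - skip ? n : p->len - skip;
                    if (len) {
                            size_t left = step(p->base + skip, len);
                            len -= left;
                            skip += len;
                            n -= len;
                            if (skip < p->len)
                                    break;  /* short step, or request ends here */
                    }
                    p++;            /* segment exhausted (or empty): next one */
                    skip = 0;
            } while (n);
            return wanted - n;      /* bytes actually processed */
    }

    int main(void)
    {
            char s1[] = "iov_", s2[] = "iter";
            struct seg v[2] = { { s1, 4 }, { s2, 4 } };

            printf("%zu bytes: %s\n", walk(v, 0, 8, gather), out);
            return 0;
    }

One copy of the step and one exit path; the compiler sees a plain loop rather than a peeled first pass.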
Signed-off-by: Al Viro --- lib/iov_iter.c | 91 +++++++++++++++++++++++----------------------------------- 1 file changed, 36 insertions(+), 55 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 763114a754c5..2098059da64c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -16,55 +16,44 @@ #define PIPE_PARANOIA /* for now */ -#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ - size_t left; \ - size_t wanted = n; \ - __p = i->iov; \ - __v.iov_len = min(n, __p->iov_len - skip); \ - if (likely(__v.iov_len)) { \ - __v.iov_base = __p->iov_base + skip; \ - left = (STEP); \ - __v.iov_len -= left; \ - skip += __v.iov_len; \ - n -= __v.iov_len; \ - } else { \ - left = 0; \ - } \ - while (unlikely(!left && n)) { \ - __p++; \ - __v.iov_len = min(n, __p->iov_len); \ - if (unlikely(!__v.iov_len)) \ - continue; \ - __v.iov_base = __p->iov_base; \ - left = (STEP); \ - __v.iov_len -= left; \ - skip = __v.iov_len; \ - n -= __v.iov_len; \ - } \ - n = wanted - n; \ +#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ + size_t left; \ + size_t wanted = n; \ + __p = i->iov; \ + do { \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + if (skip < __p->iov_len) \ + break; \ + } \ + __p++; \ + skip = 0; \ + } while (n); \ + n = wanted - n; \ } -#define iterate_kvec(i, n, __v, __p, skip, STEP) { \ - size_t wanted = n; \ - __p = i->kvec; \ - __v.iov_len = min(n, __p->iov_len - skip); \ - if (likely(__v.iov_len)) { \ - __v.iov_base = __p->iov_base + skip; \ - (void)(STEP); \ - skip += __v.iov_len; \ - n -= __v.iov_len; \ - } \ - while (unlikely(n)) { \ - __p++; \ - __v.iov_len = min(n, __p->iov_len); \ - if (unlikely(!__v.iov_len)) \ - continue; \ - __v.iov_base = __p->iov_base; \ - (void)(STEP); \ - skip = __v.iov_len; \ - n -= __v.iov_len; \ - } \ - n = wanted; \ +#define iterate_kvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->kvec; \ + do { \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + (void)(STEP); \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + if (skip < __p->iov_len) \ + break; \ + } \ + __p++; \ + skip = 0; \ + } while (n); \ + n = wanted - n; \ } #define iterate_bvec(i, n, __v, __bi, skip, STEP) { \ @@ -123,10 +112,6 @@ const struct iovec *iov; \ struct iovec v; \ iterate_iovec(i, n, v, iov, skip, (I)) \ - if (skip == iov->iov_len) { \ - iov++; \ - skip = 0; \ - } \ i->nr_segs -= iov - i->iov; \ i->iov = iov; \ } else if (iov_iter_is_bvec(i)) { \ @@ -141,10 +126,6 @@ const struct kvec *kvec; \ struct kvec v; \ iterate_kvec(i, n, v, kvec, skip, (K)) \ - if (skip == kvec->iov_len) { \ - kvec++; \ - skip = 0; \ - } \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ -- cgit v1.2.3 From 5c67aa90cd5c59912ee71cff879e8f1ab237ad88 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Apr 2021 23:57:42 -0400 Subject: iov_iter: unify iterate_iovec and iterate_kvec The differences between iterate_iovec and iterate_kvec are minor: * kvec callback is treated as if it returned 0 * initialization of __p is with i->iov and i->kvec resp. which is trivially dealt with. No code generation changes - compiler is quite capable of turning left = ((void)(STEP), 0); __v.iov_len -= left; (with no accesses to left downstream) and (void)(STEP); into the same code. 
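To see why that transformation is safe, a minimal compilable sketch (invented example, not kernel code) of the ((void)(STEP),0) comma expression doing the wrapping:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char dst[8], src[8] = "abcdefg";
            size_t left;

            /* a kvec-style step: memcpy() cannot fail, so wrapping it in a
             * comma expression that evaluates to 0 satisfies a caller that
             * expects "bytes left uncopied" back from the step */
            left = ((void)memcpy(dst, src, sizeof(src)), 0);
            printf("left = %zu\n", left);           /* prints 0 */
            return 0;
    }

With nothing reading left afterwards, the dead assignment is eliminated, which is why the unified macro compiles to the same code.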
Signed-off-by: Al Viro --- lib/iov_iter.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 2098059da64c..fc071d7b4528 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -16,10 +16,10 @@ #define PIPE_PARANOIA /* for now */ +/* covers iovec and kvec alike */ #define iterate_iovec(i, n, __v, __p, skip, STEP) { \ size_t left; \ size_t wanted = n; \ - __p = i->iov; \ do { \ __v.iov_len = min(n, __p->iov_len - skip); \ if (likely(__v.iov_len)) { \ @@ -37,25 +37,6 @@ n = wanted - n; \ } -#define iterate_kvec(i, n, __v, __p, skip, STEP) { \ - size_t wanted = n; \ - __p = i->kvec; \ - do { \ - __v.iov_len = min(n, __p->iov_len - skip); \ - if (likely(__v.iov_len)) { \ - __v.iov_base = __p->iov_base + skip; \ - (void)(STEP); \ - skip += __v.iov_len; \ - n -= __v.iov_len; \ - if (skip < __p->iov_len) \ - break; \ - } \ - __p++; \ - skip = 0; \ - } while (n); \ - n = wanted - n; \ -} - #define iterate_bvec(i, n, __v, __bi, skip, STEP) { \ struct bvec_iter __start; \ __start.bi_size = n; \ @@ -109,7 +90,7 @@ if (likely(n)) { \ size_t skip = i->iov_offset; \ if (likely(iter_is_iovec(i))) { \ - const struct iovec *iov; \ + const struct iovec *iov = i->iov; \ struct iovec v; \ iterate_iovec(i, n, v, iov, skip, (I)) \ i->nr_segs -= iov - i->iov; \ @@ -123,9 +104,10 @@ i->nr_segs -= i->bvec - bvec; \ skip = __bi.bi_bvec_done; \ } else if (iov_iter_is_kvec(i)) { \ - const struct kvec *kvec; \ + const struct kvec *kvec = i->kvec; \ struct kvec v; \ - iterate_kvec(i, n, v, kvec, skip, (K)) \ + iterate_iovec(i, n, v, kvec, skip, \ + ((void)(K),0)) \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ -- cgit v1.2.3 From 7491a2bf64e3a4f1699deba97728cd9f8856bdf3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Apr 2021 20:19:14 -0400 Subject: iterate_bvec(): expand bvec.h macro forest, massage a bit ... incidentally, using pointer instead of index in an array (the only change here) trims half-kilobyte of .text... 
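For reference, a small userland model (invented names; PAGE_SIZE hardwired here) of the page-splitting arithmetic the expanded loop performs on a multipage bvec segment - the subpage to map and the in-page offset both fall out of one division:

    #include <stdio.h>

    #define PAGE_SIZE 4096u

    struct chunk { unsigned page_idx; unsigned pg_off; size_t len; };

    /* first chunk of a bvec {bv_offset, bv_len} after 'skip' bytes,
     * clipped so it never crosses a page boundary */
    static struct chunk bvec_chunk(unsigned bv_offset, size_t bv_len,
                                   size_t skip, size_t n)
    {
            unsigned offset = bv_offset + skip;
            struct chunk c;

            c.page_idx = offset / PAGE_SIZE;        /* which subpage to map */
            c.pg_off   = offset % PAGE_SIZE;
            c.len = n < bv_len - skip ? n : bv_len - skip;
            if (c.len > PAGE_SIZE - c.pg_off)
                    c.len = PAGE_SIZE - c.pg_off;
            return c;
    }

    int main(void)
    {
            struct chunk c = bvec_chunk(4000, 8192, 0, 8192);
            printf("page %u, offset %u, len %zu\n",
                   c.page_idx, c.pg_off, c.len);    /* page 0, offset 4000, len 96 */
            return 0;
    }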
Signed-off-by: Al Viro --- lib/iov_iter.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index fc071d7b4528..58d53deb62b8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -37,14 +37,23 @@ n = wanted - n; \ } -#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \ - struct bvec_iter __start; \ - __start.bi_size = n; \ - __start.bi_bvec_done = skip; \ - __start.bi_idx = 0; \ - for_each_bvec(__v, i->bvec, __bi, __start) { \ - (void)(STEP); \ - } \ +#define iterate_bvec(i, n, __v, p, skip, STEP) { \ + size_t wanted = n; \ + while (n) { \ + unsigned offset = p->bv_offset + skip; \ + __v.bv_offset = offset % PAGE_SIZE; \ + __v.bv_page = p->bv_page + offset / PAGE_SIZE; \ + __v.bv_len = min(min(n, p->bv_len - skip), \ + (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ + (void)(STEP); \ + skip += __v.bv_len; \ + if (skip == p->bv_len) { \ + skip = 0; \ + p++; \ + } \ + n -= __v.bv_len; \ + } \ + n = wanted - n; \ } #define iterate_xarray(i, n, __v, skip, STEP) { \ @@ -98,11 +107,9 @@ } else if (iov_iter_is_bvec(i)) { \ const struct bio_vec *bvec = i->bvec; \ struct bio_vec v; \ - struct bvec_iter __bi; \ - iterate_bvec(i, n, v, __bi, skip, (B)) \ - i->bvec = __bvec_iter_bvec(i->bvec, __bi); \ - i->nr_segs -= i->bvec - bvec; \ - skip = __bi.bi_bvec_done; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + i->nr_segs -= bvec - i->bvec; \ + i->bvec = bvec; \ } else if (iov_iter_is_kvec(i)) { \ const struct kvec *kvec = i->kvec; \ struct kvec v; \ -- cgit v1.2.3 From 1b4fb5ffd79bac27a7b9beda63c827c7d7457c45 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Apr 2021 20:33:42 -0400 Subject: iov_iter: teach iterate_{bvec,xarray}() about possible short copies ... and now we finally can sort out the mess in _copy_mc_to_iter(). Provide a variant of iterate_and_advance() that does *NOT* ignore the return values of bvec, xarray and kvec callbacks, use that in _copy_mc_to_iter(). That gets rid of magic in those callbacks - we used to need it so we'd get at least the right return value in case of failure halfway through. As a bonus, now iterator is advanced by the amount actually copied for all flavours. That's what the callers expect and it used to do that correctly in iovec and xarray cases. However, in kvec and bvec cases the iterator had not been advanced on such failures, breaking the users. Fixed now... 
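The resulting contract, modelled in plain C (invented names; the failure is simulated): the step reports the bytes it could not handle, and the walker advances by exactly what was handled before stopping:

    #include <stdio.h>
    #include <string.h>

    /* stand-in for a copy step that can fail partway through;
     * returns the number of bytes NOT copied */
    static size_t step(char *dst, const char *src, size_t len)
    {
            static int calls;
            size_t ok = (++calls == 2) ? len / 2 : len;     /* fail on 2nd call */

            memcpy(dst, src, ok);
            return len - ok;
    }

    int main(void)
    {
            char dst[16] = { 0 };
            const char src[] = "hello, world";
            size_t n = 12, done = 0;

            while (n) {
                    size_t chunk = n > 4 ? 4 : n;   /* per-segment quantum */
                    size_t left = step(dst + done, src + done, chunk);

                    chunk -= left;
                    done += chunk;
                    n -= chunk;
                    if (left)
                            break;  /* short copy: stop where it happened */
            }
            printf("consumed %zu of 12 bytes\n", done);     /* 6 */
            return 0;
    }

The caller can trust 'done' (and hence the iterator position) to point at the first byte that was not transferred.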
Signed-off-by: Al Viro --- lib/iov_iter.c | 65 ++++++++++++++++++++++------------------------------------ 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 58d53deb62b8..ac7682734df2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -41,22 +41,27 @@ size_t wanted = n; \ while (n) { \ unsigned offset = p->bv_offset + skip; \ + unsigned left; \ __v.bv_offset = offset % PAGE_SIZE; \ __v.bv_page = p->bv_page + offset / PAGE_SIZE; \ __v.bv_len = min(min(n, p->bv_len - skip), \ (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ - (void)(STEP); \ + left = (STEP); \ + __v.bv_len -= left; \ skip += __v.bv_len; \ if (skip == p->bv_len) { \ skip = 0; \ p++; \ } \ n -= __v.bv_len; \ + if (left) \ + break; \ } \ n = wanted - n; \ } #define iterate_xarray(i, n, __v, skip, STEP) { \ + __label__ __out; \ struct page *head = NULL; \ size_t wanted = n, seg, offset; \ loff_t start = i->xarray_start + skip; \ @@ -67,6 +72,7 @@ \ rcu_read_lock(); \ xas_for_each(&xas, head, ULONG_MAX) { \ + unsigned left; \ if (xas_retry(&xas, head)) \ continue; \ if (WARN_ON(xa_is_value(head))) \ @@ -80,20 +86,20 @@ seg = PAGE_SIZE - offset; \ __v.bv_offset = offset; \ __v.bv_len = min(n, seg); \ - (void)(STEP); \ + left = (STEP); \ + __v.bv_len -= left; \ n -= __v.bv_len; \ skip += __v.bv_len; \ - if (n == 0) \ - break; \ + if (left || n == 0) \ + goto __out; \ } \ - if (n == 0) \ - break; \ } \ +__out: \ rcu_read_unlock(); \ n = wanted - n; \ } -#define iterate_and_advance(i, n, v, I, B, K, X) { \ +#define __iterate_and_advance(i, n, v, I, B, K, X) { \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ @@ -113,8 +119,7 @@ } else if (iov_iter_is_kvec(i)) { \ const struct kvec *kvec = i->kvec; \ struct kvec v; \ - iterate_iovec(i, n, v, kvec, skip, \ - ((void)(K),0)) \ + iterate_iovec(i, n, v, kvec, skip, (K)) \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ @@ -125,6 +130,9 @@ i->iov_offset = skip; \ } \ } +#define iterate_and_advance(i, n, v, I, B, K, X) \ + __iterate_and_advance(i, n, v, I, ((void)(B),0), \ + ((void)(K),0), ((void)(X),0)) static int copyout(void __user *to, const void *from, size_t n) { @@ -698,45 +706,20 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; - unsigned long rem, curr_addr, s_addr = (unsigned long) addr; if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); - iterate_and_advance(i, bytes, v, + __iterate_and_advance(i, bytes, v, copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), - ({ - rem = copy_mc_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len); - if (rem) { - curr_addr = (unsigned long) from; - bytes = curr_addr - s_addr - rem; - return bytes; - } - }), - ({ - rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) - - v.iov_len, v.iov_len); - if (rem) { - curr_addr = (unsigned long) from; - bytes = curr_addr - s_addr - rem; - return bytes; - } - }), - ({ - rem = copy_mc_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len); - if (rem) { - curr_addr = (unsigned long) from; - bytes = curr_addr - s_addr - rem; - rcu_read_unlock(); - i->iov_offset += bytes; - i->count -= bytes; - return bytes; - } - }) + copy_mc_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len), + copy_mc_to_kernel(v.iov_base, (from += v.iov_len) + - 
v.iov_len, v.iov_len), + copy_mc_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len) ) return bytes; -- cgit v1.2.3 From 21b56c84775351ac66354c9b09fb429e5cdeceac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Apr 2021 20:50:05 -0400 Subject: iov_iter: get rid of separate bvec and xarray callbacks After the previous commit we have * xarray and bvec callbacks idential in all cases * both equivalent to kvec callback wrapped into kmap_local_page()/kunmap_local() pair. So we can pass only two (iovec and kvec) callbacks to iterate_and_advance() and let iterate_{bvec,xarray} wrap it into kmap_local_page()/kunmap_local_page(). Signed-off-by: Al Viro --- lib/iov_iter.c | 112 ++++++++++++++++----------------------------------------- 1 file changed, 30 insertions(+), 82 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ac7682734df2..74e20d6d6e3d 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -42,18 +42,20 @@ while (n) { \ unsigned offset = p->bv_offset + skip; \ unsigned left; \ - __v.bv_offset = offset % PAGE_SIZE; \ - __v.bv_page = p->bv_page + offset / PAGE_SIZE; \ - __v.bv_len = min(min(n, p->bv_len - skip), \ + void *kaddr = kmap_local_page(p->bv_page + \ + offset / PAGE_SIZE); \ + __v.iov_base = kaddr + offset % PAGE_SIZE; \ + __v.iov_len = min(min(n, p->bv_len - skip), \ (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ left = (STEP); \ - __v.bv_len -= left; \ - skip += __v.bv_len; \ + kunmap_local(kaddr); \ + __v.iov_len -= left; \ + skip += __v.iov_len; \ if (skip == p->bv_len) { \ skip = 0; \ p++; \ } \ - n -= __v.bv_len; \ + n -= __v.iov_len; \ if (left) \ break; \ } \ @@ -81,15 +83,16 @@ break; \ for (j = (head->index < index) ? index - head->index : 0; \ j < thp_nr_pages(head); j++) { \ - __v.bv_page = head + j; \ - offset = (i->xarray_start + skip) & ~PAGE_MASK; \ + void *kaddr = kmap_local_page(head + j); \ + offset = (i->xarray_start + skip) % PAGE_SIZE; \ + __v.iov_base = kaddr + offset; \ seg = PAGE_SIZE - offset; \ - __v.bv_offset = offset; \ - __v.bv_len = min(n, seg); \ + __v.iov_len = min(n, seg); \ left = (STEP); \ - __v.bv_len -= left; \ - n -= __v.bv_len; \ - skip += __v.bv_len; \ + kunmap_local(kaddr); \ + __v.iov_len -= left; \ + n -= __v.iov_len; \ + skip += __v.iov_len; \ if (left || n == 0) \ goto __out; \ } \ @@ -99,7 +102,7 @@ __out: \ n = wanted - n; \ } -#define __iterate_and_advance(i, n, v, I, B, K, X) { \ +#define __iterate_and_advance(i, n, v, I, K) { \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ @@ -112,8 +115,8 @@ __out: \ i->iov = iov; \ } else if (iov_iter_is_bvec(i)) { \ const struct bio_vec *bvec = i->bvec; \ - struct bio_vec v; \ - iterate_bvec(i, n, v, bvec, skip, (B)) \ + struct kvec v; \ + iterate_bvec(i, n, v, bvec, skip, (K)) \ i->nr_segs -= bvec - i->bvec; \ i->bvec = bvec; \ } else if (iov_iter_is_kvec(i)) { \ @@ -123,16 +126,15 @@ __out: \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ - struct bio_vec v; \ - iterate_xarray(i, n, v, skip, (X)) \ + struct kvec v; \ + iterate_xarray(i, n, v, skip, (K)) \ } \ i->count -= n; \ i->iov_offset = skip; \ } \ } -#define iterate_and_advance(i, n, v, I, B, K, X) \ - __iterate_and_advance(i, n, v, I, ((void)(B),0), \ - ((void)(K),0), ((void)(X),0)) +#define iterate_and_advance(i, n, v, I, K) \ + __iterate_and_advance(i, n, v, I, ((void)(K),0)) static int copyout(void __user *to, const void *from, size_t n) { @@ -612,11 +614,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 
might_fault(); iterate_and_advance(i, bytes, v, copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), - memcpy_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len), - memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), - memcpy_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len) + memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) ) return bytes; @@ -714,12 +712,8 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) __iterate_and_advance(i, bytes, v, copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), - copy_mc_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len), copy_mc_to_kernel(v.iov_base, (from += v.iov_len) - - v.iov_len, v.iov_len), - copy_mc_to_page(v.bv_page, v.bv_offset, - (from += v.bv_len) - v.bv_len, v.bv_len) + - v.iov_len, v.iov_len) ) return bytes; @@ -738,11 +732,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) might_fault(); iterate_and_advance(i, bytes, v, copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len) + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) ) return bytes; @@ -759,11 +749,7 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) iterate_and_advance(i, bytes, v, __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len) + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) ) return bytes; @@ -795,12 +781,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) iterate_and_advance(i, bytes, v, __copy_from_user_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len), memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len), - memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len) + v.iov_len) ) return bytes; @@ -931,9 +913,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) return pipe_zero(bytes, i); iterate_and_advance(i, bytes, v, clear_user(v.iov_base, v.iov_len), - memzero_page(v.bv_page, v.bv_offset, v.bv_len), - memset(v.iov_base, 0, v.iov_len), - memzero_page(v.bv_page, v.bv_offset, v.bv_len) + memset(v.iov_base, 0, v.iov_len) ) return bytes; @@ -955,11 +935,7 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt } iterate_and_advance(i, bytes, v, copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len), - memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, - v.bv_offset, v.bv_len) + memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) ) kunmap_atomic(kaddr); return bytes; @@ -1698,24 +1674,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, } next ? 
0 : v.iov_len; }), ({ - char *p = kmap_atomic(v.bv_page); - sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, - p + v.bv_offset, v.bv_len, - sum, off); - kunmap_atomic(p); - off += v.bv_len; - }),({ sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len, sum, off); off += v.iov_len; - }), ({ - char *p = kmap_atomic(v.bv_page); - sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, - p + v.bv_offset, v.bv_len, - sum, off); - kunmap_atomic(p); - off += v.bv_len; }) ) *csum = sum; @@ -1750,24 +1712,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, } next ? 0 : v.iov_len; }), ({ - char *p = kmap_atomic(v.bv_page); - sum = csum_and_memcpy(p + v.bv_offset, - (from += v.bv_len) - v.bv_len, - v.bv_len, sum, off); - kunmap_atomic(p); - off += v.bv_len; - }),({ sum = csum_and_memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len, sum, off); off += v.iov_len; - }), ({ - char *p = kmap_atomic(v.bv_page); - sum = csum_and_memcpy(p + v.bv_offset, - (from += v.bv_len) - v.bv_len, - v.bv_len, sum, off); - kunmap_atomic(p); - off += v.bv_len; }) ) csstate->csum = csum_shift(sum, csstate->off); -- cgit v1.2.3 From 622838f3fde2c3671a718dc6196c19087ebe9b11 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 11:13:09 -0400 Subject: iov_iter: make the amount already copied available to iterator callbacks Making iterator macros keep track of the amount of data copied is pretty easy and it has several benefits: 1) we no longer need the mess like (from += v.iov_len) - v.iov_len in the callbacks - initial value + total amount copied so far would do just fine. 2) less obviously, we no longer need to remember the initial amount of data we wanted to copy; the loops in iterator macros are along the lines of wanted = bytes; while (bytes) { copy some bytes -= copied if short copy break } bytes = wanted - bytes; Replacement is offs = 0; while (bytes) { copy some offs += copied bytes -= copied if short copy break } bytes = offs; That wouldn't be a win per se, but unlike the initial value of bytes, the amount copied so far *is* useful in callbacks. 3) in some cases (csum_and_copy_..._iter()) we already had offs manually maintained by the callbacks. With that change we can drop that. Less boilerplate and more readable code... 
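Benefit 1 in miniature - a compilable comparison (not kernel code) of the two addressing styles, which copy the same bytes:

    #include <assert.h>
    #include <string.h>

    int main(void)
    {
            const char src[8] = "0123456";
            char a[4], b[4];
            const char *from = src;
            size_t off = 0, len = 4;

            /* old: advance the cursor, then step back to the chunk start */
            memcpy(a, (from += len) - len, len);

            /* new: the macro hands the callback the amount copied so far */
            memcpy(b, src + off, len);
            off += len;

            assert(memcmp(a, b, len) == 0);
            return 0;
    }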
Signed-off-by: Al Viro --- lib/iov_iter.c | 120 ++++++++++++++++++++++++--------------------------------- 1 file changed, 50 insertions(+), 70 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 74e20d6d6e3d..0e04abe9ec49 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -17,15 +17,14 @@ #define PIPE_PARANOIA /* for now */ /* covers iovec and kvec alike */ -#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ - size_t left; \ - size_t wanted = n; \ +#define iterate_iovec(i, n, __v, __off, __p, skip, STEP) { \ + size_t __off = 0; \ do { \ __v.iov_len = min(n, __p->iov_len - skip); \ if (likely(__v.iov_len)) { \ __v.iov_base = __p->iov_base + skip; \ - left = (STEP); \ - __v.iov_len -= left; \ + __v.iov_len -= (STEP); \ + __off += __v.iov_len; \ skip += __v.iov_len; \ n -= __v.iov_len; \ if (skip < __p->iov_len) \ @@ -34,11 +33,11 @@ __p++; \ skip = 0; \ } while (n); \ - n = wanted - n; \ + n = __off; \ } -#define iterate_bvec(i, n, __v, p, skip, STEP) { \ - size_t wanted = n; \ +#define iterate_bvec(i, n, __v, __off, p, skip, STEP) { \ + size_t __off = 0; \ while (n) { \ unsigned offset = p->bv_offset + skip; \ unsigned left; \ @@ -50,6 +49,7 @@ left = (STEP); \ kunmap_local(kaddr); \ __v.iov_len -= left; \ + __off += __v.iov_len; \ skip += __v.iov_len; \ if (skip == p->bv_len) { \ skip = 0; \ @@ -59,13 +59,14 @@ if (left) \ break; \ } \ - n = wanted - n; \ + n = __off; \ } -#define iterate_xarray(i, n, __v, skip, STEP) { \ +#define iterate_xarray(i, n, __v, __off, skip, STEP) { \ __label__ __out; \ + size_t __off = 0; \ struct page *head = NULL; \ - size_t wanted = n, seg, offset; \ + size_t seg, offset; \ loff_t start = i->xarray_start + skip; \ pgoff_t index = start >> PAGE_SHIFT; \ int j; \ @@ -84,25 +85,26 @@ for (j = (head->index < index) ? 
index - head->index : 0; \ j < thp_nr_pages(head); j++) { \ void *kaddr = kmap_local_page(head + j); \ - offset = (i->xarray_start + skip) % PAGE_SIZE; \ + offset = (start + __off) % PAGE_SIZE; \ __v.iov_base = kaddr + offset; \ seg = PAGE_SIZE - offset; \ __v.iov_len = min(n, seg); \ left = (STEP); \ kunmap_local(kaddr); \ __v.iov_len -= left; \ + __off += __v.iov_len; \ n -= __v.iov_len; \ - skip += __v.iov_len; \ if (left || n == 0) \ goto __out; \ } \ } \ __out: \ rcu_read_unlock(); \ - n = wanted - n; \ + skip += __off; \ + n = __off; \ } -#define __iterate_and_advance(i, n, v, I, K) { \ +#define __iterate_and_advance(i, n, v, off, I, K) { \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ @@ -110,31 +112,31 @@ __out: \ if (likely(iter_is_iovec(i))) { \ const struct iovec *iov = i->iov; \ struct iovec v; \ - iterate_iovec(i, n, v, iov, skip, (I)) \ + iterate_iovec(i, n, v, off, iov, skip, (I)) \ i->nr_segs -= iov - i->iov; \ i->iov = iov; \ } else if (iov_iter_is_bvec(i)) { \ const struct bio_vec *bvec = i->bvec; \ struct kvec v; \ - iterate_bvec(i, n, v, bvec, skip, (K)) \ + iterate_bvec(i, n, v, off, bvec, skip, (K)) \ i->nr_segs -= bvec - i->bvec; \ i->bvec = bvec; \ } else if (iov_iter_is_kvec(i)) { \ const struct kvec *kvec = i->kvec; \ struct kvec v; \ - iterate_iovec(i, n, v, kvec, skip, (K)) \ + iterate_iovec(i, n, v, off, kvec, skip, (K)) \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ struct kvec v; \ - iterate_xarray(i, n, v, skip, (K)) \ + iterate_xarray(i, n, v, off, skip, (K)) \ } \ i->count -= n; \ i->iov_offset = skip; \ } \ } -#define iterate_and_advance(i, n, v, I, K) \ - __iterate_and_advance(i, n, v, I, ((void)(K),0)) +#define iterate_and_advance(i, n, v, off, I, K) \ + __iterate_and_advance(i, n, v, off, I, ((void)(K),0)) static int copyout(void __user *to, const void *from, size_t n) { @@ -607,14 +609,13 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - const char *from = addr; if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); - iterate_and_advance(i, bytes, v, - copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), - memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) + iterate_and_advance(i, bytes, v, off, + copyout(v.iov_base, addr + off, v.iov_len), + memcpy(v.iov_base, addr + off, v.iov_len) ) return bytes; @@ -703,17 +704,13 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, */ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - const char *from = addr; - if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); - __iterate_and_advance(i, bytes, v, - copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, - v.iov_len), - copy_mc_to_kernel(v.iov_base, (from += v.iov_len) - - v.iov_len, v.iov_len) + __iterate_and_advance(i, bytes, v, off, + copyout_mc(v.iov_base, addr + off, v.iov_len), + copy_mc_to_kernel(v.iov_base, addr + off, v.iov_len) ) return bytes; @@ -723,16 +720,15 @@ EXPORT_SYMBOL_GPL(_copy_mc_to_iter); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - char *to = addr; if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } if (iter_is_iovec(i)) might_fault(); - iterate_and_advance(i, bytes, v, - copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy((to += 
v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + iterate_and_advance(i, bytes, v, off, + copyin(addr + off, v.iov_base, v.iov_len), + memcpy(addr + off, v.iov_base, v.iov_len) ) return bytes; @@ -741,15 +737,14 @@ EXPORT_SYMBOL(_copy_from_iter); size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { - char *to = addr; if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, - __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len, + iterate_and_advance(i, bytes, v, off, + __copy_from_user_inatomic_nocache(addr + off, v.iov_base, v.iov_len), - memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + memcpy(addr + off, v.iov_base, v.iov_len) ) return bytes; @@ -773,16 +768,13 @@ EXPORT_SYMBOL(_copy_from_iter_nocache); */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { - char *to = addr; if (unlikely(iov_iter_is_pipe(i))) { WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, - __copy_from_user_flushcache((to += v.iov_len) - v.iov_len, - v.iov_base, v.iov_len), - memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len) + iterate_and_advance(i, bytes, v, off, + __copy_from_user_flushcache(addr + off, v.iov_base, v.iov_len), + memcpy_flushcache(addr + off, v.iov_base, v.iov_len) ) return bytes; @@ -911,7 +903,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return pipe_zero(bytes, i); - iterate_and_advance(i, bytes, v, + iterate_and_advance(i, bytes, v, count, clear_user(v.iov_base, v.iov_len), memset(v.iov_base, 0, v.iov_len) ) @@ -933,9 +925,9 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, - copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), - memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + iterate_and_advance(i, bytes, v, off, + copyin(p + off, v.iov_base, v.iov_len), + memcpy(p + off, v.iov_base, v.iov_len) ) kunmap_atomic(kaddr); return bytes; @@ -1656,28 +1648,22 @@ EXPORT_SYMBOL(iov_iter_get_pages_alloc); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) { - char *to = addr; __wsum sum, next; - size_t off = 0; sum = *csum; if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) { WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, ({ + iterate_and_advance(i, bytes, v, off, ({ next = csum_and_copy_from_user(v.iov_base, - (to += v.iov_len) - v.iov_len, + addr + off, v.iov_len); - if (next) { + if (next) sum = csum_block_add(sum, next, off); - off += v.iov_len; - } next ? 
0 : v.iov_len; }), ({ - sum = csum_and_memcpy((to += v.iov_len) - v.iov_len, - v.iov_base, v.iov_len, + sum = csum_and_memcpy(addr + off, v.iov_base, v.iov_len, sum, off); - off += v.iov_len; }) ) *csum = sum; @@ -1689,33 +1675,27 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { struct csum_state *csstate = _csstate; - const char *from = addr; __wsum sum, next; - size_t off; if (unlikely(iov_iter_is_pipe(i))) return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); sum = csum_shift(csstate->csum, csstate->off); - off = 0; if (unlikely(iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; } - iterate_and_advance(i, bytes, v, ({ - next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, + iterate_and_advance(i, bytes, v, off, ({ + next = csum_and_copy_to_user(addr + off, v.iov_base, v.iov_len); - if (next) { + if (next) sum = csum_block_add(sum, next, off); - off += v.iov_len; - } next ? 0 : v.iov_len; }), ({ sum = csum_and_memcpy(v.iov_base, - (from += v.iov_len) - v.iov_len, + addr + off, v.iov_len, sum, off); - off += v.iov_len; }) ) csstate->csum = csum_shift(sum, csstate->off); -- cgit v1.2.3 From 7baa5099002f2f2ea6c026890598ed1708e7cfd4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 11:35:03 -0400 Subject: iov_iter: make iterator callbacks use base and len instead of iovec Iterator macros used to provide the arguments for step callbacks in a structure matching the flavour - iovec for ITER_IOVEC, kvec for ITER_KVEC and bio_vec for ITER_BVEC. That already broke down for ITER_XARRAY (bio_vec there); now that we are using kvec callback for bvec and xarray cases, we are always passing a pointer + length (void __user * + size_t for ITER_IOVEC callback, void * + size_t for everything else). Note that the original reason for bio_vec (page + offset + len) in case of ITER_BVEC used to be that we did *not* want to kmap a page when all we wanted was e.g. to find the alignment of its subrange. Now all such users are gone and the ones that are left want the page mapped anyway for actually copying the data. So in all cases we have pointer + length, and there's no good reason for keeping those in struct iovec or struct kvec - we can just pass them to callback separately. Again, less boilerplate in callbacks... 
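An illustrative sketch (invented names): once the iterator macros kmap bvec/xarray pages themselves, a single pointer-plus-length callback serves every flavour:

    #include <stdio.h>
    #include <string.h>

    /* one step signature for all flavours: plain pointer plus length,
     * returning bytes left uncopied */
    static size_t copyin_step(char *dst, const void *base, size_t len)
    {
            memcpy(dst, base, len);
            return 0;
    }

    int main(void)
    {
            char dst[4];
            const char kvec_seg[4] = "abc"; /* a kernel-buffer segment */
            const char page_seg[4] = "xyz"; /* a kmapped bvec/xarray page */

            copyin_step(dst, kvec_seg, 4);
            printf("%s ", dst);
            copyin_step(dst, page_seg, 4);
            printf("%s\n", dst);            /* prints: abc xyz */
            return 0;
    }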
Signed-off-by: Al Viro --- lib/iov_iter.c | 182 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0e04abe9ec49..f4ea04e24e06 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -17,86 +17,86 @@ #define PIPE_PARANOIA /* for now */ /* covers iovec and kvec alike */ -#define iterate_iovec(i, n, __v, __off, __p, skip, STEP) { \ - size_t __off = 0; \ +#define iterate_iovec(i, n, base, len, off, __p, skip, STEP) { \ + size_t off = 0; \ do { \ - __v.iov_len = min(n, __p->iov_len - skip); \ - if (likely(__v.iov_len)) { \ - __v.iov_base = __p->iov_base + skip; \ - __v.iov_len -= (STEP); \ - __off += __v.iov_len; \ - skip += __v.iov_len; \ - n -= __v.iov_len; \ + len = min(n, __p->iov_len - skip); \ + if (likely(len)) { \ + base = __p->iov_base + skip; \ + len -= (STEP); \ + off += len; \ + skip += len; \ + n -= len; \ if (skip < __p->iov_len) \ break; \ } \ __p++; \ skip = 0; \ } while (n); \ - n = __off; \ + n = off; \ } -#define iterate_bvec(i, n, __v, __off, p, skip, STEP) { \ - size_t __off = 0; \ +#define iterate_bvec(i, n, base, len, off, p, skip, STEP) { \ + size_t off = 0; \ while (n) { \ unsigned offset = p->bv_offset + skip; \ unsigned left; \ void *kaddr = kmap_local_page(p->bv_page + \ offset / PAGE_SIZE); \ - __v.iov_base = kaddr + offset % PAGE_SIZE; \ - __v.iov_len = min(min(n, p->bv_len - skip), \ + base = kaddr + offset % PAGE_SIZE; \ + len = min(min(n, p->bv_len - skip), \ (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ left = (STEP); \ kunmap_local(kaddr); \ - __v.iov_len -= left; \ - __off += __v.iov_len; \ - skip += __v.iov_len; \ + len -= left; \ + off += len; \ + skip += len; \ if (skip == p->bv_len) { \ skip = 0; \ p++; \ } \ - n -= __v.iov_len; \ + n -= len; \ if (left) \ break; \ } \ - n = __off; \ + n = off; \ } -#define iterate_xarray(i, n, __v, __off, skip, STEP) { \ +#define iterate_xarray(i, n, base, len, __off, skip, STEP) { \ __label__ __out; \ size_t __off = 0; \ struct page *head = NULL; \ - size_t seg, offset; \ + size_t offset; \ loff_t start = i->xarray_start + skip; \ pgoff_t index = start >> PAGE_SHIFT; \ int j; \ \ XA_STATE(xas, i->xarray, index); \ \ - rcu_read_lock(); \ - xas_for_each(&xas, head, ULONG_MAX) { \ - unsigned left; \ - if (xas_retry(&xas, head)) \ - continue; \ - if (WARN_ON(xa_is_value(head))) \ - break; \ - if (WARN_ON(PageHuge(head))) \ - break; \ + rcu_read_lock(); \ + xas_for_each(&xas, head, ULONG_MAX) { \ + unsigned left; \ + if (xas_retry(&xas, head)) \ + continue; \ + if (WARN_ON(xa_is_value(head))) \ + break; \ + if (WARN_ON(PageHuge(head))) \ + break; \ for (j = (head->index < index) ? 
index - head->index : 0; \ - j < thp_nr_pages(head); j++) { \ + j < thp_nr_pages(head); j++) { \ void *kaddr = kmap_local_page(head + j); \ - offset = (start + __off) % PAGE_SIZE; \ - __v.iov_base = kaddr + offset; \ - seg = PAGE_SIZE - offset; \ - __v.iov_len = min(n, seg); \ - left = (STEP); \ - kunmap_local(kaddr); \ - __v.iov_len -= left; \ - __off += __v.iov_len; \ - n -= __v.iov_len; \ - if (left || n == 0) \ - goto __out; \ - } \ + offset = (start + __off) % PAGE_SIZE; \ + base = kaddr + offset; \ + len = PAGE_SIZE - offset; \ + len = min(n, len); \ + left = (STEP); \ + kunmap_local(kaddr); \ + len -= left; \ + __off += len; \ + n -= len; \ + if (left || n == 0) \ + goto __out; \ + } \ } \ __out: \ rcu_read_unlock(); \ @@ -104,39 +104,47 @@ __out: \ n = __off; \ } -#define __iterate_and_advance(i, n, v, off, I, K) { \ +#define __iterate_and_advance(i, n, base, len, off, I, K) { \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ size_t skip = i->iov_offset; \ if (likely(iter_is_iovec(i))) { \ const struct iovec *iov = i->iov; \ - struct iovec v; \ - iterate_iovec(i, n, v, off, iov, skip, (I)) \ + void __user *base; \ + size_t len; \ + iterate_iovec(i, n, base, len, off, \ + iov, skip, (I)) \ i->nr_segs -= iov - i->iov; \ i->iov = iov; \ } else if (iov_iter_is_bvec(i)) { \ const struct bio_vec *bvec = i->bvec; \ - struct kvec v; \ - iterate_bvec(i, n, v, off, bvec, skip, (K)) \ + void *base; \ + size_t len; \ + iterate_bvec(i, n, base, len, off, \ + bvec, skip, (K)) \ i->nr_segs -= bvec - i->bvec; \ i->bvec = bvec; \ } else if (iov_iter_is_kvec(i)) { \ const struct kvec *kvec = i->kvec; \ - struct kvec v; \ - iterate_iovec(i, n, v, off, kvec, skip, (K)) \ + void *base; \ + size_t len; \ + iterate_iovec(i, n, base, len, off, \ + kvec, skip, (K)) \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ - struct kvec v; \ - iterate_xarray(i, n, v, off, skip, (K)) \ + void *base; \ + size_t len; \ + iterate_xarray(i, n, base, len, off, \ + skip, (K)) \ } \ i->count -= n; \ i->iov_offset = skip; \ } \ } -#define iterate_and_advance(i, n, v, off, I, K) \ - __iterate_and_advance(i, n, v, off, I, ((void)(K),0)) +#define iterate_and_advance(i, n, base, len, off, I, K) \ + __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0)) static int copyout(void __user *to, const void *from, size_t n) { @@ -613,9 +621,9 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) return copy_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); - iterate_and_advance(i, bytes, v, off, - copyout(v.iov_base, addr + off, v.iov_len), - memcpy(v.iov_base, addr + off, v.iov_len) + iterate_and_advance(i, bytes, base, len, off, + copyout(base, addr + off, len), + memcpy(base, addr + off, len) ) return bytes; @@ -708,9 +716,9 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) return copy_mc_pipe_to_iter(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); - __iterate_and_advance(i, bytes, v, off, - copyout_mc(v.iov_base, addr + off, v.iov_len), - copy_mc_to_kernel(v.iov_base, addr + off, v.iov_len) + __iterate_and_advance(i, bytes, base, len, off, + copyout_mc(base, addr + off, len), + copy_mc_to_kernel(base, addr + off, len) ) return bytes; @@ -726,9 +734,9 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) } if (iter_is_iovec(i)) might_fault(); - iterate_and_advance(i, bytes, v, off, - copyin(addr + off, v.iov_base, v.iov_len), - memcpy(addr + off, v.iov_base, v.iov_len) + 
iterate_and_advance(i, bytes, base, len, off, + copyin(addr + off, base, len), + memcpy(addr + off, base, len) ) return bytes; @@ -741,10 +749,9 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, off, - __copy_from_user_inatomic_nocache(addr + off, - v.iov_base, v.iov_len), - memcpy(addr + off, v.iov_base, v.iov_len) + iterate_and_advance(i, bytes, base, len, off, + __copy_from_user_inatomic_nocache(addr + off, base, len), + memcpy(addr + off, base, len) ) return bytes; @@ -772,9 +779,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, off, - __copy_from_user_flushcache(addr + off, v.iov_base, v.iov_len), - memcpy_flushcache(addr + off, v.iov_base, v.iov_len) + iterate_and_advance(i, bytes, base, len, off, + __copy_from_user_flushcache(addr + off, base, len), + memcpy_flushcache(addr + off, base, len) ) return bytes; @@ -903,9 +910,9 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return pipe_zero(bytes, i); - iterate_and_advance(i, bytes, v, count, - clear_user(v.iov_base, v.iov_len), - memset(v.iov_base, 0, v.iov_len) + iterate_and_advance(i, bytes, base, len, count, + clear_user(base, len), + memset(base, 0, len) ) return bytes; @@ -925,9 +932,9 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, off, - copyin(p + off, v.iov_base, v.iov_len), - memcpy(p + off, v.iov_base, v.iov_len) + iterate_and_advance(i, bytes, base, len, off, + copyin(p + off, base, len), + memcpy(p + off, base, len) ) kunmap_atomic(kaddr); return bytes; @@ -1654,16 +1661,13 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, WARN_ON(1); return 0; } - iterate_and_advance(i, bytes, v, off, ({ - next = csum_and_copy_from_user(v.iov_base, - addr + off, - v.iov_len); + iterate_and_advance(i, bytes, base, len, off, ({ + next = csum_and_copy_from_user(base, addr + off, len); if (next) sum = csum_block_add(sum, next, off); - next ? 0 : v.iov_len; + next ? 0 : len; }), ({ - sum = csum_and_memcpy(addr + off, v.iov_base, v.iov_len, - sum, off); + sum = csum_and_memcpy(addr + off, base, len, sum, off); }) ) *csum = sum; @@ -1685,17 +1689,13 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, WARN_ON(1); /* for now */ return 0; } - iterate_and_advance(i, bytes, v, off, ({ - next = csum_and_copy_to_user(addr + off, - v.iov_base, - v.iov_len); + iterate_and_advance(i, bytes, base, len, off, ({ + next = csum_and_copy_to_user(addr + off, base, len); if (next) sum = csum_block_add(sum, next, off); - next ? 0 : v.iov_len; + next ? 0 : len; }), ({ - sum = csum_and_memcpy(v.iov_base, - addr + off, - v.iov_len, sum, off); + sum = csum_and_memcpy(base, addr + off, len, sum, off); }) ) csstate->csum = csum_shift(sum, csstate->off); -- cgit v1.2.3 From a6e4ec7bfd32f42ff37577c6b708153d19880b6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 13:03:41 -0400 Subject: pull handling of ->iov_offset into iterate_{iovec,bvec,xarray} fewer arguments (by one, but still...) 
for iterate_...() macros Signed-off-by: Al Viro --- lib/iov_iter.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f4ea04e24e06..48a55de2a172 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -17,8 +17,9 @@ #define PIPE_PARANOIA /* for now */ /* covers iovec and kvec alike */ -#define iterate_iovec(i, n, base, len, off, __p, skip, STEP) { \ +#define iterate_iovec(i, n, base, len, off, __p, STEP) { \ size_t off = 0; \ + size_t skip = i->iov_offset; \ do { \ len = min(n, __p->iov_len - skip); \ if (likely(len)) { \ @@ -33,18 +34,20 @@ __p++; \ skip = 0; \ } while (n); \ + i->iov_offset = skip; \ n = off; \ } -#define iterate_bvec(i, n, base, len, off, p, skip, STEP) { \ +#define iterate_bvec(i, n, base, len, off, p, STEP) { \ size_t off = 0; \ + unsigned skip = i->iov_offset; \ while (n) { \ unsigned offset = p->bv_offset + skip; \ unsigned left; \ void *kaddr = kmap_local_page(p->bv_page + \ offset / PAGE_SIZE); \ base = kaddr + offset % PAGE_SIZE; \ - len = min(min(n, p->bv_len - skip), \ + len = min(min(n, (size_t)(p->bv_len - skip)), \ (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \ left = (STEP); \ kunmap_local(kaddr); \ @@ -59,15 +62,16 @@ if (left) \ break; \ } \ + i->iov_offset = skip; \ n = off; \ } -#define iterate_xarray(i, n, base, len, __off, skip, STEP) { \ +#define iterate_xarray(i, n, base, len, __off, STEP) { \ __label__ __out; \ size_t __off = 0; \ struct page *head = NULL; \ size_t offset; \ - loff_t start = i->xarray_start + skip; \ + loff_t start = i->xarray_start + i->iov_offset; \ pgoff_t index = start >> PAGE_SHIFT; \ int j; \ \ XA_STATE(xas, i->xarray, index); \ @@ -100,7 +104,7 @@ } \ __out: \ rcu_read_unlock(); \ - skip += __off; \ + i->iov_offset += __off; \ n = __off; \ } @@ -108,13 +112,12 @@ __out: \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ - size_t skip = i->iov_offset; \ if (likely(iter_is_iovec(i))) { \ const struct iovec *iov = i->iov; \ void __user *base; \ size_t len; \ iterate_iovec(i, n, base, len, off, \ - iov, skip, (I)) \ + iov, (I)) \ i->nr_segs -= iov - i->iov; \ i->iov = iov; \ } else if (iov_iter_is_bvec(i)) { \ @@ -122,7 +125,7 @@ __out: \ void *base; \ size_t len; \ iterate_bvec(i, n, base, len, off, \ - bvec, skip, (K)) \ + bvec, (K)) \ i->nr_segs -= bvec - i->bvec; \ i->bvec = bvec; \ } else if (iov_iter_is_kvec(i)) { \ @@ -130,17 +133,16 @@ __out: \ void *base; \ size_t len; \ iterate_iovec(i, n, base, len, off, \ - kvec, skip, (K)) \ + kvec, (K)) \ i->nr_segs -= kvec - i->kvec; \ i->kvec = kvec; \ } else if (iov_iter_is_xarray(i)) { \ void *base; \ size_t len; \ iterate_xarray(i, n, base, len, off, \ - skip, (K)) \ + (K)) \ } \ i->count -= n; \ - i->iov_offset = skip; \ } \ } #define iterate_and_advance(i, n, base, len, off, I, K) \ -- cgit v1.2.3 From 4b179e9a9c7c98550747b76405626dd59968f078 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 May 2021 17:50:07 -0400 Subject: iterate_xarray(): only on the first iteration we might get offset != 0 recalculating offset on each iteration is pointless - on all subsequent passes through the loop it will be zero anyway.
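A stand-alone model of the walk (made-up values; PAGE_SIZE fixed at 4096) showing that only the first chunk can start inside a page:

    #include <stdio.h>

    #define PAGE_SIZE 4096ULL

    int main(void)
    {
            unsigned long long start = 10000, n = 9000;
            unsigned long long offset = start % PAGE_SIZE;

            while (n) {
                    unsigned long long len = PAGE_SIZE - offset;

                    if (len > n)
                            len = n;
                    printf("offset %4llu len %4llu\n", offset, len);
                    n -= len;
                    offset = 0;     /* every later chunk is page-aligned */
            }
            return 0;
    }

Output: offset 1808 len 2288, then offset 0 for the remaining page-sized (and final partial) chunks.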
Signed-off-by: Al Viro --- lib/iov_iter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 48a55de2a172..d5f750cc6f4a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -70,9 +70,9 @@ __label__ __out; \ size_t __off = 0; \ struct page *head = NULL; \ - size_t offset; \ loff_t start = i->xarray_start + i->iov_offset; \ - pgoff_t index = start >> PAGE_SHIFT; \ + unsigned offset = start % PAGE_SIZE; \ + pgoff_t index = start / PAGE_SIZE; \ int j; \ \ XA_STATE(xas, i->xarray, index); \ @@ -89,7 +89,6 @@ for (j = (head->index < index) ? index - head->index : 0; \ j < thp_nr_pages(head); j++) { \ void *kaddr = kmap_local_page(head + j); \ - offset = (start + __off) % PAGE_SIZE; \ base = kaddr + offset; \ len = PAGE_SIZE - offset; \ len = min(n, len); \ @@ -100,6 +99,7 @@ n -= len; \ if (left || n == 0) \ goto __out; \ + offset = 0; \ } \ } \ __out: \ -- cgit v1.2.3 From c1d4d6a9ae88b87262fb5426823930bc471f6034 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 27 Apr 2021 12:29:53 -0400 Subject: copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases kmap_local_page() is enough there. Moreover, we can use _copy_to_iter() for actual copying in those cases - no useful extra checks on the address we are copying from in that call. Signed-off-by: Al Viro --- lib/iov_iter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index d5f750cc6f4a..8aff4eb4fdfd 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -821,9 +821,9 @@ static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes if (likely(iter_is_iovec(i))) return copy_page_to_iter_iovec(page, offset, bytes, i); if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { - void *kaddr = kmap_atomic(page); - size_t wanted = copy_to_iter(kaddr + offset, bytes, i); - kunmap_atomic(kaddr); + void *kaddr = kmap_local_page(page); + size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); + kunmap_local(kaddr); return wanted; } if (iov_iter_is_pipe(i)) -- cgit v1.2.3 From 55ca375c5dcc7aebd89de42f00ff18f5c40d25f3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 27 Apr 2021 12:33:24 -0400 Subject: copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases kmap_local_page() is enough. Signed-off-by: Al Viro --- lib/iov_iter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8aff4eb4fdfd..ba7eb6557750 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -871,9 +871,9 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, if (likely(iter_is_iovec(i))) return copy_page_from_iter_iovec(page, offset, bytes, i); if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); - kunmap_atomic(kaddr); + kunmap_local(kaddr); return wanted; } WARN_ON(1); -- cgit v1.2.3 From 2495bdcc86dc5e6b71b6785e1faa76452496c687 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Apr 2021 13:40:48 -0400 Subject: iov_iter: clean csum_and_copy_...() primitives up a bit 1) kmap_atomic() is not needed here, kmap_local_page() is enough. 2) No need to make sum = csum_block_add(sum, next, off); conditional upon next != 0 - adding 0 is a no-op as far as csum_block_add() is concerned. 
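Point 2 can be checked in isolation; a userland model with the helpers reimplemented after include/net/checksum.h so the test is self-contained:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t ror32(uint32_t w, unsigned s)
    { return (w >> s) | (w << (32 - s)); }

    static uint32_t csum_add(uint32_t a, uint32_t b)
    { uint32_t r = a + b; return r + (r < b); /* end-around carry */ }

    static uint32_t csum_shift(uint32_t s, int off)
    { return (off & 1) ? ror32(s, 8) : s; }

    static uint32_t csum_block_add(uint32_t c, uint32_t c2, int off)
    { return csum_add(c, csum_shift(c2, off)); }

    int main(void)
    {
            uint32_t sum = 0x12345678;

            /* folding in a zero block is an identity at any offset, so
             * the 'if (next)' guard around csum_block_add() was redundant */
            for (int off = 0; off < 4; off++)
                    assert(csum_block_add(sum, 0, off) == sum);
            return 0;
    }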
Signed-off-by: Al Viro --- lib/iov_iter.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ba7eb6557750..3b442d25a966 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -600,9 +600,9 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, return 0; do { size_t chunk = min_t(size_t, n, PAGE_SIZE - r); - char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page); + char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); sum = csum_and_memcpy(p + r, addr, chunk, sum, off); - kunmap_atomic(p); + kunmap_local(p); i->head = i_head; i->iov_offset = r + chunk; n -= chunk; @@ -1665,8 +1665,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, } iterate_and_advance(i, bytes, base, len, off, ({ next = csum_and_copy_from_user(base, addr + off, len); - if (next) - sum = csum_block_add(sum, next, off); + sum = csum_block_add(sum, next, off); next ? 0 : len; }), ({ sum = csum_and_memcpy(addr + off, base, len, sum, off); @@ -1693,8 +1692,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, } iterate_and_advance(i, bytes, base, len, off, ({ next = csum_and_copy_to_user(addr + off, base, len); - if (next) - sum = csum_block_add(sum, next, off); + sum = csum_block_add(sum, next, off); next ? 0 : len; }), ({ sum = csum_and_memcpy(base, addr + off, len, sum, off); -- cgit v1.2.3 From 893839fd57330ce226d4ee1b16fd5221a27fb6ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Apr 2021 18:39:25 -0400 Subject: pipe_zero(): we don't need no stinkin' kmap_atomic()... FWIW, memcpy_to_page() itself almost certainly ought to use kmap_local_page()... Signed-off-by: Al Viro --- lib/iov_iter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 3b442d25a966..a827991f2644 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -897,7 +897,9 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i) do { size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk); + char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + memset(p + off, 0, chunk); + kunmap_local(p); i->head = i_head; i->iov_offset = off + chunk; n -= chunk; -- cgit v1.2.3 From 2a510a744bebc7f5d9e71ee094b62e28b5b43218 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 17:16:34 -0400 Subject: clean up copy_mc_pipe_to_iter() ... and we don't need kmap_atomic() there - kmap_local_page() is fine. 
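The cleanup is pure bookkeeping; a sketch (invented values) of the equivalence between the old and new accounting:

    #include <assert.h>
    #include <stddef.h>

    int main(void)
    {
            size_t off = 100, chunk = 64, rem = 17, xfer = 200;

            /* old style: every consumer subtracts rem by itself */
            size_t pos_old  = off + chunk - rem;
            size_t xfer_old = xfer + (chunk - rem);

            /* new style: shrink chunk once by the uncopied remainder and
             * use it everywhere; the source is then always addressed as
             * addr + xfer, with no separate addr += chunk to keep in sync */
            chunk -= rem;
            assert(off + chunk == pos_old);
            assert(xfer + chunk == xfer_old);
            return 0;
    }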
Signed-off-by: Al Viro --- lib/iov_iter.c | 33 +++++++++------------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a827991f2644..9ce83db26571 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -642,19 +642,6 @@ static int copyout_mc(void __user *to, const void *from, size_t n) return n; } -static unsigned long copy_mc_to_page(struct page *page, size_t offset, - const char *from, size_t len) -{ - unsigned long ret; - char *to; - - to = kmap_atomic(page); - ret = copy_mc_to_kernel(to + offset, from, len); - kunmap_atomic(to); - - return ret; -} - static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { @@ -666,25 +653,23 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, if (!sanity(i)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) - return 0; - do { + n = push_pipe(i, bytes, &i_head, &off); + while (n) { size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); unsigned long rem; - - rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, - off, addr, chunk); + rem = copy_mc_to_kernel(p + off, addr + xfer, chunk); + chunk -= rem; + kunmap_local(p); i->head = i_head; - i->iov_offset = off + chunk - rem; - xfer += chunk - rem; + i->iov_offset = off + chunk; + xfer += chunk; if (rem) break; n -= chunk; - addr += chunk; off = 0; i_head++; - } while (n); + } i->count -= xfer; return xfer; } -- cgit v1.2.3 From 6852df1266995c35b8621a95dcb7f91ca11ea409 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 May 2021 17:24:40 -0400 Subject: csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller ... since all the logic is already there for use by iovec/kvec/etc. cases.
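What the caller-side dance buys, modelled in userland C (a big-endian-flavoured ones-complement sum; helper names mirror the kernel's but are reimplemented here): shift the saved csum out by the saved offset, accumulate with off counted from 0, shift back, and after 16-bit folding the result matches accumulating at absolute offsets:

    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>

    static uint32_t ror32(uint32_t w, unsigned s)
    { return (w >> s) | (w << (32 - s)); }
    static uint32_t csum_add(uint32_t a, uint32_t b)
    { uint32_t r = a + b; return r + (r < b); }
    static uint32_t csum_shift(uint32_t s, size_t off)
    { return (off & 1) ? ror32(s, 8) : s; }
    static uint16_t fold(uint32_t s)
    { s = (s >> 16) + (s & 0xffff); s += s >> 16; return (uint16_t)s; }

    /* ones-complement sum of a buffer as if it began at an even offset */
    static uint32_t csum(const uint8_t *p, size_t len)
    {
            uint32_t sum = 0;

            for (size_t i = 0; i < len; i += 2) {
                    uint16_t w = (uint16_t)(p[i] << 8);

                    if (i + 1 < len)
                            w |= p[i + 1];
                    sum = csum_add(sum, w);
            }
            return sum;
    }

    int main(void)
    {
            const uint8_t data[5] = { 1, 2, 3, 4, 5 };
            size_t split = 3;               /* odd: the interesting case */
            uint32_t whole = csum(data, 5);

            /* csstate after the first call: csum of 3 bytes, off = 3 */
            uint32_t saved = csum(data, split);

            /* next call: shift out, accumulate with off from 0, shift back */
            uint32_t sum = csum_shift(saved, split);
            sum = csum_add(sum, csum(data + split, 5 - split));
            uint32_t back = csum_shift(sum, split);

            assert(fold(back) == fold(whole));
            return 0;
    }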
Signed-off-by: Al Viro --- lib/iov_iter.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9ce83db26571..97e04c5dbeef 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -582,39 +582,34 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, } static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, - struct csum_state *csstate, - struct iov_iter *i) + struct iov_iter *i, __wsum *sump) { struct pipe_inode_info *pipe = i->pipe; unsigned int p_mask = pipe->ring_size - 1; - __wsum sum = csstate->csum; - size_t off = csstate->off; + __wsum sum = *sump; + size_t off = 0; unsigned int i_head; - size_t n, r; + size_t r; if (!sanity(i)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &r); - if (unlikely(!n)) - return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - r); + bytes = push_pipe(i, bytes, &i_head, &r); + while (bytes) { + size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); - sum = csum_and_memcpy(p + r, addr, chunk, sum, off); + sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off); kunmap_local(p); i->head = i_head; i->iov_offset = r + chunk; - n -= chunk; + bytes -= chunk; off += chunk; - addr += chunk; r = 0; i_head++; - } while (n); - i->count -= bytes; - csstate->csum = sum; - csstate->off = off; - return bytes; + } + *sump = sum; + i->count -= off; + return off; } size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) @@ -1669,15 +1664,15 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct csum_state *csstate = _csstate; __wsum sum, next; - if (unlikely(iov_iter_is_pipe(i))) - return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); - - sum = csum_shift(csstate->csum, csstate->off); if (unlikely(iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; } - iterate_and_advance(i, bytes, base, len, off, ({ + + sum = csum_shift(csstate->csum, csstate->off); + if (unlikely(iov_iter_is_pipe(i))) + bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum); + else iterate_and_advance(i, bytes, base, len, off, ({ next = csum_and_copy_to_user(addr + off, base, len); sum = csum_block_add(sum, next, off); next ? 0 : len; -- cgit v1.2.3