diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-11 09:04:23 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-11 09:04:23 +0900 |
commit | ce40be7a820bb393ac4ac69865f018d2f4038cf0 (patch) | |
tree | b1fe5a93346eb06f22b1c303d63ec5456d7212ab /include | |
parent | ba0a5a36f60e4c1152af3a2ae2813251974405bf (diff) | |
parent | 02f3939e1a9357b7c370a4a69717cf9c02452737 (diff) |
Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block
Pull block IO update from Jens Axboe:
"Core block IO bits for 3.7. Not a huge round this time, it contains:
- First series from Kent cleaning up and generalizing bio allocation
and freeing.
- WRITE_SAME support from Martin.
- Mikulas patches to prevent O_DIRECT crashes when someone changes
the block size of a device.
- Make bio_split() work on data-less bio's (like trim/discards).
- A few other minor fixups."
Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton. It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6ac: "mm: replace vma prio_tree with an interval tree").
So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.
* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
block: makes bio_split support bio without data
scatterlist: refactor the sg_nents
scatterlist: add sg_nents
fs: fix include/percpu-rwsem.h export error
percpu-rw-semaphore: fix documentation typos
fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
blockdev: turn a rw semaphore into a percpu rw semaphore
Fix a crash when block device is read and block size is changed at the same time
block: fix request_queue->flags initialization
block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
block: ioctl to zero block ranges
block: Make blkdev_issue_zeroout use WRITE SAME
block: Implement support for WRITE SAME
block: Consolidate command flag and queue limit checks for merges
block: Clean up special command handling logic
block/blk-tag.c: Remove useless kfree
block: remove the duplicated setting for congestion_threshold
block: reject invalid queue attribute values
block: Add bio_clone_bioset(), bio_clone_kmalloc()
block: Consolidate bio_alloc_bioset(), bio_kmalloc()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bio.h | 70 | ||||
-rw-r--r-- | include/linux/blk_types.h | 36 | ||||
-rw-r--r-- | include/linux/blkdev.h | 82 | ||||
-rw-r--r-- | include/linux/fs.h | 6 | ||||
-rw-r--r-- | include/linux/percpu-rwsem.h | 89 | ||||
-rw-r--r-- | include/linux/scatterlist.h | 1 |
6 files changed, 249 insertions, 35 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h index 26435890dc87..820e7aaad4fd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -212,20 +212,41 @@ extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); -extern struct bio *bio_alloc(gfp_t, unsigned int); -extern struct bio *bio_kmalloc(gfp_t, unsigned int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); -extern void bio_free(struct bio *, struct bio_set *); + +extern void __bio_clone(struct bio *, struct bio *); +extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); + +extern struct bio_set *fs_bio_set; + +static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); +} + +static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, fs_bio_set); +} + +static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); +} + +static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, NULL); + +} extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern void __bio_clone(struct bio *, struct bio *); -extern struct bio *bio_clone(struct bio *, gfp_t); - extern void bio_init(struct bio *); +extern void bio_reset(struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, @@ -304,8 +325,6 @@ struct biovec_slab { struct kmem_cache *slab; }; -extern struct bio_set *fs_bio_set; - /* * a small number of entries is fine, not going to be performance critical. * basically we just need to survive @@ -367,9 +386,31 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ -static inline int bio_has_data(struct bio *bio) +static inline bool bio_has_data(struct bio *bio) { - return bio && bio->bi_io_vec != NULL; + if (bio && bio->bi_vcnt) + return true; + + return false; +} + +static inline bool bio_is_rw(struct bio *bio) +{ + if (!bio_has_data(bio)) + return false; + + if (bio->bi_rw & REQ_WRITE_SAME) + return false; + + return true; +} + +static inline bool bio_mergeable(struct bio *bio) +{ + if (bio->bi_rw & REQ_NOMERGE_FLAGS) + return false; + + return true; } /* @@ -505,9 +546,8 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bio_integrity(bio) (bio->bi_integrity != NULL) -extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *, struct bio_set *); +extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern int bio_integrity_enabled(struct bio *bio); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); @@ -517,7 +557,7 @@ extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int); -extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); +extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); extern void bio_integrity_init(void); @@ -549,13 +589,13 @@ static inline int bio_integrity_prep(struct bio *bio) return 0; } -static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs) +static inline void bio_integrity_free(struct bio *bio) { return; } static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, - gfp_t gfp_mask, struct bio_set *bs) + gfp_t gfp_mask) { return 0; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7b7ac9ccec7a..cdf11191e645 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -59,12 +59,6 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ - - atomic_t bi_cnt; /* pin count */ - - struct bio_vec *bi_io_vec; /* the actual vec list */ - bio_end_io_t *bi_end_io; void *bi_private; @@ -80,7 +74,17 @@ struct bio { struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif - bio_destructor_t *bi_destructor; /* destructor */ + /* + * Everything starting with bi_max_vecs will be preserved by bio_reset() + */ + + unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + + atomic_t bi_cnt; /* pin count */ + + struct bio_vec *bi_io_vec; /* the actual vec list */ + + struct bio_set *bi_pool; /* * We can inline a number of vecs at the end of the bio, to avoid @@ -90,6 +94,8 @@ struct bio { struct bio_vec bi_inline_vecs[0]; }; +#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) + /* * bio flags */ @@ -105,6 +111,13 @@ struct bio { #define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ #define BIO_QUIET 10 /* Make BIO Quiet */ #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ + +/* + * Flags starting here get preserved by bio_reset() - this includes + * BIO_POOL_IDX() + */ +#define BIO_RESET_BITS 12 + #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -134,6 +147,7 @@ enum rq_flag_bits { __REQ_PRIO, /* boost priority in cfq */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_WRITE_SAME, /* write same block many times */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_FUA, /* forced unit access */ @@ -172,15 +186,21 @@ enum rq_flag_bits { #define REQ_META (1 << __REQ_META) #define REQ_PRIO (1 << __REQ_PRIO) #define REQ_DISCARD (1 << __REQ_DISCARD) +#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ - REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ + REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK +/* This mask is used for both bio and request merge checking */ +#define REQ_NOMERGE_FLAGS \ + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) + #define REQ_RAHEAD (1 << __REQ_RAHEAD) #define REQ_THROTTLED (1 << __REQ_THROTTLED) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a2ab7c85393..1756001210d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -270,6 +270,7 @@ struct queue_limits { unsigned int io_min; unsigned int io_opt; unsigned int max_discard_sectors; + unsigned int max_write_same_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -540,8 +541,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_account_rq(rq) \ (((rq)->cmd_flags & REQ_STARTED) && \ - ((rq)->cmd_type == REQ_TYPE_FS || \ - ((rq)->cmd_flags & REQ_DISCARD))) + ((rq)->cmd_type == REQ_TYPE_FS)) #define blk_pm_request(rq) \ ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ @@ -595,17 +595,39 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) rl->flags &= ~flag; } +static inline bool rq_mergeable(struct request *rq) +{ + if (rq->cmd_type != REQ_TYPE_FS) + return false; -/* - * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may - * it already be started by driver. - */ -#define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD) -#define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ - (((rq)->cmd_flags & REQ_DISCARD) || \ - (rq)->cmd_type == REQ_TYPE_FS)) + if (rq->cmd_flags & REQ_NOMERGE_FLAGS) + return false; + + return true; +} + +static inline bool blk_check_merge_flags(unsigned int flags1, + unsigned int flags2) +{ + if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD)) + return false; + + if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) + return false; + + if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME)) + return false; + + return true; +} + +static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) +{ + if (bio_data(a) == bio_data(b)) + return true; + + return false; +} /* * q->prep_rq_fn return values @@ -802,6 +824,28 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, + unsigned int cmd_flags) +{ + if (unlikely(cmd_flags & REQ_DISCARD)) + return q->limits.max_discard_sectors; + + if (unlikely(cmd_flags & REQ_WRITE_SAME)) + return q->limits.max_write_same_sectors; + + return q->limits.max_sectors; +} + +static inline unsigned int blk_rq_get_max_sectors(struct request *rq) +{ + struct request_queue *q = rq->q; + + if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) + return q->limits.max_hw_sectors; + + return blk_queue_get_max_sectors(q, rq->cmd_flags); +} + /* * Request issue related functions. */ @@ -857,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); +extern void blk_queue_max_write_same_sectors(struct request_queue *q, + unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, @@ -987,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); +extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct page *page); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask); static inline int sb_issue_discard(struct super_block *sb, sector_t block, @@ -1164,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) return queue_discard_zeroes_data(bdev_get_queue(bdev)); } +static inline unsigned int bdev_write_same(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return q->limits.max_write_same_sectors; + + return 0; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/linux/fs.h b/include/linux/fs.h index c617ed024df8..39f3e12ca752 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -335,6 +335,7 @@ struct inodes_stat_t { #define BLKDISCARDZEROES _IO(0x12,124) #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) +#define BLKZEROOUT _IO(0x12,127) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ @@ -415,6 +416,7 @@ struct inodes_stat_t { #include <linux/migrate_mode.h> #include <linux/uidgid.h> #include <linux/lockdep.h> +#include <linux/percpu-rwsem.h> #include <asm/byteorder.h> @@ -724,6 +726,8 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; + /* A semaphore that prevents I/O while block size is being changed */ + struct percpu_rw_semaphore bd_block_size_semaphore; }; /* @@ -2570,6 +2574,8 @@ extern int generic_segment_checks(const struct iovec *iov, unsigned long *nr_segs, size_t *count, int access_flags); /* fs/block_dev.c */ +extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h new file mode 100644 index 000000000000..cf80f7e5277f --- /dev/null +++ b/include/linux/percpu-rwsem.h @@ -0,0 +1,89 @@ +#ifndef _LINUX_PERCPU_RWSEM_H +#define _LINUX_PERCPU_RWSEM_H + +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/delay.h> + +struct percpu_rw_semaphore { + unsigned __percpu *counters; + bool locked; + struct mutex mtx; +}; + +static inline void percpu_down_read(struct percpu_rw_semaphore *p) +{ + rcu_read_lock(); + if (unlikely(p->locked)) { + rcu_read_unlock(); + mutex_lock(&p->mtx); + this_cpu_inc(*p->counters); + mutex_unlock(&p->mtx); + return; + } + this_cpu_inc(*p->counters); + rcu_read_unlock(); +} + +static inline void percpu_up_read(struct percpu_rw_semaphore *p) +{ + /* + * On X86, write operation in this_cpu_dec serves as a memory unlock + * barrier (i.e. memory accesses may be moved before the write, but + * no memory accesses are moved past the write). + * On other architectures this may not be the case, so we need smp_mb() + * there. + */ +#if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE)) + barrier(); +#else + smp_mb(); +#endif + this_cpu_dec(*p->counters); +} + +static inline unsigned __percpu_count(unsigned __percpu *counters) +{ + unsigned total = 0; + int cpu; + + for_each_possible_cpu(cpu) + total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu)); + + return total; +} + +static inline void percpu_down_write(struct percpu_rw_semaphore *p) +{ + mutex_lock(&p->mtx); + p->locked = true; + synchronize_rcu(); + while (__percpu_count(p->counters)) + msleep(1); + smp_rmb(); /* paired with smp_mb() in percpu_sem_up_read() */ +} + +static inline void percpu_up_write(struct percpu_rw_semaphore *p) +{ + p->locked = false; + mutex_unlock(&p->mtx); +} + +static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p) +{ + p->counters = alloc_percpu(unsigned); + if (unlikely(!p->counters)) + return -ENOMEM; + p->locked = false; + mutex_init(&p->mtx); + return 0; +} + +static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p) +{ + free_percpu(p->counters); + p->counters = NULL; /* catch use after free bugs */ +} + +#endif diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 7b600da9a635..4bd6c06eb28e 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -201,6 +201,7 @@ static inline void *sg_virt(struct scatterlist *sg) return page_address(sg_page(sg)) + sg->offset; } +int sg_nents(struct scatterlist *sg); struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); |