author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800
commit | 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree | 2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /drivers/lightnvm
parent | b12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff)
parent | 00203ba40d40d7f33857416adfb18adaf0e40123 (diff)
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block/storage for 4.21.
Larger than usual, it was a busy round with lots of goodies queued up.
Most notable is the removal of the old IO stack, which has been a long
time coming. No new features have gone in for a while; everything coming
in this week has been fixes for things that were previously merged.
This contains:
- Use atomic counters instead of semaphores for mtip32xx (Arnd)
- Cleanup of the mtip32xx request setup (Christoph)
- Fix for circular locking dependency in loop (Jan, Tetsuo)
- bcache (Coly, Guoju, Shenghui)
* Optimizations for writeback caching
* Various fixes and improvements
- nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
* host and target support for NVMe over TCP
* Error log page support
* Support for separate read/write/poll queues
* Much improved polling
* discard OOM fallback
* Tracepoint improvements
- lightnvm (Hans, Hua, Igor, Matias, Javier)
* Igor added packed metadata to pblk. Now drives without metadata
per LBA can be used as well.
* Fix from Geert for an uninitialized value on chunk metadata reads.
* Fixes from Hans and Javier to pblk recovery and write path.
* Fix from Hua Su for a race condition in the pblk recovery
code.
* Scan optimization added to pblk recovery from Zhoujie.
* Small geometry cleanup from me.
- Conversion of the last few drivers that used the legacy path to
blk-mq (me)
- Removal of legacy IO path in SCSI (me, Christoph)
- Removal of legacy IO stack and schedulers (me)
- Support for much better polling, now without interrupts at all.
blk-mq adds support for multiple queue maps, which enables us to
have a map per type. This in turn enables nvme to have separate
completion queues for polling, which can then be interrupt-less
(see the sketch after this list).
Also means we're ready for async polled IO, which is hopefully
coming in the next release.
- Killing of (now) unused block exports (Christoph)
- Unification of the blk-rq-qos and blk-wbt wait handling (Josef)
- Support for zoned testing with null_blk (Masato)
- sx8 conversion to per-host tag sets (Christoph)
- IO priority improvements (Damien)
- mq-deadline zoned fix (Damien)
- Ref count blkcg series (Dennis)
- Lots of blk-mq improvements and speedups (me)
- sbitmap scalability improvements (me)
- Make core inflight IO accounting per-cpu (Mikulas)
- Export timeout setting in sysfs (Weiping)
- Cleanup the direct issue path (Jianchao)
- Export blk-wbt internals in block debugfs for easier debugging
(Ming)
- Lots of other fixes and improvements"
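
The "separate read/write/poll queues" and polling items above build on the new
per-type queue maps in blk-mq. Below is a minimal, illustrative sketch of how a
driver could populate a dedicated poll map, assuming the blk_mq_queue_map and
HCTX_TYPE_* interfaces added in this cycle; my_map_queues() and nr_poll_queues
are hypothetical names, not the actual nvme-pci code.

    #include <linux/blk-mq.h>

    /* Hypothetical number of hardware queues reserved for polled I/O. */
    static unsigned int nr_poll_queues = 2;

    static int my_map_queues(struct blk_mq_tag_set *set)
    {
            struct blk_mq_queue_map *def = &set->map[HCTX_TYPE_DEFAULT];
            struct blk_mq_queue_map *poll = &set->map[HCTX_TYPE_POLL];

            /* IRQ-driven queues service normal read/write completions.
             * A real driver clamps nr_poll_queues against nr_hw_queues. */
            def->nr_queues = set->nr_hw_queues - nr_poll_queues;
            def->queue_offset = 0;
            blk_mq_map_queues(def);

            /* The remaining queues are completed purely by polling, so the
             * driver never enables an interrupt for them. */
            poll->nr_queues = nr_poll_queues;
            poll->queue_offset = def->nr_queues;
            blk_mq_map_queues(poll);

            return 0;
    }

A driver would wire a function like this up as the .map_queues callback in its
blk_mq_ops and set nr_maps in the tag set to say how many per-type maps it
populates; with a poll map present, polled requests complete on interrupt-less
queues, which is what makes the fully interrupt-free polling above possible.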
* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
kyber: use sbitmap add_wait_queue/list_del wait helpers
sbitmap: add helpers for add/del wait queue handling
block: save irq state in blkg_lookup_create()
dm: don't reuse bio for flushes
nvme-pci: trace SQ status on completions
nvme-rdma: implement polling queue map
nvme-fabrics: allow user to pass in nr_poll_queues
nvme-fabrics: allow nvmf_connect_io_queue to poll
nvme-core: optionally poll sync commands
block: make request_to_qc_t public
nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
nvme-tcp: fix endianess annotations
nvmet-tcp: fix endianess annotations
nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
nvme-pci: only set nr_maps to 2 if poll queues are supported
nvmet: use a macro for default error location
nvmet: fix comparison of a u16 with -1
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
...
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r-- | drivers/lightnvm/core.c | 25
-rw-r--r-- | drivers/lightnvm/pblk-core.c | 77
-rw-r--r-- | drivers/lightnvm/pblk-init.c | 103
-rw-r--r-- | drivers/lightnvm/pblk-map.c | 63
-rw-r--r-- | drivers/lightnvm/pblk-rb.c | 5
-rw-r--r-- | drivers/lightnvm/pblk-read.c | 66
-rw-r--r-- | drivers/lightnvm/pblk-recovery.c | 46
-rw-r--r-- | drivers/lightnvm/pblk-rl.c | 5
-rw-r--r-- | drivers/lightnvm/pblk-sysfs.c | 7
-rw-r--r-- | drivers/lightnvm/pblk-write.c | 64
-rw-r--r-- | drivers/lightnvm/pblk.h | 43
11 files changed, 361 insertions, 143 deletions
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index efb976a863d2..5f82036fe322 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -389,7 +389,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); if (!tqueue) { ret = -ENOMEM; goto err_disk; @@ -974,7 +974,7 @@ static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba, struct ppa_addr ppa; u8 *blks; int ch, lun, nr_blks; - int ret; + int ret = 0; ppa.ppa = slba; ppa = dev_to_generic_addr(dev, ppa); @@ -1140,30 +1140,33 @@ EXPORT_SYMBOL(nvm_alloc_dev); int nvm_register(struct nvm_dev *dev) { - int ret; + int ret, exp_pool_size; if (!dev->q || !dev->ops) return -EINVAL; - dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); + ret = nvm_init(dev); + if (ret) + return ret; + + exp_pool_size = max_t(int, PAGE_SIZE, + (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); + exp_pool_size = round_up(exp_pool_size, PAGE_SIZE); + + dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", + exp_pool_size); if (!dev->dma_pool) { pr_err("nvm: could not create dma pool\n"); + nvm_free(dev); return -ENOMEM; } - ret = nvm_init(dev); - if (ret) - goto err_init; - /* register device with a supported media manager */ down_write(&nvm_lock); list_add(&dev->devices, &nvm_devices); up_write(&nvm_lock); return 0; -err_init: - dev->ops->destroy_dma_pool(dev->dma_pool); - return ret; } EXPORT_SYMBOL(nvm_register); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 6944aac43b01..1ff165351180 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -250,8 +250,8 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd) if (rqd->nr_ppas == 1) return 0; - rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; - rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; + rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk); + rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk); return 0; } @@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk) { unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb); - if (secs_avail >= pblk->min_write_pgs) + if (secs_avail >= pblk->min_write_pgs_data) pblk_write_kick(pblk); } @@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct list_head *move_list = NULL; - int vsc = le32_to_cpu(*line->vsc); + int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data) + * (pblk->min_write_pgs - pblk->min_write_pgs_data); + int vsc = le32_to_cpu(*line->vsc) + packed_meta; lockdep_assert_held(&line->lock); @@ -531,7 +533,7 @@ void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd) if (caddr == 0) trace_pblk_chunk_state(pblk_disk_name(pblk), ppa, NVM_CHK_ST_OPEN); - else if (caddr == chunk->cnlb) + else if (caddr == (chunk->cnlb - 1)) trace_pblk_chunk_state(pblk_disk_name(pblk), ppa, NVM_CHK_ST_CLOSED); } @@ -620,12 +622,15 @@ out: } int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush) + unsigned long secs_to_flush, bool skip_meta) { int max = pblk->sec_per_write; int min = pblk->min_write_pgs; int secs_to_sync = 0; + if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs) + min = max = pblk->min_write_pgs_data; + if (secs_avail >= max) 
secs_to_sync = max; else if (secs_avail >= min) @@ -796,10 +801,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line, rqd.is_seq = 1; for (i = 0; i < lm->smeta_sec; i++, paddr++) { - struct pblk_sec_meta *meta_list = rqd.meta_list; + struct pblk_sec_meta *meta = pblk_get_meta(pblk, + rqd.meta_list, i); rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - meta_list[i].lba = lba_list[paddr] = addr_empty; + meta->lba = lba_list[paddr] = addr_empty; } ret = pblk_submit_io_sync_sem(pblk, &rqd); @@ -845,13 +851,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line, if (!meta_list) return -ENOMEM; - ppa_list = meta_list + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + ppa_list = meta_list + pblk_dma_meta_size(pblk); + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); next_rq: memset(&rqd, 0, sizeof(struct nvm_rq)); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); rq_len = rq_ppas * geo->csecs; bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, @@ -1276,6 +1282,7 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) return 0; } +/* Line allocations in the recovery path are always single threaded */ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1295,15 +1302,22 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) ret = pblk_line_alloc_bitmaps(pblk, line); if (ret) - return ret; + goto fail; if (!pblk_line_init_bb(pblk, line, 0)) { - list_add(&line->list, &l_mg->free_list); - return -EINTR; + ret = -EINTR; + goto fail; } pblk_rl_free_lines_dec(&pblk->rl, line, true); return 0; + +fail: + spin_lock(&l_mg->free_lock); + list_add(&line->list, &l_mg->free_list); + spin_unlock(&l_mg->free_lock); + + return ret; } void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) @@ -2160,3 +2174,38 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, } spin_unlock(&pblk->trans_lock); } + +void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd) +{ + void *buffer; + + if (pblk_is_oob_meta_supported(pblk)) { + /* Just use OOB metadata buffer as always */ + buffer = rqd->meta_list; + } else { + /* We need to reuse last page of request (packed metadata) + * in similar way as traditional oob metadata + */ + buffer = page_to_virt( + rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); + } + + return buffer; +} + +void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd) +{ + void *meta_list = rqd->meta_list; + void *page; + int i = 0; + + if (pblk_is_oob_meta_supported(pblk)) + return; + + page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page); + /* We need to fill oob meta buffer with data from packed metadata */ + for (; i < rqd->nr_ppas; i++) + memcpy(pblk_get_meta(pblk, meta_list, i), + page + (i * sizeof(struct pblk_sec_meta)), + sizeof(struct pblk_sec_meta)); +} diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 13822594647c..f9a3e47b6a93 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -207,9 +207,6 @@ static int pblk_rwb_init(struct pblk *pblk) return pblk_rb_init(&pblk->rwb, buffer_size, threshold, geo->csecs); } -/* Minimum pages needed within a lun */ -#define ADDR_POOL_SIZE 64 - static int pblk_set_addrf_12(struct pblk *pblk, struct nvm_geo *geo, struct nvm_addrf_12 *dst) { @@ -350,23 +347,19 @@ fail_destroy_ws: static int 
pblk_get_global_caches(void) { - int ret; + int ret = 0; mutex_lock(&pblk_caches.mutex); - if (kref_read(&pblk_caches.kref) > 0) { - kref_get(&pblk_caches.kref); - mutex_unlock(&pblk_caches.mutex); - return 0; - } + if (kref_get_unless_zero(&pblk_caches.kref)) + goto out; ret = pblk_create_global_caches(); - if (!ret) - kref_get(&pblk_caches.kref); + kref_init(&pblk_caches.kref); +out: mutex_unlock(&pblk_caches.mutex); - return ret; } @@ -406,12 +399,45 @@ static int pblk_core_init(struct pblk *pblk) pblk->nr_flush_rst = 0; pblk->min_write_pgs = geo->ws_opt; + pblk->min_write_pgs_data = pblk->min_write_pgs; max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); pblk->max_write_pgs = min_t(int, pblk->max_write_pgs, queue_max_hw_sectors(dev->q) / (geo->csecs >> SECTOR_SHIFT)); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); + pblk->oob_meta_size = geo->sos; + if (!pblk_is_oob_meta_supported(pblk)) { + /* For drives which does not have OOB metadata feature + * in order to support recovery feature we need to use + * so called packed metadata. Packed metada will store + * the same information as OOB metadata (l2p table mapping, + * but in the form of the single page at the end of + * every write request. + */ + if (pblk->min_write_pgs + * sizeof(struct pblk_sec_meta) > PAGE_SIZE) { + /* We want to keep all the packed metadata on single + * page per write requests. So we need to ensure that + * it will fit. + * + * This is more like sanity check, since there is + * no device with such a big minimal write size + * (above 1 metabytes). + */ + pblk_err(pblk, "Not supported min write size\n"); + return -EINVAL; + } + /* For packed meta approach we do some simplification. + * On read path we always issue requests which size + * equal to max_write_pgs, with all pages filled with + * user payload except of last one page which will be + * filled with packed metadata. 
+ */ + pblk->max_write_pgs = pblk->min_write_pgs; + pblk->min_write_pgs_data = pblk->min_write_pgs - 1; + } + pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t), GFP_KERNEL); if (!pblk->pad_dist) @@ -635,40 +661,61 @@ static unsigned int calc_emeta_len(struct pblk *pblk) return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]); } -static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) +static int pblk_set_provision(struct pblk *pblk, int nr_free_chks) { struct nvm_tgt_dev *dev = pblk->dev; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; sector_t provisioned; - int sec_meta, blk_meta; + int sec_meta, blk_meta, clba; + int minimum; if (geo->op == NVM_TARGET_DEFAULT_OP) pblk->op = PBLK_DEFAULT_OP; else pblk->op = geo->op; - provisioned = nr_free_blks; + minimum = pblk_get_min_chks(pblk); + provisioned = nr_free_chks; provisioned *= (100 - pblk->op); sector_div(provisioned, 100); - pblk->op_blks = nr_free_blks - provisioned; + if ((nr_free_chks - provisioned) < minimum) { + if (geo->op != NVM_TARGET_DEFAULT_OP) { + pblk_err(pblk, "OP too small to create a sane instance\n"); + return -EINTR; + } + + /* If the user did not specify an OP value, and PBLK_DEFAULT_OP + * is not enough, calculate and set sane value + */ + + provisioned = nr_free_chks - minimum; + pblk->op = (100 * minimum) / nr_free_chks; + pblk_info(pblk, "Default OP insufficient, adjusting OP to %d\n", + pblk->op); + } + + pblk->op_blks = nr_free_chks - provisioned; /* Internally pblk manages all free blocks, but all calculations based * on user capacity consider only provisioned blocks */ - pblk->rl.total_blocks = nr_free_blks; - pblk->rl.nr_secs = nr_free_blks * geo->clba; + pblk->rl.total_blocks = nr_free_chks; + pblk->rl.nr_secs = nr_free_chks * geo->clba; /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - pblk->capacity = (provisioned - blk_meta) * geo->clba; + clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data; + pblk->capacity = (provisioned - blk_meta) * clba; - atomic_set(&pblk->rl.free_blocks, nr_free_blks); - atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); + atomic_set(&pblk->rl.free_blocks, nr_free_chks); + atomic_set(&pblk->rl.free_user_blocks, nr_free_chks); + + return 0; } static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line, @@ -984,7 +1031,7 @@ static int pblk_lines_init(struct pblk *pblk) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line; void *chunk_meta; - long nr_free_chks = 0; + int nr_free_chks = 0; int i, ret; ret = pblk_line_meta_init(pblk); @@ -1031,7 +1078,9 @@ static int pblk_lines_init(struct pblk *pblk) goto fail_free_lines; } - pblk_set_provision(pblk, nr_free_chks); + ret = pblk_set_provision(pblk, nr_free_chks); + if (ret) + goto fail_free_lines; vfree(chunk_meta); return 0; @@ -1041,7 +1090,7 @@ fail_free_lines: pblk_line_meta_free(l_mg, &pblk->lines[i]); kfree(pblk->lines); fail_free_chunk_meta: - kfree(chunk_meta); + vfree(chunk_meta); fail_free_luns: kfree(pblk->luns); fail_free_meta: @@ -1154,6 +1203,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, return ERR_PTR(-EINVAL); } + if (geo->ext) { + pblk_err(pblk, "extended metadata not supported\n"); + kfree(pblk); + return ERR_PTR(-EINVAL); + } + spin_lock_init(&pblk->resubmit_lock); spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); 
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 6dcbd44e3acb..79df583ea709 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -22,7 +22,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, struct ppa_addr *ppa_list, unsigned long *lun_bitmap, - struct pblk_sec_meta *meta_list, + void *meta_list, unsigned int valid_secs) { struct pblk_line *line = pblk_line_get_data(pblk); @@ -33,6 +33,9 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, int nr_secs = pblk->min_write_pgs; int i; + if (!line) + return -ENOSPC; + if (pblk_line_is_full(line)) { struct pblk_line *prev_line = line; @@ -42,8 +45,11 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, line = pblk_line_replace_data(pblk); pblk_line_close_meta(pblk, prev_line); - if (!line) - return -EINTR; + if (!line) { + pblk_pipeline_stop(pblk); + return -ENOSPC; + } + } emeta = line->emeta; @@ -52,6 +58,7 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, paddr = pblk_alloc_page(pblk, line, nr_secs); for (i = 0; i < nr_secs; i++, paddr++) { + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); /* ppa to be sent to the device */ @@ -68,14 +75,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, kref_get(&line->ref); w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i); w_ctx->ppa = ppa_list[i]; - meta_list[i].lba = cpu_to_le64(w_ctx->lba); + meta->lba = cpu_to_le64(w_ctx->lba); lba_list[paddr] = cpu_to_le64(w_ctx->lba); if (lba_list[paddr] != addr_empty) line->nr_valid_lbas++; else atomic64_inc(&pblk->pad_wa); } else { - lba_list[paddr] = meta_list[i].lba = addr_empty; + lba_list[paddr] = addr_empty; + meta->lba = addr_empty; __pblk_map_invalidate(pblk, line, paddr); } } @@ -84,50 +92,57 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry, return 0; } -void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, +int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, unsigned int off) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = pblk_get_meta_for_writes(pblk, rqd); + void *meta_buffer; struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); unsigned int map_secs; int min = pblk->min_write_pgs; int i; + int ret; for (i = off; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? 
(valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, &meta_list[i], map_secs)) { - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - pblk_pipeline_stop(pblk); - } + meta_buffer = pblk_get_meta(pblk, meta_list, i); + + ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], + lun_bitmap, meta_buffer, map_secs); + if (ret) + return ret; } + + return 0; } /* only if erase_ppa is set, acquire erase semaphore */ -void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, +int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, struct ppa_addr *erase_ppa) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = pblk_get_meta_for_writes(pblk, rqd); + void *meta_buffer; struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd); struct pblk_line *e_line, *d_line; unsigned int map_secs; int min = pblk->min_write_pgs; int i, erase_lun; + int ret; + for (i = 0; i < rqd->nr_ppas; i += min) { map_secs = (i + min > valid_secs) ? (valid_secs % min) : min; - if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i], - lun_bitmap, &meta_list[i], map_secs)) { - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, PBLK_WRITE); - pblk_pipeline_stop(pblk); - } + meta_buffer = pblk_get_meta(pblk, meta_list, i); + + ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i], + lun_bitmap, meta_buffer, map_secs); + if (ret) + return ret; erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]); @@ -163,7 +178,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, */ e_line = pblk_line_get_erase(pblk); if (!e_line) - return; + return -ENOSPC; /* Erase blocks that are bad in this line but might not be in next */ if (unlikely(pblk_ppa_empty(*erase_ppa)) && @@ -174,7 +189,7 @@ retry: bit = find_next_bit(d_line->blk_bitmap, lm->blk_per_line, bit + 1); if (bit >= lm->blk_per_line) - return; + return 0; spin_lock(&e_line->lock); if (test_bit(bit, e_line->erase_bitmap)) { @@ -188,4 +203,6 @@ retry: *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */ erase_ppa->a.blk = e_line->id; } + + return 0; } diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index b1f4b51783f4..d4ca8c64ee0f 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -147,7 +147,7 @@ int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold, /* * Initialize rate-limiter, which controls access to the write buffer - * but user and GC I/O + * by user and GC I/O */ pblk_rl_init(&pblk->rl, rb->nr_entries); @@ -552,6 +552,9 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, to_read = count; } + /* Add space for packed metadata if in use*/ + pad += (pblk->min_write_pgs - pblk->min_write_pgs_data); + c_ctx->sentry = pos; c_ctx->nr_valid = to_read; c_ctx->nr_padded = pad; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 9fba614adeeb..3789185144da 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t blba, unsigned long *read_bitmap) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = rqd->meta_list; struct ppa_addr ppas[NVM_MAX_VLBA]; int nr_secs = rqd->nr_ppas; bool advanced_bio = false; @@ -53,12 +53,15 @@ static void pblk_read_ppalist_rq(struct 
pblk *pblk, struct nvm_rq *rqd, for (i = 0; i < nr_secs; i++) { struct ppa_addr p = ppas[i]; + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); sector_t lba = blba + i; retry: if (pblk_ppa_empty(p)) { + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + WARN_ON(test_and_set_bit(i, read_bitmap)); - meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); + meta->lba = addr_empty; if (unlikely(!advanced_bio)) { bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); @@ -78,7 +81,7 @@ retry: goto retry; } WARN_ON(test_and_set_bit(i, read_bitmap)); - meta_list[i].lba = cpu_to_le64(lba); + meta->lba = cpu_to_le64(lba); advanced_bio = true; #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_inc(&pblk->cache_reads); @@ -105,12 +108,16 @@ next: static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, sector_t blba) { - struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + void *meta_list = rqd->meta_list; int nr_lbas = rqd->nr_ppas; int i; + if (!pblk_is_oob_meta_supported(pblk)) + return; + for (i = 0; i < nr_lbas; i++) { - u64 lba = le64_to_cpu(meta_lba_list[i].lba); + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); + u64 lba = le64_to_cpu(meta->lba); if (lba == ADDR_EMPTY) continue; @@ -134,17 +141,22 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd, static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd, u64 *lba_list, int nr_lbas) { - struct pblk_sec_meta *meta_lba_list = rqd->meta_list; + void *meta_lba_list = rqd->meta_list; int i, j; + if (!pblk_is_oob_meta_supported(pblk)) + return; + for (i = 0, j = 0; i < nr_lbas; i++) { + struct pblk_sec_meta *meta = pblk_get_meta(pblk, + meta_lba_list, j); u64 lba = lba_list[i]; u64 meta_lba; if (lba == ADDR_EMPTY) continue; - meta_lba = le64_to_cpu(meta_lba_list[j].lba); + meta_lba = le64_to_cpu(meta->lba); if (lba != meta_lba) { #ifdef CONFIG_NVM_PBLK_DEBUG @@ -216,15 +228,15 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) struct pblk *pblk = rqd->private; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct pblk_pr_ctx *pr_ctx = r_ctx->private; + struct pblk_sec_meta *meta; struct bio *new_bio = rqd->bio; struct bio *bio = pr_ctx->orig_bio; struct bio_vec src_bv, dst_bv; - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = rqd->meta_list; int bio_init_idx = pr_ctx->bio_init_idx; unsigned long *read_bitmap = pr_ctx->bitmap; int nr_secs = pr_ctx->orig_nr_secs; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); - __le64 *lba_list_mem, *lba_list_media; void *src_p, *dst_p; int hole, i; @@ -237,13 +249,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) rqd->ppa_list[0] = ppa; } - /* Re-use allocated memory for intermediate lbas */ - lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); - lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size); - for (i = 0; i < nr_secs; i++) { - lba_list_media[i] = meta_list[i].lba; - meta_list[i].lba = lba_list_mem[i]; + meta = pblk_get_meta(pblk, meta_list, i); + pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba); + meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]); } /* Fill the holes in the original bio */ @@ -255,7 +264,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd) line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]); kref_put(&line->ref, pblk_line_put); - meta_list[hole].lba = lba_list_media[i]; + meta = pblk_get_meta(pblk, meta_list, hole); + meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]); src_bv = new_bio->bi_io_vec[i++]; dst_bv = bio->bi_io_vec[bio_init_idx + hole]; @@ -291,17 
+301,13 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, unsigned long *read_bitmap, int nr_holes) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + void *meta_list = rqd->meta_list; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct pblk_pr_ctx *pr_ctx; struct bio *new_bio, *bio = r_ctx->private; - __le64 *lba_list_mem; int nr_secs = rqd->nr_ppas; int i; - /* Re-use allocated memory for intermediate lbas */ - lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); - new_bio = bio_alloc(GFP_KERNEL, nr_holes); if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) @@ -312,12 +318,15 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, goto fail_free_pages; } - pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL); + pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL); if (!pr_ctx) goto fail_free_pages; - for (i = 0; i < nr_secs; i++) - lba_list_mem[i] = meta_list[i].lba; + for (i = 0; i < nr_secs; i++) { + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); + + pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba); + } new_bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(new_bio, REQ_OP_READ, 0); @@ -325,7 +334,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd, rqd->bio = new_bio; rqd->nr_ppas = nr_holes; - pr_ctx->ppa_ptr = NULL; pr_ctx->orig_bio = bio; bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA); pr_ctx->bio_init_idx = bio_init_idx; @@ -383,7 +391,7 @@ err: static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, sector_t lba, unsigned long *read_bitmap) { - struct pblk_sec_meta *meta_list = rqd->meta_list; + struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); struct ppa_addr ppa; pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); @@ -394,8 +402,10 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, retry: if (pblk_ppa_empty(ppa)) { + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + WARN_ON(test_and_set_bit(0, read_bitmap)); - meta_list[0].lba = cpu_to_le64(ADDR_EMPTY); + meta->lba = addr_empty; return; } @@ -409,7 +419,7 @@ retry: } WARN_ON(test_and_set_bit(0, read_bitmap)); - meta_list[0].lba = cpu_to_le64(lba); + meta->lba = cpu_to_le64(lba); #ifdef CONFIG_NVM_PBLK_DEBUG atomic_long_inc(&pblk->cache_reads); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 5740b7509bd8..3fcf062d752c 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -13,6 +13,9 @@ * General Public License for more details. * * pblk-recovery.c - pblk's recovery path + * + * The L2P recovery path is single threaded as the L2P table is updated in order + * following the line sequence ID. 
*/ #include "pblk.h" @@ -124,7 +127,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) struct pblk_recov_alloc { struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; + void *meta_list; struct nvm_rq *rqd; void *data; dma_addr_t dma_ppa_list; @@ -158,7 +161,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_sec_meta *meta_list; + void *meta_list; struct pblk_pad_rq *pad_rq; struct nvm_rq *rqd; struct bio *bio; @@ -188,7 +191,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line, kref_init(&pad_rq->ref); next_pad_rq: - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); if (rq_ppas < pblk->min_write_pgs) { pblk_err(pblk, "corrupted pad line %d\n", line->id); goto fail_free_pad; @@ -237,12 +240,15 @@ next_pad_rq: for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { struct ppa_addr dev_ppa; + struct pblk_sec_meta *meta; __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); pblk_map_invalidate(pblk, dev_ppa); - lba_list[w_ptr] = meta_list[i].lba = addr_empty; + lba_list[w_ptr] = addr_empty; + meta = pblk_get_meta(pblk, meta_list, i); + meta->lba = addr_empty; rqd->ppa_list[i] = dev_ppa; } } @@ -334,20 +340,21 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, struct pblk_recov_alloc p) { struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; + void *meta_list; struct nvm_rq *rqd; struct bio *bio; void *data; dma_addr_t dma_ppa_list, dma_meta_list; __le64 *lba_list; - u64 paddr = 0; + u64 paddr = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; bool padded = false; int rq_ppas, rq_len; int i, j; int ret; - u64 left_ppas = pblk_sec_in_open_line(pblk, line); + u64 left_ppas = pblk_sec_in_open_line(pblk, line) - lm->smeta_sec; if (pblk_line_wp_is_unbalanced(pblk, line)) pblk_warn(pblk, "recovering unbalanced line (%d)\n", line->id); @@ -364,17 +371,19 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, next_rq: memset(rqd, 0, pblk_g_rq_size); - rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); + rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); if (!rq_ppas) rq_ppas = pblk->min_write_pgs; rq_len = rq_ppas * geo->csecs; +retry_rq: bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); if (IS_ERR(bio)) return PTR_ERR(bio); bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_get(bio); rqd->bio = bio; rqd->opcode = NVM_OP_PREAD; @@ -387,7 +396,6 @@ next_rq: if (pblk_io_aligned(pblk, rq_ppas)) rqd->is_seq = 1; -retry_rq: for (i = 0; i < rqd->nr_ppas; ) { struct ppa_addr ppa; int pos; @@ -410,6 +418,7 @@ retry_rq: if (ret) { pblk_err(pblk, "I/O submission failed: %d\n", ret); bio_put(bio); + bio_put(bio); return ret; } @@ -421,20 +430,28 @@ retry_rq: if (padded) { pblk_log_read_err(pblk, rqd); + bio_put(bio); return -EINTR; } pad_distance = pblk_pad_distance(pblk, line); ret = pblk_recov_pad_line(pblk, line, pad_distance); - if (ret) + if (ret) { + bio_put(bio); return ret; + } padded = true; + bio_put(bio); goto retry_rq; } + pblk_get_packed_meta(pblk, rqd); + bio_put(bio); + for (i = 0; i < rqd->nr_ppas; i++) { - u64 lba = le64_to_cpu(meta_list[i].lba); + struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); + u64 lba = 
le64_to_cpu(meta->lba); lba_list[paddr++] = cpu_to_le64(lba); @@ -463,7 +480,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) struct nvm_geo *geo = &dev->geo; struct nvm_rq *rqd; struct ppa_addr *ppa_list; - struct pblk_sec_meta *meta_list; + void *meta_list; struct pblk_recov_alloc p; void *data; dma_addr_t dma_ppa_list, dma_meta_list; @@ -473,8 +490,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) if (!meta_list) return -ENOMEM; - ppa_list = (void *)(meta_list) + pblk_dma_meta_size; - dma_ppa_list = dma_meta_list + pblk_dma_meta_size; + ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk); + dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL); if (!data) { @@ -804,7 +821,6 @@ next: WARN_ON_ONCE(!test_and_clear_bit(meta_line, &l_mg->meta_bitmap)); spin_unlock(&l_mg->free_lock); - pblk_line_replace_data(pblk); } else { spin_lock(&l_mg->free_lock); /* Allocate next line for preparation */ diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index db55a1c89997..76116d5f78e4 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -214,11 +214,10 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE; int sec_meta, blk_meta; - unsigned int rb_windows; + /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); @@ -226,7 +225,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; rl->high_pw = get_count_order(rl->high); - rl->rsv_blocks = min_blocks; + rl->rsv_blocks = pblk_get_min_chks(pblk); /* This will always be a power-of-2 */ rb_windows = budget / NVM_MAX_VLBA; diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 2d2818155aa8..7d8958df9472 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -479,6 +479,13 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk, if (kstrtouint(page, 0, &sec_per_write)) return -EINVAL; + if (!pblk_is_oob_meta_supported(pblk)) { + /* For packed metadata case it is + * not allowed to change sec_per_write. 
+ */ + return -EINVAL; + } + if (sec_per_write < pblk->min_write_pgs || sec_per_write > pblk->max_write_pgs || sec_per_write % pblk->min_write_pgs != 0) diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index fa8726493b39..06d56deb645d 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -105,14 +105,20 @@ retry: } /* Map remaining sectors in chunk, starting from ppa */ -static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) +static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa, + int rqd_ppas) { struct pblk_line *line; struct ppa_addr map_ppa = *ppa; + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + __le64 *lba_list; u64 paddr; int done = 0; + int n = 0; line = pblk_ppa_to_line(pblk, *ppa); + lba_list = emeta_to_lbas(pblk, line->emeta->buf); + spin_lock(&line->lock); while (!done) { @@ -121,10 +127,17 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa) if (!test_and_set_bit(paddr, line->map_bitmap)) line->left_msecs--; + if (n < rqd_ppas && lba_list[paddr] != addr_empty) + line->nr_valid_lbas--; + + lba_list[paddr] = addr_empty; + if (!test_and_set_bit(paddr, line->invalid_bitmap)) le32_add_cpu(line->vsc, -1); done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa); + + n++; } line->w_err_gc->has_write_err = 1; @@ -148,9 +161,11 @@ static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry, w_ctx = &entry->w_ctx; /* Check if the lba has been overwritten */ - ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); - if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) - w_ctx->lba = ADDR_EMPTY; + if (w_ctx->lba != ADDR_EMPTY) { + ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba); + if (!pblk_ppa_comp(ppa_l2p, entry->cacheline)) + w_ctx->lba = ADDR_EMPTY; + } /* Mark up the entry as submittable again */ flags = READ_ONCE(w_ctx->flags); @@ -200,7 +215,7 @@ static void pblk_submit_rec(struct work_struct *work) pblk_log_write_err(pblk, rqd); - pblk_map_remaining(pblk, ppa_list); + pblk_map_remaining(pblk, ppa_list, rqd->nr_ppas); pblk_queue_resubmit(pblk, c_ctx); pblk_up_rq(pblk, c_ctx->lun_bitmap); @@ -319,12 +334,13 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, } if (likely(!e_line || !atomic_read(&e_line->left_eblks))) - pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); + ret = pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, + valid, 0); else - pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, + ret = pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, erase_ppa); - return 0; + return ret; } static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, @@ -332,7 +348,7 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, { int secs_to_sync; - secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush); + secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true); #ifdef CONFIG_NVM_PBLK_DEBUG if ((!secs_to_sync && secs_to_flush) @@ -548,15 +564,17 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) c_ctx->nr_padded); } -static int pblk_submit_write(struct pblk *pblk) +static int pblk_submit_write(struct pblk *pblk, int *secs_left) { struct bio *bio; struct nvm_rq *rqd; unsigned int secs_avail, secs_to_sync, secs_to_com; - unsigned int secs_to_flush; + unsigned int secs_to_flush, packed_meta_pgs; unsigned long pos; unsigned int resubmit; + *secs_left = 0; + spin_lock(&pblk->resubmit_lock); resubmit = !list_empty(&pblk->resubmit_list); 
spin_unlock(&pblk->resubmit_lock); @@ -586,17 +604,17 @@ static int pblk_submit_write(struct pblk *pblk) */ secs_avail = pblk_rb_read_count(&pblk->rwb); if (!secs_avail) - return 1; + return 0; secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); - if (!secs_to_flush && secs_avail < pblk->min_write_pgs) - return 1; + if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data) + return 0; secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); if (secs_to_sync > pblk->max_write_pgs) { pblk_err(pblk, "bad buffer sync calculation\n"); - return 1; + return 0; } secs_to_com = (secs_to_sync > secs_avail) ? @@ -604,7 +622,8 @@ static int pblk_submit_write(struct pblk *pblk) pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); } - bio = bio_alloc(GFP_KERNEL, secs_to_sync); + packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data); + bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs); bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -625,6 +644,7 @@ static int pblk_submit_write(struct pblk *pblk) atomic_long_add(secs_to_sync, &pblk->sub_writes); #endif + *secs_left = 1; return 0; fail_free_bio: @@ -633,16 +653,22 @@ fail_put_bio: bio_put(bio); pblk_free_rqd(pblk, rqd, PBLK_WRITE); - return 1; + return -EINTR; } int pblk_write_ts(void *data) { struct pblk *pblk = data; + int secs_left; + int write_failure = 0; while (!kthread_should_stop()) { - if (!pblk_submit_write(pblk)) - continue; + if (!write_failure) { + write_failure = pblk_submit_write(pblk, &secs_left); + + if (secs_left) + continue; + } set_current_state(TASK_INTERRUPTIBLE); io_schedule(); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 02bb2e98f8a9..85e38ed62f85 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -104,7 +104,6 @@ enum { PBLK_RL_LOW = 4 }; -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA) #define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA) /* write buffer completion context */ @@ -132,6 +131,8 @@ struct pblk_pr_ctx { unsigned int bio_init_idx; void *ppa_ptr; dma_addr_t dma_ppa_list; + __le64 lba_list_mem[NVM_MAX_VLBA]; + __le64 lba_list_media[NVM_MAX_VLBA]; }; /* Pad context */ @@ -631,7 +632,9 @@ struct pblk { int state; /* pblk line state */ int min_write_pgs; /* Minimum amount of pages required by controller */ + int min_write_pgs_data; /* Minimum amount of payload pages */ int max_write_pgs; /* Maximum amount of pages supported by controller */ + int oob_meta_size; /* Size of OOB sector metadata */ sector_t capacity; /* Device capacity when bad blocks are subtracted */ @@ -836,7 +839,7 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, - unsigned long secs_to_flush); + unsigned long secs_to_flush, bool skip_meta); void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa, unsigned long *lun_bitmap); void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa); @@ -860,6 +863,8 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs); void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, sector_t blba, int nr_secs); +void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); +void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); /* * pblk user I/O 
write path @@ -871,10 +876,10 @@ int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk map */ -void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, +int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, struct ppa_addr *erase_ppa); -void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, +int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry, unsigned long *lun_bitmap, unsigned int valid_secs, unsigned int off); @@ -905,7 +910,6 @@ int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); #define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */ #define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */ #define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */ -#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */ int pblk_gc_init(struct pblk *pblk); void pblk_gc_exit(struct pblk *pblk, bool graceful); @@ -1370,4 +1374,33 @@ static inline char *pblk_disk_name(struct pblk *pblk) return disk->disk_name; } + +static inline unsigned int pblk_get_min_chks(struct pblk *pblk) +{ + struct pblk_line_meta *lm = &pblk->lm; + /* In a worst-case scenario every line will have OP invalid sectors. + * We will then need a minimum of 1/OP lines to free up a single line + */ + + return DIV_ROUND_UP(100, pblk->op) * lm->blk_per_line; +} + +static inline struct pblk_sec_meta *pblk_get_meta(struct pblk *pblk, + void *meta, int index) +{ + return meta + + max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) + * index; +} + +static inline int pblk_dma_meta_size(struct pblk *pblk) +{ + return max_t(int, sizeof(struct pblk_sec_meta), pblk->oob_meta_size) + * NVM_MAX_VLBA; +} + +static inline int pblk_is_oob_meta_supported(struct pblk *pblk) +{ + return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta); +} #endif /* PBLK_H_ */ |
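
The pblk.h hunk at the end of this diff introduces pblk_get_meta(),
pblk_dma_meta_size() and pblk_is_oob_meta_supported(), which all reduce to one
stride calculation: per-sector metadata entries are spaced by
max(sizeof(struct pblk_sec_meta), geo->sos). A small user-space sketch of that
arithmetic follows; the struct layout mirrors pblk.h, while stride(), main()
and the sample geometries are illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    /* Same layout as struct pblk_sec_meta in drivers/lightnvm/pblk.h. */
    struct pblk_sec_meta {
            uint64_t reserved;
            uint64_t lba;           /* LBA mapped to this physical sector */
    };

    /* Per-sector metadata stride: at least sizeof(struct pblk_sec_meta),
     * or the drive's OOB area size (geo->sos) when that is larger. */
    static size_t stride(size_t oob_meta_size)
    {
            size_t min = sizeof(struct pblk_sec_meta);
            return oob_meta_size > min ? oob_meta_size : min;
    }

    int main(void)
    {
            /* Drive with a 16-byte OOB area: metadata travels out-of-band. */
            printf("oob=16 -> entry 3 at offset %zu\n", 3 * stride(16));

            /* Drive without usable OOB metadata (oob_meta_size < 16): the
             * entries are strided the same way in a host buffer, but that
             * buffer is written as one extra "packed metadata" page at the
             * end of every write request instead. */
            printf("oob=0  -> entry 3 at offset %zu\n", 3 * stride(0));
            return 0;
    }

Because one page of every min_write_pgs-sized write is then spent on packed
metadata rather than user data, the series also scales the exposed capacity by
min_write_pgs_data / min_write_pgs in pblk_set_provision().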