aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/lightnvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 10:34:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 10:34:51 -0700
commitc6b1e36c8fa04a6680c44fe0321d0370400e90b6 (patch)
tree5110f0639bfa803baa8d213cb21efe37beeaf742 /drivers/lightnvm
parent81e3e044897b0875a52953b3fb6241a33428e4f9 (diff)
parenta84ebb837b419787c2ece74efa566c998929cead (diff)
downloadkernel_replicant_linux-c6b1e36c8fa04a6680c44fe0321d0370400e90b6.tar.gz
kernel_replicant_linux-c6b1e36c8fa04a6680c44fe0321d0370400e90b6.tar.bz2
kernel_replicant_linux-c6b1e36c8fa04a6680c44fe0321d0370400e90b6.zip
Merge branch 'for-4.13/block' of git://git.kernel.dk/linux-block
Pull core block/IO updates from Jens Axboe: "This is the main pull request for the block layer for 4.13. Not a huge round in terms of features, but there's a lot of churn related to some core cleanups. Note this depends on the UUID tree pull request, that Christoph already sent out. This pull request contains: - A series from Christoph, unifying the error/stats codes in the block layer. We now use blk_status_t everywhere, instead of using different schemes for different places. - Also from Christoph, some cleanups around request allocation and IO scheduler interactions in blk-mq. - And yet another series from Christoph, cleaning up how we handle and do bounce buffering in the block layer. - A blk-mq debugfs series from Bart, further improving on the support we have for exporting internal information to aid debugging IO hangs or stalls. - Also from Bart, a series that cleans up the request initialization differences across types of devices. - A series from Goldwyn Rodrigues, allowing the block layer to return failure if we will block and the user asked for non-blocking. - Patch from Hannes for supporting setting loop devices block size to that of the underlying device. - Two series of patches from Javier, fixing various issues with lightnvm, particular around pblk. - A series from me, adding support for write hints. This comes with NVMe support as well, so applications can help guide data placement on flash to improve performance, latencies, and write amplification. - A series from Ming, improving and hardening blk-mq support for stopping/starting and quiescing hardware queues. - Two pull requests for NVMe updates. Nothing major on the feature side, but lots of cleanups and bug fixes. From the usual crew. - A series from Neil Brown, greatly improving the bio rescue set support. Most notably, this kills the bio rescue work queues, if we don't really need them. - Lots of other little bug fixes that are all over the place" * 'for-4.13/block' of git://git.kernel.dk/linux-block: (217 commits) lightnvm: pblk: set line bitmap check under debug lightnvm: pblk: verify that cache read is still valid lightnvm: pblk: add initialization check lightnvm: pblk: remove target using async. I/Os lightnvm: pblk: use vmalloc for GC data buffer lightnvm: pblk: use right metadata buffer for recovery lightnvm: pblk: schedule if data is not ready lightnvm: pblk: remove unused return variable lightnvm: pblk: fix double-free on pblk init lightnvm: pblk: fix bad le64 assignations nvme: Makefile: remove dead build rule blk-mq: map all HWQ also in hyperthreaded system nvmet-rdma: register ib_client to not deadlock in device removal nvme_fc: fix error recovery on link down. nvmet_fc: fix crashes on bad opcodes nvme_fc: Fix crash when nvme controller connection fails. nvme_fc: replace ioabort msleep loop with completion nvme_fc: fix double calls to nvme_cleanup_cmd() nvme-fabrics: verify that a controller returns the correct NQN nvme: simplify nvme_dev_attrs_are_visible ...
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r--drivers/lightnvm/core.c13
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-core.c617
-rw-r--r--drivers/lightnvm/pblk-gc.c475
-rw-r--r--drivers/lightnvm/pblk-init.c389
-rw-r--r--drivers/lightnvm/pblk-map.c75
-rw-r--r--drivers/lightnvm/pblk-rb.c106
-rw-r--r--drivers/lightnvm/pblk-read.c93
-rw-r--r--drivers/lightnvm/pblk-recovery.c275
-rw-r--r--drivers/lightnvm/pblk-rl.c90
-rw-r--r--drivers/lightnvm/pblk-sysfs.c94
-rw-r--r--drivers/lightnvm/pblk-write.c353
-rw-r--r--drivers/lightnvm/pblk.h296
-rw-r--r--drivers/lightnvm/rrpc.c10
14 files changed, 1925 insertions, 969 deletions
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa608ad95..ddae430b6eae 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
}
mutex_unlock(&dev->mlock);
- if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
- return -ENOMEM;
+ ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
+ if (ret)
+ return ret;
t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
if (!dev->ops->submit_io)
return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
nvm_rq_tgt_to_dev(tgt_dev, rqd);
rqd->dev = tgt_dev;
- return dev->ops->submit_io(dev, rqd);
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io(dev, rqd);
+ if (ret)
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+ return ret;
}
EXPORT_SYMBOL(nvm_submit_io);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea88db84..024a8fc93069 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
- if (ret == NVM_IO_REQUEUE) {
+ switch (ret) {
+ case NVM_IO_REQUEUE:
io_schedule();
goto retry;
+ case NVM_IO_ERR:
+ pblk_pipeline_stop(pblk);
+ goto out;
}
if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
+ pblk_rl_inserted(&pblk->rl, nr_entries);
+
out:
pblk_write_should_kick(pblk);
return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..11fe0c5b2a9c 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
*/
#include "pblk.h"
-#include <linux/time.h>
static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
line->id, pos);
- pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
+ pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
}
static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
*ppa = rqd->ppa_addr;
pblk_mark_bb(pblk, line, ppa);
}
+
+ atomic_dec(&pblk->inflight_io);
}
/* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
- up(&pblk->erase_sem);
__pblk_end_io_erase(pblk, rqd);
- mempool_free(rqd, pblk->r_rq_pool);
+ mempool_free(rqd, pblk->g_rq_pool);
}
-static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
spin_unlock(&line->lock);
return;
}
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
if (line->state == PBLK_LINESTATE_CLOSED)
move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
__pblk_map_invalidate(pblk, line, paddr);
}
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr)
-{
- __pblk_map_invalidate(pblk, line, paddr);
-
- pblk_rb_sync_init(&pblk->rwb, NULL);
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
- pblk_rb_sync_end(&pblk->rwb, NULL);
-}
-
static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
unsigned int nr_secs)
{
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
pool = pblk->w_rq_pool;
rq_size = pblk_w_rq_size;
} else {
- pool = pblk->r_rq_pool;
- rq_size = pblk_r_rq_size;
+ pool = pblk->g_rq_pool;
+ rq_size = pblk_g_rq_size;
}
rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
if (rw == WRITE)
pool = pblk->w_rq_pool;
else
- pool = pblk->r_rq_pool;
+ pool = pblk->g_rq_pool;
mempool_free(rqd, pool);
}
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
complete(waiting);
}
-void pblk_flush_writer(struct pblk *pblk)
+void pblk_wait_for_meta(struct pblk *pblk)
{
- struct bio *bio;
- int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
-
- bio = bio_alloc(GFP_KERNEL, 1);
- if (!bio)
- return;
-
- bio->bi_iter.bi_sector = 0; /* internal bio */
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
- bio->bi_private = &wait;
- bio->bi_end_io = pblk_end_bio_sync;
+ do {
+ if (!atomic_read(&pblk->inflight_io))
+ break;
- ret = pblk_write_to_cache(pblk, bio, 0);
- if (ret == NVM_IO_OK) {
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: flush cache timed out\n");
- }
- } else if (ret != NVM_IO_DONE) {
- pr_err("pblk: tear down bio failed\n");
- }
+ schedule();
+ } while (1);
+}
- if (bio->bi_error)
- pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
+static void pblk_flush_writer(struct pblk *pblk)
+{
+ pblk_rb_flush(&pblk->rwb);
+ do {
+ if (!pblk_rb_sync_count(&pblk->rwb))
+ break;
- bio_put(bio);
+ pblk_write_kick(pblk);
+ schedule();
+ } while (1);
}
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list = NULL;
+ int vsc = le32_to_cpu(*line->vsc);
- if (!line->vsc) {
+ lockdep_assert_held(&line->lock);
+
+ if (!vsc) {
if (line->gc_group != PBLK_LINEGC_FULL) {
line->gc_group = PBLK_LINEGC_FULL;
move_list = &l_mg->gc_full_list;
}
- } else if (line->vsc < lm->mid_thrs) {
+ } else if (vsc < lm->high_thrs) {
if (line->gc_group != PBLK_LINEGC_HIGH) {
line->gc_group = PBLK_LINEGC_HIGH;
move_list = &l_mg->gc_high_list;
}
- } else if (line->vsc < lm->high_thrs) {
+ } else if (vsc < lm->mid_thrs) {
if (line->gc_group != PBLK_LINEGC_MID) {
line->gc_group = PBLK_LINEGC_MID;
move_list = &l_mg->gc_mid_list;
}
- } else if (line->vsc < line->sec_in_line) {
+ } else if (vsc < line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_LOW) {
line->gc_group = PBLK_LINEGC_LOW;
move_list = &l_mg->gc_low_list;
}
- } else if (line->vsc == line->sec_in_line) {
+ } else if (vsc == line->sec_in_line) {
if (line->gc_group != PBLK_LINEGC_EMPTY) {
line->gc_group = PBLK_LINEGC_EMPTY;
move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
line->gc_group = PBLK_LINEGC_NONE;
move_list = &l_mg->corrupt_list;
pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
- line->id, line->vsc,
+ line->id, vsc,
line->sec_in_line,
lm->high_thrs, lm->mid_thrs);
}
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
#endif
}
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
+{
+ pblk->sec_per_write = sec_per_write;
+}
+
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
}
}
#endif
+
+ atomic_inc(&pblk->inflight_io);
+
return nvm_submit_io(dev, rqd);
}
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask)
+ int alloc_type, gfp_t gfp_mask)
{
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
void *kaddr = data;
struct page *page;
struct bio *bio;
int i, ret;
- if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
+ if (alloc_type == PBLK_KMALLOC_META)
return bio_map_kern(dev->q, kaddr, len, gfp_mask);
bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush)
{
- int max = pblk->max_write_pgs;
+ int max = pblk->sec_per_write;
int min = pblk->min_write_pgs;
int secs_to_sync = 0;
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
return secs_to_sync;
}
-static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
- int nr_secs)
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
+{
+ u64 addr;
+ int i;
+
+ addr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ line->cur_sec = addr - nr_secs;
+
+ for (i = 0; i < nr_secs; i++, line->cur_sec--)
+ WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
+}
+
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
{
u64 addr;
int i;
+ lockdep_assert_held(&line->lock);
+
/* logic error: ppa out-of-bounds. Prevent generating bad address */
if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
return addr;
}
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
+{
+ u64 paddr;
+
+ spin_lock(&line->lock);
+ paddr = find_next_zero_bit(line->map_bitmap,
+ pblk->lm.sec_per_line, line->cur_sec);
+ spin_unlock(&line->lock);
+
+ return paddr;
+}
+
/*
* Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
* taking the per LUN semaphore.
*/
static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
- u64 paddr, int dir)
+ void *emeta_buf, u64 paddr, int dir)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
+ void *ppa_list, *meta_list;
struct bio *bio;
struct nvm_rq rqd;
- struct ppa_addr *ppa_list;
- dma_addr_t dma_ppa_list;
- void *emeta = line->emeta;
+ dma_addr_t dma_ppa_list, dma_meta_list;
int min = pblk->min_write_pgs;
- int left_ppas = lm->emeta_sec;
+ int left_ppas = lm->emeta_sec[0];
int id = line->id;
int rq_ppas, rq_len;
int cmd_op, bio_op;
- int flags;
int i, j;
int ret;
DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
if (dir == WRITE) {
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
- flags = pblk_set_progr_mode(pblk, WRITE);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
} else
return -EINVAL;
- ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
- if (!ppa_list)
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &dma_meta_list);
+ if (!meta_list)
return -ENOMEM;
+ ppa_list = meta_list + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
next_rq:
memset(&rqd, 0, sizeof(struct nvm_rq));
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
rq_len = rq_ppas * geo->sec_size;
- bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
bio_set_op_attrs(bio, bio_op, 0);
rqd.bio = bio;
- rqd.opcode = cmd_op;
- rqd.flags = flags;
- rqd.nr_ppas = rq_ppas;
+ rqd.meta_list = meta_list;
rqd.ppa_list = ppa_list;
+ rqd.dma_meta_list = dma_meta_list;
rqd.dma_ppa_list = dma_ppa_list;
+ rqd.opcode = cmd_op;
+ rqd.nr_ppas = rq_ppas;
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
if (dir == WRITE) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
+ rqd.flags = pblk_set_progr_mode(pblk, WRITE);
for (i = 0; i < rqd.nr_ppas; ) {
spin_lock(&line->lock);
paddr = __pblk_alloc_page(pblk, line, min);
spin_unlock(&line->lock);
- for (j = 0; j < min; j++, i++, paddr++)
+ for (j = 0; j < min; j++, i++, paddr++) {
+ meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
rqd.ppa_list[i] =
addr_to_gen_ppa(pblk, paddr, id);
+ }
}
} else {
for (i = 0; i < rqd.nr_ppas; ) {
struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
int pos = pblk_dev_ppa_to_pos(geo, ppa);
+ int read_type = PBLK_READ_RANDOM;
+
+ if (pblk_io_aligned(pblk, rq_ppas))
+ read_type = PBLK_READ_SEQUENTIAL;
+ rqd.flags = pblk_set_read_mode(pblk, read_type);
while (test_bit(pos, line->blk_bitmap)) {
paddr += min;
@@ -645,9 +673,11 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: emeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
- bio_put(bio);
+ if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
if (rqd.error) {
if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
pblk_log_read_err(pblk, &rqd);
}
- emeta += rq_len;
+ emeta_buf += rq_len;
left_ppas -= rq_ppas;
if (left_ppas)
goto next_rq;
free_rqd_dma:
- nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
bio_op = REQ_OP_WRITE;
cmd_op = NVM_OP_PWRITE;
flags = pblk_set_progr_mode(pblk, WRITE);
- lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ lba_list = emeta_to_lbas(pblk, line->emeta->buf);
} else if (dir == READ) {
bio_op = REQ_OP_READ;
cmd_op = NVM_OP_PREAD;
- flags = pblk_set_read_mode(pblk);
+ flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
} else
return -EINVAL;
memset(&rqd, 0, sizeof(struct nvm_rq));
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
return -ENOMEM;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
+
bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
+ struct pblk_sec_meta *meta_list = rqd.meta_list;
+
rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
- if (dir == WRITE)
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
+
+ if (dir == WRITE) {
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ meta_list[i].lba = lba_list[paddr] = addr_empty;
+ }
}
/*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: smeta I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
}
free_ppa_list:
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return ret;
}
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
}
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf)
{
- return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
+ return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
+ line->emeta_ssec, READ);
}
static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
struct nvm_rq rqd;
- int ret;
+ int ret = 0;
DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
- return 0;
+ return ret;
}
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_meta *lm = &pblk->lm;
struct ppa_addr ppa;
- int bit = -1;
+ int ret, bit = -1;
/* Erase only good blocks, one at a time */
do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
spin_unlock(&line->lock);
- if (pblk_blk_erase_sync(pblk, ppa)) {
+ ret = pblk_blk_erase_sync(pblk, ppa);
+ if (ret) {
pr_err("pblk: failed to erase line %d\n", line->id);
- return -ENOMEM;
+ return ret;
}
} while (1);
return 0;
}
+static void pblk_line_setup_metadata(struct pblk_line *line,
+ struct pblk_line_mgmt *l_mg,
+ struct pblk_line_meta *lm)
+{
+ int meta_line;
+
+ lockdep_assert_held(&l_mg->free_lock);
+
+retry_meta:
+ meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
+ if (meta_line == PBLK_DATA_LINES) {
+ spin_unlock(&l_mg->free_lock);
+ io_schedule();
+ spin_lock(&l_mg->free_lock);
+ goto retry_meta;
+ }
+
+ set_bit(meta_line, &l_mg->meta_bitmap);
+ line->meta_line = meta_line;
+
+ line->smeta = l_mg->sline_meta[meta_line];
+ line->emeta = l_mg->eline_meta[meta_line];
+
+ memset(line->smeta, 0, lm->smeta_len);
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
+
+ line->emeta->mem = 0;
+ atomic_set(&line->emeta->sync, 0);
+}
+
/* For now lines are always assumed full lines. Thus, smeta former and current
* lun bitmaps are omitted.
*/
-static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
+static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
struct pblk_line *cur)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct line_smeta *smeta = line->smeta;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+ struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
int nr_blk_line;
/* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
}
/* Run-time metadata */
- line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
+ line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
/* Mark LUNs allocated in this line (all for now) */
bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
- smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
- memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
- smeta->header.id = cpu_to_le32(line->id);
- smeta->header.type = cpu_to_le16(line->type);
- smeta->header.version = cpu_to_le16(1);
+ smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
+ memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
+ smeta_buf->header.id = cpu_to_le32(line->id);
+ smeta_buf->header.type = cpu_to_le16(line->type);
+ smeta_buf->header.version = cpu_to_le16(1);
/* Start metadata */
- smeta->seq_nr = cpu_to_le64(line->seq_nr);
- smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
+ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
/* Fill metadata among lines */
if (cur) {
memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
- smeta->prev_id = cpu_to_le32(cur->id);
- cur->emeta->next_id = cpu_to_le32(line->id);
+ smeta_buf->prev_id = cpu_to_le32(cur->id);
+ cur->emeta->buf->next_id = cpu_to_le32(line->id);
} else {
- smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
}
/* All smeta must be set at this point */
- smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
- smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
+ smeta_buf->header.crc = cpu_to_le32(
+ pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
+ smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
/* End metadata */
- memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
- emeta->seq_nr = cpu_to_le64(line->seq_nr);
- emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
- emeta->nr_valid_lbas = cpu_to_le64(0);
- emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
- emeta->crc = cpu_to_le32(0);
- emeta->prev_id = smeta->prev_id;
+ memcpy(&emeta_buf->header, &smeta_buf->header,
+ sizeof(struct line_header));
+ emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
+ emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
+ emeta_buf->nr_valid_lbas = cpu_to_le64(0);
+ emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
+ emeta_buf->crc = cpu_to_le32(0);
+ emeta_buf->prev_id = smeta_buf->prev_id;
return 1;
}
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
/* Mark smeta metadata sectors as bad sectors */
bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
off = bit * geo->sec_per_pl;
-retry_smeta:
bitmap_set(line->map_bitmap, off, lm->smeta_sec);
line->sec_in_line -= lm->smeta_sec;
line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
pr_debug("pblk: line smeta I/O failed. Retry\n");
- off += geo->sec_per_pl;
- goto retry_smeta;
+ return 1;
}
bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
* blocks to make sure that there are enough sectors to store emeta
*/
bit = lm->sec_per_line;
- off = lm->sec_per_line - lm->emeta_sec;
- bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
+ off = lm->sec_per_line - lm->emeta_sec[0];
+ bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
while (nr_bb) {
off -= geo->sec_per_pl;
if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
}
}
- line->sec_in_line -= lm->emeta_sec;
+ line->sec_in_line -= lm->emeta_sec[0];
line->emeta_ssec = off;
- line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
+ line->nr_valid_lbas = 0;
+ line->left_msecs = line->sec_in_line;
+ *line->vsc = cpu_to_le32(line->sec_in_line);
if (lm->sec_per_line - line->sec_in_line !=
bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
spin_lock(&line->lock);
if (line->state != PBLK_LINESTATE_FREE) {
+ mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ mempool_free(line->map_bitmap, pblk->line_meta_pool);
spin_unlock(&line->lock);
- WARN(1, "pblk: corrupted line state\n");
- return -EINTR;
+ WARN(1, "pblk: corrupted line %d, state %d\n",
+ line->id, line->state);
+ return -EAGAIN;
}
+
line->state = PBLK_LINESTATE_OPEN;
atomic_set(&line->left_eblks, blk_in_line);
atomic_set(&line->left_seblks, blk_in_line);
+
+ line->meta_distance = lm->meta_distance;
spin_unlock(&line->lock);
/* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *line = NULL;
- int bit;
+ struct pblk_line *line;
+ int ret, bit;
lockdep_assert_held(&l_mg->free_lock);
-retry_get:
+retry:
if (list_empty(&l_mg->free_list)) {
pr_err("pblk: no free lines\n");
- goto out;
+ return NULL;
}
line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
list_add_tail(&line->list, &l_mg->bad_list);
pr_debug("pblk: line %d is bad\n", line->id);
- goto retry_get;
+ goto retry;
}
- if (pblk_line_prepare(pblk, line)) {
- pr_err("pblk: failed to prepare line %d\n", line->id);
- list_add(&line->list, &l_mg->free_list);
- return NULL;
+ ret = pblk_line_prepare(pblk, line);
+ if (ret) {
+ if (ret == -EAGAIN) {
+ list_add(&line->list, &l_mg->corrupt_list);
+ goto retry;
+ } else {
+ pr_err("pblk: failed to prepare line %d\n", line->id);
+ list_add(&line->list, &l_mg->free_list);
+ l_mg->nr_free_lines++;
+ return NULL;
+ }
}
-out:
return line;
}
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *retry_line;
+retry:
spin_lock(&l_mg->free_lock);
retry_line = pblk_line_get(pblk);
if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
l_mg->data_line = retry_line;
spin_unlock(&l_mg->free_lock);
- if (pblk_line_erase(pblk, retry_line)) {
- spin_lock(&l_mg->free_lock);
- l_mg->data_line = NULL;
- spin_unlock(&l_mg->free_lock);
- return NULL;
- }
-
pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+ if (pblk_line_erase(pblk, retry_line))
+ goto retry;
+
return retry_line;
}
+static void pblk_set_space_limit(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+
+ atomic_set(&rl->rb_space, 0);
+}
+
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line;
- int meta_line;
int is_next = 0;
spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
line->type = PBLK_LINETYPE_DATA;
l_mg->data_line = line;
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- set_bit(meta_line, &l_mg->meta_bitmap);
- line->smeta = l_mg->sline_meta[meta_line].meta;
- line->emeta = l_mg->eline_meta[meta_line].meta;
- line->meta_line = meta_line;
+ pblk_line_setup_metadata(line, l_mg, &pblk->lm);
/* Allocate next line for preparation */
l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_set_space_limit(pblk);
+
+ l_mg->data_next = NULL;
+ } else {
l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
l_mg->data_next->type = PBLK_LINETYPE_DATA;
is_next = 1;
}
spin_unlock(&l_mg->free_lock);
+ if (pblk_line_erase(pblk, line)) {
+ line = pblk_line_retry(pblk, line);
+ if (!line)
+ return NULL;
+ }
+
pblk_rl_free_lines_dec(&pblk->rl, line);
if (is_next)
pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
- if (pblk_line_erase(pblk, line))
- return NULL;
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, line, NULL)) {
+ if (!pblk_line_init_metadata(pblk, line, NULL)) {
line = pblk_line_retry(pblk, line);
if (!line)
return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
return line;
}
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
+static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
+{
+ lockdep_assert_held(&pblk->l_mg.free_lock);
+
+ pblk_set_space_limit(pblk);
+ pblk->state = PBLK_STATE_STOPPING;
+}
+
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ int ret;
+
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state == PBLK_STATE_RECOVERING ||
+ pblk->state == PBLK_STATE_STOPPED) {
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+ pblk->state = PBLK_STATE_RECOVERING;
+ spin_unlock(&l_mg->free_lock);
+
+ pblk_flush_writer(pblk);
+ pblk_wait_for_meta(pblk);
+
+ ret = pblk_recov_pad(pblk);
+ if (ret) {
+ pr_err("pblk: could not close data on teardown(%d)\n", ret);
+ return;
+ }
+
+ flush_workqueue(pblk->bb_wq);
+ pblk_line_close_meta_sync(pblk);
+
+ spin_lock(&l_mg->free_lock);
+ pblk->state = PBLK_STATE_STOPPED;
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+}
+
+void pblk_line_replace_data(struct pblk *pblk)
{
- struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *cur, *new;
unsigned int left_seblks;
- int meta_line;
int is_next = 0;
cur = l_mg->data_line;
new = l_mg->data_next;
if (!new)
- return NULL;
+ return;
l_mg->data_line = new;
-retry_line:
+ spin_lock(&l_mg->free_lock);
+ if (pblk->state != PBLK_STATE_RUNNING) {
+ l_mg->data_line = NULL;
+ l_mg->data_next = NULL;
+ spin_unlock(&l_mg->free_lock);
+ return;
+ }
+
+ pblk_line_setup_metadata(new, l_mg, &pblk->lm);
+ spin_unlock(&l_mg->free_lock);
+
+retry_erase:
left_seblks = atomic_read(&new->left_seblks);
if (left_seblks) {
/* If line is not fully erased, erase it */
if (atomic_read(&new->left_eblks)) {
if (pblk_line_erase(pblk, new))
- return NULL;
+ return;
} else {
io_schedule();
}
- goto retry_line;
+ goto retry_erase;
}
- spin_lock(&l_mg->free_lock);
- /* Allocate next line for preparation */
- l_mg->data_next = pblk_line_get(pblk);
- if (l_mg->data_next) {
- l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
- l_mg->data_next->type = PBLK_LINETYPE_DATA;
- is_next = 1;
- }
-
-retry_meta:
- meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- if (meta_line == PBLK_DATA_LINES) {
- spin_unlock(&l_mg->free_lock);
- io_schedule();
- spin_lock(&l_mg->free_lock);
- goto retry_meta;
- }
-
- set_bit(meta_line, &l_mg->meta_bitmap);
- new->smeta = l_mg->sline_meta[meta_line].meta;
- new->emeta = l_mg->eline_meta[meta_line].meta;
- new->meta_line = meta_line;
-
- memset(new->smeta, 0, lm->smeta_len);
- memset(new->emeta, 0, lm->emeta_len);
- spin_unlock(&l_mg->free_lock);
-
- if (is_next)
- pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
retry_setup:
- if (!pblk_line_set_metadata(pblk, new, cur)) {
+ if (!pblk_line_init_metadata(pblk, new, cur)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
@@ -1292,12 +1410,30 @@ retry_setup:
if (!pblk_line_init_bb(pblk, new, 1)) {
new = pblk_line_retry(pblk, new);
if (!new)
- return NULL;
+ return;
goto retry_setup;
}
- return new;
+ /* Allocate next line for preparation */
+ spin_lock(&l_mg->free_lock);
+ l_mg->data_next = pblk_line_get(pblk);
+ if (!l_mg->data_next) {
+ /* If we cannot get a new line, we need to stop the pipeline.
+ * Only allow as many writes in as we can store safely and then
+ * fail gracefully
+ */
+ pblk_stop_writes(pblk, new);
+ l_mg->data_next = NULL;
+ } else {
+ l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
+ l_mg->data_next->type = PBLK_LINETYPE_DATA;
+ is_next = 1;
+ }
+ spin_unlock(&l_mg->free_lock);
+
+ if (is_next)
+ pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
}
void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
if (line->invalid_bitmap)
mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
+ *line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
line->map_bitmap = NULL;
line->invalid_bitmap = NULL;
line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
struct nvm_rq *rqd;
int err;
- rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
- memset(rqd, 0, pblk_r_rq_size);
+ rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
+ memset(rqd, 0, pblk_g_rq_size);
pblk_setup_e_rq(pblk, rqd, ppa);
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
return pblk->l_mg.data_line;
}
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
+/* For now, always erase next line */
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
{
return pblk->l_mg.data_next;
}
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
return (line->left_msecs == 0);
}
+void pblk_line_close_meta_sync(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line *line, *tline;
+ LIST_HEAD(list);
+
+ spin_lock(&l_mg->close_lock);
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return;
+ }
+
+ list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
+ spin_unlock(&l_mg->close_lock);
+
+ list_for_each_entry_safe(line, tline, &list, list) {
+ struct pblk_emeta *emeta = line->emeta;
+
+ while (emeta->mem < lm->emeta_len[0]) {
+ int ret;
+
+ ret = pblk_submit_meta_io(pblk, line);
+ if (ret) {
+ pr_err("pblk: sync meta line %d failed (%d)\n",
+ line->id, ret);
+ return;
+ }
+ }
+ }
+
+ pblk_wait_for_meta(pblk);
+ flush_workqueue(pblk->close_wq);
+}
+
+static void pblk_line_should_sync_meta(struct pblk *pblk)
+{
+ if (pblk_rl_is_limit(&pblk->rl))
+ pblk_line_close_meta_sync(pblk);
+}
+
void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct list_head *move_list;
- line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
-
- if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
- pr_err("pblk: line %d close I/O failed\n", line->id);
+#ifdef CONFIG_NVM_DEBUG
+ struct pblk_line_meta *lm = &pblk->lm;
- WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
+ WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
"pblk: corrupt closed line %d\n", line->id);
+#endif
spin_lock(&l_mg->free_lock);
WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
spin_unlock(&line->lock);
spin_unlock(&l_mg->gc_lock);
+
+ pblk_gc_should_kick(pblk);
+}
+
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
+
+ /* No need for exact vsc value; avoid a big line lock and take aprox. */
+ memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
+ memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
+
+ emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
+ emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
+
+ spin_lock(&l_mg->close_lock);
+ spin_lock(&line->lock);
+ list_add_tail(&line->list, &l_mg->emeta_list);
+ spin_unlock(&line->lock);
+ spin_unlock(&l_mg->close_lock);
+
+ pblk_line_should_sync_meta(pblk);
}
void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
}
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *))
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq)
{
struct pblk_line_ws *line_ws;
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
line_ws->priv = priv;
INIT_WORK(&line_ws->ws, work);
- queue_work(pblk->kw_wq, &line_ws->ws);
+ queue_work(wq, &line_ws->ws);
}
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
- int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
+ int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
int ret;
/*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
/* If the LUN has been locked for this same request, do no attempt to
* lock it again
*/
- if (test_and_set_bit(lun_id, lun_bitmap))
+ if (test_and_set_bit(pos, lun_bitmap))
return;
- rlun = &pblk->luns[lun_id];
+ rlun = &pblk->luns[pos];
ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
if (ret) {
switch (ret) {
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c6b63c..6090d28f7995 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
- kfree(gc_rq->data);
- kfree(gc_rq->lba_list);
+ vfree(gc_rq->data);
kfree(gc_rq);
}
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
return 1;
}
- list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
- list_move_tail(&gc_rq->list, &w_list);
- gc->w_entries--;
- }
+ list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+ gc->w_entries = 0;
spin_unlock(&gc->w_lock);
list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
gc_rq->nr_secs, gc_rq->secs_to_gc,
gc_rq->line, PBLK_IOTYPE_GC);
- kref_put(&gc_rq->line->ref, pblk_line_put);
-
list_del(&gc_rq->list);
+ kref_put(&gc_rq->line->ref, pblk_line_put);
pblk_gc_free_gc_rq(gc_rq);
}
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
* Responsible for managing all memory related to a gc request. Also in case of
* failure
*/
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
- u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_gc_rq *gc_rq;
+ struct pblk_line *line = gc_rq->line;
void *data;
unsigned int secs_to_gc;
- int ret = NVM_IO_OK;
+ int ret = 0;
- data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+ data = vmalloc(gc_rq->nr_secs * geo->sec_size);
if (!data) {
- ret = NVM_IO_ERR;
- goto free_lba_list;
+ ret = -ENOMEM;
+ goto out;
}
/* Read from GC victim block */
- if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+ if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
&secs_to_gc, line)) {
- ret = NVM_IO_ERR;
+ ret = -EFAULT;
goto free_data;
}
if (!secs_to_gc)
- goto free_data;
-
- gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
- if (!gc_rq) {
- ret = NVM_IO_ERR;
- goto free_data;
- }
+ goto free_rq;
- gc_rq->line = line;
gc_rq->data = data;
- gc_rq->lba_list = lba_list;
- gc_rq->nr_secs = nr_secs;
gc_rq->secs_to_gc = secs_to_gc;
- kref_get(&line->ref);
-
retry:
spin_lock(&gc->w_lock);
- if (gc->w_entries > 256) {
+ if (gc->w_entries >= PBLK_GC_W_QD) {
spin_unlock(&gc->w_lock);
- usleep_range(256, 1024);
+ pblk_gc_writer_kick(&pblk->gc);
+ usleep_range(128, 256);
goto retry;
}
gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
pblk_gc_writer_kick(&pblk->gc);
- return NVM_IO_OK;
+ return 0;
+free_rq:
+ kfree(gc_rq);
free_data:
- kfree(data);
-free_lba_list:
- kfree(lba_list);
-
+ vfree(data);
+out:
+ kref_put(&line->ref, pblk_line_put);
return ret;
}
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
static void pblk_gc_line_ws(struct work_struct *work)
{
+ struct pblk_line_ws *line_rq_ws = container_of(work,
+ struct pblk_line_ws, ws);
+ struct pblk *pblk = line_rq_ws->pblk;
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line = line_rq_ws->line;
+ struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+ up(&gc->gc_sem);
+
+ if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+ pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+ line->id, *line->vsc,
+ gc_rq->nr_secs);
+ }
+
+ mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
ws);
struct pblk *pblk = line_ws->pblk;
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line = line_ws->line;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
- __le64 *lba_list = line_ws->priv;
- u64 *gc_list;
- int sec_left;
- int nr_ppas, bit;
- int put_line = 1;
+ struct pblk_gc *gc = &pblk->gc;
+ struct line_emeta *emeta_buf;
+ struct pblk_line_ws *line_rq_ws;
+ struct pblk_gc_rq *gc_rq;
+ __le64 *lba_list;
+ int sec_left, nr_secs, bit;
+ int ret;
- pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+ emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+ GFP_KERNEL);
+ if (!emeta_buf) {
+ pr_err("pblk: cannot use GC emeta\n");
+ return;
+ }
- spin_lock(&line->lock);
- sec_left = line->vsc;
- if (!sec_left) {
- /* Lines are erased before being used (l_mg->data_/log_next) */
- spin_unlock(&line->lock);
- goto out;
+ ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+ if (ret) {
+ pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+ goto fail_free_emeta;
+ }
+
+ /* If this read fails, it means that emeta is corrupted. For now, leave
+ * the line untouched. TODO: Implement a recovery routine that scans and
+ * moves all sectors on the line.
+ */
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
+ if (!lba_list) {
+ pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
+ goto fail_free_emeta;
}
- spin_unlock(&line->lock);
+ sec_left = pblk_line_vsc(line);
if (sec_left < 0) {
pr_err("pblk: corrupted GC line (%d)\n", line->id);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
+ goto fail_free_emeta;
}
bit = -1;
next_rq:
- gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
- if (!gc_list) {
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+ if (!gc_rq)
+ goto fail_free_emeta;
- nr_ppas = 0;
+ nr_secs = 0;
do {
bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
bit + 1);
if (bit > line->emeta_ssec)
break;
- gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
- } while (nr_ppas < pblk->max_write_pgs);
+ gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+ } while (nr_secs < pblk->max_write_pgs);
- if (unlikely(!nr_ppas)) {
- kfree(gc_list);
+ if (unlikely(!nr_secs)) {
+ kfree(gc_rq);
goto out;
}
- if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
- pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
- line->id, line->vsc,
- nr_ppas, nr_ppas);
- put_line = 0;
- pblk_put_line_back(pblk, line);
- goto out;
- }
+ gc_rq->nr_secs = nr_secs;
+ gc_rq->line = line;
+
+ line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_rq_ws)
+ goto fail_free_gc_rq;
- sec_left -= nr_ppas;
+ line_rq_ws->pblk = pblk;
+ line_rq_ws->line = line;
+ line_rq_ws->priv = gc_rq;
+
+ down(&gc->gc_sem);
+ kref_get(&line->ref);
+
+ INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+ queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
+
+ sec_left -= nr_secs;
if (sec_left > 0)
goto next_rq;
out:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
mempool_free(line_ws, pblk->line_ws_pool);
- atomic_dec(&pblk->gc.inflight_gc);
- if (put_line)
- kref_put(&line->ref, pblk_line_put);
+
+ kref_put(&line->ref, pblk_line_put);
+ atomic_dec(&gc->inflight_gc);
+
+ return;
+
+fail_free_gc_rq:
+ kfree(gc_rq);
+fail_free_emeta:
+ pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+ pblk_put_line_back(pblk, line);
+ kref_put(&line->ref, pblk_line_put);
+ mempool_free(line_ws, pblk->line_ws_pool);
+ atomic_dec(&gc->inflight_gc);
+
+ pr_err("pblk: Failed to GC line %d\n", line->id);
}
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
- struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_gc *gc = &pblk->gc;
struct pblk_line_ws *line_ws;
- __le64 *lba_list;
- int ret;
- line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
- line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
- GFP_KERNEL);
- if (!line->emeta) {
- pr_err("pblk: cannot use GC emeta\n");
- goto fail_free_ws;
- }
-
- ret = pblk_line_read_emeta(pblk, line);
- if (ret) {
- pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
- goto fail_free_emeta;
- }
+ pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
- /* If this read fails, it means that emeta is corrupted. For now, leave
- * the line untouched. TODO: Implement a recovery routine that scans and
- * moves all sectors on the line.
- */
- lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
- if (!lba_list) {
- pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
- goto fail_free_emeta;
- }
+ line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+ if (!line_ws)
+ return -ENOMEM;
line_ws->pblk = pblk;
line_ws->line = line;
- line_ws->priv = lba_list;
- INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
- queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+ INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+ queue_work(gc->gc_reader_wq, &line_ws->ws);
return 0;
+}
-fail_free_emeta:
- pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
-fail_free_ws:
- mempool_free(line_ws, pblk->line_ws_pool);
- pblk_put_line_back(pblk, line);
+static int pblk_gc_read(struct pblk *pblk)
+{
+ struct pblk_gc *gc = &pblk->gc;
+ struct pblk_line *line;
+
+ spin_lock(&gc->r_lock);
+ if (list_empty(&gc->r_list)) {
+ spin_unlock(&gc->r_lock);
+ return 1;
+ }
+
+ line = list_first_entry(&gc->r_list, struct pblk_line, list);
+ list_del(&line->list);
+ spin_unlock(&gc->r_lock);
+
+ pblk_gc_kick(pblk);
- return 1;
+ if (pblk_gc_line(pblk, line))
+ pr_err("pblk: failed to GC line %d\n", line->id);
+
+ return 0;
}
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
- struct pblk_line *line, *tline;
+ wake_up_process(gc->gc_reader_ts);
+}
- list_for_each_entry_safe(line, tline, gc_list, list) {
- if (pblk_gc_line(pblk, line))
- pr_err("pblk: failed to GC line %d\n", line->id);
- list_del(&line->list);
+static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
+ struct list_head *group_list)
+{
+ struct pblk_line *line, *victim;
+ int line_vsc, victim_vsc;
+
+ victim = list_first_entry(group_list, struct pblk_line, list);
+ list_for_each_entry(line, group_list, list) {
+ line_vsc = le32_to_cpu(*line->vsc);
+ victim_vsc = le32_to_cpu(*victim->vsc);
+ if (line_vsc < victim_vsc)
+ victim = line;
}
+
+ return victim;
+}
+
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+ unsigned int nr_blocks_free, nr_blocks_need;
+
+ nr_blocks_need = pblk_rl_high_thrs(rl);
+ nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+ /* This is not critical, no need to take lock here */
+ return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}
/*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_gc *gc = &pblk->gc;
- struct pblk_line *line, *tline;
- unsigned int nr_blocks_free, nr_blocks_need;
+ struct pblk_line *line;
struct list_head *group_list;
- int run_gc, gc_group = 0;
- int prev_gc = 0;
- int inflight_gc = atomic_read(&gc->inflight_gc);
- LIST_HEAD(gc_list);
+ bool run_gc;
+ int inflight_gc, gc_group = 0, prev_group = 0;
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(&l_mg->gc_full_list)) {
+ spin_unlock(&l_mg->gc_lock);
+ break;
+ }
+
+ line = list_first_entry(&l_mg->gc_full_list,
+ struct pblk_line, list);
- spin_lock(&l_mg->gc_lock);
- list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
spin_unlock(&line->lock);
list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
kref_put(&line->ref, pblk_line_put);
- }
- spin_unlock(&l_mg->gc_lock);
+ } while (1);
- nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
- nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+ return;
next_gc_group:
group_list = l_mg->gc_lists[gc_group++];
- spin_lock(&l_mg->gc_lock);
- while (run_gc && !list_empty(group_list)) {
- /* No need to queue up more GC lines than we can handle */
- if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+ do {
+ spin_lock(&l_mg->gc_lock);
+ if (list_empty(group_list)) {
spin_unlock(&l_mg->gc_lock);
- pblk_gc_lines(pblk, &gc_list);
- return;
+ break;
}
- line = list_first_entry(group_list, struct pblk_line, list);
- nr_blocks_free += atomic_read(&line->blk_in_line);
+ line = pblk_gc_get_victim_line(pblk, group_list);
spin_lock(&line->lock);
WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
line->state = PBLK_LINESTATE_GC;
- list_move_tail(&line->list, &gc_list);
- atomic_inc(&gc->inflight_gc);
- inflight_gc++;
spin_unlock(&line->lock);
- prev_gc = 1;
- run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
- }
- spin_unlock(&l_mg->gc_lock);
+ list_del(&line->list);
+ spin_unlock(&l_mg->gc_lock);
+
+ spin_lock(&gc->r_lock);
+ list_add_tail(&line->list, &gc->r_list);
+ spin_unlock(&gc->r_lock);
- pblk_gc_lines(pblk, &gc_list);
+ inflight_gc = atomic_inc_return(&gc->inflight_gc);
+ pblk_gc_reader_kick(gc);
- if (!prev_gc && pblk->rl.rb_state > gc_group &&
- gc_group < PBLK_NR_GC_LISTS)
+ prev_group = 1;
+
+ /* No need to queue up more GC lines than we can handle */
+ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+ if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+ break;
+ } while (1);
+
+ if (!prev_group && pblk->rl.rb_state > gc_group &&
+ gc_group < PBLK_GC_NR_LISTS)
goto next_gc_group;
}
-
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
wake_up_process(gc->gc_ts);
pblk_gc_writer_kick(gc);
+ pblk_gc_reader_kick(gc);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
return 0;
}
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
{
- pblk->gc.gc_active = 1;
+ struct pblk *pblk = data;
- pr_debug("pblk: gc start\n");
+ while (!kthread_should_stop()) {
+ if (!pblk_gc_read(pblk))
+ continue;
+ set_current_state(TASK_INTERRUPTIBLE);
+ io_schedule();
+ }
+
+ return 0;
}
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
{
- struct pblk_gc *gc = &pblk->gc;
- int ret;
-
- spin_lock(&gc->lock);
- ret = gc->gc_active;
- spin_unlock(&gc->lock);
-
- return ret;
+ pblk->gc.gc_active = 1;
+ pr_debug("pblk: gc start\n");
}
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
{
struct pblk_gc *gc = &pblk->gc;
- lockdep_assert_held(&gc->lock);
-
if (gc->gc_enabled && !gc->gc_active)
pblk_gc_start(pblk);
-}
-void pblk_gc_should_start(struct pblk *pblk)
-{
- struct pblk_gc *gc = &pblk->gc;
-
- spin_lock(&gc->lock);
- __pblk_gc_should_start(pblk);
- spin_unlock(&gc->lock);
+ pblk_gc_kick(pblk);
}
/*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
*/
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
- spin_lock(&pblk->gc.lock);
pblk->gc.gc_active = 0;
- spin_unlock(&pblk->gc.lock);
-
pr_debug("pblk: gc stop\n");
}
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
spin_unlock(&gc->lock);
}
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
struct pblk_gc *gc = &pblk->gc;
- int rsv = 0;
+
+ if (force < 0 || force > 1)
+ return -EINVAL;
spin_lock(&gc->lock);
- if (force) {
- gc->gc_enabled = 1;
- rsv = 64;
- }
- pblk_rl_set_gc_rsc(&pblk->rl, rsv);
gc->gc_forced = force;
- __pblk_gc_should_start(pblk);
+
+ if (force)
+ gc->gc_enabled = 1;
+ else
+ gc->gc_enabled = 0;
spin_unlock(&gc->lock);
+
+ pblk_gc_should_start(pblk);
+
+ return 0;
}
int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
goto fail_free_main_kthread;
}
+ gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+ "pblk-gc-reader-ts");
+ if (IS_ERR(gc->gc_reader_ts)) {
+ pr_err("pblk: could not allocate GC reader kthread\n");
+ ret = PTR_ERR(gc->gc_reader_ts);
+ goto fail_free_writer_kthread;
+ }
+
setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
gc->gc_active = 0;
gc->gc_forced = 0;
gc->gc_enabled = 1;
- gc->gc_jobs_active = 8;
gc->w_entries = 0;
atomic_set(&gc->inflight_gc, 0);
- gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+ /* Workqueue that reads valid sectors from a line and submit them to the
+ * GC writer to be recycled.
+ */
+ gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+ if (!gc->gc_line_reader_wq) {
+ pr_err("pblk: could not allocate GC line reader workqueue\n");
+ ret = -ENOMEM;
+ goto fail_free_reader_kthread;
+ }
+
+ /* Workqueue that prepare lines for GC */
+ gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!gc->gc_reader_wq) {
pr_err("pblk: could not allocate GC reader workqueue\n");
ret = -ENOMEM;
- goto fail_free_writer_kthread;
+ goto fail_free_reader_line_wq;
}
spin_lock_init(&gc->lock);
spin_lock_init(&gc->w_lock);
+ spin_lock_init(&gc->r_lock);
+
+ sema_init(&gc->gc_sem, 128);
+
INIT_LIST_HEAD(&gc->w_list);
+ INIT_LIST_HEAD(&gc->r_list);
return 0;
+fail_free_reader_line_wq:
+ destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+ kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
struct pblk_gc *gc = &pblk->gc;
flush_workqueue(gc->gc_reader_wq);
+ flush_workqueue(gc->gc_line_reader_wq);
del_timer(&gc->gc_timer);
pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
if (gc->gc_ts)
kthread_stop(gc->gc_ts);
- if (pblk->gc.gc_reader_wq)
- destroy_workqueue(pblk->gc.gc_reader_wq);
+ if (gc->gc_reader_wq)
+ destroy_workqueue(gc->gc_reader_wq);
+
+ if (gc->gc_line_reader_wq)
+ destroy_workqueue(gc->gc_line_reader_wq);
if (gc->gc_writer_ts)
kthread_stop(gc->gc_writer_ts);
+
+ if (gc->gc_reader_ts)
+ kthread_stop(gc->gc_reader_ts);
}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..1b0f61233c21 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
#include "pblk.h"
-static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
- *pblk_w_rq_cache, *pblk_line_meta_cache;
+static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
+ *pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);
+struct bio_set *pblk_bio_set;
static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* constraint. Writes can be of arbitrary size.
*/
if (bio_data_dir(bio) == READ) {
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
ret = pblk_submit_read(pblk, bio);
if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
* available for user I/O.
*/
if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
return -ENOMEM;
}
- pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
+ pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
0, 0, NULL);
- if (!pblk_r_rq_cache) {
+ if (!pblk_g_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_w_rq_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
}
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
if (!pblk_line_meta_cache) {
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
up_write(&pblk_lock);
return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- int max_write_ppas;
- int mod;
- pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
- max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
- pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
- max_write_ppas : nvm_max_phys_sects(dev);
pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
geo->nr_planes * geo->nr_luns;
- if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
- pr_err("pblk: cannot support device max_phys_sect\n");
- return -EINVAL;
- }
-
- div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
- if (mod) {
- pr_err("pblk: bad configuration of sectors/pages\n");
- return -EINVAL;
- }
-
if (pblk_init_global_caches(pblk))
return -ENOMEM;
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->page_pool)
return -ENOMEM;
- pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
+ pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
pblk_blk_ws_cache);
if (!pblk->line_ws_pool)
goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
if (!pblk->rec_pool)
goto free_blk_ws_pool;
- pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
- if (!pblk->r_rq_pool)
+ pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
+ pblk_g_rq_cache);
+ if (!pblk->g_rq_pool)
goto free_rec_pool;
- pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
+ pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
+ pblk_w_rq_cache);
if (!pblk->w_rq_pool)
- goto free_r_rq_pool;
+ goto free_g_rq_pool;
pblk->line_meta_pool =
- mempool_create_slab_pool(16, pblk_line_meta_cache);
+ mempool_create_slab_pool(PBLK_META_POOL_SIZE,
+ pblk_line_meta_cache);
if (!pblk->line_meta_pool)
goto free_w_rq_pool;
- pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
- WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!pblk->kw_wq)
+ pblk->close_wq = alloc_workqueue("pblk-close-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
+ if (!pblk->close_wq)
goto free_line_meta_pool;
+ pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
+ WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+ if (!pblk->bb_wq)
+ goto free_close_wq;
+
if (pblk_set_ppaf(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
if (pblk_rwb_init(pblk))
- goto free_kw_wq;
+ goto free_bb_wq;
INIT_LIST_HEAD(&pblk->compl_list);
return 0;
-free_kw_wq:
- destroy_workqueue(pblk->kw_wq);
+free_bb_wq:
+ destroy_workqueue(pblk->bb_wq);
+free_close_wq:
+ destroy_workqueue(pblk->close_wq);
free_line_meta_pool:
mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
mempool_destroy(pblk->w_rq_pool);
-free_r_rq_pool:
- mempool_destroy(pblk->r_rq_pool);
+free_g_rq_pool:
+ mempool_destroy(pblk->g_rq_pool);
free_rec_pool:
mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
static void pblk_core_free(struct pblk *pblk)
{
- if (pblk->kw_wq)
- destroy_workqueue(pblk->kw_wq);
+ if (pblk->close_wq)
+ destroy_workqueue(pblk->close_wq);
+
+ if (pblk->bb_wq)
+ destroy_workqueue(pblk->bb_wq);
mempool_destroy(pblk->page_pool);
mempool_destroy(pblk->line_ws_pool);
mempool_destroy(pblk->rec_pool);
- mempool_destroy(pblk->r_rq_pool);
+ mempool_destroy(pblk->g_rq_pool);
mempool_destroy(pblk->w_rq_pool);
mempool_destroy(pblk->line_meta_pool);
kmem_cache_destroy(pblk_blk_ws_cache);
kmem_cache_destroy(pblk_rec_cache);
- kmem_cache_destroy(pblk_r_rq_cache);
+ kmem_cache_destroy(pblk_g_rq_cache);
kmem_cache_destroy(pblk_w_rq_cache);
kmem_cache_destroy(pblk_line_meta_cache);
}
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
kfree(pblk->luns);
}
+static void pblk_free_line_bitmaps(struct pblk_line *line)
+{
+ kfree(line->blk_bitmap);
+ kfree(line->erase_bitmap);
+}
+
static void pblk_lines_free(struct pblk *pblk)
{
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
line = &pblk->lines[i];
pblk_line_free(pblk, line);
- kfree(line->blk_bitmap);
- kfree(line->erase_bitmap);
+ pblk_free_line_bitmaps(line);
}
spin_unlock(&l_mg->free_lock);
}
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
kfree(l_mg->bb_template);
kfree(l_mg->bb_aux);
+ kfree(l_mg->vsc_list);
+ spin_lock(&l_mg->free_lock);
for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
+ kfree(l_mg->sline_meta[i]);
+ pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
+ kfree(l_mg->eline_meta[i]);
}
+ spin_unlock(&l_mg->free_lock);
kfree(pblk->lines);
}
@@ -411,13 +417,31 @@ out:
return ret;
}
-static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
+static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
+ int blk_per_line)
{
- struct pblk_line_meta *lm = &pblk->lm;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
struct pblk_lun *rlun;
int bb_cnt = 0;
int i;
+ for (i = 0; i < blk_per_line; i++) {
+ rlun = &pblk->luns[i];
+ if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
+ continue;
+
+ set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
+ bb_cnt++;
+ }
+
+ return bb_cnt;
+}
+
+static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+
line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
if (!line->blk_bitmap)
return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
return -ENOMEM;
}
- for (i = 0; i < lm->blk_per_line; i++) {
- rlun = &pblk->luns[i];
- if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
- continue;
-
- set_bit(i, line->blk_bitmap);
- bb_cnt++;
- }
-
- return bb_cnt;
+ return 0;
}
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
}
/* See comment over struct line_emeta definition */
-static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
+static unsigned int calc_emeta_len(struct pblk *pblk)
{
- return (sizeof(struct line_emeta) +
- ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
- (pblk->l_mg.nr_lines * sizeof(u32)) +
- lm->blk_bitmap_len);
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+
+ /* Round to sector size so that lba_list starts on its own sector */
+ lm->emeta_sec[1] = DIV_ROUND_UP(
+ sizeof(struct line_emeta) + lm->blk_bitmap_len,
+ geo->sec_size);
+ lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
+
+ /* Round to sector size so that vsc_list starts on its own sector */
+ lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
+ lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
+ geo->sec_size);
+ lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
+
+ lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
+ geo->sec_size);
+ lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
+
+ lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
+
+ return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}
+static int pblk_lines_alloc_metadata(struct pblk *pblk)
+{
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ int i;
+
+ /* smeta is always small enough to fit on a kmalloc memory allocation,
+ * emeta depends on the number of LUNs allocated to the pblk instance
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
+ if (!l_mg->sline_meta[i])
+ goto fail_free_smeta;
+ }
+
+ /* emeta allocates three different buffers for managing metadata with
+ * in-memory and in-media layouts
+ */
+ for (i = 0; i < PBLK_DATA_LINES; i++) {
+ struct pblk_emeta *emeta;
+
+ emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
+ if (!emeta)
+ goto fail_free_emeta;
+
+ if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
+ l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
+
+ emeta->buf = vmalloc(lm->emeta_len[0]);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ } else {
+ l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
+
+ emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
+ if (!emeta->buf) {
+ kfree(emeta);
+ goto fail_free_emeta;
+ }
+
+ emeta->nr_entries = lm->emeta_sec[0];
+ l_mg->eline_meta[i] = emeta;
+ }
+ }
+
+ l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
+ if (!l_mg->vsc_list)
+ goto fail_free_emeta;
+
+ for (i = 0; i < l_mg->nr_lines; i++)
+ l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
+
+ return 0;
+
+fail_free_emeta:
+ while (--i >= 0) {
+ vfree(l_mg->eline_meta[i]->buf);
+ kfree(l_mg->eline_meta[i]);
+ }
+
+fail_free_smeta:
+ for (i = 0; i < PBLK_DATA_LINES; i++)
+ kfree(l_mg->sline_meta[i]);
+
+ return -ENOMEM;
+}
+
static int pblk_lines_init(struct pblk *pblk)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *line;
unsigned int smeta_len, emeta_len;
- long nr_bad_blks, nr_meta_blks, nr_free_blks;
- int bb_distance;
- int i;
- int ret;
+ long nr_bad_blks, nr_free_blks;
+ int bb_distance, max_write_ppas, mod;
+ int i, ret;
+
+ pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
+ max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
+ pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
+ max_write_ppas : nvm_max_phys_sects(dev);
+ pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
+
+ if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
+ pr_err("pblk: cannot support device max_phys_sect\n");
+ return -EINVAL;
+ }
+
+ div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
+ if (mod) {
+ pr_err("pblk: bad configuration of sectors/pages\n");
+ return -EINVAL;
+ }
+
+ l_mg->nr_lines = geo->blks_per_lun;
+ l_mg->log_line = l_mg->data_line = NULL;
+ l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
+ l_mg->nr_free_lines = 0;
+ bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
lm->high_thrs = lm->sec_per_line / 2;
lm->mid_thrs = lm->sec_per_line / 4;
+ lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
/* Calculate necessary pages for smeta. See comment over struct
* line_smeta definition
*/
- lm->smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
-
i = 1;
add_smeta_page:
lm->smeta_sec = i * geo->sec_per_pl;
lm->smeta_len = lm->smeta_sec * geo->sec_size;
- smeta_len = sizeof(struct line_smeta) +
- PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
+ smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
if (smeta_len > lm->smeta_len) {
i++;
goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
*/
i = 1;
add_emeta_page:
- lm->emeta_sec = i * geo->sec_per_pl;
- lm->emeta_len = lm->emeta_sec * geo->sec_size;
+ lm->emeta_sec[0] = i * geo->sec_per_pl;
+ lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
- emeta_len = calc_emeta_len(pblk, lm);
- if (emeta_len > lm->emeta_len) {
+ emeta_len = calc_emeta_len(pblk);
+ if (emeta_len > lm->emeta_len[0]) {
i++;
goto add_emeta_page;
}
- lm->emeta_bb = geo->nr_luns - i;
-
- nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
- (geo->sec_per_blk / 2)) / geo->sec_per_blk;
- lm->min_blk_line = nr_meta_blks + 1;
-
- l_mg->nr_lines = geo->blks_per_lun;
- l_mg->log_line = l_mg->data_line = NULL;
- l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
- l_mg->nr_free_lines = 0;
- bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
- /* smeta is always small enough to fit on a kmalloc memory allocation,
- * emeta depends on the number of LUNs allocated to the pblk instance
- */
- l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
- if (!l_mg->sline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->sline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
+ lm->emeta_bb = geo->nr_luns - i;
+ lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
+ geo->sec_per_blk);
+ if (lm->min_blk_line > lm->blk_per_line) {
+ pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
+ lm->blk_per_line);
+ ret = -EINVAL;
+ goto fail;
}
- if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
- l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- vfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- } else {
- l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
-
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- l_mg->eline_meta[i].meta =
- kmalloc(lm->emeta_len, GFP_KERNEL);
- if (!l_mg->eline_meta[i].meta)
- while (--i >= 0) {
- kfree(l_mg->eline_meta[i].meta);
- ret = -ENOMEM;
- goto fail;
- }
- }
- }
+ ret = pblk_lines_alloc_metadata(pblk);
+ if (ret)
+ goto fail;
l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
INIT_LIST_HEAD(&l_mg->gc_low_list);
INIT_LIST_HEAD(&l_mg->gc_empty_list);
+ INIT_LIST_HEAD(&l_mg->emeta_list);
+
l_mg->gc_lists[0] = &l_mg->gc_high_list;
l_mg->gc_lists[1] = &l_mg->gc_mid_list;
l_mg->gc_lists[2] = &l_mg->gc_low_list;
spin_lock_init(&l_mg->free_lock);
+ spin_lock_init(&l_mg->close_lock);
spin_lock_init(&l_mg->gc_lock);
pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
line->type = PBLK_LINETYPE_FREE;
line->state = PBLK_LINESTATE_FREE;
line->gc_group = PBLK_LINEGC_NONE;
+ line->vsc = &l_mg->vsc_list[i];
spin_lock_init(&line->lock);
- nr_bad_blks = pblk_bb_line(pblk, line);
+ ret = pblk_alloc_line_bitmaps(pblk, line);
+ if (ret)
+ goto fail_free_lines;
+
+ nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+ pblk_free_line_bitmaps(line);
ret = -EINVAL;
goto fail_free_lines;
}
@@ -713,24 +810,20 @@ add_emeta_page:
pblk_set_provision(pblk, nr_free_blks);
- sema_init(&pblk->erase_sem, 1);
-
/* Cleanup per-LUN bad block lists - managed within lines on run-time */
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
return 0;
fail_free_lines:
- kfree(pblk->lines);
+ while (--i >= 0)
+ pblk_free_line_bitmaps(&pblk->lines[i]);
fail_free_bb_aux:
kfree(l_mg->bb_aux);
fail_free_bb_template:
kfree(l_mg->bb_template);
fail_free_meta:
- for (i = 0; i < PBLK_DATA_LINES; i++) {
- pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
- pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
- }
+ pblk_line_meta_free(pblk);
fail:
for (i = 0; i < geo->nr_luns; i++)
kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
static void pblk_writer_stop(struct pblk *pblk)
{
+ /* The pipeline must be stopped and the write buffer emptied before the
+ * write thread is stopped
+ */
+ WARN(pblk_rb_read_count(&pblk->rwb),
+ "Stopping not fully persisted write buffer\n");
+
+ WARN(pblk_rb_sync_count(&pblk->rwb),
+ "Stopping not fully synced write buffer\n");
+
if (pblk->writer_ts)
kthread_stop(pblk->writer_ts);
del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
static void pblk_tear_down(struct pblk *pblk)
{
- pblk_flush_writer(pblk);
+ pblk_pipeline_stop(pblk);
pblk_writer_stop(pblk);
pblk_rb_sync_l2p(&pblk->rwb);
- pblk_recov_pad(pblk);
pblk_rwb_free(pblk);
pblk_rl_free(&pblk->rl);
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
pblk->dev = dev;
pblk->disk = tdisk;
+ pblk->state = PBLK_STATE_RUNNING;
spin_lock_init(&pblk->trans_lock);
spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
atomic_long_set(&pblk->req_writes, 0);
atomic_long_set(&pblk->sub_writes, 0);
atomic_long_set(&pblk->sync_writes, 0);
- atomic_long_set(&pblk->compl_writes, 0);
atomic_long_set(&pblk->inflight_reads, 0);
+ atomic_long_set(&pblk->cache_reads, 0);
atomic_long_set(&pblk->sync_reads, 0);
atomic_long_set(&pblk->recov_writes, 0);
atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
static int __init pblk_module_init(void)
{
- return nvm_register_tgt_type(&tt_pblk);
+ int ret;
+
+ pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
+ if (!pblk_bio_set)
+ return -ENOMEM;
+ ret = nvm_register_tgt_type(&tt_pblk);
+ if (ret)
+ bioset_free(pblk_bio_set);
+ return ret;
}
static void pblk_module_exit(void)
{
+ bioset_free(pblk_bio_set);
nvm_unregister_tgt_type(&tt_pblk);
}
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c16955284d..fddb924f6dde 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
struct pblk_w_ctx *w_ctx;
- __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
w_ctx->ppa = ppa_list[i];
meta_list[i].lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
- le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
+ line->nr_valid_lbas++;
} else {
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
- pblk_map_pad_invalidate(pblk, line, paddr);
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
+
+ lba_list[paddr] = meta_list[i].lba = addr_empty;
+ __pblk_map_invalidate(pblk, line, paddr);
}
}
if (pblk_line_is_full(line)) {
- line = pblk_line_replace_data(pblk);
- if (!line)
- return;
+ struct pblk_line *prev_line = line;
+
+ pblk_line_replace_data(pblk);
+ pblk_line_close_meta(pblk, prev_line);
}
pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
+ struct pblk_line_meta *lm = &pblk->lm;
struct pblk_sec_meta *meta_list = rqd->meta_list;
+ struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
lun_bitmap, &meta_list[i], map_secs);
- erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
- rqd->ppa_list[i].g.ch;
+ erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
- if (!test_bit(erase_lun, e_line->erase_bitmap)) {
- if (down_trylock(&pblk->erase_sem))
- continue;
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
+ valid_secs, i + min);
+ spin_lock(&e_line->lock);
+ if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
+
*erase_ppa = rqd->ppa_list[i];
erase_ppa->g.blk = e_line->id;
+ spin_unlock(&e_line->lock);
+
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
+ spin_unlock(&e_line->lock);
}
- /* Erase blocks that are bad in this line but might not be in next */
- if (unlikely(ppa_empty(*erase_ppa))) {
- struct pblk_line_meta *lm = &pblk->lm;
+ d_line = pblk_line_get_data(pblk);
+
+ /* line can change after page map. We might also be writing the
+ * last line.
+ */
+ e_line = pblk_line_get_erase(pblk);
+ if (!e_line)
+ return;
- i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
- if (i == lm->blk_per_line)
+ /* Erase blocks that are bad in this line but might not be in next */
+ if (unlikely(ppa_empty(*erase_ppa)) &&
+ bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
+ int bit = -1;
+
+retry:
+ bit = find_next_bit(d_line->blk_bitmap,
+ lm->blk_per_line, bit + 1);
+ if (bit >= lm->blk_per_line)
return;
- set_bit(i, e_line->erase_bitmap);
+ spin_lock(&e_line->lock);
+ if (test_bit(bit, e_line->erase_bitmap)) {
+ spin_unlock(&e_line->lock);
+ goto retry;
+ }
+ spin_unlock(&e_line->lock);
+
+ set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
- *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
+ *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->g.blk = e_line->id;
}
}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384ddc1f9..5ecc154f6831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
/* Release flags on context. Protect from writes and reads */
smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
pblk_ppa_set_empty(&w_ctx->ppa);
+ w_ctx->lba = ADDR_EMPTY;
}
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
return pblk_rb_ring_count(mem, subm, rb->nr_entries);
}
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
+{
+ unsigned int mem = READ_ONCE(rb->mem);
+ unsigned int sync = READ_ONCE(rb->sync);
+
+ return pblk_rb_ring_count(mem, sync, rb->nr_entries);
+}
+
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
{
unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
struct pblk_line *line;
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ unsigned int user_io = 0, gc_io = 0;
unsigned int i;
+ int flags;
for (i = 0; i < to_update; i++) {
entry = &rb->entries[*l2p_upd];
w_ctx = &entry->w_ctx;
+ flags = READ_ONCE(entry->w_ctx.flags);
+ if (flags & PBLK_IOTYPE_USER)
+ user_io++;
+ else if (flags & PBLK_IOTYPE_GC)
+ gc_io++;
+ else
+ WARN(1, "pblk: unknown IO type\n");
+
pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
entry->cacheline);
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
}
+ pblk_rl_out(&pblk->rl, user_io, gc_io);
+
return 0;
}
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
/* Protect syncs */
smp_store_release(&rb->sync_point, sync_point);
+ if (!bio)
+ return 0;
+
spin_lock_irq(&rb->s_lock);
bio_list_add(&entry->w_ctx.bios, bio);
spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
return 1;
}
+void pblk_rb_flush(struct pblk_rb *rb)
+{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ unsigned int mem = READ_ONCE(rb->mem);
+
+ if (pblk_rb_sync_point_set(rb, NULL, mem))
+ return;
+
+ pblk_write_should_kick(pblk);
+}
+
static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
unsigned int *pos, struct bio *bio,
int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
unsigned int nr_entries, unsigned int *pos)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
- int flush_done;
+ int io_ret;
spin_lock(&rb->w_lock);
- if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
+ io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
+ if (io_ret) {
spin_unlock(&rb->w_lock);
- return NVM_IO_REQUEUE;
+ return io_ret;
}
- if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
+ if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
spin_unlock(&rb->w_lock);
return NVM_IO_REQUEUE;
}
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
pblk_rl_user_in(&pblk->rl, nr_entries);
spin_unlock(&rb->w_lock);
- return flush_done;
+ return io_ret;
}
/*
@@ -521,20 +557,18 @@ out:
* This function is used by the write thread to form the write bio that will
* persist data on the write buffer to the media.
*/
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count)
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count)
{
struct pblk *pblk = container_of(rb, struct pblk, rwb);
+ struct request_queue *q = pblk->dev->q;
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct pblk_rb_entry *entry;
struct page *page;
- unsigned int pad = 0, read = 0, to_read = nr_entries;
- unsigned int user_io = 0, gc_io = 0;
+ unsigned int pad = 0, to_read = nr_entries;
unsigned int i;
int flags;
- int ret;
if (count < nr_entries) {
pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
*/
try:
flags = READ_ONCE(entry->w_ctx.flags);
- if (!(flags & PBLK_WRITTEN_DATA))
+ if (!(flags & PBLK_WRITTEN_DATA)) {
+ io_schedule();
goto try;
-
- if (flags & PBLK_IOTYPE_USER)
- user_io++;
- else if (flags & PBLK_IOTYPE_GC)
- gc_io++;
- else
- WARN(1, "pblk: unknown IO type\n");
+ }
page = virt_to_page(entry->data);
if (!page) {
@@ -570,17 +599,17 @@ try:
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
- ret = bio_add_page(bio, page, rb->seg_size, 0);
- if (ret != rb->seg_size) {
+ if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
+ rb->seg_size) {
pr_err("pblk: could not add page to write bio\n");
flags &= ~PBLK_WRITTEN_DATA;
flags |= PBLK_SUBMITTED_ENTRY;
/* Release flags on context. Protect from writes */
smp_store_release(&entry->w_ctx.flags, flags);
- goto out;
+ return NVM_IO_ERR;
}
if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
pos = (pos + 1) & (rb->nr_entries - 1);
}
- read = to_read;
- pblk_rl_out(&pblk->rl, user_io, gc_io);
+ if (pad) {
+ if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
+ pr_err("pblk: could not pad page in write bio\n");
+ return NVM_IO_ERR;
+ }
+ }
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(pad, &((struct pblk *)
(container_of(rb, struct pblk, rwb)))->padded_writes);
#endif
-out:
- return read;
+
+ return NVM_IO_OK;
}
/*
@@ -623,15 +657,17 @@ out:
* be directed to disk.
*/
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter)
+ struct ppa_addr ppa, int bio_iter)
{
+ struct pblk *pblk = container_of(rb, struct pblk, rwb);
struct pblk_rb_entry *entry;
struct pblk_w_ctx *w_ctx;
+ struct ppa_addr l2p_ppa;
+ u64 pos = pblk_addr_to_cacheline(ppa);
void *data;
int flags;
int ret = 1;
- spin_lock(&rb->w_lock);
#ifdef CONFIG_NVM_DEBUG
/* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
w_ctx = &entry->w_ctx;
flags = READ_ONCE(w_ctx->flags);
+ spin_lock(&rb->w_lock);
+ spin_lock(&pblk->trans_lock);
+ l2p_ppa = pblk_trans_map_get(pblk, lba);
+ spin_unlock(&pblk->trans_lock);
+
/* Check if the entry has been overwritten or is scheduled to be */
- if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
+ if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
+ flags & PBLK_WRITABLE_ENTRY) {
ret = 0;
goto out;
}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..4e5c48f3de62 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
- return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
- pblk_addr_to_cacheline(ppa), bio_iter);
+ return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
}
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
}
WARN_ON(test_and_set_bit(i, read_bitmap));
advanced_bio = 1;
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
/* Read from media non-cached sectors */
rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
+ if (pblk_io_aligned(pblk, nr_secs))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
{
int err;
- rqd->flags = pblk_set_read_mode(pblk);
-
err = pblk_submit_io(pblk, rqd);
if (err)
return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct nvm_tgt_dev *dev = pblk->dev;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = rqd->bio;
if (rqd->error)
pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
+ WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
#endif
- if (rqd->nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
bio_put(bio);
- if (r_ctx->orig_bio) {
+ if (r_ctx->private) {
+ struct bio *orig_bio = r_ctx->private;
+
#ifdef CONFIG_NVM_DEBUG
- WARN_ONCE(r_ctx->orig_bio->bi_error,
- "pblk: corrupted read bio\n");
+ WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
#endif
- bio_endio(r_ctx->orig_bio);
- bio_put(r_ctx->orig_bio);
+ bio_endio(orig_bio);
+ bio_put(orig_bio);
}
#ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
#endif
pblk_free_rqd(pblk, rqd, READ);
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd->end_io = NULL;
if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
goto retry;
}
WARN_ON(test_and_set_bit(0, read_bitmap));
+#ifdef CONFIG_NVM_DEBUG
+ atomic_long_inc(&pblk->cache_reads);
+#endif
} else {
rqd->ppa_addr = ppa;
}
+
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
}
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
*/
bio_init_idx = pblk_get_bi_idx(bio);
+ rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd->dma_meta_list);
+ if (!rqd->meta_list) {
+ pr_err("pblk: not able to allocate ppa list\n");
+ goto fail_rqd_free;
+ }
+
if (nr_secs > 1) {
- rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd->dma_ppa_list);
- if (!rqd->ppa_list) {
- pr_err("pblk: not able to allocate ppa list\n");
- goto fail_rqd_free;
- }
+ rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
+ rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
} else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
bio_get(bio);
if (bitmap_full(&read_bitmap, nr_secs)) {
bio_endio(bio);
+ atomic_inc(&pblk->inflight_io);
pblk_end_io_read(rqd);
return NVM_IO_OK;
}
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
/* All sectors are to be read from the device */
if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
struct bio *int_bio = NULL;
- struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
/* Clone read bio to deal with read errors internally */
- int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
+ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
if (!int_bio) {
pr_err("pblk: could not clone read bio\n");
return NVM_IO_ERR;
}
rqd->bio = int_bio;
- r_ctx->orig_bio = bio;
+ r_ctx->private = bio;
ret = pblk_submit_read_io(pblk, rqd);
if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
- struct request_queue *q = dev->q;
struct bio *bio;
struct nvm_rq rqd;
int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
memset(&rqd, 0, sizeof(struct nvm_rq));
+ rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+ &rqd.dma_meta_list);
+ if (!rqd.meta_list)
+ return NVM_IO_ERR;
+
if (nr_secs > 1) {
- rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list)
- return NVM_IO_ERR;
+ rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
+ rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
nr_secs);
- if (*secs_to_gc == 1) {
- struct ppa_addr ppa;
-
- ppa = rqd.ppa_list[0];
- nvm_dev_dma_free(dev->parent, rqd.ppa_list,
- rqd.dma_ppa_list);
- rqd.ppa_addr = ppa;
- }
+ if (*secs_to_gc == 1)
+ rqd.ppa_addr = rqd.ppa_list[0];
} else {
*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
}
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
goto out;
data_len = (*secs_to_gc) * geo->sec_size;
- bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
+ bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
+ PBLK_KMALLOC_META, GFP_KERNEL);
if (IS_ERR(bio)) {
pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
rqd.end_io = pblk_end_io_sync;
rqd.private = &wait;
rqd.nr_ppas = *secs_to_gc;
+ rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd.bio = bio;
ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: GC read I/O timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
if (rqd.error) {
atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
#endif
out:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_OK;
err_free_dma:
- if (rqd.nr_ppas > 1)
- nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
+ nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
return NVM_IO_ERR;
}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f85087cd3c..0e48d3e4e143 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
return 0;
}
-__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
+__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
{
u32 crc;
- crc = pblk_calc_emeta_crc(pblk, emeta);
- if (le32_to_cpu(emeta->crc) != crc)
+ crc = pblk_calc_emeta_crc(pblk, emeta_buf);
+ if (le32_to_cpu(emeta_buf->crc) != crc)
return NULL;
- if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
return NULL;
- return pblk_line_emeta_to_lbas(emeta);
+ return emeta_to_lbas(pblk, emeta_buf);
}
static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
- struct line_emeta *emeta = line->emeta;
+ struct pblk_emeta *emeta = line->emeta;
+ struct line_emeta *emeta_buf = emeta->buf;
__le64 *lba_list;
int data_start;
int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
int i;
- lba_list = pblk_recov_get_lba_list(pblk, emeta);
+ lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
if (!lba_list)
return 1;
data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
- nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
- nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
+ nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
+ nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (test_and_set_bit(i, line->invalid_bitmap))
WARN_ONCE(1, "pblk: rec. double invalidate:\n");
else
- line->vsc--;
+ le32_add_cpu(line->vsc, -1);
spin_unlock(&line->lock);
continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
if (nr_valid_lbas != nr_lbas)
pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
- line->id, line->emeta->nr_valid_lbas, nr_lbas);
+ line->id, emeta_buf->nr_valid_lbas, nr_lbas);
line->left_msecs = 0;
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
struct pblk_line_meta *lm = &pblk->lm;
int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
- return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
+ return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
nr_bb * geo->sec_per_blk;
}
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
r_ptr_int = r_ptr;
next_read_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -295,7 +300,7 @@ next_read_rq:
pr_err("pblk: L2P recovery read timed out\n");
return -EINTR;
}
-
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
return 0;
}
+static void pblk_recov_complete(struct kref *ref)
+{
+ struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
+
+ complete(&pad_rq->wait);
+}
+
+static void pblk_end_io_recov(struct nvm_rq *rqd)
+{
+ struct pblk_pad_rq *pad_rq = rqd->private;
+ struct pblk *pblk = pad_rq->pblk;
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+ nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+ pblk_free_rqd(pblk, rqd, WRITE);
+}
+
static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
- struct pblk_recov_alloc p, int left_ppas)
+ int left_ppas)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct ppa_addr *ppa_list;
struct pblk_sec_meta *meta_list;
+ struct pblk_pad_rq *pad_rq;
struct nvm_rq *rqd;
struct bio *bio;
void *data;
dma_addr_t dma_ppa_list, dma_meta_list;
- __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
+ __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
u64 w_ptr = line->cur_sec;
- int left_line_ppas = line->left_msecs;
- int rq_ppas, rq_len;
+ int left_line_ppas, rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
- ppa_list = p.ppa_list;
- meta_list = p.meta_list;
- rqd = p.rqd;
- data = p.data;
- dma_ppa_list = p.dma_ppa_list;
- dma_meta_list = p.dma_meta_list;
+ spin_lock(&line->lock);
+ left_line_ppas = line->left_msecs;
+ spin_unlock(&line->lock);
+
+ pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
+ if (!pad_rq)
+ return -ENOMEM;
+
+ data = vzalloc(pblk->max_write_pgs * geo->sec_size);
+ if (!data) {
+ ret = -ENOMEM;
+ goto free_rq;
+ }
+
+ pad_rq->pblk = pblk;
+ init_completion(&pad_rq->wait);
+ kref_init(&pad_rq->ref);
next_pad_rq:
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
- if (!rq_ppas)
- rq_ppas = pblk->min_write_pgs;
+ if (rq_ppas < pblk->min_write_pgs) {
+ pr_err("pblk: corrupted pad line %d\n", line->id);
+ goto free_rq;
+ }
+
rq_len = rq_ppas * geo->sec_size;
+ meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+ if (!meta_list) {
+ ret = -ENOMEM;
+ goto free_data;
+ }
+
+ ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
+ dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+
+ rqd = pblk_alloc_rqd(pblk, WRITE);
+ if (IS_ERR(rqd)) {
+ ret = PTR_ERR(rqd);
+ goto fail_free_meta;
+ }
+ memset(rqd, 0, pblk_w_rq_size);
+
bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
- if (IS_ERR(bio))
- return PTR_ERR(bio);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
bio->bi_iter.bi_sector = 0; /* internal bio */
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- memset(rqd, 0, pblk_r_rq_size);
-
rqd->bio = bio;
rqd->opcode = NVM_OP_PWRITE;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
+ rqd->end_io = pblk_end_io_recov;
+ rqd->private = pad_rq;
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
struct ppa_addr dev_ppa;
+ __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
pblk_map_invalidate(pblk, dev_ppa);
- meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
- lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
+ lba_list[w_ptr] = meta_list[i].lba = addr_empty;
rqd->ppa_list[i] = dev_ppa;
}
}
+ kref_get(&pad_rq->ref);
+
ret = pblk_submit_io(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
- return ret;
+ goto free_data;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery write timed out\n");
- }
- reinit_completion(&wait);
+ atomic_dec(&pblk->inflight_io);
left_line_ppas -= rq_ppas;
left_ppas -= rq_ppas;
- if (left_ppas > 0 && left_line_ppas)
+ if (left_ppas && left_line_ppas)
goto next_pad_rq;
- return 0;
+ kref_put(&pad_rq->ref, pblk_recov_complete);
+
+ if (!wait_for_completion_io_timeout(&pad_rq->wait,
+ msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
+ pr_err("pblk: pad write timed out\n");
+ ret = -ETIME;
+ }
+
+free_rq:
+ kfree(pad_rq);
+free_data:
+ vfree(data);
+ return ret;
+
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, WRITE);
+fail_free_meta:
+ nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+ kfree(pad_rq);
+ return ret;
}
/* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
rec_round = 0;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -510,6 +583,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
if (pad_secs > line->left_msecs)
pad_secs = line->left_msecs;
- ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
+ ret = pblk_recov_pad_oob(pblk, line, pad_secs);
if (ret)
pr_err("pblk: OOB padding failed (err:%d)\n", ret);
@@ -552,7 +626,6 @@ next_rq:
if (ret)
pr_err("pblk: OOB read failed (err:%d)\n", ret);
- line->left_ssecs = line->left_msecs;
left_ppas = 0;
}
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
*done = 1;
next_rq:
- memset(rqd, 0, pblk_r_rq_size);
+ memset(rqd, 0, pblk_g_rq_size);
rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
rqd->bio = bio;
rqd->opcode = NVM_OP_PREAD;
- rqd->flags = pblk_set_read_mode(pblk);
rqd->meta_list = meta_list;
rqd->nr_ppas = rq_ppas;
rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
rqd->end_io = pblk_end_io_sync;
rqd->private = &wait;
+ if (pblk_io_aligned(pblk, rq_ppas))
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
+ else
+ rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+
for (i = 0; i < rqd->nr_ppas; ) {
struct ppa_addr ppa;
int pos;
@@ -646,6 +723,7 @@ next_rq:
msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
pr_err("pblk: L2P recovery read timed out\n");
}
+ atomic_dec(&pblk->inflight_io);
reinit_completion(&wait);
/* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
/* Roll back failed sectors */
line->cur_sec -= nr_error_bits;
line->left_msecs += nr_error_bits;
- line->left_ssecs = line->left_msecs;
bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line *line, *tline, *data_line = NULL;
- struct line_smeta *smeta;
- struct line_emeta *emeta;
+ struct pblk_smeta *smeta;
+ struct pblk_emeta *emeta;
+ struct line_smeta *smeta_buf;
int found_lines = 0, recovered_lines = 0, open_lines = 0;
int is_next = 0;
int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
spin_lock(&l_mg->free_lock);
meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
set_bit(meta_line, &l_mg->meta_bitmap);
- smeta = l_mg->sline_meta[meta_line].meta;
- emeta = l_mg->eline_meta[meta_line].meta;
+ smeta = l_mg->sline_meta[meta_line];
+ emeta = l_mg->eline_meta[meta_line];
+ smeta_buf = (struct line_smeta *)smeta;
spin_unlock(&l_mg->free_lock);
/* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
memset(smeta, 0, lm->smeta_len);
line->smeta = smeta;
- line->lun_bitmap = ((void *)(smeta)) +
+ line->lun_bitmap = ((void *)(smeta_buf)) +
sizeof(struct line_smeta);
/* Lines that cannot be read are assumed as not written here */
if (pblk_line_read_smeta(pblk, line))
continue;
- crc = pblk_calc_smeta_crc(pblk, smeta);
- if (le32_to_cpu(smeta->crc) != crc)
+ crc = pblk_calc_smeta_crc(pblk, smeta_buf);
+ if (le32_to_cpu(smeta_buf->crc) != crc)
continue;
- if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
+ if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
continue;
- if (le16_to_cpu(smeta->header.version) != 1) {
+ if (le16_to_cpu(smeta_buf->header.version) != 1) {
pr_err("pblk: found incompatible line version %u\n",
- smeta->header.version);
+ smeta_buf->header.version);
return ERR_PTR(-EINVAL);
}
/* The first valid instance uuid is used for initialization */
if (!valid_uuid) {
- memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
+ memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
valid_uuid = 1;
}
- if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
+ if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
pr_debug("pblk: ignore line %u due to uuid mismatch\n",
i);
continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
/* Update line metadata */
spin_lock(&line->lock);
- line->id = le32_to_cpu(line->smeta->header.id);
- line->type = le16_to_cpu(line->smeta->header.type);
- line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
+ line->id = le32_to_cpu(smeta_buf->header.id);
+ line->type = le16_to_cpu(smeta_buf->header.type);
+ line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
spin_unlock(&line->lock);
/* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
pblk_recov_line_add_ordered(&recov_list, line);
found_lines++;
pr_debug("pblk: recovering data line %d, seq:%llu\n",
- line->id, smeta->seq_nr);
+ line->id, smeta_buf->seq_nr);
}
if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
recovered_lines++;
/* Calculate where emeta starts based on the line bb */
- off = lm->sec_per_line - lm->emeta_sec;
+ off = lm->sec_per_line - lm->emeta_sec[0];
nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
off -= nr_bb * geo->sec_per_pl;
- memset(emeta, 0, lm->emeta_len);
- line->emeta = emeta;
line->emeta_ssec = off;
+ line->emeta = emeta;
+ memset(line->emeta->buf, 0, lm->emeta_len[0]);
- if (pblk_line_read_emeta(pblk, line)) {
+ if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
pblk_recov_l2p_from_oob(pblk, line);
goto next;
}
@@ -941,58 +1020,26 @@ out:
}
/*
- * Pad until smeta can be read on current data line
+ * Pad current line
*/
-void pblk_recov_pad(struct pblk *pblk)
+int pblk_recov_pad(struct pblk *pblk)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
struct pblk_line *line;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
- struct nvm_rq *rqd;
- struct pblk_recov_alloc p;
- struct ppa_addr *ppa_list;
- struct pblk_sec_meta *meta_list;
- void *data;
- dma_addr_t dma_ppa_list, dma_meta_list;
+ int left_msecs;
+ int ret = 0;
spin_lock(&l_mg->free_lock);
line = l_mg->data_line;
+ left_msecs = line->left_msecs;
spin_unlock(&l_mg->free_lock);
- rqd = pblk_alloc_rqd(pblk, READ);
- if (IS_ERR(rqd))
- return;
-
- meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
- if (!meta_list)
- goto free_rqd;
-
- ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
- dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
-
- data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
- if (!data)
- goto free_meta_list;
-
- p.ppa_list = ppa_list;
- p.meta_list = meta_list;
- p.rqd = rqd;
- p.data = data;
- p.dma_ppa_list = dma_ppa_list;
- p.dma_meta_list = dma_meta_list;
-
- if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
- pr_err("pblk: Tear down padding failed\n");
- goto free_data;
+ ret = pblk_recov_pad_oob(pblk, line, left_msecs);
+ if (ret) {
+ pr_err("pblk: Tear down padding failed (%d)\n", ret);
+ return ret;
}
- pblk_line_close(pblk, line);
-
-free_data:
- kfree(data);
-free_meta_list:
- nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
-free_rqd:
- pblk_free_rqd(pblk, rqd, READ);
+ pblk_line_close_meta(pblk, line);
+ return ret;
}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..2e6a5361baf0 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
+int pblk_rl_is_limit(struct pblk_rl *rl)
+{
+ int rb_space;
+
+ rb_space = atomic_read(&rl->rb_space);
+
+ return (rb_space == 0);
+}
+
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
+ int rb_space = atomic_read(&rl->rb_space);
- return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+ if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
+ return NVM_IO_ERR;
+
+ if (rb_user_cnt >= rl->rb_user_max)
+ return NVM_IO_REQUEUE;
+
+ return NVM_IO_OK;
+}
+
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
+{
+ int rb_space = atomic_read(&rl->rb_space);
+
+ if (unlikely(rb_space >= 0))
+ atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
- return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+ return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
if (free_blocks >= rl->high) {
- rl->rb_user_max = max - rl->rb_gc_rsv;
- rl->rb_gc_max = rl->rb_gc_rsv;
+ rl->rb_user_max = max;
+ rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
- int gc_max;
rl->rb_user_max = user_max;
- gc_max = max - rl->rb_user_max;
- rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
- if (free_blocks > rl->low)
- rl->rb_state = PBLK_RL_MID;
- else
- rl->rb_state = PBLK_RL_LOW;
+ rl->rb_gc_max = max - user_max;
+
+ if (free_blocks <= rl->rsv_blocks) {
+ rl->rb_user_max = 0;
+ rl->rb_gc_max = max;
+ }
+
+ /* In the worst case, we will need to GC lines in the low list
+ * (high valid sector count). If there are lines to GC on high
+ * or mid lists, these will be prioritized
+ */
+ rl->rb_state = PBLK_RL_LOW;
}
return rl->rb_state;
}
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
- rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
{
- struct pblk *pblk = container_of(rl, struct pblk, rl);
int blk_in_line = atomic_read(&line->blk_in_line);
- int ret;
atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+ struct pblk_rl *rl = &pblk->rl;
+ int ret;
/* Rates will not change that often - no need to lock update */
ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
pblk_gc_should_stop(pblk);
}
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+ return rl->low;
+}
+
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
{
return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
void pblk_rl_init(struct pblk_rl *rl, int budget)
{
+ struct pblk *pblk = container_of(rl, struct pblk, rl);
+ struct pblk_line_meta *lm = &pblk->lm;
+ int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
unsigned int rb_windows;
rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
- rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
rl->high_pw = get_count_order(rl->high);
+ rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+ if (rl->low < min_blocks)
+ rl->low = min_blocks;
+
+ rl->rsv_blocks = min_blocks;
+
/* This will always be a power-of-2 */
rb_windows = budget / PBLK_MAX_REQ_ADDRS;
- rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+ rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
- atomic_set(&rl->rb_user_cnt, 0);
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
+
+ atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
+ atomic_set(&rl->rb_space, -1);
setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
+
rl->rb_user_active = 0;
+ rl->rb_gc_active = 0;
}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1ceeff..95fb434e2f01 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
int free_blocks, total_blocks;
int rb_user_max, rb_user_cnt;
- int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+ int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
free_blocks = atomic_read(&pblk->rl.free_blocks);
rb_user_max = pblk->rl.rb_user_max;
rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
rb_gc_max = pblk->rl.rb_gc_max;
- rb_gc_rsv = pblk->rl.rb_gc_rsv;
rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
rb_budget = pblk->rl.rb_budget;
rb_state = pblk->rl.rb_state;
- total_blocks = geo->blks_per_lun * geo->nr_luns;
+ total_blocks = pblk->rl.total_blocks;
return snprintf(page, PAGE_SIZE,
- "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+ "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
rb_user_cnt,
rb_user_max,
rb_gc_cnt,
rb_gc_max,
- rb_gc_rsv,
rb_state,
rb_budget,
pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
ssize_t sz = 0;
int nr_free_lines;
int cur_data, cur_log;
- int free_line_cnt = 0, closed_line_cnt = 0;
+ int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
int d_line_cnt = 0, l_line_cnt = 0;
int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
- int free = 0, bad = 0, cor = 0;
- int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
+ int bad = 0, cor = 0;
+ int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
int map_weight = 0, meta_weight = 0;
spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
free_line_cnt++;
spin_unlock(&l_mg->free_lock);
+ spin_lock(&l_mg->close_lock);
+ list_for_each_entry(line, &l_mg->emeta_list, list)
+ emeta_line_cnt++;
+ spin_unlock(&l_mg->close_lock);
+
spin_lock(&l_mg->gc_lock);
list_for_each_entry(line, &l_mg->gc_full_list, list) {
if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
gc_empty++;
}
- list_for_each_entry(line, &l_mg->free_list, list)
- free++;
list_for_each_entry(line, &l_mg->bad_list, list)
bad++;
list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
if (l_mg->data_line) {
cur_sec = l_mg->data_line->cur_sec;
msecs = l_mg->data_line->left_msecs;
- ssecs = l_mg->data_line->left_ssecs;
- vsc = l_mg->data_line->vsc;
+ vsc = le32_to_cpu(*l_mg->data_line->vsc);
sec_in_line = l_mg->data_line->sec_in_line;
meta_weight = bitmap_weight(&l_mg->meta_bitmap,
PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
spin_unlock(&l_mg->free_lock);
if (nr_free_lines != free_line_cnt)
- pr_err("pblk: corrupted free line list\n");
+ pr_err("pblk: corrupted free line list:%d/%d\n",
+ nr_free_lines, free_line_cnt);
sz = snprintf(page, PAGE_SIZE - sz,
"line: nluns:%d, nblks:%d, nsecs:%d\n",
geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
+ "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
cur_data, cur_log,
- free, nr_free_lines, bad, cor,
+ nr_free_lines,
+ emeta_line_cnt, meta_weight,
closed_line_cnt,
+ bad, cor,
d_line_cnt, l_line_cnt,
l_mg->nr_lines);
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
atomic_read(&pblk->gc.inflight_gc));
sz += snprintf(page + sz, PAGE_SIZE - sz,
- "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
- cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
- map_weight, lm->sec_per_line, meta_weight);
+ "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
+ cur_data, cur_sec, msecs, vsc, sec_in_line,
+ map_weight, lm->sec_per_line,
+ atomic_read(&pblk->inflight_io));
return sz;
}
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
lm->smeta_len, lm->smeta_sec);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"emeta - len:%d, sec:%d, bb_start:%d\n",
- lm->emeta_len, lm->emeta_sec,
+ lm->emeta_len[0], lm->emeta_sec[0],
lm->emeta_bb);
sz += snprintf(page + sz, PAGE_SIZE - sz,
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
return sz;
}
+static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
+}
+
#ifdef CONFIG_NVM_DEBUG
static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
{
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
atomic_long_read(&pblk->padded_wb),
atomic_long_read(&pblk->sub_writes),
atomic_long_read(&pblk->sync_writes),
- atomic_long_read(&pblk->compl_writes),
atomic_long_read(&pblk->recov_writes),
atomic_long_read(&pblk->recov_gc_writes),
atomic_long_read(&pblk->recov_gc_reads),
+ atomic_long_read(&pblk->cache_reads),
atomic_long_read(&pblk->sync_reads));
}
#endif
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
+ size_t len)
{
- struct pblk_gc *gc = &pblk->gc;
size_t c_len;
- int value;
+ int force;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &value))
+ if (kstrtouint(page, 0, &force))
return -EINVAL;
- spin_lock(&gc->lock);
- pblk_rl_set_gc_rsc(&pblk->rl, value);
- spin_unlock(&gc->lock);
+ pblk_gc_sysfs_force(pblk, force);
return len;
}
-static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
- size_t len)
+static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
+ const char *page, size_t len)
{
size_t c_len;
- int force;
+ int sec_per_write;
c_len = strcspn(page, "\n");
if (c_len >= len)
return -EINVAL;
- if (kstrtouint(page, 0, &force))
+ if (kstrtouint(page, 0, &sec_per_write))
return -EINVAL;
- if (force < 0 || force > 1)
+ if (sec_per_write < pblk->min_write_pgs
+ || sec_per_write > pblk->max_write_pgs
+ || sec_per_write % pblk->min_write_pgs != 0)
return -EINVAL;
- pblk_gc_sysfs_force(pblk, force);
+ pblk_set_sec_per_write(pblk, sec_per_write);
return len;
}
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
.mode = 0200,
};
-static struct attribute sys_gc_rl_max = {
- .name = "gc_rl_max",
- .mode = 0200,
+static struct attribute sys_max_sec_per_write = {
+ .name = "max_sec_per_write",
+ .mode = 0644,
};
#ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
&sys_errors_attr,
&sys_gc_state,
&sys_gc_force,
- &sys_gc_rl_max,
+ &sys_max_sec_per_write,
&sys_rb_attr,
&sys_stats_ppaf_attr,
&sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
return pblk_sysfs_lines(pblk, buf);
else if (strcmp(attr->name, "lines_info") == 0)
return pblk_sysfs_lines_info(pblk, buf);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_get_sec_per_write(pblk, buf);
#ifdef CONFIG_NVM_DEBUG
else if (strcmp(attr->name, "stats") == 0)
return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
{
struct pblk *pblk = container_of(kobj, struct pblk, kobj);
- if (strcmp(attr->name, "gc_rl_max") == 0)
- return pblk_sysfs_rate_store(pblk, buf, len);
- else if (strcmp(attr->name, "gc_force") == 0)
+ if (strcmp(attr->name, "gc_force") == 0)
return pblk_sysfs_gc_force(pblk, buf, len);
+ else if (strcmp(attr->name, "max_sec_per_write") == 0)
+ return pblk_sysfs_set_sec_per_write(pblk, buf, len);
return 0;
}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..d62a8f4faaf4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
#include "pblk.h"
-static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
-{
-#ifdef CONFIG_NVM_DEBUG
- atomic_long_inc(&pblk->sync_writes);
-#endif
-
- /* Counter protected by rb sync lock */
- line->left_ssecs--;
- if (!line->left_ssecs)
- pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
-}
-
static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx)
{
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
for (i = 0; i < c_ctx->nr_valid; i++) {
struct pblk_w_ctx *w_ctx;
- struct ppa_addr p;
- struct pblk_line *line;
w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
-
- p = rqd->ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(p)];
- pblk_sync_line(pblk, line);
-
while ((original_bio = bio_list_pop(&w_ctx->bios)))
bio_endio(original_bio);
}
#ifdef CONFIG_NVM_DEBUG
- atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
+ atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
#endif
ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
}
INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
- queue_work(pblk->kw_wq, &recovery->ws_rec);
+ queue_work(pblk->close_wq, &recovery->ws_rec);
out:
pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
}
#ifdef CONFIG_NVM_DEBUG
else
- WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
#endif
pblk_complete_write(pblk, rqd, c_ctx);
+ atomic_dec(&pblk->inflight_io);
+}
+
+static void pblk_end_io_write_meta(struct nvm_rq *rqd)
+{
+ struct pblk *pblk = rqd->private;
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
+ struct pblk_line *line = m_ctx->private;
+ struct pblk_emeta *emeta = line->emeta;
+ int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
+ struct pblk_lun *rlun = &pblk->luns[pos];
+ int sync;
+
+ up(&rlun->wr_sem);
+
+ if (rqd->error) {
+ pblk_log_write_err(pblk, rqd);
+ pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+ }
+#ifdef CONFIG_NVM_DEBUG
+ else
+ WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
+#endif
+
+ sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
+ if (sync == emeta->nr_entries)
+ pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
+ pblk->close_wq);
+
+ bio_put(rqd->bio);
+ pblk_free_rqd(pblk, rqd, READ);
+
+ atomic_dec(&pblk->inflight_io);
}
static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- unsigned int nr_secs)
+ unsigned int nr_secs,
+ nvm_end_io_fn(*end_io))
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
rqd->nr_ppas = nr_secs;
rqd->flags = pblk_set_progr_mode(pblk, WRITE);
rqd->private = pblk;
- rqd->end_io = pblk_end_io_write;
+ rqd->end_io = end_io;
rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
&rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx)
+ struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
- struct pblk_line *e_line = pblk_line_get_data_next(pblk);
- struct ppa_addr erase_ppa;
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
int ret = 0;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
- if (!lun_bitmap) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!lun_bitmap)
+ return -ENOMEM;
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
+ ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
if (ret) {
kfree(lun_bitmap);
- goto out;
+ return ret;
}
- ppa_set_empty(&erase_ppa);
if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
else
pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
- valid, &erase_ppa);
-
-out:
- if (unlikely(e_line && !ppa_empty(erase_ppa))) {
- if (pblk_blk_erase_async(pblk, erase_ppa)) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- int bit;
-
- atomic_inc(&e_line->left_eblks);
- bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
- WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
- up(&pblk->erase_sem);
- }
- }
+ valid, erase_ppa);
- return ret;
+ return 0;
}
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
c_ctx->lun_bitmap = lun_bitmap;
- ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
+ ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
if (ret)
return ret;
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
return secs_to_sync;
}
+static inline int pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct ppa_addr *ppa_list, int nr_ppas)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line *data_line;
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int i;
+
+ data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
+ paddr = pblk_lookup_page(pblk, meta_line);
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
+ return 1;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regards to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result on a LUN deadlock. In this case, modify
+ * the distance to not be optimal, but allow metadata I/Os to succeed.
+ */
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ if (unlikely(ppa_opt.ppa == ppa.ppa)) {
+ data_line->meta_distance--;
+ return 0;
+ }
+
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa_opt.g.ch &&
+ ppa_list[i].g.lun == ppa_opt.g.lun)
+ return 1;
+
+ if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
+ for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
+ if (ppa_list[i].g.ch == ppa.g.ch &&
+ ppa_list[i].g.lun == ppa.g.lun)
+ return 0;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_emeta *emeta = meta_line->emeta;
+ struct pblk_g_ctx *m_ctx;
+ struct pblk_lun *rlun;
+ struct bio *bio;
+ struct nvm_rq *rqd;
+ void *data;
+ u64 paddr;
+ int rq_ppas = pblk->min_write_pgs;
+ int id = meta_line->id;
+ int rq_len;
+ int i, j;
+ int ret;
+
+ rqd = pblk_alloc_rqd(pblk, READ);
+ if (IS_ERR(rqd)) {
+ pr_err("pblk: cannot allocate write req.\n");
+ return PTR_ERR(rqd);
+ }
+ m_ctx = nvm_rq_to_pdu(rqd);
+ m_ctx->private = meta_line;
+
+ rq_len = rq_ppas * geo->sec_size;
+ data = ((void *)emeta->buf) + emeta->mem;
+
+ bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+ l_mg->emeta_alloc_type, GFP_KERNEL);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ goto fail_free_rqd;
+ }
+ bio->bi_iter.bi_sector = 0; /* internal bio */
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ rqd->bio = bio;
+
+ ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
+ if (ret)
+ goto fail_free_bio;
+
+ for (i = 0; i < rqd->nr_ppas; ) {
+ spin_lock(&meta_line->lock);
+ paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
+ spin_unlock(&meta_line->lock);
+ for (j = 0; j < rq_ppas; j++, i++, paddr++)
+ rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+ }
+
+ rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
+ ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+ if (ret) {
+ pr_err("pblk: lun semaphore timed out (%d)\n", ret);
+ goto fail_free_bio;
+ }
+
+ emeta->mem += rq_len;
+ if (emeta->mem >= lm->emeta_len[0]) {
+ spin_lock(&l_mg->close_lock);
+ list_del(&meta_line->list);
+ WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
+ "pblk: corrupt meta line %d\n", meta_line->id);
+ spin_unlock(&l_mg->close_lock);
+ }
+
+ ret = pblk_submit_io(pblk, rqd);
+ if (ret) {
+ pr_err("pblk: emeta I/O submission failed: %d\n", ret);
+ goto fail_rollback;
+ }
+
+ return NVM_IO_OK;
+
+fail_rollback:
+ spin_lock(&l_mg->close_lock);
+ pblk_dealloc_page(pblk, meta_line, rq_ppas);
+ list_add(&meta_line->list, &meta_line->list);
+ spin_unlock(&l_mg->close_lock);
+fail_free_bio:
+ if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
+ bio_put(bio);
+fail_free_rqd:
+ pblk_free_rqd(pblk, rqd, READ);
+ return ret;
+}
+
+static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
+ int prev_n)
+{
+ struct pblk_line_meta *lm = &pblk->lm;
+ struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+ struct pblk_line *meta_line;
+
+ spin_lock(&l_mg->close_lock);
+retry:
+ if (list_empty(&l_mg->emeta_list)) {
+ spin_unlock(&l_mg->close_lock);
+ return 0;
+ }
+ meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
+ if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
+ goto retry;
+ spin_unlock(&l_mg->close_lock);
+
+ if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
+ return 0;
+
+ return pblk_submit_meta_io(pblk, meta_line);
+}
+
+static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct ppa_addr erase_ppa;
+ int err;
+
+ ppa_set_empty(&erase_ppa);
+
+ /* Assign lbas to ppas and populate request structure */
+ err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ if (err) {
+ pr_err("pblk: could not setup write request: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ if (likely(ppa_empty(erase_ppa))) {
+ /* Submit metadata write for previous data line */
+ err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+ } else {
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
+
+ /* Submit available erase for next data line */
+ if (pblk_blk_erase_async(pblk, erase_ppa)) {
+ struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int bit;
+
+ atomic_inc(&e_line->left_eblks);
+ bit = pblk_ppa_to_pos(geo, erase_ppa);
+ WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
+ }
+ }
+
+ return NVM_IO_OK;
+}
+
+static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+ struct bio *bio = rqd->bio;
+
+ if (c_ctx->nr_padded)
+ pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
+}
+
static int pblk_submit_write(struct pblk *pblk)
{
struct bio *bio;
struct nvm_rq *rqd;
- struct pblk_c_ctx *c_ctx;
- unsigned int pgs_read;
unsigned int secs_avail, secs_to_sync, secs_to_com;
unsigned int secs_to_flush;
unsigned long pos;
- int err;
/* If there are no sectors in the cache, flushes (bios without data)
* will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
pr_err("pblk: cannot allocate write req.\n");
return 1;
}
- c_ctx = nvm_rq_to_pdu(rqd);
bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
- pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
- secs_to_sync, secs_avail);
- if (!pgs_read) {
+ if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
+ secs_avail)) {
pr_err("pblk: corrupted write bio\n");
goto fail_put_bio;
}
- if (c_ctx->nr_padded)
- if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
- goto fail_put_bio;
-
- /* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx);
- if (err) {
- pr_err("pblk: could not setup write request\n");
- goto fail_free_bio;
- }
-
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: I/O submission failed: %d\n", err);
+ if (pblk_submit_io_set(pblk, rqd))
goto fail_free_bio;
- }
#ifdef CONFIG_NVM_DEBUG
atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
return 0;
fail_free_bio:
- if (c_ctx->nr_padded)
- pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
+ pblk_free_write_rqd(pblk, rqd);
fail_put_bio:
bio_put(bio);
fail_free_rqd:
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186b5288..15931381348c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
#define PBLK_MAX_REQ_ADDRS (64)
#define PBLK_MAX_REQ_ADDRS_PW (6)
+#define PBLK_WS_POOL_SIZE (128)
+#define PBLK_META_POOL_SIZE (128)
+#define PBLK_READ_REQ_POOL_SIZE (1024)
+
+#define PBLK_NR_CLOSE_JOBS (4)
+
#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
#define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
PBLK_BLK_ST_CLOSED = 0x2,
};
+struct pblk_sec_meta {
+ u64 reserved;
+ __le64 lba;
+};
+
/* The number of GC lists and the rate-limiter states go together. This way the
* rate-limiter can dictate how much GC is needed based on resource utilization.
*/
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
enum {
PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
PBLK_RL_LOW = 3,
};
-struct pblk_sec_meta {
- u64 reserved;
- __le64 lba;
-};
-
#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
-/* write completion context */
+/* write buffer completion context */
struct pblk_c_ctx {
struct list_head list; /* Head for out-of-order completion */
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
unsigned int nr_padded;
};
-/* Read context */
-struct pblk_r_ctx {
- struct bio *orig_bio;
+/* generic context */
+struct pblk_g_ctx {
+ void *private;
+};
+
+/* Pad context */
+struct pblk_pad_rq {
+ struct pblk *pblk;
+ struct completion wait;
+ struct kref ref;
};
/* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
struct pblk_gc_rq {
struct pblk_line *line;
void *data;
- u64 *lba_list;
+ u64 lba_list[PBLK_MAX_REQ_ADDRS];
int nr_secs;
int secs_to_gc;
struct list_head list;
};
struct pblk_gc {
+ /* These states are not protected by a lock since (i) they are in the
+ * fast path, and (ii) they are not critical.
+ */
int gc_active;
int gc_enabled;
int gc_forced;
- int gc_jobs_active;
- atomic_t inflight_gc;
struct task_struct *gc_ts;
struct task_struct *gc_writer_ts;
+ struct task_struct *gc_reader_ts;
+
+ struct workqueue_struct *gc_line_reader_wq;
struct workqueue_struct *gc_reader_wq;
+
struct timer_list gc_timer;
+ struct semaphore gc_sem;
+ atomic_t inflight_gc;
int w_entries;
+
struct list_head w_list;
+ struct list_head r_list;
spinlock_t lock;
spinlock_t w_lock;
+ spinlock_t r_lock;
};
struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
*/
unsigned int high_pw; /* High rounded up as a power of 2 */
-#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent
- * available blks
- */
-#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
int rb_windows_pw; /* Number of rate windows in the write buffer
* given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
*/
int rb_budget; /* Total number of entries available for I/O */
int rb_user_max; /* Max buffer entries available for user I/O */
- atomic_t rb_user_cnt; /* User I/O buffer counter */
int rb_gc_max; /* Max buffer entries available for GC I/O */
int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
int rb_state; /* Rate-limiter current state */
+
+ atomic_t rb_user_cnt; /* User I/O buffer counter */
atomic_t rb_gc_cnt; /* GC I/O buffer counter */
+ atomic_t rb_space; /* Space limit in case of reaching capacity */
+
+ int rsv_blocks; /* Reserved blocks for GC */
int rb_user_active;
+ int rb_gc_active;
+
struct timer_list u_timer;
unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
atomic_t free_blocks;
};
-#define PBLK_LINE_NR_LUN_BITMAP 2
-#define PBLK_LINE_NR_SEC_BITMAP 2
#define PBLK_LINE_EMPTY (~0U)
enum {
@@ -310,16 +334,19 @@ struct line_smeta {
__le32 window_wr_lun; /* Number of parallel LUNs to write */
__le32 rsvd[2];
+
+ __le64 lun_bitmap[];
};
/*
- * Metadata Layout:
- * 1. struct pblk_emeta
- * 2. nr_lbas u64 forming lba list
- * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
- * 4. nr_luns bits (u64 format) forming line bad block bitmap
- *
- * 3. and 4. will be part of FTL log
+ * Metadata layout in media:
+ * First sector:
+ * 1. struct line_emeta
+ * 2. bad block bitmap (u64 * window_wr_lun)
+ * Mid sectors (start at lbas_sector):
+ * 3. nr_lbas (u64) forming lba list
+ * Last sectors (start at vsc_sector):
+ * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
*/
struct line_emeta {
struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
__le32 next_id; /* Line id for next line */
__le64 nr_lbas; /* Number of lbas mapped in line */
__le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
+ __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
+};
+
+struct pblk_emeta {
+ struct line_emeta *buf; /* emeta buffer in media format */
+ int mem; /* Write offset - points to next
+ * writable entry in memory
+ */
+ atomic_t sync; /* Synced - backpointer that signals the
+ * last entry that has been successfully
+ * persisted to media
+ */
+ unsigned int nr_entries; /* Number of emeta entries */
+};
+
+struct pblk_smeta {
+ struct line_smeta *buf; /* smeta buffer in persistent format */
};
struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
- struct line_smeta *smeta; /* Start metadata */
- struct line_emeta *emeta; /* End metadata */
+ struct pblk_smeta *smeta; /* Start metadata */
+ struct pblk_emeta *emeta; /* End medatada */
+
int meta_line; /* Metadata line id */
+ int meta_distance; /* Distance between data and metadata */
+
u64 smeta_ssec; /* Sector where smeta starts */
u64 emeta_ssec; /* Sector where emeta starts */
@@ -374,9 +421,10 @@ struct pblk_line {
atomic_t left_seblks; /* Blocks left for sync erasing */
int left_msecs; /* Sectors left for mapping */
- int left_ssecs; /* Sectors left to sync */
unsigned int cur_sec; /* Sector map pointer */
- unsigned int vsc; /* Valid sector count in line */
+ unsigned int nr_valid_lbas; /* Number of valid lbas in line */
+
+ __le32 *vsc; /* Valid sector count in line */
struct kref ref; /* Write buffer L2P references */
@@ -385,13 +433,15 @@ struct pblk_line {
#define PBLK_DATA_LINES 4
-enum{
+enum {
PBLK_KMALLOC_META = 1,
PBLK_VMALLOC_META = 2,
};
-struct pblk_line_metadata {
- void *meta;
+enum {
+ PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
+ PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
+ PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
};
struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
struct list_head bad_list; /* Full lines bad */
/* GC lists - use gc_lock */
- struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+ struct list_head *gc_lists[PBLK_GC_NR_LISTS];
struct list_head gc_high_list; /* Full lines ready to GC, high isc */
struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
struct list_head gc_low_list; /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
struct pblk_line *log_next; /* Next FTL log line */
struct pblk_line *data_next; /* Next data line */
+ struct list_head emeta_list; /* Lines queued to schedule emeta */
+
+ __le32 *vsc_list; /* Valid sector counts for all lines */
+
/* Metadata allocation type: VMALLOC | KMALLOC */
- int smeta_alloc_type;
int emeta_alloc_type;
/* Pre-allocated metadata for data lines */
- struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
- struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
+ struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
+ struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
unsigned long meta_bitmap;
/* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
unsigned long l_seq_nr; /* Log line unique sequence number */
spinlock_t free_lock;
+ spinlock_t close_lock;
spinlock_t gc_lock;
};
struct pblk_line_meta {
unsigned int smeta_len; /* Total length for smeta */
- unsigned int smeta_sec; /* Sectors needed for smeta*/
- unsigned int emeta_len; /* Total length for emeta */
- unsigned int emeta_sec; /* Sectors needed for emeta*/
+ unsigned int smeta_sec; /* Sectors needed for smeta */
+
+ unsigned int emeta_len[4]; /* Lengths for emeta:
+ * [0]: Total length
+ * [1]: struct line_emeta length
+ * [2]: L2P portion length
+ * [3]: vsc list length
+ */
+ unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
+ * as emeta_len
+ */
+
unsigned int emeta_bb; /* Boundary for bb that affects emeta */
+
+ unsigned int vsc_list_len; /* Length for vsc list */
unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
unsigned int blk_bitmap_len; /* Length for block bitmap in line */
unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
unsigned int blk_per_line; /* Number of blocks in a full line */
unsigned int sec_per_line; /* Number of sectors in a line */
+ unsigned int dsec_per_line; /* Number of data sectors in a line */
unsigned int min_blk_line; /* Min. number of good blocks in line */
unsigned int mid_thrs; /* Threshold for GC mid list */
unsigned int high_thrs; /* Threshold for GC high list */
+
+ unsigned int meta_distance; /* Distance between data and metadata */
};
struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
u8 sec_offset;
};
+enum {
+ PBLK_STATE_RUNNING = 0,
+ PBLK_STATE_STOPPING = 1,
+ PBLK_STATE_RECOVERING = 2,
+ PBLK_STATE_STOPPED = 3,
+};
+
struct pblk {
struct nvm_tgt_dev *dev;
struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
struct pblk_rb rwb;
+ int state; /* pblk line state */
+
int min_write_pgs; /* Minimum amount of pages required by controller */
int max_write_pgs; /* Maximum amount of pages supported by controller */
int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
/* pblk provisioning values. Used by rate limiter */
struct pblk_rl rl;
- struct semaphore erase_sem;
+ int sec_per_write;
unsigned char instance_uuid[16];
#ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
atomic_long_t req_writes; /* Sectors stored on write buffer */
atomic_long_t sub_writes; /* Sectors submitted from buffer */
atomic_long_t sync_writes; /* Sectors synced to media */
- atomic_long_t compl_writes; /* Sectors completed in write bio */
atomic_long_t inflight_reads; /* Inflight sector read requests */
+ atomic_long_t cache_reads; /* Read requests that hit the cache */
atomic_long_t sync_reads; /* Completed sector read requests */
atomic_long_t recov_writes; /* Sectors submitted from recovery */
atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
atomic_long_t write_failed;
atomic_long_t erase_failed;
+ atomic_t inflight_io; /* General inflight I/O counter */
+
struct task_struct *writer_ts;
/* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
mempool_t *page_pool;
mempool_t *line_ws_pool;
mempool_t *rec_pool;
- mempool_t *r_rq_pool;
+ mempool_t *g_rq_pool;
mempool_t *w_rq_pool;
mempool_t *line_meta_pool;
- struct workqueue_struct *kw_wq;
+ struct workqueue_struct *close_wq;
+ struct workqueue_struct *bb_wq;
+
struct timer_list wtimer;
struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
struct work_struct ws;
};
-#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
+#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
/*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
unsigned int pos);
struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
+void pblk_rb_flush(struct pblk_rb *rb);
void pblk_rb_sync_l2p(struct pblk_rb *rb);
-unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
- struct pblk_c_ctx *c_ctx,
- unsigned int pos,
- unsigned int nr_entries,
- unsigned int count);
+unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
+ struct bio *bio, unsigned int pos,
+ unsigned int nr_entries, unsigned int count);
unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
struct list_head *list,
unsigned int max);
int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
- u64 pos, int bio_iter);
+ struct ppa_addr ppa, int bio_iter);
unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
unsigned int pblk_rb_read_count(struct pblk_rb *rb);
+unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
* pblk core
*/
struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
+void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_c_ctx *c_ctx);
void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
-void pblk_flush_writer(struct pblk *pblk);
+void pblk_wait_for_meta(struct pblk *pblk);
struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
void pblk_discard(struct pblk *pblk, struct bio *bio);
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
- gfp_t gfp_mask);
+ int alloc_type, gfp_t gfp_mask);
struct pblk_line *pblk_line_get(struct pblk *pblk);
struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
+void pblk_line_replace_data(struct pblk *pblk);
int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
+struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
int pblk_line_is_full(struct pblk_line *line);
void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
-void pblk_line_close_ws(struct work_struct *work);
+void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_close_meta_sync(struct pblk *pblk);
+void pblk_line_close_ws(struct work_struct *work);
+void pblk_pipeline_stop(struct pblk *pblk);
void pblk_line_mark_bb(struct work_struct *work);
void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
- void (*work)(struct work_struct *));
+ void (*work)(struct work_struct *),
+ struct workqueue_struct *wq);
u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
-int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
+int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
+ void *emeta_buf);
int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
void pblk_line_put(struct kref *ref);
struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
+u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
+void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
+u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
unsigned long secs_to_flush);
void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
void pblk_end_io_sync(struct nvm_rq *rqd);
int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
int nr_pages);
-void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
- u64 paddr);
void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
int nr_pages);
void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
+void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
+ u64 paddr);
void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
/*
* pblk read path
*/
+extern struct bio_set *pblk_bio_set;
int pblk_submit_read(struct pblk *pblk, struct bio *bio);
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
*/
void pblk_submit_rec(struct work_struct *work);
struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
-void pblk_recov_pad(struct pblk *pblk);
+int pblk_recov_pad(struct pblk *pblk);
__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
/*
* pblk gc
*/
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
int pblk_gc_init(struct pblk *pblk);
void pblk_gc_exit(struct pblk *pblk);
void pblk_gc_should_start(struct pblk *pblk);
void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
/*
* pblk rate limiter
*/
void pblk_rl_init(struct pblk_rl *rl, int budget);
void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
+void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
+int pblk_rl_is_limit(struct pblk_rl *rl);
/*
* pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
return c_ctx - sizeof(struct nvm_rq);
}
-static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
+static inline void *emeta_to_bb(struct line_emeta *emeta)
+{
+ return emeta->bb_bitmap;
+}
+
+static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
+{
+ return ((void *)emeta + pblk->lm.emeta_len[1]);
+}
+
+static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
{
- return (emeta) + 1;
+ return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
+}
+
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+ int vsc;
+
+ spin_lock(&line->lock);
+ vsc = le32_to_cpu(*line->vsc);
+ spin_unlock(&line->lock);
+
+ return vsc;
}
#define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
ppa_addr->ppa = ADDR_EMPTY;
}
+static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
+{
+ if (lppa.ppa == rppa.ppa)
+ return true;
+
+ return false;
+}
+
static inline int pblk_addr_in_cache(struct ppa_addr ppa)
{
return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
}
static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
- struct line_smeta *smeta)
+ struct line_header *header)
{
u32 crc = ~(u32)0;
- crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
+ crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
crc = crc32_le(crc, (unsigned char *)emeta +
sizeof(struct line_header) + sizeof(crc),
- lm->emeta_len -
+ lm->emeta_len[0] -
sizeof(struct line_header) - sizeof(crc));
return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
return flags;
}
-static inline int pblk_set_read_mode(struct pblk *pblk)
+enum {
+ PBLK_READ_RANDOM = 0,
+ PBLK_READ_SEQUENTIAL = 1,
+};
+
+static inline int pblk_set_read_mode(struct pblk *pblk, int type)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ int flags;
+
+ flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ if (type == PBLK_READ_SEQUENTIAL)
+ flags |= geo->plane_mode >> 1;
+
+ return flags;
+}
+
+static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
{
- return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
+ return !(nr_secs % pblk->min_write_pgs);
}
#ifdef CONFIG_NVM_DEBUG
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
{
struct completion *waiting = bio->bi_private;
- if (bio->bi_error)
- pr_err("nvm: gc request failed (%u).\n", bio->bi_error);
+ if (bio->bi_status)
+ pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
complete(waiting);
}
@@ -359,7 +359,7 @@ try:
goto finished;
}
wait_for_completion_io(&wait);
- if (bio->bi_error) {
+ if (bio->bi_status) {
rrpc_inflight_laddr_release(rrpc, rqd);
goto finished;
}
@@ -385,7 +385,7 @@ try:
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
- if (bio->bi_error)
+ if (bio->bi_status)
goto finished;
bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
struct nvm_rq *rqd;
int err;
- blk_queue_split(q, &bio, q->bio_split);
+ blk_queue_split(q, &bio);
if (bio_op(bio) == REQ_OP_DISCARD) {
rrpc_discard(rrpc, bio);