summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2019-06-13 17:44:08 +0200
committerIlya Dryomov <idryomov@gmail.com>2019-07-08 14:01:45 +0200
commit793333a303c90174c317e3fa12e898bbc02daee4 (patch)
treec7bb5b44d5a0f7d443b2954bd162f3baf644de74 /drivers/block
parentea9b743c97dc52c74b97d968bccbb51f6e5c92b6 (diff)
rbd: introduce copyup state machine
Both write and copyup paths will get more complex with object map. Factor copyup code out into a separate state machine. While at it, take advantage of obj_req->osd_reqs list and issue empty and current snapc OSD requests together, one after another. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/rbd.c187
1 files changed, 123 insertions, 64 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c471e7d13f36..99a0ebbf379c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -226,6 +226,7 @@ enum obj_operation_type {
#define RBD_OBJ_FLAG_DELETION (1U << 0)
#define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1)
+#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2)
enum rbd_obj_read_state {
RBD_OBJ_READ_START = 1,
@@ -261,9 +262,15 @@ enum rbd_obj_read_state {
enum rbd_obj_write_state {
RBD_OBJ_WRITE_START = 1,
RBD_OBJ_WRITE_OBJECT,
- RBD_OBJ_WRITE_READ_FROM_PARENT,
- RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC,
- RBD_OBJ_WRITE_COPYUP_OPS,
+ __RBD_OBJ_WRITE_COPYUP,
+ RBD_OBJ_WRITE_COPYUP,
+};
+
+enum rbd_obj_copyup_state {
+ RBD_OBJ_COPYUP_START = 1,
+ RBD_OBJ_COPYUP_READ_PARENT,
+ __RBD_OBJ_COPYUP_WRITE_OBJECT,
+ RBD_OBJ_COPYUP_WRITE_OBJECT,
};
struct rbd_obj_request {
@@ -286,12 +293,15 @@ struct rbd_obj_request {
u32 bvec_idx;
};
};
+
+ enum rbd_obj_copyup_state copyup_state;
struct bio_vec *copyup_bvecs;
u32 copyup_bvec_count;
struct list_head osd_reqs; /* w/ r_private_item */
struct mutex state_mutex;
+ struct pending_result pending;
struct kref kref;
};
@@ -2568,8 +2578,8 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
#define MODS_ONLY U32_MAX
-static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req,
- u32 bytes)
+static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req,
+ u32 bytes)
{
struct ceph_osd_request *osd_req;
int ret;
@@ -2595,7 +2605,8 @@ static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req,
return 0;
}
-static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
+static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req,
+ u32 bytes)
{
struct ceph_osd_request *osd_req;
int num_ops = count_write_ops(obj_req);
@@ -2628,33 +2639,6 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
return 0;
}
-static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
-{
- /*
- * Only send non-zero copyup data to save some I/O and network
- * bandwidth -- zero copyup data is equivalent to the object not
- * existing.
- */
- if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) {
- dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
- bytes = 0;
- }
-
- if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
- /*
- * Send a copyup request with an empty snapshot context to
- * deep-copyup the object through all existing snapshots.
- * A second request with the current snapshot context will be
- * sent for the actual modification.
- */
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC;
- return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes);
- }
-
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
- return rbd_obj_issue_copyup_ops(obj_req, bytes);
-}
-
static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
{
u32 i;
@@ -2688,7 +2672,7 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
* target object up to the overlap point (if any) from the parent,
* so we can use it for a copyup.
*/
-static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
+static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req)
{
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
int ret;
@@ -2703,22 +2687,111 @@ static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
* request -- pass MODS_ONLY since the copyup isn't needed
* anymore.
*/
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
- return rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
+ return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY);
}
ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
if (ret)
return ret;
- obj_req->write_state = RBD_OBJ_WRITE_READ_FROM_PARENT;
return rbd_obj_read_from_parent(obj_req);
}
+static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req)
+{
+ u32 bytes = rbd_obj_img_extents_bytes(obj_req);
+ int ret;
+
+ rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
+
+ /*
+ * Only send non-zero copyup data to save some I/O and network
+ * bandwidth -- zero copyup data is equivalent to the object not
+ * existing.
+ */
+ if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
+ bytes = 0;
+
+ if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
+ /*
+ * Send a copyup request with an empty snapshot context to
+ * deep-copyup the object through all existing snapshots.
+ * A second request with the current snapshot context will be
+ * sent for the actual modification.
+ */
+ ret = rbd_obj_copyup_empty_snapc(obj_req, bytes);
+ if (ret) {
+ obj_req->pending.result = ret;
+ return;
+ }
+
+ obj_req->pending.num_pending++;
+ bytes = MODS_ONLY;
+ }
+
+ ret = rbd_obj_copyup_current_snapc(obj_req, bytes);
+ if (ret) {
+ obj_req->pending.result = ret;
+ return;
+ }
+
+ obj_req->pending.num_pending++;
+}
+
+static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result)
+{
+ int ret;
+
+again:
+ switch (obj_req->copyup_state) {
+ case RBD_OBJ_COPYUP_START:
+ rbd_assert(!*result);
+
+ ret = rbd_obj_copyup_read_parent(obj_req);
+ if (ret) {
+ *result = ret;
+ return true;
+ }
+ if (obj_req->num_img_extents)
+ obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT;
+ else
+ obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
+ return false;
+ case RBD_OBJ_COPYUP_READ_PARENT:
+ if (*result)
+ return true;
+
+ if (is_zero_bvecs(obj_req->copyup_bvecs,
+ rbd_obj_img_extents_bytes(obj_req))) {
+ dout("%s %p detected zeros\n", __func__, obj_req);
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS;
+ }
+
+ rbd_obj_copyup_write_object(obj_req);
+ if (!obj_req->pending.num_pending) {
+ *result = obj_req->pending.result;
+ obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
+ goto again;
+ }
+ obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT;
+ return false;
+ case __RBD_OBJ_COPYUP_WRITE_OBJECT:
+ if (!pending_result_dec(&obj_req->pending, result))
+ return false;
+ /* fall through */
+ case RBD_OBJ_COPYUP_WRITE_OBJECT:
+ return true;
+ default:
+ BUG();
+ }
+}
+
static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
{
+ struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
int ret;
+again:
switch (obj_req->write_state) {
case RBD_OBJ_WRITE_START:
rbd_assert(!*result);
@@ -2733,12 +2806,10 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
case RBD_OBJ_WRITE_OBJECT:
if (*result == -ENOENT) {
if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
- ret = rbd_obj_handle_write_guard(obj_req);
- if (ret) {
- *result = ret;
- return true;
- }
- return false;
+ *result = 0;
+ obj_req->copyup_state = RBD_OBJ_COPYUP_START;
+ obj_req->write_state = __RBD_OBJ_WRITE_COPYUP;
+ goto again;
}
/*
* On a non-existent object:
@@ -2747,31 +2818,19 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
*result = 0;
}
- /* fall through */
- case RBD_OBJ_WRITE_COPYUP_OPS:
- return true;
- case RBD_OBJ_WRITE_READ_FROM_PARENT:
if (*result)
return true;
- ret = rbd_obj_issue_copyup(obj_req,
- rbd_obj_img_extents_bytes(obj_req));
- if (ret) {
- *result = ret;
- return true;
- }
- return false;
- case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC:
+ obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
+ goto again;
+ case __RBD_OBJ_WRITE_COPYUP:
+ if (!rbd_obj_advance_copyup(obj_req, result))
+ return false;
+ /* fall through */
+ case RBD_OBJ_WRITE_COPYUP:
if (*result)
- return true;
-
- obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
- ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
- if (ret) {
- *result = ret;
- return true;
- }
- return false;
+ rbd_warn(rbd_dev, "copyup failed: %d", *result);
+ return true;
default:
BUG();
}