diff options
-rw-r--r-- | fs/gfs2/bmap.c | 741 | ||||
-rw-r--r-- | fs/gfs2/file.c | 6 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 81 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 8 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 4 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 7 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 7 | ||||
-rw-r--r-- | fs/gfs2/super.c | 11 |
8 files changed, 518 insertions, 347 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 01b97c012c6e..3814a60e0aea 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -38,11 +38,6 @@ struct metapath { __u16 mp_list[GFS2_MAX_META_HEIGHT]; }; -struct strip_mine { - int sm_first; - unsigned int sm_height; -}; - /** * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page * @ip: the inode @@ -253,6 +248,19 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp) } /** + * metaptr1 - Return the first possible metadata pointer in a metapath buffer + * @height: The metadata height (0 = dinode) + * @mp: The metapath + */ +static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp) +{ + struct buffer_head *bh = mp->mp_bh[height]; + if (height == 0) + return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode))); + return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header))); +} + +/** * metapointer - Return pointer to start of metadata in a buffer * @height: The metadata height (0 = dinode) * @mp: The metapath @@ -264,10 +272,8 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp) static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) { - struct buffer_head *bh = mp->mp_bh[height]; - unsigned int head_size = (height > 0) ? 
- sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); - return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; + __be64 *p = metaptr1(height, mp); + return p + mp->mp_list[height]; } static void gfs2_metapath_ra(struct gfs2_glock *gl, @@ -296,6 +302,23 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, } /** + * lookup_mp_height - helper function for lookup_metapath + * @ip: the inode + * @mp: the metapath + * @h: the height which needs looking up + */ +static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h) +{ + __be64 *ptr = metapointer(h, mp); + u64 dblock = be64_to_cpu(*ptr); + + if (!dblock) + return h + 1; + + return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]); +} + +/** * lookup_metapath - Walk the metadata tree to a specific point * @ip: The inode * @mp: The metapath @@ -316,17 +339,10 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) { unsigned int end_of_metadata = ip->i_height - 1; unsigned int x; - __be64 *ptr; - u64 dblock; int ret; for (x = 0; x < end_of_metadata; x++) { - ptr = metapointer(x, mp); - dblock = be64_to_cpu(*ptr); - if (!dblock) - return x + 1; - - ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]); + ret = lookup_mp_height(ip, mp, x); if (ret) return ret; } @@ -334,6 +350,35 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) return ip->i_height; } +/** + * fillup_metapath - fill up buffers for the metadata path to a specific height + * @ip: The inode + * @mp: The metapath + * @h: The height to which it should be mapped + * + * Similar to lookup_metapath, but does lookups for a range of heights + * + * Returns: error or height of metadata tree + */ + +static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) +{ + unsigned int start_h = h - 1; + int ret; + + if (h) { + /* find the first buffer we need to look up. 
*/ + while (start_h > 0 && mp->mp_bh[start_h] == NULL) + start_h--; + for (; start_h < h; start_h++) { + ret = lookup_mp_height(ip, mp, start_h); + if (ret) + return ret; + } + } + return ip->i_height; +} + static inline void release_metapath(struct metapath *mp) { int i; @@ -422,6 +467,13 @@ enum alloc_state { /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ }; +static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt) +{ + if (hgt) + return sdp->sd_inptrs; + return sdp->sd_diptrs; +} + /** * gfs2_bmap_alloc - Build a metadata tree of the requested height * @inode: The GFS2 inode @@ -620,7 +672,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, BUG_ON(maxlen == 0); - memset(mp.mp_bh, 0, sizeof(mp.mp_bh)); + memset(&mp, 0, sizeof(mp)); bmap_lock(ip, create); clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); @@ -702,252 +754,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi } /** - * do_strip - Look for a layer a particular layer of the file and strip it off - * @ip: the inode - * @dibh: the dinode buffer - * @bh: A buffer of pointers - * @top: The first pointer in the buffer - * @bottom: One more than the last pointer - * @height: the height this buffer is at - * @sm: a pointer to a struct strip_mine - * - * Returns: errno - */ - -static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, - struct buffer_head *bh, __be64 *top, __be64 *bottom, - unsigned int height, struct strip_mine *sm) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrp_list rlist; - struct gfs2_trans *tr; - u64 bn, bstart; - u32 blen, btotal; - __be64 *p; - unsigned int rg_blocks = 0; - int metadata; - unsigned int revokes = 0; - int x; - int error; - int jblocks_rqsted; - - error = gfs2_rindex_update(sdp); - if (error) - return error; - - if (!*top) - sm->sm_first = 0; - - if (height != sm->sm_height) - return 0; - - if (sm->sm_first) { - top++; - sm->sm_first = 0; - } - - metadata = (height 
!= ip->i_height - 1); - if (metadata) - revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; - else if (ip->i_depth) - revokes = sdp->sd_inptrs; - - memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); - bstart = 0; - blen = 0; - - for (p = top; p < bottom; p++) { - if (!*p) - continue; - - bn = be64_to_cpu(*p); - - if (bstart + blen == bn) - blen++; - else { - if (bstart) - gfs2_rlist_add(ip, &rlist, bstart); - - bstart = bn; - blen = 1; - } - } - - if (bstart) - gfs2_rlist_add(ip, &rlist, bstart); - else - goto out; /* Nothing to do */ - - gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); - - for (x = 0; x < rlist.rl_rgrps; x++) { - struct gfs2_rgrpd *rgd; - rgd = rlist.rl_ghs[x].gh_gl->gl_object; - rg_blocks += rgd->rd_length; - } - - error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); - if (error) - goto out_rlist; - - if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(&ip->i_res); - -restart: - jblocks_rqsted = rg_blocks + RES_DINODE + - RES_INDIRECT + RES_STATFS + RES_QUOTA + - gfs2_struct2blk(sdp, revokes, sizeof(u64)); - if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2)) - jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2); - error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); - if (error) - goto out_rg_gunlock; - - tr = current->journal_info; - down_write(&ip->i_rw_mutex); - - gfs2_trans_add_meta(ip->i_gl, dibh); - gfs2_trans_add_meta(ip->i_gl, bh); - - bstart = 0; - blen = 0; - btotal = 0; - - for (p = top; p < bottom; p++) { - if (!*p) - continue; - - /* check for max reasonable journal transaction blocks */ - if (tr->tr_num_buf_new + RES_STATFS + - RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { - if (rg_blocks >= tr->tr_num_buf_new) - rg_blocks -= tr->tr_num_buf_new; - else - rg_blocks = 0; - break; - } - - bn = be64_to_cpu(*p); - - if (bstart + blen == bn) - blen++; - else { - if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); - btotal += blen; - } - - bstart = bn; - blen = 1; - } - - *p 
= 0; - gfs2_add_inode_blocks(&ip->i_inode, -1); - } - if (p == bottom) - rg_blocks = 0; - - if (bstart) { - __gfs2_free_blocks(ip, bstart, blen, metadata); - btotal += blen; - } - - gfs2_statfs_change(sdp, 0, +btotal, 0); - gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, - ip->i_inode.i_gid); - - ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode); - - gfs2_dinode_out(ip, dibh->b_data); - - up_write(&ip->i_rw_mutex); - - gfs2_trans_end(sdp); - - if (rg_blocks) - goto restart; - -out_rg_gunlock: - gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); -out_rlist: - gfs2_rlist_free(&rlist); -out: - return error; -} - -/** - * recursive_scan - recursively scan through the end of a file - * @ip: the inode - * @dibh: the dinode buffer - * @mp: the path through the metadata to the point to start - * @height: the height the recursion is at - * @block: the indirect block to look at - * @first: 1 if this is the first block - * @sm: data opaque to this function to pass to @bc - * - * When this is first called @height and @block should be zero and - * @first should be 1. - * - * Returns: errno - */ - -static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, - struct metapath *mp, unsigned int height, - u64 block, int first, struct strip_mine *sm) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head *bh = NULL; - __be64 *top, *bottom; - u64 bn; - int error; - int mh_size = sizeof(struct gfs2_meta_header); - - if (!height) { - error = gfs2_meta_inode_buffer(ip, &bh); - if (error) - return error; - dibh = bh; - - top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0]; - bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs; - } else { - error = gfs2_meta_indirect_buffer(ip, height, block, &bh); - if (error) - return error; - - top = (__be64 *)(bh->b_data + mh_size) + - (first ? 
mp->mp_list[height] : 0); - - bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs; - } - - error = do_strip(ip, dibh, bh, top, bottom, height, sm); - if (error) - goto out; - - if (height < ip->i_height - 1) { - - gfs2_metapath_ra(ip->i_gl, bh, top); - - for (; top < bottom; top++, first = 0) { - if (!*top) - continue; - - bn = be64_to_cpu(*top); - - error = recursive_scan(ip, dibh, mp, height + 1, bn, - first, sm); - if (error) - break; - } - } -out: - brelse(bh); - return error; -} - - -/** * gfs2_block_truncate_page - Deal with zeroing out data for truncate * * This is partly borrowed from ext3. @@ -1106,41 +912,406 @@ out: return error; } -static int trunc_dealloc(struct gfs2_inode *ip, u64 size) +/** + * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein + * @ip: inode + * @rd_gh: holder of resource group glock + * @mp: current metapath fully populated with buffers + * @btotal: place to keep count of total blocks freed + * @hgt: height we're processing + * @preserve1: true if the first metadata pointer must be preserved + * + * We sweep a metadata buffer (provided by the metapath) for blocks we need to + * free, and free them all. However, we do it one rgrp at a time. If this + * block has references to multiple rgrps, we break it into individual + * transactions. This allows other processes to use the rgrps while we're + * focused on a single one, for better concurrency / performance. + * At every transaction boundary, we rewrite the inode into the journal. + * That way the bitmaps are kept consistent with the inode and we can recover + * if we're interrupted by power-outages. + * + * Returns: 0, or return code if an error occurred. 
+ * *btotal has the total number of blocks freed + */ +static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, + const struct metapath *mp, u32 *btotal, int hgt, + bool preserve1) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned int height = ip->i_height; - u64 lblock; - struct metapath mp; - int error; + struct gfs2_rgrpd *rgd; + struct gfs2_trans *tr; + struct buffer_head *bh = mp->mp_bh[hgt]; + __be64 *top, *bottom, *p; + int blks_outside_rgrp; + u64 bn, bstart, isize_blks; + s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */ + int meta = ((hgt != ip->i_height - 1) ? 1 : 0); + int ret = 0; + bool buf_in_tr = false; /* buffer was added to transaction */ + + if (gfs2_metatype_check(sdp, bh, + (hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI))) + return -EIO; + +more_rgrps: + blks_outside_rgrp = 0; + bstart = 0; + blen = 0; + top = metapointer(hgt, mp); /* first ptr from metapath */ + /* If we're keeping some data at the truncation point, we've got to + preserve the metadata tree by adding 1 to the starting metapath. */ + if (preserve1) + top++; + + bottom = (__be64 *)(bh->b_data + bh->b_size); + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + bn = be64_to_cpu(*p); + if (gfs2_holder_initialized(rd_gh)) { + rgd = (struct gfs2_rgrpd *)rd_gh->gh_gl->gl_object; + gfs2_assert_withdraw(sdp, + gfs2_glock_is_locked_by_me(rd_gh->gh_gl)); + } else { + rgd = gfs2_blk2rgrpd(sdp, bn, false); + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + 0, rd_gh); + if (ret) + goto out; + + /* Must be done with the rgrp glock held: */ + if (gfs2_rs_active(&ip->i_res) && + rgd == ip->i_res.rs_rbm.rgd) + gfs2_rs_deltree(&ip->i_res); + } + + if (!rgrp_contains_block(rgd, bn)) { + blks_outside_rgrp++; + continue; + } + + /* The size of our transactions will be unknown until we + actually process all the metadata blocks that relate to + the rgrp. So we estimate. 
We know it can't be more than + the dinode's i_blocks and we don't want to exceed the + journal flush threshold, sd_log_thresh2. */ + if (current->journal_info == NULL) { + unsigned int jblocks_rqsted, revokes; + + jblocks_rqsted = rgd->rd_length + RES_DINODE + + RES_INDIRECT; + isize_blks = gfs2_get_inode_blocks(&ip->i_inode); + if (isize_blks > atomic_read(&sdp->sd_log_thresh2)) + jblocks_rqsted += + atomic_read(&sdp->sd_log_thresh2); + else + jblocks_rqsted += isize_blks; + revokes = jblocks_rqsted; + if (meta) + revokes += hptrs(sdp, hgt); + else if (ip->i_depth) + revokes += sdp->sd_inptrs; + ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); + if (ret) + goto out_unlock; + down_write(&ip->i_rw_mutex); + } + /* check if we will exceed the transaction blocks requested */ + tr = current->journal_info; + if (tr->tr_num_buf_new + RES_STATFS + + RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { + /* We set blks_outside_rgrp to ensure the loop will + be repeated for the same rgrp, but with a new + transaction. */ + blks_outside_rgrp++; + /* This next part is tricky. If the buffer was added + to the transaction, we've already set some block + pointers to 0, so we better follow through and free + them, or we will introduce corruption (so break). + This may be impossible, or at least rare, but I + decided to cover the case regardless. + + If the buffer was not added to the transaction + (this call), doing so would exceed our transaction + size, so we need to end the transaction and start a + new one (so goto). 
*/ + + if (buf_in_tr) + break; + goto out_unlock; + } + + gfs2_trans_add_meta(ip->i_gl, bh); + buf_in_tr = true; + *p = 0; + if (bstart + blen == bn) { + blen++; + continue; + } + if (bstart) { + __gfs2_free_blocks(ip, bstart, (u32)blen, meta); + (*btotal) += blen; + gfs2_add_inode_blocks(&ip->i_inode, -blen); + } + bstart = bn; + blen = 1; + } + if (bstart) { + __gfs2_free_blocks(ip, bstart, (u32)blen, meta); + (*btotal) += blen; + gfs2_add_inode_blocks(&ip->i_inode, -blen); + } +out_unlock: + if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks + outside the rgrp we just processed, + do it all over again. */ + if (current->journal_info) { + struct buffer_head *dibh = mp->mp_bh[0]; + + /* Every transaction boundary, we rewrite the dinode + to keep its di_blocks current in case of failure. */ + ip->i_inode.i_mtime = ip->i_inode.i_ctime = + CURRENT_TIME; + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + } + gfs2_glock_dq_uninit(rd_gh); + cond_resched(); + goto more_rgrps; + } +out: + return ret; +} + +/** + * find_nonnull_ptr - find a non-null pointer given a metapath and height + * assumes the metapath is valid (with buffers) out to height h + * @mp: starting metapath + * @h: desired height to search + * + * Returns: true if a non-null pointer was found in the metapath buffer + * false if all remaining pointers are NULL in the buffer + */ +static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, + unsigned int h) +{ + __be64 *ptr; + unsigned int ptrs = hptrs(sdp, h) - 1; + + while (true) { + ptr = metapointer(h, mp); + if (*ptr) /* if we have a non-null pointer */ + return true; + + if (mp->mp_list[h] < ptrs) + mp->mp_list[h]++; + else + return false; /* no more pointers in this buffer */ + } +} + +enum dealloc_states { + DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */ + DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */ + 
DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. */ + DEALLOC_DONE = 3, /* process complete */ +}; - if (!size) +/** + * trunc_dealloc - truncate a file down to a desired size + * @ip: inode to truncate + * @newsize: The desired size of the file + * + * This function truncates a file to newsize. It works from the + * bottom up, and from the right to the left. In other words, it strips off + * the highest layer (data) before stripping any of the metadata. Doing it + * this way is best in case the operation is interrupted by power failure, etc. + * The dinode is rewritten in every transaction to guarantee integrity. + */ +static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct metapath mp; + struct buffer_head *dibh, *bh; + struct gfs2_holder rd_gh; + u64 lblock; + __u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */ + unsigned int strip_h = ip->i_height - 1; + u32 btotal = 0; + int ret, state; + int mp_h; /* metapath buffers are read in to this height */ + sector_t last_ra = 0; + u64 prev_bnr = 0; + bool preserve1; /* need to preserve the first meta pointer? 
*/ + + if (!newsize) lblock = 0; else - lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; + lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift; + memset(&mp, 0, sizeof(mp)); find_metapath(sdp, lblock, &mp, ip->i_height); - error = gfs2_rindex_update(sdp); - if (error) - return error; - error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); - if (error) - return error; + memcpy(&nbof, &mp.mp_list, sizeof(nbof)); + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (ret) + return ret; - while (height--) { - struct strip_mine sm; - sm.sm_first = !!size; - sm.sm_height = height; + mp.mp_bh[0] = dibh; + ret = lookup_metapath(ip, &mp); + if (ret == ip->i_height) + state = DEALLOC_MP_FULL; /* We have a complete metapath */ + else + state = DEALLOC_FILL_MP; /* deal with partial metapath */ - error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm); - if (error) + ret = gfs2_rindex_update(sdp); + if (ret) + goto out_metapath; + + ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); + if (ret) + goto out_metapath; + gfs2_holder_mark_uninitialized(&rd_gh); + + mp_h = strip_h; + + while (state != DEALLOC_DONE) { + switch (state) { + /* Truncate a full metapath at the given strip height. + * Note that strip_h == mp_h in order to be in this state. */ + case DEALLOC_MP_FULL: + if (mp_h > 0) { /* issue read-ahead on metadata */ + __be64 *top; + + bh = mp.mp_bh[mp_h - 1]; + if (bh->b_blocknr != last_ra) { + last_ra = bh->b_blocknr; + top = metaptr1(mp_h - 1, &mp); + gfs2_metapath_ra(ip->i_gl, bh, top); + } + } + /* If we're truncating to a non-zero size and the mp is + at the beginning of file for the strip height, we + need to preserve the first metadata pointer. 
*/ + preserve1 = (newsize && + (mp.mp_list[mp_h] == nbof[mp_h])); + bh = mp.mp_bh[mp_h]; + gfs2_assert_withdraw(sdp, bh); + if (gfs2_assert_withdraw(sdp, + prev_bnr != bh->b_blocknr)) { + printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, " + "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n", + sdp->sd_fsname, + (unsigned long long)ip->i_no_addr, + prev_bnr, ip->i_height, strip_h, mp_h); + } + prev_bnr = bh->b_blocknr; + ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal, + mp_h, preserve1); + /* If we hit an error or just swept dinode buffer, + just exit. */ + if (ret || !mp_h) { + state = DEALLOC_DONE; + break; + } + state = DEALLOC_MP_LOWER; + break; + + /* lower the metapath strip height */ + case DEALLOC_MP_LOWER: + /* We're done with the current buffer, so release it, + unless it's the dinode buffer. Then back up to the + previous pointer. */ + if (mp_h) { + brelse(mp.mp_bh[mp_h]); + mp.mp_bh[mp_h] = NULL; + } + /* If we can't get any lower in height, we've stripped + off all we can. Next step is to back up and start + stripping the previous level of metadata. */ + if (mp_h == 0) { + strip_h--; + memcpy(&mp.mp_list, &nbof, sizeof(nbof)); + mp_h = strip_h; + state = DEALLOC_FILL_MP; + break; + } + mp.mp_list[mp_h] = 0; + mp_h--; /* search one metadata height down */ + if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1) + break; /* loop around in the same state */ + mp.mp_list[mp_h]++; + /* Here we've found a part of the metapath that is not + * allocated. We need to search at that height for the + * next non-null pointer. */ + if (find_nonnull_ptr(sdp, &mp, mp_h)) { + state = DEALLOC_FILL_MP; + mp_h++; + } + /* No more non-null pointers at this height. Back up + to the previous height and try again. */ + break; /* loop around in the same state */ + + /* Fill the metapath with buffers to the given height. */ + case DEALLOC_FILL_MP: + /* Fill the buffers out to the current height. 
*/ + ret = fillup_metapath(ip, &mp, mp_h); + if (ret < 0) + goto out; + + /* If buffers found for the entire strip height */ + if ((ret == ip->i_height) && (mp_h == strip_h)) { + state = DEALLOC_MP_FULL; + break; + } + if (ret < ip->i_height) /* We have a partial height */ + mp_h = ret - 1; + + /* If we find a non-null block pointer, crawl a bit + higher up in the metapath and try again, otherwise + we need to look lower for a new starting point. */ + if (find_nonnull_ptr(sdp, &mp, mp_h)) + mp_h++; + else + state = DEALLOC_MP_LOWER; break; + } } - gfs2_quota_unhold(ip); + if (btotal) { + if (current->journal_info == NULL) { + ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + + RES_QUOTA, 0); + if (ret) + goto out; + down_write(&ip->i_rw_mutex); + } + gfs2_statfs_change(sdp, 0, +btotal, 0); + gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, + ip->i_inode.i_gid); + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + } - return error; +out: + if (gfs2_holder_initialized(&rd_gh)) + gfs2_glock_dq_uninit(&rd_gh); + if (current->journal_info) { + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + cond_resched(); + } + gfs2_quota_unhold(ip); +out_metapath: + release_metapath(&mp); + return ret; } static int trunc_end(struct gfs2_inode *ip) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6fe2a59c6a9a..c2062a108d19 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -911,11 +911,15 @@ out_qunlock: static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; - if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip)) + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + /* fallocate is needed by gfs2_grow to reserve space in the rindex */ + if 
(gfs2_is_jdata(ip) && inode != sdp->sd_rindex) return -EOPNOTSUPP; inode_lock(inode); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ec0848fcca02..959a19ced4d5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -73,7 +73,7 @@ static DEFINE_SPINLOCK(lru_lock); static struct rhashtable_params ht_parms = { .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, - .key_len = sizeof(struct lm_lockname), + .key_len = offsetofend(struct lm_lockname, ln_type), .key_offset = offsetof(struct gfs2_glock, gl_name), .head_offset = offsetof(struct gfs2_glock, gl_node), }; @@ -449,6 +449,9 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && + target != LM_ST_UNLOCKED) + return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -484,7 +487,8 @@ __acquires(&gl->gl_lockref.lock) } else if (ret) { pr_err("lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, 1); + GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN, + &sdp->sd_flags)); } } else { /* lock_nolock */ finish_xmote(gl, target); @@ -653,10 +657,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type, .ln_sbd = sdp }; - struct gfs2_glock *gl, *tmp = NULL; + struct gfs2_glock *gl, *tmp; struct address_space *mapping; struct kmem_cache *cachep; - int ret, tries = 0; + int ret = 0; rcu_read_lock(); gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); @@ -721,35 +725,32 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } again: - ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node, - ht_parms); - if (ret == 0) { + rcu_read_lock(); + tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node, + ht_parms); + if (!tmp) { *glp = gl; - return 0; + goto out; } - - if (ret == -EEXIST) { - ret = 0; - rcu_read_lock(); - tmp = 
rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); - if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { - if (++tries < 100) { - rcu_read_unlock(); - cond_resched(); - goto again; - } - tmp = NULL; - ret = -ENOMEM; - } - rcu_read_unlock(); - } else { - WARN_ON_ONCE(ret); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto out_free; } + if (lockref_get_not_dead(&tmp->gl_lockref)) { + *glp = tmp; + goto out_free; + } + rcu_read_unlock(); + cond_resched(); + goto again; + +out_free: kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); atomic_dec(&sdp->sd_glock_disposal); - *glp = tmp; +out: + rcu_read_unlock(); return ret; } @@ -1918,10 +1919,10 @@ static const struct seq_operations gfs2_sbstats_seq_ops = { #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) -static int gfs2_glocks_open(struct inode *inode, struct file *file) +static int __gfs2_glocks_open(struct inode *inode, struct file *file, + const struct seq_operations *ops) { - int ret = seq_open_private(file, &gfs2_glock_seq_ops, - sizeof(struct gfs2_glock_iter)); + int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter)); if (ret == 0) { struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; @@ -1932,11 +1933,16 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) if (seq->buf) seq->size = GFS2_SEQ_GOODSIZE; gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); + rhashtable_walk_enter(&gl_hash_table, &gi->hti); } return ret; } +static int gfs2_glocks_open(struct inode *inode, struct file *file) +{ + return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops); +} + static int gfs2_glocks_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; @@ -1949,20 +1955,7 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file) static int gfs2_glstats_open(struct inode *inode, struct file *file) { - int ret = seq_open_private(file, 
&gfs2_glstats_seq_ops, - sizeof(struct gfs2_glock_iter)); - if (ret == 0) { - struct seq_file *seq = file->private_data; - struct gfs2_glock_iter *gi = seq->private; - gi->sdp = inode->i_private; - gi->last_pos = 0; - seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); - if (seq->buf) - seq->size = GFS2_SEQ_GOODSIZE; - gi->gl = NULL; - ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); - } - return ret; + return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops); } static int gfs2_sbstats_open(struct inode *inode, struct file *file) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 511e1ed7e2de..b7cf65d13561 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -203,11 +203,15 @@ enum { DFL_DLM_RECOVERY = 6, }; +/* + * We are using struct lm_lockname as an rhashtable key. Avoid holes within + * the struct; padding at the end is fine. + */ struct lm_lockname { - struct gfs2_sbd *ln_sbd; u64 ln_number; + struct gfs2_sbd *ln_sbd; unsigned int ln_type; -} __packed __aligned(sizeof(int)); +}; #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e279c3ce27be..9f605ea4810c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -202,8 +202,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, fail_refresh: ip->i_iopen_gh.gh_flags |= GL_NOCACHE; ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_put: if (io_gl) gfs2_glock_put(io_gl); @@ -667,6 +666,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, ip->i_height = 0; ip->i_depth = 0; ip->i_entries = 0; + ip->i_no_addr = 0; /* Temporarily zero until real addr is assigned */ switch(mode & S_IFMT) { case S_IFREG: diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 86ccc0159393..83c9909ff14a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ 
-483,13 +483,6 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } } -static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) -{ - u64 first = rgd->rd_data0; - u64 last = first + rgd->rd_data; - return first <= block && block < last; -} - /** * gfs2_blk2rgrpd - Find resource group for a given data/meta block number * @sdp: The GFS2 superblock diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 66b51cf66dfa..e90478e2f545 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -83,5 +83,12 @@ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) return rs && !RB_EMPTY_NODE(&rs->rs_node); } +static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) +{ + u64 first = rgd->rd_data0; + u64 last = first + rgd->rd_data; + return first <= block && block < last; +} + extern void check_and_update_goal(struct gfs2_inode *ip); #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 361796a84fce..29b0473f6e74 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -793,7 +793,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC))) return; - + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) { @@ -1538,8 +1539,7 @@ static void gfs2_evict_inode(struct inode *inode) error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; } @@ -1617,7 +1617,7 @@ out_unlock: if (gfs2_holder_initialized(&ip->i_iopen_gh)) { if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); + gfs2_glock_dq(&ip->i_iopen_gh); } gfs2_holder_uninit(&ip->i_iopen_gh); } @@ -1639,8 +1639,7 @@ out: if 
(gfs2_holder_initialized(&ip->i_iopen_gh)) { ip->i_iopen_gh.gh_gl->gl_object = NULL; ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_wait(&ip->i_iopen_gh); - gfs2_holder_uninit(&ip->i_iopen_gh); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); } } |