diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 194 |
1 files changed, 89 insertions, 105 deletions
diff --git a/fs/inode.c b/fs/inode.c index 0f7e88a7803f..73920d555c88 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -33,11 +33,11 @@ * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() - * inode_lru_lock protects: - * inode_lru, inode->i_lru + * inode->i_sb->s_inode_lru_lock protects: + * inode->i_sb->s_inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list - * inode_wb_list_lock protects: + * bdi->wb.list_lock protects: * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list * inode_hash_lock protects: * inode_hashtable, inode->i_hash @@ -46,9 +46,9 @@ * * inode_sb_list_lock * inode->i_lock - * inode_lru_lock + * inode->i_sb->s_inode_lru_lock * - * inode_wb_list_lock + * bdi->wb.list_lock * inode->i_lock * * inode_hash_lock @@ -64,22 +64,7 @@ static unsigned int i_hash_shift __read_mostly; static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -static LIST_HEAD(inode_lru); -static DEFINE_SPINLOCK(inode_lru_lock); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); - -/* - * iprune_sem provides exclusion between the icache shrinking and the - * umount path. - * - * We don't actually need it to protect anything in the umount path, - * but only need to cycle through it to make sure any inode that - * prune_icache took off the LRU list has been fully torn down by the - * time we are past evict_inodes. - */ -static DECLARE_RWSEM(iprune_sem); /* * Empty aops. Can be used for the cases where the user does not @@ -95,6 +80,7 @@ EXPORT_SYMBOL(empty_aops); struct inodes_stat_t inodes_stat; static DEFINE_PER_CPU(unsigned int, nr_inodes); +static DEFINE_PER_CPU(unsigned int, nr_unused); static struct kmem_cache *inode_cachep __read_mostly; @@ -109,7 +95,11 @@ static int get_nr_inodes(void) static inline int get_nr_inodes_unused(void) { - return inodes_stat.nr_unused; + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_unused, i); + return sum < 0 ? 0 : sum; } int get_nr_dirty_inodes(void) @@ -127,6 +117,7 @@ int proc_nr_inodes(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); + inodes_stat.nr_unused = get_nr_inodes_unused(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -152,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_op = &empty_iops; inode->i_fop = &empty_fops; inode->i_nlink = 1; + inode->i_opflags = 0; inode->i_uid = 0; inode->i_gid = 0; atomic_set(&inode->i_writecount, 0); @@ -176,8 +168,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mutex_init(&inode->i_mutex); lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - init_rwsem(&inode->i_alloc_sem); - lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); + atomic_set(&inode->i_dio_count, 0); mapping->a_ops = &empty_aops; mapping->host = inode; @@ -337,22 +328,24 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { - spin_lock(&inode_lru_lock); + spin_lock(&inode->i_sb->s_inode_lru_lock); if (list_empty(&inode->i_lru)) { - list_add(&inode->i_lru, &inode_lru); - inodes_stat.nr_unused++; + list_add(&inode->i_lru, &inode->i_sb->s_inode_lru); + inode->i_sb->s_nr_inodes_unused++; + this_cpu_inc(nr_unused); } - spin_unlock(&inode_lru_lock); + spin_unlock(&inode->i_sb->s_inode_lru_lock); } static void inode_lru_list_del(struct inode *inode) { - spin_lock(&inode_lru_lock); + spin_lock(&inode->i_sb->s_inode_lru_lock); if (!list_empty(&inode->i_lru)) { list_del_init(&inode->i_lru); - inodes_stat.nr_unused--; + inode->i_sb->s_nr_inodes_unused--; + this_cpu_dec(nr_unused); } - spin_unlock(&inode_lru_lock); + spin_unlock(&inode->i_sb->s_inode_lru_lock); } /** @@ -369,9 +362,11 @@ EXPORT_SYMBOL_GPL(inode_sb_list_add); static inline void inode_sb_list_del(struct inode *inode) { - spin_lock(&inode_sb_list_lock); - list_del_init(&inode->i_sb_list); - spin_unlock(&inode_sb_list_lock); + if (!list_empty(&inode->i_sb_list)) { + spin_lock(&inode_sb_list_lock); + list_del_init(&inode->i_sb_list); + spin_unlock(&inode_sb_list_lock); + } } static unsigned long hash(struct super_block *sb, unsigned long hashval) @@ -405,12 +400,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) EXPORT_SYMBOL(__insert_inode_hash); /** - * remove_inode_hash - remove an inode from the hash + * __remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * * Remove an inode from the superblock. */ -void remove_inode_hash(struct inode *inode) +void __remove_inode_hash(struct inode *inode) { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); @@ -418,12 +413,19 @@ void remove_inode_hash(struct inode *inode) spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } -EXPORT_SYMBOL(remove_inode_hash); +EXPORT_SYMBOL(__remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); + /* + * We have to cycle tree_lock here because reclaim can be still in the + * process of removing the last page (in __delete_from_page_cache()) + * and we must not free mapping under it. + */ + spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); + spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); @@ -453,7 +455,9 @@ static void evict(struct inode *inode) BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!list_empty(&inode->i_lru)); - inode_wb_list_del(inode); + if (!list_empty(&inode->i_wb_list)) + inode_wb_list_del(inode); + inode_sb_list_del(inode); if (op->evict_inode) { @@ -530,14 +534,6 @@ void evict_inodes(struct super_block *sb) spin_unlock(&inode_sb_list_lock); dispose_list(&dispose); - - /* - * Cycle through iprune_sem to make sure any inode that prune_icache - * moved off the list before we took the lock has been fully torn - * down. - */ - down_write(&iprune_sem); - up_write(&iprune_sem); } /** @@ -600,8 +596,10 @@ static int can_unuse(struct inode *inode) } /* - * Scan `goal' inodes on the unused list for freeable ones. They are moved to a - * temporary list and then are freed outside inode_lru_lock by dispose_list(). + * Walk the superblock inode LRU for freeable inodes and attempt to free them. + * This is called from the superblock shrinker function with a number of inodes + * to trim from the LRU. Inodes to be freed are moved to a temporary list and + * then are freed outside inode_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their * pagecache removed. If the inode has metadata buffers attached to @@ -615,29 +613,28 @@ static int can_unuse(struct inode *inode) * LRU does not have strict ordering. Hence we don't want to reclaim inodes * with this flag set because they are the inodes that are out of order. */ -static void prune_icache(int nr_to_scan) +void prune_icache_sb(struct super_block *sb, int nr_to_scan) { LIST_HEAD(freeable); int nr_scanned; unsigned long reap = 0; - down_read(&iprune_sem); - spin_lock(&inode_lru_lock); - for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { + spin_lock(&sb->s_inode_lru_lock); + for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { struct inode *inode; - if (list_empty(&inode_lru)) + if (list_empty(&sb->s_inode_lru)) break; - inode = list_entry(inode_lru.prev, struct inode, i_lru); + inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); /* - * we are inverting the inode_lru_lock/inode->i_lock here, + * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, * so use a trylock. If we fail to get the lock, just move the * inode to the back of the list so we don't spin on it. */ if (!spin_trylock(&inode->i_lock)) { - list_move(&inode->i_lru, &inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); continue; } @@ -649,28 +646,29 @@ static void prune_icache(int nr_to_scan) (inode->i_state & ~I_REFERENCED)) { list_del_init(&inode->i_lru); spin_unlock(&inode->i_lock); - inodes_stat.nr_unused--; + sb->s_nr_inodes_unused--; + this_cpu_dec(nr_unused); continue; } /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; - list_move(&inode->i_lru, &inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lru_lock); + spin_unlock(&sb->s_inode_lru_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); - spin_lock(&inode_lru_lock); + spin_lock(&sb->s_inode_lru_lock); - if (inode != list_entry(inode_lru.next, + if (inode != list_entry(sb->s_inode_lru.next, struct inode, i_lru)) continue; /* wrong inode or list_empty */ /* avoid lock inversions with trylock */ @@ -686,51 +684,18 @@ static void prune_icache(int nr_to_scan) spin_unlock(&inode->i_lock); list_move(&inode->i_lru, &freeable); - inodes_stat.nr_unused--; + sb->s_nr_inodes_unused--; + this_cpu_dec(nr_unused); } if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&inode_lru_lock); + spin_unlock(&sb->s_inode_lru_lock); dispose_list(&freeable); - up_read(&iprune_sem); -} - -/* - * shrink_icache_memory() will attempt to reclaim some unused inodes. Here, - * "unused" means that no dentries are referring to the inodes: the files are - * not open and the dcache references to those inodes have already been - * reclaimed. - * - * This function is passed the number of inodes to scan, and it returns the - * total number of remaining possibly-reclaimable inodes. - */ -static int shrink_icache_memory(struct shrinker *shrink, - struct shrink_control *sc) -{ - int nr = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - - if (nr) { - /* - * Nasty deadlock avoidance. We may hold various FS locks, - * and we don't want to recurse into the FS that called us - * in clear_inode() and friends.. - */ - if (!(gfp_mask & __GFP_FS)) - return -1; - prune_icache(nr); - } - return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; } -static struct shrinker icache_shrinker = { - .shrink = shrink_icache_memory, - .seeks = DEFAULT_SEEKS, -}; - static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. @@ -836,6 +801,29 @@ unsigned int get_next_ino(void) EXPORT_SYMBOL(get_next_ino); /** + * new_inode_pseudo - obtain an inode + * @sb: superblock + * + * Allocates a new inode for given superblock. + * Inode wont be chained in superblock s_inodes list + * This means : + * - fs can't be unmount + * - quotas, fsnotify, writeback can't work + */ +struct inode *new_inode_pseudo(struct super_block *sb) +{ + struct inode *inode = alloc_inode(sb); + + if (inode) { + spin_lock(&inode->i_lock); + inode->i_state = 0; + spin_unlock(&inode->i_lock); + INIT_LIST_HEAD(&inode->i_sb_list); + } + return inode; +} + +/** * new_inode - obtain an inode * @sb: superblock * @@ -853,13 +841,9 @@ struct inode *new_inode(struct super_block *sb) spin_lock_prefetch(&inode_sb_list_lock); - inode = alloc_inode(sb); - if (inode) { - spin_lock(&inode->i_lock); - inode->i_state = 0; - spin_unlock(&inode->i_lock); + inode = new_inode_pseudo(sb); + if (inode) inode_sb_list_add(inode); - } return inode; } EXPORT_SYMBOL(new_inode); @@ -1324,7 +1308,7 @@ static void iput_final(struct inode *inode) WARN_ON(inode->i_state & I_NEW); - if (op && op->drop_inode) + if (op->drop_inode) drop = op->drop_inode(inode); else drop = generic_drop_inode(inode); @@ -1347,7 +1331,8 @@ static void iput_final(struct inode *inode) } inode->i_state |= I_FREEING; - inode_lru_list_del(inode); + if (!list_empty(&inode->i_lru)) + inode_lru_list_del(inode); spin_unlock(&inode->i_lock); evict(inode); @@ -1610,7 +1595,6 @@ void __init inode_init(void) (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_MEM_SPREAD), init_once); - register_shrinker(&icache_shrinker); /* Hash may have been set up in inode_init_early */ if (!hashdist) |