summaryrefslogtreecommitdiff
path: root/fs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inode.c')
-rw-r--r--fs/inode.c194
1 files changed, 89 insertions, 105 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 0f7e88a7803f..73920d555c88 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,11 +33,11 @@
*
* inode->i_lock protects:
* inode->i_state, inode->i_hash, __iget()
- * inode_lru_lock protects:
- * inode_lru, inode->i_lru
+ * inode->i_sb->s_inode_lru_lock protects:
+ * inode->i_sb->s_inode_lru, inode->i_lru
* inode_sb_list_lock protects:
* sb->s_inodes, inode->i_sb_list
- * inode_wb_list_lock protects:
+ * bdi->wb.list_lock protects:
* bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
* inode_hash_lock protects:
* inode_hashtable, inode->i_hash
@@ -46,9 +46,9 @@
*
* inode_sb_list_lock
* inode->i_lock
- * inode_lru_lock
+ * inode->i_sb->s_inode_lru_lock
*
- * inode_wb_list_lock
+ * bdi->wb.list_lock
* inode->i_lock
*
* inode_hash_lock
@@ -64,22 +64,7 @@ static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
-static LIST_HEAD(inode_lru);
-static DEFINE_SPINLOCK(inode_lru_lock);
-
__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
-
-/*
- * iprune_sem provides exclusion between the icache shrinking and the
- * umount path.
- *
- * We don't actually need it to protect anything in the umount path,
- * but only need to cycle through it to make sure any inode that
- * prune_icache took off the LRU list has been fully torn down by the
- * time we are past evict_inodes.
- */
-static DECLARE_RWSEM(iprune_sem);
/*
* Empty aops. Can be used for the cases where the user does not
@@ -95,6 +80,7 @@ EXPORT_SYMBOL(empty_aops);
struct inodes_stat_t inodes_stat;
static DEFINE_PER_CPU(unsigned int, nr_inodes);
+static DEFINE_PER_CPU(unsigned int, nr_unused);
static struct kmem_cache *inode_cachep __read_mostly;
@@ -109,7 +95,11 @@ static int get_nr_inodes(void)
static inline int get_nr_inodes_unused(void)
{
- return inodes_stat.nr_unused;
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_unused, i);
+ return sum < 0 ? 0 : sum;
}
int get_nr_dirty_inodes(void)
@@ -127,6 +117,7 @@ int proc_nr_inodes(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
+ inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -152,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_nlink = 1;
+ inode->i_opflags = 0;
inode->i_uid = 0;
inode->i_gid = 0;
atomic_set(&inode->i_writecount, 0);
@@ -176,8 +168,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mutex_init(&inode->i_mutex);
lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
- init_rwsem(&inode->i_alloc_sem);
- lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+ atomic_set(&inode->i_dio_count, 0);
mapping->a_ops = &empty_aops;
mapping->host = inode;
@@ -337,22 +328,24 @@ EXPORT_SYMBOL(ihold);
static void inode_lru_list_add(struct inode *inode)
{
- spin_lock(&inode_lru_lock);
+ spin_lock(&inode->i_sb->s_inode_lru_lock);
if (list_empty(&inode->i_lru)) {
- list_add(&inode->i_lru, &inode_lru);
- inodes_stat.nr_unused++;
+ list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
+ inode->i_sb->s_nr_inodes_unused++;
+ this_cpu_inc(nr_unused);
}
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
static void inode_lru_list_del(struct inode *inode)
{
- spin_lock(&inode_lru_lock);
+ spin_lock(&inode->i_sb->s_inode_lru_lock);
if (!list_empty(&inode->i_lru)) {
list_del_init(&inode->i_lru);
- inodes_stat.nr_unused--;
+ inode->i_sb->s_nr_inodes_unused--;
+ this_cpu_dec(nr_unused);
}
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&inode->i_sb->s_inode_lru_lock);
}
/**
@@ -369,9 +362,11 @@ EXPORT_SYMBOL_GPL(inode_sb_list_add);
static inline void inode_sb_list_del(struct inode *inode)
{
- spin_lock(&inode_sb_list_lock);
- list_del_init(&inode->i_sb_list);
- spin_unlock(&inode_sb_list_lock);
+ if (!list_empty(&inode->i_sb_list)) {
+ spin_lock(&inode_sb_list_lock);
+ list_del_init(&inode->i_sb_list);
+ spin_unlock(&inode_sb_list_lock);
+ }
}
static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -405,12 +400,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
EXPORT_SYMBOL(__insert_inode_hash);
/**
- * remove_inode_hash - remove an inode from the hash
+ * __remove_inode_hash - remove an inode from the hash
* @inode: inode to unhash
*
* Remove an inode from the superblock.
*/
-void remove_inode_hash(struct inode *inode)
+void __remove_inode_hash(struct inode *inode)
{
spin_lock(&inode_hash_lock);
spin_lock(&inode->i_lock);
@@ -418,12 +413,19 @@ void remove_inode_hash(struct inode *inode)
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
}
-EXPORT_SYMBOL(remove_inode_hash);
+EXPORT_SYMBOL(__remove_inode_hash);
void end_writeback(struct inode *inode)
{
might_sleep();
+ /*
+ * We have to cycle tree_lock here because reclaim can be still in the
+ * process of removing the last page (in __delete_from_page_cache())
+ * and we must not free mapping under it.
+ */
+ spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
+ spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
@@ -453,7 +455,9 @@ static void evict(struct inode *inode)
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(!list_empty(&inode->i_lru));
- inode_wb_list_del(inode);
+ if (!list_empty(&inode->i_wb_list))
+ inode_wb_list_del(inode);
+
inode_sb_list_del(inode);
if (op->evict_inode) {
@@ -530,14 +534,6 @@ void evict_inodes(struct super_block *sb)
spin_unlock(&inode_sb_list_lock);
dispose_list(&dispose);
-
- /*
- * Cycle through iprune_sem to make sure any inode that prune_icache
- * moved off the list before we took the lock has been fully torn
- * down.
- */
- down_write(&iprune_sem);
- up_write(&iprune_sem);
}
/**
@@ -600,8 +596,10 @@ static int can_unuse(struct inode *inode)
}
/*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside inode_lru_lock by dispose_list().
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
*
* Any inodes which are pinned purely because of attached pagecache have their
* pagecache removed. If the inode has metadata buffers attached to
@@ -615,29 +613,28 @@ static int can_unuse(struct inode *inode)
* LRU does not have strict ordering. Hence we don't want to reclaim inodes
* with this flag set because they are the inodes that are out of order.
*/
-static void prune_icache(int nr_to_scan)
+void prune_icache_sb(struct super_block *sb, int nr_to_scan)
{
LIST_HEAD(freeable);
int nr_scanned;
unsigned long reap = 0;
- down_read(&iprune_sem);
- spin_lock(&inode_lru_lock);
- for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
+ spin_lock(&sb->s_inode_lru_lock);
+ for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
struct inode *inode;
- if (list_empty(&inode_lru))
+ if (list_empty(&sb->s_inode_lru))
break;
- inode = list_entry(inode_lru.prev, struct inode, i_lru);
+ inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
/*
- * we are inverting the inode_lru_lock/inode->i_lock here,
+ * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
* so use a trylock. If we fail to get the lock, just move the
* inode to the back of the list so we don't spin on it.
*/
if (!spin_trylock(&inode->i_lock)) {
- list_move(&inode->i_lru, &inode_lru);
+ list_move(&inode->i_lru, &sb->s_inode_lru);
continue;
}
@@ -649,28 +646,29 @@ static void prune_icache(int nr_to_scan)
(inode->i_state & ~I_REFERENCED)) {
list_del_init(&inode->i_lru);
spin_unlock(&inode->i_lock);
- inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
+ this_cpu_dec(nr_unused);
continue;
}
/* recently referenced inodes get one more pass */
if (inode->i_state & I_REFERENCED) {
inode->i_state &= ~I_REFERENCED;
- list_move(&inode->i_lru, &inode_lru);
+ list_move(&inode->i_lru, &sb->s_inode_lru);
spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages) {
__iget(inode);
spin_unlock(&inode->i_lock);
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&sb->s_inode_lru_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
- spin_lock(&inode_lru_lock);
+ spin_lock(&sb->s_inode_lru_lock);
- if (inode != list_entry(inode_lru.next,
+ if (inode != list_entry(sb->s_inode_lru.next,
struct inode, i_lru))
continue; /* wrong inode or list_empty */
/* avoid lock inversions with trylock */
@@ -686,51 +684,18 @@ static void prune_icache(int nr_to_scan)
spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, &freeable);
- inodes_stat.nr_unused--;
+ sb->s_nr_inodes_unused--;
+ this_cpu_dec(nr_unused);
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
- spin_unlock(&inode_lru_lock);
+ spin_unlock(&sb->s_inode_lru_lock);
dispose_list(&freeable);
- up_read(&iprune_sem);
-}
-
-/*
- * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
- * "unused" means that no dentries are referring to the inodes: the files are
- * not open and the dcache references to those inodes have already been
- * reclaimed.
- *
- * This function is passed the number of inodes to scan, and it returns the
- * total number of remaining possibly-reclaimable inodes.
- */
-static int shrink_icache_memory(struct shrinker *shrink,
- struct shrink_control *sc)
-{
- int nr = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (nr) {
- /*
- * Nasty deadlock avoidance. We may hold various FS locks,
- * and we don't want to recurse into the FS that called us
- * in clear_inode() and friends..
- */
- if (!(gfp_mask & __GFP_FS))
- return -1;
- prune_icache(nr);
- }
- return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
}
-static struct shrinker icache_shrinker = {
- .shrink = shrink_icache_memory,
- .seeks = DEFAULT_SEEKS,
-};
-
static void __wait_on_freeing_inode(struct inode *inode);
/*
* Called with the inode lock held.
@@ -836,6 +801,29 @@ unsigned int get_next_ino(void)
EXPORT_SYMBOL(get_next_ino);
/**
+ * new_inode_pseudo - obtain an inode
+ * @sb: superblock
+ *
+ * Allocates a new inode for given superblock.
+ * Inode wont be chained in superblock s_inodes list
+ * This means :
+ * - fs can't be unmount
+ * - quotas, fsnotify, writeback can't work
+ */
+struct inode *new_inode_pseudo(struct super_block *sb)
+{
+ struct inode *inode = alloc_inode(sb);
+
+ if (inode) {
+ spin_lock(&inode->i_lock);
+ inode->i_state = 0;
+ spin_unlock(&inode->i_lock);
+ INIT_LIST_HEAD(&inode->i_sb_list);
+ }
+ return inode;
+}
+
+/**
* new_inode - obtain an inode
* @sb: superblock
*
@@ -853,13 +841,9 @@ struct inode *new_inode(struct super_block *sb)
spin_lock_prefetch(&inode_sb_list_lock);
- inode = alloc_inode(sb);
- if (inode) {
- spin_lock(&inode->i_lock);
- inode->i_state = 0;
- spin_unlock(&inode->i_lock);
+ inode = new_inode_pseudo(sb);
+ if (inode)
inode_sb_list_add(inode);
- }
return inode;
}
EXPORT_SYMBOL(new_inode);
@@ -1324,7 +1308,7 @@ static void iput_final(struct inode *inode)
WARN_ON(inode->i_state & I_NEW);
- if (op && op->drop_inode)
+ if (op->drop_inode)
drop = op->drop_inode(inode);
else
drop = generic_drop_inode(inode);
@@ -1347,7 +1331,8 @@ static void iput_final(struct inode *inode)
}
inode->i_state |= I_FREEING;
- inode_lru_list_del(inode);
+ if (!list_empty(&inode->i_lru))
+ inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
evict(inode);
@@ -1610,7 +1595,6 @@ void __init inode_init(void)
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
init_once);
- register_shrinker(&icache_shrinker);
/* Hash may have been set up in inode_init_early */
if (!hashdist)