From: Linus Torvalds Date: Tue, 26 Jul 2011 17:39:54 +0000 (-0700) Subject: Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=f01ef569cddb1a8627b1c6b3a134998ad1cf4b22;p=~shefty%2Frdma-dev.git Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/writeback: (27 commits) mm: properly reflect task dirty limits in dirty_exceeded logic writeback: don't busy retry writeback on new/freeing inodes writeback: scale IO chunk size up to half device bandwidth writeback: trace global_dirty_state writeback: introduce max-pause and pass-good dirty limits writeback: introduce smoothed global dirty limit writeback: consolidate variable names in balance_dirty_pages() writeback: show bdi write bandwidth in debugfs writeback: bdi write bandwidth estimation writeback: account per-bdi accumulated written pages writeback: make writeback_control.nr_to_write straight writeback: skip tmpfs early in balance_dirty_pages_ratelimited_nr() writeback: trace event writeback_queue_io writeback: trace event writeback_single_inode writeback: remove .nonblocking and .encountered_congestion writeback: remove writeback_control.more_io writeback: skip balance_dirty_pages() for in-memory fs writeback: add bdi_dirty_limit() kernel-doc writeback: avoid extra sync work at enqueue time writeback: elevate queue_io() into wb_writeback() ... Fix up trivial conflicts in fs/fs-writeback.c and mm/filemap.c --- f01ef569cddb1a8627b1c6b3a134998ad1cf4b22 diff --cc fs/fs-writeback.c index b8c507ca42f,6d49439ca31..1599aa985fe --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@@ -460,6 -480,63 +480,37 @@@ writeback_single_inode(struct inode *in return ret; } -/* - * For background writeback the caller does not have the sb pinned - * before calling writeback. 
So make sure that we do pin it, so it doesn't - * go away while we are writing inodes from it. - */ -static bool pin_sb_for_writeback(struct super_block *sb) -{ - spin_lock(&sb_lock); - if (list_empty(&sb->s_instances)) { - spin_unlock(&sb_lock); - return false; - } - - sb->s_count++; - spin_unlock(&sb_lock); - - if (down_read_trylock(&sb->s_umount)) { - if (sb->s_root) - return true; - up_read(&sb->s_umount); - } - - put_super(sb); - return false; -} - + static long writeback_chunk_size(struct backing_dev_info *bdi, + struct wb_writeback_work *work) + { + long pages; + + /* + * WB_SYNC_ALL mode does livelock avoidance by syncing dirty + * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX + * here avoids calling into writeback_inodes_wb() more than once. + * + * The intended call sequence for WB_SYNC_ALL writeback is: + * + * wb_writeback() + * writeback_sb_inodes() <== called only once + * write_cache_pages() <== called once for each inode + * (quickly) tag currently dirty pages + * (maybe slowly) sync all tagged pages + */ + if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) + pages = LONG_MAX; + else { + pages = min(bdi->avg_write_bandwidth / 2, + global_dirty_limit / DIRTY_SCOPE); + pages = min(pages, work->nr_pages); + pages = round_down(pages + MIN_WRITEBACK_PAGES, + MIN_WRITEBACK_PAGES); + } + + return pages; + } + /* * Write a portion of b_io inodes which belong to @sb. 
* @@@ -559,40 -643,41 +617,41 @@@ static long __writeback_inodes_wb(struc struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; - if (!pin_sb_for_writeback(sb)) { + if (!grab_super_passive(sb)) { - requeue_io(inode); + requeue_io(inode, wb); continue; } - ret = writeback_sb_inodes(sb, wb, wbc, false); + wrote += writeback_sb_inodes(sb, wb, work); drop_super(sb); - if (ret) - break; + /* refer to the same tests at the end of writeback_sb_inodes */ + if (wrote) { + if (time_is_before_jiffies(start_time + HZ / 10UL)) + break; + if (work->nr_pages <= 0) + break; + } } - spin_unlock(&inode_wb_list_lock); /* Leave any unwritten inodes on b_io */ + return wrote; } - static void __writeback_inodes_sb(struct super_block *sb, - struct bdi_writeback *wb, struct writeback_control *wbc) + long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages) { - WARN_ON(!rwsem_is_locked(&sb->s_umount)); + struct wb_writeback_work work = { + .nr_pages = nr_pages, + .sync_mode = WB_SYNC_NONE, + .range_cyclic = 1, + }; - spin_lock(&inode_wb_list_lock); - if (!wbc->for_kupdate || list_empty(&wb->b_io)) - queue_io(wb, wbc->older_than_this); - writeback_sb_inodes(sb, wb, wbc, true); - spin_unlock(&inode_wb_list_lock); - } + spin_lock(&wb->list_lock); + if (list_empty(&wb->b_io)) + queue_io(wb, NULL); + __writeback_inodes_wb(wb, &work); + spin_unlock(&wb->list_lock); - /* - * The maximum number of pages to writeout in a single bdi flush/kupdate - * operation. We do this so we don't hold I_SYNC against an inode for - * enormous amounts of time, which would block a userspace task which has - * been forced to throttle against that inode. Also, the code reevaluates - * the dirty each time it has written this many pages. 
- */ - #define MAX_WRITEBACK_PAGES 1024 + return nr_pages - work.nr_pages; + } static inline bool over_bground_thresh(void) { diff --cc fs/inode.c index 96c77b81167,4be128cbc75..a48fa5355fb --- a/fs/inode.c +++ b/fs/inode.c @@@ -33,11 -33,11 +33,11 @@@ * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() - * inode_lru_lock protects: - * inode_lru, inode->i_lru + * inode->i_sb->s_inode_lru_lock protects: + * inode->i_sb->s_inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list - * inode_wb_list_lock protects: + * bdi->wb.list_lock protects: * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list * inode_hash_lock protects: * inode_hashtable, inode->i_hash @@@ -46,9 -46,9 +46,9 @@@ * * inode_sb_list_lock * inode->i_lock - * inode_lru_lock + * inode->i_sb->s_inode_lru_lock * - * inode_wb_list_lock + * bdi->wb.list_lock * inode->i_lock * * inode_hash_lock @@@ -64,9 -64,22 +64,8 @@@ static unsigned int i_hash_shift __read static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -static LIST_HEAD(inode_lru); -static DEFINE_SPINLOCK(inode_lru_lock); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); -/* - * iprune_sem provides exclusion between the icache shrinking and the - * umount path. - * - * We don't actually need it to protect anything in the umount path, - * but only need to cycle through it to make sure any inode that - * prune_icache took off the LRU list has been fully torn down by the - * time we are past evict_inodes. - */ -static DECLARE_RWSEM(iprune_sem); - /* * Empty aops. Can be used for the cases where the user does not * define any of the address_space operations. 
diff --cc mm/filemap.c index 10a17111327,1e492c3dd6f..867d40222ec --- a/mm/filemap.c +++ b/mm/filemap.c @@@ -78,7 -78,10 +78,7 @@@ * ->i_mutex (generic_file_buffered_write) * ->mmap_sem (fault_in_pages_readable->do_page_fault) * - * inode_wb_list_lock - * ->i_mutex - * ->i_alloc_sem (various) - * + * bdi->wb.list_lock * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) * diff --cc mm/rmap.c index 9701574bb67,d04e36a7cc9..8005080fb9e --- a/mm/rmap.c +++ b/mm/rmap.c @@@ -35,10 -36,11 +35,10 @@@ * sb_lock (within inode_lock in fs/fs-writeback.c) * mapping->tree_lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, - * within inode_wb_list_lock in __sync_single_inode) + * within bdi->wb.list_lock in __sync_single_inode) * - * (code doesn't rely on that order so it could be switched around) - * ->tasklist_lock - * anon_vma->mutex (memory_failure, collect_procs_anon) + * anon_vma->mutex,mapping->i_mutex (memory_failure, collect_procs_anon) + * ->tasklist_lock * pte map lock */