From: Jens Axboe
Date: Thu, 10 Mar 2011 07:58:35 +0000 (+0100)
Subject: Merge branch 'for-2.6.39/stack-plug' into for-2.6.39/core
X-Git-Tag: v2.6.39-rc1~80^2~20
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=4c63f5646e405b5010cc9499419060bf2e838f5b;p=~shefty%2Frdma-dev.git

Merge branch 'for-2.6.39/stack-plug' into for-2.6.39/core

Conflicts:
	block/blk-core.c
	block/blk-flush.c
	drivers/md/raid1.c
	drivers/md/raid10.c
	drivers/md/raid5.c
	fs/nilfs2/btnode.c
	fs/nilfs2/mdt.c

Signed-off-by: Jens Axboe
---

4c63f5646e405b5010cc9499419060bf2e838f5b
diff --cc block/blk-core.c
index 74d496ccf4d,7e9715ae18c..e1fcf7a2466
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@@ -198,76 -199,26 +199,26 @@@ void blk_dump_rq_flags(struct request *
  EXPORT_SYMBOL(blk_dump_rq_flags);
  
  /*
-  * "plug" the device if there are no outstanding requests: this will
-  * force the transfer to start only after we have put all the requests
-  * on the list.
-  *
-  * This is called with interrupts off and no requests on the queue and
-  * with the queue lock held.
-  */
- void blk_plug_device(struct request_queue *q)
+  * Make sure that plugs that were pending when this function was entered,
+  * are now complete and requests pushed to the queue.
+  */
+ static inline void queue_sync_plugs(struct request_queue *q)
  {
- 	WARN_ON(!irqs_disabled());
- 
  	/*
- 	 * don't plug a stopped queue, it must be paired with blk_start_queue()
- 	 * which will restart the queueing
+ 	 * If the current process is plugged and has barriers submitted,
+ 	 * we will livelock if we don't unplug first.
  	 */
- 	if (blk_queue_stopped(q))
- 		return;
- 
- 	if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
- 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
- 		trace_block_plug(q);
- 	}
- }
- EXPORT_SYMBOL(blk_plug_device);
- 
- /**
-  * blk_plug_device_unlocked - plug a device without queue lock held
-  * @q:    The &struct request_queue to plug
-  *
-  * Description:
-  *   Like @blk_plug_device(), but grabs the queue lock and disables
-  *   interrupts.
-  **/
- void blk_plug_device_unlocked(struct request_queue *q)
- {
- 	unsigned long flags;
- 
- 	spin_lock_irqsave(q->queue_lock, flags);
- 	blk_plug_device(q);
- 	spin_unlock_irqrestore(q->queue_lock, flags);
- }
- EXPORT_SYMBOL(blk_plug_device_unlocked);
- 
- /*
-  * remove the queue from the plugged list, if present. called with
-  * queue lock held and interrupts disabled.
-  */
- int blk_remove_plug(struct request_queue *q)
- {
- 	WARN_ON(!irqs_disabled());
- 
- 	if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
- 		return 0;
- 
- 	del_timer(&q->unplug_timer);
- 	return 1;
+ 	blk_flush_plug(current);
  }
- EXPORT_SYMBOL(blk_remove_plug);
  
- /*
-  * remove the plug and let it rip..
-  */
- void __generic_unplug_device(struct request_queue *q)
+ static void blk_delay_work(struct work_struct *work)
  {
- 	if (unlikely(blk_queue_stopped(q)))
- 		return;
- 	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
- 		return;
+ 	struct request_queue *q;
  
- 	q->request_fn(q);
+ 	q = container_of(work, struct request_queue, delay_work.work);
+ 	spin_lock_irq(q->queue_lock);
- 	__blk_run_queue(q);
++	__blk_run_queue(q, false);
+ 	spin_unlock_irq(q->queue_lock);
  }
  
  /**
@@@ -387,9 -292,10 +296,9 @@@ EXPORT_SYMBOL(blk_stop_queue)
   */
  void blk_sync_queue(struct request_queue *q)
  {
- 	del_timer_sync(&q->unplug_timer);
  	del_timer_sync(&q->timeout);
- 	cancel_work_sync(&q->unplug_work);
- 	throtl_shutdown_timer_wq(q);
+ 	cancel_delayed_work_sync(&q->delay_work);
+ 	queue_sync_plugs(q);
  }
  EXPORT_SYMBOL(blk_sync_queue);
  
@@@ -403,10 -308,8 +312,8 @@@
   * held and interrupts disabled.
   *
   */
 -void __blk_run_queue(struct request_queue *q)
 +void __blk_run_queue(struct request_queue *q, bool force_kblockd)
  {
- 	blk_remove_plug(q);
- 
  	if (unlikely(blk_queue_stopped(q)))
  		return;
  
@@@ -417,13 -317,11 +321,11 @@@
  	 * Only recurse once to avoid overrunning the stack, let the unplug
  	 * handling reinvoke the handler shortly if we already got there.
  	 */
- 	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+ 	if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
  		q->request_fn(q);
  		queue_flag_clear(QUEUE_FLAG_REENTER, q);
- 	} else {
- 		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
- 		kblockd_schedule_work(q, &q->unplug_work);
- 	}
+ 	} else
+ 		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
  }
  EXPORT_SYMBOL(__blk_run_queue);
  
@@@ -639,12 -521,8 +538,11 @@@ blk_init_allocated_queue_node(struct re
  	q->request_fn		= rfn;
  	q->prep_rq_fn		= NULL;
  	q->unprep_rq_fn		= NULL;
- 	q->unplug_fn		= generic_unplug_device;
  	q->queue_flags		= QUEUE_FLAG_DEFAULT;
- 	q->queue_lock		= lock;
+ 
+ 	/* Override internal queue lock with supplied lock pointer */
+ 	if (lock)
+ 		q->queue_lock		= lock;
  
  	/*
  	 * This also sets hw/phys segments, boundary and size
@@@ -1084,9 -970,8 +990,8 @@@ void blk_insert_request(struct request_
  	if (blk_rq_tagged(rq))
  		blk_queue_end_tag(q, rq);
  
- 	drive_stat_acct(rq, 1);
- 	__elv_add_request(q, rq, where, 0);
+ 	add_acct_request(q, rq, where);
- 	__blk_run_queue(q);
+ 	__blk_run_queue(q, false);
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
  EXPORT_SYMBOL(blk_insert_request);
@@@ -1347,20 -1283,35 +1303,35 @@@ get_rq
  	 */
  	init_request_from_bio(req, bio);
  
- 	spin_lock_irq(q->queue_lock);
  	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
- 	    bio_flagged(bio, BIO_CPU_AFFINE))
- 		req->cpu = blk_cpu_to_group(smp_processor_id());
- 	if (queue_should_plug(q) && elv_queue_empty(q))
- 		blk_plug_device(q);
- 
- 	/* insert the request into the elevator */
- 	drive_stat_acct(req, 1);
- 	__elv_add_request(q, req, where, 0);
+ 	    bio_flagged(bio, BIO_CPU_AFFINE)) {
+ 		req->cpu = blk_cpu_to_group(get_cpu());
+ 		put_cpu();
+ 	}
+ 
+ 	plug = current->plug;
+ 	if (plug) {
+ 		if (!plug->should_sort && !list_empty(&plug->list)) {
+ 			struct request *__rq;
+ 
+ 			__rq = list_entry_rq(plug->list.prev);
+ 			if (__rq->q != q)
+ 				plug->should_sort = 1;
+ 		}
+ 		/*
+ 		 * Debug flag, kill later
+ 		 */
+ 		req->cmd_flags |= REQ_ON_PLUG;
+ 		list_add_tail(&req->queuelist, &plug->list);
+ 		drive_stat_acct(req, 1);
+ 	} else {
+ 		spin_lock_irq(q->queue_lock);
+ 		add_acct_request(q, req, where);
- 		__blk_run_queue(q);
++		__blk_run_queue(q, false);
+ out_unlock:
+ 		spin_unlock_irq(q->queue_lock);
+ 	}
  out:
- 	if (unplug || !queue_should_plug(q))
- 		__generic_unplug_device(q);
- 	spin_unlock_irq(q->queue_lock);
  	return 0;
  }
@@@ -2643,6 -2592,113 +2612,113 @@@ int kblockd_schedule_work(struct reques
  }
  EXPORT_SYMBOL(kblockd_schedule_work);
  
+ int kblockd_schedule_delayed_work(struct request_queue *q,
+ 			struct delayed_work *dwork, unsigned long delay)
+ {
+ 	return queue_delayed_work(kblockd_workqueue, dwork, delay);
+ }
+ EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+ 
+ #define PLUG_MAGIC	0x91827364
+ 
+ void blk_start_plug(struct blk_plug *plug)
+ {
+ 	struct task_struct *tsk = current;
+ 
+ 	plug->magic = PLUG_MAGIC;
+ 	INIT_LIST_HEAD(&plug->list);
+ 	plug->should_sort = 0;
+ 
+ 	/*
+ 	 * If this is a nested plug, don't actually assign it. It will be
+ 	 * flushed on its own.
+ 	 */
+ 	if (!tsk->plug) {
+ 		/*
+ 		 * Store ordering should not be needed here, since a potential
+ 		 * preempt will imply a full memory barrier
+ 		 */
+ 		tsk->plug = plug;
+ 	}
+ }
+ EXPORT_SYMBOL(blk_start_plug);
+ 
+ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+ {
+ 	struct request *rqa = container_of(a, struct request, queuelist);
+ 	struct request *rqb = container_of(b, struct request, queuelist);
+ 
+ 	return !(rqa->q == rqb->q);
+ }
+ 
+ static void flush_plug_list(struct blk_plug *plug)
+ {
+ 	struct request_queue *q;
+ 	unsigned long flags;
+ 	struct request *rq;
+ 
+ 	BUG_ON(plug->magic != PLUG_MAGIC);
+ 
+ 	if (list_empty(&plug->list))
+ 		return;
+ 
+ 	if (plug->should_sort)
+ 		list_sort(NULL, &plug->list, plug_rq_cmp);
+ 
+ 	q = NULL;
+ 	local_irq_save(flags);
+ 	while (!list_empty(&plug->list)) {
+ 		rq = list_entry_rq(plug->list.next);
+ 		list_del_init(&rq->queuelist);
+ 		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
+ 		BUG_ON(!rq->q);
+ 		if (rq->q != q) {
+ 			if (q) {
- 				__blk_run_queue(q);
++				__blk_run_queue(q, false);
+ 				spin_unlock(q->queue_lock);
+ 			}
+ 			q = rq->q;
+ 			spin_lock(q->queue_lock);
+ 		}
+ 		rq->cmd_flags &= ~REQ_ON_PLUG;
+ 
+ 		/*
+ 		 * rq is already accounted, so use raw insert
+ 		 */
+ 		__elv_add_request(q, rq, ELEVATOR_INSERT_SORT);
+ 	}
+ 
+ 	if (q) {
- 		__blk_run_queue(q);
++		__blk_run_queue(q, false);
+ 		spin_unlock(q->queue_lock);
+ 	}
+ 
+ 	BUG_ON(!list_empty(&plug->list));
+ 	local_irq_restore(flags);
+ }
+ 
+ static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
+ {
+ 	flush_plug_list(plug);
+ 
+ 	if (plug == tsk->plug)
+ 		tsk->plug = NULL;
+ }
+ 
+ void blk_finish_plug(struct blk_plug *plug)
+ {
+ 	if (plug)
+ 		__blk_finish_plug(current, plug);
+ }
+ EXPORT_SYMBOL(blk_finish_plug);
+ 
+ void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
+ {
+ 	__blk_finish_plug(tsk, plug);
+ 	tsk->plug = plug;
+ }
+ EXPORT_SYMBOL(__blk_flush_plug);
+ 
  int __init blk_dev_init(void)
  {
  	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --cc block/blk-exec.c
index cf1456a02ac,81e31819a59..7482b7fa863
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@@ -54,8 -54,8 +54,8 @@@ void blk_execute_rq_nowait(struct reque
  	rq->end_io = done;
  	WARN_ON(irqs_disabled());
  	spin_lock_irq(q->queue_lock);
- 	__elv_add_request(q, rq, where, 1);
- 	__generic_unplug_device(q);
+ 	__elv_add_request(q, rq, where);
- 	__blk_run_queue(q);
++	__blk_run_queue(q, false);
  	/* the queue is stopped so it won't be plugged+unplugged */
  	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
  		q->request_fn(q);
diff --cc block/blk-flush.c
index 0bd8c9c5d6e,671fa9da756..93d5fd8e51e
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@@ -212,14 -211,9 +211,14 @@@ static void flush_end_io(struct reques
  		queued |= blk_flush_complete_seq(rq, seq, error);
  	}
  
- 	/* after populating an empty queue, kick it to avoid stall */
+ 	/*
+ 	 * Moving a request silently to empty queue_head may stall the
+ 	 * queue. Kick the queue in those cases. This function is called
+ 	 * from request completion path and calling directly into
+ 	 * request_fn may confuse the driver. Always use kblockd.
+ 	 */
- 	if (queued && was_empty)
+ 	if (queued)
- 		__blk_run_queue(q);
+ 		__blk_run_queue(q, true);
  }
  
  /**
@@@ -269,14 -263,10 +268,13 @@@ static bool blk_kick_flush(struct reque
  static void flush_data_end_io(struct request *rq, int error)
  {
  	struct request_queue *q = rq->q;
- 	bool was_empty = elv_queue_empty(q);
  
- 	/* after populating an empty queue, kick it to avoid stall */
+ 	/*
+ 	 * After populating an empty queue, kick it to avoid stall. Read
+ 	 * the comment in flush_end_io().
+ 	 */
- 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
+ 	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
- 		__blk_run_queue(q);
+ 		__blk_run_queue(q, true);
  }
  
  /**
diff --cc fs/nilfs2/btnode.c
index 85f7baa15f5,f4f1c08807e..609cd223eea
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@@ -34,11 -34,12 +34,6 @@@
  #include "page.h"
  #include "btnode.h"
  
--
- static const struct address_space_operations def_btnode_aops = {
- 	.sync_page		= block_sync_page,
- };
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
-	nilfs_mapping_init_once(btnc);
-}
--
  void nilfs_btnode_cache_init(struct address_space *btnc,
  			     struct backing_dev_info *bdi)
  {
diff --cc fs/nilfs2/mdt.c
index a0babd2bff6,3fdb61d79c9..a649b05f706
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@@ -454,10 -449,10 +449,10 @@@ int nilfs_mdt_setup_shadow_map(struct i
  	struct backing_dev_info *bdi = inode->i_sb->s_bdi;
  
  	INIT_LIST_HEAD(&shadow->frozen_buffers);
- 	nilfs_mapping_init_once(&shadow->frozen_data);
+ 	address_space_init_once(&shadow->frozen_data);
- 	nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
+ 	nilfs_mapping_init(&shadow->frozen_data, bdi);
- 	nilfs_mapping_init_once(&shadow->frozen_btnodes);
+ 	address_space_init_once(&shadow->frozen_btnodes);
- 	nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
+ 	nilfs_mapping_init(&shadow->frozen_btnodes, bdi);
  	mi->mi_shadow = shadow;
  	return 0;
  }
diff --cc fs/nilfs2/page.c
index a585b35fd6b,3da37cc5de3..4d2a1ee0eb4
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@@ -492,9 -492,21 +492,8 @@@ unsigned nilfs_page_count_clean_buffers
  	return nc;
  }
  
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
-	memset(mapping, 0, sizeof(*mapping));
-	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
-	spin_lock_init(&mapping->tree_lock);
-	INIT_LIST_HEAD(&mapping->private_list);
-	spin_lock_init(&mapping->private_lock);
-
-	spin_lock_init(&mapping->i_mmap_lock);
-	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
-	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
-
  void nilfs_mapping_init(struct address_space *mapping,
- 			struct backing_dev_info *bdi,
- 			const struct address_space_operations *aops)
+ 			struct backing_dev_info *bdi)
  {
  	mapping->host = NULL;
  	mapping->flags = 0;
diff --cc fs/nilfs2/page.h
index 2a00953ebd5,ba4d6fd40b0..f06b79ad749
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@@ -61,9 -61,9 +61,8 @@@ void nilfs_free_private_page(struct pag
  int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
  void nilfs_copy_back_pages(struct address_space *, struct address_space *);
  void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
  void nilfs_mapping_init(struct address_space *mapping,
- 			struct backing_dev_info *bdi,
- 			const struct address_space_operations *aops);
+ 			struct backing_dev_info *bdi);
  unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
  unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
  					    sector_t start_blk,
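
Usage note (not part of the commit): the blk-core.c hunks above replace the old
per-queue plugging calls (blk_plug_device(), blk_remove_plug(),
__generic_unplug_device()) with an on-stack, per-task plug that a submitter
opens and closes around a batch of I/O. Below is a minimal sketch of how a
caller would use the new interface; it assumes the blk_start_plug()/
blk_finish_plug() API added in this series, and the submit_bio_batch() helper
and its bio_list argument are illustrative only.

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* Illustrative helper: submit a caller-prepared batch of read bios. */
	static void submit_bio_batch(struct bio_list *bios)
	{
		struct blk_plug plug;
		struct bio *bio;

		/* While plugged, requests collect on current->plug ... */
		blk_start_plug(&plug);

		while ((bio = bio_list_pop(bios)))
			submit_bio(READ, bio);

		/*
		 * ... and blk_finish_plug() hands them to flush_plug_list(),
		 * which sorts them by queue and runs each queue once.
		 */
		blk_finish_plug(&plug);
	}

The plug list is sorted with list_sort() only when requests for more than one
queue were collected (plug->should_sort), so the common single-queue case
skips the sort entirely.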