From: Jerome Glisse Date: Wed, 9 May 2012 13:34:55 +0000 (+0200) Subject: drm/radeon: use one wait queue for all rings add fence_wait_any v2 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=0085c95061e836f3ed489d042b502733c094e7e4;p=~shefty%2Frdma-dev.git drm/radeon: use one wait queue for all rings add fence_wait_any v2 Use one wait queue for all rings. When one ring progress, other likely does to and we are not expecting to have a lot of waiter anyway. Also add a fence_wait_any that will wait until the first fence in the fence array (one fence per ring) is signaled. This allow to wait on all rings. v2: some minor cleanups and improvements. Signed-off-by: Christian König Signed-off-by: Jerome Glisse Signed-off-by: Dave Airlie --- diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ada70d157e1..37a74597e9d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -262,7 +262,6 @@ struct radeon_fence_driver { uint64_t seq; atomic64_t last_seq; unsigned long last_activity; - wait_queue_head_t queue; bool initialized; }; @@ -286,6 +285,9 @@ bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); +int radeon_fence_wait_any(struct radeon_device *rdev, + struct radeon_fence **fences, + bool intr); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); @@ -1534,6 +1536,7 @@ struct radeon_device { struct radeon_scratch scratch; struct radeon_mman mman; struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; + wait_queue_head_t fence_queue; struct radeon_semaphore_driver semaphore_drv; struct mutex ring_lock; struct radeon_ring ring[RADEON_NUM_RINGS]; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 098d1faed1a..14dbc287cac 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -129,7 +129,7 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) if (wake) { rdev->fence_drv[ring].last_activity = jiffies; - wake_up_all(&rdev->fence_drv[ring].queue); + wake_up_all(&rdev->fence_queue); } } @@ -224,11 +224,11 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq, trace_radeon_fence_wait_begin(rdev->ddev, seq); radeon_irq_kms_sw_irq_get(rdev, ring); if (intr) { - r = wait_event_interruptible_timeout(rdev->fence_drv[ring].queue, + r = wait_event_interruptible_timeout(rdev->fence_queue, (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)), timeout); } else { - r = wait_event_timeout(rdev->fence_drv[ring].queue, + r = wait_event_timeout(rdev->fence_queue, (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)), timeout); } @@ -306,6 +306,159 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) return 0; } +bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) +{ + unsigned i; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) { + return true; + } + } + return false; +} + +static int radeon_fence_wait_any_seq(struct radeon_device *rdev, + u64 *target_seq, bool intr) +{ + unsigned long timeout, last_activity, tmp; + unsigned i, ring = RADEON_NUM_RINGS; + bool signaled; + int r; + + for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) { + continue; + } + + /* use the most recent one as indicator */ + if (time_after(rdev->fence_drv[i].last_activity, last_activity)) { + last_activity = rdev->fence_drv[i].last_activity; + } + + /* For lockup detection just pick the lowest ring we are + * actively waiting for + */ + if (i < ring) { + ring = i; + } + } + + /* nothing to wait for ? */ + if (ring == RADEON_NUM_RINGS) { + return 0; + } + + while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { + timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT; + if (time_after(last_activity, timeout)) { + /* the normal case, timeout is somewhere before last_activity */ + timeout = last_activity - timeout; + } else { + /* either jiffies wrapped around, or no fence was signaled in the last 500ms + * anyway we will just wait for the minimum amount and then check for a lockup + */ + timeout = 1; + } + + trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (target_seq[i]) { + radeon_irq_kms_sw_irq_get(rdev, i); + } + } + if (intr) { + r = wait_event_interruptible_timeout(rdev->fence_queue, + (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)), + timeout); + } else { + r = wait_event_timeout(rdev->fence_queue, + (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)), + timeout); + } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (target_seq[i]) { + radeon_irq_kms_sw_irq_put(rdev, i); + } + } + if (unlikely(r < 0)) { + return r; + } + trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]); + + if (unlikely(!signaled)) { + /* we were interrupted for some reason and fence + * isn't signaled yet, resume waiting */ + if (r) { + continue; + } + + mutex_lock(&rdev->ring_lock); + for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) { + if (time_after(rdev->fence_drv[i].last_activity, tmp)) { + tmp = rdev->fence_drv[i].last_activity; + } + } + /* test if somebody else has already decided that this is a lockup */ + if (last_activity != tmp) { + last_activity = tmp; + mutex_unlock(&rdev->ring_lock); + continue; + } + + if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) { + /* good news we believe it's a lockup */ + dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n", + target_seq[ring]); + + /* change last activity so nobody else think there is a lockup */ + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + rdev->fence_drv[i].last_activity = jiffies; + } + + /* mark the ring as not ready any more */ + rdev->ring[ring].ready = false; + mutex_unlock(&rdev->ring_lock); + return -EDEADLK; + } + mutex_unlock(&rdev->ring_lock); + } + } + return 0; +} + +int radeon_fence_wait_any(struct radeon_device *rdev, + struct radeon_fence **fences, + bool intr) +{ + uint64_t seq[RADEON_NUM_RINGS]; + unsigned i; + int r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + seq[i] = 0; + + if (!fences[i]) { + continue; + } + + if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) { + /* something was allready signaled */ + return 0; + } + + if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) { + seq[i] = fences[i]->seq; + } + } + + r = radeon_fence_wait_any_seq(rdev, seq, intr); + if (r) { + return r; + } + return 0; +} + int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) { uint64_t seq; @@ -354,10 +507,10 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) { uint64_t emitted; - radeon_fence_process(rdev, ring); /* We are not protected by ring lock when reading the last sequence * but it's ok to report slightly wrong fence count here. */ + radeon_fence_process(rdev, ring); emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) { @@ -402,7 +555,6 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) rdev->fence_drv[ring].seq = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; - init_waitqueue_head(&rdev->fence_drv[ring].queue); rdev->fence_drv[ring].initialized = false; } @@ -410,6 +562,7 @@ int radeon_fence_driver_init(struct radeon_device *rdev) { int ring; + init_waitqueue_head(&rdev->fence_queue); for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { radeon_fence_driver_init_ring(rdev, ring); } @@ -428,7 +581,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) if (!rdev->fence_drv[ring].initialized) continue; radeon_fence_wait_empty_locked(rdev, ring); - wake_up_all(&rdev->fence_drv[ring].queue); + wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); rdev->fence_drv[ring].initialized = false; }