From: David Chinner Date: Thu, 10 Apr 2008 02:18:39 +0000 (+1000) Subject: [XFS] Per iclog callback chain lock X-Git-Tag: v2.6.26-rc1~1147^2~51 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=114d23aae51233b2bc62d8e2a632bcb55de1953d;p=~emulex%2Finfiniband.git [XFS] Per iclog callback chain lock Rather than use the icloglock for protecting the iclog completion callback chain, use a new per-iclog lock so that walking the callback chain doesn't require holding a global lock. This reduces contention on the icloglock during transaction commit and log I/O completion by reducing the number of times we need to hold the global icloglock during these operations. SGI-PV: 978729 SGI-Modid: xfs-linux-melb:xfs-kern:30770a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 1fa98093389..7a5b12d9353 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ void *iclog_hndl, /* iclog to hang callback off */ xfs_log_callback_t *cb) { - xlog_t *log = mp->m_log; xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; int abortflg; - cb->cb_next = NULL; - spin_lock(&log->l_icloglock); + spin_lock(&iclog->ic_callback_lock); abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); if (!abortflg) { ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || @@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ *(iclog->ic_callback_tail) = cb; iclog->ic_callback_tail = &(cb->cb_next); } - spin_unlock(&log->l_icloglock); + spin_unlock(&iclog->ic_callback_lock); return abortflg; } /* xfs_log_notify */ @@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_log = log; + atomic_set(&iclog->ic_refcnt, 0); + spin_lock_init(&iclog->ic_callback_lock); iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; @@ -1987,7 +1987,7 @@ xlog_state_clean_log(xlog_t *log) if (iclog->ic_state == XLOG_STATE_DIRTY) { iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_offset = 0; - iclog->ic_callback = NULL; /* don't need to free */ + ASSERT(iclog->ic_callback == NULL); /* * If the number of ops in this iclog indicate it just * contains the dummy transaction, we can @@ -2190,37 +2190,40 @@ xlog_state_do_callback( be64_to_cpu(iclog->ic_header.h_lsn); spin_unlock(&log->l_grant_lock); - /* - * Keep processing entries in the callback list - * until we come around and it is empty. We - * need to atomically see that the list is - * empty and change the state to DIRTY so that - * we don't miss any more callbacks being added. - */ - spin_lock(&log->l_icloglock); } else { + spin_unlock(&log->l_icloglock); ioerrors++; } - cb = iclog->ic_callback; + /* + * Keep processing entries in the callback list until + * we come around and it is empty. We need to + * atomically see that the list is empty and change the + * state to DIRTY so that we don't miss any more + * callbacks being added. + */ + spin_lock(&iclog->ic_callback_lock); + cb = iclog->ic_callback; while (cb) { iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback = NULL; - spin_unlock(&log->l_icloglock); + spin_unlock(&iclog->ic_callback_lock); /* perform callbacks in the order given */ for (; cb; cb = cb_next) { cb_next = cb->cb_next; cb->cb_func(cb->cb_arg, aborted); } - spin_lock(&log->l_icloglock); + spin_lock(&iclog->ic_callback_lock); cb = iclog->ic_callback; } loopdidcallbacks++; funcdidcallbacks++; + spin_lock(&log->l_icloglock); ASSERT(iclog->ic_callback == NULL); + spin_unlock(&iclog->ic_callback_lock); if (!(iclog->ic_state & XLOG_STATE_IOERROR)) iclog->ic_state = XLOG_STATE_DIRTY; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 01c63db25a1..104b623aa08 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header { * - ic_offset is the current number of bytes written to in this iclog. * - ic_refcnt is bumped when someone is writing to the log. * - ic_state is the state of the iclog. + * + * Because of cacheline contention on large machines, we need to separate + * various resources onto different cachelines. To start with, make the + * structure cacheline aligned. The following fields can be contended on + * by independent processes: + * + * - ic_callback_* + * - ic_refcnt + * - fields protected by the global l_icloglock + * + * so we need to ensure that these fields are located in separate cachelines. + * We'll put all the read-only and l_icloglock fields in the first cacheline, + * and move everything else out to subsequent cachelines. */ typedef struct xlog_iclog_fields { sv_t ic_forcesema; @@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields { struct xlog_in_core *ic_prev; struct xfs_buf *ic_bp; struct log *ic_log; - xfs_log_callback_t *ic_callback; - xfs_log_callback_t **ic_callback_tail; -#ifdef XFS_LOG_TRACE - struct ktrace *ic_trace; -#endif int ic_size; int ic_offset; - atomic_t ic_refcnt; int ic_bwritecnt; ushort_t ic_state; char *ic_datap; /* pointer to iclog data */ -} xlog_iclog_fields_t; +#ifdef XFS_LOG_TRACE + struct ktrace *ic_trace; +#endif + + /* Callback structures need their own cacheline */ + spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; + xfs_log_callback_t *ic_callback; + xfs_log_callback_t **ic_callback_tail; + + /* reference counts need their own cacheline */ + atomic_t ic_refcnt ____cacheline_aligned_in_smp; +} xlog_iclog_fields_t ____cacheline_aligned_in_smp; typedef union xlog_in_core2 { xlog_rec_header_t hic_header; @@ -366,6 +384,7 @@ typedef struct xlog_in_core { #define ic_bp hic_fields.ic_bp #define ic_log hic_fields.ic_log #define ic_callback hic_fields.ic_callback +#define ic_callback_lock hic_fields.ic_callback_lock #define ic_callback_tail hic_fields.ic_callback_tail #define ic_trace hic_fields.ic_trace #define ic_size hic_fields.ic_size