static short scif_sport = SCIF_OFED_PORT_8;
static int scif_listen_qlen = 64;
static int mcm_affinity = 1;
-static int mcm_affinity_base = 0;
+static int mcm_affinity_base_mic = 1;
+static int mcm_affinity_base_hca = 1;
static scif_epd_t scif_listen_ep;
static struct scif_portID scif_id;
static char gid_str[INET6_ADDRSTRLEN];
/* scif-rdma cmd and data channel parameters */
static int mix_align = 64;
static int mix_buffer_mb = 128;
-static int mix_buffer_sg = 131072;
+static int mix_buffer_sg = 262144;
static int mix_buffer_sg_cnt = 300;
static int mix_cmd_depth = 50;
static int mix_cmd_size = 256;
static int mix_shared_buffer = 1;
static int mix_max_msg_mb = 64;
static int mix_inline_threshold = 256;
-static int mix_eager_completion = 0;
+static int mix_eager_completion = 1;
static int mcm_ib_inline = 128;
static int mcm_ib_signal_rate = 20;
static int mcm_counters = 0;
mcm_retry = atoi(value);
else if (!strcasecmp("mcm_affinity", opt))
mcm_affinity = atoi(value);
- else if (!strcasecmp("mcm_affinity_base", opt))
- mcm_affinity_base = atoi(value);
+ else if (!strcasecmp("mcm_affinity_base_hca", opt))
+ mcm_affinity_base_hca = atoi(value);
+ else if (!strcasecmp("mcm_affinity_base_mic", opt))
+ mcm_affinity_base_mic = atoi(value);
else if (!strcasecmp("mcm_ib_inline", opt))
mcm_ib_inline = atoi(value);
else if (!strcasecmp("mcm_perf_profile", opt))
mlog(0, "SCIF server_port %d%s\n", scif_sport, scif_sport?"":"(auto)");
mlog(0, "SCIF listen queue length %d\n", scif_listen_qlen);
mlog(0, "CPU affinity enabled %d\n", mcm_affinity);
- mlog(0, "CPU affinity base core_id %d\n", mcm_affinity_base);
+ mlog(0, "CPU affinity base core_id for HCA %d\n", mcm_affinity_base_hca);
+ mlog(0, "CPU affinity base core_id for MIC %d\n", mcm_affinity_base_mic);
mlog(0, "RDMA buffer pool shared %d\n", mix_shared_buffer);
mlog(0, "RDMA buffer pool size %d MB\n", mix_buffer_mb);
mlog(0, "RDMA buffer segment size %d\n", mix_buffer_sg);
/* inline, OP thread posted */
if (m_wr->flags & M_SEND_POSTED) {
- mlog(1, " POSTED: qp %p hd %d tl %d idx %d wr %p wr_id %p, addr %p sz %d sflg 0x%x mflg 0x%x\n",
+ mlog(2, " POSTED: qp %p hd %d tl %d idx %d wr %p wr_id %p, addr %p sz %d sflg 0x%x mflg 0x%x\n",
m_qp, m_qp->wr_hd, m_qp->wr_tl, wr_idx, m_wr,
m_wr->org_id, m_wr->wr.sg_list->addr, m_wr->sg->length,
m_wr->wr.send_flags, m_wr->flags);
m_wr->wr.num_sge, m_wr->sg->length, m_wr->wr.opcode,
m_wr->wr.send_flags, m_wr->wr.imm_data,
m_wr->wr.wr.rdma.remote_addr, m_wr->wr.wr.rdma.rkey, m_wr->flags);
-
}
}
*data += m_qp->wr_pp;
if (mcm_affinity) {
CPU_ZERO( &mc->tx_mask );
- CPU_SET( mcm_affinity_base + mc->scif_id, &mc->tx_mask );
+ CPU_SET( mcm_affinity_base_hca + mc->scif_id, &mc->tx_mask );
if(sched_setaffinity( 0, sizeof(mc->tx_mask), &mc->tx_mask) == -1)
mlog(0, "WARNING: could not set CPU Affinity (%s), continuing...\n", strerror(errno));
}
mlog(0, "TX thread (%x) started for MIC %p node_id %d, CPU_affinity(%s)=%d\n",
pthread_self(), mc, mc->scif_id, mcm_affinity ? "SET":"UNSET",
- mcm_affinity ? (mcm_affinity_base + mc->scif_id):0 );
+ mcm_affinity ? (mcm_affinity_base_hca + mc->scif_id):0 );
while (!finished) {
pthread_mutex_lock(&mc->txlock);
if (mcm_affinity) {
CPU_ZERO( &mc->op_mask );
- CPU_SET( (mcm_affinity_base + mc->scif_id + 1), &mc->op_mask );
+ CPU_SET( (mcm_affinity_base_mic + mc->scif_id + 1), &mc->op_mask );
if(sched_setaffinity( 0, sizeof(mc->op_mask), &mc->op_mask) == -1)
mlog(0, "WARNING: could not set CPU Affinity (%s), continuing...\n", strerror(errno));
}
mlog(0, "OP/CM thread (%x) started for MIC %p node_id %d, CPU_affinity(%s)=%d\n",
pthread_self(), mc, mc->scif_id, mcm_affinity ? "SET":"UNSET",
- mcm_affinity ? (mcm_affinity_base + mc->scif_id + 1):0 );
+ mcm_affinity ? (mcm_affinity_base_mic + mc->scif_id + 1):0 );
/* FD array */
set = mcm_alloc_fd_set();
if (mcm_affinity) {
CPU_ZERO( &mc->cm_mask );
- CPU_SET( mcm_affinity_base + mc->scif_id + 2, &mc->cm_mask );
+ CPU_SET( mcm_affinity_base_hca + mc->scif_id + 2, &mc->cm_mask );
if(sched_setaffinity( 0, sizeof(mc->cm_mask), &mc->cm_mask) == -1)
mlog(0, "WARNING: could not set CPU Affinity (%s), continuing...\n", strerror(errno));
}
mlog(0, "CM thread (%x) started for MIC %p node_id %d, CPU_affinity(%s)=%d\n",
pthread_self(), mc, mc->scif_id, mcm_affinity ? "SET":"UNSET",
- mcm_affinity ? (mcm_affinity_base + mc->scif_id + 2):0 );
+ mcm_affinity ? (mcm_affinity_base_hca + mc->scif_id + 2):0 );
while (!finished) {
fds = 0;
mcm_affinity 1
# mcm_affinity_base:
-# Specifies the starting CPU id base value used for affinity support of
-# all mpxyd data proxy threads. Default = 0, which specifies that
-# TX thread would be on CPU 0, OP thread would be on CPU 1
+# Specifies a hard binding for CPU id base value used for affinity support of
+# hca and mic mpxyd data proxy threads. Default = 0, which specifies use of
+# dynamic affinity binding support via mpxyd. For specific bindings,
+# set to CPU id value desired. For example, mcm_affinity_base_hca = 8,
+# and mcm_affinity_base_mic = 1 would setup the following bindings:
+# MIC dma thread bound to CPU 1 and HCA rdma thread bound to CPU 8
-mcm_affinity_base 0
+mcm_affinity_base_hca 0
+mcm_affinity_base_mic 0
# mcm_depth:
# Specifies the number of request queue entries available for RDMA.
# mcm_signal_rate:
# Specifies the number of request posted before signaling for completions.
-# Larger the value reduces interrupts but could increse reserve times on
+# Larger values reduce interrupts but could increase reserve times on
# buffer resources.
mcm_signal_rate 100
buffer_pool_mb 128
# buffer_segment_size:
-# Specifies the segment size of large DMA transfers. Default is 1MB.
+# Specifies the segment size of large DMA transfers. Default is 256KB.
# The size must be a multiple of 4096.
-buffer_segment_size 1048576
+buffer_segment_size 262144
# buffer_alignment:
# Specifies the alignment of the RDMA buffer. Default is 64 bytes.
# complete and signal write request after scif_readfrom has moved all data, otherwise wait for IB completions.
-# Default is disabled.
+# Default is enabled.
-mcm_eager_completion 0
+mcm_eager_completion 1
# mcm_counters
# log and print device CM, DTO, and event counters. Output any non-zero results at device close.