--- /dev/null
+#!/bin/sh
+#
+# Copyright (c) 2016 Intel Corporation. All rights reserved.
+#
+# This Software is licensed under one of the following licenses:
+#
+# 1) under the terms of the "Common Public License 1.0" a copy of which is
+# in the file LICENSE.txt in the root directory. The license is also
+# available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/cpl.php.
+#
+# 2) under the terms of the "The BSD License" a copy of which is in the file
+# LICENSE2.txt in the root directory. The license is also available from
+# the Open Source Initiative, see
+# http://www.opensource.org/licenses/bsd-license.php.
+#
+# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+# copy of which is in the file LICENSE3.txt in the root directory. The
+# license is also available from the Open Source Initiative, see
+# http://www.opensource.org/licenses/gpl-license.php.
+#
+# Licensee has the right to choose one of the above licenses.
+#
+# Redistributions of source code must retain the above copyright
+# notice and one of the license notices.
+#
+# Redistributions in binary form must reproduce both the above copyright
+# notice, one of the license notices in the documentation
+# and/or other materials provided with the distribution.
+#
+# Test Suite to test uDAPL Providers and CCL Proxy on MICs and Hosts
+#
+# Sample Usage, all providers, one loop, fast:
+#
+# ./dtest_suite.sh -P ALL -l 1 -f
+#
+
+### --- user input section --- ###
+server_list="cst-kc1 cst-kc1-mic0 cst-kc1-mic1"
+client_list="cst-kc2 cst-kc2-mic0 cst-kc2-mic1 cst-kc1 cst-kc1-mic0 cst-kc1-mic1"
+### --- dtest test cases fine tune zone --- ###
+# Note: value zero indicacte dtest will use the test default value
+b_options="0 1 4096"
+u_options="0 1"
+w_options="0 1"
+S_options="0 9"
+B_options="0 1"
+D_options="0 1"
+W_options="0"
+# test defaults
+def_provider="ofa-v2-mlx4_0-1u"
+dat_conf="/etc/dat.conf"
+### --- End of user input section --- ###
+
+script_version="1.05"
+
+# History log
+# 1.05 - Disable data validation mode when using scif provider
+# From: Amir Hanania <amir.hanania@intel.com>
+# 1.04 - Add data validation for dtest ping pong
+# Add option not to use CPU mask in performance test
+# From: Amir Hanania <amir.hanania@intel.com>
+# 1.03 - Add dapl tests
+# From: Amir Hanania <amir.hanania@intel.com>
+# 1.02 - Change performane test to use dtest -W case for latency.
+# Note: You must have a dtesr version that support -W to run performane test.
+# From: Amir Hanania <amir.hanania@intel.com>
+# 1.01 - Add multi provider test
+# From: Amir Hanania <amir.hanania@intel.com>
+# 1.00 - Initial Version
+# From: Amir Hanania <amir.hanania@intel.com>
+# Test script to test dapl.
+# Run dtest test in multiple options.
+# Notes:
+# 1. For performance test. Same dtest configuration is used twice.
+# Once with -W for latency and once without for BW.
+#
+
+user_provider=$def_provider
+server_client_list=$server_list" "$client_list
+host_list=`for i in $server_client_list; do echo $i | awk -F "-mic" '{ print $1 }'; done | sort | uniq`
+provider_search_debug=0
+dapl_test_user_input="y"
+ran_one_dapltest=0
+dapl_test_rep_max=100
+dapl_test_rep=$dapl_test_rep_max
+mfo_test=0
+fast_test=0
+fast_test_str=""
+perf_test=0
+no_inline_data=0
+debug_info=0
+v_for_test=""
+user_srting=""
+ctrl_c=0
+runs=0
+max_run_time=0
+dapl_mtu=0
+loops=0
+log_file_dir="dtest_perf_logs"
+log_file="$log_file_dir/dtest_performance_"
+unidirection_test=0
+cpu_mask="no_cpu_mask"
+user_b_options="none"
+dog_file=/tmp/dog.log
+dog_ser=/tmp/dog.ser
+dog_cli=/tmp/dog.cli
+i=1
+while [ $i -lt 5000000 ]; do
+ b_options_for_perf_test+=" $i"
+ i=$(( $i*2 ))
+done
+mkdir -p $log_file_dir
+
+control_c()
+# run if user hits control-c
+{
+ echo -en "\n*** ^c ***\n"
+ if [ $ctrl_c -ne 0 ]; then
+ echo -ne "\n*** Forced EXIT! ***\n\n"
+ for s in $server_list; do
+ ssh root@$s "killall dtest" > /dev/null 2>&1
+ ssh root@$s "killall dapltest" > /dev/null 2>&1
+ done
+ for c in $client_list; do
+ ssh root@$c "killall dtest" > /dev/null 2>&1
+ ssh root@$c "killall dapltest" > /dev/null 2>&1
+ done
+ exit 1
+ fi
+ let "ctrl_c+=1"
+ echo -en "\n*** Will break after this test case ***\n\n"
+}
+
+# trap keyboard interrupt (control-c)
+trap control_c SIGINT
+
+exit_control()
+{
+ # if dog killed us. Clean up the dtest still working.
+ for s in $server_list; do
+ ssh root@$c "killall dtest" > /dev/null 2>&1
+ done
+ for c in $client_list; do
+ ssh root@$c "killall dtest" > /dev/null 2>&1
+ done
+
+ echo "2" > $dog_file
+ sleep 2
+ #kill dog
+ # jobs -p | xargs kill
+}
+# trap exit to kill dog when script exit
+#trap 'jobs -p | xargs kill' EXIT
+trap exit_control EXIT
+
+function dog(){
+ while true; do
+ val=`cat $dog_file`
+ if [ $val -eq 2 ]; then
+ exit
+ fi
+ if [ $val -eq 1 ]; then
+ server=`cat $dog_ser`
+ client=`cat $dog_cli`
+ server_err=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c ERR"`
+ client_err=`ssh root@$client "cat /tmp/dtest_cli_run.log | grep -c ERR"`
+ server_fail=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c FAIL"`
+ client_fail=`ssh root@$client "cat /tmp/dtest_cli_run.log | grep -c FAIL"`
+ if [ $server_err -gt 0 ] || [ $client_err -gt 0 ] || [ $server_fail -gt 0 ] || [ $client_fail -gt 0 ]; then
+ sleep 2
+ echo -e "\n\n\twatchdog bark - validation test failed\n\n"
+ killall ${0##*/}
+ fi
+ echo -n "."
+ fi
+ sleep 1
+ done
+}
+
+function wait_for_server_to_be_ready(){
+ i=99
+ echo -ne "Waiting to servers to come up... $i \r"
+ until [ $i -eq 0 ]; do
+ up=0
+ file_found="NOT found"
+ ssh root@$server [ -f /tmp/dtest_ser_run.log ] && file_found="file found"
+ if [ "$file_found" == "file found" ]; then
+ up=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c waiting"`
+ fi
+ if [ $up -eq 1 ]; then
+ break;
+ fi
+ let "i = i - 1"
+ echo -ne "Waiting to servers to come up... $i \r"
+ sleep 0.1
+ done
+}
+
+
+u=0
+w=0
+B=0
+b=0
+S=0
+D=0
+
+function testcase(){
+ # Setting the dtest options
+ if [ $u -ne 0 ]; then
+ u_for_test="-u"
+ else
+ u_for_test=""
+ fi
+ if [ $w -ne 0 ]; then
+ w_for_test="-w"
+ else
+ w_for_test=""
+ fi
+ if [ $B -ne 0 ]; then
+ B_for_test="-B $B"
+ else
+ B_for_test=""
+ fi
+ if [ $b -ne 0 ]; then
+ b_for_test="-b $b"
+ else
+ b_for_test=""
+ fi
+ if [ $S -ne 0 ]; then
+ S_for_test="-S $S"
+ else
+ S_for_test=""
+ fi
+ if [ $W -ne 0 ]; then
+ W_for_test="-W"
+ else
+ W_for_test=""
+ fi
+ if [ $D -ne 0 ]; then
+ if [ $do_not_validate_data_with_scif -eq 1 ]; then
+ return 0
+ fi
+ D_for_test="-D -a -B 10"
+ else
+ D_for_test=""
+ fi
+
+ if [ $ctrl_c -ne 0 ]; then
+ echo -ne "\n*** Stop test due to ctrl c ***\n\n"
+ exit 1
+ fi
+
+ # in case the prev test failed. The files will be still there for debug. Delete them for the new run.
+ ssh root@$server "rm /tmp/dtest_ser_run.log" > /dev/null 2>&1
+ ssh root@$client "rm /tmp/dtest_cli_run.log" > /dev/null 2>&1
+
+ if [ $D -eq 1 ]; then
+ support_data_validation
+ if [ $dtest_support_data_val -ne 1 ]; then
+ return
+ fi
+ fi
+
+ #Start the server
+ echo "----------------------------------------------------------"
+ echo "Test case: $W_for_test $D_for_test $u_for_test $w_for_test $B_for_test $b_for_test $S_for_test $v_for_test $user_srting"
+ echo -ne "Start $taskset_4_server dtest -P $provider server $server\r"
+ ssh root@$server "$export_str $taskset_4_server dtest -P $provider $W_for_test $u_for_test $w_for_test $B_for_test $b_for_test $S_for_test $v_for_test $user_srting $D_for_test >& /tmp/dtest_ser_run.log" &
+ ser_pid=$!
+
+ # Wait for server to be ready
+ wait_for_server_to_be_ready
+
+ if [ $i -eq 0 ]; then
+ echo $server dtest failed - did not start
+ ssh root@$server "killall dtest"
+ ssh root@$client "killall dtest"
+ exit 1
+ fi
+
+ # Start client
+ echo -ne "Start $taskset_4_client dtest -P $provider client \r"
+ ssh root@$client "$export_str $taskset_4_client dtest -P $provider -h $server $W_for_test $u_for_test $w_for_test $B_for_test $b_for_test $S_for_test $v_for_test $user_srting $D_for_test >& /tmp/dtest_cli_run.log" &
+ cli_pid=$!
+
+ if [ $D -eq 1 ]; then
+ echo $server > $dog_ser
+ echo $client > $dog_cli
+ echo "1" > $dog_file
+ fi
+
+ # Wait for Server and Client to be done
+ wait $ser_pid $cli_pid
+
+ if [ $D -eq 1 ]; then
+ echo "0" > $dog_file
+ fi
+
+ # Check results from log files
+ server_pass=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c PASSED"`
+ client_pass=`ssh root@$client "cat /tmp/dtest_cli_run.log | grep -c PASSED"`
+ server_err=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c ERR"`
+ client_err=`ssh root@$client "cat /tmp/dtest_cli_run.log | grep -c ERR"`
+ do_exit=0
+ if [ $ctrl_c -ne 0 ]; then
+ ssh root@$server "killall -9 dtest" > /dev/null 2>&1
+ ssh root@$client "killall -9 dtest" > /dev/null 2>&1
+ do_exit=1
+ fi
+
+ if [ $server_pass -ne 1 ] || [ $server_err -ne 0 ]; then
+ echo "****** ERROR - $server server failed (with $client client) *******"
+ echo " log file: /tmp/dtest_ser_run.log on $server"
+ do_exit=1
+ fi
+
+ if [ $client_pass -ne 1 ] || [ $client_err -ne 0 ]; then
+ echo "****** ERROR - $client client failed (with $server server) *******"
+ echo " log file: /tmp/dtest_cli_run.log on $client"
+ do_exit=1
+ fi
+
+ if [ $do_exit -eq 1 ]; then
+ echo
+ exit 1
+ fi
+
+ # Print to screen or file the results if needed
+ if [ $perf_test -eq 1 ]; then
+ echo -ne " \r"
+ if [ $fast_test -eq 1 ]; then
+ if [ $W -ne 0 ]; then
+ # second run is latency test called with -W
+ lat=`ssh root@$client cat /tmp/dtest_cli_run.log | grep PingPong | awk -F "latency " '{print $2}' | awk -F " us" '{ print $1 }'`
+ res="$lat, Tx size=$res"
+ echo "latency: $res"
+ echo $res >> $log_file
+ else
+ # First test for BW
+ res=`ssh root@$client cat /tmp/dtest_cli_run.log | grep direction | awk -F "00 x " '{ print $2 }'`
+ fi
+ else
+ if [ $W -ne 0 ]; then
+ # second run is latency test called with -W
+ lat=`ssh root@$client cat /tmp/dtest_cli_run.log | grep PingPong | awk -F "latency " '{print $2}' | awk -F " us" '{ print $1 }'`
+ echo -e "Byte size: $b\t\tlatency: $lat\t\tBW: $res"
+ res=`echo $res | awk -F " MB" '{ print $1 }'`
+ res=$(printf "%15s" $res)
+ lat=$(printf "%10s" $lat)
+ echo -e "$b\t\t$lat\t\t$res" >> $log_file
+ else
+ # First test for BW
+ res=`ssh root@$client cat /tmp/dtest_cli_run.log | grep direction | awk -F "00 x $b, " '{ print $2 }'`
+ fi
+ fi
+ fi
+ ssh root@$server "rm /tmp/dtest_ser_run.log"
+ ssh root@$client "rm /tmp/dtest_cli_run.log"
+
+ echo "Test case passed "
+
+ read -t 0.01 -n 1 -s u_input
+ ret=$?
+ if [ $ret -eq 0 ] && [ "$u_input" == "i" ]; then
+ print_round_info
+ fi
+
+ return 0
+}
+
+
+function wait_for_it(){
+ max_wait=900
+ i=$max_wait
+ sleep_for=0.1
+ test_start_time=`date +%s`
+ until [ $i -eq 0 ]; do
+ echo -n "."
+ sleep $sleep_for
+ up=`ssh root@$wait_for_it_machine cat $wait_for_it_file | grep -c "$wait_for_it_string"`
+ if [ $up -eq 1 ]; then
+ break;
+ fi
+ let "i = i - 1"
+ if [ $ctrl_c -ne 0 ]; then
+ i=0
+ fi
+ if [ $i -eq $(( $max_wait - 20 )) ]; then
+ sleep_for=1
+ fi
+ if [ $i -eq $(( $max_wait - 40 )) ]; then
+ sleep_for=3
+ fi
+ done
+
+ if [ $i -eq 0 ]; then
+ if [ $ctrl_c -ne 0 ]; then
+ echo -ne "\n\t*** Stop test due to ctrl c ***\n\n"
+ else
+ echo " failed"
+ echo -e "\n\n\tDid not find $wait_for_it_string string on machine: $wait_for_it_machine at file $wait_for_it_file - EXIT\n\n"
+ fi
+ ssh root@$server killall dapltest > /dev/null 2>&1
+ ssh root@$client killall dapltest > /dev/null 2>&1
+ exit
+ fi
+ test_end_time=`date +%s`
+ test_run_time=$(($test_end_time-$test_start_time))
+ echo " done in $test_run_time sec"
+}
+
+
+function print_round_info(){
+ now=`date +%s`
+ run_time=$(($now-$start_time))
+ ss=$(($run_time%60))
+ mm=$(($run_time/60))
+ mm=$(($mm%60))
+ hh=$(($run_time/3600))
+ echo "**************************************************************"
+ echo -e "\tin round $runs - $hh h $mm m $ss s"
+ echo "**************************************************************"
+}
+
+
+# Check if client and server dtest support data validation
+function support_data_validation() {
+ dtest_support_data_val=0
+
+ ssh root@$server "dtest -U >& /tmp/dtest_ser_run.log"
+ ssh root@$client "dtest -U >& /tmp/dtest_cli_run.log"
+ sleep .1
+ ser_is_valid=`ssh root@$server cat /tmp/dtest_ser_run.log | grep -c "validate data"`
+ if [ $ser_is_valid -ne 1 ]; then
+ return 0
+ fi
+ cli_is_valid=`ssh root@$client cat /tmp/dtest_cli_run.log | grep -c "validate data"`
+ if [ $cli_is_valid -ne 1 ]; then
+ return 0
+ fi
+ dtest_support_data_val=1
+}
+
+
+# Run dtest in all data size ping pong test with data validation mode between client and server
+function server_client_data_validation_test(){
+
+ echo -e "\n\n\n\t**** dtest data validation test\t\tprovider: $provider\t\tserver: $server $taskset_4_server\t\tclient: $client $taskset_4_client ****\n"
+ support_data_validation
+ if [ $dtest_support_data_val -ne 1 ]; then
+ echo -e "\t**** $client or $server dtest does not support data validation - skipping ****"
+ return
+ fi
+
+ echo -e " Start $taskset_4_server dtest -P $provider -D -a on server $server"
+ ssh root@$server "$export_str $taskset_4_server dtest -P $provider -D -a -B 100 >& /tmp/dtest_ser_run.log" &
+ ser_pid=$!
+ wait_for_server_to_be_ready
+
+ echo -e " Start $taskset_4_client dtest -P $provider -D -a on client $client"
+ ssh root@$client "$export_str $taskset_4_client dtest -P $provider -h $server -D -a -B 100 >& /tmp/dtest_cli_run.log" &
+ cli_pid=$!
+ # just wait a bit for files on server and clien be ready before waking up the dog
+ sleep 1
+
+ echo $server > $dog_ser
+ echo $client > $dog_cli
+ echo "1" > $dog_file
+
+ # Wait for Server and Client to be done
+ wait $ser_pid $cli_pid
+
+ echo "0" > $dog_file
+ echo
+ # Check results from log files
+ server_pass=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c PASSED"`
+ client_pass=`ssh root@$client "cat /tmp/dtest_cli_run.log | grep -c PASSED"`
+ server_err=`ssh root@$server "cat /tmp/dtest_ser_run.log | grep -c ERR"`
+ client_err=`ssh root@$client "cat /tmp/dtest_cli_run.log | grep -c ERR"`
+ do_exit=0
+ if [ $ctrl_c -ne 0 ]; then
+ ssh root@$server "killall -9 dtest" > /dev/null 2>&1
+ ssh root@$client "killall -9 dtest" > /dev/null 2>&1
+ do_exit=1
+ fi
+
+ if [ $server_pass -ne 1 ] || [ $server_err -ne 0 ]; then
+ echo "****** ERROR - $server server failed (with $client client) *******"
+ echo " log file: /tmp/dtest_ser_run.log on $server"
+ do_exit=1
+ fi
+
+ if [ $client_pass -ne 1 ] || [ $client_err -ne 0 ]; then
+ echo "****** ERROR - $client client failed (with $server server) *******"
+ echo " log file: /tmp/dtest_cli_run.log on $client"
+ do_exit=1
+ fi
+
+ if [ $do_exit -eq 1 ]; then
+ echo
+ exit 1
+ fi
+
+ echo -e "\n\tdtest data validation test\t\tserver: $server\t\tclient: $client\t\tprovider: $provider\t\tTEST PASSED\n\n"
+
+}
+
+
+# Run dapltest between client and server
+function server_client_dapl_test(){
+ ofa_post=""
+ dapl_test_rep=$dapl_test_rep_max
+ if [ $ctrl_c -ne 0 ]; then
+ echo -ne "\n*** Stop test due to ctrl c ***\n\n"
+ exit 1
+ fi
+
+ echo "----------------------------------------------------------"
+ echo -ne "\t**** dapltest\t\tprovider: $provider\t\tserver: $server\t\tclient: $client "
+
+ # in case the prev test failed. The files will be still there for debug. Delete them for the new run.
+ ssh root@$server "rm /tmp/dapltest_ser_run.log" > /dev/null 2>&1
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+
+ # 1. skip if roc
+ # 2. check that provider is ofa or scm
+ is_roe=`echo $provider | grep -c roe`
+ if [ $is_roe -eq 1 ]; then
+ good_provider_for_dapltest=0
+ echo -e " - provider $provider not supported - skipping ****"
+ echo "----------------------------------------------------------"
+ return 0
+ fi
+ is_ofa=`ssh root@$server cat $dat_conf | grep $provider | grep -c libdaplofa`
+ is_scm=`ssh root@$server cat $dat_conf | grep $provider | grep -c libdaploscm`
+ if [ $is_ofa -eq 0 ] && [ $is_scm -eq 0 ]; then
+ good_provider_for_dapltest=0
+ echo -e " - provider $provider not supported - skipping ****"
+ echo "----------------------------------------------------------"
+ return 0
+ fi
+ if [ $is_ofa -eq 1 ]; then
+ dat_line=`ssh root@$server cat $dat_conf | grep $provider`
+ ofa_post=`echo $dat_line | grep lofa | awk '{ print $1 }' | awk -F "ofa-v2" '{ print $2 }'`
+ fi
+ ran_one_dapltest=1
+
+ # start server
+ wait_for_it_machine=$server
+ wait_for_it_file="/tmp/dapltest_ser_run.log"
+ wait_for_it_string="Dapltest: Service Point Ready"
+ echo -e " ****\n----------------------------------------------------------"
+ echo -e "dapltest\tprovider: $provider\tserver: $server\tclient: $client"
+ echo -ne "start dapltest server..."
+ ssh root@$server "dapltest -T S -D $provider >& /tmp/dapltest_ser_run.log" &
+ wait_for_it
+
+ # tests
+ wait_for_it_machine=$client
+ wait_for_it_file="/tmp/dapltest_cli_run.log"
+ wait_for_it_string="Total WQE"
+ # test 1
+ echo -ne "start dapltest client test 1 ..."
+ ssh root@$client "dapltest -T T -s $server$ofa_post -D $provider -i $dapl_test_rep -t 1 -w 1 client SR 256 server SR 256 >& /tmp/dapltest_cli_run.log" &
+ wait_for_it
+
+ if [ $fast_test -eq 0 ]; then
+ # test 2
+ if [ $dapl_test_rep -ne 1 ] && [ $test_run_time -ge 4 ]; then
+ dapl_test_rep=$(($dapl_test_rep/$test_run_time/8))
+ if [ $dapl_test_rep -eq 0 ]; then
+ dapl_test_rep=1
+ fi
+ echo Reduce rep to $dapl_test_rep
+ fi
+ echo -ne "start dapltest client test 2 ..."
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+ ssh root@$client "dapltest -T T -s $server$ofa_post -D $provider -i $dapl_test_rep -t 1 -w 1 client SR 256 server RW 4096 server SR 256 >& /tmp/dapltest_cli_run.log" &
+ wait_for_it
+
+ # test 3
+ echo -ne "start dapltest client test 3 ..."
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+ ssh root@$client "dapltest -T T -s $server$ofa_post -D $provider -i $dapl_test_rep -t 1 -w 1 client SR 256 server RR 4096 server SR 256 >& /tmp/dapltest_cli_run.log" &
+ wait_for_it
+
+ # test 4
+ echo -ne "start dapltest client test 4 ..."
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+ ssh root@$client "dapltest -T T -s $server$ofa_post -D $provider -i $dapl_test_rep -t 1 -w 1 client SR 256 server RW 4096 server SR 256 client SR 256 server RW 4096 server SR 256 client SR 4096 server SR 256 >& /tmp/dapltest_cli_run.log" &
+ wait_for_it
+
+ # test 5
+ if [ $dapl_test_rep -ne 1 ] && [ $test_run_time -ge 2 ]; then
+ dapl_test_rep=$(($dapl_test_rep/8))
+ if [ $dapl_test_rep -eq 0 ]; then
+ dapl_test_rep=1
+ fi
+ echo Reduce rep to $dapl_test_rep
+ fi
+ echo -ne "start dapltest client test 5 ..."
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+ ssh root@$client "dapltest -T T -s $server$ofa_post -D $provider -i $dapl_test_rep -t 1 -w 8 client SR 256 server RW 4096 server SR 256 client SR 256 server RW 4096 server SR 256 client SR 4096 server SR 256 >& /tmp/dapltest_cli_run.log" &
+ wait_for_it
+
+ if [ $dapl_test_rep -ne 1 ] && [ $test_run_time -ge 2 ]; then
+ dapl_test_rep=$(($dapl_test_rep/4))
+ if [ $dapl_test_rep -eq 0 ]; then
+ dapl_test_rep=1
+ fi
+ echo Reduce rep to $dapl_test_rep
+ fi
+ # test 6
+ echo -ne "start dapltest client test 6 ..."
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+ ssh root@$client "dapltest -T T -s $server$ofa_post -D $provider -i $dapl_test_rep -t 4 -w 8 client SR 256 server RW 4096 server SR 256 client SR 256 server RW 4096 server SR 256 client SR 4096 server SR 256 >& /tmp/dapltest_cli_run.log" &
+ wait_for_it
+ fi
+
+ # stop server
+ echo -n "stop dapltest server..."
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+ ssh root@$client "dapltest -T Q -s $server$ofa_post -D $provider >& /tmp/dapltest_cli_run.log" &
+ cli_pid=$!
+
+ wait_for_it_machine=$server
+ wait_for_it_file="/tmp/dapltest_ser_run.log"
+ wait_for_it_string="Exiting"
+ echo -n "wait for dapltest server to stop..."
+ wait_for_it
+
+ # Wait for Server and Client to be done
+ wait $cli_pid
+
+ # clean up
+ ssh root@$server "rm /tmp/dapltest_ser_run.log" > /dev/null 2>&1
+ ssh root@$client "rm /tmp/dapltest_cli_run.log" > /dev/null 2>&1
+
+ echo -e "\tdapltest\t\tserver: $server\t\tclient: $client\t\tprovider: $provider\t\tTESTS PASSED"
+ echo -e "----------------------------------------------------------\n"
+}
+
+
+# Run all the test cases between two machines.
+function server_host_test(){
+ taskset_4_server=""
+ taskset_4_client=""
+ if [ $perf_test -eq 1 ]; then
+ is_mic=`echo $server | grep -c mic`
+ if [ $is_mic -eq 0 ] && [ "$cpu_mask" != "no_cpu_mask" ]; then
+ taskset_4_server="taskset $cpu_mask "
+ fi
+ is_mic=`echo $client | grep -c mic`
+ if [ $is_mic -eq 0 ] && [ "$cpu_mask" != "no_cpu_mask" ]; then
+ taskset_4_client="taskset $cpu_mask "
+ fi
+
+ echo -e "\n**** dtest: provider: $provider \tserver: $server \tclient: $client ****\n" >> $log_file
+ if [ $fast_test -eq 0 ]; then
+ echo -e "\nBytes\t\t Latency\t\t\t MB/s" >> $log_file
+ fi
+ fi
+
+ if [ "$dapl_test_user_input" != "o" ]; then
+ echo -e "\n\n\n\t**** dtest\t\tprovider: $provider\t\tserver: $server $taskset_4_server\t\tclient: $client $taskset_4_client ****"
+
+ #set var value to zero in order to use dtest default value for that option.
+ for u in $u_options; do
+ for w in $w_options; do
+ for b in $b_options; do
+ for S in $S_options; do
+ for B in $B_options; do
+ for D in $D_options; do
+ for W in $W_options; do # Always keep last. See Note 1.
+ # Run one test case between Client and Server.
+ testcase
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ echo TEST FAILED
+ exit 1
+ fi
+ sleep 1
+ done
+ done
+ done
+ done
+ done
+ done
+ done
+
+ echo -e "\n\tdtest\t\tserver: $server\t\tclient: $client\t\tprovider: $provider\t\tTEST PASSED\n\n"
+
+ if [ $perf_test -ne 1 ] && [ $do_not_validate_data_with_scif -eq 0 ] && [ $fast_test -ne 1 ]; then
+ server_client_data_validation_test
+ fi
+ fi
+
+ if [ "$dapl_test_user_input" != "n" ] && [ $good_provider_for_dapltest -eq 1 ] && [ $fast_test -ne 1 ]; then
+ server_client_dapl_test
+ fi
+
+}
+
+function help(){
+ echo -e "\n\tRun dtest and dapltest accross cluster - from each client to each server\n"
+ echo -e "\t\tServer list: $server_list"
+ echo -e "\t\tClient list: $client_list\n"
+ echo -e "\t-P <PROVIDER NAME> : Provider name or 'ALL' for all prividers (default $def_provider)"
+ echo -e "\t-f: Fast test"
+ echo -e "\t-l <NUM> : How many test loops to run. Def forever"
+ echo -e "\t-t <NUM> : How many minutes to run. Def forever"
+ echo -e "\t-p <CPUs mask> or \"no_cpu_mask\": Performance test"
+ echo -e "\t\tMask in 0xHEX format. should match host's /sys/class/mic/mic0/device/local_cpus"
+ echo -e "\t\tFor no CPU mask enter \"no_cpu_mask\""
+ echo -e "\t\tConsider also: taskset mpxyd, set mcm_affinity to 2 in /etc/mpxyd.conf, performance mode at the host scaling_governor"
+ echo -e "\t\tConsider also to change DAPL MTU (-M optoin)"
+ echo -e "\t-w: Write only test"
+ echo -e "\t-u: uni-direction only test"
+ echo -e "\t-d <n|y|o> : dapl test options. \"n\": No dapl tests. \"y\": Run dapl tests. \"o\": Run Only dapl tests (no dtest). Def: Run dapl_test"
+ echo -e "\t-M <NUM> : DAPL MTU"
+ echo -e "\t-b <NUM> : data size. Can be: one size, many sizes as a string or type \`all\` for all sizes power of 2"
+ echo -e "\t-U: \"user string\". user dtest option string ( -w -b -u and -S dtest options )"
+ echo -e "\t-z: use zero for -w -b -u and -S dtest options (zero mean test default value)"
+ echo -e "\t-i: No inline data test"
+ echo -e "\t-m: Force MFO test"
+ echo -e "\t-D: DAPL debug print in log files"
+ echo -e "\t-v: dtest verbose mode"
+ echo -e "\t-q: qib test over mlx4 (same as -m and -i options)"
+ echo -e "\t-V: Print the script version"
+ echo -e "\t-h: help"
+ echo -e "\n\tWhile test is running:"
+ echo -e "\t^c: Exit gracefully"
+ echo -e "\t^c^c: Forced exit"
+ echo -e "\ti: Print round number and time duration"
+ echo -e "\n\n"
+ exit 1
+}
+
+
+function log(){
+ if [ $provider_search_debug -eq 1 ]; then
+ echo -e "$@"
+ else
+ echo -n "."
+ fi
+}
+
+
+function providers_search(){
+ echo -e "\nSearching for devices"
+ first_host=1
+ for host in $host_list; do
+ # make sure host dat file exist
+ dat_conf_found="NOT found"
+ ssh root@$host "[ -f $dat_conf ]" && dat_conf_found="dat_conf_found"
+ if [ "$dat_conf_found" == "dat_conf_found" ]; then
+ log "$dat_conf found on $host"
+ else
+ echo -e "\n\t$dat_conf was not found on $host.\n\n"
+ exit 1
+ fi
+
+ #ib devices list
+ dev_list=`ssh root@$host ibv_devices | tail -n +3 | awk '{ print $1 }'`
+ for dev in $dev_list; do
+ # for each device
+ log Found $dev device
+ port_cnt=`ssh root@$host ibv_devinfo -d $dev | grep phys_port_cnt | awk '{print$2 }'`
+ log " $dev phys_port_cnt: $port_cnt"
+ for port in $(seq 1 $port_cnt); do
+ # for each post
+ log " checking $dev port $port status"
+ up=`ssh root@$host ibv_devinfo -d $dev -i $port | grep state | grep -c PORT_ACTIVE`
+ if [ $up -ne 1 ]; then
+ log " $dev port $i is not active"
+ continue
+ fi
+ log " $dev port $port is active"
+ log " add it to the list"
+ # get a list of providers that this device can use
+ providers+=`ssh root@$host cat $dat_conf | grep "$dev $port" | awk '{ print $1 }'`
+ providers+=" "
+ done
+ done
+
+ #add network ib devices
+ net_dev_list=` ssh root@$host netstat -i | grep -v "no statistics available" | tail -n +3 | awk '{ print $1 }'`
+ for net_dev in $net_dev_list; do
+ # for each net device
+ log Found $net_dev net device
+ is_ib=`ssh root@$host ip addr show $net_dev | grep -c infiniband`
+ if [ $is_ib -ne 1 ]; then
+ log " $net_dev net device is not ib device"
+ continue
+ fi
+ log " $net_dev is infiniband device"
+ has_ip_addr=`ssh root@$host ip addr show $net_dev | grep inet | grep -vc inet6`
+ if [ $has_ip_addr -ne 1 ]; then
+ log " $net_dev net device has no ip addr"
+ continue
+ fi
+ log " $net_dev net device has IP address"
+ log " add it to the list"
+ # get a list of providers that this device can use
+ providers+=`ssh root@$host cat $dat_conf | grep "$net_dev 0" | awk '{ print $1 }'`
+ providers+=" "
+ done
+
+ log; log -n "$host povider list: "; for i in $providers; do log -n "$i "; done; log
+ if [ $first_host -eq 1 ]; then
+ # just save providers from first host
+ hosts_providers_list=$providers
+ first_host=0
+ else
+ # Merge providers from prev hosts with the one from the new host
+ # Keep only the providers that are on both lists
+ log hosts p from prev hosts: $hosts_providers_list
+ hosts_providers_list+=$providers
+ hosts_providers_list=`for p in $hosts_providers_list; do echo $p; done | sort | uniq -d`
+ log hosts p after merge: $hosts_providers_list
+ fi
+ providers=""
+ done
+ cnt=0
+ echo -e "\nPovider list:"
+ for i in $hosts_providers_list; do
+ echo $i
+ let cnt+=1
+ done
+ if [ $cnt -eq 0 ]; then
+ echo -e "no devices where found\n\n"
+ exit
+ fi
+ echo -e "Total $cnt providers\n\n"
+}
+
+
+# check if the "server-client-provider" combination is OK
+# Set server_client_provider_is_not_valid_combo to one if not a valid combo
+function check_provider_server_client_combo(){
+ server_client_provider_is_not_valid_combo=0
+ #check the following:
+ # 1. scif providers can only run on the same machine.
+ is_scif=`echo $provider | grep -c scif`
+ if [ $is_scif -eq 1 ]; then
+ server_host=`echo $server | awk -F "-mic" '{ print $1 }'`
+ client_host=`echo $client | awk -F "-mic" '{ print $1 }'`
+ if [ $server_host == $client_host ]; then
+ return
+ else
+ server_client_provider_is_not_valid_combo=1
+ return
+ fi
+ fi
+ # 2. MIC qib can only run mcm provider
+ is_ser_mic=`echo $server | grep -c mic`
+ is_cli_mic=`echo $client | grep -c mic`
+ if [ $is_ser_mic -eq 1 ] || [ $is_cli_mic -eq 1 ]; then
+ # MIC Server or Client
+ is_qib_provider=`echo $provider | grep -c qib`
+ if [ $is_qib_provider -eq 1 ]; then
+ # Server or Client is MIC AND qib provider - make sure provider is MCM
+ is_mcm=`echo $provider | grep -c m`
+ if [ $is_mcm -eq 1 ]; then
+ return
+ else
+ server_client_provider_is_not_valid_combo=1
+ return
+ fi
+ fi
+ fi
+ # 3. check if MICs ib interface is UP
+ is_ib_provider=`echo $provider | grep -ce -ib`
+ if [ $is_ib_provider -eq 1 ]; then
+ interface=`echo $provider | awk -F "ofa-v2-" '{ print $2 }'`
+ if [ $is_ser_mic -eq 1 ]; then
+ up=`ssh root@$server ifconfig | grep -c $interface`
+ if [ $up -eq 1 ]; then
+ return
+ else
+ server_client_provider_is_not_valid_combo=1
+ return
+ fi
+ fi
+ if [ $is_cli_mic -eq 1 ]; then
+ up=`ssh root@$client ifconfig | grep -c $interface`
+ if [ $up -eq 1 ]; then
+ return
+ else
+ server_client_provider_is_not_valid_combo=1
+ return
+ fi
+ fi
+ fi
+}
+
+
+
+
+
+
+while getopts uviVzDmfwhiql:t:P:U:p:d:M:b: option
+do
+ case "${option}"
+ in
+ P) user_provider=${OPTARG};;
+ m) no_inline_data=1 ; mfo_test=1;;
+ f) fast_test=1; loops=1; fast_test_str=" fast test";;
+ p) cpu_mask=${OPTARG}; perf_test=1; W_options="0 1";;
+ U) user_srting=${OPTARG}; b_options="0"; u_options="0"; S_options="0"; w_options="0"; B_options="0";;
+ z) b_options="0"; u_options="0"; S_options="0"; w_options="0"; B_options="0";;
+ w) w_options="1";;
+ u) unidirection_test=1; u_options="1";;
+ D) debug_info=1;;
+ d) dapl_test_user_input=${OPTARG};;
+ v) v_for_test=" -v ";;
+ i) no_inline_data=1;;
+ q) no_inline_data=1 ; mfo_test=1;;
+ t) max_run_time=${OPTARG};;
+ M) dapl_mtu=${OPTARG};;
+ l) loops=${OPTARG};;
+ b) user_b_options=${OPTARG};;
+ V) echo -e "\n\t${0##*/} version $script_version\n\n"; exit;;
+ h) help;;
+ esac
+done
+
+if [ $fast_test -eq 1 ]; then
+ b_options="0"; u_options="0"; S_options="0"; w_options="0"; B_options="0"; D_options="0";
+fi
+
+if [ $perf_test -eq 1 ]; then
+ b_options=$b_options_for_perf_test; u_options="0"; S_options="0"; loops=1; w_options="1"; B_options="0"; user_srting="$user_string -p";dapl_test_user_input="n"; D_options="0";
+ legit_input=`echo $cpu_mask | grep -ci 0x`
+ if [ $legit_input -ne 1 ] && [ "$cpu_mask" != "no_cpu_mask" ]; then
+ echo -e "\n\t< 0xCPUs_mask > or \"no_cpu_mask\" in option -p is missing - input=$cpu_mask - Exit\n\n"
+ exit
+ fi
+fi
+
+if [ $fast_test -eq 1 ] && [ $perf_test -eq 1 ]; then
+ b_options="0"
+fi
+
+if [ $unidirection_test -eq 1 ]; then
+ u_options="1"
+fi
+
+if [ "$user_b_options" != "none" ]; then
+ if [ "$user_b_options" == "all" ]; then
+ b_options=$b_options_for_perf_test
+ else
+ b_options="$user_b_options"
+ fi
+fi
+
+if [ "$dapl_test_user_input" != "n" ] && [ "$dapl_test_user_input" != "y" ] && [ "$dapl_test_user_input" != "o" ]; then
+ echo -e "\n\tdapl test option must be n/y/o - Exit\n\n"
+ exit
+fi
+
+# check mpxyd is running on host machines.
+for host in $host_list; do
+ up=`ssh root@$host "ps ax | grep -c mpxyd"`
+ if [ $up -ne 3 ]; then
+ echo -e "\n\tERROR - mpxyd is not running on $host\n\n"
+ exit
+ fi
+ if [ $no_inline_data -eq 1 ]; then
+ up=`ssh root@$host cat /var/log/mpxyd.log | grep -c "RDMA IB inline threshold 0"`
+ if [ $up -ne 1 ]; then
+ echo on host $host you need to run mpxyd with mcm_ib_inline 0 in /etc/mpxyd.conf file for no inline data test
+ exit 1
+ fi
+ fi
+done
+
+if [ $user_provider == "ALL" ] || [ $user_provider == "all" ]; then
+ providers_search
+else
+ hosts_providers_list=$user_provider
+fi
+
+echo -e "\nServer list: $server_list"
+echo -e "Client list: $client_list"
+echo -e "Host list:"
+for i in $host_list; do
+ echo $i
+done
+echo
+
+if [ $mfo_test -eq 1 ]; then
+ export_str="export DAPL_MAX_INLINE=0 ; export DAPL_MCM_MFO=1 ; "
+ echo -ne "\n\t\t**** Running MFO test case \t\t$export_str ****\n\n"
+elif [ $no_inline_data -eq 1 ]; then
+ export_str="export DAPL_MAX_INLINE=0 ; "
+ echo -ne "\n\t\t**** Running no inline data test case \t\t$export_str ****\n\n"
+else
+ export_str=""
+fi
+
+if [ $debug_info -eq 1 ]; then
+ export_str="$export_str export DAPL_DBG_TYPE=0xffffffff ; "
+ echo -ne "\n\t\t**** Running in debug mode\t\texport value: $export_str ****\n\n"
+fi
+if [ $dapl_mtu -ne 0 ]; then
+ export_str="$export_str export DAPL_IB_MTU=$dapl_mtu ; "
+ echo -ne "\n\t\t**** Setting DAPL_IB_MTU to $dapl_mtu \t\texport value: $export_str ****\n\n"
+fi
+
+if [ $loops -ne 0 ]; then
+ echo -e "\n\tRunning$fast_test_str for $loops iterations"
+fi
+
+if [ $max_run_time -ne 0 ]; then
+ echo -e "\n\tRunning$fast_test_str for $max_run_time minutes"
+fi
+
+if [ $loops -eq 0 ] && [ $max_run_time -eq 0 ]; then
+ echo -ne "\n\tRunning$fast_test_str forever\n\n"
+fi
+
+if [ $perf_test -eq 1 ]; then
+ if [ $unidirection_test -eq 1 ]; then
+ log_file+="unidirection_test-"
+ else
+ log_file+="bidirection_test-"
+ fi
+ log_file+=`date +%F-%H-%M-%S`
+ echo -e "\n\tRunning performance test with cpu mask: $cpu_mask\n\tOutput file: $log_file"
+ echo "Server list: $server_list" > $log_file
+ echo "Client list: $client_list" >> $log_file
+ echo "CPU mask: $cpu_mask" >> $log_file
+ if [ $dapl_mtu -ne 0 ]; then
+ echo "DAPL_IB_MTU: $dapl_mtu" >> $log_file
+ else
+ echo "DAPL_IB_MTU: Default value" >> $log_file
+ fi
+ if [ $unidirection_test -eq 1 ]; then
+ echo "Test type: unidirection test" >> $log_file
+ else
+ echo "Test type:bidirection test" >> $log_file
+ fi
+ for host in $host_list; do
+ op_poll=`ssh root@$host cat /var/log/mpxyd.log | grep -c "OP thread polling enabled"`
+ if [ $op_poll -ne 1 ]; then
+ echo "OP thread polling on $host: disabled" >> $log_file
+ echo -e "\tOP thread polling on $host: disabled"
+ else
+ echo "OP thread polling on $host: enabled" >> $log_file
+ echo -e "\tOP thread polling on $host: enabled"
+ fi
+ done
+ echo -e "\n\n"
+fi
+echo "0" > $dog_file
+dog &
+
+sleep 1
+start_time=`date +%s`
+
+while [ 1 ]; do
+ now=`date +%s`
+ run_time=$(($now-$start_time))
+ ss=$(($run_time%60))
+ mm=$(($run_time/60))
+ total_run_time_in_min=$mm
+ mm=$(($mm%60))
+ hh=$(($run_time/3600))
+ dd=$(($hh/24))
+ hh=$(($hh%24))
+ pp=$(printf "%d days %d hours %d min and %d sec" $dd $hh $mm $ss)
+
+ echo
+ echo
+ echo "**************************************************************"
+ echo "**************************************************************"
+ echo Run time: $pp
+ if [ $max_run_time -ne 0 ] && [ $total_run_time_in_min -ge $max_run_time ]; then
+ echo -e "Ran for the $max_run_time minute requested by the user - Exiting\n\n"
+ break;
+ fi
+ if [ $loops -ne 0 ] && [ $runs -eq $loops ]; then
+ echo -e "Ran for the $loops iterations requested by the user - Exiting\n\n"
+ break;
+ fi
+ runs=$(( $runs + 1 ))
+ echo Starting round $runs
+ date
+ echo "**************************************************************"
+ echo "**************************************************************"
+ echo
+
+ # Runinng
+ for provider in $hosts_providers_list; do
+ do_not_validate_data_with_scif=`echo $provider | grep -c scif`
+ good_provider_for_dapltest=1
+ for server in $server_list; do
+ for client in $client_list; do
+ check_provider_server_client_combo
+ if [ $server_client_provider_is_not_valid_combo -ne 0 ]; then
+ #echo -e "***** ***** skipping test case: Server:$server Client:$client provider:$provider ***** *****"
+ continue
+ fi
+ # Run all test cases between Client and Server.
+ server_host_test
+ done
+ done
+
+ if [ "$dapl_test_user_input" == "o" ] && [ $ran_one_dapltest -eq 0 ]; then
+ echo -e "\n\n\n\n\t\t***** ***** WARNING: only dapltest was set up but no dapltest was done with $provider provider $export_str ***** *****\n\n"
+ else
+ echo -e "\n\n\n\n\t\t***** ***** server client tests with $provider provider $export_str - TEST PASSED ***** *****\n\n"
+ fi
+ done
+done