home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.intel.com
/
2015-02-03.ftp.intel.com.tar
/
ftp.intel.com
/
Pub
/
sites
/
FPD
/
ibsupport
/
scripts
/
sma_pmaquery_lids.sh
< prev
Wrap
Linux/UNIX/POSIX Shell Script
|
2014-06-18
|
6KB
|
190 lines
#!/bin/sh
#####################################################################
# This script queries the InfiniBand fabric for all LIDs then
# attempts to test SMA and PMA responses on each LID. The SMA and
# PMA queries will verify whether all destination LIDs can respond
# to VL0 and VL15 packets.
#####################################################################
# Escape hatch in case the ib commands take too long to run: Ctrl-C (SIGINT)
# stops the script immediately.
# The handler is declared with the POSIX "name()" form because the script
# runs under /bin/sh, where the "function" keyword is not guaranteed to exist.
cleanexit()
{
    # Remove the temporary LID table if it has already been created, so an
    # interrupted run leaves nothing behind.
    if [ -n "${LIDNODELIST:-}" ]
    then
        rm -f -- "$LIDNODELIST"
    fi
    # 130 = 128 + SIGINT, the conventional status of an interrupted command.
    exit 130
}
trap cleanexit INT
# Usage: print the command-line help on stdout and terminate the script with
# status 0. Takes no arguments; $0 is shown as the command name.
Usage()
{
    # The synopsis lists every option described below, including -q.
    echo "Usage: $0 [-q] [-s] [-t] [-v]"
    echo " or"
    echo " $0 [-h | --help]"
    echo " -h|--help: show usage"
    echo " -s: queries all switch ports"
    echo " -t: includes traceroute information on failed ports"
    echo " -q: quiet, only report failures."
    echo " -v: verbose, reports all LIDs, not just failures."
    echo " Verbose wins over quiet."
    echo "This command runs SMA and PMA queries on all LIDs in "
    echo "the fabric. The -s, -t and -v options expand the "
    echo "queries to include switch ports and tracerouting."
    exit 0
}
# Option flags; every one starts disabled and is switched on by its option.
QUIET=0     # -q: report failures only
SWITCH=0    # -s: also query every port of each switch
TRACE=0     # -t: traceroute to each failed LID at the end
VERBOSE=0   # -v: report every LID, not only the failing ones

# Snapshot of the original command line, taken before it is consumed below.
ARGC=$#
ARGV="$*"

# Consume the positional parameters one at a time. Words that match no known
# option are skipped silently.
while [ $# -gt 0 ]
do
    case "$1" in
        --help|-h)
            Usage
            ;;
        -q)
            QUIET=1
            ;;
        -s)
            SWITCH=1
            ;;
        -t)
            TRACE=1
            ;;
        -v)
            VERBOSE=1
            ;;
    esac
    shift
done
# Refuse to run unless the three query tools this script calls by absolute
# path are present and executable.
if [ ! -x /usr/sbin/saquery ] || [ ! -x /usr/sbin/ibportstate ] || [ ! -x /usr/sbin/perfquery ]
then
    echo "This script requires infiniband-diag tools to work properly. Exiting."
    # "exit -1" is not a valid POSIX exit operand; 255 is the status bash
    # produced for it, so callers see the same value.
    exit 255
fi
# Temporary file that will hold the LID table. Abort with a non-zero status
# when it cannot be created (a bare "exit" here would return the status of
# the echo, i.e. 0, and hide the failure from callers).
if ! LIDNODELIST=$(mktemp)
then
    echo "Unable to create temporary file in /tmp. Check for permissions or disk space."
    exit 1
fi
# ---- Run state, updated while the fabric is being queried ----

# Overall result: set to 1 as soon as any SMA or PMA query fails.
FAILED=0
# Running totals of failed SMA (ibportstate) and PMA (perfquery) queries.
SMAFAILED=0
PMAFAILED=0

# Fabric inventory: switch ASICs and HCA ports found in the fabric.
SWITCHCOUNT=0
HCACOUNT=0

# Work actually performed: LIDs checked and ports checked.
LIDCOUNT=0
PORTCOUNT=0

# Space-separated list of failed LIDs, used later for tracert.
FAILEDLIDS=""
# Guard that keeps a LID failing both its SMA and PMA query from being added
# to FAILEDLIDS twice.
LIDALREADYFAILED=0
# Build the LID table in $LIDNODELIST, one line per record:
#   lid;nodetype;numports;portnum;nodename
# The pipeline keeps only the five wanted saquery fields and strips their
# leading tabs. Each following sed stage then folds the next field's line
# onto the previous one, replacing the newline, the field label and its
# padding with ";". The last stage removes the "lid" label itself.
# NOTE(review): the patterns assume saquery pads each field label with dots
# to a fixed width — confirm against the installed saquery version.
# grep -E is used instead of egrep, which current GNU grep reports as
# obsolescent with a warning on stderr.
# The trailing "2> /dev/null" only applies to the final sed stage.
#/usr/sbin/saquery |egrep "lid.|NodeDescription|port_num"|sed -e 's/\t\t//'|
/usr/sbin/saquery |
    grep -E "lid.|node_type|num_ports|port_num|NodeDescription" |
    sed -e 's/\t\t//' |
    sed -e :a -e '$!N;s/\nnode_type.............../;/;ta' -e'P;D' |
    sed -e :a -e '$!N;s/\nnum_ports.............../;/;ta' -e'P;D' |
    sed -e :a -e '$!N;s/\nport_num................/;/;ta' -e'P;D' |
    sed -e :a -e '$!N;s/\nNodeDescription........./;/;ta' -e'P;D' |
    sed -e 's/lid.....................//' > "$LIDNODELIST" 2> /dev/null

# First column of the table: every LID found in the fabric.
LIDLIST=$(cut -d \; -f 1 "$LIDNODELIST")
# Inventory counts, taken from the node type text stored in each record.
SWITCHCOUNT=$(grep -c Switch "$LIDNODELIST")
HCACOUNT=$(grep -c "Channel Adapter" "$LIDNODELIST")
# Announce the run and the fabric inventory. Printed in verbose mode, or
# whenever quiet mode is off.
if [ "$VERBOSE" -eq 1 ] || [ "$QUIET" -eq 0 ]
then
    # Number of records (lines) in the LID table.
    LIDTOTAL=$(wc -l "$LIDNODELIST" | cut -d " " -f 1)
    printf '%s\n' \
        "Beginning SMA and PMA queries of $LIDTOTAL LIDs" \
        "Number of switches: $SWITCHCOUNT" \
        "Number of HCA ports: $HCACOUNT"
fi
# LID of the local port as reported by ibaddr; used later as the source of
# the traceroutes to failed LIDs.
MYLID=$(ibaddr -l | cut -d " " -f 5)

# check_port LID PORT NODE
# Runs one SMA query (ibportstate) and one PMA query (perfquery) against the
# given LID and port. Every failing query prints a message naming the LID,
# node and port, increments SMAFAILED or PMAFAILED and sets FAILED=1.
# Returns 0 when both queries succeed and 1 when at least one fails.
check_port()
{
    CHECKPORT_RC=0
    # another SMA command option is smpquery
    if ! /usr/sbin/ibportstate "$1" "$2" query > /dev/null 2>&1
    then
        echo "LID=$1 NODE=$3 PORT=$2 failed ibportstate"
        SMAFAILED=$((SMAFAILED+1))
        FAILED=1
        CHECKPORT_RC=1
    fi
    if ! /usr/sbin/perfquery "$1" "$2" > /dev/null 2>&1
    then
        echo "LID=$1 NODE=$3 PORT=$2 failed perfquery"
        PMAFAILED=$((PMAFAILED+1))
        FAILED=1
        CHECKPORT_RC=1
    fi
    return $CHECKPORT_RC
}

# Walk the LID table once, splitting each "lid;nodetype;numports;portnum;
# nodename" record directly instead of grepping the file four times per LID.
# The table is read through descriptor 3 so that commands run inside the loop
# cannot consume it via stdin.
while IFS=';' read -r LID TYPE NPORTS PORT NODE <&3
do
    # Skip records without a LID.
    [ -n "$LID" ] || continue

    LIDCOUNT=$((LIDCOUNT+1))
    PORTCOUNT=$((PORTCOUNT+1))

    if [ "$VERBOSE" -eq 1 ]
    then
        if [ "$TYPE" = "Switch" ]
        then
            echo "LID=$LID NODE=$NODE PORT=$PORT NPORTS=$NPORTS TYPE=$TYPE"
        else
            echo "LID=$LID NODE=$NODE PORT=$PORT"
        fi
    fi

    # Record the LID for the traceroute pass exactly once, whether the SMA
    # query, the PMA query or both failed. Deriving this from the helper's
    # return status keeps the bookkeeping independent of what happened to
    # the previous LID.
    if ! check_port "$LID" "$PORT" "$NODE"
    then
        FAILEDLIDS="$FAILEDLIDS $LID "
    fi

    # With -s, additionally query every external port of a switch.
    if [ "$TYPE" = "Switch" ] && [ "$SWITCH" -eq 1 ]
    then
        for PORT in $(seq 1 "$NPORTS")
        do
            check_port "$LID" "$PORT" "$NODE"
            PORTCOUNT=$((PORTCOUNT+1))
        done
    fi
done 3< "$LIDNODELIST"
# The LID table is no longer needed once every query has run.
if [ -f "$LIDNODELIST" ]
then
    rm -f -- "$LIDNODELIST"
fi

# Summary line. printf is used for the unterminated first half because
# "echo -n" is not portable across /bin/sh implementations.
printf '%s' "SMA and PMA queries on $LIDCOUNT LIDs and $PORTCOUNT ports:"
if [ "$FAILED" -eq 1 ]
then
    echo " $SMAFAILED SMA and $PMAFAILED PMA queries FAILED"
    if [ "$TRACE" -eq 1 ]
    then
        echo "######################### Trace Route Info ##########################"
        # FAILEDLIDS is a space-separated list, so it is left unquoted on
        # purpose to iterate over its words.
        for LID in $FAILEDLIDS
        do
            ibtracert "$MYLID" "$LID"
            echo "#####################################################################"
        done
    fi
    # "exit -1" is not a valid POSIX exit operand; 255 is the status bash
    # produced for it, so callers see the same value.
    exit 255
else
    echo " PASSED"
    exit 0
fi
#####################################################################
# Revision History
# Rev 1.00.00 2014/05/05
# - Initial script after many, many cleanups.
# Rev 1.00.01 2014/05/06
# - Added verbose logging and reporting switch and HCA port count
# Rev 1.00.02 2014/05/19
# - Added quiet option to only report if failures
# Rev 1.00.03 2014/06/11
# - Fixed issue with saquery reporting decimal rather than hex LIDs
#
#####################################################################