Share this:
Posted in:
Shawn Bodily's POWER/AIX/AI Blog
Copy the following script to a directory in your PATH and make it executable. The valid flags are listed and explained below. I personally use qha -nev the most.
qha version 9.06
Usage: qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]
-n displays network interfaces
-N displays network interfaces + non IP heartbeat disk
-v shows online VGs
-l logs entries to /tmp/qha.out
-e shows running event
-m shows appmon status
-1 single interation
-c shows CAA SAN/Disk Status (AIX7.1 TL3 min.)
#!/bin/ksh
# Purpose:  Provides an alternative to SNMP monitoring for PowerHA/HACMP
#           (clinfo and clstat).
#           Designed to be run within the cluster, not remotely. See next point!
#           Can be customised to run remotely and monitor multiple clusters!
# Version:  9.06
#           Updates for PowerHA version 7.1
# Authors:  1. Alex Abderrazag IBM UK
#           2. Bill Miller IBM US
#              Additions since 8.14.
# qha can be freely distributed. If you have any questions or would like to
# see any enhancements/updates, please email abderra@uk.ibm.com
#
# Usage:    qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]   (see usage() below)

# VARS
export PATH=$PATH:/usr/es/sbin/cluster/utilities
# Installed cluster.es.server.rte level with the dots stripped (e.g. 7.1.3.0 -> 7130).
VERSION=$(lslpp -L | grep -i cluster.es.server.rte | awk '{print $2}' | sed 's/\.//g')
# Cluster name as stored in the HACMPcluster ODM class.
CLUSTER=$(odmget HACMPcluster | grep -v node | grep name | awk '{print $3}' | sed "s:\"::g")
UTILDIR=/usr/es/sbin/cluster/utilities

# clrsh dir in v7 must be /usr/sbin; in previous versions it's
# /usr/es/sbin/cluster/utilities.
# Don't forget also that the rhost file for >v7 is /etc/cluster/rhosts
if [[ $(lslpp -L | grep -i cluster.es.server.rte | awk '{print $2}' | cut -d'.' -f1) -ge 7 ]]; then
  CDIR=/usr/sbin
else
  CDIR=$UTILDIR
fi

# Per-process work files: the $$ (PID) suffix keeps concurrent qha runs from
# overwriting each other's screen buffer, log and adapter list.
OUTFILE=/tmp/.qha.$$
LOGGING=/tmp/qha.out.$$
ADFILE=/tmp/.ad.$$
# Full path of hacmp.out, built from the directory recorded in the HACMPlogs ODM
# class. Kept in backticks because the sed expression contains parentheses.
HACMPOUT=`/usr/bin/odmget -q name="hacmp.out" HACMPlogs | fgrep value | sed 's/.*=\ "\(.*\)"$/\1\/hacmp.out/'`
# Command used to run something on a cluster node: $COMMcmd <node> <command...>
COMMcmd="$CDIR/clrsh"
# Seconds to sleep between screen refreshes.
REFRESH=0

# usage: print the version banner and the list of supported flags to stdout.
usage()
{
  echo "qha version 9.06"
  echo "Usage: qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]"
  echo "\t\t-n displays network interfaces"
  echo "\t\t-N displays network interfaces + nonIP heartbeat disk"
  echo "\t\t-v shows online VGs"
  echo "\t\t-l logs entries to /tmp/qha.out"
  echo "\t\t-e shows running event"
  echo "\t\t-m shows appmon status"
  echo "\t\t-1 single interation"
  echo "\t\t-c shows CAA SAN/Disk Status (AIX7.1 TL3 min.)"
}

# adapters: print the interface list that work() stored in $ADFILE, grouping
# consecutive lines that share the same interface name onto one output line,
# then remove $ADFILE. When HBOD is TRUE it additionally lists the diskhb
# devices of $HANODE and whether each one shows up in "lssrc -ls topsvcs".
# Globals read:    ADFILE, HBOD, VERSION, HANODE, COMMcmd, UTILDIR, OUTFILE
# Globals written: i, j, en[], name[], VER, HBODs, APVID, AHBOD
function adapters {
  i=1
  j=1
  while read line
  do
    en[i]=$(echo $line | awk '{print $1}')
    name[i]=$(echo $line | awk '{print $2}')
    # The very first interface name opens the first output line.
    if [ $i -eq 1 ]; then
      printf " %s " "${en[1]}"
    fi
    # j trails i by one (except on the first pass), so this compares the
    # current interface name with the previous line's.
    if [[ ${en[i]} = ${en[j]} ]]; then
      printf "%s " "${name[i]}"
    else
      printf "\n%s %s " "${en[i]}" "${name[i]}"
    fi
    let i=i+1
    let j=i-1
  done < $ADFILE
  rm $ADFILE

  if [ "$HBOD" = "TRUE" ]; then
    # Code for v6 and below only. To be deleted soon.
    # Process Heartbeat on Disk networks (Bill Millers code)
    VER=$(echo $VERSION | cut -c 1)
    if [[ $VER = "7" ]]; then
      print "[HBOD option not supported]" >> $OUTFILE
    fi
    HBODs=$($COMMcmd $HANODE "$UTILDIR/cllsif" | grep diskhb | grep -w $HANODE | awk '{print $8}')
    for i in $(print $HBODs)
    do
      APVID=$($COMMcmd $HANODE "lspv" | grep -w $i | awk '{print $2}' | cut -c 13-)
      AHBOD=$($COMMcmd $HANODE lssrc -ls topsvcs | grep -w r$i | awk '{print $4}')
      # The bracketed labels are quoted so the shell cannot glob-expand them.
      if [ -n "$AHBOD" ]
      then
        printf "\n\t%-13s %-10s" $i"("$APVID")" "[activeHBOD]"
      else
        printf "\n\t%-13s %-10s" $i "[inactiveHBOD]"
      fi
    done
  fi
}

# work: print the status section for one cluster node.
# Arguments: $1 node name   $2 node counter (stored in CNT, not used further)
#            $3 TRUE to show network interfaces   $4 TRUE to show online VGs
# Globals read: COMMcmd, UTILDIR, HACMPOUT, SHOWEVENT, APPMONSTAT, CAA, ADFILE
# The node is probed by running "date" on it through $COMMcmd; if that fails
# the node is pinged instead and a diagnostic line is printed.
function work {
  HANODE=$1; CNT=$2 NET=$3 VGP=$4
  #clrsh $HANODE date > /dev/null 2>&1 || ping -w 1 -c1 $HANODE > /dev/null 2>&1
  if $COMMcmd $HANODE date > /dev/null 2>&1; then
    EVENT=""
    CLSTRMGR=$($COMMcmd $HANODE lssrc -ls clstrmgrES | grep -i state | sed 's/Current state: //g')
    if [[ $CLSTRMGR != ST_STABLE && $CLSTRMGR != ST_INIT && $SHOWEVENT = TRUE ]]; then
      # Cluster manager is neither stable nor in init: show the last event
      # started according to hacmp.out on that node.
      EVENT=$($COMMcmd $HANODE cat $HACMPOUT | grep "EVENT START" | tail -1 | awk '{print $6}')
      printf "\n%-8s %-7s %-15s\n" $HANODE iState: "$CLSTRMGR [$EVENT]"
    else
      printf "\n%-8s %-7s %-15s\n" $HANODE iState: "$CLSTRMGR"
    fi

    # One line per resource group that is not OFFLINE and whose colon-separated
    # field 3 equals this node.
    $UTILDIR/clfindres -s 2>/dev/null | grep -v OFFLINE | while read A
    do
      if [[ "$(echo $A | awk -F: '{print $3}')" == "$HANODE" ]]; then
        echo $A | awk -F: '{printf " %-18.16s %-10.12s %-1.20s", $1, $2, $9}'
        if [ "$APPMONSTAT" = "TRUE" ]; then
          RG=$(echo $A | awk -F':' '{print $1}')
          # grep -p is the AIX paragraph option.
          APPMON=$($UTILDIR/clRGinfo -m | grep -p $RG | grep "ONLINE" | awk 'NR>1 {print $1" "$2}')
          print "($APPMON)"
        else
          print ""
        fi
      fi
    done

    if [ "$CAA" = "TRUE" ]; then
      IP_Comm_method=$(odmget HACMPcluster | grep heartbeattype | awk -F'"' '{print $2}')
      case $IP_Comm_method in
        C) # we're multicasting
           printf " CAA Multicasting:"
           $COMMcmd $HANODE lscluster -m | grep 'en[0-9]' | awk '{printf " ("$1" "$2")"}'
           echo ""
           ;;
        U) # we're unicasting
           printf " CAA Unicasting:"
           $COMMcmd $HANODE lscluster -m | grep tcpsock | awk '{printf " ("$2" "$3" "$5")"}'
           echo ""
           ;;
      esac
      SAN_COMMS_STATUS=$(/usr/lib/cluster/clras sancomm_status | egrep -v "(--|UUID)" | awk -F'|' '{print $4}' | sed 's/ //g')
      DP_COMM_STATUS=$(/usr/lib/cluster/clras dpcomm_status | grep $HANODE | awk -F'|' '{print $4}' | sed 's/ //g')
      print " CAA SAN Comms: $SAN_COMMS_STATUS | DISK Comms: $DP_COMM_STATUS"
    fi

    if [ "$NET" = "TRUE" ]; then
      # Interface name and column 4 of "netstat -i", minus header, link and
      # loopback lines; adapters() formats and then deletes the file.
      $COMMcmd $HANODE netstat -i | egrep -v "(Name|link|lo)" | awk '{print $1" "$4" "}' > $ADFILE
      adapters; printf "\n- "
    fi

    if [ "$VGP" = "TRUE" ]; then
      # Online VGs (excluding caavg_private and rootvg) with their hdisks,
      # massaged by the sed chain into a single line.
      # Kept in backticks because the pipeline contains quoted parentheses.
      VGO=`$COMMcmd $HANODE "lsvg -o |fgrep -v caavg_private |fgrep -v rootvg |lsvg -pi 2> /dev/null" |awk '{printf $1")"}' |sed 's:)PV_NAME)hdisk::g' | sed 's/:/(/g' |sed 's:):) :g' |sed 's: hdisk:(:g' 2> /dev/null`
      if [ "$NET" = "TRUE" ]; then
        echo "$VGO-"
      else
        echo "- $VGO-"
      fi
    fi
  else
    if ping -w 1 -c1 $HANODE > /dev/null 2>&1; then
      echo "\nPing to $HANODE good, but can't get the status. Check clcomdES."
    else
      echo "\n$HANODE not responding, check network availability."
    fi
  fi
}

# Main
NETWORK="FALSE"
VG="FALSE"
HBOD="FALSE"
LOG="FALSE"
SHOWEVENT="FALSE"
APPMONSTAT="FALSE"
STOP=0
CAA=FALSE
REMOTE="FALSE"

# Get Vars
while getopts :nNvlem1c ARGs
do
  case $ARGs in
    n) # -n show interface info
       NETWORK="TRUE";;
    N) # -N show interface info and activeHBOD
       NETWORK="TRUE"; HBOD="TRUE";;
    v) # -v show ONLINE VG info
       VG="TRUE";;
    l) # -l log to /tmp/qha.out
       LOG="TRUE";;
    e) # -e show running events if cluster is unstable
       SHOWEVENT="TRUE";;
    m) # -m show status of monitor app servers if present
       APPMONSTAT="TRUE";;
    1) # -1 exit after first iteration
       STOP=1;;
    c) # CAA SAN / DISK Comms
       CAA=TRUE;;
    \?) printf "\nNot a valid option\n\n" ; usage ; exit ;;
  esac
done

# Previous log line (minus its leading fields); used to detect state changes.
OO=""
# Clean up the work files on HUP, INT, signal 12 and TERM. Signal 9 (KILL)
# cannot be caught, so it is not listed.
trap "rm -f $OUTFILE $ADFILE; exit 0" 1 2 12 15

while true
do
  COUNT=0
  # Build the whole screen in $OUTFILE first (starting with the clear-screen
  # escape sequence), then display it in one go.
  print "\\033[H\\033[2J\t\tCluster: $CLUSTER ($VERSION)" > $OUTFILE
  echo "\t\t$(date +%T" "%d%b%y)" >> $OUTFILE
  if [[ $REMOTE = "TRUE" ]]; then
    # NOTE(review): CLHOSTS is not defined anywhere in this script; it has to
    # be set to a node-list file before REMOTE is switched to TRUE.
    Fstr=$(cat $CLHOSTS | grep -v "^#")
  else
    Fstr=$(odmget HACMPnode | grep name | sort -u | awk '{print $3}' | sed "s:\"::g")
  fi
  for MAC in $Fstr
  do
    let COUNT=COUNT+1
    work $MAC $COUNT $NETWORK $VG $HBOD
  done >> $OUTFILE
  cat $OUTFILE

  if [ "$LOG" = "TRUE" ]; then
    # Flatten the screen to a single line, then drop its first three words
    # before comparing with the previous iteration.
    wLINE=$(cat $OUTFILE |sed s'/^.*Cluster://g' | awk '{print " "$0}' |tr -s '[:space:]' '[ *]' | awk '{print $0}')
    wLINE_three=$(echo $wLINE | awk '{for(i=4;i<=NF;++i) printf("%s ", $i) }')
    if [[ ! "$OO" = "$wLINE_three" ]]; then
      # Note, there's been a state change, so write to the log
      # Alternatively, do something additional, for example: send an snmp trap
      # alert, using the snmptrap command. For example:
      # snmptrap -c <community> -h <snmp agent> -m "appropriate message"
      echo "$wLINE" >> $LOGGING
    fi
    OO="$wLINE_three"
  fi

  if [[ $STOP -eq 1 ]]; then
    # Single-iteration mode: remove the screen buffer before leaving.
    rm -f $OUTFILE
    exit 0
  fi
  sleep $REFRESH
done