218 lines
6.6 KiB
Bash
Executable File
218 lines
6.6 KiB
Bash
Executable File
#!/usr/bin/ksh
|
|
#
|
|
# SCRIPT: stale_VG_PV_LV_PP_mon.ksh
|
|
#
|
|
# AUTHOR: Randy Michael
|
|
# DATE: 01/29/2007
|
|
# REV: 1.2.P
|
|
#
|
|
# PLATFORM: AIX only
|
|
#
|
|
# PURPOSE: This shell script is used to query the system for stale PPs.
|
|
# The method queries the system for all of the currently varied-on
|
|
# volume groups and then builds a list of the PVs to query. If a PV
|
|
# query detects any stale partitions, notification is sent to the
|
|
# screen. Each step in the process has user notification.
|
|
#
|
|
# REVISION LIST:
|
|
#
|
|
#
|
|
# set -x # Uncomment to debug this shell script
|
|
# set -n # Uncomment to check command syntax without any execution
|
|
#
|
|
# EXIT CODES: 0 ==> Normal execution or no stale PP were found
|
|
# 1 ==> Trap EXIT
|
|
# 2 ==> Auto resyncing failed
# 99 ==> Script was executed on a platform other than AIX
|
|
#
|
|
####################################################
|
|
######### DEFINE VARIABLES HERE ####################
|
|
####################################################
|
|
|
|
# Platform guard: refuse to run unless uname reports exactly "AIX".
# On any other platform print an error and leave with return code 99.

if [[ $(uname) != AIX ]]
then
    echo "\nERROR: This shell script will only work on AIX"
    echo "...EXITING...\n"
    exit 99
fi
|
|
|
|
|
|
ATTEMPT_RESYNC=FALSE         # Flag to enable auto resync, "TRUE" will resync

LOGFILE="/tmp/stale_PP_log"  # Stale PP logfile
THIS_HOST=$(hostname)        # Hostname of this machine

# Counters
STALE_PP_COUNT=0             # Incremented once per PV that reports stale PPs
STALE_PV_COUNT=0             # Incremented once per VG that reports stale PVs

# Space-separated work lists, all start out empty
HDISK_LIST=                  # Initialize to NULL
INACTIVE_PV_LIST=            # PVs whose "PV STATE:" is not "active"
STALE_PV_LIST=               # PVs that report stale partitions
STALE_LV_LIST=               # LVs residing on the PVs in STALE_PV_LIST
STALE_VG_LIST=               # VGs that report stale PVs
RESYNC_LV_LIST=              # LVs selected for resyncing
PV_LIST=                     # Every PV belonging to a VG in STALE_VG_LIST
|
|
|
|
#######################################
####   INITIALIZE THE LOG FILE     ####

# Start a fresh log on every run: the redirection truncates $LOGFILE,
# then the creation date and this machine's host name are written to it.

{
    date
    echo "\n$THIS_HOST \n"
} > $LOGFILE
|
|
|
|
#### DEFINE FUNCTIONS HERE ############
|
|
|
|
# trap_exit - tell the user that the script is ending because of a
# trapped signal.  Takes no arguments, writes one notice to standard
# output, and does not terminate the script by itself.

function trap_exit {
    echo "\n\t...EXITING on a TRAPPED signal...\n"
}
|
|
|
|
#######################################
|
|
|
|
# Set a trap...
#
# On HUP (1), INT (2), QUIT (3), TRAP (5) or TERM (15) print the
# trap_exit notice and leave with return code 1.

trap 'trap_exit; exit 1' HUP INT QUIT TRAP TERM
|
|
|
|
#######################################
|
|
######### BEGINNING OF MAIN ###########
|
|
#######################################
|
|
|
|
# Inform the user at each step.
#
# Walk every currently varied-on volume group ("lsvg -o") and read the
# "STALE PVs:" count from its "lsvg <vg>" report.  Each VG that reports
# at least one stale PV is remembered in $STALE_VG_LIST and all of the
# physical volumes listed by "lsvg -p <vg>" are appended to $PV_LIST.

echo "\nSearching each Volume Group for stale Physical Volumes...\c" \
    | tee -a $LOGFILE

for VG in $(lsvg -o)
do
    NUM_STALE_PV=$(lsvg $VG | awk '/STALE PVs:/ {print $3}')

    # Nothing stale in this volume group, go on to the next one
    ((NUM_STALE_PV > 0)) || continue

    STALE_VG_LIST="$STALE_VG_LIST $VG"

    # The first two lines of the "lsvg -p" report are skipped; the
    # first field of every remaining line is taken as a PV name
    PV_LIST="$PV_LIST $(lsvg -p $VG | awk 'NR > 2 {print $1}')"

    # One increment per volume group that reported stale PVs
    ((STALE_PV_COUNT = STALE_PV_COUNT + 1))
done
|
|
|
|
# Decide whether there is anything left to do.  When no volume group
# reported a stale PV ($STALE_PV_COUNT is zero) say so and exit with
# return code 0; otherwise announce the per-hdisk search that follows.

if ((STALE_PV_COUNT != 0))
then
    echo "\nStale Disk Mirrors Found!...Searching each hdisk for stale \
PPs...\c" | tee -a $LOGFILE
else
    echo "\nNo Stale Disk Mirrors Found...EXITING...\n" | tee -a $LOGFILE
    exit 0
fi
|
|
|
|
# Now we have a list of PVs from every VG that reported stale PVs.
# The next step is to query each PV to make sure it is in an "active"
# state and then query each active PV for stale PPs.  If a PV is found
# to be inactive we record it and do NOT query it for stale partitions,
# but move on to the next PV in the $PV_LIST.

# find_stale_pvs - classify every PV in $PV_LIST.
#   Globals read:    PV_LIST
#   Globals written: HDISK, PV_STATE, NUM_STALE_PP,
#                    INACTIVE_PV_LIST (PVs whose "PV STATE:" is not "active"),
#                    STALE_PV_LIST    (active PVs with "STALE PARTITIONS:" > 0),
#                    STALE_PP_COUNT   (incremented once per PV added to
#                                      STALE_PV_LIST)
#   Arguments:       none
function find_stale_pvs
{
    for HDISK in $PV_LIST
    do
        PV_STATE=$(lspv $HDISK | grep 'PV STATE:' | awk '{print $3}')

        if [[ $PV_STATE != 'active' ]]
        then
            INACTIVE_PV_LIST="$INACTIVE_PV_LIST $HDISK"
            # Skip the stale-partition query for an inactive PV.  An
            # explicit "continue" is used instead of grepping the
            # inactive list for this disk: a grep is a substring match
            # (hdisk1 also matches hdisk10) and is easy to get wrong.
            continue
        fi

        NUM_STALE_PP=$(lspv $HDISK | grep 'STALE PARTITIONS:' \
                       | awk '{print $3}')

        if ((NUM_STALE_PP > 0))
        then
            STALE_PV_LIST="$STALE_PV_LIST $HDISK"
            ((STALE_PP_COUNT = STALE_PP_COUNT + 1))
        fi
    done
}

find_stale_pvs
|
|
|
|
# $STALE_PV_LIST now names the PVs that contain stale PPs.
# Collect into $STALE_LV_LIST the logical volumes reported by
# "lspv -l" for each of those PVs.

echo "\nSearching each disk with stale PPs for associated LVs\c" \
    | tee -a $LOGFILE

for PV in $STALE_PV_LIST
do
    # Skip the first two lines of the "lspv -l" report and keep the
    # first field of every remaining line
    LVS_ON_PV=$(lspv -l $PV | awk 'NR > 2 {print $1}')
    STALE_LV_LIST="$STALE_LV_LIST $LVS_ON_PV"
done
|
|
|
|
# Using the STALE_LV_LIST variable list we want to query
# each LV to find which ones need to be resynced

# build_resync_lv_list - select the LVs that really have stale PPs.
#   Globals read:    STALE_LV_LIST
#   Globals written: LV, LV_NUM_STALE_PP,
#                    RESYNC_LV_LIST (each LV whose "STALE PPs:" count from
#                                    lslv is greater than zero, listed once)
#   Arguments:       none
function build_resync_lv_list
{
    for LV in $STALE_LV_LIST
    do
        # The same LV can be listed once per stale disk it lives on;
        # only add it to the resync list a single time
        case " $RESYNC_LV_LIST " in
            *" $LV "*) continue ;;
        esac

        LV_NUM_STALE_PP=$(lslv $LV | grep "STALE PPs:" | awk '{print $3}')

        # Referencing the variable by name (no $) makes an empty value
        # evaluate as 0 instead of causing an arithmetic syntax error
        if ((LV_NUM_STALE_PP > 0))
        then
            RESYNC_LV_LIST="$RESYNC_LV_LIST $LV"
        fi
    done
}

echo "\nSearch each LV for stale partitions to build a resync LV list\c" \
    | tee -a "$LOGFILE"

build_resync_lv_list
|
|
|
|
# Warn the user about every inactive PV that was found.  The loop body
# simply never runs when $INACTIVE_PV_LIST is NULL or blank, so no
# separate emptiness test is needed.  Each warning goes to both the
# screen and $LOGFILE.

for PV in $INACTIVE_PV_LIST
do
    {
        echo "\nWARNING: Inactive Physical Volume Found:"
        echo "\n$PV is currently inactive:\n"
        echo "\nThis script is not suitable to to correct this problem..."
        echo " ...CALL IBM SUPPORT ABOUT ${PV}..."
    } | tee -a $LOGFILE
done
|
|
|
|
# Summary report, written to both the screen and $LOGFILE: the volume
# groups with stale PVs, the disks holding stale partitions and the
# logical volumes selected for resyncing.

{
    echo "\nStale Partitions have been found on at least one disk!"
    echo "\nThe following Volume Group(s) have stale PVs:\n"
    echo $STALE_VG_LIST
    echo "\nThe stale disk(s) involved include the following:\n"
    echo $STALE_PV_LIST
    echo "\nThe following Logical Volumes need to be resynced:\n"
    echo $RESYNC_LV_LIST
} | tee -a $LOGFILE
|
|
|
|
# attempt_resync - resync the LVs in $RESYNC_LV_LIST when auto resync
# is enabled.
#   Globals read:    ATTEMPT_RESYNC ("TRUE" enables the resync),
#                    RESYNC_LV_LIST, STALE_PV_LIST, LOGFILE
#   Globals written: SYNC_OUTPUT, SYNC_RC
#   Arguments:       none
#   Returns:         0 when resync is disabled, when there is nothing to
#                    resync, or when syncvg succeeds.
#                    Exits the script with code 2 when syncvg fails.
function attempt_resync
{
    if [[ $ATTEMPT_RESYNC != "TRUE" ]]
    then
        echo "\nAuto resync is not enabled...set to TRUE to automatically \
resync\n" | tee -a "$LOGFILE"
        echo "\n\t...EXITING...\n" | tee -a "$LOGFILE"
        return 0
    fi

    # syncvg -l needs at least one LV name; do not call it with none
    if [[ -z $(echo $RESYNC_LV_LIST) ]]
    then
        echo "\nNo Logical Volumes need to be resynced...EXITING...\n" \
            | tee -a "$LOGFILE"
        return 0
    fi

    # Name the disks that hold the stale partitions being resynced
    echo "\nAttempting to resync the LVs on $STALE_PV_LIST ...\n" \
        | tee -a "$LOGFILE"

    # Capture syncvg's own output (stdout and stderr) and exit status.
    # Piping syncvg straight into tee would make $? report tee's status,
    # so a failed resync could never be detected.
    SYNC_OUTPUT=$(syncvg -l $RESYNC_LV_LIST 2>&1)
    SYNC_RC=$?

    if [[ -n $SYNC_OUTPUT ]]
    then
        echo "$SYNC_OUTPUT" | tee -a "$LOGFILE"
    fi

    if ((SYNC_RC != 0))
    then
        echo "\nResyncing FAILED...EXITING...\n" | tee -a "$LOGFILE"
        exit 2
    fi

    echo "\nResyncing all of the LVs SUCCESSFUL...EXITING..." \
        | tee -a "$LOGFILE"
    return 0
}

attempt_resync

echo "\nThe log file is: $LOGFILE\n"
|
|
|