return to PRS Technologies website


alarmprog1.sh
#!/usr/bin/ksh
################################################################################
#
# Module:       alarmprog1.sh
#
# Author:       Peter R. Schmidt
#
# Description:
#
#   Set ALARMPROGRAM to the full pathname of an executable file that you
#   write and that the database server executes when noteworthy events occur.
#   Noteworthy events include database, table, index, or blob failure; chunk or
#   dbspace taken off-line; internal subsystem failure; initialization failure;
#   and detection of long transaction.
#
#   What this script does with an event:
#     1. Serializes concurrent invocations with a lock file in
#        $INFORMIXHOME/logs.
#     2. Drops "Log Backup completed" (class 18) information events.
#     3. For "Logical Log Complete" (class 23) information events, works out
#        what percentage of the logical logs is not backed up and either drops
#        the event (below MAX_PCT1) or escalates it (severity 3, or 4 at or
#        above MAX_PCT2).
#     4. Appends one line per event to $INFORMIXHOME/logs/alarm.log and
#        suppresses mail for an event of the same class seen on the same day
#        within APART minutes (class 16 is never suppressed).
#     5. Mails the event to EMAIL_LIST2 (severity >= 3) or EMAIL_LIST1.
#
# Change Log
#
#   Date        Who             Description
#
#   08/09/2000  Peter Schmidt   Start Program
#   10/03/2000  Peter Schmidt   Added code to eliminate quickly repeating messages
#   10/05/2000  Peter Schmidt   Add code for checking percent of logs full
#   03/20/2001  Peter Schmidt   Treat newly added logs as backed up
#
################################################################################
#
# Run time Arguments
#
#   Arg     Parameter               Data Type
#
#   arg 1   Event severity          integer
#   arg 2   Event class ID          integer
#   arg 3   Event class msg         string
#   arg 4   Event specific msg      string
#   arg 5   Event see also file     string
#
# ------------------------------------------------------------------------------
#
# Severity  Description
#
#   1   Not noteworthy.
#       The event is not reported to the alarm program
#       (for example, date change in the message log).
#
#   2   Information.
#       No error has occurred, but some routine event completed successfully
#       (for example, checkpoint or log backup completes).
#
#   3   Attention.
#       This event does not compromise data or prevent the use of the system;
#       however, it warrants attention (for example, one chunk of a mirrored
#       pair goes down).
#
#   4   Emergency.
#       Something unexpected occurred that might compromise data or access to
#       data (assertion failure, or oncheck reports data corrupt).
#       Take action immediately.
#
#   5   Fatal.
#       Something unexpected occurred and caused the database server to fail.
#
# ------------------------------------------------------------------------------
#
# Class ID  Class Message
#
#    1  Table failure: %s (dbsname:owner.tabname)
#    2  Index failure: %s (dbsname:owner.tabname-idxname)
#    3  Blob failure: %s (dbsname:owner.tabname)
#    4  Chunk is off-line, mirror is active: %ld (chunk number)
#    5  DBSpace is off-line: %s (dbspace name)
#    6  Internal Subsystem failure: %s
#    7  Database server initialization failure
#    8  Physical Restore failed
#    9  Physical Recovery failed
#   10  Logical Recovery failed
#   11  Cannot open Chunk: %s (pathname)
#   12  Cannot open Dbspace: %s (dbspace name)
#   13  Performance Improvement possible
#   14  Database failure. %s (database name)
#   15  High-availability data-replication failure
#   16  Archive completed: %s (dbspace list)
#   17  Archive aborted: %s (dbspace list)
#   18  Log Backup completed: %ld (log number)
#   19  Log Backup aborted: %ld (log number)
#   20  Logical Logs are full -- Backup is needed
#   21  Database server resource overflow: %s (resource name)
#   22  Long Transaction detected
#   23  Logical Log %ld (number) Complete
#   24  Unable to Allocate Memory
#
################################################################################

EMAIL_LIST1="informix"        # recipients for events below severity 3
EMAIL_LIST2="informix root"   # recipients for events of severity 3 and above

MAX_PCT1=50   # percentage of full logs that should trigger an alarm warning
MAX_PCT2=75   # percentage of full logs that should trigger an alarm emergency

APART=2       # THIS IS HOW FAR APART MESSAGES MUST BE (IN MINUTES)

RETRIES=200   # number of 5-second waits for the lock before giving up

LOCKFILE=     # set by main once INFORMIXHOME is known
LOCK_HELD=0   # 1 only while this process owns $LOCKFILE

#-------------------------------------------------------------------------------
# Remove the lock file, but only if this process created it.
# Safe to call more than once.
#-------------------------------------------------------------------------------
release_lock() {
  if [ "$LOCK_HELD" = 1 ]; then
    rm -f "$LOCKFILE"
    LOCK_HELD=0
  fi
}

# Make sure an interrupted run does not leave a stale lock behind; a stale lock
# would make every later alarm wait RETRIES * 5 seconds and then be dropped.
trap 'release_lock' EXIT
trap 'exit 1' HUP INT TERM

#-------------------------------------------------------------------------------
# is_uint VALUE
# Returns 0 when VALUE is a non-empty string of decimal digits, 1 otherwise.
#-------------------------------------------------------------------------------
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

#-------------------------------------------------------------------------------
# severity_desc SEVERITY
# Prints the text for an event severity (1-5), or "Unknown" for anything else.
#-------------------------------------------------------------------------------
severity_desc() {
  case "$1" in
    1) printf '%s\n' "Not noteworthy" ;;
    2) printf '%s\n' "Information" ;;
    3) printf '%s\n' "Attention" ;;
    4) printf '%s\n' "Emergency" ;;
    5) printf '%s\n' "Fatal" ;;
    *) printf '%s\n' "Unknown" ;;
  esac
}

#-------------------------------------------------------------------------------
# minutes_in_day HH:MM[:SS]
# Prints the number of minutes since midnight for a 24-hour clock time.
#-------------------------------------------------------------------------------
minutes_in_day() {
  typeset hh rest mm
  hh=${1%%:*}
  rest=${1#*:}
  mm=${rest%%:*}
  # Strip one leading zero so "08"/"09" are not rejected as invalid octal.
  printf '%s\n' "$(( ${hh#0} * 60 + ${mm#0} ))"
}

#-------------------------------------------------------------------------------
# pct_not_backed_up TOTAL BACKED
# Prints the integer percentage (truncated) of TOTAL logs that are not in the
# BACKED count.  Returns 1 without output when either argument is not an
# unsigned integer or TOTAL is zero.
#-------------------------------------------------------------------------------
pct_not_backed_up() {
  if is_uint "$1" && is_uint "$2" && [ "$1" -gt 0 ]; then
    printf '%s\n' "$(( ($1 - $2) * 100 / $1 ))"
    return 0
  fi
  return 1
}

#-------------------------------------------------------------------------------
# is_repeat DATE MINUTES CLASS LASTDATE LASTMINUTES LASTCLASS
# Returns 0 when the current event (DATE, MINUTES, CLASS) repeats the previously
# logged one: same date, same class, and no more than $APART minutes later.
# Class 16 (Archive completed) is never treated as a repeat.
#-------------------------------------------------------------------------------
is_repeat() {
  if [ "$3" = 16 ]; then
    return 1
  fi
  if [ "$1" != "$4" ] || [ "$3" != "$6" ]; then
    return 1
  fi
  if is_uint "$2" && is_uint "$5"; then
    [ "$(( $2 - $5 ))" -le "$APART" ]
    return $?
  fi
  return 1
}

#-------------------------------------------------------------------------------
# build_msg SEVERITY_DESC EVENT_MSG EVENT_ADD_TEXT
# Prints the mail body.  The additional text is appended only when it is
# non-empty and differs from the class message.
#-------------------------------------------------------------------------------
build_msg() {
  if [ -z "$3" ] || [ "$2" = "$3" ]; then
    printf '%s\n' "Informix: $1, $2"
  else
    printf '%s\n' "Informix: $1, $2, $3"
  fi
}

#-------------------------------------------------------------------------------
# acquire_lock
# Try to deal with multiple occurrences of this program.
# Waits (5 seconds per retry, at most $RETRIES retries) while $LOCKFILE exists,
# then creates it.  Creation uses noclobber so two instances cannot both
# believe they created the file.  Returns 1 when the lock never became free.
#-------------------------------------------------------------------------------
acquire_lock() {
  typeset cnt=0
  while true; do
    if [ -f "$LOCKFILE" ]; then
      sleep 5
      cnt=$((cnt + 1))
      if [ "$cnt" -gt "$RETRIES" ]; then
        return 1
      fi
    elif (set -o noclobber; date > "$LOCKFILE") 2>/dev/null; then
      LOCK_HELD=1
      return 0
    elif [ ! -f "$LOCKFILE" ]; then
      # The file could not be created and nobody else holds it (for example
      # the directory is not writable).  Carry on without a lock rather than
      # lose the alarm.
      return 0
    fi
  done
}

#-------------------------------------------------------------------------------
# main SEVERITY CLASS_ID CLASS_MSG SPECIFIC_MSG SEE_ALSO_FILE
# Entry point; see the header for the processing steps.
# Returns 0 when the event was handled (mailed or deliberately suppressed),
# 1 on environment/lock problems, 2 on bad arguments.
#-------------------------------------------------------------------------------
main() {
  if [ $# -ge 2 ] && is_uint "$1" && is_uint "$2"; then
    :
  else
    echo "Usage: $0 severity class_id class_msg specific_msg see_also_file" >&2
    return 2
  fi

  EVENT_SEVERITY=$1
  EVENT_CLASS=$2
  EVENT_MSG="$3"
  EVENT_ADD_TEXT="$4"
  EVENT_FILE="$5"   # captured for completeness; not used below

  #-----------------------------------------------------------------------------
  # set the Informix environment
  #-----------------------------------------------------------------------------
  if [ -z "$INFORMIXDIR" ]; then   # Is my environment already set ?
    # Note: You can set the Informix environment several ways.
    # You can hardcode it here - or else you can set it using a file.
    # If you hardcode it here, it would look something like this...
    #
    #export INFORMIXDIR=/opt/informix
    #export INFORMIXSERVER=myserver_shm
    #export ONCONFIG=onconfig.myserver

    # If you set it using a file, it might look like this...
    ENV_FILE=/usr/local/bin/informix.env
    if [ -x "$ENV_FILE" ]; then
      . "$ENV_FILE"
    else
      echo "Error: the file used to set the Informix environment does not exist!" >&2
      echo "The missing filename is: $ENV_FILE" >&2
      return 1
    fi
  fi

  #-----------------------------------------------------------------------------
  # Check to make sure the $INFORMIXHOME/logs directory exists
  #-----------------------------------------------------------------------------
  INFORMIXHOME=${INFORMIXHOME:-"/home/informix"}
  if [ ! -d "$INFORMIXHOME/logs" ]; then
    echo "Error: $0: the directory $INFORMIXHOME/logs does not exist!" >&2
    echo "I don't know where to put the lockfile." >&2
    return 1
  fi

  LOCKFILE=$INFORMIXHOME/logs/alarm.lockfile
  LOGFILE=$INFORMIXHOME/logs/alarm.log

  if acquire_lock; then
    :
  else
    return 1
  fi

  SEVERITY_DESC=$(severity_desc "$EVENT_SEVERITY")

  #-----------------------------------------------------------------------------
  # Information events: filter, or turn "log complete" into a log-usage alarm
  #-----------------------------------------------------------------------------
  if [ "$EVENT_SEVERITY" -eq 2 ]; then
    # You usually do NOT want to log successful Logical Log Messages
    if [ "$EVENT_CLASS" -eq 18 ]; then
      release_lock
      return 0
    fi

    if [ "$EVENT_CLASS" -eq 23 ]; then
      # Note: if you are running on-bar, you will need to kick off a logical
      # log backup
      #/home/informix/bin/log_full.sh $1 $2 $3 $4 $5 &
      #
      # how many logs are full now (in percent) ?
      # Match the parameter name exactly so comment lines that merely mention
      # LOGFILES are ignored.
      LOGFILES=$(onstat -c | awk '$1 == "LOGFILES" { print $2; exit }')
      # Logs flagged used-and-backed-up, or free (newly added logs are treated
      # as backed up).
      BACKED=$(onstat -l | egrep -c "U-B----|F------")

      if CURR_PCT=$(pct_not_backed_up "$LOGFILES" "$BACKED"); then
        FULL1=$((LOGFILES - BACKED))
        if [ "$CURR_PCT" -lt "$MAX_PCT1" ]; then
          release_lock
          return 0
        fi
        if [ "$CURR_PCT" -ge "$MAX_PCT2" ]; then
          EVENT_SEVERITY=4
          SEVERITY_DESC="Emergency"
        else
          EVENT_SEVERITY=3
          SEVERITY_DESC="Warning"
        fi
        EVENT_MSG="$CURR_PCT% ($FULL1 of $LOGFILES) of Informix logical logs not backed up."
      else
        # onstat gave nothing usable; say so instead of mailing "% ( of )".
        EVENT_SEVERITY=3
        SEVERITY_DESC="Warning"
        EVENT_MSG="Unable to determine percentage of Informix logical logs not backed up."
      fi
    fi
  fi

  #-----------------------------------------------------------------------------
  # Check for quickly repeating messages
  #-----------------------------------------------------------------------------
  # One date call so DATE and TIME cannot straddle a minute or midnight.
  NOW=$(date '+%D %T')
  DATE=${NOW% *}
  TIME=${NOW#* }
  MINUTES_IN_DAY=$(minutes_in_day "$TIME")

  LASTDATE="00/00/00"
  LASTTIME="00:00:00"
  LASTMINUTES=0
  LASTSEVERITY=0
  LASTCLASS=0
  if [ -f "$LOGFILE" ]; then
    LASTLINE=$(tail -1 "$LOGFILE")
    # Here-document instead of "tail | read" so the variables are set in this
    # shell regardless of how the shell runs pipelines.
    read -r LASTDATE LASTTIME LASTMINUTES LASTSEVERITY LASTCLASS OTHER <<EOF
$LASTLINE
EOF
  fi

  FLAG_REPEAT=false
  if is_repeat "$DATE" "$MINUTES_IN_DAY" "$EVENT_CLASS" \
    "$LASTDATE" "$LASTMINUTES" "$LASTCLASS"; then
    FLAG_REPEAT=true
  fi

  # Build the message before logging it, so the log line actually carries it.
  msg=$(build_msg "$SEVERITY_DESC" "$EVENT_MSG" "$EVENT_ADD_TEXT")

  # Keep each log entry on one line: the repeat check parses the last line.
  LOGMSG=$(printf '%s' "$msg" | tr '\n' ' ')
  printf '%s\n' "$DATE $TIME $MINUTES_IN_DAY $EVENT_SEVERITY $EVENT_CLASS $LOGMSG" >> "$LOGFILE"

  if [ "$FLAG_REPEAT" = true ]; then
    release_lock
    return 0
  fi

  #-----------------------------------------------------------------------------
  # Maybe if the severity is high, you might want to send to a different
  # email list
  #-----------------------------------------------------------------------------
  # The list variables are deliberately unquoted: each word is one recipient.
  if [ "$EVENT_SEVERITY" -ge 3 ]; then
    printf '%s\n' "$msg" | mailx -s "$SEVERITY_DESC: Informix Event Alarm: $EVENT_MSG" $EMAIL_LIST2
  else
    printf '%s\n' "$msg" | mailx -s "$SEVERITY_DESC: Informix Event Alarm: $EVENT_MSG" $EMAIL_LIST1
  fi

  release_lock
  return 0
}

main "$@"
################################################################################